diff --git a/.clang-format b/.clang-format index 893d9c613f18..52e014971878 100644 --- a/.clang-format +++ b/.clang-format @@ -79,7 +79,7 @@ IndentWidth: 4 IndentWrappedFunctionNames: false MacroBlockBegin: '' MacroBlockEnd: '' -NamespaceIndentation: Inner +NamespaceIndentation: None ObjCBlockIndentWidth: 4 ObjCSpaceAfterProperty: true ObjCSpaceBeforeProtocolList: true @@ -89,6 +89,7 @@ PenaltyBreakFirstLessLess: 120 PenaltyBreakString: 1000 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 +RemoveBracesLLVM: true SpaceAfterCStyleCast: false SpaceBeforeAssignmentOperators: true SpaceBeforeParens: ControlStatements diff --git a/.clang-tidy b/.clang-tidy index 85989d311a22..3903911a277a 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -5,6 +5,9 @@ # a) the new check is not controversial (this includes many checks in readability-* and google-*) or # b) too noisy (checks with > 100 new warnings are considered noisy, this includes e.g. cppcoreguidelines-*). +# TODO: Once clang(-tidy) 17 is the minimum, we can convert this list to YAML +# See https://releases.llvm.org/17.0.1/tools/clang/tools/extra/docs/ReleaseNotes.html#improvements-to-clang-tidy + # TODO Let clang-tidy check headers in further directories # --> HeaderFilterRegex: '^.*/(src|base|programs|utils)/.*(h|hpp)$' HeaderFilterRegex: '^.*/(base)/.*(h|hpp)$' @@ -35,38 +38,9 @@ Checks: '*, -cert-oop54-cpp, -cert-oop57-cpp, - -clang-analyzer-optin.performance.Padding, - -clang-analyzer-optin.portability.UnixAPI, - -clang-analyzer-security.insecureAPI.bzero, - -clang-analyzer-security.insecureAPI.strcpy, - - -cppcoreguidelines-avoid-c-arrays, - -cppcoreguidelines-avoid-const-or-ref-data-members, - -cppcoreguidelines-avoid-do-while, - -cppcoreguidelines-avoid-goto, - -cppcoreguidelines-avoid-magic-numbers, - -cppcoreguidelines-avoid-non-const-global-variables, - -cppcoreguidelines-explicit-virtual-functions, - -cppcoreguidelines-init-variables, - -cppcoreguidelines-interfaces-global-init, - -cppcoreguidelines-macro-usage, - -cppcoreguidelines-narrowing-conversions, - -cppcoreguidelines-no-malloc, - -cppcoreguidelines-non-private-member-variables-in-classes, - -cppcoreguidelines-owning-memory, - -cppcoreguidelines-prefer-member-initializer, - -cppcoreguidelines-pro-bounds-array-to-pointer-decay, - -cppcoreguidelines-pro-bounds-constant-array-index, - -cppcoreguidelines-pro-bounds-pointer-arithmetic, - -cppcoreguidelines-pro-type-const-cast, - -cppcoreguidelines-pro-type-cstyle-cast, - -cppcoreguidelines-pro-type-member-init, - -cppcoreguidelines-pro-type-reinterpret-cast, - -cppcoreguidelines-pro-type-static-cast-downcast, - -cppcoreguidelines-pro-type-union-access, - -cppcoreguidelines-pro-type-vararg, - -cppcoreguidelines-slicing, - -cppcoreguidelines-special-member-functions, + -clang-analyzer-unix.Malloc, + + -cppcoreguidelines-*, # impractical in a codebase as large as ClickHouse, also slow -darwin-*, @@ -78,7 +52,6 @@ Checks: '*, -google-readability-function-size, -google-readability-namespace-comments, -google-readability-todo, - -google-upgrade-googletest-case, -hicpp-avoid-c-arrays, -hicpp-avoid-goto, @@ -108,6 +81,7 @@ Checks: '*, -openmp-*, -misc-const-correctness, + -misc-include-cleaner, # useful but far too many occurrences -misc-no-recursion, -misc-non-private-member-variables-in-classes, -misc-confusable-identifiers, # useful but slooow @@ -127,10 +101,12 @@ Checks: '*, -performance-inefficient-string-concatenation, -performance-no-int-to-ptr, + -performance-avoid-endl, -performance-unnecessary-value-param, 
-portability-simd-intrinsics, + -readability-avoid-unconditional-preprocessor-if, -readability-braces-around-statements, -readability-convert-member-functions-to-static, -readability-else-after-return, @@ -154,6 +130,13 @@ Checks: '*, WarningsAsErrors: '*' +ExtraArgs: +# clang-tidy 17 started to complain (for unknown reasons) that various pragmas are unknown ("clang-diagnostic-unknown-pragmas"). +# This is technically a compiler error, not a clang-tidy error. We could litter the code base with more pragmas that suppress +# this error but it is better to pass the following flag to the compiler: +- '-Wno-unknown-pragmas' +- '-Wno-unused-command-line-argument' # similar issue + CheckOptions: readability-identifier-naming.ClassCase: CamelCase readability-identifier-naming.EnumCase: CamelCase diff --git a/.github/ISSUE_TEMPLATE/85_bug-report.md b/.github/ISSUE_TEMPLATE/85_bug-report.md index 08d03c284ca3..fde5917a8a73 100644 --- a/.github/ISSUE_TEMPLATE/85_bug-report.md +++ b/.github/ISSUE_TEMPLATE/85_bug-report.md @@ -21,8 +21,7 @@ assignees: '' **Enable crash reporting** -> If possible, change "enabled" to true in "send_crash_reports" section in `config.xml`: - +> Change "enabled" to true in the "send_crash_reports" section of `config.xml`: ``` diff --git a/.github/workflows/auto_release.yml b/.github/workflows/auto_release.yml new file mode 100644 index 000000000000..f1a6b307b40a --- /dev/null +++ b/.github/workflows/auto_release.yml @@ -0,0 +1,45 @@ +name: AutoRelease + +env: + # Force the stdout and stderr streams to be unbuffered + PYTHONUNBUFFERED: 1 + +concurrency: + group: auto-release +on: # yamllint disable-line rule:truthy + # schedule: + # - cron: '0 10-16 * * 1-5' + workflow_dispatch: + +jobs: + CherryPick: + runs-on: [self-hosted, style-checker-aarch64] + steps: + - name: Set envs + # https://docs.github.com/en/actions/learn-github-actions/workflow-commands-for-github-actions#multiline-strings + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/cherry_pick + ROBOT_CLICKHOUSE_SSH_KEY<> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=fuzzers + EOF + - name: Download changed images + # even if artifact does not exist, e.g.
on `do not test` label or failed Docker job + continue-on-error: true + uses: actions/download-artifact@v3 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true + ref: ${{github.ref}} + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + libFuzzerTest: + needs: [BuilderFuzzers] + runs-on: [self-hosted, func-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/libfuzzer + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=libFuzzer tests + REPO_COPY=${{runner.temp}}/libfuzzer/ClickHouse + KILL_TIMEOUT=10800 + EOF + - name: Download changed images + # even if artifact does not exist, e.g. on `do not test` label or failed Docker job + continue-on-error: true + uses: actions/download-artifact@v3 + with: + name: changed_images + path: ${{ env.TEMP_PATH }} + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: libFuzzer test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 libfuzzer_test_check.py "$CHECK_NAME" "$KILL_TIMEOUT" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index f0741b5465f3..4771e5842666 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -60,6 +60,7 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true + fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags - name: Download changed aarch64 images uses: actions/download-artifact@v3 with: @@ -580,6 +581,13 @@ jobs: clear-repository: true submodules: true fetch-depth: 0 # otherwise we will have no info about contributors + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" @@ -706,6 +714,13 @@ jobs: clear-repository: true submodules: true fetch-depth: 0 # otherwise we will have no info about contributors + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . 
&& echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" @@ -850,6 +865,90 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinRISCV64: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_riscv64 + EOF + - name: Download changed images + uses: actions/download-artifact@v3 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinS390X: + needs: [DockerHubPush] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_s390x + EOF + - name: Download changed images + uses: actions/download-artifact@v3 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true + fetch-depth: 0 # otherwise we will have no info about contributors + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ @@ -932,6 +1031,8 @@ jobs: - BuilderBinDarwinAarch64 - BuilderBinFreeBSD - BuilderBinPPC64 + - BuilderBinRISCV64 + - BuilderBinS390X - BuilderBinAmd64Compat - BuilderBinAarch64V80Compat - BuilderBinClangTidy @@ -2827,6 +2928,216 @@ jobs: docker ps --quiet | xargs --no-run-if-empty 
docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan0: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan1: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan2: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan3: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + 
REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan4: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan5: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=5 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] @@ -3390,7 +3701,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/unit_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Unit tests (release-clang) + CHECK_NAME=Unit tests (release) REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports @@ -3920,6 +4231,12 @@ jobs: - IntegrationTestsAsan3 - IntegrationTestsAsan4 - IntegrationTestsAsan5 + - IntegrationTestsAnalyzerAsan0 + - IntegrationTestsAnalyzerAsan1 + - IntegrationTestsAnalyzerAsan2 + - IntegrationTestsAnalyzerAsan3 + - IntegrationTestsAnalyzerAsan4 + - IntegrationTestsAnalyzerAsan5 - IntegrationTestsRelease0 - IntegrationTestsRelease1 - 
IntegrationTestsRelease2 diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index acf6bbe8f6a3..ed8159b229e7 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -53,6 +53,7 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true + fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags - name: Download changed aarch64 images uses: actions/download-artifact@v3 with: @@ -75,61 +76,14 @@ jobs: Codebrowser: needs: [DockerHubPush] uses: ./.github/workflows/woboq.yml - BuilderCoverity: - needs: DockerHubPush - runs-on: [self-hosted, builder] - steps: - - name: Set envs - run: | - cat >> "$GITHUB_ENV" << 'EOF' - BUILD_NAME=coverity - CACHES_PATH=${{runner.temp}}/../ccaches - IMAGES_PATH=${{runner.temp}}/images_path - REPO_COPY=${{runner.temp}}/build_check/ClickHouse - TEMP_PATH=${{runner.temp}}/build_check - EOF - echo "COVERITY_TOKEN=${{ secrets.COVERITY_TOKEN }}" >> "$GITHUB_ENV" - - name: Download changed images - uses: actions/download-artifact@v3 - with: - name: changed_images - path: ${{ env.IMAGES_PATH }} - - name: Check out repository code - uses: ClickHouse/checkout@v1 - with: - clear-repository: true - submodules: true - - name: Build - run: | - sudo rm -fr "$TEMP_PATH" - mkdir -p "$TEMP_PATH" - cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" - cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" - - name: Upload Coverity Analysis - if: ${{ success() || failure() }} - run: | - curl --form token="${COVERITY_TOKEN}" \ - --form email='security+coverity@clickhouse.com' \ - --form file="@$TEMP_PATH/$BUILD_NAME/coverity-scan.tar.gz" \ - --form version="${GITHUB_REF#refs/heads/}-${GITHUB_SHA::6}" \ - --form description="Nighly Scan: $(date +'%Y-%m-%dT%H:%M:%S')" \ - https://scan.coverity.com/builds?project=ClickHouse%2FClickHouse - - name: Cleanup - if: always() - run: | - docker ps --quiet | xargs --no-run-if-empty docker kill ||: - docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: - sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" SonarCloud: - # TODO: Remove if: whenever SonarCloud supports c++23 - if: ${{ false }} runs-on: [self-hosted, builder] env: SONAR_SCANNER_VERSION: 4.8.0.2856 SONAR_SERVER_URL: "https://sonarcloud.io" BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed - CC: clang-16 - CXX: clang++-16 + CC: clang-17 + CXX: clang++-17 steps: - name: Check out repository code uses: ClickHouse/checkout@v1 @@ -159,7 +113,7 @@ jobs: - name: Set Up Build Tools run: | sudo apt-get update - sudo apt-get install -yq git cmake ccache ninja-build python3 yasm + sudo apt-get install -yq git cmake ccache ninja-build python3 yasm nasm sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)" - name: Run build-wrapper run: | @@ -178,4 +132,5 @@ jobs: --define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \ --define sonar.projectKey="ClickHouse_ClickHouse" \ --define sonar.organization="clickhouse-java" \ - --define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql" \ + --define sonar.cfamily.cpp23.enabled=true \ + --define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql" diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index afc08f3e6376..676fb993ca89 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -13,12 +13,11 @@ on: # yamllint disable-line rule:truthy branches: - master paths-ignore: - - 'CHANGELOG.md' - - 
'README.md' - - 'SECURITY.md' + - '**.md' - 'docker/docs/**' - 'docs/**' - 'utils/check-style/aspell-ignore/**' + - 'tests/ci/docs_check.py' ########################################################################################## ##################################### SMALL CHECKS ####################################### ########################################################################################## @@ -94,6 +93,7 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true + fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags - name: Download changed aarch64 images uses: actions/download-artifact@v3 with: @@ -647,6 +647,13 @@ jobs: with: clear-repository: true submodules: true + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" @@ -770,6 +777,13 @@ jobs: with: clear-repository: true submodules: true + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" @@ -911,6 +925,88 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinRISCV64: + needs: [DockerHubPush, FastTest, StyleCheck] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_riscv64 + EOF + - name: Download changed images + uses: actions/download-artifact@v3 + with: + name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" + BuilderBinS390X: + needs: [DockerHubPush, FastTest, StyleCheck] + runs-on: [self-hosted, builder] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/build_check + IMAGES_PATH=${{runner.temp}}/images_path + REPO_COPY=${{runner.temp}}/build_check/ClickHouse + CACHES_PATH=${{runner.temp}}/../ccaches + BUILD_NAME=binary_s390x + EOF + - name: Download changed images + uses: actions/download-artifact@v3 + with: + 
name: changed_images + path: ${{ env.IMAGES_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + submodules: true + - name: Build + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME" + - name: Upload build URLs to artifacts + if: ${{ success() || failure() }} + uses: actions/upload-artifact@v3 + with: + name: ${{ env.BUILD_URLS }} + path: ${{ env.TEMP_PATH }}/${{ env.BUILD_URLS }}.json + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" "$CACHES_PATH" ############################################################################################ ##################################### Docker images ####################################### ############################################################################################ @@ -992,6 +1088,8 @@ jobs: - BuilderBinDarwinAarch64 - BuilderBinFreeBSD - BuilderBinPPC64 + - BuilderBinRISCV64 + - BuilderBinS390X - BuilderBinAmd64Compat - BuilderBinAarch64V80Compat - BuilderBinClangTidy @@ -3861,6 +3959,216 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan0: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=0 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan1: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=1 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan2: + 
needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=2 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan3: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=3 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan4: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=4 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" + IntegrationTestsAnalyzerAsan5: + needs: [BuilderDebAsan] + runs-on: [self-hosted, stress-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/integration_tests_asan + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=Integration tests (asan, analyzer) + REPO_COPY=${{runner.temp}}/integration_tests_asan/ClickHouse + RUN_BY_HASH_NUM=5 + RUN_BY_HASH_TOTAL=6 + EOF + - name: Download json reports + uses: 
actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + clear-repository: true + - name: Integration test + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 integration_test_check.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" IntegrationTestsTsan0: needs: [BuilderDebTsan] runs-on: [self-hosted, stress-tester] @@ -4289,7 +4597,7 @@ jobs: cat >> "$GITHUB_ENV" << 'EOF' TEMP_PATH=${{runner.temp}}/unit_tests_asan REPORTS_PATH=${{runner.temp}}/reports_dir - CHECK_NAME=Unit tests (release-clang) + CHECK_NAME=Unit tests (release) REPO_COPY=${{runner.temp}}/unit_tests_asan/ClickHouse EOF - name: Download json reports @@ -4847,6 +5155,12 @@ jobs: - IntegrationTestsAsan3 - IntegrationTestsAsan4 - IntegrationTestsAsan5 + - IntegrationTestsAnalyzerAsan0 + - IntegrationTestsAnalyzerAsan1 + - IntegrationTestsAnalyzerAsan2 + - IntegrationTestsAnalyzerAsan3 + - IntegrationTestsAnalyzerAsan4 + - IntegrationTestsAnalyzerAsan5 - IntegrationTestsRelease0 - IntegrationTestsRelease1 - IntegrationTestsRelease2 @@ -4886,9 +5200,16 @@ jobs: cd "$GITHUB_WORKSPACE/tests/ci" python3 finish_check.py python3 merge_pr.py --check-approved -############################################################################################## -########################### SQLLOGIC TEST ################################################### -############################################################################################## +############################################################################################# +####################################### libFuzzer ########################################### +############################################################################################# + libFuzzer: + if: contains(github.event.pull_request.labels.*.name, 'libFuzzer') + needs: [DockerHubPush, StyleCheck] + uses: ./.github/workflows/libfuzzer.yml + ############################################################################################## + ############################ SQLLOGIC TEST ################################################### + ############################################################################################## SQLLogicTestRelease: needs: [BuilderDebRelease] runs-on: [self-hosted, func-tester] @@ -4924,3 +5245,39 @@ jobs: docker ps --quiet | xargs --no-run-if-empty docker kill ||: docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: sudo rm -fr "$TEMP_PATH" +############################################################################################## +##################################### SQL TEST ############################################### +############################################################################################## + SQLTest: + needs: [BuilderDebRelease] + runs-on: [self-hosted, fuzzer-unit-tester] + steps: + - name: Set envs + run: | + cat >> "$GITHUB_ENV" << 'EOF' + TEMP_PATH=${{runner.temp}}/sqltest + REPORTS_PATH=${{runner.temp}}/reports_dir + CHECK_NAME=SQLTest + REPO_COPY=${{runner.temp}}/sqltest/ClickHouse + EOF + - name: Download json reports + uses: actions/download-artifact@v3 + with: + path: ${{ env.REPORTS_PATH }} + - name: Check out repository code + uses: ClickHouse/checkout@v1 + with: + 
clear-repository: true + - name: SQLTest + run: | + sudo rm -fr "$TEMP_PATH" + mkdir -p "$TEMP_PATH" + cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH" + cd "$REPO_COPY/tests/ci" + python3 sqltest.py "$CHECK_NAME" + - name: Cleanup + if: always() + run: | + docker ps --quiet | xargs --no-run-if-empty docker kill ||: + docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||: + sudo rm -fr "$TEMP_PATH" diff --git a/.github/workflows/release_branches.yml b/.github/workflows/release_branches.yml index 212848155831..fba56339d166 100644 --- a/.github/workflows/release_branches.yml +++ b/.github/workflows/release_branches.yml @@ -52,6 +52,7 @@ jobs: uses: ClickHouse/checkout@v1 with: clear-repository: true + fetch-depth: 0 # to find ancestor merge commits necessary for finding proper docker tags - name: Download changed aarch64 images uses: actions/download-artifact@v3 with: @@ -455,6 +456,13 @@ jobs: clear-repository: true submodules: true fetch-depth: 0 # otherwise we will have no info about contributors + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" @@ -497,6 +505,13 @@ jobs: clear-repository: true submodules: true fetch-depth: 0 # otherwise we will have no info about contributors + - name: Apply sparse checkout for contrib # in order to check that it doesn't break build + run: | + rm -rf "$GITHUB_WORKSPACE/contrib" && echo 'removed' + git -C "$GITHUB_WORKSPACE" checkout . && echo 'restored' + "$GITHUB_WORKSPACE/contrib/update-submodules.sh" && echo 'OK' + du -hs "$GITHUB_WORKSPACE/contrib" ||: + find "$GITHUB_WORKSPACE/contrib" -type f | wc -l ||: - name: Build run: | sudo rm -fr "$TEMP_PATH" diff --git a/.gitignore b/.gitignore index a04c60d5ca3a..5341f23a94f1 100644 --- a/.gitignore +++ b/.gitignore @@ -69,6 +69,7 @@ cmake-build-* *.pyc __pycache__ *.pytest_cache +.mypy_cache test.cpp CPackConfig.cmake @@ -161,8 +162,10 @@ tests/queries/0_stateless/test_* tests/queries/0_stateless/*.binary tests/queries/0_stateless/*.generated-expect tests/queries/0_stateless/*.expect.history +tests/integration/**/_gen # rust /rust/**/target # It is autogenerated from *.in /rust/**/.cargo/config.toml +/rust/**/vendor diff --git a/.gitmodules b/.gitmodules index 151dc28c55bc..f790e0f8d5a7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,7 +3,7 @@ url = https://github.com/facebook/zstd [submodule "contrib/lz4"] path = contrib/lz4 - url = https://github.com/lz4/lz4 + url = https://github.com/ClickHouse/lz4 [submodule "contrib/librdkafka"] path = contrib/librdkafka url = https://github.com/ClickHouse/librdkafka @@ -13,7 +13,6 @@ [submodule "contrib/zlib-ng"] path = contrib/zlib-ng url = https://github.com/ClickHouse/zlib-ng - branch = clickhouse-2.0.x [submodule "contrib/googletest"] path = contrib/googletest url = https://github.com/google/googletest @@ -41,13 +40,9 @@ [submodule "contrib/boost"] path = contrib/boost url = https://github.com/ClickHouse/boost -[submodule "contrib/base64"] - path = contrib/base64 - url = https://github.com/ClickHouse/Turbo-Base64 [submodule "contrib/arrow"] path = contrib/arrow url = https://github.com/ClickHouse/arrow - branch = blessed/release-6.0.1 [submodule "contrib/thrift"] path = contrib/thrift url = 
https://github.com/apache/thrift @@ -93,7 +88,6 @@ [submodule "contrib/grpc"] path = contrib/grpc url = https://github.com/ClickHouse/grpc - branch = v1.33.2 [submodule "contrib/aws"] path = contrib/aws url = https://github.com/ClickHouse/aws-sdk-cpp @@ -140,11 +134,9 @@ [submodule "contrib/cassandra"] path = contrib/cassandra url = https://github.com/ClickHouse/cpp-driver - branch = clickhouse [submodule "contrib/libuv"] path = contrib/libuv url = https://github.com/ClickHouse/libuv - branch = clickhouse [submodule "contrib/fmtlib"] path = contrib/fmtlib url = https://github.com/fmtlib/fmt @@ -157,11 +149,9 @@ [submodule "contrib/cyrus-sasl"] path = contrib/cyrus-sasl url = https://github.com/ClickHouse/cyrus-sasl - branch = cyrus-sasl-2.1 [submodule "contrib/croaring"] path = contrib/croaring url = https://github.com/RoaringBitmap/CRoaring - branch = v0.2.66 [submodule "contrib/miniselect"] path = contrib/miniselect url = https://github.com/danlark1/miniselect @@ -174,7 +164,6 @@ [submodule "contrib/abseil-cpp"] path = contrib/abseil-cpp url = https://github.com/abseil/abseil-cpp - branch = lts_2021_11_02 [submodule "contrib/dragonbox"] path = contrib/dragonbox url = https://github.com/ClickHouse/dragonbox @@ -187,7 +176,6 @@ [submodule "contrib/boringssl"] path = contrib/boringssl url = https://github.com/ClickHouse/boringssl - branch = unknown_branch_from_artur [submodule "contrib/NuRaft"] path = contrib/NuRaft url = https://github.com/ClickHouse/NuRaft @@ -248,7 +236,6 @@ [submodule "contrib/annoy"] path = contrib/annoy url = https://github.com/ClickHouse/annoy - branch = ClickHouse-master [submodule "contrib/qpl"] path = contrib/qpl url = https://github.com/intel/qpl @@ -258,9 +245,6 @@ [submodule "contrib/wyhash"] path = contrib/wyhash url = https://github.com/wangyi-fudan/wyhash -[submodule "contrib/hashidsxx"] - path = contrib/hashidsxx - url = https://github.com/schoentoon/hashidsxx [submodule "contrib/nats-io"] path = contrib/nats-io url = https://github.com/ClickHouse/nats.c @@ -273,6 +257,9 @@ [submodule "contrib/corrosion"] path = contrib/corrosion url = https://github.com/corrosion-rs/corrosion +[submodule "contrib/libssh"] + path = contrib/libssh + url = https://github.com/ClickHouse/libssh.git [submodule "contrib/morton-nd"] path = contrib/morton-nd url = https://github.com/morton-nd/morton-nd @@ -285,7 +272,6 @@ [submodule "contrib/openssl"] path = contrib/openssl url = https://github.com/openssl/openssl - branch = openssl-3.0 [submodule "contrib/google-benchmark"] path = contrib/google-benchmark url = https://github.com/google/benchmark @@ -334,6 +320,10 @@ [submodule "contrib/liburing"] path = contrib/liburing url = https://github.com/axboe/liburing +[submodule "contrib/libarchive"] + path = contrib/libarchive + url = https://github.com/libarchive/libarchive.git + ignore = dirty [submodule "contrib/libfiu"] path = contrib/libfiu url = https://github.com/ClickHouse/libfiu.git @@ -343,3 +333,21 @@ [submodule "contrib/c-ares"] path = contrib/c-ares url = https://github.com/c-ares/c-ares.git +[submodule "contrib/incbin"] + path = contrib/incbin + url = https://github.com/graphitemaster/incbin.git +[submodule "contrib/usearch"] + path = contrib/usearch + url = https://github.com/unum-cloud/usearch.git +[submodule "contrib/SimSIMD"] + path = contrib/SimSIMD + url = https://github.com/ashvardanian/SimSIMD.git +[submodule "contrib/FP16"] + path = contrib/FP16 + url = https://github.com/Maratyszcza/FP16.git +[submodule "contrib/robin-map"] + path = contrib/robin-map + url = 
https://github.com/Tessil/robin-map.git +[submodule "contrib/aklomp-base64"] + path = contrib/aklomp-base64 + url = https://github.com/aklomp/base64.git diff --git a/CHANGELOG.md b/CHANGELOG.md index a2e7b021081b..e95daca2a466 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,7 @@ ### Table of Contents +**[ClickHouse release v23.9, 2023-09-28](#239)**<br/>
+**[ClickHouse release v23.8 LTS, 2023-08-31](#238)**<br/>
+**[ClickHouse release v23.7, 2023-07-27](#237)**<br/>
**[ClickHouse release v23.6, 2023-06-30](#236)**<br/>
**[ClickHouse release v23.5, 2023-06-08](#235)**<br/>
**[ClickHouse release v23.4, 2023-04-26](#234)**<br/>
@@ -9,6 +12,568 @@ # 2023 Changelog +### ClickHouse release 23.9, 2023-09-28 + +#### Backward Incompatible Change +* Remove the `status_info` configuration option and dictionaries status from the default Prometheus handler. [#54090](https://github.com/ClickHouse/ClickHouse/pull/54090) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The experimental parts metadata cache is removed from the codebase. [#54215](https://github.com/ClickHouse/ClickHouse/pull/54215) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable setting `input_format_json_try_infer_numbers_from_strings` by default, so we don't try to infer numbers from strings in JSON formats by default to avoid possible parsing errors when sample data contains strings that looks like a number. [#55099](https://github.com/ClickHouse/ClickHouse/pull/55099) ([Kruglov Pavel](https://github.com/Avogar)). + +#### New Feature +* Improve schema inference from JSON formats: 1) Now it's possible to infer named Tuples from JSON objects without experimantal JSON type under a setting `input_format_json_try_infer_named_tuples_from_objects` in JSON formats. Previously without experimantal type JSON we could only infer JSON objects as Strings or Maps, now we can infer named Tuple. Resulting Tuple type will conain all keys of objects that were read in data sample during schema inference. It can be useful for reading structured JSON data without sparse objects. The setting is enabled by default. 2) Allow parsing JSON array into a column with type String under setting `input_format_json_read_arrays_as_strings`. It can help reading arrays with values with different types. 3) Allow to use type String for JSON keys with unkown types (`null`/`[]`/`{}`) in sample data under setting `input_format_json_infer_incomplete_types_as_strings`. Now in JSON formats we can read any value into String column and we can avoid getting error `Cannot determine type for column 'column_name' by first 25000 rows of data, most likely this column contains only Nulls or empty Arrays/Maps` during schema inference by using type String for unknown types, so the data will be read successfully. [#54427](https://github.com/ClickHouse/ClickHouse/pull/54427) ([Kruglov Pavel](https://github.com/Avogar)). +* Added IO scheduling support for remote disks. Storage configuration for disk types `s3`, `s3_plain`, `hdfs` and `azure_blob_storage` can now contain `read_resource` and `write_resource` elements holding resource names. Scheduling policies for these resources can be configured in a separate server configuration section `resources`. Queries can be marked using setting `workload` and classified using server configuration section `workload_classifiers` to achieve diverse resource scheduling goals. More details in [the docs](https://clickhouse.com/docs/en/operations/workload-scheduling). [#47009](https://github.com/ClickHouse/ClickHouse/pull/47009) ([Sergei Trifonov](https://github.com/serxa)). Added "bandwidth_limit" IO scheduling node type. It allows you to specify `max_speed` and `max_burst` constraints on traffic passing though this node. [#54618](https://github.com/ClickHouse/ClickHouse/pull/54618) ([Sergei Trifonov](https://github.com/serxa)). +* Added new type of authentication based on SSH keys. It works only for the native TCP protocol. [#41109](https://github.com/ClickHouse/ClickHouse/pull/41109) ([George Gamezardashvili](https://github.com/InfJoker)). +* Added a new column `_block_number` for MergeTree tables. 
[#44532](https://github.com/ClickHouse/ClickHouse/issues/44532). [#47532](https://github.com/ClickHouse/ClickHouse/pull/47532) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Add `IF EMPTY` clause for `DROP TABLE` queries. [#48915](https://github.com/ClickHouse/ClickHouse/pull/48915) ([Pavel Novitskiy](https://github.com/pnovitskiy)). +* SQL functions `toString(datetime, timezone)` and `formatDateTime(datetime, format, timezone)` now support non-constant timezone arguments. [#53680](https://github.com/ClickHouse/ClickHouse/pull/53680) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add support for `ALTER TABLE MODIFY COMMENT`. Note: something similar was added by an external contributor a long time ago, but the feature did not work at all and only confused users. This closes [#36377](https://github.com/ClickHouse/ClickHouse/issues/36377). [#51304](https://github.com/ClickHouse/ClickHouse/pull/51304) ([Alexey Milovidov](https://github.com/alexey-milovidov)). Note: this command does not propagate between replicas, so the replicas of a table could have different comments. +* Added `GCD` a.k.a. "greatest common divisor" as a new data compression codec. The codec computes the GCD of all column values and then divides each value by the GCD. The GCD codec is a data preparation codec (similar to Delta and DoubleDelta) and cannot be used stand-alone. It works with integer, decimal and date/time data types. A viable use case for the GCD codec is column values that change (increase/decrease) in multiples of the GCD, e.g. 24 - 28 - 16 - 24 - 8 - 24 (assuming GCD = 4). [#53149](https://github.com/ClickHouse/ClickHouse/pull/53149) ([Alexander Nam](https://github.com/seshWCS)). +* Two new type aliases `DECIMAL(P)` (as a shortcut for `DECIMAL(P, 0)`) and `DECIMAL` (as a shortcut for `DECIMAL(10, 0)`) were added. This makes ClickHouse more compatible with MySQL's SQL dialect. [#53328](https://github.com/ClickHouse/ClickHouse/pull/53328) ([Val Doroshchuk](https://github.com/valbok)). +* Added a new system log table `backup_log` to track all `BACKUP` and `RESTORE` operations. [#53638](https://github.com/ClickHouse/ClickHouse/pull/53638) ([Victor Krasnov](https://github.com/sirvickr)). +* Added a format setting `output_format_markdown_escape_special_characters` (default: false). The setting controls whether special characters like `!`, `#`, `$` etc. are escaped (i.e. prefixed by a backslash) in the `Markdown` output format. [#53860](https://github.com/ClickHouse/ClickHouse/pull/53860) ([irenjj](https://github.com/irenjj)). +* Add function `decodeHTMLComponent`. [#54097](https://github.com/ClickHouse/ClickHouse/pull/54097) ([Bharat Nallan](https://github.com/bharatnc)). +* Added `peak_threads_usage` to the `query_log` table. [#54335](https://github.com/ClickHouse/ClickHouse/pull/54335) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Add `SHOW FUNCTIONS` support to clickhouse-client. [#54337](https://github.com/ClickHouse/ClickHouse/pull/54337) ([Julia Kartseva](https://github.com/wat-ze-hex)). +* Added function `toDaysSinceYearZero` with alias `TO_DAYS` (for compatibility with MySQL) which returns the number of days passed since `0001-01-01` (in the proleptic Gregorian calendar); see the short example below. [#54479](https://github.com/ClickHouse/ClickHouse/pull/54479) ([Robert Schulze](https://github.com/rschu1ze)). Function `toDaysSinceYearZero()` now supports arguments of type `DateTime` and `DateTime64`. [#54856](https://github.com/ClickHouse/ClickHouse/pull/54856) ([Serge Klochkov](https://github.com/slvrtrn)).
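A minimal usage sketch for the `toDaysSinceYearZero` entry above (editor's illustration, not part of the original patch; the exact day counts returned are omitted rather than guessed):

```sql
-- New in 23.9: number of days since 0001-01-01 (proleptic Gregorian calendar).
-- TO_DAYS is the MySQL-compatible alias; DateTime arguments work per #54856.
SELECT
    toDaysSinceYearZero(toDate('2023-09-28'))         AS days_from_date,
    TO_DAYS(toDateTime('2023-09-28 12:00:00'))        AS days_from_datetime;
```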
+* Added functions `YYYYMMDDtoDate`, `YYYYMMDDtoDate32`, `YYYYMMDDhhmmssToDateTime` and `YYYYMMDDhhmmssToDateTime64`. They convert a date or date with time encoded as an integer (e.g. 20230911) into a native date or date with time. As such, they provide the opposite functionality of the existing functions `toYYYYMMDD` and `toYYYYMMDDhhmmss` (see the short example after this group of entries). [#54509](https://github.com/ClickHouse/ClickHouse/pull/54509) ([Quanfa Fu](https://github.com/dentiscalprum)) ([Robert Schulze](https://github.com/rschu1ze)). +* Add several string distance functions, including `byteHammingDistance`, `editDistance`. [#54935](https://github.com/ClickHouse/ClickHouse/pull/54935) ([flynn](https://github.com/ucasfl)). +* Allow specifying the expiration date and, optionally, the time for user credentials with the `VALID UNTIL datetime` clause. [#51261](https://github.com/ClickHouse/ClickHouse/pull/51261) ([Nikolay Degterinsky](https://github.com/evillique)). +* Allow S3-style URLs for table functions `s3`, `gcs`, `oss`. The URL is automatically converted to HTTP. Example: `'s3://clickhouse-public-datasets/hits.csv'` is converted to `'https://clickhouse-public-datasets.s3.amazonaws.com/hits.csv'`. [#54931](https://github.com/ClickHouse/ClickHouse/pull/54931) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add new setting `print_pretty_type_names` to pretty-print deeply nested types like Tuples/Maps/Arrays. [#55095](https://github.com/ClickHouse/ClickHouse/pull/55095) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Performance Improvement +* Speed up reading from S3 by enabling prefetches by default. [#53709](https://github.com/ClickHouse/ClickHouse/pull/53709) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not implicitly read PK and version columns in lonely parts if unnecessary for queries with FINAL. [#53919](https://github.com/ClickHouse/ClickHouse/pull/53919) ([Duc Canh Le](https://github.com/canhld94)). +* Optimize GROUP BY with constant keys. Will optimize queries with group by `_file/_path` after https://github.com/ClickHouse/ClickHouse/pull/53529. [#53549](https://github.com/ClickHouse/ClickHouse/pull/53549) ([Kruglov Pavel](https://github.com/Avogar)). +* Improve performance of sorting for `Decimal` columns. Improve performance of insertion into `MergeTree` if ORDER BY contains a `Decimal` column. Improve performance of sorting when data is already sorted or almost sorted. [#35961](https://github.com/ClickHouse/ClickHouse/pull/35961) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve performance for huge query analysis. Fixes [#51224](https://github.com/ClickHouse/ClickHouse/issues/51224). [#51469](https://github.com/ClickHouse/ClickHouse/pull/51469) ([frinkr](https://github.com/frinkr)). +* An optimization to rewrite `COUNT(DISTINCT ...)` and various `uniq` variants to `count` if it is selected from a subquery with GROUP BY. [#52082](https://github.com/ClickHouse/ClickHouse/pull/52082) [#52645](https://github.com/ClickHouse/ClickHouse/pull/52645) ([JackyWoo](https://github.com/JackyWoo)). +* Remove manual calls to `mmap/mremap/munmap` and delegate all this work to `jemalloc`, which slightly improves performance. [#52792](https://github.com/ClickHouse/ClickHouse/pull/52792) ([Nikita Taranov](https://github.com/nickitat)). +* Fixed high CPU consumption when working with NATS. [#54399](https://github.com/ClickHouse/ClickHouse/pull/54399) ([Vasilev Pyotr](https://github.com/vahpetr)).
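A hedged sketch of the integer/date round trip and the new string distance functions referenced above (editor's illustration; function casing follows the entries, and only the round-trip values are stated):

```sql
-- YYYYMMDDtoDate is the inverse of the long-standing toYYYYMMDD.
SELECT
    YYYYMMDDtoDate(20230911)                AS d,     -- 2023-09-11
    toYYYYMMDD(YYYYMMDDtoDate(20230911))    AS back,  -- 20230911
    editDistance('clickhouse', 'mouse')     AS dist;  -- Levenshtein distance
```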
+* Since we use separate instructions for executing `toString()` with a datetime argument, it is possible to improve performance a bit for non-datetime arguments and make some parts of the code cleaner. Follows up [#53680](https://github.com/ClickHouse/ClickHouse/issues/53680). [#54443](https://github.com/ClickHouse/ClickHouse/pull/54443) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Instead of serializing JSON elements into a `std::stringstream`, this PR tries to put the serialization result into `ColumnString` directly. [#54613](https://github.com/ClickHouse/ClickHouse/pull/54613) ([lgbo](https://github.com/lgbo-ustc)). +* Enable ORDER BY optimization for reading data in the corresponding order from a MergeTree table when the table is behind a view. [#54628](https://github.com/ClickHouse/ClickHouse/pull/54628) ([Vitaly Baranov](https://github.com/vitlibar)). +* Improve JSON SQL functions by reusing `GeneratorJSONPath` and removing several shared pointers. [#54735](https://github.com/ClickHouse/ClickHouse/pull/54735) ([lgbo](https://github.com/lgbo-ustc)). +* Keeper tries to batch flush requests for better performance. [#53049](https://github.com/ClickHouse/ClickHouse/pull/53049) ([Antonio Andelic](https://github.com/antonio2368)). +* Now `clickhouse-client` processes files in parallel in case of `INFILE 'glob_expression'`. Closes [#54218](https://github.com/ClickHouse/ClickHouse/issues/54218). [#54533](https://github.com/ClickHouse/ClickHouse/pull/54533) ([Max K.](https://github.com/mkaynov)). +* Allow using the primary key for the IN function when the primary key column types differ from the column types on the right side of `IN`. Example: `SELECT id FROM test_table WHERE id IN (SELECT '5')`. Closes [#48936](https://github.com/ClickHouse/ClickHouse/issues/48936). [#54544](https://github.com/ClickHouse/ClickHouse/pull/54544) ([Maksim Kita](https://github.com/kitaisreal)). +* Hash JOIN tries to shrink internal buffers consuming half of the maximal available memory (set by `max_bytes_in_join`). [#54584](https://github.com/ClickHouse/ClickHouse/pull/54584) ([vdimir](https://github.com/vdimir)). +* Respect `max_block_size` for array join to avoid possible OOM. Closes [#54290](https://github.com/ClickHouse/ClickHouse/issues/54290). [#54664](https://github.com/ClickHouse/ClickHouse/pull/54664) ([李扬](https://github.com/taiyang-li)). +* Reuse HTTP connections in the `s3` table function. [#54812](https://github.com/ClickHouse/ClickHouse/pull/54812) ([Michael Kolupaev](https://github.com/al13n321)). +* Replace the linear search in `MergeTreeRangeReader::Stream::ceilRowsToCompleteGranules` with a binary search. [#54869](https://github.com/ClickHouse/ClickHouse/pull/54869) ([usurai](https://github.com/usurai)). + +#### Experimental Feature +* The creation of `Annoy` indexes can now be parallelized using the setting `max_threads_for_annoy_index_creation`. [#54047](https://github.com/ClickHouse/ClickHouse/pull/54047) ([Robert Schulze](https://github.com/rschu1ze)). +* Parallel replicas over distributed tables don't read from all replicas. [#54199](https://github.com/ClickHouse/ClickHouse/pull/54199) ([Igor Nikonov](https://github.com/devcrafter)). + +#### Improvement +* Allow replacing long file names of columns in `MergeTree` data parts with hashes of the names. It helps to avoid the `File name too long` error in some cases. [#50612](https://github.com/ClickHouse/ClickHouse/pull/50612) ([Anton Popov](https://github.com/CurtizJ)). +* Parse data in the `JSON` format as `JSONEachRow` if parsing the metadata failed.
It will allow to read files with `.json` extension even if real format is JSONEachRow. Closes [#45740](https://github.com/ClickHouse/ClickHouse/issues/45740). [#54405](https://github.com/ClickHouse/ClickHouse/pull/54405) ([Kruglov Pavel](https://github.com/Avogar)). +* Output valid JSON/XML on excetpion during HTTP query execution. Add setting `http_write_exception_in_output_format` to enable/disable this behaviour (enabled by default). [#52853](https://github.com/ClickHouse/ClickHouse/pull/52853) ([Kruglov Pavel](https://github.com/Avogar)). +* View `information_schema.tables` now has a new field `data_length` which shows the approximate size of the data on disk. Required to run queries generated by Amazon QuickSight. [#55037](https://github.com/ClickHouse/ClickHouse/pull/55037) ([Robert Schulze](https://github.com/rschu1ze)). +* The MySQL interface gained a minimal implementation of prepared statements, just enough to allow a connection from Tableau Online to ClickHouse via the MySQL connector. [#54115](https://github.com/ClickHouse/ClickHouse/pull/54115) ([Serge Klochkov](https://github.com/slvrtrn)). Please note: the prepared statements implementation is pretty minimal, we do not support arguments binding yet, it is not required in this particular Tableau online use case. It will be implemented as a follow-up if necessary after extensive testing of Tableau Online in case we discover issues. +* Support case-insensitive and dot-all matching modes in `regexp_tree` dictionaries. [#50906](https://github.com/ClickHouse/ClickHouse/pull/50906) ([Johann Gan](https://github.com/johanngan)). +* Keeper improvement: Add a `createIfNotExists` Keeper command. [#48855](https://github.com/ClickHouse/ClickHouse/pull/48855) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* More precise integer type inference, fix [#51236](https://github.com/ClickHouse/ClickHouse/issues/51236). [#53003](https://github.com/ClickHouse/ClickHouse/pull/53003) ([Chen768959](https://github.com/Chen768959)). +* Introduced resolving of charsets in the string literals for MaterializedMySQL. [#53220](https://github.com/ClickHouse/ClickHouse/pull/53220) ([Val Doroshchuk](https://github.com/valbok)). +* Fix a subtle issue with a rarely used `EmbeddedRocksDB` table engine in an extremely rare scenario: sometimes the `EmbeddedRocksDB` table engine does not close files correctly in NFS after running `DROP TABLE`. [#53502](https://github.com/ClickHouse/ClickHouse/pull/53502) ([Mingliang Pan](https://github.com/liangliangpan)). +* `RESTORE TABLE ON CLUSTER` must create replicated tables with a matching UUID on hosts. Otherwise the macro `{uuid}` in ZooKeeper path can't work correctly after RESTORE. This PR implements that. [#53765](https://github.com/ClickHouse/ClickHouse/pull/53765) ([Vitaly Baranov](https://github.com/vitlibar)). +* Added restore setting `restore_broken_parts_as_detached`: if it's true the RESTORE process won't stop on broken parts while restoring, instead all the broken parts will be copied to the `detached` folder with the prefix `broken-from-backup'. If it's false the RESTORE process will stop on the first broken part (if any). The default value is false. [#53877](https://github.com/ClickHouse/ClickHouse/pull/53877) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add `elapsed_ns` field to HTTP headers X-ClickHouse-Progress and X-ClickHouse-Summary. [#54179](https://github.com/ClickHouse/ClickHouse/pull/54179) ([joelynch](https://github.com/joelynch)). 
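+A hedged sketch of inspecting the new `data_length` field from the entry above; the MySQL-compatible columns `table_schema` and `table_name` are assumed to be present in the view:
+
+```sql
+-- Approximate on-disk size of every table in the 'default' database:
+SELECT table_name, data_length
+FROM information_schema.tables
+WHERE table_schema = 'default';
+```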
+* Implementation of `reconfig` (https://github.com/ClickHouse/ClickHouse/pull/49450), `sync`, and `exists` commands for keeper-client. [#54201](https://github.com/ClickHouse/ClickHouse/pull/54201) ([pufit](https://github.com/pufit)).
+* `clickhouse-local` and `clickhouse-client` now allow to specify the `--query` parameter multiple times, e.g. `./clickhouse-client --query "SELECT 1" --query "SELECT 2"`. This syntax is slightly more intuitive than `./clickhouse-client --multiquery "SELECT 1; SELECT 2"`, a bit easier to script (e.g. `queries.push_back('--query "$q"')`) and more consistent with the behavior of the existing parameter `--queries-file` (e.g. `./clickhouse client --queries-file queries1.sql --queries-file queries2.sql`). [#54249](https://github.com/ClickHouse/ClickHouse/pull/54249) ([Robert Schulze](https://github.com/rschu1ze)).
+* Add sub-second precision to `formatReadableTimeDelta`. [#54250](https://github.com/ClickHouse/ClickHouse/pull/54250) ([Andrey Zvonov](https://github.com/zvonand)).
+* Enable `allow_remove_stale_moving_parts` by default. [#54260](https://github.com/ClickHouse/ClickHouse/pull/54260) ([vdimir](https://github.com/vdimir)).
+* Fix using count from cache and improve progress bar for reading from archives. [#54271](https://github.com/ClickHouse/ClickHouse/pull/54271) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add support for S3 credentials using SSO. To define a profile to be used with SSO, set the `AWS_PROFILE` environment variable. [#54347](https://github.com/ClickHouse/ClickHouse/pull/54347) ([Antonio Andelic](https://github.com/antonio2368)).
+* Support NULL as default for nested types Array/Tuple/Map for input formats. Closes [#51100](https://github.com/ClickHouse/ClickHouse/issues/51100). [#54351](https://github.com/ClickHouse/ClickHouse/pull/54351) ([Kruglov Pavel](https://github.com/Avogar)).
+* Allow reading some unusual configurations of chunks from Arrow/Parquet formats. [#54370](https://github.com/ClickHouse/ClickHouse/pull/54370) ([Arthur Passos](https://github.com/arthurpassos)).
+* Add `STD` alias to `stddevPop` function for MySQL compatibility. Closes [#54274](https://github.com/ClickHouse/ClickHouse/issues/54274). [#54382](https://github.com/ClickHouse/ClickHouse/pull/54382) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Add `addDate` function for compatibility with MySQL and `subDate` for consistency. Reference [#54275](https://github.com/ClickHouse/ClickHouse/issues/54275). [#54400](https://github.com/ClickHouse/ClickHouse/pull/54400) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Add `modification_time` into `system.detached_parts`. [#54506](https://github.com/ClickHouse/ClickHouse/pull/54506) ([Azat Khuzhin](https://github.com/azat)).
+* Added a setting `splitby_max_substrings_includes_remaining_string` which controls whether functions `splitBy*()` with argument `max_substrings` > 0 include the remaining string (if any) in the result array (Python/Spark semantics) or not. The default behavior does not change. [#54518](https://github.com/ClickHouse/ClickHouse/pull/54518) ([Robert Schulze](https://github.com/rschu1ze)).
+* Better integer types inference for `Int64`/`UInt64` fields. Continuation of [#53003](https://github.com/ClickHouse/ClickHouse/pull/53003). Now it works also for nested types like Arrays of Arrays and for functions like `map/tuple`. Issue: [#51236](https://github.com/ClickHouse/ClickHouse/issues/51236). [#54553](https://github.com/ClickHouse/ClickHouse/pull/54553) ([Kruglov Pavel](https://github.com/Avogar)).
+* Added array operations for multiplying, dividing and modulo by a scalar. Works both ways, for example `5 * [5, 5]` and `[5, 5] * 5` are both possible. [#54608](https://github.com/ClickHouse/ClickHouse/pull/54608) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Add optional `version` argument to the `rm` command in `keeper-client` to support safer deletes. [#54708](https://github.com/ClickHouse/ClickHouse/pull/54708) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Disable killing the server by systemd (that may lead to data loss when using Buffer tables). [#54744](https://github.com/ClickHouse/ClickHouse/pull/54744) ([Azat Khuzhin](https://github.com/azat)).
+* Added field `is_deterministic` to system table `system.functions` which indicates whether the result of a function is stable between two invocations (given exactly the same inputs) or not. [#54766](https://github.com/ClickHouse/ClickHouse/pull/54766) [#55035](https://github.com/ClickHouse/ClickHouse/pull/55035) ([Robert Schulze](https://github.com/rschu1ze)).
+* Made the views in schema `information_schema` more compatible with the equivalent views in MySQL (i.e. modified and extended them) up to a point where Tableau Online is able to connect to ClickHouse. More specifically: 1. The type of field `information_schema.tables.table_type` changed from Enum8 to String. 2. Added fields `table_comment` and `table_collation` to view `information_schema.tables`. 3. Added views `information_schema.key_column_usage` and `referential_constraints`. 4. Replaced uppercase aliases in `information_schema` views with concrete uppercase columns. [#54773](https://github.com/ClickHouse/ClickHouse/pull/54773) ([Serge Klochkov](https://github.com/slvrtrn)).
+* The query cache now returns an error if the user tries to cache the result of a query with a non-deterministic function such as `now`, `randomString` and `dictGet`. Compared to the previous behavior (silently not caching the result), this reduces confusion and surprise for users. [#54801](https://github.com/ClickHouse/ClickHouse/pull/54801) ([Robert Schulze](https://github.com/rschu1ze)).
+* Forbid special columns like materialized/ephemeral/alias for `file`/`s3`/`url`/... storages, fix insert into ephemeral columns from files. Closes [#53477](https://github.com/ClickHouse/ClickHouse/issues/53477). [#54803](https://github.com/ClickHouse/ClickHouse/pull/54803) ([Kruglov Pavel](https://github.com/Avogar)).
+* More configurable collection of metadata for backups. [#54804](https://github.com/ClickHouse/ClickHouse/pull/54804) ([Vitaly Baranov](https://github.com/vitlibar)).
+* `clickhouse-local`'s log file (if enabled with the `--server_logs_file` flag) will now prefix each line with a timestamp, thread id, etc., just like `clickhouse-server`. [#54807](https://github.com/ClickHouse/ClickHouse/pull/54807) ([Michael Kolupaev](https://github.com/al13n321)).
+* Field `is_obsolete` in the `system.merge_tree_settings` table is now 1 for obsolete merge tree settings. Previously, only the description indicated that the setting is obsolete. [#54837](https://github.com/ClickHouse/ClickHouse/pull/54837) ([Robert Schulze](https://github.com/rschu1ze)).
+* Make it possible to use plural forms in interval literals: `INTERVAL 2 HOURS` is equivalent to `INTERVAL 2 HOUR`. [#54860](https://github.com/ClickHouse/ClickHouse/pull/54860) ([Jordi Villar](https://github.com/jrdi)).
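+For illustration, both spellings from the entry above parse to the same interval:
+
+```sql
+-- Plural and singular interval units are equivalent (now() is evaluated once per query):
+SELECT now() + INTERVAL 2 HOURS = now() + INTERVAL 2 HOUR; -- returns 1
+```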
+* Always allow the creation of a projection with `Nullable` PK. This fixes [#54814](https://github.com/ClickHouse/ClickHouse/issues/54814). [#54895](https://github.com/ClickHouse/ClickHouse/pull/54895) ([Amos Bird](https://github.com/amosbird)).
+* Retry backup's S3 operations after a connection reset failure. [#54900](https://github.com/ClickHouse/ClickHouse/pull/54900) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Make the exception message exact in case the maximum value of a setting is less than the minimum value. [#54925](https://github.com/ClickHouse/ClickHouse/pull/54925) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* `LIKE`, `match`, and other regular expression matching functions now allow matching with patterns containing non-UTF-8 substrings by falling back to binary matching. Example: you can use `string LIKE '\xFE\xFF%'` to detect BOM. This closes [#54486](https://github.com/ClickHouse/ClickHouse/issues/54486). [#54942](https://github.com/ClickHouse/ClickHouse/pull/54942) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Added `ContextLockWaitMicroseconds` profile event. [#55029](https://github.com/ClickHouse/ClickHouse/pull/55029) ([Maksim Kita](https://github.com/kitaisreal)).
+* Keeper now dynamically adjusts log levels. [#50372](https://github.com/ClickHouse/ClickHouse/pull/50372) ([helifu](https://github.com/helifu)).
+* Added function `timestamp` for compatibility with MySQL. Closes [#54275](https://github.com/ClickHouse/ClickHouse/issues/54275). [#54639](https://github.com/ClickHouse/ClickHouse/pull/54639) ([Nikolay Degterinsky](https://github.com/evillique)).
+
+#### Build/Testing/Packaging Improvement
+* Bumped the compiler of official and continuous integration builds of ClickHouse from Clang 16 to 17. [#53831](https://github.com/ClickHouse/ClickHouse/pull/53831) ([Robert Schulze](https://github.com/rschu1ze)).
+* Regenerated tld data for lookups (`tldLookup.generated.cpp`). [#54269](https://github.com/ClickHouse/ClickHouse/pull/54269) ([Bharat Nallan](https://github.com/bharatnc)).
+* Remove the redundant `clickhouse-keeper-client` symlink. [#54587](https://github.com/ClickHouse/ClickHouse/pull/54587) ([Tomas Barton](https://github.com/deric)).
+* Use `/usr/bin/env` to resolve bash; this adds support for NixOS. [#54603](https://github.com/ClickHouse/ClickHouse/pull/54603) ([Fionera](https://github.com/fionera)).
+* Added the CMake option `PROFILE_CPU`, needed to perform `perf record` without using a DWARF call graph. [#54917](https://github.com/ClickHouse/ClickHouse/pull/54917) ([Maksim Kita](https://github.com/kitaisreal)).
+* If the linker is different from LLD, stop with a fatal error. [#55036](https://github.com/ClickHouse/ClickHouse/pull/55036) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Replaced the library used to handle (encode/decode) base64 values from Turbo-Base64 to aklomp-base64. Both are SIMD-accelerated on x86 and ARM, but 1. the license of the latter (BSD-2) is more favorable for ClickHouse, as Turbo-Base64 switched in the meantime to GPL-3, 2. with more GitHub stars, aklomp-base64 seems more future-proof, 3. aklomp-base64 has a slightly nicer API (which is arguably subjective), and 4. aklomp-base64 does not require us to hack around bugs (like non-threadsafe initialization). Note: aklomp-base64 rejects unpadded base64 values whereas Turbo-Base64 decodes them on a best-effort basis. RFC 4648 leaves it open whether padding is mandatory or not, but depending on the context this may be a behavioral change to be aware of. [#54119](https://github.com/ClickHouse/ClickHouse/pull/54119) ([Mikhail Koviazin](https://github.com/mkmkme)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Fix REPLACE/MOVE PARTITION with zero-copy replication (note: "zero-copy replication" is an experimental feature). [#54193](https://github.com/ClickHouse/ClickHouse/pull/54193) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix zero-copy locks with hardlinks (note: "zero-copy replication" is an experimental feature). [#54859](https://github.com/ClickHouse/ClickHouse/pull/54859) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix zero-copy garbage (note: "zero-copy replication" is an experimental feature). [#54550](https://github.com/ClickHouse/ClickHouse/pull/54550) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Pass the HTTP retry timeout as milliseconds (it was incorrect before). [#54438](https://github.com/ClickHouse/ClickHouse/pull/54438) ([Duc Canh Le](https://github.com/canhld94)).
+* Fix misleading error message in OUTFILE with `CapnProto`/`Protobuf`. [#52870](https://github.com/ClickHouse/ClickHouse/pull/52870) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix summary reporting with parallel replicas with LIMIT. [#53050](https://github.com/ClickHouse/ClickHouse/pull/53050) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix throttling of BACKUPs from/to S3 (in case native copy was not used) and in some other places as well. [#53336](https://github.com/ClickHouse/ClickHouse/pull/53336) ([Azat Khuzhin](https://github.com/azat)).
+* Fix IO throttling during copying whole directories. [#53338](https://github.com/ClickHouse/ClickHouse/pull/53338) ([Azat Khuzhin](https://github.com/azat)).
+* Fix: condition actions moved to PREWHERE could lose a column. [#53492](https://github.com/ClickHouse/ClickHouse/pull/53492) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fixed an internal error when replacing with byte-equal parts. [#53735](https://github.com/ClickHouse/ClickHouse/pull/53735) ([Pedro Riera](https://github.com/priera)).
+* Fix: require columns participating in an interpolate expression. [#53754](https://github.com/ClickHouse/ClickHouse/pull/53754) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix cluster discovery initialization and setting up fail points in the config. [#54113](https://github.com/ClickHouse/ClickHouse/pull/54113) ([vdimir](https://github.com/vdimir)).
+* Fix issues in `accurateCastOrNull`. [#54136](https://github.com/ClickHouse/ClickHouse/pull/54136) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
+* Fix nullable primary key with the FINAL modifier. [#54164](https://github.com/ClickHouse/ClickHouse/pull/54164) ([Amos Bird](https://github.com/amosbird)).
+* Fixed an error that prevented insertion of new data into a replicated materialized view in the presence of duplicated data. [#54184](https://github.com/ClickHouse/ClickHouse/pull/54184) ([Pedro Riera](https://github.com/priera)).
+* Fix: allow `IPv6` for bloom filter. [#54200](https://github.com/ClickHouse/ClickHouse/pull/54200) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix possible type mismatch with `IPv4`. [#54212](https://github.com/ClickHouse/ClickHouse/pull/54212) ([Bharat Nallan](https://github.com/bharatnc)).
+* Fix `system.data_skipping_indices` for recreated indices. [#54225](https://github.com/ClickHouse/ClickHouse/pull/54225) ([Artur Malchanau](https://github.com/Hexta)).
+* Fix name clash for multiple join rewriter v2. [#54240](https://github.com/ClickHouse/ClickHouse/pull/54240) ([Tao Wang](https://github.com/wangtZJU)).
+* Fix unexpected errors in `system.errors` after join. [#54306](https://github.com/ClickHouse/ClickHouse/pull/54306) ([vdimir](https://github.com/vdimir)).
+* Fix `isZeroOrNull(NULL)`. [#54316](https://github.com/ClickHouse/ClickHouse/pull/54316) ([flynn](https://github.com/ucasfl)).
+* Fix: parallel replicas over distributed with `prefer_localhost_replica` = 1. [#54334](https://github.com/ClickHouse/ClickHouse/pull/54334) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix logical error in vertical merge + replacing merge tree + optimize cleanup. [#54368](https://github.com/ClickHouse/ClickHouse/pull/54368) ([alesapin](https://github.com/alesapin)).
+* Fix possible error `URI contains invalid characters` in the `s3` table function. [#54373](https://github.com/ClickHouse/ClickHouse/pull/54373) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix segfault in AST optimization of the `arrayExists` function. [#54379](https://github.com/ClickHouse/ClickHouse/pull/54379) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Check for overflow before addition in the `analysisOfVariance` function. [#54385](https://github.com/ClickHouse/ClickHouse/pull/54385) ([Antonio Andelic](https://github.com/antonio2368)).
+* Reproduce and fix the bug in `removeSharedRecursive`. [#54430](https://github.com/ClickHouse/ClickHouse/pull/54430) ([Sema Checherinda](https://github.com/CheSema)).
+* Fix possible incorrect result with SimpleAggregateFunction in PREWHERE and FINAL. [#54436](https://github.com/ClickHouse/ClickHouse/pull/54436) ([Azat Khuzhin](https://github.com/azat)).
+* Fix filtering parts with indexHint for non-analyzer. [#54449](https://github.com/ClickHouse/ClickHouse/pull/54449) ([Azat Khuzhin](https://github.com/azat)).
+* Fix aggregate projections with normalized states. [#54480](https://github.com/ClickHouse/ClickHouse/pull/54480) ([Amos Bird](https://github.com/amosbird)).
+* `clickhouse-local`: something for multiquery parameter. [#54498](https://github.com/ClickHouse/ClickHouse/pull/54498) ([CuiShuoGuo](https://github.com/bakam412)).
+* `clickhouse-local` supports the `--database` command line argument. [#54503](https://github.com/ClickHouse/ClickHouse/pull/54503) ([vdimir](https://github.com/vdimir)).
+* Fix possible parsing error in `-WithNames` formats with disabled `input_format_with_names_use_header`. [#54513](https://github.com/ClickHouse/ClickHouse/pull/54513) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix a rare case of the CHECKSUM_DOESNT_MATCH error. [#54549](https://github.com/ClickHouse/ClickHouse/pull/54549) ([alesapin](https://github.com/alesapin)).
+* Fix sorting of UNION ALL of already sorted results. [#54564](https://github.com/ClickHouse/ClickHouse/pull/54564) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Fix snapshot install in Keeper. [#54572](https://github.com/ClickHouse/ClickHouse/pull/54572) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix race in `ColumnUnique`. [#54575](https://github.com/ClickHouse/ClickHouse/pull/54575) ([Nikita Taranov](https://github.com/nickitat)).
+* Annoy/Usearch index: fix LOGICAL_ERROR during build-up with default values. [#54600](https://github.com/ClickHouse/ClickHouse/pull/54600) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix serialization of `ColumnDecimal`. [#54601](https://github.com/ClickHouse/ClickHouse/pull/54601) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix schema inference for `*Cluster` functions for column names with spaces. [#54635](https://github.com/ClickHouse/ClickHouse/pull/54635) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix using structure from insertion tables in case of defaults and explicit insert columns. [#54655](https://github.com/ClickHouse/ClickHouse/pull/54655) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix: avoid using a regex match, possibly containing alternation, as a key condition. [#54696](https://github.com/ClickHouse/ClickHouse/pull/54696) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix ReplacingMergeTree with vertical merge and cleanup. [#54706](https://github.com/ClickHouse/ClickHouse/pull/54706) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Fix virtual columns having incorrect values after ORDER BY. [#54811](https://github.com/ClickHouse/ClickHouse/pull/54811) ([Michael Kolupaev](https://github.com/al13n321)).
+* Fix filtering parts with indexHint for non-analyzer. [#54825](https://github.com/ClickHouse/ClickHouse/pull/54825) [#54449](https://github.com/ClickHouse/ClickHouse/pull/54449) ([Azat Khuzhin](https://github.com/azat)).
+* Fix Keeper segfault during shutdown. [#54841](https://github.com/ClickHouse/ClickHouse/pull/54841) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix `Invalid number of rows in Chunk` in MaterializedPostgreSQL. [#54844](https://github.com/ClickHouse/ClickHouse/pull/54844) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Move obsolete format settings to a separate section. [#54855](https://github.com/ClickHouse/ClickHouse/pull/54855) ([Kruglov Pavel](https://github.com/Avogar)).
+* Rebuild `minmax_count_projection` when the partition key gets modified. [#54943](https://github.com/ClickHouse/ClickHouse/pull/54943) ([Amos Bird](https://github.com/amosbird)).
+* Fix bad cast to `ColumnVector` in function `if`. [#55019](https://github.com/ClickHouse/ClickHouse/pull/55019) ([Kruglov Pavel](https://github.com/Avogar)).
+* Prevent attaching parts from tables with different projections or indices. [#55062](https://github.com/ClickHouse/ClickHouse/pull/55062) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Store NULL in the scalar result map for an empty subquery result. [#52240](https://github.com/ClickHouse/ClickHouse/pull/52240) ([vdimir](https://github.com/vdimir)).
+* Fix `FINAL` produces invalid read ranges in a rare case. [#54934](https://github.com/ClickHouse/ClickHouse/pull/54934) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix: insert quorum without keeper retries. [#55026](https://github.com/ClickHouse/ClickHouse/pull/55026) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix simple state with nullable. [#55030](https://github.com/ClickHouse/ClickHouse/pull/55030) ([Pedro Riera](https://github.com/priera)).
+
+
+### ClickHouse release 23.8 LTS, 2023-08-31
+
+#### Backward Incompatible Change
+* If a dynamic disk contains a name, it should be specified as `disk = disk(name = 'disk_name', ...)` in disk function arguments. In previous versions it could be specified as `disk = disk_<disk_name>(...)`, which is no longer supported. [#52820](https://github.com/ClickHouse/ClickHouse/pull/52820) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* `clickhouse-benchmark` will establish connections in parallel when invoked with `--concurrency` more than one. Previously it was unusable if you ran it with 1000 concurrent connections from Europe to the US. Correct calculation of QPS for connections with high latency. Backward incompatible change: the option for JSON output of `clickhouse-benchmark` is removed. If you've used this option, you can instead extract the data from `system.query_log` in JSON format as a workaround. [#53293](https://github.com/ClickHouse/ClickHouse/pull/53293) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The `microseconds` column is removed from the `system.text_log`, and the `milliseconds` column is removed from the `system.metric_log`, because they are redundant in the presence of the `event_time_microseconds` column. [#53601](https://github.com/ClickHouse/ClickHouse/pull/53601) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Deprecate the metadata cache feature. It is experimental and we have never used it. The feature is dangerous: [#51182](https://github.com/ClickHouse/ClickHouse/issues/51182). Remove the `system.merge_tree_metadata_cache` system table. The metadata cache is still available in this version but will be removed soon. This closes [#39197](https://github.com/ClickHouse/ClickHouse/issues/39197). [#51303](https://github.com/ClickHouse/ClickHouse/pull/51303) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Disable support for 3DES in TLS connections. [#52893](https://github.com/ClickHouse/ClickHouse/pull/52893) ([Kenji Noguchi](https://github.com/knoguchi)).
+
+#### New Feature
+* Direct import from zip/7z/tar archives. Example: `file('*.zip :: *.csv')`. [#50321](https://github.com/ClickHouse/ClickHouse/pull/50321) ([nikitakeba](https://github.com/nikitakeba)).
+* Add column `ptr` to `system.trace_log` for `trace_type = 'MemorySample'`. This column contains an address of allocation. Added function `flameGraph` which can build a flamegraph containing allocated and not-released memory. Reworking of [#38391](https://github.com/ClickHouse/ClickHouse/issues/38391). [#45322](https://github.com/ClickHouse/ClickHouse/pull/45322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Added table function `azureBlobStorageCluster`. The supported set of features is very similar to table function `s3Cluster`. [#50795](https://github.com/ClickHouse/ClickHouse/pull/50795) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Allow using `cluster`, `clusterAllReplicas`, `remote`, and `remoteSecure` without a table name. Issue: [#50808](https://github.com/ClickHouse/ClickHouse/issues/50808). [#50848](https://github.com/ClickHouse/ClickHouse/pull/50848) ([Yangkuan Liu](https://github.com/LiuYangkuan)).
+* A system table to monitor Kafka consumers. [#50999](https://github.com/ClickHouse/ClickHouse/pull/50999) ([Ilya Golshtein](https://github.com/ilejn)).
+* Added `max_sessions_for_user` setting. [#51724](https://github.com/ClickHouse/ClickHouse/pull/51724) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
+* New functions `toUTCTimestamp`/`fromUTCTimestamp` that act the same as Spark's `to_utc_timestamp`/`from_utc_timestamp`. [#52117](https://github.com/ClickHouse/ClickHouse/pull/52117) ([KevinyhZou](https://github.com/KevinyhZou)).
+* Add new functions `structureToCapnProtoSchema`/`structureToProtobufSchema` that convert a ClickHouse table structure to a CapnProto/Protobuf format schema. Allow to input/output data in CapnProto/Protobuf format without an external format schema, using an autogenerated schema from the table structure (controlled by settings `format_capn_proto_use_autogenerated_schema`/`format_protobuf_use_autogenerated_schema`). Allow to export the autogenerated schema during input/output using setting `output_format_schema`. [#52278](https://github.com/ClickHouse/ClickHouse/pull/52278) ([Kruglov Pavel](https://github.com/Avogar)).
+* A new field `query_cache_usage` in `system.query_log` now shows if and how the query cache was used. [#52384](https://github.com/ClickHouse/ClickHouse/pull/52384) ([Robert Schulze](https://github.com/rschu1ze)).
+* Add new functions `startsWithUTF8` and `endsWithUTF8`. [#52555](https://github.com/ClickHouse/ClickHouse/pull/52555) ([李扬](https://github.com/taiyang-li)).
+* Allow variable number of columns in TSV/CustomSeparated/JSONCompactEachRow, make schema inference work with variable number of columns. Add settings `input_format_tsv_allow_variable_number_of_columns`, `input_format_custom_allow_variable_number_of_columns`, `input_format_json_compact_allow_variable_number_of_columns`. [#52692](https://github.com/ClickHouse/ClickHouse/pull/52692) ([Kruglov Pavel](https://github.com/Avogar)).
+* Added `SYSTEM STOP/START PULLING REPLICATION LOG` queries (for testing `ReplicatedMergeTree`). [#52881](https://github.com/ClickHouse/ClickHouse/pull/52881) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Allow to execute constant non-deterministic functions in mutations on the initiator. [#53129](https://github.com/ClickHouse/ClickHouse/pull/53129) ([Anton Popov](https://github.com/CurtizJ)).
+* Add input format `One` that doesn't read any data and always returns a single row with column `dummy` of type `UInt8` and value `0`, like `system.one`. It can be used together with the `_file/_path` virtual columns to list files in file/s3/url/hdfs/etc table functions without reading any data. [#53209](https://github.com/ClickHouse/ClickHouse/pull/53209) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add `tupleConcat` function. Closes [#52759](https://github.com/ClickHouse/ClickHouse/issues/52759). [#53239](https://github.com/ClickHouse/ClickHouse/pull/53239) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Support the `TRUNCATE DATABASE` operation. [#53261](https://github.com/ClickHouse/ClickHouse/pull/53261) ([Bharat Nallan](https://github.com/bharatnc)).
+* Add `max_threads_for_indexes` setting to limit the number of threads used for primary key processing. [#53313](https://github.com/ClickHouse/ClickHouse/pull/53313) ([jorisgio](https://github.com/jorisgio)).
+* Re-add SipHash keyed functions. [#53525](https://github.com/ClickHouse/ClickHouse/pull/53525) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
+* Added functions `arrayRotateLeft`, `arrayRotateRight`, `arrayShiftLeft`, `arrayShiftRight` (see the sketch below) ([#52755](https://github.com/ClickHouse/ClickHouse/issues/52755), [#52895](https://github.com/ClickHouse/ClickHouse/issues/52895)). [#53557](https://github.com/ClickHouse/ClickHouse/pull/53557) ([Mikhail Koviazin](https://github.com/mkmkme)).
+* Add column `name` to `system.clusters` as an alias to `cluster`. [#53605](https://github.com/ClickHouse/ClickHouse/pull/53605) ([irenjj](https://github.com/irenjj)).
+* The advanced dashboard now allows mass editing (save/load). [#53608](https://github.com/ClickHouse/ClickHouse/pull/53608) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
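+A hedged sketch of the new array functions mentioned above; the expected results in the comments assume the shift functions accept an optional fill value:
+
+```sql
+SELECT
+    arrayRotateLeft([1, 2, 3, 4], 1)   AS rotated_left,  -- [2, 3, 4, 1]
+    arrayRotateRight([1, 2, 3, 4], 1)  AS rotated_right, -- [4, 1, 2, 3]
+    arrayShiftLeft([1, 2, 3, 4], 1, 0) AS shifted_left;  -- [2, 3, 4, 0]
+```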
+* The advanced dashboard now has an option to maximize charts and move them around. [#53622](https://github.com/ClickHouse/ClickHouse/pull/53622) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Added support for adding and subtracting arrays: `[5,2] + [1,7]`. Division and multiplication were not implemented due to confusion between pointwise multiplication and the scalar product of arguments. Closes [#49939](https://github.com/ClickHouse/ClickHouse/issues/49939). [#52625](https://github.com/ClickHouse/ClickHouse/pull/52625) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Add support for string literals as table names. Closes [#52178](https://github.com/ClickHouse/ClickHouse/issues/52178). [#52635](https://github.com/ClickHouse/ClickHouse/pull/52635) ([hendrik-m](https://github.com/hendrik-m)).
+
+#### Experimental Feature
+* Add new table engine `S3Queue` for streaming data import from s3. Closes [#37012](https://github.com/ClickHouse/ClickHouse/issues/37012). [#49086](https://github.com/ClickHouse/ClickHouse/pull/49086) ([s-kat](https://github.com/s-kat)). It is not ready to use. Do not use it.
+* Enable parallel reading from replicas over a distributed table. Related to [#49708](https://github.com/ClickHouse/ClickHouse/issues/49708). [#53005](https://github.com/ClickHouse/ClickHouse/pull/53005) ([Igor Nikonov](https://github.com/devcrafter)).
+* Add experimental support for HNSW as an approximate neighbor search method. [#53447](https://github.com/ClickHouse/ClickHouse/pull/53447) ([Davit Vardanyan](https://github.com/davvard)). This is currently intended for those who continue working on the implementation. Do not use it.
+
+#### Performance Improvement
+* Parquet filter pushdown. I.e. when reading Parquet files, row groups (chunks of the file) are skipped based on the WHERE condition and the min/max values in each column. In particular, if the file is roughly sorted by some column, queries that filter by a short range of that column will be much faster. [#52951](https://github.com/ClickHouse/ClickHouse/pull/52951) ([Michael Kolupaev](https://github.com/al13n321)).
+* Optimize reading small row groups by batching them together in Parquet. Closes [#53069](https://github.com/ClickHouse/ClickHouse/issues/53069). [#53281](https://github.com/ClickHouse/ClickHouse/pull/53281) ([Kruglov Pavel](https://github.com/Avogar)).
+* Optimize count from files in most input formats. Closes [#44334](https://github.com/ClickHouse/ClickHouse/issues/44334). [#53637](https://github.com/ClickHouse/ClickHouse/pull/53637) ([Kruglov Pavel](https://github.com/Avogar)).
+* Use the filter by file/path before reading in the `url`/`file`/`hdfs` table functions. [#53529](https://github.com/ClickHouse/ClickHouse/pull/53529) ([Kruglov Pavel](https://github.com/Avogar)).
+* Enable JIT compilation for AArch64, PowerPC, SystemZ, RISC-V. [#38217](https://github.com/ClickHouse/ClickHouse/pull/38217) ([Maksim Kita](https://github.com/kitaisreal)).
+* Add setting `rewrite_count_distinct_if_with_count_distinct_implementation` to rewrite `countDistinctIf` with `count_distinct_implementation` (as sketched below). Closes [#30642](https://github.com/ClickHouse/ClickHouse/issues/30642). [#46051](https://github.com/ClickHouse/ClickHouse/pull/46051) ([flynn](https://github.com/ucasfl)).
+* Speed up merging of states of `uniq` and `uniqExact` aggregate functions by parallelizing conversion before merge. [#50748](https://github.com/ClickHouse/ClickHouse/pull/50748) ([Jiebin Sun](https://github.com/jiebinn)).
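+A hedged sketch of the `rewrite_count_distinct_if_with_count_distinct_implementation` setting mentioned above; the table `events` and its columns are hypothetical, and the exact rewritten function is assumed:
+
+```sql
+SET count_distinct_implementation = 'uniqExact',
+    rewrite_count_distinct_if_with_count_distinct_implementation = 1;
+-- countDistinctIf(...) is rewritten to the configured implementation,
+-- presumably uniqExactIf(...) in this configuration:
+SELECT countDistinctIf(user_id, event_type = 'click') FROM events;
+```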
+* Optimize aggregation performance of nullable string keys when using a large number of variable-length keys. [#51399](https://github.com/ClickHouse/ClickHouse/pull/51399) ([LiuNeng](https://github.com/liuneng1994)).
+* Add a pass in Analyzer for time filter optimization with preimage. The performance experiments of SSB on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of 8.5% to the geomean QPS when the experimental analyzer is enabled. [#52091](https://github.com/ClickHouse/ClickHouse/pull/52091) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
+* Optimize the merge if all hash sets are single-level in the `uniqExact` (COUNT DISTINCT) function. [#52973](https://github.com/ClickHouse/ClickHouse/pull/52973) ([Jiebin Sun](https://github.com/jiebinn)).
+* `Join` table engine: do not clone the hash join data structure with all columns. [#53046](https://github.com/ClickHouse/ClickHouse/pull/53046) ([Duc Canh Le](https://github.com/canhld94)).
+* Implement a native `ORC` input format without the "apache arrow" library to improve performance. [#53324](https://github.com/ClickHouse/ClickHouse/pull/53324) ([李扬](https://github.com/taiyang-li)).
+* The dashboard will tell the server to compress the data, which is useful for large time frames over slow internet connections. For example, one chart with 86400 points can be 1.5 MB uncompressed and 60 KB compressed with `br`. [#53569](https://github.com/ClickHouse/ClickHouse/pull/53569) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Better utilization of thread pool for BACKUPs and RESTOREs. [#53649](https://github.com/ClickHouse/ClickHouse/pull/53649) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Load filesystem cache metadata on startup in parallel. Configured by the `load_metadata_threads` (default: 1) cache config setting. Related to [#52037](https://github.com/ClickHouse/ClickHouse/issues/52037). [#52943](https://github.com/ClickHouse/ClickHouse/pull/52943) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Improve `move_primary_key_columns_to_end_of_prewhere`. [#53337](https://github.com/ClickHouse/ClickHouse/pull/53337) ([Han Fei](https://github.com/hanfei1991)).
+* This optimizes the interaction with ClickHouse Keeper. Previously the caller could register the same watch callback multiple times. In that case each entry was consuming memory and the same callback was called multiple times, which didn't make much sense. In order to avoid this, the caller could have some logic to not add the same watch multiple times. With this change the deduplication is done internally if the watch callback is passed via shared_ptr. [#53452](https://github.com/ClickHouse/ClickHouse/pull/53452) ([Alexander Gololobov](https://github.com/davenger)).
+* Cache the number of rows in files for count in file/s3/url/hdfs/azure functions. The cache can be enabled/disabled by the setting `use_cache_for_count_from_files` (enabled by default). Continuation of https://github.com/ClickHouse/ClickHouse/pull/53637. [#53692](https://github.com/ClickHouse/ClickHouse/pull/53692) ([Kruglov Pavel](https://github.com/Avogar)).
+* More careful thread management will improve the speed of the S3 table function over a large number of files by more than ~25%. [#53668](https://github.com/ClickHouse/ClickHouse/pull/53668) ([pufit](https://github.com/pufit)).
+
+#### Improvement
+* Add `stderr_reaction` configuration/setting to control the reaction (none, log or throw) when an external command writes to stderr. This helps make debugging external commands easier. [#43210](https://github.com/ClickHouse/ClickHouse/pull/43210) ([Amos Bird](https://github.com/amosbird)).
+* Add `partition` column to the `system.part_log` and merge table. [#48990](https://github.com/ClickHouse/ClickHouse/pull/48990) ([Jianfei Hu](https://github.com/incfly)).
+* The sizes of the (index) uncompressed/mark, mmap and query caches can now be configured dynamically at runtime (without a server restart). [#51446](https://github.com/ClickHouse/ClickHouse/pull/51446) ([Robert Schulze](https://github.com/rschu1ze)).
+* If a dictionary is created with a complex key, automatically choose the "complex key" layout variant. [#49587](https://github.com/ClickHouse/ClickHouse/pull/49587) ([xiebin](https://github.com/xbthink)).
+* Add setting `use_concurrency_control` for better testing of the new concurrency control feature. [#49618](https://github.com/ClickHouse/ClickHouse/pull/49618) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Added suggestions for mistyped names of databases and tables. [#49801](https://github.com/ClickHouse/ClickHouse/pull/49801) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Improved the performance of reading small files from HDFS via Gluten, which previously took noticeably longer than querying them directly with Spark. [#50063](https://github.com/ClickHouse/ClickHouse/pull/50063) ([KevinyhZou](https://github.com/KevinyhZou)).
+* Removed the excessive, useless error logs emitted after session expiration. [#50171](https://github.com/ClickHouse/ClickHouse/pull/50171) ([helifu](https://github.com/helifu)).
+* Introduce fallback ZooKeeper sessions which are time-bound. Fixed the `index` column in system.zookeeper_connection for DNS addresses. [#50424](https://github.com/ClickHouse/ClickHouse/pull/50424) ([Anton Kozlov](https://github.com/tonickkozlov)).
+* Add the ability to log when `max_partitions_per_insert_block` is reached. [#50948](https://github.com/ClickHouse/ClickHouse/pull/50948) ([Sean Haynes](https://github.com/seandhaynes)).
+* Added a bunch of custom commands to clickhouse-keeper-client (mostly to make ClickHouse debugging easier). [#51117](https://github.com/ClickHouse/ClickHouse/pull/51117) ([pufit](https://github.com/pufit)).
+* Updated the check for the connection string in the `azureBlobStorage` table function, as a connection string with "sas" does not always begin with the default endpoint; also updated the connection URL to include the "sas" token after adding Azure's container to the URL. [#51141](https://github.com/ClickHouse/ClickHouse/pull/51141) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Fix the description for filtering sets in the `full_sorting_merge` JOIN algorithm. [#51329](https://github.com/ClickHouse/ClickHouse/pull/51329) ([Tanay Tummalapalli](https://github.com/ttanay)).
+* Fixed memory consumption in `Aggregator` when `max_block_size` is huge. [#51566](https://github.com/ClickHouse/ClickHouse/pull/51566) ([Nikita Taranov](https://github.com/nickitat)).
+* Add `SYSTEM SYNC FILESYSTEM CACHE` command. It will compare the in-memory state of the filesystem cache with what it has on disk and fix the in-memory state if needed. This is only needed if you are making manual interventions in on-disk data, which is highly discouraged. [#51622](https://github.com/ClickHouse/ClickHouse/pull/51622) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Attempt to create a generic proxy resolver for CH while keeping backwards compatibility with the existing S3 storage conf proxy resolver. [#51749](https://github.com/ClickHouse/ClickHouse/pull/51749) ([Arthur Passos](https://github.com/arthurpassos)).
+* Support reading tuple subcolumns from file/s3/hdfs/url/azureBlobStorage table functions. [#51806](https://github.com/ClickHouse/ClickHouse/pull/51806) ([Kruglov Pavel](https://github.com/Avogar)).
+* Function `arrayIntersect` now returns the values in the order corresponding to the first argument. Closes [#27622](https://github.com/ClickHouse/ClickHouse/issues/27622). [#51850](https://github.com/ClickHouse/ClickHouse/pull/51850) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Add new queries which allow creating/dropping access entities in a specified access storage, or moving access entities from one access storage to another. [#51912](https://github.com/ClickHouse/ClickHouse/pull/51912) ([pufit](https://github.com/pufit)).
+* Make `ALTER TABLE FREEZE` queries not replicated in the Replicated database engine. [#52064](https://github.com/ClickHouse/ClickHouse/pull/52064) ([Mike Kot](https://github.com/myrrc)).
+* Added the possibility to flush system tables on unexpected shutdown. [#52174](https://github.com/ClickHouse/ClickHouse/pull/52174) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
+* Fix the case when the `s3` table function refused to work with pre-signed URLs. Closes [#50846](https://github.com/ClickHouse/ClickHouse/issues/50846). [#52310](https://github.com/ClickHouse/ClickHouse/pull/52310) ([chen](https://github.com/xiedeyantu)).
+* Add column `name` as an alias to `event` and `metric` in the `system.events` and `system.metrics` tables. Closes [#51257](https://github.com/ClickHouse/ClickHouse/issues/51257). [#52315](https://github.com/ClickHouse/ClickHouse/pull/52315) ([chen](https://github.com/xiedeyantu)).
+* Added support of syntax `CREATE UNIQUE INDEX` in the parser as a no-op for better SQL compatibility. `UNIQUE` index is not supported. Set `create_index_ignore_unique = 1` to ignore the UNIQUE keyword in queries. [#52320](https://github.com/ClickHouse/ClickHouse/pull/52320) ([Ilya Yatsishin](https://github.com/qoega)).
+* Add support of predefined macros (`{database}` and `{table}`) in some Kafka engine settings: topic, consumer, client_id, etc. [#52386](https://github.com/ClickHouse/ClickHouse/pull/52386) ([Yury Bogomolov](https://github.com/ybogo)).
+* Disable updating the filesystem cache during backup/restore. The filesystem cache must not be updated during backup/restore; it seems it just slows down the process without any profit (because the BACKUP command can read a lot of data and it's no use putting all the data into the filesystem cache and immediately evicting it). [#52402](https://github.com/ClickHouse/ClickHouse/pull/52402) ([Vitaly Baranov](https://github.com/vitlibar)).
+* The configuration of the S3 endpoint now allows using it from the root, appending '/' automatically if needed. [#47809](https://github.com/ClickHouse/ClickHouse/issues/47809). [#52600](https://github.com/ClickHouse/ClickHouse/pull/52600) ([xiaolei565](https://github.com/xiaolei565)).
+* For clickhouse-local, allow positional options and populate global UDF settings (`user_scripts_path` and `user_defined_executable_functions_config`). [#52643](https://github.com/ClickHouse/ClickHouse/pull/52643) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* `system.asynchronous_metrics` now includes the metrics "QueryCacheEntries" and "QueryCacheBytes" to inspect the query cache. [#52650](https://github.com/ClickHouse/ClickHouse/pull/52650) ([Robert Schulze](https://github.com/rschu1ze)).
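+For example, the new asynchronous metrics from the entry above can be inspected like this (a hedged sketch; the standard `metric`/`value` columns of `system.asynchronous_metrics` are assumed):
+
+```sql
+SELECT metric, value
+FROM system.asynchronous_metrics
+WHERE metric IN ('QueryCacheEntries', 'QueryCacheBytes');
+```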
+* Added the possibility to use the `s3_storage_class` parameter in the `SETTINGS` clause of the `BACKUP` statement for backups to S3. [#52658](https://github.com/ClickHouse/ClickHouse/pull/52658) ([Roman Vasin](https://github.com/rvasin)).
+* Add utility `print-backup-info.py` which parses a backup metadata file and prints information about the backup. [#52690](https://github.com/ClickHouse/ClickHouse/pull/52690) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Closes [#49510](https://github.com/ClickHouse/ClickHouse/issues/49510). Currently we have database and table names case-sensitive, but BI tools query `information_schema` sometimes in lowercase, sometimes in uppercase. For this reason we have the `information_schema` database, containing lowercase tables, such as `information_schema.tables`, and the `INFORMATION_SCHEMA` database, containing uppercase tables, such as `INFORMATION_SCHEMA.TABLES`. But some tools are querying `INFORMATION_SCHEMA.tables` and `information_schema.TABLES`. The proposed solution is to duplicate both lowercase and uppercase tables in the lowercase and uppercase `information_schema` databases. [#52695](https://github.com/ClickHouse/ClickHouse/pull/52695) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Query `CHECK TABLE` has better performance and usability (sends progress updates, cancellable). [#52745](https://github.com/ClickHouse/ClickHouse/pull/52745) ([vdimir](https://github.com/vdimir)).
+* Add support for `modulo`, `intDiv`, `intDivOrZero` for tuples by distributing them across the tuple's elements. [#52758](https://github.com/ClickHouse/ClickHouse/pull/52758) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Search for default `yaml` and `yml` configs in clickhouse-client after `xml`. [#52767](https://github.com/ClickHouse/ClickHouse/pull/52767) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* When merging into a configuration not rooted at 'clickhouse', configs with a different root node name are now bypassed without an exception. [#52770](https://github.com/ClickHouse/ClickHouse/pull/52770) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Now it's possible to specify the min (`memory_profiler_sample_min_allocation_size`) and max (`memory_profiler_sample_max_allocation_size`) size of allocations to be tracked with the sampling memory profiler. [#52779](https://github.com/ClickHouse/ClickHouse/pull/52779) ([alesapin](https://github.com/alesapin)).
+* Add `precise_float_parsing` setting to switch float parsing methods (fast/precise). [#52791](https://github.com/ClickHouse/ClickHouse/pull/52791) ([Andrey Zvonov](https://github.com/zvonand)).
+* Use the same default paths for `clickhouse-keeper` (symlink) as for `clickhouse-keeper` (executable). [#52861](https://github.com/ClickHouse/ClickHouse/pull/52861) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Improve the error message for table function `remote`. Closes [#40220](https://github.com/ClickHouse/ClickHouse/issues/40220). [#52959](https://github.com/ClickHouse/ClickHouse/pull/52959) ([jiyoungyoooo](https://github.com/jiyoungyoooo)).
+* Added the possibility to specify a custom storage policy in the `SETTINGS` clause of `RESTORE` queries. [#52970](https://github.com/ClickHouse/ClickHouse/pull/52970) ([Victor Krasnov](https://github.com/sirvickr)).
+* Add the ability to throttle the S3 requests on backup operations (`BACKUP` and `RESTORE` commands now honor `s3_max_[get/put]_[rps/burst]`). [#52974](https://github.com/ClickHouse/ClickHouse/pull/52974) ([Daniel Pozo Escalona](https://github.com/danipozo)).
+* Add settings to ignore the ON CLUSTER clause in queries for management of replicated user-defined functions or access control entities with replicated storage. [#52975](https://github.com/ClickHouse/ClickHouse/pull/52975) ([Aleksei Filatov](https://github.com/aalexfvk)).
+* EXPLAIN actions for JOIN step. [#53006](https://github.com/ClickHouse/ClickHouse/pull/53006) ([Maksim Kita](https://github.com/kitaisreal)).
+* Make `hasTokenOrNull` and `hasTokenCaseInsensitiveOrNull` return null for empty needles (see the sketch below). [#53059](https://github.com/ClickHouse/ClickHouse/pull/53059) ([ltrk2](https://github.com/ltrk2)).
+* Allow to restrict allowed paths for filesystem caches. Mainly useful for dynamic disks. If `filesystem_caches_path` is specified in the server config, all filesystem caches' paths will be restricted to this directory. E.g. if the `path` in the cache config is relative, it will be put in `filesystem_caches_path`; if the `path` in the cache config is absolute, it will be required to lie inside `filesystem_caches_path`. If `filesystem_caches_path` is not specified in the config, then the behaviour will be the same as in earlier versions. [#53124](https://github.com/ClickHouse/ClickHouse/pull/53124) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Added a bunch of custom commands (mostly to make ClickHouse debugging easier). [#53127](https://github.com/ClickHouse/ClickHouse/pull/53127) ([pufit](https://github.com/pufit)).
+* Add diagnostic info about the file name during schema inference; it helps when you process multiple files with globs. [#53135](https://github.com/ClickHouse/ClickHouse/pull/53135) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The client will load suggestions using the main connection if the second connection is not allowed to create a session. [#53177](https://github.com/ClickHouse/ClickHouse/pull/53177) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
+* Add EXCEPT clause to the `SYSTEM STOP/START LISTEN QUERIES [ALL/DEFAULT/CUSTOM]` query, for example `SYSTEM STOP LISTEN QUERIES ALL EXCEPT TCP, HTTP`. [#53280](https://github.com/ClickHouse/ClickHouse/pull/53280) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Change the default of `max_concurrent_queries` from 100 to 1000. It's ok to have many concurrent queries if they are not heavy, and mostly waiting for the network. Note: don't confuse concurrent queries and QPS: for example, the ClickHouse server can do tens of thousands of QPS with less than 100 concurrent queries. [#53285](https://github.com/ClickHouse/ClickHouse/pull/53285) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Limit the number of concurrent background partition optimize merges. [#53405](https://github.com/ClickHouse/ClickHouse/pull/53405) ([Duc Canh Le](https://github.com/canhld94)).
+* Added a setting `allow_moving_table_directory_to_trash` that allows ignoring the `Directory for table data already exists` error when replicating/recovering a `Replicated` database. [#53425](https://github.com/ClickHouse/ClickHouse/pull/53425) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* If the server settings `asynchronous_metrics_update_period_s` and `asynchronous_heavy_metrics_update_period_s` are misconfigured to 0, the server will now fail gracefully instead of terminating the application. [#53428](https://github.com/ClickHouse/ClickHouse/pull/53428) ([Robert Schulze](https://github.com/rschu1ze)).
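+A hedged illustration of the `hasTokenOrNull` change referenced above; the results in the comments follow the entry's description:
+
+```sql
+SELECT hasTokenOrNull('a b c', '');                 -- NULL (empty needle)
+SELECT hasTokenCaseInsensitiveOrNull('a b c', 'B'); -- 1
+```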
+* The ClickHouse server now respects memory limits changed via cgroups when reloading its configuration. [#53455](https://github.com/ClickHouse/ClickHouse/pull/53455) ([Robert Schulze](https://github.com/rschu1ze)).
+* Add the ability to turn off the flush of Distributed tables on `DETACH`, `DROP`, or server shutdown. [#53501](https://github.com/ClickHouse/ClickHouse/pull/53501) ([Azat Khuzhin](https://github.com/azat)).
+* The `domainRFC` function now supports IPv6 in square brackets. [#53506](https://github.com/ClickHouse/ClickHouse/pull/53506) ([Chen768959](https://github.com/Chen768959)).
+* Use a longer timeout for S3 CopyObject requests, which are used in backups. [#53533](https://github.com/ClickHouse/ClickHouse/pull/53533) ([Michael Kolupaev](https://github.com/al13n321)).
+* Added server setting `aggregate_function_group_array_max_element_size`. This setting is used to limit the array size for the `groupArray` function at serialization. The default value is `16777215`. [#53550](https://github.com/ClickHouse/ClickHouse/pull/53550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* `SCHEMA()` was added as an alias for `DATABASE()` to improve MySQL compatibility (see the sketch below). [#53587](https://github.com/ClickHouse/ClickHouse/pull/53587) ([Daniël van Eeden](https://github.com/dveeden)).
+* Add asynchronous metrics about tables in the system database, for example `TotalBytesOfMergeTreeTablesSystem`. This closes [#53603](https://github.com/ClickHouse/ClickHouse/issues/53603). [#53604](https://github.com/ClickHouse/ClickHouse/pull/53604) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The SQL editor in the Play UI and Dashboard will not use Grammarly. [#53614](https://github.com/ClickHouse/ClickHouse/pull/53614) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* As expert-level settings, it is now possible to (1) configure the `size_ratio` (i.e. the relative size of the protected queue) of the [index] mark/uncompressed caches, and (2) configure the cache policy of the index mark and index uncompressed caches. [#53657](https://github.com/ClickHouse/ClickHouse/pull/53657) ([Robert Schulze](https://github.com/rschu1ze)).
+* Added client info validation to the query packet in TCPHandler. [#53673](https://github.com/ClickHouse/ClickHouse/pull/53673) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
+* Retry loading parts in case of network errors while interacting with Microsoft Azure. [#53750](https://github.com/ClickHouse/ClickHouse/pull/53750) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Exceptions from materialized views are now propagated with stack traces. [#53766](https://github.com/ClickHouse/ClickHouse/pull/53766) ([Ilya Golshtein](https://github.com/ilejn)).
+* If no hostname or port were specified, the keeper client will try to search for a connection string in ClickHouse's config.xml. [#53769](https://github.com/ClickHouse/ClickHouse/pull/53769) ([pufit](https://github.com/pufit)).
+* Add profile event `PartsLockMicroseconds` which shows the amount of microseconds we hold the data parts lock in the MergeTree table engine family. [#53797](https://github.com/ClickHouse/ClickHouse/pull/53797) ([alesapin](https://github.com/alesapin)).
+* Make the reconnect limit in RAFT configurable for Keeper. This can help Keeper rebuild connections with peers more quickly if the current connection is broken. [#53817](https://github.com/ClickHouse/ClickHouse/pull/53817) ([Pengyuan Bian](https://github.com/bianpengyuan)).
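+A quick sketch of the MySQL-compatibility alias mentioned above:
+
+```sql
+-- SCHEMA() is an alias for DATABASE(), i.e. the current database name:
+SELECT SCHEMA() AS current_schema, DATABASE() AS current_database; -- both return e.g. 'default'
+```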
+* Ignore foreign keys in table definitions to improve compatibility with MySQL, so a user wouldn't need to rewrite the foreign-key part of their SQL; ref [#53380](https://github.com/ClickHouse/ClickHouse/issues/53380). [#53864](https://github.com/ClickHouse/ClickHouse/pull/53864) ([jsc0218](https://github.com/jsc0218)).
+
+#### Build/Testing/Packaging Improvement
+* Don't expose symbols from the ClickHouse binary to the dynamic linker. It might fix [#43933](https://github.com/ClickHouse/ClickHouse/issues/43933). [#47475](https://github.com/ClickHouse/ClickHouse/pull/47475) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Add https://github.com/elliotchance/sqltest to CI to report the SQL 2016 conformance. [#52293](https://github.com/ClickHouse/ClickHouse/pull/52293) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Upgrade PRQL to 0.9.3. [#53060](https://github.com/ClickHouse/ClickHouse/pull/53060) ([Maximilian Roos](https://github.com/max-sixty)).
+* System tables from CI checks are exported to ClickHouse Cloud. [#53086](https://github.com/ClickHouse/ClickHouse/pull/53086) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud. [#53100](https://github.com/ClickHouse/ClickHouse/pull/53100) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Speed up Debug and Tidy builds. [#53178](https://github.com/ClickHouse/ClickHouse/pull/53178) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Speed up the build by removing tons and tonnes of garbage. One of the frequently included headers was poisoned by boost. [#53180](https://github.com/ClickHouse/ClickHouse/pull/53180) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove even more garbage. [#53182](https://github.com/ClickHouse/ClickHouse/pull/53182) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The function `arrayAUC` was using heavy C++ templates; they have been ditched. [#53183](https://github.com/ClickHouse/ClickHouse/pull/53183) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Some translation units were always rebuilt regardless of ccache. The culprit was found and fixed. [#53184](https://github.com/ClickHouse/ClickHouse/pull/53184) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud; the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Export logs from CI in stateful tests to ClickHouse Cloud. [#53351](https://github.com/ClickHouse/ClickHouse/pull/53351) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Export logs from CI in stress tests. [#53353](https://github.com/ClickHouse/ClickHouse/pull/53353) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Export logs from CI in the fuzzer. [#53354](https://github.com/ClickHouse/ClickHouse/pull/53354) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Preserve environment parameters in the `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Follow-up for [#53418](https://github.com/ClickHouse/ClickHouse/issues/53418). Small improvements for install_check.py, adding tests for proper ENV parameter passing to the main process on `init.d start`. [#53457](https://github.com/ClickHouse/ClickHouse/pull/53457) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Reorganize file management in CMake to prevent potential duplications. For instance, `indexHint.cpp` was duplicated in both `dbms_sources` and `clickhouse_functions_sources`. [#53621](https://github.com/ClickHouse/ClickHouse/pull/53621) ([Amos Bird](https://github.com/amosbird)).
+* Upgrade snappy to 1.1.10. [#53672](https://github.com/ClickHouse/ClickHouse/pull/53672) ([李扬](https://github.com/taiyang-li)).
+* Slightly improve the cmake build by sanitizing some dependencies and removing some duplicates. Each commit includes a short description of the changes made. [#53759](https://github.com/ClickHouse/ClickHouse/pull/53759) ([Amos Bird](https://github.com/amosbird)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Do not reset the (experimental) Annoy index during build-up with more than one mark. [#51325](https://github.com/ClickHouse/ClickHouse/pull/51325) ([Tian Xinhui](https://github.com/xinhuitian)).
+* Fix usage of temporary directories during RESTORE. [#51493](https://github.com/ClickHouse/ClickHouse/pull/51493) ([Azat Khuzhin](https://github.com/azat)).
+* Fix binary arithmetic for Nullable(IPv4). [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Support IPv4 and IPv6 data types as dictionary attributes. [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* A fix for the checksum of compressed marks. [#51777](https://github.com/ClickHouse/ClickHouse/pull/51777) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Fix a comma mistakenly parsed as part of a datetime in CSV best-effort parsing. [#51950](https://github.com/ClickHouse/ClickHouse/pull/51950) ([Kruglov Pavel](https://github.com/Avogar)).
+* Don't throw an exception when an executable UDF has parameters. [#51961](https://github.com/ClickHouse/ClickHouse/pull/51961) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix recalculation of skip indexes and projections in `ALTER DELETE` queries. [#52530](https://github.com/ClickHouse/ClickHouse/pull/52530) ([Anton Popov](https://github.com/CurtizJ)).
+* MaterializedMySQL: Fix the infinite loop in ReadBuffer::read. [#52621](https://github.com/ClickHouse/ClickHouse/pull/52621) ([Val Doroshchuk](https://github.com/valbok)).
+* Load suggestions only with the `clickhouse` dialect. [#52628](https://github.com/ClickHouse/ClickHouse/pull/52628) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Init and destroy the ares channel on demand. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)).
+* Fix filtering by virtual columns with an OR expression. [#52653](https://github.com/ClickHouse/ClickHouse/pull/52653) ([Azat Khuzhin](https://github.com/azat)).
+* Fix crash in function `tuple` with one sparse column argument. [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix named collections on cluster. [#52687](https://github.com/ClickHouse/ClickHouse/pull/52687) ([Al Korgun](https://github.com/alkorgun)).
+* Fix reading of an unnecessary column in case of multistage `PREWHERE` [#52689](https://github.com/ClickHouse/ClickHouse/pull/52689) ([Anton Popov](https://github.com/CurtizJ)).
+* Fix unexpected sort results on multiple columns with NULLS FIRST direction [#52761](https://github.com/ClickHouse/ClickHouse/pull/52761) ([copperybean](https://github.com/copperybean)).
+* Fix data race in Keeper reconfiguration [#52804](https://github.com/ClickHouse/ClickHouse/pull/52804) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix sorting of sparse columns with large limit [#52827](https://github.com/ClickHouse/ClickHouse/pull/52827) ([Anton Popov](https://github.com/CurtizJ)).
+* clickhouse-keeper: fix the implementation of the server with poll. [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)).
+* Make the regexp analyzer recognize named capturing groups [#52840](https://github.com/ClickHouse/ClickHouse/pull/52840) ([Han Fei](https://github.com/hanfei1991)).
+* Fix possible assert in `~PushingAsyncPipelineExecutor` in clickhouse-local [#52862](https://github.com/ClickHouse/ClickHouse/pull/52862) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix reading of empty `Nested(Array(LowCardinality(...)))` [#52949](https://github.com/ClickHouse/ClickHouse/pull/52949) ([Anton Popov](https://github.com/CurtizJ)).
+* Added new tests for session_log and fixed the inconsistency between login and logout. [#52958](https://github.com/ClickHouse/ClickHouse/pull/52958) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
+* Fix a password leak in `SHOW CREATE TABLE` for MySQL tables [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)).
+* Convert sparse column format to full in CreateSetAndFilterOnTheFlyStep [#53000](https://github.com/ClickHouse/ClickHouse/pull/53000) ([vdimir](https://github.com/vdimir)).
+* Fix rare race condition with empty key prefix directory deletion in fs cache [#53055](https://github.com/ClickHouse/ClickHouse/pull/53055) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix ZstdDeflatingWriteBuffer truncating the output sometimes [#53064](https://github.com/ClickHouse/ClickHouse/pull/53064) ([Michael Kolupaev](https://github.com/al13n321)).
+* Fix query_id in part_log with async flush queries [#53103](https://github.com/ClickHouse/ClickHouse/pull/53103) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix possible error from cache "Read unexpected size" [#53121](https://github.com/ClickHouse/ClickHouse/pull/53121) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Disable the new parquet encoder [#53130](https://github.com/ClickHouse/ClickHouse/pull/53130) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix "Not-ready Set" exception [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix character escaping in the PostgreSQL engine [#53250](https://github.com/ClickHouse/ClickHouse/pull/53250) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Experimental `session_log` table (see the query sketch below): added new tests and fixed the inconsistency between login and logout [#53255](https://github.com/ClickHouse/ClickHouse/pull/53255) ([Alexey Gerasimchuck](https://github.com/Demilivor)). Fixed an inconsistency between login success and logout [#53302](https://github.com/ClickHouse/ClickHouse/pull/53302) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
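+
+A sketch for inspecting the experimental `session_log` table mentioned above (assumes `session_log` is enabled in the server configuration; column names per the system-table documentation):
+
+```sql
+SELECT type, event_time, user
+FROM system.session_log
+ORDER BY event_time DESC
+LIMIT 10;
+```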
+* Fix adding sub-second intervals to DateTime [#53309](https://github.com/ClickHouse/ClickHouse/pull/53309) ([Michael Kolupaev](https://github.com/al13n321)).
+* Fix "Context has expired" error in dictionaries [#53342](https://github.com/ClickHouse/ClickHouse/pull/53342) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)).
+* Forbid `use_structure_from_insertion_table_in_table_functions` when executing scalar subqueries [#53348](https://github.com/ClickHouse/ClickHouse/pull/53348) ([flynn](https://github.com/ucasfl)).
+* Fix loading of a lazy database during a `system.tables` select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Fixed system.data_skipping_indices for MaterializedMySQL [#53381](https://github.com/ClickHouse/ClickHouse/pull/53381) ([Filipp Ozinov](https://github.com/bakwc)).
+* Fix processing of a single carriage return in the TSV file segmentation engine [#53407](https://github.com/ClickHouse/ClickHouse/pull/53407) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix `Context has expired` error properly [#53433](https://github.com/ClickHouse/ClickHouse/pull/53433) ([Michael Kolupaev](https://github.com/al13n321)).
+* Fix `timeout_overflow_mode` when having a subquery on the right-hand side of IN [#53439](https://github.com/ClickHouse/ClickHouse/pull/53439) ([Duc Canh Le](https://github.com/canhld94)).
+* Fix an unexpected behavior in [#53152](https://github.com/ClickHouse/ClickHouse/issues/53152) [#53440](https://github.com/ClickHouse/ClickHouse/pull/53440) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
+* Fix a `JSON_QUERY` function parse error when the path is all numbers [#53470](https://github.com/ClickHouse/ClickHouse/pull/53470) ([KevinyhZou](https://github.com/KevinyhZou)).
+* Fix wrong column order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fixed SELECTing from ReplacingMergeTree with `do_not_merge_across_partitions_select_final` [#53511](https://github.com/ClickHouse/ClickHouse/pull/53511) ([Vasily Nemkov](https://github.com/Enmk)).
+* Flush the async insert queue first on shutdown [#53547](https://github.com/ClickHouse/ClickHouse/pull/53547) ([joelynch](https://github.com/joelynch)).
+* Fix crash in join on sparse columns [#53548](https://github.com/ClickHouse/ClickHouse/pull/53548) ([vdimir](https://github.com/vdimir)).
+* Fix possible UB in the Set skipping index for functions with incorrect args [#53559](https://github.com/ClickHouse/ClickHouse/pull/53559) ([Azat Khuzhin](https://github.com/azat)).
+* Fix possible UB in inverted indexes (experimental feature) [#53560](https://github.com/ClickHouse/ClickHouse/pull/53560) ([Azat Khuzhin](https://github.com/azat)).
+* Fix: the interpolate expression took the source column instead of the same-name alias from the select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix the number of dropped granules in `EXPLAIN PLAN indexes = 1` (see the sketch below) [#53616](https://github.com/ClickHouse/ClickHouse/pull/53616) ([wangxiaobo](https://github.com/wzb5212)).
+* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)).
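+
+For reference, the query shape the dropped-granules fix above concerns (a sketch; `t` is a placeholder MergeTree table with a column `x`):
+
+```sql
+EXPLAIN PLAN indexes = 1
+SELECT count()
+FROM t
+WHERE x > 100;
+```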
+* Fix the prepared set cache in the mutation pipeline getting stuck [#53645](https://github.com/ClickHouse/ClickHouse/pull/53645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix a bug in mutations with subcolumns of type JSON in the predicates of UPDATE and DELETE queries. [#53677](https://github.com/ClickHouse/ClickHouse/pull/53677) ([VanDarkholme7](https://github.com/VanDarkholme7)).
+* Fix filter pushdown for full_sorting_merge join [#53699](https://github.com/ClickHouse/ClickHouse/pull/53699) ([vdimir](https://github.com/vdimir)).
+* Try to fix a bug with `NULL::LowCardinality(Nullable(...)) NOT IN` [#53706](https://github.com/ClickHouse/ClickHouse/pull/53706) ([Andrey Zvonov](https://github.com/zvonand)).
+* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)).
+* `transform`: correctly handle the default column with multiple rows [#53742](https://github.com/ClickHouse/ClickHouse/pull/53742) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
+* Fix fuzzer crash in parseDateTime [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)).
+* MaterializedPostgreSQL: fix uncaught exception in getCreateTableQueryImpl [#53832](https://github.com/ClickHouse/ClickHouse/pull/53832) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix possible segfault while using the PostgreSQL engine [#53847](https://github.com/ClickHouse/ClickHouse/pull/53847) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix `named_collection_admin` alias [#54066](https://github.com/ClickHouse/ClickHouse/pull/54066) ([Kseniia Sumarokova](https://github.com/kssenii)).
+
+### ClickHouse release 23.7, 2023-07-27
+
+#### Backward Incompatible Change
+* Add `NAMED COLLECTION` access type (aliases `USE NAMED COLLECTION`, `NAMED COLLECTION USAGE`). This PR is backward incompatible because this access type is disabled by default (because a parent access type `NAMED COLLECTION ADMIN` is disabled by default as well). Proposed in [#50277](https://github.com/ClickHouse/ClickHouse/issues/50277). To grant, use `GRANT NAMED COLLECTION ON collection_name TO user` or `GRANT NAMED COLLECTION ON * TO user`; to be able to give these grants, `named_collection_admin` is required in the config (previously it was named `named_collection_control`, which will remain as an alias). [#50625](https://github.com/ClickHouse/ClickHouse/pull/50625) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fixed a typo in the `system.parts` column name `last_removal_attemp_time`. Now it is named `last_removal_attempt_time`. [#52104](https://github.com/ClickHouse/ClickHouse/pull/52104) ([filimonov](https://github.com/filimonov)).
+* Bump the distributed_ddl_entry_format_version to 5 by default (enables opentelemetry and initial_query_id pass-through). This will not allow processing of existing entries for distributed DDL after a *downgrade* (but note that usually there should be no such unprocessed entries). [#52128](https://github.com/ClickHouse/ClickHouse/pull/52128) ([Azat Khuzhin](https://github.com/azat)).
+* Check projection metadata the same way we check ordinary metadata. This change may prevent the server from starting in case there was a table with an invalid projection. An example is a projection that created positional columns in PK (e.g. `projection p (select * order by 1, 4)`, which is not allowed in a table PK and can cause a crash during insert/merge). Drop such projections before the update. Fixes [#52353](https://github.com/ClickHouse/ClickHouse/issues/52353). [#52361](https://github.com/ClickHouse/ClickHouse/pull/52361) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* The experimental feature `hashid` is removed due to a bug. The quality of the implementation was questionable at the start, and it never got out of the experimental status. This closes [#52406](https://github.com/ClickHouse/ClickHouse/issues/52406). [#52449](https://github.com/ClickHouse/ClickHouse/pull/52449) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### New Feature
+* Added the `Overlay` database engine to combine multiple databases into one. Added the `Filesystem` database engine to represent a directory in the filesystem as a set of implicitly available tables with auto-detected formats and structures. A new `S3` database engine allows read-only interaction with S3 storage by representing a prefix as a set of tables. A new `HDFS` database engine allows interacting with HDFS storage in the same way. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)).
+* Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)).
+* Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)).
+* The Kafka connector can fetch the Avro schema from the schema registry with basic authentication using URL-encoded credentials. [#49664](https://github.com/ClickHouse/ClickHouse/pull/49664) ([Ilya Golshtein](https://github.com/ilejn)).
+* Add function `arrayJaccardIndex` which computes the Jaccard similarity between two arrays. [#50076](https://github.com/ClickHouse/ClickHouse/pull/50076) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
+* Add a column `is_obsolete` to `system.settings` and similar tables. Closes [#50819](https://github.com/ClickHouse/ClickHouse/issues/50819). [#50826](https://github.com/ClickHouse/ClickHouse/pull/50826) ([flynn](https://github.com/ucasfl)).
+* Implement support for encrypted elements in the configuration file. Encrypted text can be used in leaf elements of the configuration file. The text is encrypted using encryption codecs from the `<encryption_codecs>` section. [#50986](https://github.com/ClickHouse/ClickHouse/pull/50986) ([Roman Vasin](https://github.com/rvasin)).
+* The Grace Hash Join algorithm is now applicable to FULL and RIGHT JOINs. [#49483](https://github.com/ClickHouse/ClickHouse/issues/49483). [#51013](https://github.com/ClickHouse/ClickHouse/pull/51013) ([lgbo](https://github.com/lgbo-ustc)).
+* Add `SYSTEM STOP LISTEN` query for more graceful termination. Closes [#47972](https://github.com/ClickHouse/ClickHouse/issues/47972). [#51016](https://github.com/ClickHouse/ClickHouse/pull/51016) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Add the `input_format_csv_allow_variable_number_of_columns` option. [#51273](https://github.com/ClickHouse/ClickHouse/pull/51273) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Another boring feature: add function `substring_index`, as in Spark or MySQL. [#51472](https://github.com/ClickHouse/ClickHouse/pull/51472) ([李扬](https://github.com/taiyang-li)).
+* A system table `jemalloc_bins` to show stats for jemalloc bins. Example: `SELECT *, size * (nmalloc - ndalloc) AS allocated_bytes FROM system.jemalloc_bins WHERE allocated_bytes > 0 ORDER BY allocated_bytes DESC LIMIT 10`. Enjoy. [#51674](https://github.com/ClickHouse/ClickHouse/pull/51674) ([Alexander Gololobov](https://github.com/davenger)).
+* Add `RowBinaryWithDefaults` format with an extra byte before each column that flags whether the column's default value should be used. Closes [#50854](https://github.com/ClickHouse/ClickHouse/issues/50854). [#51695](https://github.com/ClickHouse/ClickHouse/pull/51695) ([Kruglov Pavel](https://github.com/Avogar)).
+* Added the `default_temporary_table_engine` setting. Same as `default_table_engine` but for temporary tables. [#51292](https://github.com/ClickHouse/ClickHouse/issues/51292). [#51708](https://github.com/ClickHouse/ClickHouse/pull/51708) ([velavokr](https://github.com/velavokr)).
+* Added new `initcap` / `initcapUTF8` functions which convert the first letter of each word to upper case and the rest to lower case. [#51735](https://github.com/ClickHouse/ClickHouse/pull/51735) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* `CREATE TABLE` now supports `PRIMARY KEY` syntax in the column definition. Columns are added to the primary index in the order they are defined. [#51881](https://github.com/ClickHouse/ClickHouse/pull/51881) ([Ilya Yatsishin](https://github.com/qoega)).
+* Added the possibility to use date and time format specifiers in log and error log file names, either in config files (`log` and `errorlog` tags) or command line arguments (`--log-file` and `--errorlog-file`). [#51945](https://github.com/ClickHouse/ClickHouse/pull/51945) ([Victor Krasnov](https://github.com/sirvickr)).
+* Added a Peak Memory Usage statistic to the HTTP headers. [#51946](https://github.com/ClickHouse/ClickHouse/pull/51946) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Added new `hasSubsequence` (+`CaseInsensitive` and `UTF8` versions) functions to match subsequences in strings. [#52050](https://github.com/ClickHouse/ClickHouse/pull/52050) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Add `array_agg` as an alias of `groupArray` for PostgreSQL compatibility. Closes [#52100](https://github.com/ClickHouse/ClickHouse/issues/52100). [#52135](https://github.com/ClickHouse/ClickHouse/pull/52135) ([flynn](https://github.com/ucasfl)).
+* Add `any_value` as a compatibility alias for the `any` aggregate function. Closes [#52140](https://github.com/ClickHouse/ClickHouse/issues/52140). [#52147](https://github.com/ClickHouse/ClickHouse/pull/52147) ([flynn](https://github.com/ucasfl)).
+* Add aggregate function `array_concat_agg` for compatibility with BigQuery; it is an alias of `groupArrayArray`. Closes [#52139](https://github.com/ClickHouse/ClickHouse/issues/52139). [#52149](https://github.com/ClickHouse/ClickHouse/pull/52149) ([flynn](https://github.com/ucasfl)).
+* Add `OCTET_LENGTH` as an alias to `length`. Closes [#52153](https://github.com/ClickHouse/ClickHouse/issues/52153). [#52176](https://github.com/ClickHouse/ClickHouse/pull/52176) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
+* Added the `firstLine` function to extract the first line from a multi-line string. This closes [#51172](https://github.com/ClickHouse/ClickHouse/issues/51172). [#52209](https://github.com/ClickHouse/ClickHouse/pull/52209) ([Mikhail Koviazin](https://github.com/mkmkme)).
+* Implement KQL-style formatting for the `Interval` data type. This is only needed for compatibility with the `Kusto` query language. [#45671](https://github.com/ClickHouse/ClickHouse/pull/45671) ([ltrk2](https://github.com/ltrk2)).
+* Added query `SYSTEM FLUSH ASYNC INSERT QUEUE` which flushes all pending asynchronous inserts to the destination tables. Added a server-side setting `async_insert_queue_flush_on_shutdown` (`true` by default) which determines whether to flush the queue of asynchronous inserts on graceful shutdown. Setting `async_insert_threads` is now a server-side setting. [#49160](https://github.com/ClickHouse/ClickHouse/pull/49160) ([Anton Popov](https://github.com/CurtizJ)).
+* Added the alias `current_database` and a new function `current_schemas` for compatibility with PostgreSQL. [#51076](https://github.com/ClickHouse/ClickHouse/pull/51076) ([Pedro Riera](https://github.com/priera)).
+* Add aliases for the functions `today` (now available under the `curdate`/`current_date` names) and `now` (`current_timestamp`). [#52106](https://github.com/ClickHouse/ClickHouse/pull/52106) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)).
+* Support `async_deduplication_token` for async insert. [#52136](https://github.com/ClickHouse/ClickHouse/pull/52136) ([Han Fei](https://github.com/hanfei1991)).
+* Add new setting `disable_url_encoding` that allows disabling decoding/encoding of the path in the URI in the URL engine. [#52337](https://github.com/ClickHouse/ClickHouse/pull/52337) ([Kruglov Pavel](https://github.com/Avogar)).
+
+#### Performance Improvement
+* Enable automatic selection of the sparse serialization format by default. It improves performance. The format is supported since version 22.1. After this change, downgrading to versions older than 22.1 might not be possible. A downgrade may require setting `ratio_of_defaults_for_sparse_serialization=0.9375` [#55153](https://github.com/ClickHouse/ClickHouse/issues/55153). You can turn off the usage of the sparse serialization format by providing the `ratio_of_defaults_for_sparse_serialization = 1` setting for your MergeTree tables. [#49631](https://github.com/ClickHouse/ClickHouse/pull/49631) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Enable `move_all_conditions_to_prewhere` and `enable_multiple_prewhere_read_steps` settings by default. [#46365](https://github.com/ClickHouse/ClickHouse/pull/46365) ([Alexander Gololobov](https://github.com/davenger)).
+* Improve performance of some queries by tuning the allocator. [#46416](https://github.com/ClickHouse/ClickHouse/pull/46416) ([Azat Khuzhin](https://github.com/azat)).
+* Now we use fixed-size tasks in `MergeTreePrefetchedReadPool` as in `MergeTreeReadPool`. Also, from now on, we use a connection pool for S3 requests. [#49732](https://github.com/ClickHouse/ClickHouse/pull/49732) ([Nikita Taranov](https://github.com/nickitat)).
+* More pushdown to the right side of join. [#50532](https://github.com/ClickHouse/ClickHouse/pull/50532) ([Nikita Taranov](https://github.com/nickitat)).
+* Improve the grace_hash join by reserving the hash table's size (resubmit). [#50875](https://github.com/ClickHouse/ClickHouse/pull/50875) ([lgbo](https://github.com/lgbo-ustc)).
+* Waiting on a lock in `OpenedFileCache` could be noticeable sometimes. We sharded it into multiple sub-maps (each with its own lock) to avoid contention. [#51341](https://github.com/ClickHouse/ClickHouse/pull/51341) ([Nikita Taranov](https://github.com/nickitat)).
+* Move conditions with primary key columns to the end of the PREWHERE chain. The idea is that conditions with PK columns are likely to be used in PK analysis and will not contribute much more to PREWHERE filtering. [#51958](https://github.com/ClickHouse/ClickHouse/pull/51958) ([Alexander Gololobov](https://github.com/davenger)).
+* Speed up `COUNT(DISTINCT)` for String types by inlining SipHash. The performance experiments of *OnTime* on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of *11.6%* to the QPS of the query *Q8* while having no impact on others. [#52036](https://github.com/ClickHouse/ClickHouse/pull/52036) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
+* Enable `allow_vertical_merges_from_compact_to_wide_parts` by default. It will save memory usage during merges. [#52295](https://github.com/ClickHouse/ClickHouse/pull/52295) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1`. This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823). This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173). [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)).
+* Reduce the number of syscalls in `FileCache::loadMetadata` - this speeds up server startup if the filesystem cache is configured. [#52435](https://github.com/ClickHouse/ClickHouse/pull/52435) ([Raúl Marín](https://github.com/Algunenano)).
+* Allow a strict lower boundary for the file segment size by downloading remaining data in the background. The minimum size of a file segment (if the actual file size is bigger) is configured by the cache configuration setting `boundary_alignment`, by default `4Mi`. The number of background threads is configured by the cache configuration setting `background_download_threads`, by default `2`. Also, `max_file_segment_size` was increased from `8Mi` to `32Mi` in this PR. [#51000](https://github.com/ClickHouse/ClickHouse/pull/51000) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Decreased default timeouts for S3 from 30 seconds to 3 seconds, and for other HTTP from 180 seconds to 30 seconds. [#51171](https://github.com/ClickHouse/ClickHouse/pull/51171) ([Michael Kolupaev](https://github.com/al13n321)).
+* New setting `merge_tree_determine_task_size_by_prewhere_columns` added. If set to `true`, only the sizes of the columns from the `PREWHERE` section are considered when determining the reading task size; otherwise, all the columns from the query are considered. [#52606](https://github.com/ClickHouse/ClickHouse/pull/52606) ([Nikita Taranov](https://github.com/nickitat)).
+
+#### Improvement
+* Use read_bytes/total_bytes_to_read for the progress bar in s3/file/url/... table functions for better progress indication. [#51286](https://github.com/ClickHouse/ClickHouse/pull/51286) ([Kruglov Pavel](https://github.com/Avogar)).
+* Introduce a table setting `wait_for_unique_parts_send_before_shutdown_ms` which specifies how long a replica will wait before closing the interserver handler for replicated sends. Also fix an inconsistency between the shutdown of tables and interserver handlers: now the server shuts down tables first and only then interserver handlers. [#51851](https://github.com/ClickHouse/ClickHouse/pull/51851) ([alesapin](https://github.com/alesapin)).
+* Allow SQL standard `FETCH` without `OFFSET` (see the example below). See https://antonz.org/sql-fetch/. [#51293](https://github.com/ClickHouse/ClickHouse/pull/51293) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
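+
+A minimal example of the standalone `FETCH` form (a sketch, runnable as-is):
+
+```sql
+SELECT number
+FROM numbers(100)
+ORDER BY number DESC
+FETCH FIRST 5 ROWS ONLY;
+```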
+* Allow filtering HTTP headers for the URL/S3 table functions with the new `http_forbid_headers` section in config. Both exact matching and regexp filters are available. [#51038](https://github.com/ClickHouse/ClickHouse/pull/51038) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Don't show messages about `16 EiB` free space in logs, as they don't make sense. This closes [#49320](https://github.com/ClickHouse/ClickHouse/issues/49320). [#49342](https://github.com/ClickHouse/ClickHouse/pull/49342) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Properly check the limit for the `sleepEachRow` function. Add a setting `function_sleep_max_microseconds_per_block`. This is needed for the generic query fuzzer. [#49343](https://github.com/ClickHouse/ClickHouse/pull/49343) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix two issues in `geoHash` functions. [#50066](https://github.com/ClickHouse/ClickHouse/pull/50066) ([李扬](https://github.com/taiyang-li)).
+* Log async insert flush queries into `system.query_log`. [#51160](https://github.com/ClickHouse/ClickHouse/pull/51160) ([Raúl Marín](https://github.com/Algunenano)).
+* Functions `date_diff` and `age` now support millisecond/microsecond units and work with microsecond precision. [#51291](https://github.com/ClickHouse/ClickHouse/pull/51291) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Improve parsing of the path in clickhouse-keeper-client. [#51359](https://github.com/ClickHouse/ClickHouse/pull/51359) ([Azat Khuzhin](https://github.com/azat)).
+* A third-party product depending on ClickHouse (Gluten: a Plugin to Double SparkSQL's Performance) had a bug. This fix avoids heap overflow in that third-party product while reading from HDFS. [#51386](https://github.com/ClickHouse/ClickHouse/pull/51386) ([李扬](https://github.com/taiyang-li)).
+* Add the ability to disable native copy for S3 (setting `allow_s3_native_copy` for BACKUP/RESTORE, and `s3_allow_native_copy` for `s3`/`s3_plain` disks). [#51448](https://github.com/ClickHouse/ClickHouse/pull/51448) ([Azat Khuzhin](https://github.com/azat)).
+* Add column `primary_key_size` to the `system.parts` table to show the compressed primary key size on disk. Closes [#51400](https://github.com/ClickHouse/ClickHouse/issues/51400). [#51496](https://github.com/ClickHouse/ClickHouse/pull/51496) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Allow running `clickhouse-local` without procfs, without a home directory existing, and without name resolution plugins from glibc. [#51518](https://github.com/ClickHouse/ClickHouse/pull/51518) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add placeholder `%a` for the full filename in the `rename_files_after_processing` setting. [#51603](https://github.com/ClickHouse/ClickHouse/pull/51603) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add column `modification_time` into `system.parts_columns`. [#51685](https://github.com/ClickHouse/ClickHouse/pull/51685) ([Azat Khuzhin](https://github.com/azat)).
+* Add new setting `input_format_csv_use_default_on_bad_values` to the CSV format that allows inserting a default value when parsing of a single field fails. [#51716](https://github.com/ClickHouse/ClickHouse/pull/51716) ([KevinyhZou](https://github.com/KevinyhZou)).
+* Added flushing of the crash log to disk after an unexpected crash. [#51720](https://github.com/ClickHouse/ClickHouse/pull/51720) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
+* Fix behavior on the dashboard page where errors unrelated to authentication were not shown. Also fix 'overlapping' chart behavior. [#51744](https://github.com/ClickHouse/ClickHouse/pull/51744) ([Zach Naimon](https://github.com/ArctypeZach)).
+* Allow UUID to UInt128 conversion. [#51765](https://github.com/ClickHouse/ClickHouse/pull/51765) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Added support for the function `range` with Nullable arguments. [#51767](https://github.com/ClickHouse/ClickHouse/pull/51767) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Convert conditions like `toYear(x) = c` to `c1 <= x < c2` (see the sketch below). [#51795](https://github.com/ClickHouse/ClickHouse/pull/51795) ([Han Fei](https://github.com/hanfei1991)).
+* Improve MySQL compatibility of the statement `SHOW INDEX`. [#51796](https://github.com/ClickHouse/ClickHouse/pull/51796) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix `use_structure_from_insertion_table_in_table_functions` not working with `MATERIALIZED` and `ALIAS` columns. Closes [#51817](https://github.com/ClickHouse/ClickHouse/issues/51817). Closes [#51019](https://github.com/ClickHouse/ClickHouse/issues/51019). [#51825](https://github.com/ClickHouse/ClickHouse/pull/51825) ([flynn](https://github.com/ucasfl)).
+* The cache dictionary now requests only unique keys from the source. Closes [#51762](https://github.com/ClickHouse/ClickHouse/issues/51762). [#51853](https://github.com/ClickHouse/ClickHouse/pull/51853) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fixed the case when settings were not applied for an EXPLAIN query when FORMAT was provided. [#51859](https://github.com/ClickHouse/ClickHouse/pull/51859) ([Nikita Taranov](https://github.com/nickitat)).
+* Allow SETTINGS before FORMAT in a DESCRIBE TABLE query for compatibility with the SELECT query. Closes [#51544](https://github.com/ClickHouse/ClickHouse/issues/51544). [#51899](https://github.com/ClickHouse/ClickHouse/pull/51899) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Var-Int encoded integers (e.g. used by the native protocol) can now use the full 64-bit range. Third-party clients are advised to update their var-int code accordingly. [#51905](https://github.com/ClickHouse/ClickHouse/pull/51905) ([Robert Schulze](https://github.com/rschu1ze)).
+* Update certificates when they change, without the need for a manual SYSTEM RELOAD CONFIG. [#52030](https://github.com/ClickHouse/ClickHouse/pull/52030) ([Mike Kot](https://github.com/myrrc)).
+* Added the `allow_create_index_without_type` setting that allows ignoring `ADD INDEX` queries without a specified `TYPE`. Standard SQL queries will just succeed without changing the table schema. [#52056](https://github.com/ClickHouse/ClickHouse/pull/52056) ([Ilya Yatsishin](https://github.com/qoega)).
+* Log messages are written to the `system.text_log` from the server startup. [#52113](https://github.com/ClickHouse/ClickHouse/pull/52113) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* In cases where the HTTP endpoint has multiple IP addresses and the first of them is unreachable, a timeout exception was thrown. Session creation now handles all resolved endpoints. [#52116](https://github.com/ClickHouse/ClickHouse/pull/52116) ([Aleksei Filatov](https://github.com/aalexfvk)).
+* The Avro input format now supports Union even if it contains only a single type. Closes [#52131](https://github.com/ClickHouse/ClickHouse/issues/52131). [#52137](https://github.com/ClickHouse/ClickHouse/pull/52137) ([flynn](https://github.com/ucasfl)).
+* Add setting `optimize_use_implicit_projections` to disable implicit projections (currently only the `min_max_count` projection). [#52152](https://github.com/ClickHouse/ClickHouse/pull/52152) ([Amos Bird](https://github.com/amosbird)).
+* It was possible to use the function `hasToken` to cause an infinite loop. This possibility is now removed. This closes [#52156](https://github.com/ClickHouse/ClickHouse/issues/52156). [#52160](https://github.com/ClickHouse/ClickHouse/pull/52160) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Create ZK ancestors optimistically. [#52195](https://github.com/ClickHouse/ClickHouse/pull/52195) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix [#50582](https://github.com/ClickHouse/ClickHouse/issues/50582). Avoid the `Not found column ... in block` error in some cases of reading in-order and constants. [#52259](https://github.com/ClickHouse/ClickHouse/pull/52259) ([Chen768959](https://github.com/Chen768959)).
+* Check whether S2 geo primitives are invalid as early as possible on the ClickHouse side. This closes: [#27090](https://github.com/ClickHouse/ClickHouse/issues/27090). [#52260](https://github.com/ClickHouse/ClickHouse/pull/52260) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Add back missing projection QueryAccessInfo when `query_plan_optimize_projection = 1`. This fixes [#50183](https://github.com/ClickHouse/ClickHouse/issues/50183). This fixes [#50093](https://github.com/ClickHouse/ClickHouse/issues/50093). [#52327](https://github.com/ClickHouse/ClickHouse/pull/52327) ([Amos Bird](https://github.com/amosbird)).
+* When `ZooKeeperRetriesControl` rethrows an error, it's more useful to see its original stack trace, not the one from `ZooKeeperRetriesControl` itself. [#52347](https://github.com/ClickHouse/ClickHouse/pull/52347) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Wait for the zero copy replication lock even if some disks don't support it. [#52376](https://github.com/ClickHouse/ClickHouse/pull/52376) ([Raúl Marín](https://github.com/Algunenano)).
+* Now the interserver port will be closed only after tables are shut down. [#52498](https://github.com/ClickHouse/ClickHouse/pull/52498) ([alesapin](https://github.com/alesapin)).
+
+#### Experimental Feature
+* Writing parquet files is 10x faster; it's multi-threaded now. Almost the same speed as reading. [#49367](https://github.com/ClickHouse/ClickHouse/pull/49367) ([Michael Kolupaev](https://github.com/al13n321)). This is controlled by the setting `output_format_parquet_use_custom_encoder` which is disabled by default, because the feature is non-ideal.
+* Added support for [PRQL](https://prql-lang.org/) as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Allow adding a disk name for custom disks. Previously, custom disks would use an internally generated disk name. Now it is possible with `disk = disk_<name>(...)` (e.g. the disk will have the name `name`). [#51552](https://github.com/ClickHouse/ClickHouse/pull/51552) ([Kseniia Sumarokova](https://github.com/kssenii)). This syntax can still be changed in this release.
+* (experimental MaterializedMySQL) Fixed a crash when `mysqlxx::Pool::Entry` is used after it was disconnected. [#52063](https://github.com/ClickHouse/ClickHouse/pull/52063) ([Val Doroshchuk](https://github.com/valbok)).
+* (experimental MaterializedMySQL) `CREATE TABLE ... AS SELECT` .. is now supported in MaterializedMySQL. [#52067](https://github.com/ClickHouse/ClickHouse/pull/52067) ([Val Doroshchuk](https://github.com/valbok)).
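+
+To illustrate the `toYear(x) = c` rewrite from the Improvement list above (a sketch; `t` and its `DateTime` column `d` are placeholders):
+
+```sql
+-- This predicate can now be rewritten internally into a range form,
+-- making it usable for primary-key analysis:
+SELECT count() FROM t WHERE toYear(d) = 2023;
+-- behaves roughly like:
+SELECT count() FROM t WHERE d >= toDateTime('2023-01-01 00:00:00') AND d < toDateTime('2024-01-01 00:00:00');
+```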
+* (experimental MaterializedMySQL) Introduced automatic conversion of text types to utf8 for MaterializedMySQL. [#52084](https://github.com/ClickHouse/ClickHouse/pull/52084) ([Val Doroshchuk](https://github.com/valbok)).
+* (experimental MaterializedMySQL) Now unquoted UTF-8 strings are supported in DDL for MaterializedMySQL. [#52318](https://github.com/ClickHouse/ClickHouse/pull/52318) ([Val Doroshchuk](https://github.com/valbok)).
+* (experimental MaterializedMySQL) Now double-quoted comments are supported in MaterializedMySQL. [#52355](https://github.com/ClickHouse/ClickHouse/pull/52355) ([Val Doroshchuk](https://github.com/valbok)).
+* Upgrade Intel QPL from v1.1.0 to v1.2.0, upgrade Intel accel-config from v3.5 to v4.0, and fix an issue where Device IOTLB misses had a big performance impact for IAA accelerators. [#52180](https://github.com/ClickHouse/ClickHouse/pull/52180) ([jasperzhu](https://github.com/jinjunzh)).
+* The `session_timezone` setting (new in version 23.6) is demoted to experimental (see the sketch below). [#52445](https://github.com/ClickHouse/ClickHouse/pull/52445) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Support the ZooKeeper `reconfig` command for ClickHouse Keeper with incremental reconfiguration, which can be enabled via the `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)). It is suspected that this feature is incomplete.
+
+#### Build/Testing/Packaging Improvement
+* Add experimental ClickHouse builds for Linux RISC-V 64 to CI. [#31398](https://github.com/ClickHouse/ClickHouse/pull/31398) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add an integration test check with the enabled Analyzer. [#50926](https://github.com/ClickHouse/ClickHouse/pull/50926) [#52210](https://github.com/ClickHouse/ClickHouse/pull/52210) ([Dmitry Novik](https://github.com/novikd)).
+* Reproducible builds for Rust. [#52395](https://github.com/ClickHouse/ClickHouse/pull/52395) ([Azat Khuzhin](https://github.com/azat)).
+* Update Cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)).
+* Make the function `CHColumnToArrowColumn::fillArrowArrayWithArrayColumnData` work with nullable arrays, which are not possible in ClickHouse, but needed for Gluten. [#52112](https://github.com/ClickHouse/ClickHouse/pull/52112) ([李扬](https://github.com/taiyang-li)).
+* We've updated the CCTZ library to master, but there are no user-visible changes. [#52124](https://github.com/ClickHouse/ClickHouse/pull/52124) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The `system.licenses` table now includes the hard-forked library Poco. This closes [#52066](https://github.com/ClickHouse/ClickHouse/issues/52066). [#52127](https://github.com/ClickHouse/ClickHouse/pull/52127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Check that there are no cases of bad punctuation: whitespace before a comma like `Hello ,world` instead of `Hello, world`. [#52549](https://github.com/ClickHouse/ClickHouse/pull/52549) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+* Fix MaterializedPostgreSQL syncTables [#49698](https://github.com/ClickHouse/ClickHouse/pull/49698) ([Kseniia Sumarokova](https://github.com/kssenii)).
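+
+A sketch of the `session_timezone` setting mentioned above (the chosen timezone is illustrative; the setting is experimental):
+
+```sql
+SET session_timezone = 'UTC';
+SELECT timeZone(), now();
+```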
+* Fix projection with optimize_aggregators_of_group_by_keys [#49709](https://github.com/ClickHouse/ClickHouse/pull/49709) ([Amos Bird](https://github.com/amosbird)).
+* Fix optimize_skip_unused_shards with JOINs [#51037](https://github.com/ClickHouse/ClickHouse/pull/51037) ([Azat Khuzhin](https://github.com/azat)).
+* Fix formatDateTime() with fractional negative datetime64 (see the sketch below) [#51290](https://github.com/ClickHouse/ClickHouse/pull/51290) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Functions `hasToken*` were totally wrong. Add a test for [#43358](https://github.com/ClickHouse/ClickHouse/issues/43358) [#51378](https://github.com/ClickHouse/ClickHouse/pull/51378) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix the optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix SIGSEGV for clusters with zero weight across all shards (fixes INSERT INTO FUNCTION clusterAllReplicas()) [#51545](https://github.com/ClickHouse/ClickHouse/pull/51545) ([Azat Khuzhin](https://github.com/azat)).
+* Fix timeout for hedged requests [#51582](https://github.com/ClickHouse/ClickHouse/pull/51582) ([Azat Khuzhin](https://github.com/azat)).
+* Fix logical error in ANTI join with NULL [#51601](https://github.com/ClickHouse/ClickHouse/pull/51601) ([vdimir](https://github.com/vdimir)).
+* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)).
+* Do not apply PredicateExpressionsOptimizer for ASOF/ANTI join [#51633](https://github.com/ClickHouse/ClickHouse/pull/51633) ([vdimir](https://github.com/vdimir)).
+* Fix async insert with deduplication for ReplicatedMergeTree using merging algorithms [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix reading from an empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix segfault when creating an invalid EmbeddedRocksDB table [#51847](https://github.com/ClickHouse/ClickHouse/pull/51847) ([Duc Canh Le](https://github.com/canhld94)).
+* Fix inserts into MongoDB tables [#51876](https://github.com/ClickHouse/ClickHouse/pull/51876) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix deadlock on DatabaseCatalog shutdown [#51908](https://github.com/ClickHouse/ClickHouse/pull/51908) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix error in subquery operators [#51922](https://github.com/ClickHouse/ClickHouse/pull/51922) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix async connect to hosts with multiple IPs [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)).
+* Do not remove inputs after ActionsDAG::merge [#51947](https://github.com/ClickHouse/ClickHouse/pull/51947) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)).
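+
+For reference, the call shape affected by the `formatDateTime` fix above (a sketch; the value is fractional and before the Unix epoch, i.e. negative internally):
+
+```sql
+SELECT formatDateTime(toDateTime64('1969-12-31 23:59:59.123', 3, 'UTC'), '%Y-%m-%d %H:%i:%S', 'UTC');
+```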
+* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Small fix for toDateTime64() for dates after 2283-12-31 [#52130](https://github.com/ClickHouse/ClickHouse/pull/52130) ([Andrey Zvonov](https://github.com/zvonand)).
+* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix incorrect projection analysis when the aggregation expression contains monotonic functions [#52151](https://github.com/ClickHouse/ClickHouse/pull/52151) ([Amos Bird](https://github.com/amosbird)).
+* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Disable direct join for range dictionary [#52187](https://github.com/ClickHouse/ClickHouse/pull/52187) ([Duc Canh Le](https://github.com/canhld94)).
+* Fix sticky mutations test (and extremely rare race condition) [#52197](https://github.com/ClickHouse/ClickHouse/pull/52197) ([alesapin](https://github.com/alesapin)).
+* Fix race in Web disk [#52211](https://github.com/ClickHouse/ClickHouse/pull/52211) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix data race in Connection::setAsyncCallback on unknown packet from server [#52219](https://github.com/ClickHouse/ClickHouse/pull/52219) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix temp data deletion on startup, add test [#52275](https://github.com/ClickHouse/ClickHouse/pull/52275) ([vdimir](https://github.com/vdimir)).
+* Don't use minmax_count projections when counting nullable columns [#52297](https://github.com/ClickHouse/ClickHouse/pull/52297) ([Amos Bird](https://github.com/amosbird)).
+* MergeTree/ReplicatedMergeTree should use the server timezone for log entries [#52325](https://github.com/ClickHouse/ClickHouse/pull/52325) ([Azat Khuzhin](https://github.com/azat)).
+* Fix parameterized view with a CTE and multiple usages [#52328](https://github.com/ClickHouse/ClickHouse/pull/52328) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix `apply_snapshot` in Keeper [#52358](https://github.com/ClickHouse/ClickHouse/pull/52358) ([Antonio Andelic](https://github.com/antonio2368)).
+* Update build-osx.md [#52377](https://github.com/ClickHouse/ClickHouse/pull/52377) ([AlexBykovski](https://github.com/AlexBykovski)).
+* Fix `countSubstrings()` hang with an empty needle and a column haystack (see the sketch below) [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)).
+* Fix normal projection with merge table [#52432](https://github.com/ClickHouse/ClickHouse/pull/52432) ([Amos Bird](https://github.com/amosbird)).
+* Fix possible double-free in Aggregator [#52439](https://github.com/ClickHouse/ClickHouse/pull/52439) ([Nikita Taranov](https://github.com/nickitat)).
+* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)).
+* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
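+
+For reference, normal `countSubstrings` usage plus the shape that previously hung (a sketch; the subquery stands in for any column haystack):
+
+```sql
+-- Counts non-overlapping occurrences.
+SELECT countSubstrings('ababab', 'ab'); -- 3
+-- An empty needle with a column haystack previously hung; it now returns.
+SELECT countSubstrings(s, '') FROM (SELECT 'abc' AS s);
+```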
+* Check recursion depth in OptimizedRegularExpression [#52451](https://github.com/ClickHouse/ClickHouse/pull/52451) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data-race DatabaseReplicated::startupTables()/canExecuteReplicatedMetadataAlter() [#52490](https://github.com/ClickHouse/ClickHouse/pull/52490) ([Azat Khuzhin](https://github.com/azat)). +* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix lightweight delete after drop of projection [#52517](https://github.com/ClickHouse/ClickHouse/pull/52517) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)). + + ### ClickHouse release 23.6, 2023-06-29 #### Backward Incompatible Change @@ -18,15 +583,14 @@ * CGroups metrics related to CPU are replaced with one metric, `CGroupMaxCPU` for better usability. The `Normalized` CPU usage metrics will be normalized to CGroups limits instead of the total number of CPUs when they are set. This closes [#50836](https://github.com/ClickHouse/ClickHouse/issues/50836). [#50835](https://github.com/ClickHouse/ClickHouse/pull/50835) ([Alexey Milovidov](https://github.com/alexey-milovidov)). #### New Feature -* Added `Overlay` database engine to combine multiple databases into one. Added `Filesystem` database engine to represent a directory in the filesystem as a set of implicitly available tables with auto-detected formats and structures. A new `S3` database engine allows to read-only interact with s3 storage by representing a prefix as a set of tables. A new `HDFS` database engine allows to interact with HDFS storage in the same way. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)). * The function `transform` as well as `CASE` with value matching started to support all data types. This closes [#29730](https://github.com/ClickHouse/ClickHouse/issues/29730). This closes [#32387](https://github.com/ClickHouse/ClickHouse/issues/32387). This closes [#50827](https://github.com/ClickHouse/ClickHouse/issues/50827). This closes [#31336](https://github.com/ClickHouse/ClickHouse/issues/31336). This closes [#40493](https://github.com/ClickHouse/ClickHouse/issues/40493). [#51351](https://github.com/ClickHouse/ClickHouse/pull/51351) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Added option `--rename_files_after_processing `. This closes [#34207](https://github.com/ClickHouse/ClickHouse/issues/34207). [#49626](https://github.com/ClickHouse/ClickHouse/pull/49626) ([alekseygolub](https://github.com/alekseygolub)). -* Add support for `APPEND` modifier in `INTO OUTFILE` clause. Suggest using `APPEND` or `TRUNCATE` for `INTO OUTFILE` when file exists. [#50950](https://github.com/ClickHouse/ClickHouse/pull/50950) ([alekar](https://github.com/alekar)). +* Add support for `TRUNCATE` modifier in `INTO OUTFILE` clause. Suggest using `APPEND` or `TRUNCATE` for `INTO OUTFILE` when file exists. [#50950](https://github.com/ClickHouse/ClickHouse/pull/50950) ([alekar](https://github.com/alekar)). * Add table engine `Redis` and table function `redis`. It allows querying external Redis servers. [#50150](https://github.com/ClickHouse/ClickHouse/pull/50150) ([JackyWoo](https://github.com/JackyWoo)). 
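+
+A sketch of the `redis` table function from the item above (the host and schema are placeholders; requires a reachable Redis server):
+
+```sql
+SELECT *
+FROM redis('localhost:6379', 'key', 'key String, value String');
+```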
* Allow to skip empty files in file/s3/url/hdfs table functions using settings `s3_skip_empty_files`, `hdfs_skip_empty_files`, `engine_file_skip_empty_files`, `engine_url_skip_empty_files`. [#50364](https://github.com/ClickHouse/ClickHouse/pull/50364) ([Kruglov Pavel](https://github.com/Avogar)). * Add a new setting named `use_mysql_types_in_show_columns` to alter the `SHOW COLUMNS` SQL statement to display MySQL equivalent types when a client is connected via the MySQL compatibility port. [#49577](https://github.com/ClickHouse/ClickHouse/pull/49577) ([Thomas Panetti](https://github.com/tpanetti)). * Clickhouse-client can now be called with a connection string instead of "--host", "--port", "--user" etc. [#50689](https://github.com/ClickHouse/ClickHouse/pull/50689) ([Alexey Gerasimchuck](https://github.com/Demilivor)). -* Add setting `session_timezone`, it is used as default timezone for session when not explicitly specified. [#44149](https://github.com/ClickHouse/ClickHouse/pull/44149) ([Andrey Zvonov](https://github.com/zvonand)). +* Add setting `session_timezone`; it is used as the default timezone for a session when not explicitly specified. [#44149](https://github.com/ClickHouse/ClickHouse/pull/44149) ([Andrey Zvonov](https://github.com/zvonand)). * Codec DEFLATE_QPL is now controlled via server setting "enable_deflate_qpl_codec" (default: false) instead of setting "allow_experimental_codecs". This marks DEFLATE_QPL non-experimental. [#50775](https://github.com/ClickHouse/ClickHouse/pull/50775) ([Robert Schulze](https://github.com/rschu1ze)). #### Performance Improvement @@ -40,12 +604,12 @@ * Make multiple list requests to ZooKeeper in parallel to speed up reading from system.zookeeper table. [#51042](https://github.com/ClickHouse/ClickHouse/pull/51042) ([Alexander Gololobov](https://github.com/davenger)). * Speedup initialization of DateTime lookup tables for time zones. This should reduce startup/connect time of clickhouse-client especially in debug build as it is rather heavy. [#51347](https://github.com/ClickHouse/ClickHouse/pull/51347) ([Alexander Gololobov](https://github.com/davenger)). * Fix data lakes slowness because of synchronous head requests. (Related to Iceberg/Deltalake/Hudi being slow with a lot of files). [#50976](https://github.com/ClickHouse/ClickHouse/pull/50976) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Do not replicate `ALTER PARTITION` queries and mutations through `Replicated` database if it has only one shard and the underlying table is `ReplicatedMergeTree`. [#51049](https://github.com/ClickHouse/ClickHouse/pull/51049) ([Alexander Tokmakov](https://github.com/tavplubix)). * Do not read all the columns from right GLOBAL JOIN table. [#50721](https://github.com/ClickHouse/ClickHouse/pull/50721) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). #### Experimental Feature * Support parallel replicas with the analyzer. [#50441](https://github.com/ClickHouse/ClickHouse/pull/50441) ([Raúl Marín](https://github.com/Algunenano)). * Add random sleep before large merges/mutations execution to split load more evenly between replicas in case of zero-copy replication. [#51282](https://github.com/ClickHouse/ClickHouse/pull/51282) ([alesapin](https://github.com/alesapin)). +* Do not replicate `ALTER PARTITION` queries and mutations through `Replicated` database if it has only one shard and the underlying table is `ReplicatedMergeTree`. [#51049](https://github.com/ClickHouse/ClickHouse/pull/51049) ([Alexander Tokmakov](https://github.com/tavplubix)). 
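+
+To illustrate the skip-empty-files settings from the New Feature list above (a sketch; the glob path is a placeholder):
+
+```sql
+SELECT *
+FROM file('data/*.csv', 'CSVWithNames')
+SETTINGS engine_file_skip_empty_files = 1;
+```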
#### Improvement * Relax the thresholds for "too many parts" to be more modern. Return the backpressure during long-running insert queries. [#50856](https://github.com/ClickHouse/ClickHouse/pull/50856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). @@ -139,7 +703,7 @@ * Add MemoryTracker for the background tasks (merges and mutation). Introduces `merges_mutations_memory_usage_soft_limit` and `merges_mutations_memory_usage_to_ram_ratio` settings that represent the soft memory limit for merges and mutations. If this limit is reached ClickHouse won't schedule new merge or mutation tasks. Also `MergesMutationsMemoryTracking` metric is introduced to allow observing current memory usage of background tasks. Resubmit [#46089](https://github.com/ClickHouse/ClickHouse/issues/46089). Closes [#48774](https://github.com/ClickHouse/ClickHouse/issues/48774). [#48787](https://github.com/ClickHouse/ClickHouse/pull/48787) ([Dmitry Novik](https://github.com/novikd)). * Function `dotProduct` work for array. [#49050](https://github.com/ClickHouse/ClickHouse/pull/49050) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)). * Support statement `SHOW INDEX` to improve compatibility with MySQL. [#49158](https://github.com/ClickHouse/ClickHouse/pull/49158) ([Robert Schulze](https://github.com/rschu1ze)). -* Add virtual column `_file` and `_path` support to table function `url`. - Impove error message for table function `url`. - resolves [#49231](https://github.com/ClickHouse/ClickHouse/issues/49231) - resolves [#49232](https://github.com/ClickHouse/ClickHouse/issues/49232). [#49356](https://github.com/ClickHouse/ClickHouse/pull/49356) ([Ziyi Tan](https://github.com/Ziy1-Tan)). +* Add virtual column `_file` and `_path` support to table function `url`. - Improve error message for table function `url`. - resolves [#49231](https://github.com/ClickHouse/ClickHouse/issues/49231) - resolves [#49232](https://github.com/ClickHouse/ClickHouse/issues/49232). [#49356](https://github.com/ClickHouse/ClickHouse/pull/49356) ([Ziyi Tan](https://github.com/Ziy1-Tan)). * Adding the `grants` field in the users.xml file, which allows specifying grants for users. [#49381](https://github.com/ClickHouse/ClickHouse/pull/49381) ([pufit](https://github.com/pufit)). * Support full/right join by using grace hash join algorithm. [#49483](https://github.com/ClickHouse/ClickHouse/pull/49483) ([lgbo](https://github.com/lgbo-ustc)). * `WITH FILL` modifier groups filling by sorting prefix. Controlled by `use_with_fill_by_sorting_prefix` setting (enabled by default). Related to [#33203](https://github.com/ClickHouse/ClickHouse/issues/33203)#issuecomment-1418736794. [#49503](https://github.com/ClickHouse/ClickHouse/pull/49503) ([Igor Nikonov](https://github.com/devcrafter)). @@ -184,7 +748,7 @@ * `DEFLATE_QPL` codec lower the minimum simd version to SSE 4.2. [doc change in qpl](https://github.com/intel/qpl/commit/3f8f5cea27739f5261e8fd577dc233ffe88bf679) - Intel® QPL relies on a run-time kernels dispatcher and cpuid check to choose the best available implementation(sse/avx2/avx512) - restructured cmakefile for qpl build in clickhouse to align with latest upstream qpl. [#49811](https://github.com/ClickHouse/ClickHouse/pull/49811) ([jasperzhu](https://github.com/jinjunzh)). * Add initial support to do JOINs with pure parallel replicas. [#49544](https://github.com/ClickHouse/ClickHouse/pull/49544) ([Raúl Marín](https://github.com/Algunenano)). * More parallelism on `Outdated` parts removal with "zero-copy replication". 
-* Parallel Replicas: 1) Fixed an error `NOT_FOUND_COLUMN_IN_BLOCK` in case of using parallel replicas with non-replicated storage with disabled setting `parallel_replicas_for_non_replicated_merge_tree` 2) Now `allow_experimental_parallel_reading_from_replicas` have 3 possible values - 0, 1 and 2. 0 - disabled, 1 - enabled, silently disable them in case of failure (in case of FINAL or JOIN), 2 - enabled, throw an expection in case of failure. 3) If FINAL modifier is used in SELECT query and parallel replicas are enabled, ClickHouse will try to disable them if `allow_experimental_parallel_reading_from_replicas` is set to 1 and throw an exception otherwise. [#50195](https://github.com/ClickHouse/ClickHouse/pull/50195) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Parallel Replicas: 1) Fixed an error `NOT_FOUND_COLUMN_IN_BLOCK` in case of using parallel replicas with non-replicated storage with disabled setting `parallel_replicas_for_non_replicated_merge_tree` 2) Now `allow_experimental_parallel_reading_from_replicas` has 3 possible values - 0, 1 and 2. 0 - disabled, 1 - enabled, silently disable them in case of failure (in case of FINAL or JOIN), 2 - enabled, throw an exception in case of failure. 3) If FINAL modifier is used in SELECT query and parallel replicas are enabled, ClickHouse will try to disable them if `allow_experimental_parallel_reading_from_replicas` is set to 1 and throw an exception otherwise. [#50195](https://github.com/ClickHouse/ClickHouse/pull/50195) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). * When parallel replicas are enabled, they will always skip unavailable servers (the behavior is controlled by the setting `skip_unavailable_shards`, which is enabled by default and can only be disabled). This closes: [#48565](https://github.com/ClickHouse/ClickHouse/issues/48565). [#50293](https://github.com/ClickHouse/ClickHouse/pull/50293) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). #### Improvement @@ -203,7 +767,7 @@ * Add a new column `zookeeper_name` in system.replicas, to indicate on which (auxiliary) zookeeper cluster the replicated table's metadata is stored. [#48549](https://github.com/ClickHouse/ClickHouse/pull/48549) ([cangyin](https://github.com/cangyin)). * `IN` operator supports the comparison of `Date` and `Date32`. Closes [#48736](https://github.com/ClickHouse/ClickHouse/issues/48736). [#48806](https://github.com/ClickHouse/ClickHouse/pull/48806) ([flynn](https://github.com/ucasfl)). * Support for erasure codes in `HDFS`, author: @M1eyu2018, @tomscut. [#48833](https://github.com/ClickHouse/ClickHouse/pull/48833) ([M1eyu](https://github.com/M1eyu2018)). -* Implement SYSTEM DROP REPLICA from auxillary ZooKeeper clusters, may be close [#48931](https://github.com/ClickHouse/ClickHouse/issues/48931). [#48932](https://github.com/ClickHouse/ClickHouse/pull/48932) ([wangxiaobo](https://github.com/wzb5212)). +* Implement SYSTEM DROP REPLICA from auxiliary ZooKeeper clusters, which may close [#48931](https://github.com/ClickHouse/ClickHouse/issues/48931). [#48932](https://github.com/ClickHouse/ClickHouse/pull/48932) ([wangxiaobo](https://github.com/wzb5212)). * Add Array data type to MongoDB. Closes [#48598](https://github.com/ClickHouse/ClickHouse/issues/48598). [#48983](https://github.com/ClickHouse/ClickHouse/pull/48983) ([Nikolay Degterinsky](https://github.com/evillique)). * Support storing `Interval` data types in tables. [#49085](https://github.com/ClickHouse/ClickHouse/pull/49085) ([larryluogit](https://github.com/larryluogit)).
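The `IN` operator entry above works because both date types are day numbers counted from the Unix epoch (`Date` an unsigned 16-bit value, `Date32` a signed 32-bit one), so a mixed comparison only needs widening to a common signed type. A sketch of the underlying idea (hypothetical helper, not ClickHouse's implementation):

```cpp
#include <cstdint>

/// Date stores days since 1970-01-01 as UInt16; Date32 stores them as Int32
/// (possibly negative for pre-epoch dates). Int64 represents both exactly.
bool dateEqualsDate32(uint16_t date_days, int32_t date32_days)
{
    return static_cast<int64_t>(date_days) == static_cast<int64_t>(date32_days);
}
```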
* Allow using `ntile` window function without explicit window frame definition: `ntile(3) OVER (ORDER BY a)`, closes [#46763](https://github.com/ClickHouse/ClickHouse/issues/46763). [#49093](https://github.com/ClickHouse/ClickHouse/pull/49093) ([vdimir](https://github.com/vdimir)). @@ -259,7 +823,7 @@ #### Build/Testing/Packaging Improvement * New and improved `keeper-bench`. Everything can be customized from YAML/XML file: - request generator - each type of request generator can have a specific set of fields - multi requests can be generated just by doing the same under `multi` key - for each request or subrequest in multi a `weight` field can be defined to control distribution - define trees that need to be set up for a test run - hosts can be defined with all timeouts customizable and it's possible to control how many sessions to generate for each host - integers defined with `min_value` and `max_value` fields are random number generators. [#48547](https://github.com/ClickHouse/ClickHouse/pull/48547) ([Antonio Andelic](https://github.com/antonio2368)). -* Io_uring is not supported on macos, don't choose it when running tests on local to avoid occassional failures. [#49250](https://github.com/ClickHouse/ClickHouse/pull/49250) ([Frank Chen](https://github.com/FrankChen021)). +* io_uring is not supported on macOS; don't choose it when running tests locally to avoid occasional failures. [#49250](https://github.com/ClickHouse/ClickHouse/pull/49250) ([Frank Chen](https://github.com/FrankChen021)). * Support named fault injection for testing. [#49361](https://github.com/ClickHouse/ClickHouse/pull/49361) ([Han Fei](https://github.com/hanfei1991)). * Allow running ClickHouse in OSes where the `prctl` (process control) syscall is not available, such as AWS Lambda. [#49538](https://github.com/ClickHouse/ClickHouse/pull/49538) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Fixed the issue of build conflict between contrib/isa-l and isa-l in qpl [49296](https://github.com/ClickHouse/ClickHouse/issues/49296). [#49584](https://github.com/ClickHouse/ClickHouse/pull/49584) ([jasperzhu](https://github.com/jinjunzh)). @@ -963,7 +1527,7 @@ Add settings input_format_tsv/csv/custom_detect_header that enable this behaviou * Use already written part of the query for fuzzy search (pass to the `skim` library, which is written in Rust and linked statically to ClickHouse). [#44600](https://github.com/ClickHouse/ClickHouse/pull/44600) ([Azat Khuzhin](https://github.com/azat)). * Enable `input_format_json_read_objects_as_strings` by default to be able to read nested JSON objects while the JSON Object type is experimental. [#44657](https://github.com/ClickHouse/ClickHouse/pull/44657) ([Kruglov Pavel](https://github.com/Avogar)). * Improvement for deduplication of async inserts: when users do duplicate async inserts, we should deduplicate in memory before we query Keeper. [#44682](https://github.com/ClickHouse/ClickHouse/pull/44682) ([Han Fei](https://github.com/hanfei1991)). -* Input/ouptut `Avro` format will parse bool type as ClickHouse bool type. [#44684](https://github.com/ClickHouse/ClickHouse/pull/44684) ([Kruglov Pavel](https://github.com/Avogar)). +* Input/output `Avro` format will parse bool type as ClickHouse bool type. [#44684](https://github.com/ClickHouse/ClickHouse/pull/44684) ([Kruglov Pavel](https://github.com/Avogar)). * Support Bool type in Arrow/Parquet/ORC. Closes [#43970](https://github.com/ClickHouse/ClickHouse/issues/43970). [#44698](https://github.com/ClickHouse/ClickHouse/pull/44698) ([Kruglov Pavel](https://github.com/Avogar)).
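The async-insert deduplication entry above checks an in-memory structure first, so an exact duplicate insert can be rejected without a network round trip to Keeper. A minimal sketch of that idea (hypothetical class, not the actual implementation):

```cpp
#include <mutex>
#include <string>
#include <unordered_set>

/// Remembers hashes of recently inserted async blocks; seeing a known hash
/// means the block was already inserted and the Keeper query can be skipped.
class InMemoryDeduplicationCache
{
public:
    /// Returns true if the block hash was not seen before.
    bool tryInsert(const std::string & block_hash)
    {
        std::lock_guard lock(mutex);
        return seen.insert(block_hash).second;
    }

private:
    std::mutex mutex;
    std::unordered_set<std::string> seen;
};
```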
* Don't greedily parse beyond the quotes when reading UUIDs, as it may lead to mistakenly successful parsing of incorrect data. [#44686](https://github.com/ClickHouse/ClickHouse/pull/44686) ([Raúl Marín](https://github.com/Algunenano)). * Infer UInt64 in case of Int64 overflow and fix some transforms in schema inference. [#44696](https://github.com/ClickHouse/ClickHouse/pull/44696) ([Kruglov Pavel](https://github.com/Avogar)). diff --git a/CMakeLists.txt b/CMakeLists.txt index 5d6ed75bb296..4be0c63e3b87 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,6 +19,7 @@ include (cmake/tools.cmake) include (cmake/ccache.cmake) include (cmake/clang_tidy.cmake) include (cmake/git.cmake) +include (cmake/utils.cmake) # Ignore export() since we don't use it, # but it gets broken with global targets via link_libraries() @@ -87,7 +88,6 @@ if (ENABLE_FUZZING) set (ENABLE_CLICKHOUSE_ODBC_BRIDGE OFF) set (ENABLE_LIBRARIES 0) set (ENABLE_SSL 1) - set (USE_UNWIND ON) set (ENABLE_EMBEDDED_COMPILER 0) set (ENABLE_EXAMPLES 0) set (ENABLE_UTILS 0) @@ -102,24 +102,17 @@ if (ENABLE_FUZZING) set (ENABLE_PROTOBUF 1) endif() -option (ENABLE_WOBOQ_CODEBROWSER "Build for woboq codebrowser" OFF) - -if (ENABLE_WOBOQ_CODEBROWSER) - set (ENABLE_EMBEDDED_COMPILER 0) - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wno-poison-system-directories") - # woboq codebrowser uses clang tooling, and they could add default system - # clang includes, and later clang will warn for those added by itself - # includes. - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-poison-system-directories") -endif() - # Global libraries # See: # - default_libs.cmake # - sanitize.cmake add_library(global-libs INTERFACE) -include (cmake/fuzzer.cmake) +# We don't want to instrument everything with fuzzer, but only specific targets (see below); +# also, since we build our own llvm, we specifically don't want to instrument +# the libFuzzer library itself - it would result in infinite recursion +#include (cmake/fuzzer.cmake) + include (cmake/sanitize.cmake) option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON) @@ -166,8 +159,14 @@ elseif(GLIBC_COMPATIBILITY) message (${RECONFIGURE_MESSAGE_LEVEL} "Glibc compatibility cannot be enabled in current configuration") endif () -# Make sure the final executable has symbols exported -set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic") +if (OS_LINUX) + # We should not export dynamic symbols, because: + # - The main clickhouse binary does not use dlopen, + # and whatever is poisoning it by LD_PRELOAD should not link to our symbols. + # - The clickhouse-odbc-bridge and clickhouse-library-bridge binaries + # should not expose their symbols to ODBC drivers and libraries.
+ set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-export-dynamic") +endif () if (OS_DARWIN) # The `-all_load` flag forces loading of all symbols from all libraries, @@ -203,9 +202,6 @@ option(OMIT_HEAVY_DEBUG_SYMBOLS "Do not generate debugger info for heavy modules (ClickHouse functions and dictionaries, some contrib)" ${OMIT_HEAVY_DEBUG_SYMBOLS_DEFAULT}) -if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG") - set(USE_DEBUG_HELPERS ON) -endif() option(USE_DEBUG_HELPERS "Enable debug helpers" ${USE_DEBUG_HELPERS}) option(BUILD_STANDALONE_KEEPER "Build keeper as small standalone binary" OFF) @@ -322,7 +318,16 @@ set (COMPILER_FLAGS "${COMPILER_FLAGS}") # Our built-in unwinder only supports DWARF version up to 4. set (DEBUG_INFO_FLAGS "-g -gdwarf-4") -set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS}") +# Disable omit frame pointer compiler optimization using -fno-omit-frame-pointer +option(DISABLE_OMIT_FRAME_POINTER "Disable omit frame pointer compiler optimization" OFF) + +if (DISABLE_OMIT_FRAME_POINTER) + set (CMAKE_CXX_FLAGS_ADD "${CMAKE_CXX_FLAGS_ADD} -fno-omit-frame-pointer") + set (CMAKE_C_FLAGS_ADD "${CMAKE_C_FLAGS_ADD} -fno-omit-frame-pointer") + set (CMAKE_ASM_FLAGS_ADD "${CMAKE_ASM_FLAGS_ADD} -fno-omit-frame-pointer") +endif() + +set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${COMPILER_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") set (CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -O3 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") set (CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 ${DEBUG_INFO_FLAGS} ${CMAKE_CXX_FLAGS_ADD}") @@ -344,9 +349,9 @@ if (COMPILER_CLANG) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-absolute-paths") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-absolute-paths") - if (NOT ENABLE_TESTS AND NOT SANITIZE) + if (NOT ENABLE_TESTS AND NOT SANITIZE AND OS_LINUX) # https://clang.llvm.org/docs/ThinLTO.html - # Applies to clang only. + # Applies to clang and linux only. # Disabled when building with tests or sanitizers. 
option(ENABLE_THINLTO "Clang-specific link time optimization" ON) endif() @@ -556,6 +561,34 @@ add_subdirectory (programs) add_subdirectory (tests) add_subdirectory (utils) +if (FUZZER) + # Bundle fuzzers target + add_custom_target(fuzzers) + # Instrument all targets fuzzer and link with libfuzzer + get_all_targets(all_targets) + foreach(target ${all_targets}) + if (NOT(target STREQUAL "_fuzzer" OR target STREQUAL "_fuzzer_no_main")) + get_target_property(target_type ${target} TYPE) + if (NOT(target_type STREQUAL "INTERFACE_LIBRARY" OR target_type STREQUAL "UTILITY")) + target_compile_options(${target} PRIVATE "-fsanitize=fuzzer-no-link") + endif() + # clickhouse fuzzer isn't working correctly + # initial PR https://github.com/ClickHouse/ClickHouse/pull/27526 + #if (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse") + if (target_type STREQUAL "EXECUTABLE" AND target MATCHES ".+_fuzzer") + message(STATUS "${target} instrumented with fuzzer") + target_link_libraries(${target} PUBLIC ch_contrib::fuzzer) + # Add to fuzzers bundle + add_dependencies(fuzzers ${target}) + get_target_filename(${target} target_bin_name) + get_target_property(target_bin_dir ${target} BINARY_DIR) + add_custom_command(TARGET fuzzers POST_BUILD COMMAND mv "${target_bin_dir}/${target_bin_name}" "${CMAKE_CURRENT_BINARY_DIR}/programs/" VERBATIM) + endif() + endif() + endforeach() + add_custom_command(TARGET fuzzers POST_BUILD COMMAND SRC=${CMAKE_SOURCE_DIR} BIN=${CMAKE_BINARY_DIR} OUT=${CMAKE_BINARY_DIR}/programs ${CMAKE_SOURCE_DIR}/tests/fuzz/build.sh VERBATIM) +endif() + include (cmake/sanitize_targets.cmake) # Build native targets if necessary diff --git a/PreLoad.cmake b/PreLoad.cmake index b456c724cc69..4879e721ae37 100644 --- a/PreLoad.cmake +++ b/PreLoad.cmake @@ -78,6 +78,14 @@ if (OS MATCHES "Linux" AND ("$ENV{CC}" MATCHES ".*clang.*" OR CMAKE_C_COMPILER MATCHES ".*clang.*")) if (ARCH MATCHES "amd64|x86_64") + # NOTE: right now musl is not ready, since unwind is too slow with it + # + # FWIW the following had been tried: + # - update musl + # - compile musl with debug + # - compile musl with debug and -fasynchronous-unwind-tables + # + # But none of this changes anything so far. set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-x86_64.cmake" CACHE INTERNAL "") elseif (ARCH MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)") set (CMAKE_TOOLCHAIN_FILE "cmake/linux/toolchain-aarch64.cmake" CACHE INTERNAL "") diff --git a/README.md b/README.md index eac036c2d9bd..67d4f46988f7 100644 --- a/README.md +++ b/README.md @@ -16,30 +16,32 @@ curl https://clickhouse.com/ | sh * [YouTube channel](https://www.youtube.com/c/ClickHouseDB) has a lot of content about ClickHouse in video format. * [Slack](https://clickhouse.com/slack) and [Telegram](https://telegram.me/clickhouse_en) allow chatting with ClickHouse users in real-time. * [Blog](https://clickhouse.com/blog/) contains various ClickHouse-related articles, as well as announcements and reports about events. -* [Code Browser (Woboq)](https://clickhouse.com/codebrowser/ClickHouse/index.html) with syntax highlight and navigation. -* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlight, powered by github.dev. +* [Code Browser (github.dev)](https://github.dev/ClickHouse/ClickHouse) with syntax highlighting, powered by github.dev. +* [Static Analysis (SonarCloud)](https://sonarcloud.io/project/issues?resolved=false&id=ClickHouse_ClickHouse) proposes C++ quality improvements. 
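The `if (FUZZER)` block in `CMakeLists.txt` above compiles everything with `-fsanitize=fuzzer-no-link` and links only the `*_fuzzer` executables against libFuzzer (`ch_contrib::fuzzer`), so each of those executables supplies the standard libFuzzer entry point instead of `main`. A minimal sketch of such a target (a toy example, not one of the actual ClickHouse fuzzers):

```cpp
#include <cstddef>
#include <cstdint>

/// libFuzzer calls this callback for every generated input;
/// returning 0 tells it to keep fuzzing.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
{
    if (size >= 2 && data[0] == 'O' && data[1] == 'K')
        __builtin_trap(); // a deliberately planted crash for demonstration
    return 0;
}
```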
* [Contacts](https://clickhouse.com/company/contact) can help to get your questions answered if there are any. ## Upcoming Events -* [**v23.6 Release Webinar**](https://clickhouse.com/company/events/v23-6-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-06) - Jun 29 - 23.6 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. -* [**ClickHouse Meetup in Paris**](https://www.meetup.com/clickhouse-france-user-group/events/294283460) - Jul 4 -* [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/293913596) - Jul 18 -* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/293913441) - Jul 19 -* [**ClickHouse Meetup in Toronto**](https://www.meetup.com/clickhouse-toronto-user-group/events/294183127) - Jul 20 -* [**ClickHouse Meetup in Singapore**](https://www.meetup.com/clickhouse-singapore-meetup-group/events/294428050/) - Jul 27 +* [**v23.9 Community Call**](https://clickhouse.com/company/events/v23-9-community-release-call?utm_source=github&utm_medium=social&utm_campaign=release-webinar-2023-08) - Sep 28 - 23.9 is rapidly approaching. Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release. +* [**ClickHouse Meetup in Amsterdam**](https://www.meetup.com/clickhouse-netherlands-user-group/events/296334590/) - Oct 31 +* [**ClickHouse Meetup in Beijing**](https://www.meetup.com/clickhouse-beijing-user-group/events/296334856/) - Nov 4 +* [**ClickHouse Meetup in San Francisco**](https://www.meetup.com/clickhouse-silicon-valley-meetup-group/events/296334923/) - Nov 8 +* [**ClickHouse Meetup in Singapore**](https://www.meetup.com/clickhouse-singapore-meetup-group/events/296334976/) - Nov 15 +* [**ClickHouse Meetup in Berlin**](https://www.meetup.com/clickhouse-berlin-user-group/events/296488501/) - Nov 30 +* [**ClickHouse Meetup in NYC**](https://www.meetup.com/clickhouse-new-york-user-group/events/296488779/) - Dec 11 +* [**ClickHouse Meetup in Boston**](https://www.meetup.com/clickhouse-boston-user-group/events/296488840/) - Dec 12 Also, keep an eye out for upcoming meetups around the world. Somewhere else you want us to be? Please feel free to reach out to tyler clickhouse com. ## Recent Recordings * **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible, recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments" -* **Recording available**: [**v23.4 Release Webinar**](https://www.youtube.com/watch?v=4rrf6bk_mOg) Faster Parquet Reading, Asynchonous Connections to Reoplicas, Trailing Comma before FROM, extractKeyValuePairs, integrations updates, and so much more! Watch it now! +* **Recording available**: [**v23.6 Release Webinar**](https://www.youtube.com/watch?v=cuf_hYn7dqU) All the features of 23.6, one convenient video! Watch it now! * **All release webinar recordings**: [YouTube playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3jAlSy1JxyP8zluvXaN3nxU) - ## Interested in joining ClickHouse and making it your full time job?
+ ## Interested in joining ClickHouse and making it your full-time job? -We are a globally diverse and distributed team, united behind a common goal of creating industry-leading, real-time analytics. Here, you will have an opportunity to solve some of the most cutting edge technical challenges and have direct ownership of your work and vision. If you are a contributor by nature, a thinker as well as a doer - we’ll definitely click! +We are a globally diverse and distributed team, united behind a common goal of creating industry-leading, real-time analytics. Here, you will have an opportunity to solve some of the most cutting-edge technical challenges and have direct ownership of your work and vision. If you are a contributor by nature, a thinker and a doer - we’ll definitely click! Check out our **current openings** here: https://clickhouse.com/company/careers diff --git a/SECURITY.md b/SECURITY.md index 1864eb6e9e52..82b7254f8c16 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -13,16 +13,15 @@ The following versions of ClickHouse server are currently being supported with s | Version | Supported | |:-|:-| -| 23.5 | ✔️ | -| 23.4 | ✔️ | +| 23.9 | ✔️ | +| 23.8 | ✔️ | +| 23.7 | ✔️ | +| 23.6 | ❌ | +| 23.5 | ❌ | +| 23.4 | ❌ | | 23.3 | ✔️ | | 23.2 | ❌ | | 23.1 | ❌ | -| 22.12 | ❌ | -| 22.11 | ❌ | -| 22.10 | ❌ | -| 22.9 | ❌ | -| 22.8 | ✔️ | | 22.* | ❌ | | 21.* | ❌ | | 20.* | ❌ | diff --git a/base/base/EnumReflection.h b/base/base/EnumReflection.h index 0d1f8ae0a400..e3208f16a756 100644 --- a/base/base/EnumReflection.h +++ b/base/base/EnumReflection.h @@ -3,6 +3,7 @@ #include #include + template concept is_enum = std::is_enum_v; namespace detail diff --git a/base/base/JSON.cpp b/base/base/JSON.cpp index 4c6d97b44443..0b43be381493 100644 --- a/base/base/JSON.cpp +++ b/base/base/JSON.cpp @@ -7,8 +7,6 @@ #include #include -#include - #define JSON_MAX_DEPTH 100 diff --git a/base/base/StringRef.h b/base/base/StringRef.h index f428b7c747f5..6456706fafe0 100644 --- a/base/base/StringRef.h +++ b/base/base/StringRef.h @@ -8,8 +8,10 @@ #include #include +#include #include #include +#include #include @@ -28,6 +30,11 @@ #define CRC_INT __crc32cd #endif +#if defined(__aarch64__) && defined(__ARM_NEON) + #include + #pragma clang diagnostic ignored "-Wreserved-identifier" +#endif + /** * The std::string_view-like container to avoid creating strings to find substrings in the hash table. 
@@ -73,14 +80,14 @@ using StringRefs = std::vector<StringRef>; * For more information, see hash_map_string_2.cpp */ -inline bool compareSSE2(const char * p1, const char * p2) +inline bool compare8(const char * p1, const char * p2) { return 0xFFFF == _mm_movemask_epi8(_mm_cmpeq_epi8( _mm_loadu_si128(reinterpret_cast<const __m128i *>(p1)), _mm_loadu_si128(reinterpret_cast<const __m128i *>(p2)))); } -inline bool compareSSE2x4(const char * p1, const char * p2) +inline bool compare64(const char * p1, const char * p2) { return 0xFFFF == _mm_movemask_epi8( _mm_and_si128( @@ -100,7 +107,30 @@ inline bool compareSSE2x4(const char * p1, const char * p2) _mm_loadu_si128(reinterpret_cast<const __m128i *>(p2) + 3))))); } -inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size) +#elif defined(__aarch64__) && defined(__ARM_NEON) + +inline bool compare8(const char * p1, const char * p2) +{ + uint64_t mask = getNibbleMask(vceqq_u8( + vld1q_u8(reinterpret_cast<const unsigned char *>(p1)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2)))); + return 0xFFFFFFFFFFFFFFFF == mask; +} + +inline bool compare64(const char * p1, const char * p2) +{ + uint64_t mask = getNibbleMask(vandq_u8( + vandq_u8(vceqq_u8(vld1q_u8(reinterpret_cast<const unsigned char *>(p1)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2))), + vceqq_u8(vld1q_u8(reinterpret_cast<const unsigned char *>(p1 + 16)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2 + 16)))), + vandq_u8(vceqq_u8(vld1q_u8(reinterpret_cast<const unsigned char *>(p1 + 32)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2 + 32))), + vceqq_u8(vld1q_u8(reinterpret_cast<const unsigned char *>(p1 + 48)), vld1q_u8(reinterpret_cast<const unsigned char *>(p2 + 48)))))); + return 0xFFFFFFFFFFFFFFFF == mask; +} + +#endif + +#if defined(__SSE2__) || (defined(__aarch64__) && defined(__ARM_NEON)) + +inline bool memequalWide(const char * p1, const char * p2, size_t size) { /** The order of branches and the trick with overlapping comparisons * are the same as in memcpy implementation. @@ -137,7 +167,7 @@ inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size) while (size >= 64) { - if (compareSSE2x4(p1, p2)) + if (compare64(p1, p2)) { p1 += 64; p2 += 64; @@ -147,19 +177,18 @@ inline bool memequalSSE2Wide(const char * p1, const char * p2, size_t size) return false; } - switch (size / 16) + switch (size / 16) // NOLINT(bugprone-switch-missing-default-case) { - case 3: if (!compareSSE2(p1 + 32, p2 + 32)) return false; [[fallthrough]]; - case 2: if (!compareSSE2(p1 + 16, p2 + 16)) return false; [[fallthrough]]; - case 1: if (!compareSSE2(p1, p2)) return false; + case 3: if (!compare8(p1 + 32, p2 + 32)) return false; [[fallthrough]]; + case 2: if (!compare8(p1 + 16, p2 + 16)) return false; [[fallthrough]]; + case 1: if (!compare8(p1, p2)) return false; } - return compareSSE2(p1 + size - 16, p2 + size - 16); + return compare8(p1 + size - 16, p2 + size - 16); } #endif - inline bool operator== (StringRef lhs, StringRef rhs) { if (lhs.size != rhs.size) @@ -168,8 +197,8 @@ inline bool operator== (StringRef lhs, StringRef rhs) if (lhs.size == 0) return true; -#if defined(__SSE2__) - return memequalSSE2Wide(lhs.data, rhs.data, lhs.size); +#if defined(__SSE2__) || (defined(__aarch64__) && defined(__ARM_NEON)) + return memequalWide(lhs.data, rhs.data, lhs.size); #else return 0 == memcmp(lhs.data, rhs.data, lhs.size); #endif @@ -274,6 +303,8 @@ struct CRC32Hash if (size == 0) return 0; + chassert(pos); + if (size < 8) { return static_cast<unsigned>(hashLessThan8(x.data, x.size));
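// The NEON compare8/compare64 above have no _mm_movemask_epi8 equivalent, so they
// use getNibbleMask (see base/base/simd.h later in this diff): vshrn_n_u16 narrows
// the 128-bit byte-comparison result to 64 bits, keeping one nibble per byte, and
// an all-equal comparison yields an all-ones mask. A self-contained sketch of the
// trick (bytesEqual16 is an illustrative name, not the function used by StringRef):

#if defined(__aarch64__) && defined(__ARM_NEON)
#include <arm_neon.h>
#include <cstdint>

inline bool bytesEqual16(const unsigned char * a, const unsigned char * b)
{
    uint8x16_t eq = vceqq_u8(vld1q_u8(a), vld1q_u8(b));            // 0xFF where bytes match
    uint8x8_t nibbles = vshrn_n_u16(vreinterpretq_u16_u8(eq), 4);  // 4 bits kept per byte
    uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(nibbles), 0);
    return mask == 0xFFFFFFFFFFFFFFFFULL;                          // all 16 bytes equal
}
#endif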
diff --git a/base/base/defines.h index 6abf8155b957..4d3d8796d21b 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -115,8 +115,15 @@ /// because SIGABRT is easier to debug than SIGTRAP (the second one makes gdb crazy) #if !defined(chassert) #if defined(ABORT_ON_LOGICAL_ERROR) + // clang-format off + #include <base/types.h> + namespace DB + { + void abortOnFailedAssertion(const String & description); + } #define chassert(x) static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x) #define UNREACHABLE() abort() + // clang-format on #else /// Here sizeof() trick is used to suppress unused warning for result, /// since simple "(void)x" will evaluate the expression, while @@ -178,6 +185,6 @@ /// A template function for suppressing warnings about unused variables or function results. template <typename... Args> -constexpr void UNUSED(Args &&... args [[maybe_unused]]) +constexpr void UNUSED(Args &&... args [[maybe_unused]]) // NOLINT(cppcoreguidelines-missing-std-forward) { } diff --git a/base/base/find_symbols.h index 83232669c047..fda94edaa88d 100644 --- a/base/base/find_symbols.h +++ b/base/base/find_symbols.h @@ -448,7 +448,7 @@ inline char * find_last_not_symbols_or_null(char * begin, char * end) /// See https://github.com/boostorg/algorithm/issues/63 /// And https://bugs.llvm.org/show_bug.cgi?id=41141 template <typename To> -inline void splitInto(To & to, const std::string & what, bool token_compress = false) +inline To & splitInto(To & to, std::string_view what, bool token_compress = false) { const char * pos = what.data(); const char * end = pos + what.size(); @@ -464,4 +464,6 @@ inline void splitInto(To & to, const std::string & what, bool token_compress = false) else pos = delimiter_or_end; } + + return to; } diff --git a/base/base/getThreadId.cpp index b6c22bb8856b..a42d79c5698b 100644 --- a/base/base/getThreadId.cpp +++ b/base/base/getThreadId.cpp @@ -15,25 +15,34 @@ static thread_local uint64_t current_tid = 0; -uint64_t getThreadId() + +static void setCurrentThreadId() { - if (!current_tid) - { #if defined(OS_ANDROID) - current_tid = gettid(); + current_tid = gettid(); #elif defined(OS_LINUX) - current_tid = static_cast<uint64_t>(syscall(SYS_gettid)); /// This call is always successful. - man gettid + current_tid = static_cast<uint64_t>(syscall(SYS_gettid)); /// This call is always successful. - man gettid #elif defined(OS_FREEBSD) - current_tid = pthread_getthreadid_np(); + current_tid = pthread_getthreadid_np(); #elif defined(OS_SUNOS) - // On Solaris-derived systems, this returns the ID of the LWP, analogous - // to a thread. - current_tid = static_cast<uint64_t>(pthread_self()); + // On Solaris-derived systems, this returns the ID of the LWP, analogous + // to a thread. + current_tid = static_cast<uint64_t>(pthread_self()); #else - if (0 != pthread_threadid_np(nullptr, &current_tid)) - throw std::logic_error("pthread_threadid_np returned error"); + if (0 != pthread_threadid_np(nullptr, &current_tid)) + throw std::logic_error("pthread_threadid_np returned error"); #endif - } +} + +uint64_t getThreadId() +{ + if (!current_tid) + setCurrentThreadId(); return current_tid; } + +void updateCurrentThreadIdAfterFork() +{ + setCurrentThreadId(); } diff --git a/base/base/getThreadId.h index a1b5ff5f3e8d..f90c76029e1b 100644 --- a/base/base/getThreadId.h +++ b/base/base/getThreadId.h @@ -3,3 +3,5 @@ /// Obtain thread id from OS. The value is cached in thread local variable. uint64_t getThreadId(); + +void updateCurrentThreadIdAfterFork(); diff --git a/base/base/hex.h index b8cf95db893e..931f220aa081 100644 --- a/base/base/hex.h +++ b/base/base/hex.h @@ -4,212 +4,298 @@ #include <cstring> #include "types.h" -/// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly.
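// The hex.h rewrite below funnels every width through a single HexConversion<T>
// trait: unhex2/unhex4 become wrappers over unhexUInt<UInt8>/unhexUInt<UInt16>,
// and the same template also covers wide::integer and CityHash_v1_0_2::uint128.
// A usage sketch, assuming the signatures as introduced in this diff:
//
//     static_assert(unhex2("ff") == 0xFF);
//     static_assert(unhex4("beef") == 0xBEEF);
//     static_assert(unhexUInt<UInt32>("cafe1234") == 0xCAFE1234);
//     std::string s = getHexUIntLowercase(UInt16{0xBEEF}); // "beef"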
+namespace CityHash_v1_0_2 { struct uint128; } -constexpr inline std::string_view hex_digit_to_char_uppercase_table = "0123456789ABCDEF"; -constexpr inline std::string_view hex_digit_to_char_lowercase_table = "0123456789abcdef"; - -constexpr char hexDigitUppercase(unsigned char c) +namespace wide { - return hex_digit_to_char_uppercase_table[c]; + template + class integer; } -constexpr char hexDigitLowercase(unsigned char c) -{ - return hex_digit_to_char_lowercase_table[c]; -} - -/// Maps 0..255 to 00..FF or 00..ff correspondingly - -constexpr inline std::string_view hex_byte_to_char_uppercase_table = // - "000102030405060708090A0B0C0D0E0F" - "101112131415161718191A1B1C1D1E1F" - "202122232425262728292A2B2C2D2E2F" - "303132333435363738393A3B3C3D3E3F" - "404142434445464748494A4B4C4D4E4F" - "505152535455565758595A5B5C5D5E5F" - "606162636465666768696A6B6C6D6E6F" - "707172737475767778797A7B7C7D7E7F" - "808182838485868788898A8B8C8D8E8F" - "909192939495969798999A9B9C9D9E9F" - "A0A1A2A3A4A5A6A7A8A9AAABACADAEAF" - "B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF" - "C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF" - "D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF" - "E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF" - "F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF"; - -constexpr inline std::string_view hex_byte_to_char_lowercase_table = // - "000102030405060708090a0b0c0d0e0f" - "101112131415161718191a1b1c1d1e1f" - "202122232425262728292a2b2c2d2e2f" - "303132333435363738393a3b3c3d3e3f" - "404142434445464748494a4b4c4d4e4f" - "505152535455565758595a5b5c5d5e5f" - "606162636465666768696a6b6c6d6e6f" - "707172737475767778797a7b7c7d7e7f" - "808182838485868788898a8b8c8d8e8f" - "909192939495969798999a9b9c9d9e9f" - "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" - "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" - "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" - "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" - "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" - "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; -inline void writeHexByteUppercase(UInt8 byte, void * out) +namespace impl { - memcpy(out, &hex_byte_to_char_uppercase_table[static_cast(byte) * 2], 2); -} + /// Maps 0..15 to 0..9A..F or 0..9a..f correspondingly. 
+ constexpr inline std::string_view hex_digit_to_char_uppercase_table = "0123456789ABCDEF"; + constexpr inline std::string_view hex_digit_to_char_lowercase_table = "0123456789abcdef"; -inline void writeHexByteLowercase(UInt8 byte, void * out) -{ - memcpy(out, &hex_byte_to_char_lowercase_table[static_cast(byte) * 2], 2); -} - -constexpr inline std::string_view bin_byte_to_char_table = // - "0000000000000001000000100000001100000100000001010000011000000111" - "0000100000001001000010100000101100001100000011010000111000001111" - "0001000000010001000100100001001100010100000101010001011000010111" - "0001100000011001000110100001101100011100000111010001111000011111" - "0010000000100001001000100010001100100100001001010010011000100111" - "0010100000101001001010100010101100101100001011010010111000101111" - "0011000000110001001100100011001100110100001101010011011000110111" - "0011100000111001001110100011101100111100001111010011111000111111" - "0100000001000001010000100100001101000100010001010100011001000111" - "0100100001001001010010100100101101001100010011010100111001001111" - "0101000001010001010100100101001101010100010101010101011001010111" - "0101100001011001010110100101101101011100010111010101111001011111" - "0110000001100001011000100110001101100100011001010110011001100111" - "0110100001101001011010100110101101101100011011010110111001101111" - "0111000001110001011100100111001101110100011101010111011001110111" - "0111100001111001011110100111101101111100011111010111111001111111" - "1000000010000001100000101000001110000100100001011000011010000111" - "1000100010001001100010101000101110001100100011011000111010001111" - "1001000010010001100100101001001110010100100101011001011010010111" - "1001100010011001100110101001101110011100100111011001111010011111" - "1010000010100001101000101010001110100100101001011010011010100111" - "1010100010101001101010101010101110101100101011011010111010101111" - "1011000010110001101100101011001110110100101101011011011010110111" - "1011100010111001101110101011101110111100101111011011111010111111" - "1100000011000001110000101100001111000100110001011100011011000111" - "1100100011001001110010101100101111001100110011011100111011001111" - "1101000011010001110100101101001111010100110101011101011011010111" - "1101100011011001110110101101101111011100110111011101111011011111" - "1110000011100001111000101110001111100100111001011110011011100111" - "1110100011101001111010101110101111101100111011011110111011101111" - "1111000011110001111100101111001111110100111101011111011011110111" - "1111100011111001111110101111101111111100111111011111111011111111"; + /// Maps 0..255 to 00..FF or 00..ff correspondingly. 
+ constexpr inline std::string_view hex_byte_to_char_uppercase_table = // + "000102030405060708090A0B0C0D0E0F" + "101112131415161718191A1B1C1D1E1F" + "202122232425262728292A2B2C2D2E2F" + "303132333435363738393A3B3C3D3E3F" + "404142434445464748494A4B4C4D4E4F" + "505152535455565758595A5B5C5D5E5F" + "606162636465666768696A6B6C6D6E6F" + "707172737475767778797A7B7C7D7E7F" + "808182838485868788898A8B8C8D8E8F" + "909192939495969798999A9B9C9D9E9F" + "A0A1A2A3A4A5A6A7A8A9AAABACADAEAF" + "B0B1B2B3B4B5B6B7B8B9BABBBCBDBEBF" + "C0C1C2C3C4C5C6C7C8C9CACBCCCDCECF" + "D0D1D2D3D4D5D6D7D8D9DADBDCDDDEDF" + "E0E1E2E3E4E5E6E7E8E9EAEBECEDEEEF" + "F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF"; -inline void writeBinByte(UInt8 byte, void * out) -{ - memcpy(out, &bin_byte_to_char_table[static_cast(byte) * 8], 8); -} + constexpr inline std::string_view hex_byte_to_char_lowercase_table = // + "000102030405060708090a0b0c0d0e0f" + "101112131415161718191a1b1c1d1e1f" + "202122232425262728292a2b2c2d2e2f" + "303132333435363738393a3b3c3d3e3f" + "404142434445464748494a4b4c4d4e4f" + "505152535455565758595a5b5c5d5e5f" + "606162636465666768696a6b6c6d6e6f" + "707172737475767778797a7b7c7d7e7f" + "808182838485868788898a8b8c8d8e8f" + "909192939495969798999a9b9c9d9e9f" + "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf" + "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf" + "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf" + "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf" + "e0e1e2e3e4e5e6e7e8e9eaebecedeeef" + "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff"; -/// Produces hex representation of an unsigned int with leading zeros (for checksums) -template -inline void writeHexUIntImpl(TUInt uint_, char * out, std::string_view table) -{ - union + /// Maps 0..255 to 00000000..11111111 correspondingly. + constexpr inline std::string_view bin_byte_to_char_table = // + "0000000000000001000000100000001100000100000001010000011000000111" + "0000100000001001000010100000101100001100000011010000111000001111" + "0001000000010001000100100001001100010100000101010001011000010111" + "0001100000011001000110100001101100011100000111010001111000011111" + "0010000000100001001000100010001100100100001001010010011000100111" + "0010100000101001001010100010101100101100001011010010111000101111" + "0011000000110001001100100011001100110100001101010011011000110111" + "0011100000111001001110100011101100111100001111010011111000111111" + "0100000001000001010000100100001101000100010001010100011001000111" + "0100100001001001010010100100101101001100010011010100111001001111" + "0101000001010001010100100101001101010100010101010101011001010111" + "0101100001011001010110100101101101011100010111010101111001011111" + "0110000001100001011000100110001101100100011001010110011001100111" + "0110100001101001011010100110101101101100011011010110111001101111" + "0111000001110001011100100111001101110100011101010111011001110111" + "0111100001111001011110100111101101111100011111010111111001111111" + "1000000010000001100000101000001110000100100001011000011010000111" + "1000100010001001100010101000101110001100100011011000111010001111" + "1001000010010001100100101001001110010100100101011001011010010111" + "1001100010011001100110101001101110011100100111011001111010011111" + "1010000010100001101000101010001110100100101001011010011010100111" + "1010100010101001101010101010101110101100101011011010111010101111" + "1011000010110001101100101011001110110100101101011011011010110111" + "1011100010111001101110101011101110111100101111011011111010111111" + "1100000011000001110000101100001111000100110001011100011011000111" + 
"1100100011001001110010101100101111001100110011011100111011001111" + "1101000011010001110100101101001111010100110101011101011011010111" + "1101100011011001110110101101101111011100110111011101111011011111" + "1110000011100001111000101110001111100100111001011110011011100111" + "1110100011101001111010101110101111101100111011011110111011101111" + "1111000011110001111100101111001111110100111101011111011011110111" + "1111100011111001111110101111101111111100111111011111111011111111"; + + /// Maps 0..9, A..F, a..f to 0..15. Other chars are mapped to implementation specific value. + constexpr inline std::string_view hex_char_to_digit_table + = {"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" //0-9 + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //A-Z + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //a-z + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" + "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff", + 256}; + + /// Converts a hex digit '0'..'f' or '0'..'F' to its value 0..15. + constexpr UInt8 unhexDigit(char c) + { + return hex_char_to_digit_table[static_cast(c)]; + } + + /// Converts an unsigned integer in the native endian to hexadecimal representation and back. Used as a base class for HexConversion. + template + struct HexConversionUInt { - TUInt value; - UInt8 uint8[sizeof(TUInt)]; + static const constexpr size_t num_hex_digits = sizeof(TUInt) * 2; + + static void hex(TUInt uint_, char * out, std::string_view table) + { + union + { + TUInt value; + UInt8 uint8[sizeof(TUInt)]; + }; + + value = uint_; + + for (size_t i = 0; i < sizeof(TUInt); ++i) + { + if constexpr (std::endian::native == std::endian::little) + memcpy(out + i * 2, &table[static_cast(uint8[sizeof(TUInt) - 1 - i]) * 2], 2); + else + memcpy(out + i * 2, &table[static_cast(uint8[i]) * 2], 2); + } + } + + static TUInt unhex(const char * data) + { + TUInt res; + if constexpr (sizeof(TUInt) == 1) + { + res = static_cast(unhexDigit(data[0])) * 0x10 + static_cast(unhexDigit(data[1])); + } + else if constexpr (sizeof(TUInt) == 2) + { + res = static_cast(unhexDigit(data[0])) * 0x1000 + static_cast(unhexDigit(data[1])) * 0x100 + + static_cast(unhexDigit(data[2])) * 0x10 + static_cast(unhexDigit(data[3])); + } + else if constexpr ((sizeof(TUInt) <= 8) || ((sizeof(TUInt) % 8) != 0)) + { + res = 0; + for (size_t i = 0; i < sizeof(TUInt) * 2; ++i, ++data) + { + res <<= 4; + res += unhexDigit(*data); + } + } + else + { + res = 0; + for (size_t i = 0; i < sizeof(TUInt) / 8; ++i, data += 16) + { + res <<= 64; + res += HexConversionUInt::unhex(data); + } + } + return res; + } }; - value = uint_; + /// Helper template class to convert a value of any supported type to hexadecimal representation and back. 
+ template + struct HexConversion; - for (size_t i = 0; i < sizeof(TUInt); ++i) + template + struct HexConversion>> : public HexConversionUInt {}; + + template + struct HexConversion> : public HexConversionUInt> {}; + + template /// Partial specialization here allows not to include in this header. + struct HexConversion>> { - if constexpr (std::endian::native == std::endian::little) - memcpy(out + i * 2, &table[static_cast(uint8[sizeof(TUInt) - 1 - i]) * 2], 2); - else - memcpy(out + i * 2, &table[static_cast(uint8[i]) * 2], 2); - } + static const constexpr size_t num_hex_digits = 32; + + static void hex(const CityHashUInt128 & uint_, char * out, std::string_view table) + { + HexConversion::hex(uint_.high64, out, table); + HexConversion::hex(uint_.low64, out + 16, table); + } + + static CityHashUInt128 unhex(const char * data) + { + CityHashUInt128 res; + res.high64 = HexConversion::unhex(data); + res.low64 = HexConversion::unhex(data + 16); + return res; + } + }; } -template -inline void writeHexUIntUppercase(TUInt uint_, char * out) +/// Produces a hexadecimal representation of an integer value with leading zeros (for checksums). +/// The function supports native integer types, wide::integer, CityHash_v1_0_2::uint128. +/// It can be used with signed types as well, however they are written as corresponding unsigned numbers +/// using two's complement (i.e. for example "-1" is written as "0xFF", not as "-0x01"). +template +void writeHexUIntUppercase(const T & value, char * out) { - writeHexUIntImpl(uint_, out, hex_byte_to_char_uppercase_table); + impl::HexConversion::hex(value, out, impl::hex_byte_to_char_uppercase_table); } -template -inline void writeHexUIntLowercase(TUInt uint_, char * out) +template +void writeHexUIntLowercase(const T & value, char * out) { - writeHexUIntImpl(uint_, out, hex_byte_to_char_lowercase_table); + impl::HexConversion::hex(value, out, impl::hex_byte_to_char_lowercase_table); } -template -std::string getHexUIntUppercase(TUInt uint_) +template +std::string getHexUIntUppercase(const T & value) { - std::string res(sizeof(TUInt) * 2, '\0'); - writeHexUIntUppercase(uint_, res.data()); + std::string res(impl::HexConversion::num_hex_digits, '\0'); + writeHexUIntUppercase(value, res.data()); return res; } -template -std::string getHexUIntLowercase(TUInt uint_) +template +std::string getHexUIntLowercase(const T & value) { - std::string res(sizeof(TUInt) * 2, '\0'); - writeHexUIntLowercase(uint_, res.data()); + std::string res(impl::HexConversion::num_hex_digits, '\0'); + writeHexUIntLowercase(value, res.data()); return res; } -/// Maps 0..9, A..F, a..f to 0..15. Other chars are mapped to implementation specific value. 
- -constexpr inline std::string_view hex_char_to_digit_table - = {"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\xff\xff\xff\xff\xff\xff" //0-9 - "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //A-Z - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\x0a\x0b\x0c\x0d\x0e\x0f\xff\xff\xff\xff\xff\xff\xff\xff\xff" //a-z - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" - "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff", - 256}; +constexpr char hexDigitUppercase(unsigned char c) +{ + return impl::hex_digit_to_char_uppercase_table[c]; +} + +constexpr char hexDigitLowercase(unsigned char c) +{ + return impl::hex_digit_to_char_lowercase_table[c]; +} + +inline void writeHexByteUppercase(UInt8 byte, void * out) +{ + memcpy(out, &impl::hex_byte_to_char_uppercase_table[static_cast(byte) * 2], 2); +} + +inline void writeHexByteLowercase(UInt8 byte, void * out) +{ + memcpy(out, &impl::hex_byte_to_char_lowercase_table[static_cast(byte) * 2], 2); +} +/// Converts a hex representation with leading zeros back to an integer value. +/// The function supports native integer types, wide::integer, CityHash_v1_0_2::uint128. +template +constexpr T unhexUInt(const char * data) +{ + return impl::HexConversion::unhex(data); +} + +/// Converts a hexadecimal digit '0'..'f' or '0'..'F' to UInt8. constexpr UInt8 unhex(char c) { - return hex_char_to_digit_table[static_cast(c)]; + return impl::unhexDigit(c); } +/// Converts two hexadecimal digits to UInt8. constexpr UInt8 unhex2(const char * data) { - return static_cast(unhex(data[0])) * 0x10 + static_cast(unhex(data[1])); + return unhexUInt(data); } +/// Converts four hexadecimal digits to UInt16. constexpr UInt16 unhex4(const char * data) { - return static_cast(unhex(data[0])) * 0x1000 + static_cast(unhex(data[1])) * 0x100 - + static_cast(unhex(data[2])) * 0x10 + static_cast(unhex(data[3])); + return unhexUInt(data); } -template -constexpr TUInt unhexUInt(const char * data) +/// Produces a binary representation of a single byte. +inline void writeBinByte(UInt8 byte, void * out) { - TUInt res = 0; - if constexpr ((sizeof(TUInt) <= 8) || ((sizeof(TUInt) % 8) != 0)) - { - for (size_t i = 0; i < sizeof(TUInt) * 2; ++i, ++data) - { - res <<= 4; - res += unhex(*data); - } - } - else - { - for (size_t i = 0; i < sizeof(TUInt) / 8; ++i, data += 16) - { - res <<= 64; - res += unhexUInt(data); - } - } - return res; + memcpy(out, &impl::bin_byte_to_char_table[static_cast(byte) * 8], 8); +} + +/// Converts byte array to a hex string. Useful for debug logging. 
+inline std::string hexString(const void * data, size_t size) +{ + const char * p = reinterpret_cast(data); + std::string s(size * 2, '\0'); + for (size_t i = 0; i < size; ++i) + writeHexByteLowercase(p[i], s.data() + i * 2); + return s; } diff --git a/base/base/iostream_debug_helpers.h b/base/base/iostream_debug_helpers.h index db974c911dfa..f531a56031bf 100644 --- a/base/base/iostream_debug_helpers.h +++ b/base/base/iostream_debug_helpers.h @@ -20,14 +20,14 @@ Out & dumpValue(Out &, T &&); /// Catch-all case. template -std::enable_if_t & dumpImpl(Out & out, T &&) +std::enable_if_t & dumpImpl(Out & out, T &&) // NOLINT(cppcoreguidelines-missing-std-forward) { return out << "{...}"; } /// An object, that could be output with operator <<. template -std::enable_if_t & dumpImpl(Out & out, T && x, std::decay_t() << std::declval())> * = nullptr) +std::enable_if_t & dumpImpl(Out & out, T && x, std::decay_t() << std::declval())> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward) { return out << x; } @@ -37,7 +37,7 @@ template std::enable_if_t, std::decay_t())>> - , Out> & dumpImpl(Out & out, T && x, std::decay_t())> * = nullptr) + , Out> & dumpImpl(Out & out, T && x, std::decay_t())> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward) { if (!x) return out << "nullptr"; @@ -46,7 +46,7 @@ std::enable_if_t -std::enable_if_t & dumpImpl(Out & out, T && x, std::decay_t()))> * = nullptr) +std::enable_if_t & dumpImpl(Out & out, T && x, std::decay_t()))> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward) { bool first = true; out << "{"; @@ -64,7 +64,7 @@ std::enable_if_t & dumpImpl(Out & out, T && x, std::decay_t< template std::enable_if_t>, Out> & -dumpImpl(Out & out, T && x) +dumpImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward) { return out << magic_enum::enum_name(x); } @@ -73,7 +73,7 @@ dumpImpl(Out & out, T && x) template std::enable_if_t, std::string> || std::is_same_v, const char *>), Out> & -dumpImpl(Out & out, T && x) +dumpImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward) { return out << std::quoted(x); } @@ -82,7 +82,7 @@ dumpImpl(Out & out, T && x) template std::enable_if_t, unsigned char>, Out> & -dumpImpl(Out & out, T && x) +dumpImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward) { return out << int(x); } @@ -90,7 +90,7 @@ dumpImpl(Out & out, T && x) /// Tuple, pair template -Out & dumpTupleImpl(Out & out, T && x) +Out & dumpTupleImpl(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward) { if constexpr (N == 0) out << "{"; @@ -108,14 +108,14 @@ Out & dumpTupleImpl(Out & out, T && x) } template -std::enable_if_t & dumpImpl(Out & out, T && x, std::decay_t(std::declval()))> * = nullptr) +std::enable_if_t & dumpImpl(Out & out, T && x, std::decay_t(std::declval()))> * = nullptr) // NOLINT(cppcoreguidelines-missing-std-forward) { return dumpTupleImpl<0>(out, x); } template -Out & dumpDispatchPriorities(Out & out, T && x, std::decay_t(std::declval(), std::declval()))> *) +Out & dumpDispatchPriorities(Out & out, T && x, std::decay_t(std::declval(), std::declval()))> *) // NOLINT(cppcoreguidelines-missing-std-forward) { return dumpImpl(out, x); } @@ -124,21 +124,21 @@ Out & dumpDispatchPriorities(Out & out, T && x, std::decay_t -Out & dumpDispatchPriorities(Out & out, T && x, LowPriority) +Out & dumpDispatchPriorities(Out & out, T && x, LowPriority) // NOLINT(cppcoreguidelines-missing-std-forward) { return dumpDispatchPriorities(out, x, nullptr); } template -Out & dumpValue(Out & out, T && 
x) +Out & dumpValue(Out & out, T && x) // NOLINT(cppcoreguidelines-missing-std-forward) { return dumpDispatchPriorities<5>(out, x, nullptr); } template -Out & dump(Out & out, const char * name, T && x) +Out & dump(Out & out, const char * name, T && x) // NOLINT(cppcoreguidelines-missing-std-forward) { // Dumping string literal, printing name and demangled type is irrelevant. if constexpr (std::is_same_v>>) diff --git a/base/base/move_extend.h b/base/base/move_extend.h new file mode 100644 index 000000000000..6e5b16e037cc --- /dev/null +++ b/base/base/move_extend.h @@ -0,0 +1,9 @@ +#pragma once + +/// Extend @p to by moving elements from @p from to @p to end +/// @return @p to iterator to first of moved elements. +template +typename To::iterator moveExtend(To & to, From && from) +{ + return to.insert(to.end(), std::make_move_iterator(from.begin()), std::make_move_iterator(from.end())); +} diff --git a/base/base/scope_guard.h b/base/base/scope_guard.h index 8524beac7ea5..03670792d596 100644 --- a/base/base/scope_guard.h +++ b/base/base/scope_guard.h @@ -9,9 +9,9 @@ class [[nodiscard]] BasicScopeGuard { public: constexpr BasicScopeGuard() = default; - constexpr BasicScopeGuard(BasicScopeGuard && src) : function{src.release()} {} // NOLINT(hicpp-noexcept-move, performance-noexcept-move-constructor) + constexpr BasicScopeGuard(BasicScopeGuard && src) : function{src.release()} {} // NOLINT(hicpp-noexcept-move, performance-noexcept-move-constructor, cppcoreguidelines-noexcept-move-operations) - constexpr BasicScopeGuard & operator=(BasicScopeGuard && src) // NOLINT(hicpp-noexcept-move, performance-noexcept-move-constructor) + constexpr BasicScopeGuard & operator=(BasicScopeGuard && src) // NOLINT(hicpp-noexcept-move, performance-noexcept-move-constructor, cppcoreguidelines-noexcept-move-operations) { if (this != &src) { @@ -23,11 +23,11 @@ class [[nodiscard]] BasicScopeGuard template requires std::is_convertible_v - constexpr BasicScopeGuard(BasicScopeGuard && src) : function{src.release()} {} // NOLINT(google-explicit-constructor) + constexpr BasicScopeGuard(BasicScopeGuard && src) : function{src.release()} {} // NOLINT(google-explicit-constructor, cppcoreguidelines-rvalue-reference-param-not-moved, cppcoreguidelines-noexcept-move-operations) template requires std::is_convertible_v - constexpr BasicScopeGuard & operator=(BasicScopeGuard && src) + constexpr BasicScopeGuard & operator=(BasicScopeGuard && src) // NOLINT(cppcoreguidelines-rvalue-reference-param-not-moved, cppcoreguidelines-noexcept-move-operations) { if (this != &src) { @@ -43,7 +43,7 @@ class [[nodiscard]] BasicScopeGuard template requires std::is_convertible_v - constexpr BasicScopeGuard(G && function_) : function{std::move(function_)} {} // NOLINT(google-explicit-constructor, bugprone-forwarding-reference-overload, bugprone-move-forwarding-reference) + constexpr BasicScopeGuard(G && function_) : function{std::move(function_)} {} // NOLINT(google-explicit-constructor, bugprone-forwarding-reference-overload, bugprone-move-forwarding-reference, cppcoreguidelines-missing-std-forward) ~BasicScopeGuard() { invoke(); } @@ -70,7 +70,7 @@ class [[nodiscard]] BasicScopeGuard template requires std::is_convertible_v - BasicScopeGuard & join(BasicScopeGuard && other) + BasicScopeGuard & join(BasicScopeGuard && other) // NOLINT(cppcoreguidelines-rvalue-reference-param-not-moved) { if (other.function) { diff --git a/base/base/simd.h b/base/base/simd.h new file mode 100644 index 000000000000..3283c40971c1 --- /dev/null +++ b/base/base/simd.h @@ 
-0,0 +1,14 @@ +#pragma once + +#if defined(__aarch64__) && defined(__ARM_NEON) + +# include +# pragma clang diagnostic ignored "-Wreserved-identifier" + +/// Returns a 64 bit mask of nibbles (4 bits for each byte). +inline uint64_t getNibbleMask(uint8x16_t res) +{ + return vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(res), 4)), 0); +} + +#endif diff --git a/base/base/sort.h b/base/base/sort.h index 912545979dc4..1a8145877632 100644 --- a/base/base/sort.h +++ b/base/base/sort.h @@ -131,3 +131,29 @@ void sort(RandomIt first, RandomIt last) using comparator = std::less; ::sort(first, last, comparator()); } + +/** Try to fast sort elements for common sorting patterns: + * 1. If elements are already sorted. + * 2. If elements are already almost sorted. + * 3. If elements are already sorted in reverse order. + * + * Returns true if fast sort was performed or elements were already sorted, false otherwise. + */ +template +bool trySort(RandomIt first, RandomIt last, Compare compare) +{ +#ifndef NDEBUG + ::shuffle(first, last); +#endif + + ComparatorWrapper compare_wrapper = compare; + return ::pdqsort_try_sort(first, last, compare_wrapper); +} + +template +bool trySort(RandomIt first, RandomIt last) +{ + using value_type = typename std::iterator_traits::value_type; + using comparator = std::less; + return ::trySort(first, last, comparator()); +} diff --git a/base/base/strong_typedef.h b/base/base/strong_typedef.h index b3b8bced688c..518d87a47b9e 100644 --- a/base/base/strong_typedef.h +++ b/base/base/strong_typedef.h @@ -23,10 +23,10 @@ struct StrongTypedef constexpr StrongTypedef(): t() {} constexpr StrongTypedef(const Self &) = default; - constexpr StrongTypedef(Self &&) noexcept(std::is_nothrow_move_constructible_v) = default; + constexpr StrongTypedef(Self &&) noexcept(std::is_nothrow_move_constructible_v) = default; // NOLINT(cppcoreguidelines-noexcept-move-operations, hicpp-noexcept-move, performance-noexcept-move-constructor) Self & operator=(const Self &) = default; - Self & operator=(Self &&) noexcept(std::is_nothrow_move_assignable_v)= default; + Self & operator=(Self &&) noexcept(std::is_nothrow_move_assignable_v)= default; // NOLINT(cppcoreguidelines-noexcept-move-operations, hicpp-noexcept-move, performance-noexcept-move-constructor) template ::type> Self & operator=(const T & rhs) { t = rhs; return *this;} diff --git a/base/base/wide_integer_impl.h b/base/base/wide_integer_impl.h index 411841e6d9fd..fc4e9e551ca7 100644 --- a/base/base/wide_integer_impl.h +++ b/base/base/wide_integer_impl.h @@ -12,7 +12,6 @@ #include #include -#include #include // NOLINTBEGIN(*) @@ -22,6 +21,7 @@ #define CONSTEXPR_FROM_DOUBLE constexpr using FromDoubleIntermediateType = long double; #else +#include /// `wide_integer_from_builtin` can't be constexpr with non-literal `cpp_bin_float_double_extended` #define CONSTEXPR_FROM_DOUBLE using FromDoubleIntermediateType = boost::multiprecision::cpp_bin_float_double_extended; diff --git a/base/pcg-random/pcg_extras.hpp b/base/pcg-random/pcg_extras.hpp index cc11d907006b..32dc5c318c38 100644 --- a/base/pcg-random/pcg_extras.hpp +++ b/base/pcg-random/pcg_extras.hpp @@ -463,7 +463,7 @@ auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound) } template -void shuffle(Iter from, Iter to, RandType&& rng) +void shuffle(Iter from, Iter to, RandType&& rng) // NOLINT(cppcoreguidelines-missing-std-forward) { typedef typename std::iterator_traits::difference_type delta_t; typedef typename std::remove_reference::type::result_type 
result_t; diff --git a/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp b/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp index 4caf097c28a8..1c5555f8cf35 100644 --- a/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp +++ b/base/poco/Data/ODBC/src/Unicode_UNIXODBC.cpp @@ -19,7 +19,6 @@ #include "Poco/UTF16Encoding.h" #include "Poco/Buffer.h" #include "Poco/Exception.h" -#include using Poco::Buffer; diff --git a/base/poco/Data/include/Poco/Data/TypeHandler.h b/base/poco/Data/include/Poco/Data/TypeHandler.h index 34f88e986f7b..e7633de70182 100644 --- a/base/poco/Data/include/Poco/Data/TypeHandler.h +++ b/base/poco/Data/include/Poco/Data/TypeHandler.h @@ -97,7 +97,7 @@ namespace Data /// /// static void extract(std::size_t pos, Person& obj, const Person& defVal, AbstractExtractor::Ptr pExt) /// { - /// // defVal is the default person we should use if we encunter NULL entries, so we take the individual fields + /// // defVal is the default person we should use if we encounter NULL entries, so we take the individual fields /// // as defaults. You can do more complex checking, ie return defVal if only one single entry of the fields is null etc... /// poco_assert_dbg (!pExt.isNull()); /// std::string lastName; diff --git a/base/poco/Foundation/CMakeLists.txt b/base/poco/Foundation/CMakeLists.txt index 358f49ed055e..d0dde8a51a5b 100644 --- a/base/poco/Foundation/CMakeLists.txt +++ b/base/poco/Foundation/CMakeLists.txt @@ -87,7 +87,6 @@ set (SRCS src/LoggingRegistry.cpp src/LogStream.cpp src/MD5Engine.cpp - src/MemoryPool.cpp src/MemoryStream.cpp src/Message.cpp src/Mutex.cpp diff --git a/base/poco/Foundation/include/Poco/MemoryPool.h b/base/poco/Foundation/include/Poco/MemoryPool.h deleted file mode 100644 index 9ab12081b5fc..000000000000 --- a/base/poco/Foundation/include/Poco/MemoryPool.h +++ /dev/null @@ -1,116 +0,0 @@ -// -// MemoryPool.h -// -// Library: Foundation -// Package: Core -// Module: MemoryPool -// -// Definition of the MemoryPool class. -// -// Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH. -// and Contributors. -// -// SPDX-License-Identifier: BSL-1.0 -// - - -#ifndef Foundation_MemoryPool_INCLUDED -#define Foundation_MemoryPool_INCLUDED - - -#include -#include -#include "Poco/Foundation.h" -#include "Poco/Mutex.h" - - -namespace Poco -{ - - -class Foundation_API MemoryPool -/// A simple pool for fixed-size memory blocks. -/// -/// The main purpose of this class is to speed-up -/// memory allocations, as well as to reduce memory -/// fragmentation in situations where the same blocks -/// are allocated all over again, such as in server -/// applications. -/// -/// All allocated blocks are retained for future use. -/// A limit on the number of blocks can be specified. -/// Blocks can be preallocated. -{ -public: - MemoryPool(std::size_t blockSize, int preAlloc = 0, int maxAlloc = 0); - /// Creates a MemoryPool for blocks with the given blockSize. - /// The number of blocks given in preAlloc are preallocated. - - ~MemoryPool(); - - void * get(); - /// Returns a memory block. If there are no more blocks - /// in the pool, a new block will be allocated. - /// - /// If maxAlloc blocks are already allocated, an - /// OutOfMemoryException is thrown. - - void release(void * ptr); - /// Releases a memory block and returns it to the pool. - - std::size_t blockSize() const; - /// Returns the block size. - - int allocated() const; - /// Returns the number of allocated blocks. - - int available() const; - /// Returns the number of available blocks in the pool. 
-
-private:
-    MemoryPool();
-    MemoryPool(const MemoryPool &);
-    MemoryPool & operator=(const MemoryPool &);
-
-    void clear();
-
-    enum
-    {
-        BLOCK_RESERVE = 128
-    };
-
-    typedef std::vector<char *> BlockVec;
-
-    std::size_t _blockSize;
-    int _maxAlloc;
-    int _allocated;
-    BlockVec _blocks;
-    FastMutex _mutex;
-};
-
-
-//
-// inlines
-//
-inline std::size_t MemoryPool::blockSize() const
-{
-    return _blockSize;
-}
-
-
-inline int MemoryPool::allocated() const
-{
-    return _allocated;
-}
-
-
-inline int MemoryPool::available() const
-{
-    return (int)_blocks.size();
-}
-
-
-} // namespace Poco
-
-
-#endif // Foundation_MemoryPool_INCLUDED
diff --git a/base/poco/Foundation/include/Poco/Message.h b/base/poco/Foundation/include/Poco/Message.h
index e8f04888ab4e..282c7fb5fd1d 100644
--- a/base/poco/Foundation/include/Poco/Message.h
+++ b/base/poco/Foundation/include/Poco/Message.h
@@ -67,6 +67,8 @@ class Foundation_API Message
     Message(
         const std::string & source, const std::string & text, Priority prio, const char * file, int line, std::string_view fmt_str = {});
+    Message(
+        std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str);
     /// Creates a Message with the given source, text, priority,
     /// source file path and line.
     ///
diff --git a/base/poco/Foundation/include/Poco/URI.h b/base/poco/Foundation/include/Poco/URI.h
index 1880af4ccd2d..30654504e0bd 100644
--- a/base/poco/Foundation/include/Poco/URI.h
+++ b/base/poco/Foundation/include/Poco/URI.h
@@ -57,7 +57,7 @@ class Foundation_API URI
     URI();
     /// Creates an empty URI.
 
-    explicit URI(const std::string & uri);
+    explicit URI(const std::string & uri, bool enable_url_encoding = true);
     /// Parses an URI from the given string. Throws a
     /// SyntaxException if the uri is not valid.
 
@@ -350,6 +350,10 @@ class Foundation_API URI
     static const std::string ILLEGAL;
 
 private:
+    void encodePath(std::string & encodedStr) const;
+    void decodePath(const std::string & encodedStr);
+
+
     std::string _scheme;
     std::string _userInfo;
     std::string _host;
@@ -357,6 +361,8 @@ class Foundation_API URI
     std::string _path;
     std::string _query;
     std::string _fragment;
+
+    bool _enable_url_encoding = true;
 };
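The rvalue-reference `Poco::Message` constructor declared above lets hot logging paths hand their strings over without copying. A hypothetical call site, where the helper `buildLogText()` is an illustrative assumption:

    #include <string>
    #include <utility>
    #include "Poco/Message.h"

    std::string source = "TCPHandler";
    std::string text = buildLogText(); // assume this produces a large string

    // Both strings are moved into the Message rather than copied.
    Poco::Message msg(std::move(source), std::move(text),
                      Poco::Message::PRIO_INFORMATION, __FILE__, __LINE__, /* fmt_str = */ {});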
-    {
-        clear();
-        throw;
-    }
-}
-
-
-MemoryPool::~MemoryPool()
-{
-    clear();
-}
-
-
-void MemoryPool::clear()
-{
-    for (BlockVec::iterator it = _blocks.begin(); it != _blocks.end(); ++it)
-    {
-        delete [] *it;
-    }
-    _blocks.clear();
-}
-
-
-void* MemoryPool::get()
-{
-    FastMutex::ScopedLock lock(_mutex);
-
-    if (_blocks.empty())
-    {
-        if (_maxAlloc == 0 || _allocated < _maxAlloc)
-        {
-            ++_allocated;
-            return new char[_blockSize];
-        }
-        else throw OutOfMemoryException("MemoryPool exhausted");
-    }
-    else
-    {
-        char* ptr = _blocks.back();
-        _blocks.pop_back();
-        return ptr;
-    }
-}
-
-
-void MemoryPool::release(void* ptr)
-{
-    FastMutex::ScopedLock lock(_mutex);
-
-    try
-    {
-        _blocks.push_back(reinterpret_cast<char*>(ptr));
-    }
-    catch (...)
-    {
-        delete [] reinterpret_cast<char*>(ptr);
-    }
-}
-
-
-} // namespace Poco
diff --git a/base/poco/Foundation/src/Message.cpp b/base/poco/Foundation/src/Message.cpp
index 663c96e47a24..54118cc0fc54 100644
--- a/base/poco/Foundation/src/Message.cpp
+++ b/base/poco/Foundation/src/Message.cpp
@@ -60,6 +60,19 @@ Message::Message(const std::string& source, const std::string& text, Priority pr
 }
 
 
+Message::Message(std::string && source, std::string && text, Priority prio, const char * file, int line, std::string_view fmt_str):
+    _source(std::move(source)),
+    _text(std::move(text)),
+    _prio(prio),
+    _tid(0),
+    _file(file),
+    _line(line),
+    _pMap(0),
+    _fmt_str(fmt_str)
+{
+    init();
+}
+
 Message::Message(const Message& msg):
     _source(msg._source),
     _text(msg._text),
diff --git a/base/poco/Foundation/src/Task.cpp b/base/poco/Foundation/src/Task.cpp
index a850ae37eff2..4303d50d6e89 100644
--- a/base/poco/Foundation/src/Task.cpp
+++ b/base/poco/Foundation/src/Task.cpp
@@ -16,7 +16,6 @@
 #include "Poco/TaskManager.h"
 #include "Poco/Exception.h"
 
-#include
 #include
diff --git a/base/poco/Foundation/src/URI.cpp b/base/poco/Foundation/src/URI.cpp
index 5543e02b2791..41e331bb0800 100644
--- a/base/poco/Foundation/src/URI.cpp
+++ b/base/poco/Foundation/src/URI.cpp
@@ -36,8 +36,8 @@ URI::URI():
 }
 
 
-URI::URI(const std::string& uri):
-    _port(0)
+URI::URI(const std::string& uri, bool enable_url_encoding):
+    _port(0), _enable_url_encoding(enable_url_encoding)
 {
     parse(uri);
 }
@@ -107,7 +107,8 @@ URI::URI(const URI& uri):
     _port(uri._port),
     _path(uri._path),
     _query(uri._query),
-    _fragment(uri._fragment)
+    _fragment(uri._fragment),
+    _enable_url_encoding(uri._enable_url_encoding)
 {
 }
 
@@ -119,7 +120,8 @@ URI::URI(const URI& baseURI, const std::string& relativeURI):
     _port(baseURI._port),
     _path(baseURI._path),
     _query(baseURI._query),
-    _fragment(baseURI._fragment)
+    _fragment(baseURI._fragment),
+    _enable_url_encoding(baseURI._enable_url_encoding)
 {
     resolve(relativeURI);
 }
@@ -151,6 +153,7 @@ URI& URI::operator = (const URI& uri)
         _path = uri._path;
         _query = uri._query;
         _fragment = uri._fragment;
+        _enable_url_encoding = uri._enable_url_encoding;
     }
     return *this;
 }
@@ -181,6 +184,7 @@ void URI::swap(URI& uri)
     std::swap(_path, uri._path);
     std::swap(_query, uri._query);
     std::swap(_fragment, uri._fragment);
+    std::swap(_enable_url_encoding, uri._enable_url_encoding);
 }
 
 
@@ -201,7 +205,7 @@ std::string URI::toString() const
     std::string uri;
     if (isRelative())
     {
-        encode(_path, RESERVED_PATH, uri);
+        encodePath(uri);
     }
     else
     {
@@ -217,7 +221,7 @@ std::string URI::toString() const
         {
             if (!auth.empty() && _path[0] != '/')
                 uri += '/';
-            encode(_path, RESERVED_PATH, uri);
+            encodePath(uri);
         }
         else if (!_query.empty() || !_fragment.empty())
         {
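The `enable_url_encoding` flag threaded through `Poco::URI` above decides whether the path is percent-decoded on parsing and re-encoded on output. A small sketch of the observable difference; the URL and the commented results are illustrative assumptions:

    #include <string>
    #include "Poco/URI.h"

    Poco::URI decoded("http://example.com/a%20b");          // default: path is decoded to "a b"
    Poco::URI verbatim("http://example.com/a%20b", false);  // path is stored as "a%20b", untouched

    std::string p1 = decoded.getPath();   // "a b"
    std::string p2 = verbatim.getPath();  // "a%20b"

With encoding disabled the path round-trips exactly as supplied, which matters when the remote endpoint is sensitive to the precise escaping.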
@@ -313,7 +317,7 @@ void URI::setAuthority(const std::string& authority)
 void URI::setPath(const std::string& path)
 {
     _path.clear();
-    decode(path, _path);
+    decodePath(path);
 }
 
 
@@ -418,7 +422,7 @@ void URI::setPathEtc(const std::string& pathEtc)
 std::string URI::getPathEtc() const
 {
     std::string pathEtc;
-    encode(_path, RESERVED_PATH, pathEtc);
+    encodePath(pathEtc);
     if (!_query.empty())
     {
         pathEtc += '?';
@@ -436,7 +440,7 @@ std::string URI::getPathEtc() const
 std::string URI::getPathAndQuery() const
 {
     std::string pathAndQuery;
-    encode(_path, RESERVED_PATH, pathAndQuery);
+    encodePath(pathAndQuery);
     if (!_query.empty())
     {
         pathAndQuery += '?';
@@ -681,6 +685,21 @@ void URI::decode(const std::string& str, std::string& decodedStr, bool plusAsSpa
     }
 }
 
+void URI::encodePath(std::string & encodedStr) const
+{
+    if (_enable_url_encoding)
+        encode(_path, RESERVED_PATH, encodedStr);
+    else
+        encodedStr = _path;
+}
+
+void URI::decodePath(const std::string & encodedStr)
+{
+    if (_enable_url_encoding)
+        decode(encodedStr, _path);
+    else
+        _path = encodedStr;
+}
 
 bool URI::isWellKnownPort() const
 {
@@ -820,7 +839,7 @@ void URI::parsePath(std::string::const_iterator& it, const std::string::const_it
 {
     std::string path;
     while (it != end && *it != '?' && *it != '#') path += *it++;
-    decode(path, _path);
+    decodePath(path);
 }
diff --git a/base/poco/JSON/src/Object.cpp b/base/poco/JSON/src/Object.cpp
index 7fca65c5b01d..b041f570934a 100644
--- a/base/poco/JSON/src/Object.cpp
+++ b/base/poco/JSON/src/Object.cpp
@@ -14,7 +14,6 @@
 
 #include "Poco/JSON/Object.h"
 #include
-#include
 
 
 using Poco::Dynamic::Var;
diff --git a/base/poco/MongoDB/include/Poco/MongoDB/Database.h b/base/poco/MongoDB/include/Poco/MongoDB/Database.h
index 3334a673df64..1fa91f4ca1af 100644
--- a/base/poco/MongoDB/include/Poco/MongoDB/Database.h
+++ b/base/poco/MongoDB/include/Poco/MongoDB/Database.h
@@ -70,7 +70,7 @@ namespace MongoDB
         Document::Ptr queryBuildInfo(Connection & connection) const;
         /// Queries server build info (all wire protocols)
 
-        Document::Ptr queryServerHello(Connection & connection) const;
+        Document::Ptr queryServerHello(Connection & connection, bool old = false) const;
         /// Queries hello response from server (all wire protocols)
 
         Int64 count(Connection & connection, const std::string & collectionName) const;
diff --git a/base/poco/MongoDB/src/Database.cpp b/base/poco/MongoDB/src/Database.cpp
index 1a0d3cfe5599..15c46b172511 100644
--- a/base/poco/MongoDB/src/Database.cpp
+++ b/base/poco/MongoDB/src/Database.cpp
@@ -356,11 +356,19 @@ Document::Ptr Database::queryBuildInfo(Connection& connection) const
 }
 
 
-Document::Ptr Database::queryServerHello(Connection& connection) const
+Document::Ptr Database::queryServerHello(Connection& connection, bool old) const
 {
     // hello can be issued on "config" system database
     Poco::SharedPtr<Poco::MongoDB::QueryRequest> request = createCommand();
-    request->selector().add("hello", 1);
+
+    // 'hello' command was previously called 'isMaster'
+    std::string command_name;
+    if (old)
+        command_name = "isMaster";
+    else
+        command_name = "hello";
+
+    request->selector().add(command_name, 1);
 
     Poco::MongoDB::ResponseMessage response;
     connection.sendRequest(*request, response);
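The `old` switch added to `queryServerHello` above exists because the MongoDB handshake command was renamed from `isMaster` to `hello` in newer server versions. A hypothetical caller probing both spellings; the connection details are illustrative assumptions:

    #include "Poco/MongoDB/Connection.h"
    #include "Poco/MongoDB/Database.h"

    Poco::MongoDB::Connection connection("localhost", 27017);
    Poco::MongoDB::Database db("config");

    // Modern wire protocol: sends {hello: 1}. Legacy servers: {isMaster: 1}.
    Poco::MongoDB::Document::Ptr hello = db.queryServerHello(connection);
    Poco::MongoDB::Document::Ptr legacy = db.queryServerHello(connection, /* old = */ true);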
diff --git a/base/poco/Net/include/Poco/Net/HTTPBasicStreamBuf.h b/base/poco/Net/include/Poco/Net/HTTPBasicStreamBuf.h
index c4872d95353c..c87719b63a49 100644
--- a/base/poco/Net/include/Poco/Net/HTTPBasicStreamBuf.h
+++ b/base/poco/Net/include/Poco/Net/HTTPBasicStreamBuf.h
@@ -19,7 +19,6 @@
 
 #include "Poco/BufferedStreamBuf.h"
-#include "Poco/Net/HTTPBufferAllocator.h"
 #include "Poco/Net/Net.h"
 
 
@@ -27,9 +26,9 @@ namespace Poco
 {
 namespace Net
 {
+    constexpr size_t HTTP_DEFAULT_BUFFER_SIZE = 8 * 1024;
-
-    typedef Poco::BasicBufferedStreamBuf<char, std::char_traits<char>, HTTPBufferAllocator> HTTPBasicStreamBuf;
+    typedef Poco::BasicBufferedStreamBuf<char, std::char_traits<char>> HTTPBasicStreamBuf;
 
 }
diff --git a/base/poco/Net/include/Poco/Net/HTTPBufferAllocator.h b/base/poco/Net/include/Poco/Net/HTTPBufferAllocator.h
deleted file mode 100644
index 5d088e352971..000000000000
--- a/base/poco/Net/include/Poco/Net/HTTPBufferAllocator.h
+++ /dev/null
@@ -1,53 +0,0 @@
-//
-// HTTPBufferAllocator.h
-//
-// Library: Net
-// Package: HTTP
-// Module: HTTPBufferAllocator
-//
-// Definition of the HTTPBufferAllocator class.
-//
-// Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#ifndef Net_HTTPBufferAllocator_INCLUDED
-#define Net_HTTPBufferAllocator_INCLUDED
-
-
-#include
-#include "Poco/MemoryPool.h"
-#include "Poco/Net/Net.h"
-
-
-namespace Poco
-{
-namespace Net
-{
-
-
-    class Net_API HTTPBufferAllocator
-    /// A BufferAllocator for HTTP streams.
-    {
-    public:
-        static char * allocate(std::streamsize size);
-        static void deallocate(char * ptr, std::streamsize size);
-
-        enum
-        {
-            BUFFER_SIZE = 128 * 1024
-        };
-
-    private:
-        static Poco::MemoryPool _pool;
-    };
-
-
-}
-} // namespace Poco::Net
-
-
-#endif // Net_HTTPBufferAllocator_INCLUDED
diff --git a/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h b/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h
index 47987b18817a..5f4729c9278e 100644
--- a/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h
+++ b/base/poco/Net/include/Poco/Net/HTTPChunkedStream.h
@@ -21,7 +21,6 @@
 #include
 #include
 #include
-#include "Poco/MemoryPool.h"
 #include "Poco/Net/HTTPBasicStreamBuf.h"
 #include "Poco/Net/Net.h"
 
@@ -80,12 +79,6 @@ namespace Net
     public:
         HTTPChunkedInputStream(HTTPSession & session);
         ~HTTPChunkedInputStream();
-
-        void * operator new(std::size_t size);
-        void operator delete(void * ptr);
-
-    private:
-        static Poco::MemoryPool _pool;
     };
 
@@ -95,12 +88,6 @@ namespace Net
     public:
         HTTPChunkedOutputStream(HTTPSession & session);
         ~HTTPChunkedOutputStream();
-
-        void * operator new(std::size_t size);
-        void operator delete(void * ptr);
-
-    private:
-        static Poco::MemoryPool _pool;
     };
diff --git a/base/poco/Net/include/Poco/Net/HTTPClientSession.h b/base/poco/Net/include/Poco/Net/HTTPClientSession.h
index d495d662f750..167a06eb7ffe 100644
--- a/base/poco/Net/include/Poco/Net/HTTPClientSession.h
+++ b/base/poco/Net/include/Poco/Net/HTTPClientSession.h
@@ -306,7 +306,7 @@ namespace Net
             DEFAULT_KEEP_ALIVE_TIMEOUT = 8
         };
 
-        void reconnect();
+        virtual void reconnect();
         /// Connects the underlying socket to the HTTP server.
 
         int write(const char * buffer, std::streamsize length);
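Making `reconnect()` virtual above allows a session subclass to customize how the underlying socket is established. A purely hypothetical override, not part of the patch, that retries transient network failures:

    #include "Poco/Net/HTTPClientSession.h"
    #include "Poco/Net/NetException.h"

    class RetryingHTTPSession : public Poco::Net::HTTPClientSession
    {
    protected:
        void reconnect() override
        {
            for (int attempt = 0;; ++attempt)
            {
                try
                {
                    HTTPClientSession::reconnect(); // the base implementation connects the socket
                    return;
                }
                catch (const Poco::Net::NetException &)
                {
                    if (attempt >= 2)
                        throw; // give up after three attempts
                }
            }
        }
    };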
diff --git a/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h b/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h
index 4de211fdb92d..2f4df1026057 100644
--- a/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h
+++ b/base/poco/Net/include/Poco/Net/HTTPFixedLengthStream.h
@@ -78,12 +78,6 @@ namespace Net
     public:
         HTTPFixedLengthInputStream(HTTPSession & session, HTTPFixedLengthStreamBuf::ContentLength length);
         ~HTTPFixedLengthInputStream();
-
-        void * operator new(std::size_t size);
-        void operator delete(void * ptr);
-
-    private:
-        static Poco::MemoryPool _pool;
     };
 
@@ -93,12 +87,6 @@ namespace Net
     public:
         HTTPFixedLengthOutputStream(HTTPSession & session, HTTPFixedLengthStreamBuf::ContentLength length);
         ~HTTPFixedLengthOutputStream();
-
-        void * operator new(std::size_t size);
-        void operator delete(void * ptr);
-
-    private:
-        static Poco::MemoryPool _pool;
     };
diff --git a/base/poco/Net/include/Poco/Net/HTTPHeaderStream.h b/base/poco/Net/include/Poco/Net/HTTPHeaderStream.h
index bcfca984d8bf..cf1a6dba2e60 100644
--- a/base/poco/Net/include/Poco/Net/HTTPHeaderStream.h
+++ b/base/poco/Net/include/Poco/Net/HTTPHeaderStream.h
@@ -21,7 +21,6 @@
 #include
 #include
 #include
-#include "Poco/MemoryPool.h"
 #include "Poco/Net/HTTPBasicStreamBuf.h"
 #include "Poco/Net/Net.h"
 
@@ -74,12 +73,6 @@ namespace Net
     public:
         HTTPHeaderInputStream(HTTPSession & session);
         ~HTTPHeaderInputStream();
-
-        void * operator new(std::size_t size);
-        void operator delete(void * ptr);
-
-    private:
-        static Poco::MemoryPool _pool;
     };
 
@@ -89,12 +82,6 @@ namespace Net
     public:
         HTTPHeaderOutputStream(HTTPSession & session);
         ~HTTPHeaderOutputStream();
-
-        void * operator new(std::size_t size);
-        void operator delete(void * ptr);
-
-    private:
-        static Poco::MemoryPool _pool;
     };
diff --git a/base/poco/Net/include/Poco/Net/HTTPSession.h b/base/poco/Net/include/Poco/Net/HTTPSession.h
index d0045025f5f0..934b34be5d5f 100644
--- a/base/poco/Net/include/Poco/Net/HTTPSession.h
+++ b/base/poco/Net/include/Poco/Net/HTTPSession.h
@@ -192,7 +192,7 @@ namespace Net
         HTTPSession & operator=(const HTTPSession &);
 
         StreamSocket _socket;
-        char * _pBuffer;
+        std::unique_ptr<char[]> _pBuffer;
         char * _pCurrent;
         char * _pEnd;
         bool _keepAlive;
diff --git a/base/poco/Net/include/Poco/Net/HTTPStream.h b/base/poco/Net/include/Poco/Net/HTTPStream.h
index 0197bc62eb29..48502347b2cc 100644
--- a/base/poco/Net/include/Poco/Net/HTTPStream.h
+++ b/base/poco/Net/include/Poco/Net/HTTPStream.h
@@ -21,7 +21,6 @@
 #include
 #include
 #include
-#include "Poco/MemoryPool.h"
 #include "Poco/Net/HTTPBasicStreamBuf.h"
 #include "Poco/Net/Net.h"
 
@@ -75,12 +74,6 @@ namespace Net
     public:
         HTTPInputStream(HTTPSession & session);
         ~HTTPInputStream();
-
-        void * operator new(std::size_t size);
-        void operator delete(void * ptr);
-
-    private:
-        static Poco::MemoryPool _pool;
     };
 
@@ -90,12 +83,6 @@ namespace Net
     public:
         HTTPOutputStream(HTTPSession & session);
         ~HTTPOutputStream();
-
-        void * operator new(std::size_t size);
-        void operator delete(void * ptr);
-
-    private:
-        static Poco::MemoryPool _pool;
     };
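The stream classes above lose their class-specific `operator new`/`operator delete`, and the session buffer becomes a `std::unique_ptr<char[]>`: every use of the fixed-size `Poco::MemoryPool` is replaced by plain heap allocation. A simplified before/after sketch of the buffer handling; the pooled variant mirrors the `HTTPBufferAllocator` deleted below:

    #include <cstddef>
    #include <memory>

    // Before: buffers were recycled through a process-wide pool of fixed-size
    // blocks (HTTPBufferAllocator::BUFFER_SIZE = 128 * 1024):
    //     char * buffer = HTTPBufferAllocator::allocate(HTTPBufferAllocator::BUFFER_SIZE);
    //     ... use buffer ...
    //     HTTPBufferAllocator::deallocate(buffer, HTTPBufferAllocator::BUFFER_SIZE);

    // After: an ordinary allocation owned by the session and freed automatically.
    constexpr std::size_t HTTP_DEFAULT_BUFFER_SIZE = 8 * 1024;
    auto buffer = std::make_unique<char[]>(HTTP_DEFAULT_BUFFER_SIZE);

Note the default buffer also shrinks from 128 KiB to 8 KiB as part of this change.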
diff --git a/base/poco/Net/src/HTTPBufferAllocator.cpp b/base/poco/Net/src/HTTPBufferAllocator.cpp
deleted file mode 100644
index 2944e2a6121c..000000000000
--- a/base/poco/Net/src/HTTPBufferAllocator.cpp
+++ /dev/null
@@ -1,44 +0,0 @@
-//
-// HTTPBufferAllocator.cpp
-//
-// Library: Net
-// Package: HTTP
-// Module: HTTPBufferAllocator
-//
-// Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH.
-// and Contributors.
-//
-// SPDX-License-Identifier: BSL-1.0
-//
-
-
-#include "Poco/Net/HTTPBufferAllocator.h"
-
-
-using Poco::MemoryPool;
-
-
-namespace Poco {
-namespace Net {
-
-
-MemoryPool HTTPBufferAllocator::_pool(HTTPBufferAllocator::BUFFER_SIZE, 16);
-
-
-char* HTTPBufferAllocator::allocate(std::streamsize size)
-{
-    poco_assert_dbg (size == BUFFER_SIZE);
-
-    return reinterpret_cast<char*>(_pool.get());
-}
-
-
-void HTTPBufferAllocator::deallocate(char* ptr, std::streamsize size)
-{
-    poco_assert_dbg (size == BUFFER_SIZE);
-
-    _pool.release(ptr);
-}
-
-
-} } // namespace Poco::Net
diff --git a/base/poco/Net/src/HTTPChunkedStream.cpp b/base/poco/Net/src/HTTPChunkedStream.cpp
index f2f79da590ba..376e3f55492d 100644
--- a/base/poco/Net/src/HTTPChunkedStream.cpp
+++ b/base/poco/Net/src/HTTPChunkedStream.cpp
@@ -34,7 +34,7 @@ namespace Net {
 
 
 HTTPChunkedStreamBuf::HTTPChunkedStreamBuf(HTTPSession& session, openmode mode):
-    HTTPBasicStreamBuf(HTTPBufferAllocator::BUFFER_SIZE, mode),
+    HTTPBasicStreamBuf(HTTP_DEFAULT_BUFFER_SIZE, mode),
     _session(session),
     _mode(mode),
     _chunk(0)
@@ -181,10 +181,6 @@ HTTPChunkedStreamBuf* HTTPChunkedIOS::rdbuf()
 // HTTPChunkedInputStream
 //
 
-
-Poco::MemoryPool HTTPChunkedInputStream::_pool(sizeof(HTTPChunkedInputStream));
-
-
 HTTPChunkedInputStream::HTTPChunkedInputStream(HTTPSession& session):
     HTTPChunkedIOS(session, std::ios::in),
     std::istream(&_buf)
@@ -196,34 +192,10 @@ HTTPChunkedInputStream::~HTTPChunkedInputStream()
 {
 }
 
-
-void* HTTPChunkedInputStream::operator new(std::size_t size)
-{
-    return _pool.get();
-}
-
-
-void HTTPChunkedInputStream::operator delete(void* ptr)
-{
-    try
-    {
-        _pool.release(ptr);
-    }
-    catch (...)
-    {
-        poco_unexpected();
-    }
-}
-
-
 //
 // HTTPChunkedOutputStream
 //
 
-
-Poco::MemoryPool HTTPChunkedOutputStream::_pool(sizeof(HTTPChunkedOutputStream));
-
-
 HTTPChunkedOutputStream::HTTPChunkedOutputStream(HTTPSession& session):
     HTTPChunkedIOS(session, std::ios::out),
     std::ostream(&_buf)
@@ -235,24 +207,4 @@ HTTPChunkedOutputStream::~HTTPChunkedOutputStream()
 {
 }
 
-
-void* HTTPChunkedOutputStream::operator new(std::size_t size)
-{
-    return _pool.get();
-}
-
-
-void HTTPChunkedOutputStream::operator delete(void* ptr)
-{
-    try
-    {
-        _pool.release(ptr);
-    }
-    catch (...)
- { - poco_unexpected(); - } -} - - } } // namespace Poco::Net diff --git a/base/poco/Net/src/HTTPClientSession.cpp b/base/poco/Net/src/HTTPClientSession.cpp index c5697b556d1b..2712c0c452ed 100644 --- a/base/poco/Net/src/HTTPClientSession.cpp +++ b/base/poco/Net/src/HTTPClientSession.cpp @@ -26,7 +26,6 @@ #include "Poco/CountingStream.h" #include "Poco/RegularExpression.h" #include -#include using Poco::NumberFormatter; diff --git a/base/poco/Net/src/HTTPFixedLengthStream.cpp b/base/poco/Net/src/HTTPFixedLengthStream.cpp index d19f6122ee12..fd77ff71cd92 100644 --- a/base/poco/Net/src/HTTPFixedLengthStream.cpp +++ b/base/poco/Net/src/HTTPFixedLengthStream.cpp @@ -30,7 +30,7 @@ namespace Net { HTTPFixedLengthStreamBuf::HTTPFixedLengthStreamBuf(HTTPSession& session, ContentLength length, openmode mode): - HTTPBasicStreamBuf(HTTPBufferAllocator::BUFFER_SIZE, mode), + HTTPBasicStreamBuf(HTTP_DEFAULT_BUFFER_SIZE, mode), _session(session), _length(length), _count(0) @@ -109,9 +109,6 @@ HTTPFixedLengthStreamBuf* HTTPFixedLengthIOS::rdbuf() // -Poco::MemoryPool HTTPFixedLengthInputStream::_pool(sizeof(HTTPFixedLengthInputStream)); - - HTTPFixedLengthInputStream::HTTPFixedLengthInputStream(HTTPSession& session, HTTPFixedLengthStreamBuf::ContentLength length): HTTPFixedLengthIOS(session, length, std::ios::in), std::istream(&_buf) @@ -124,33 +121,10 @@ HTTPFixedLengthInputStream::~HTTPFixedLengthInputStream() } -void* HTTPFixedLengthInputStream::operator new(std::size_t size) -{ - return _pool.get(); -} - - -void HTTPFixedLengthInputStream::operator delete(void* ptr) -{ - try - { - _pool.release(ptr); - } - catch (...) - { - poco_unexpected(); - } -} - - // // HTTPFixedLengthOutputStream // - -Poco::MemoryPool HTTPFixedLengthOutputStream::_pool(sizeof(HTTPFixedLengthOutputStream)); - - HTTPFixedLengthOutputStream::HTTPFixedLengthOutputStream(HTTPSession& session, HTTPFixedLengthStreamBuf::ContentLength length): HTTPFixedLengthIOS(session, length, std::ios::out), std::ostream(&_buf) @@ -163,23 +137,4 @@ HTTPFixedLengthOutputStream::~HTTPFixedLengthOutputStream() } -void* HTTPFixedLengthOutputStream::operator new(std::size_t size) -{ - return _pool.get(); -} - - -void HTTPFixedLengthOutputStream::operator delete(void* ptr) -{ - try - { - _pool.release(ptr); - } - catch (...) - { - poco_unexpected(); - } -} - - } } // namespace Poco::Net diff --git a/base/poco/Net/src/HTTPHeaderStream.cpp b/base/poco/Net/src/HTTPHeaderStream.cpp index 8e0091fcbe3f..39b9007062d9 100644 --- a/base/poco/Net/src/HTTPHeaderStream.cpp +++ b/base/poco/Net/src/HTTPHeaderStream.cpp @@ -26,7 +26,7 @@ namespace Net { HTTPHeaderStreamBuf::HTTPHeaderStreamBuf(HTTPSession& session, openmode mode): - HTTPBasicStreamBuf(HTTPBufferAllocator::BUFFER_SIZE, mode), + HTTPBasicStreamBuf(HTTP_DEFAULT_BUFFER_SIZE, mode), _session(session), _end(false) { @@ -101,10 +101,6 @@ HTTPHeaderStreamBuf* HTTPHeaderIOS::rdbuf() // HTTPHeaderInputStream // - -Poco::MemoryPool HTTPHeaderInputStream::_pool(sizeof(HTTPHeaderInputStream)); - - HTTPHeaderInputStream::HTTPHeaderInputStream(HTTPSession& session): HTTPHeaderIOS(session, std::ios::in), std::istream(&_buf) @@ -116,34 +112,10 @@ HTTPHeaderInputStream::~HTTPHeaderInputStream() { } - -void* HTTPHeaderInputStream::operator new(std::size_t size) -{ - return _pool.get(); -} - - -void HTTPHeaderInputStream::operator delete(void* ptr) -{ - try - { - _pool.release(ptr); - } - catch (...) 
-    {
-        poco_unexpected();
-    }
-}
-
-
 //
 // HTTPHeaderOutputStream
 //
 
-
-Poco::MemoryPool HTTPHeaderOutputStream::_pool(sizeof(HTTPHeaderOutputStream));
-
-
 HTTPHeaderOutputStream::HTTPHeaderOutputStream(HTTPSession& session):
     HTTPHeaderIOS(session, std::ios::out),
     std::ostream(&_buf)
@@ -155,24 +127,4 @@ HTTPHeaderOutputStream::~HTTPHeaderOutputStream()
 {
 }
 
-
-void* HTTPHeaderOutputStream::operator new(std::size_t size)
-{
-    return _pool.get();
-}
-
-
-void HTTPHeaderOutputStream::operator delete(void* ptr)
-{
-    try
-    {
-        _pool.release(ptr);
-    }
-    catch (...)
-    {
-        poco_unexpected();
-    }
-}
-
-
 } } // namespace Poco::Net
diff --git a/base/poco/Net/src/HTTPSession.cpp b/base/poco/Net/src/HTTPSession.cpp
index cb6fdc25e9ac..d2663baaf9fe 100644
--- a/base/poco/Net/src/HTTPSession.cpp
+++ b/base/poco/Net/src/HTTPSession.cpp
@@ -13,8 +13,8 @@
 
 #include "Poco/Net/HTTPSession.h"
-#include "Poco/Net/HTTPBufferAllocator.h"
 #include "Poco/Net/NetException.h"
+#include "Poco/Net/HTTPBasicStreamBuf.h"
 #include
 
@@ -68,14 +68,6 @@ HTTPSession::HTTPSession(const StreamSocket& socket, bool keepAlive):
 
 HTTPSession::~HTTPSession()
 {
-    try
-    {
-        if (_pBuffer) HTTPBufferAllocator::deallocate(_pBuffer, HTTPBufferAllocator::BUFFER_SIZE);
-    }
-    catch (...)
-    {
-        poco_unexpected();
-    }
     try
     {
         close();
@@ -177,10 +169,10 @@ void HTTPSession::refill()
 {
     if (!_pBuffer)
     {
-        _pBuffer = HTTPBufferAllocator::allocate(HTTPBufferAllocator::BUFFER_SIZE);
+        _pBuffer = std::make_unique<char[]>(HTTP_DEFAULT_BUFFER_SIZE);
     }
-    _pCurrent = _pEnd = _pBuffer;
-    int n = receive(_pBuffer, HTTPBufferAllocator::BUFFER_SIZE);
+    _pCurrent = _pEnd = _pBuffer.get();
+    int n = receive(_pBuffer.get(), HTTP_DEFAULT_BUFFER_SIZE);
     _pEnd += n;
 }
 
@@ -199,7 +191,7 @@ void HTTPSession::connect(const SocketAddress& address)
     _socket.setNoDelay(true);
     // There may be leftover data from a previous (failed) request in the buffer,
     // so we clear it.
-    _pCurrent = _pEnd = _pBuffer;
+    _pCurrent = _pEnd = _pBuffer.get();
 }
diff --git a/base/poco/Net/src/HTTPStream.cpp b/base/poco/Net/src/HTTPStream.cpp
index 4acb881c4f3b..c2f276005690 100644
--- a/base/poco/Net/src/HTTPStream.cpp
+++ b/base/poco/Net/src/HTTPStream.cpp
@@ -26,7 +26,7 @@ namespace Net {
 
 
 HTTPStreamBuf::HTTPStreamBuf(HTTPSession& session, openmode mode):
-    HTTPBasicStreamBuf(HTTPBufferAllocator::BUFFER_SIZE, mode),
+    HTTPBasicStreamBuf(HTTP_DEFAULT_BUFFER_SIZE, mode),
     _session(session),
     _mode(mode)
 {
@@ -96,10 +96,6 @@ HTTPStreamBuf* HTTPIOS::rdbuf()
 // HTTPInputStream
 //
 
-
-Poco::MemoryPool HTTPInputStream::_pool(sizeof(HTTPInputStream));
-
-
 HTTPInputStream::HTTPInputStream(HTTPSession& session):
     HTTPIOS(session, std::ios::in),
     std::istream(&_buf)
@@ -112,33 +108,11 @@ HTTPInputStream::~HTTPInputStream()
 }
 
 
-void* HTTPInputStream::operator new(std::size_t size)
-{
-    return _pool.get();
-}
-
-
-void HTTPInputStream::operator delete(void* ptr)
-{
-    try
-    {
-        _pool.release(ptr);
-    }
-    catch (...)
-    {
-        poco_unexpected();
-    }
-}
-
-
 //
 // HTTPOutputStream
 //
 
-Poco::MemoryPool HTTPOutputStream::_pool(sizeof(HTTPOutputStream));
-
-
 HTTPOutputStream::HTTPOutputStream(HTTPSession& session):
     HTTPIOS(session, std::ios::out),
     std::ostream(&_buf)
@@ -150,24 +124,4 @@ HTTPOutputStream::~HTTPOutputStream()
 {
 }
 
-
-void* HTTPOutputStream::operator new(std::size_t size)
-{
-    return _pool.get();
-}
-
-
-void HTTPOutputStream::operator delete(void* ptr)
-{
-    try
-    {
-        _pool.release(ptr);
-    }
-    catch (...)
- { - poco_unexpected(); - } -} - - } } // namespace Poco::Net diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h index 65917ac9dd40..c19eecf5c737 100644 --- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h +++ b/base/poco/NetSSL_OpenSSL/include/Poco/Net/Context.h @@ -146,7 +146,7 @@ namespace Net std::string cipherList; /// Specifies the supported ciphers in OpenSSL notation. - /// Defaults to "ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH". + /// Defaults to "ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH". std::string dhParamsFile; /// Specifies a file containing Diffie-Hellman parameters. @@ -172,7 +172,7 @@ namespace Net VerificationMode verificationMode = VERIFY_RELAXED, int verificationDepth = 9, bool loadDefaultCAs = false, - const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH"); + const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH"); /// Creates a Context. /// /// * usage specifies whether the context is used by a client or server. @@ -200,7 +200,7 @@ namespace Net VerificationMode verificationMode = VERIFY_RELAXED, int verificationDepth = 9, bool loadDefaultCAs = false, - const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH"); + const std::string & cipherList = "ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH"); /// Creates a Context. /// /// * usage specifies whether the context is used by a client or server. diff --git a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h index 21a1ed685e54..e4037c87927b 100644 --- a/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h +++ b/base/poco/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h @@ -76,7 +76,7 @@ namespace Net /// none|relaxed|strict|once /// 1..9 /// true|false - /// ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH + /// ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH /// true|false /// /// KeyFileHandler diff --git a/base/poco/NetSSL_OpenSSL/src/Context.cpp b/base/poco/NetSSL_OpenSSL/src/Context.cpp index ca220c40a33d..d0bab902b89e 100644 --- a/base/poco/NetSSL_OpenSSL/src/Context.cpp +++ b/base/poco/NetSSL_OpenSSL/src/Context.cpp @@ -41,7 +41,7 @@ Context::Params::Params(): verificationMode(VERIFY_RELAXED), verificationDepth(9), loadDefaultCAs(false), - cipherList("ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH") + cipherList("ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH") { } diff --git a/cmake/autogenerated_versions.txt b/cmake/autogenerated_versions.txt index 015037b2de6f..2f6d43d6cd0c 100644 --- a/cmake/autogenerated_versions.txt +++ b/cmake/autogenerated_versions.txt @@ -2,11 +2,11 @@ # NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION, # only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes. 
-SET(VERSION_REVISION 54475) +SET(VERSION_REVISION 54479) SET(VERSION_MAJOR 23) -SET(VERSION_MINOR 6) +SET(VERSION_MINOR 10) SET(VERSION_PATCH 1) -SET(VERSION_GITHASH 2fec796e73efda10a538a03af3205ce8ffa1b2de) -SET(VERSION_DESCRIBE v23.6.1.1-testing) -SET(VERSION_STRING 23.6.1.1) +SET(VERSION_GITHASH 8f9a227de1f530cdbda52c145d41a6b0f1d29961) +SET(VERSION_DESCRIBE v23.10.1.1-testing) +SET(VERSION_STRING 23.10.1.1) # end of autochange diff --git a/cmake/ccache.cmake b/cmake/ccache.cmake index 9a70e4aee320..e8bf856332a5 100644 --- a/cmake/ccache.cmake +++ b/cmake/ccache.cmake @@ -12,7 +12,7 @@ endif() set(COMPILER_CACHE "auto" CACHE STRING "Speedup re-compilations using the caching tools; valid options are 'auto' (ccache, then sccache), 'ccache', 'sccache', or 'disabled'") if(COMPILER_CACHE STREQUAL "auto") - find_program (CCACHE_EXECUTABLE ccache sccache) + find_program (CCACHE_EXECUTABLE NAMES ccache sccache) elseif (COMPILER_CACHE STREQUAL "ccache") find_program (CCACHE_EXECUTABLE ccache) elseif(COMPILER_CACHE STREQUAL "sccache") diff --git a/cmake/clang_tidy.cmake b/cmake/clang_tidy.cmake index 96c295b6bb92..4323c20463a9 100644 --- a/cmake/clang_tidy.cmake +++ b/cmake/clang_tidy.cmake @@ -5,14 +5,14 @@ if (ENABLE_CLANG_TIDY) find_program (CLANG_TIDY_CACHE_PATH NAMES "clang-tidy-cache") if (CLANG_TIDY_CACHE_PATH) - find_program (_CLANG_TIDY_PATH NAMES "clang-tidy-16" "clang-tidy-15" "clang-tidy-14" "clang-tidy") + find_program (_CLANG_TIDY_PATH NAMES "clang-tidy-17" "clang-tidy-16" "clang-tidy") # Why do we use ';' here? # It's a cmake black magic: https://cmake.org/cmake/help/latest/prop_tgt/LANG_CLANG_TIDY.html#prop_tgt:%3CLANG%3E_CLANG_TIDY # The CLANG_TIDY_PATH is passed to CMAKE_CXX_CLANG_TIDY, which follows CXX_CLANG_TIDY syntax. set (CLANG_TIDY_PATH "${CLANG_TIDY_CACHE_PATH};${_CLANG_TIDY_PATH}" CACHE STRING "A combined command to run clang-tidy with caching wrapper") else () - find_program (CLANG_TIDY_PATH NAMES "clang-tidy-16" "clang-tidy-15" "clang-tidy-14" "clang-tidy") + find_program (CLANG_TIDY_PATH NAMES "clang-tidy-17" "clang-tidy-16" "clang-tidy") endif () if (CLANG_TIDY_PATH) diff --git a/cmake/darwin/default_libs.cmake b/cmake/darwin/default_libs.cmake index 812847e62010..42b8473cb755 100644 --- a/cmake/darwin/default_libs.cmake +++ b/cmake/darwin/default_libs.cmake @@ -15,6 +15,7 @@ set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) +include (cmake/unwind.cmake) include (cmake/cxx.cmake) link_libraries(global-group) diff --git a/cmake/dbms_glob_sources.cmake b/cmake/dbms_glob_sources.cmake index 01c4a8b16e96..fbe7f96cea3c 100644 --- a/cmake/dbms_glob_sources.cmake +++ b/cmake/dbms_glob_sources.cmake @@ -4,10 +4,19 @@ macro(add_glob cur_list) endmacro() macro(add_headers_and_sources prefix common_path) - add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h) - add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.c ${common_path}/*.h) + add_glob(${prefix}_headers ${common_path}/*.h) + add_glob(${prefix}_sources ${common_path}/*.cpp ${common_path}/*.c) endmacro() macro(add_headers_only prefix common_path) - add_glob(${prefix}_headers ${CMAKE_CURRENT_SOURCE_DIR} ${common_path}/*.h) + add_glob(${prefix}_headers ${common_path}/*.h) +endmacro() + +macro(extract_into_parent_list src_list dest_list) + list(REMOVE_ITEM ${src_list} ${ARGN}) + get_filename_component(__dir_name ${CMAKE_CURRENT_SOURCE_DIR} NAME) + foreach(file IN ITEMS ${ARGN}) + list(APPEND ${dest_list} ${__dir_name}/${file}) + 
endforeach()
+    set(${dest_list} "${${dest_list}}" PARENT_SCOPE)
 endmacro()
diff --git a/cmake/embed_binary.cmake b/cmake/embed_binary.cmake
deleted file mode 100644
index e5428c249398..000000000000
--- a/cmake/embed_binary.cmake
+++ /dev/null
@@ -1,58 +0,0 @@
-# Embed a set of resource files into a resulting object file.
-#
-# Signature: `clickhouse_embed_binaries(TARGET <target> RESOURCE_DIR <dir> RESOURCES <resource> ...)
-#
-# This will generate a static library target named `<target>`, which contains the contents of
-# each `<resource>` file. The files should be located in `<dir>`. <dir> defaults to
-# ${CMAKE_CURRENT_SOURCE_DIR}, and the resources may not be empty.
-#
-# Each resource will result in three symbols in the final archive, based on the name `<resource>`.
-# These are:
-# 1. `_binary_<name>_start`: Points to the start of the binary data from `<resource>`.
-# 2. `_binary_<name>_end`: Points to the end of the binary data from `<resource>`.
-# 2. `_binary_<name>_size`: Points to the size of the binary data from `<resource>`.
-#
-# `<name>` is a normalized name derived from `<resource>`, by replacing the characters "./-" with
-# the character "_", and the character "+" with "_PLUS_". This scheme is similar to those generated
-# by `ld -r -b binary`, and matches the expectations in `./base/common/getResource.cpp`.
-macro(clickhouse_embed_binaries)
-    set(one_value_args TARGET RESOURCE_DIR)
-    set(resources RESOURCES)
-    cmake_parse_arguments(EMBED "" "${one_value_args}" ${resources} ${ARGN})
-
-    if (NOT DEFINED EMBED_TARGET)
-        message(FATAL_ERROR "A target name must be provided for embedding binary resources into")
-    endif()
-
-    if (NOT DEFINED EMBED_RESOURCE_DIR)
-        set(EMBED_RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
-    endif()
-
-    list(LENGTH EMBED_RESOURCES N_RESOURCES)
-    if (N_RESOURCES LESS 1)
-        message(FATAL_ERROR "The list of binary resources to embed may not be empty")
-    endif()
-
-    add_library("${EMBED_TARGET}" STATIC)
-    set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)
-
-    set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in")
-
-    foreach(RESOURCE_FILE ${EMBED_RESOURCES})
-        set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
-        set(BINARY_FILE_NAME "${RESOURCE_FILE}")
-
-        # Normalize the name of the resource.
-        string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex
-        string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}")
-
-        # Generate the configured assembly file in the output directory.
-        configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY)
-
-        # Set the include directory for relative paths specified for `.incbin` directive.
-        set_property(SOURCE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" APPEND PROPERTY INCLUDE_DIRECTORIES "${EMBED_RESOURCE_DIR}")
-
-        target_sources("${EMBED_TARGET}" PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}")
-        set_target_properties("${EMBED_TARGET}" PROPERTIES OBJECT_DEPENDS "${RESOURCE_FILE}")
-    endforeach()
-endmacro()
diff --git a/cmake/ld.lld.in b/cmake/ld.lld.in
deleted file mode 100755
index 78a264a0089c..000000000000
--- a/cmake/ld.lld.in
+++ /dev/null
@@ -1,17 +0,0 @@
-#!/usr/bin/env bash
-
-# This is a workaround for bug in llvm/clang,
-# that does not produce .debug_aranges with LTO
-#
-# NOTE: this is a temporary solution, that should be removed after upgrading to
-# clang-16/llvm-16.
-#
-# Refs: https://reviews.llvm.org/D133092
-
-# NOTE: only -flto=thin is supported.
-# NOTE: it is not possible to check was there -gdwarf-aranges initially or not.
-if [[ "$*" =~ -plugin-opt=thinlto ]]; then - exec "@LLD_PATH@" -plugin-opt=-generate-arange-section "$@" -else - exec "@LLD_PATH@" "$@" -fi diff --git a/cmake/limit_jobs.cmake b/cmake/limit_jobs.cmake index a8f105b89874..28ccb62e10c6 100644 --- a/cmake/limit_jobs.cmake +++ b/cmake/limit_jobs.cmake @@ -1,38 +1,39 @@ -# Usage: -# set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") # In megabytes -# set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "") -# include (cmake/limit_jobs.cmake) +# Limit compiler/linker job concurrency to avoid OOMs on subtrees where compilation/linking is memory-intensive. +# +# Usage from CMake: +# set (MAX_COMPILER_MEMORY 2000 CACHE INTERNAL "") # megabyte +# set (MAX_LINKER_MEMORY 3500 CACHE INTERNAL "") # megabyte +# include (cmake/limit_jobs.cmake) +# +# (bigger values mean fewer jobs) -cmake_host_system_information(RESULT TOTAL_PHYSICAL_MEMORY QUERY TOTAL_PHYSICAL_MEMORY) # Not available under freebsd +cmake_host_system_information(RESULT TOTAL_PHYSICAL_MEMORY QUERY TOTAL_PHYSICAL_MEMORY) cmake_host_system_information(RESULT NUMBER_OF_LOGICAL_CORES QUERY NUMBER_OF_LOGICAL_CORES) -# 1 if not set -option(PARALLEL_COMPILE_JOBS "Maximum number of concurrent compilation jobs" "") - -# 1 if not set -option(PARALLEL_LINK_JOBS "Maximum number of concurrent link jobs" "") +# Set to disable the automatic job-limiting +option(PARALLEL_COMPILE_JOBS "Maximum number of concurrent compilation jobs" OFF) +option(PARALLEL_LINK_JOBS "Maximum number of concurrent link jobs" OFF) -if (NOT PARALLEL_COMPILE_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_COMPILER_MEMORY) +if (NOT PARALLEL_COMPILE_JOBS AND MAX_COMPILER_MEMORY) math(EXPR PARALLEL_COMPILE_JOBS ${TOTAL_PHYSICAL_MEMORY}/${MAX_COMPILER_MEMORY}) if (NOT PARALLEL_COMPILE_JOBS) set (PARALLEL_COMPILE_JOBS 1) endif () + if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES) + message(WARNING "The auto-calculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.") + endif() endif () -if (PARALLEL_COMPILE_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)) - set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR}) - string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_COMPILE ${CMAKE_JOB_POOL_COMPILE}) - set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_COMPILE}=${PARALLEL_COMPILE_JOBS}) -endif () - - -if (NOT PARALLEL_LINK_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_LINKER_MEMORY) +if (NOT PARALLEL_LINK_JOBS AND MAX_LINKER_MEMORY) math(EXPR PARALLEL_LINK_JOBS ${TOTAL_PHYSICAL_MEMORY}/${MAX_LINKER_MEMORY}) if (NOT PARALLEL_LINK_JOBS) set (PARALLEL_LINK_JOBS 1) endif () + if (PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES) + message(WARNING "The auto-calculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). 
Set PARALLEL_LINK_JOBS to override.") + endif() endif () # ThinLTO provides its own parallel linking @@ -46,14 +47,16 @@ if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND ENABLE_THINLTO AND PARALLE set (PARALLEL_LINK_JOBS 2) endif() -if (PARALLEL_LINK_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES)) +message(STATUS "Building sub-tree with ${PARALLEL_COMPILE_JOBS} compile jobs and ${PARALLEL_LINK_JOBS} linker jobs (system: ${NUMBER_OF_LOGICAL_CORES} cores, ${TOTAL_PHYSICAL_MEMORY} MB RAM, 'OFF' means the native core count).") + +if (PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES) + set(CMAKE_JOB_POOL_COMPILE compile_job_pool${CMAKE_CURRENT_SOURCE_DIR}) + string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_COMPILE ${CMAKE_JOB_POOL_COMPILE}) + set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_COMPILE}=${PARALLEL_COMPILE_JOBS}) +endif () + +if (PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES) set(CMAKE_JOB_POOL_LINK link_job_pool${CMAKE_CURRENT_SOURCE_DIR}) string (REGEX REPLACE "[^a-zA-Z0-9]+" "_" CMAKE_JOB_POOL_LINK ${CMAKE_JOB_POOL_LINK}) set_property(GLOBAL APPEND PROPERTY JOB_POOLS ${CMAKE_JOB_POOL_LINK}=${PARALLEL_LINK_JOBS}) endif () - -if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS) - message(STATUS - "${CMAKE_CURRENT_SOURCE_DIR}: Have ${TOTAL_PHYSICAL_MEMORY} megabytes of memory. - Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS} (system has ${NUMBER_OF_LOGICAL_CORES} logical cores)") -endif () diff --git a/cmake/linux/default_libs.cmake b/cmake/linux/default_libs.cmake index d42d587303a2..56a663a708eb 100644 --- a/cmake/linux/default_libs.cmake +++ b/cmake/linux/default_libs.cmake @@ -35,10 +35,6 @@ find_package(Threads REQUIRED) include (cmake/unwind.cmake) include (cmake/cxx.cmake) -# Delay the call to link the global interface after the libc++ libraries are included to avoid circular dependencies -# which are ok with static libraries but not with dynamic ones -link_libraries(global-group) - if (NOT OS_ANDROID) if (NOT USE_MUSL) # Our compatibility layer doesn't build under Android, many errors in musl. @@ -47,6 +43,8 @@ if (NOT OS_ANDROID) add_subdirectory(base/harmful) endif () +link_libraries(global-group) + target_link_libraries(global-group INTERFACE -Wl,--start-group $ diff --git a/cmake/linux/toolchain-ppc64le.cmake b/cmake/linux/toolchain-ppc64le.cmake index 8eb2aab34e94..c46ea954b710 100644 --- a/cmake/linux/toolchain-ppc64le.cmake +++ b/cmake/linux/toolchain-ppc64le.cmake @@ -5,9 +5,9 @@ set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) set (CMAKE_SYSTEM_NAME "Linux") set (CMAKE_SYSTEM_PROCESSOR "ppc64le") -set (CMAKE_C_COMPILER_TARGET "ppc64le-linux-gnu") -set (CMAKE_CXX_COMPILER_TARGET "ppc64le-linux-gnu") -set (CMAKE_ASM_COMPILER_TARGET "ppc64le-linux-gnu") +set (CMAKE_C_COMPILER_TARGET "powerpc64le-linux-gnu") +set (CMAKE_CXX_COMPILER_TARGET "powerpc64le-linux-gnu") +set (CMAKE_ASM_COMPILER_TARGET "powerpc64le-linux-gnu") # Will be changed later, but somehow needed to be set here. 
set (CMAKE_AR "ar")
diff --git a/cmake/linux/toolchain-s390x.cmake b/cmake/linux/toolchain-s390x.cmake
index b85d4253b89f..945eb9affa45 100644
--- a/cmake/linux/toolchain-s390x.cmake
+++ b/cmake/linux/toolchain-s390x.cmake
@@ -20,6 +20,9 @@ set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/s390x-linux-gnu/libc")
 set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
 set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
 set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
+set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=mold -Wl,-L${CMAKE_SYSROOT}/usr/lib64")
+set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} -fuse-ld=mold -Wl,-L${CMAKE_SYSROOT}/usr/lib64")
+set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=mold -Wl,-L${CMAKE_SYSROOT}/usr/lib64")
 
 set (HAS_PRE_1970_EXITCODE "0" CACHE STRING "Result from TRY_RUN" FORCE)
 set (HAS_PRE_1970_EXITCODE__TRYRUN_OUTPUT "" CACHE STRING "Output from TRY_RUN" FORCE)
diff --git a/cmake/sanitize.cmake b/cmake/sanitize.cmake
index 6ac46bb42fa2..b3c8f97a5e1a 100644
--- a/cmake/sanitize.cmake
+++ b/cmake/sanitize.cmake
@@ -14,15 +14,6 @@ set (SAN_FLAGS "${SAN_FLAGS} -g -fno-omit-frame-pointer -DSANITIZER")
 if (SANITIZE)
     if (SANITIZE STREQUAL "address")
         set (ASAN_FLAGS "-fsanitize=address -fsanitize-address-use-after-scope")
-        if (COMPILER_CLANG)
-            if (${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER_EQUAL 15 AND ${CMAKE_CXX_COMPILER_VERSION} VERSION_LESS 16)
-                # LLVM-15 has a bug in Address Sanitizer, preventing the usage
-                # of 'sanitize-address-use-after-scope', see [1].
-                #
-                # [1]: https://github.com/llvm/llvm-project/issues/58633
-                set (ASAN_FLAGS "${ASAN_FLAGS} -fno-sanitize-address-use-after-scope")
-            endif()
-        endif()
         set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")
         set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} ${ASAN_FLAGS}")
diff --git a/cmake/split_debug_symbols.cmake b/cmake/split_debug_symbols.cmake
index a9c2158359a5..d6821eb6c482 100644
--- a/cmake/split_debug_symbols.cmake
+++ b/cmake/split_debug_symbols.cmake
@@ -22,8 +22,9 @@ macro(clickhouse_split_debug_symbols)
         # Splits debug symbols into separate file, leaves the binary untouched:
         COMMAND "${OBJCOPY_PATH}" --only-keep-debug "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
         COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
-        # Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check:
-        COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note --keep-section=.clickhouse.hash "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
+        # Strips binary, sections '.note' & '.comment' are removed in line with Debian's stripping policy: www.debian.org/doc/debian-policy/ch-files.html, section '.clickhouse.hash' is needed for integrity check.
+        # Also, after we disabled the export of symbols for dynamic linking, we still need to keep a static symbol table for good stack traces.
+ COMMAND "${STRIP_PATH}" --strip-debug --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" # Associate stripped binary with debug symbols: COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" COMMENT "Stripping clickhouse binary" VERBATIM diff --git a/cmake/target.cmake b/cmake/target.cmake index 5ef45576fb79..e4a2f060f1ee 100644 --- a/cmake/target.cmake +++ b/cmake/target.cmake @@ -19,6 +19,19 @@ else () message (FATAL_ERROR "Platform ${CMAKE_SYSTEM_NAME} is not supported") endif () +# Since we always use toolchain files to generate hermetic builds, cmake will +# always think it's a cross-compilation, See +# https://cmake.org/cmake/help/latest/variable/CMAKE_CROSSCOMPILING.html +# +# This will slow down cmake configuration and compilation. For instance, LLVM +# will try to configure NATIVE LLVM targets with all tests enabled (You'll see +# Building native llvm-tblgen...). +# +# Here, we set it manually by checking the system name and processor. +if (${CMAKE_SYSTEM_NAME} STREQUAL ${CMAKE_HOST_SYSTEM_NAME} AND ${CMAKE_SYSTEM_PROCESSOR} STREQUAL ${CMAKE_HOST_SYSTEM_PROCESSOR}) + set (CMAKE_CROSSCOMPILING 0) +endif () + if (CMAKE_CROSSCOMPILING) if (OS_DARWIN) # FIXME: broken dependencies @@ -33,9 +46,21 @@ if (CMAKE_CROSSCOMPILING) elseif (ARCH_PPC64LE) set (ENABLE_GRPC OFF CACHE INTERNAL "") set (ENABLE_SENTRY OFF CACHE INTERNAL "") + elseif (ARCH_RISCV64) + # RISC-V support is preliminary + set (GLIBC_COMPATIBILITY OFF CACHE INTERNAL "") + set (ENABLE_LDAP OFF CACHE INTERNAL "") + set (OPENSSL_NO_ASM ON CACHE INTERNAL "") + set (ENABLE_JEMALLOC ON CACHE INTERNAL "") + set (ENABLE_PARQUET OFF CACHE INTERNAL "") + set (ENABLE_GRPC OFF CACHE INTERNAL "") + set (ENABLE_HDFS OFF CACHE INTERNAL "") + set (ENABLE_MYSQL OFF CACHE INTERNAL "") + # It might be ok, but we need to update 'sysroot' + set (ENABLE_RUST OFF CACHE INTERNAL "") elseif (ARCH_S390X) set (ENABLE_GRPC OFF CACHE INTERNAL "") - set (ENABLE_SENTRY OFF CACHE INTERNAL "") + set (ENABLE_RUST OFF CACHE INTERNAL "") endif () elseif (OS_FREEBSD) # FIXME: broken dependencies diff --git a/cmake/tools.cmake b/cmake/tools.cmake index 802907c9ddac..1ba3007b0f3b 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -13,7 +13,7 @@ execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version OUTPUT_VARIABLE COMPILER message (STATUS "Using compiler:\n${COMPILER_SELF_IDENTIFICATION}") # Require minimum compiler versions -set (CLANG_MINIMUM_VERSION 15) +set (CLANG_MINIMUM_VERSION 16) set (XCODE_MINIMUM_VERSION 12.0) set (APPLE_CLANG_MINIMUM_VERSION 12.0.0) @@ -49,14 +49,14 @@ endif () if (NOT LINKER_NAME) if (COMPILER_CLANG) - if (OS_LINUX) - if (NOT ARCH_S390X) # s390x doesnt support lld - find_program (LLD_PATH NAMES "ld.lld-${COMPILER_VERSION_MAJOR}" "ld.lld") - endif () + if (OS_LINUX AND NOT ARCH_S390X) + find_program (LLD_PATH NAMES "ld.lld-${COMPILER_VERSION_MAJOR}" "ld.lld") + elseif (OS_DARWIN) + find_program (LLD_PATH NAMES "ld") endif () endif () - if (OS_LINUX) - if (LLD_PATH) + if (LLD_PATH) + if (OS_LINUX OR OS_DARWIN) if (COMPILER_CLANG) # Clang driver simply allows full linker path. 
set (LINKER_NAME ${LLD_PATH}) @@ -70,23 +70,16 @@ if (LINKER_NAME) if (NOT LLD_PATH) message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.") endif () - # This a temporary quirk to emit .debug_aranges with ThinLTO, it is only the case clang/llvm <16 - if (COMPILER_CLANG AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16) - set (LLD_WRAPPER "${CMAKE_CURRENT_BINARY_DIR}/ld.lld") - configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/cmake/ld.lld.in" "${LLD_WRAPPER}" @ONLY) - - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}") - else () - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}") - endif() - + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}") endif () if (LINKER_NAME) message(STATUS "Using linker: ${LINKER_NAME}") -else() +elseif (NOT ARCH_S390X AND NOT OS_FREEBSD) + message (FATAL_ERROR "The only supported linker is LLVM's LLD, but we cannot find it.") +else () message(STATUS "Using linker: ") -endif() +endif () # Archiver diff --git a/cmake/unwind.cmake b/cmake/unwind.cmake index c9f5f30a5d6b..84e4f01b7523 100644 --- a/cmake/unwind.cmake +++ b/cmake/unwind.cmake @@ -1,13 +1 @@ -option (USE_UNWIND "Enable libunwind (better stacktraces)" ${ENABLE_LIBRARIES}) - -if (USE_UNWIND) - add_subdirectory(contrib/libunwind-cmake) - set (UNWIND_LIBRARIES unwind) - set (EXCEPTION_HANDLING_LIBRARY ${UNWIND_LIBRARIES}) - - message (STATUS "Using libunwind: ${UNWIND_LIBRARIES}") -else () - set (EXCEPTION_HANDLING_LIBRARY gcc_eh) -endif () - -message (STATUS "Using exception handler: ${EXCEPTION_HANDLING_LIBRARY}") +add_subdirectory(contrib/libunwind-cmake) diff --git a/cmake/utils.cmake b/cmake/utils.cmake new file mode 100644 index 000000000000..a318408098a7 --- /dev/null +++ b/cmake/utils.cmake @@ -0,0 +1,120 @@ +# Useful stuff + +# Function get_all_targets collects all targets recursively +function(get_all_targets outvar) + macro(get_all_targets_recursive targets dir) + get_property(subdirectories DIRECTORY ${dir} PROPERTY SUBDIRECTORIES) + foreach(subdir ${subdirectories}) + get_all_targets_recursive(${targets} ${subdir}) + endforeach() + get_property(current_targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS) + list(APPEND ${targets} ${current_targets}) + endmacro() + + set(targets) + get_all_targets_recursive(targets ${CMAKE_CURRENT_SOURCE_DIR}) + set(${outvar} ${targets} PARENT_SCOPE) +endfunction() + + +# Function get_target_filename calculates target's output file name +function(get_target_filename target outvar) + get_target_property(prop_type "${target}" TYPE) + get_target_property(prop_is_framework "${target}" FRAMEWORK) + get_target_property(prop_outname "${target}" OUTPUT_NAME) + get_target_property(prop_archive_outname "${target}" ARCHIVE_OUTPUT_NAME) + get_target_property(prop_library_outname "${target}" LIBRARY_OUTPUT_NAME) + get_target_property(prop_runtime_outname "${target}" RUNTIME_OUTPUT_NAME) + # message("prop_archive_outname: ${prop_archive_outname}") + # message("prop_library_outname: ${prop_library_outname}") + # message("prop_runtime_outname: ${prop_runtime_outname}") + if(DEFINED CMAKE_BUILD_TYPE) + get_target_property(prop_cfg_outname "${target}" "${OUTPUT_NAME}_${CMAKE_BUILD_TYPE}") + get_target_property(prop_archive_cfg_outname "${target}" "${ARCHIVE_OUTPUT_NAME}_${CMAKE_BUILD_TYPE}") + get_target_property(prop_library_cfg_outname "${target}" "${LIBRARY_OUTPUT_NAME}_${CMAKE_BUILD_TYPE}") + get_target_property(prop_runtime_cfg_outname "${target}" 
"${RUNTIME_OUTPUT_NAME}_${CMAKE_BUILD_TYPE}") + # message("prop_archive_cfg_outname: ${prop_archive_cfg_outname}") + # message("prop_library_cfg_outname: ${prop_library_cfg_outname}") + # message("prop_runtime_cfg_outname: ${prop_runtime_cfg_outname}") + if(NOT ("${prop_cfg_outname}" STREQUAL "prop_cfg_outname-NOTFOUND")) + set(prop_outname "${prop_cfg_outname}") + endif() + if(NOT ("${prop_archive_cfg_outname}" STREQUAL "prop_archive_cfg_outname-NOTFOUND")) + set(prop_archive_outname "${prop_archive_cfg_outname}") + endif() + if(NOT ("${prop_library_cfg_outname}" STREQUAL "prop_library_cfg_outname-NOTFOUND")) + set(prop_library_outname "${prop_library_cfg_outname}") + endif() + if(NOT ("${prop_runtime_cfg_outname}" STREQUAL "prop_runtime_cfg_outname-NOTFOUND")) + set(prop_runtime_outname "${prop_runtime_cfg_outname}") + endif() + endif() + set(outname "${target}") + if(NOT ("${prop_outname}" STREQUAL "prop_outname-NOTFOUND")) + set(outname "${prop_outname}") + endif() + if("${prop_is_framework}") + set(filename "${outname}") + elseif(prop_type STREQUAL "STATIC_LIBRARY") + if(NOT ("${prop_archive_outname}" STREQUAL "prop_archive_outname-NOTFOUND")) + set(outname "${prop_archive_outname}") + endif() + set(filename "${CMAKE_STATIC_LIBRARY_PREFIX}${outname}${CMAKE_STATIC_LIBRARY_SUFFIX}") + elseif(prop_type STREQUAL "MODULE_LIBRARY") + if(NOT ("${prop_library_outname}" STREQUAL "prop_library_outname-NOTFOUND")) + set(outname "${prop_library_outname}") + endif() + set(filename "${CMAKE_SHARED_MODULE_LIBRARY_PREFIX}${outname}${CMAKE_SHARED_MODULE_LIBRARY_SUFFIX}") + elseif(prop_type STREQUAL "SHARED_LIBRARY") + if(WIN32) + if(NOT ("${prop_runtime_outname}" STREQUAL "prop_runtime_outname-NOTFOUND")) + set(outname "${prop_runtime_outname}") + endif() + else() + if(NOT ("${prop_library_outname}" STREQUAL "prop_library_outname-NOTFOUND")) + set(outname "${prop_library_outname}") + endif() + endif() + set(filename "${CMAKE_SHARED_LIBRARY_PREFIX}${outname}${CMAKE_SHARED_LIBRARY_SUFFIX}") + elseif(prop_type STREQUAL "EXECUTABLE") + if(NOT ("${prop_runtime_outname}" STREQUAL "prop_runtime_outname-NOTFOUND")) + set(outname "${prop_runtime_outname}") + endif() + set(filename "${CMAKE_EXECUTABLE_PREFIX}${outname}${CMAKE_EXECUTABLE_SUFFIX}") + else() + message(FATAL_ERROR "target \"${target}\" is not of type STATIC_LIBRARY, MODULE_LIBRARY, SHARED_LIBRARY, or EXECUTABLE.") + endif() + set("${outvar}" "${filename}" PARENT_SCOPE) +endfunction() + + +# Function get_cmake_properties returns list of all propreties that cmake supports +function(get_cmake_properties outvar) + execute_process(COMMAND cmake --help-property-list OUTPUT_VARIABLE cmake_properties) + # Convert command output into a CMake list + string(REGEX REPLACE ";" "\\\\;" cmake_properties "${cmake_properties}") + string(REGEX REPLACE "\n" ";" cmake_properties "${cmake_properties}") + list(REMOVE_DUPLICATES cmake_properties) + set("${outvar}" "${cmake_properties}" PARENT_SCOPE) +endfunction() + +# Function get_target_property_list returns list of all propreties set for target +function(get_target_property_list target outvar) + get_cmake_properties(cmake_property_list) + foreach(property ${cmake_property_list}) + string(REPLACE "" "${CMAKE_BUILD_TYPE}" property ${property}) + + # https://stackoverflow.com/questions/32197663/how-can-i-remove-the-the-location-property-may-not-be-read-from-target-error-i + if(property STREQUAL "LOCATION" OR property MATCHES "^LOCATION_" OR property MATCHES "_LOCATION$") + continue() + endif() + + get_property(was_set 
+        if(was_set)
+            get_target_property(value ${target} ${property})
+            string(REGEX REPLACE ";" "\\\\\\\\;" value "${value}")
+            list(APPEND outvar "${property} = ${value}")
+        endif()
+    endforeach()
+    set(${outvar} ${${outvar}} PARENT_SCOPE)
+endfunction()
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 2af468970f17..390b0241e7d9 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -92,6 +92,7 @@ add_contrib (google-protobuf-cmake google-protobuf)
 add_contrib (openldap-cmake openldap)
 add_contrib (grpc-cmake grpc)
 add_contrib (msgpack-c-cmake msgpack-c)
+add_contrib (libarchive-cmake libarchive)
 
 add_contrib (corrosion-cmake corrosion)
 
@@ -134,10 +135,8 @@ add_contrib (aws-cmake
     aws-cmake
 )
 
-add_contrib (base64-cmake base64)
-if (NOT ARCH_S390X)
+add_contrib (aklomp-base64-cmake aklomp-base64)
 add_contrib (simdjson-cmake simdjson)
-endif()
 add_contrib (rapidjson-cmake rapidjson)
 add_contrib (fastops-cmake fastops)
 add_contrib (libuv-cmake libuv)
@@ -164,14 +163,13 @@ add_contrib (libpq-cmake libpq)
 add_contrib (nuraft-cmake NuRaft)
 add_contrib (fast_float-cmake fast_float)
 add_contrib (datasketches-cpp-cmake datasketches-cpp)
-add_contrib (hashidsxx-cmake hashidsxx)
+add_contrib (incbin-cmake incbin)
 
 option(ENABLE_NLP "Enable NLP functions support" ${ENABLE_LIBRARIES})
 if (ENABLE_NLP)
     add_contrib (libstemmer-c-cmake libstemmer_c)
     add_contrib (wordnet-blast-cmake wordnet-blast)
     add_contrib (lemmagen-c-cmake lemmagen-c)
-    add_contrib (nlp-data-cmake nlp-data)
     add_contrib (cld2-cmake cld2)
 endif()
 
@@ -196,6 +194,17 @@ if (ARCH_S390X)
     add_contrib(crc32-s390x-cmake crc32-s390x)
 endif()
 add_contrib (annoy-cmake annoy)
+
+option(ENABLE_USEARCH "Enable USearch (Approximate Neighborhood Search, HNSW) support" ${ENABLE_LIBRARIES})
+if (ENABLE_USEARCH)
+    add_contrib (FP16-cmake FP16)
+    add_contrib (robin-map-cmake robin-map)
+    add_contrib (SimSIMD-cmake SimSIMD)
+    add_contrib (usearch-cmake usearch) # requires: FP16, robin-map, SimSIMD
+else ()
+    message(STATUS "Not using USearch")
+endif ()
+
 add_contrib (xxHash-cmake xxHash)
 
 add_contrib (libbcrypt-cmake libbcrypt)
@@ -203,6 +212,8 @@ add_contrib (libbcrypt-cmake libbcrypt)
 add_contrib (google-benchmark-cmake google-benchmark)
 add_contrib (ulid-c-cmake ulid-c)
 
+add_contrib (libssh-cmake libssh)
+
 # Put all targets defined here and in subdirectories under "contrib/" folders in GUI-based IDEs.
 # Some of third-party projects may override CMAKE_FOLDER or FOLDER property of their targets, so they would not appear
 # in "contrib/..."
 # as originally planned, so we work around this by fixing FOLDER properties of all targets manually,
diff --git a/contrib/FP16 b/contrib/FP16
new file mode 160000
index 000000000000..0a92994d729f
--- /dev/null
+++ b/contrib/FP16
@@ -0,0 +1 @@
+Subproject commit 0a92994d729ff76a58f692d3028ca1b64b145d91
diff --git a/contrib/FP16-cmake/CMakeLists.txt b/contrib/FP16-cmake/CMakeLists.txt
new file mode 100644
index 000000000000..f82ad705dcc7
--- /dev/null
+++ b/contrib/FP16-cmake/CMakeLists.txt
@@ -0,0 +1 @@
+# See contrib/usearch-cmake/CMakeLists.txt
diff --git a/contrib/NuRaft b/contrib/NuRaft
index 491eaf592d95..eb1572129c71 160000
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@@ -1 +1 @@
-Subproject commit 491eaf592d950e0e37accbe8b3f217e068c9fecf
+Subproject commit eb1572129c71beb2156dcdaadc3fb136954aed96
diff --git a/contrib/SimSIMD b/contrib/SimSIMD
new file mode 160000
index 000000000000..de2cb75b9e9e
--- /dev/null
+++ b/contrib/SimSIMD
@@ -0,0 +1 @@
+Subproject commit de2cb75b9e9e3389d5e1e51fd9f8ed151f3c17cf
diff --git a/contrib/SimSIMD-cmake/CMakeLists.txt b/contrib/SimSIMD-cmake/CMakeLists.txt
new file mode 100644
index 000000000000..f82ad705dcc7
--- /dev/null
+++ b/contrib/SimSIMD-cmake/CMakeLists.txt
@@ -0,0 +1 @@
+# See contrib/usearch-cmake/CMakeLists.txt
diff --git a/contrib/abseil-cpp b/contrib/abseil-cpp
index 215105818dfd..5655528c4183 160000
--- a/contrib/abseil-cpp
+++ b/contrib/abseil-cpp
@@ -1 +1 @@
-Subproject commit 215105818dfde3174fe799600bb0f3cae233d0bf
+Subproject commit 5655528c41830f733160de4fb0b99073841bae9e
diff --git a/contrib/abseil-cpp-cmake/CMakeLists.txt b/contrib/abseil-cpp-cmake/CMakeLists.txt
index 4c31ecfc599c..2901daf32db6 100644
--- a/contrib/abseil-cpp-cmake/CMakeLists.txt
+++ b/contrib/abseil-cpp-cmake/CMakeLists.txt
@@ -1,5 +1,5 @@
 set(ABSL_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp")
-set(BUILD_TESTING OFF)
+
 set(ABSL_PROPAGATE_CXX_STD ON)
 add_subdirectory("${ABSL_ROOT_DIR}" "${ClickHouse_BINARY_DIR}/contrib/abseil-cpp")
 
@@ -17,3 +17,17 @@ get_target_property(FLAT_HASH_SET_INCLUDE_DIR absl::flat_hash_set INTERFACE_INCL
 target_include_directories (_abseil_swiss_tables SYSTEM BEFORE INTERFACE ${FLAT_HASH_SET_INCLUDE_DIR})
 
 add_library(ch_contrib::abseil_swiss_tables ALIAS _abseil_swiss_tables)
+
+set(ABSL_FORMAT_SRC
+    ${ABSL_ROOT_DIR}/absl/strings/internal/str_format/arg.cc
+    ${ABSL_ROOT_DIR}/absl/strings/internal/str_format/bind.cc
+    ${ABSL_ROOT_DIR}/absl/strings/internal/str_format/extension.cc
+    ${ABSL_ROOT_DIR}/absl/strings/internal/str_format/float_conversion.cc
+    ${ABSL_ROOT_DIR}/absl/strings/internal/str_format/output.cc
+    ${ABSL_ROOT_DIR}/absl/strings/internal/str_format/parser.cc
+)
+
+add_library(_abseil_str_format ${ABSL_FORMAT_SRC})
+target_include_directories(_abseil_str_format PUBLIC ${ABSL_ROOT_DIR})
+
+add_library(ch_contrib::abseil_str_format ALIAS _abseil_str_format)
diff --git a/contrib/aklomp-base64 b/contrib/aklomp-base64
new file mode 160000
index 000000000000..e77bd70bdd86
--- /dev/null
+++ b/contrib/aklomp-base64
@@ -0,0 +1 @@
+Subproject commit e77bd70bdd860c52c561568cffb251d88bba064c
diff --git a/contrib/aklomp-base64-cmake/.gitignore b/contrib/aklomp-base64-cmake/.gitignore
new file mode 100644
index 000000000000..0e56cf2f8c1e
--- /dev/null
+++ b/contrib/aklomp-base64-cmake/.gitignore
@@ -0,0 +1 @@
+config.h
diff --git a/contrib/aklomp-base64-cmake/CMakeLists.txt b/contrib/aklomp-base64-cmake/CMakeLists.txt
new file mode 100644
index 000000000000..4b988fad8609
--- /dev/null
+++ b/contrib/aklomp-base64-cmake/CMakeLists.txt
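The new build script below exposes the vectorized aklomp codec under the
ch_contrib::base64 alias. A minimal sketch of how a consumer target would hook
it up (the target name "clickhouse_functions" and the USE_BASE64 macro are
illustrative assumptions, not taken from this diff):

    if (TARGET ch_contrib::base64)
        # link the SIMD-dispatching codec and advertise its availability
        target_link_libraries (clickhouse_functions PRIVATE ch_contrib::base64)
        target_compile_definitions (clickhouse_functions PRIVATE USE_BASE64=1)
    endif ()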
@@ -0,0 +1,68 @@ +option (ENABLE_BASE64 "Enable base64" ${ENABLE_LIBRARIES}) + +if (NOT ENABLE_BASE64) + message(STATUS "Not using base64") + return() +endif() + +SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/aklomp-base64") + +if (ARCH_AMD64) + # These defines enable/disable SIMD codecs in base64's runtime codec dispatch. + # We don't want to limit ourselves --> enable all. + set(HAVE_SSSE3 1) + set(HAVE_SSE41 1) + set(HAVE_SSE42 1) + set(HAVE_AVX 1) + set(HAVE_AVX2 1) + set(HAVE_AVX512 1) +endif () + +if (ARCH_AARCH64) + # The choice of HAVE_NEON* depends on the target machine because base64 provides + # no runtime dispatch on ARM. NEON is only mandatory with the normal build profile. + if(NOT NO_ARMV81_OR_HIGHER) + set(HAVE_NEON64 1) + set(HAVE_NEON32 0) + endif () +endif () + +configure_file(config.h.in ${CMAKE_CURRENT_BINARY_DIR}/config.h) + +add_library(_base64 + "${LIBRARY_DIR}/lib/lib.c" + "${LIBRARY_DIR}/lib/codec_choose.c" + + "${LIBRARY_DIR}/lib/tables/tables.c" + "${LIBRARY_DIR}/lib/tables/table_dec_32bit.h" + "${LIBRARY_DIR}/lib/tables/table_enc_12bit.h" + + "${LIBRARY_DIR}/lib/codecs.h" + + "${CMAKE_CURRENT_BINARY_DIR}/config.h" + + "${LIBRARY_DIR}/lib/arch/generic/codec.c" + "${LIBRARY_DIR}/lib/arch/ssse3/codec.c" + "${LIBRARY_DIR}/lib/arch/sse41/codec.c" + "${LIBRARY_DIR}/lib/arch/sse42/codec.c" + "${LIBRARY_DIR}/lib/arch/avx/codec.c" + "${LIBRARY_DIR}/lib/arch/avx2/codec.c" + "${LIBRARY_DIR}/lib/arch/avx512/codec.c" + + "${LIBRARY_DIR}/lib/arch/neon32/codec.c" + "${LIBRARY_DIR}/lib/arch/neon64/codec.c" +) + +if (ARCH_AMD64) + set_source_files_properties(${LIBRARY_DIR}/lib/arch/ssse3/codec.c PROPERTIES COMPILE_FLAGS "-mssse3") + set_source_files_properties(${LIBRARY_DIR}/lib/arch/sse41/codec.c PROPERTIES COMPILE_FLAGS "-msse4.1") + set_source_files_properties(${LIBRARY_DIR}/lib/arch/sse42/codec.c PROPERTIES COMPILE_FLAGS "-msse4.2") + set_source_files_properties(${LIBRARY_DIR}/lib/arch/avx/codec.c PROPERTIES COMPILE_FLAGS "-mavx") + set_source_files_properties(${LIBRARY_DIR}/lib/arch/avx2/codec.c PROPERTIES COMPILE_FLAGS "-mavx2") + set_source_files_properties(${LIBRARY_DIR}/lib/arch/avx512/codec.c PROPERTIES COMPILE_FLAGS "-mavx512vl -mavx512vbmi") +endif() + +target_include_directories(_base64 SYSTEM PUBLIC ${LIBRARY_DIR}/include) +target_include_directories(_base64 PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) + +add_library(ch_contrib::base64 ALIAS _base64) diff --git a/contrib/aklomp-base64-cmake/config.h.in b/contrib/aklomp-base64-cmake/config.h.in new file mode 100644 index 000000000000..4dc84632b30b --- /dev/null +++ b/contrib/aklomp-base64-cmake/config.h.in @@ -0,0 +1,9 @@ +#cmakedefine01 HAVE_SSSE3 +#cmakedefine01 HAVE_SSE41 +#cmakedefine01 HAVE_SSE42 +#cmakedefine01 HAVE_AVX +#cmakedefine01 HAVE_AVX2 +#cmakedefine01 HAVE_AVX512 + +#cmakedefine01 HAVE_NEON32 +#cmakedefine01 HAVE_NEON64 diff --git a/contrib/arrow b/contrib/arrow index 1f1b3d35fb6e..1d93838f69a8 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit 1f1b3d35fb6eb73e6492d3afd8a85cde848d174f +Subproject commit 1d93838f69a802639ca144ea5704a98e2481810d diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index 46b86cb4ddb0..02e809c560f7 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -334,20 +334,36 @@ set(ARROW_SRCS "${LIBRARY_DIR}/compute/api_vector.cc" "${LIBRARY_DIR}/compute/cast.cc" "${LIBRARY_DIR}/compute/exec.cc" + "${LIBRARY_DIR}/compute/exec/accumulation_queue.cc" + 
"${LIBRARY_DIR}/compute/exec/accumulation_queue.h" + "${LIBRARY_DIR}/compute/exec/aggregate.cc" "${LIBRARY_DIR}/compute/exec/aggregate_node.cc" + "${LIBRARY_DIR}/compute/exec/asof_join_node.cc" + "${LIBRARY_DIR}/compute/exec/bloom_filter.cc" "${LIBRARY_DIR}/compute/exec/exec_plan.cc" "${LIBRARY_DIR}/compute/exec/expression.cc" "${LIBRARY_DIR}/compute/exec/filter_node.cc" - "${LIBRARY_DIR}/compute/exec/project_node.cc" - "${LIBRARY_DIR}/compute/exec/source_node.cc" - "${LIBRARY_DIR}/compute/exec/sink_node.cc" + "${LIBRARY_DIR}/compute/exec/hash_join.cc" + "${LIBRARY_DIR}/compute/exec/hash_join_dict.cc" + "${LIBRARY_DIR}/compute/exec/hash_join_node.cc" + "${LIBRARY_DIR}/compute/exec/key_hash.cc" + "${LIBRARY_DIR}/compute/exec/key_map.cc" + "${LIBRARY_DIR}/compute/exec/map_node.cc" + "${LIBRARY_DIR}/compute/exec/options.cc" "${LIBRARY_DIR}/compute/exec/order_by_impl.cc" "${LIBRARY_DIR}/compute/exec/partition_util.cc" + "${LIBRARY_DIR}/compute/exec/project_node.cc" + "${LIBRARY_DIR}/compute/exec/query_context.cc" + "${LIBRARY_DIR}/compute/exec/sink_node.cc" + "${LIBRARY_DIR}/compute/exec/source_node.cc" + "${LIBRARY_DIR}/compute/exec/swiss_join.cc" + "${LIBRARY_DIR}/compute/exec/task_util.cc" + "${LIBRARY_DIR}/compute/exec/tpch_node.cc" + "${LIBRARY_DIR}/compute/exec/union_node.cc" + "${LIBRARY_DIR}/compute/exec/util.cc" "${LIBRARY_DIR}/compute/function.cc" "${LIBRARY_DIR}/compute/function_internal.cc" "${LIBRARY_DIR}/compute/kernel.cc" - "${LIBRARY_DIR}/compute/light_array.cc" - "${LIBRARY_DIR}/compute/registry.cc" "${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc" "${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc" "${LIBRARY_DIR}/compute/kernels/aggregate_quantile.cc" @@ -355,49 +371,43 @@ set(ARROW_SRCS "${LIBRARY_DIR}/compute/kernels/aggregate_var_std.cc" "${LIBRARY_DIR}/compute/kernels/codegen_internal.cc" "${LIBRARY_DIR}/compute/kernels/hash_aggregate.cc" + "${LIBRARY_DIR}/compute/kernels/row_encoder.cc" "${LIBRARY_DIR}/compute/kernels/scalar_arithmetic.cc" "${LIBRARY_DIR}/compute/kernels/scalar_boolean.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_dictionary.cc" - "${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_extension.cc" + "${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_nested.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_numeric.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_string.cc" "${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc" "${LIBRARY_DIR}/compute/kernels/scalar_compare.cc" + "${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc" "${LIBRARY_DIR}/compute/kernels/scalar_nested.cc" "${LIBRARY_DIR}/compute/kernels/scalar_random.cc" "${LIBRARY_DIR}/compute/kernels/scalar_round.cc" "${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc" + "${LIBRARY_DIR}/compute/kernels/scalar_string_ascii.cc" + "${LIBRARY_DIR}/compute/kernels/scalar_string_utf8.cc" "${LIBRARY_DIR}/compute/kernels/scalar_temporal_binary.cc" "${LIBRARY_DIR}/compute/kernels/scalar_temporal_unary.cc" "${LIBRARY_DIR}/compute/kernels/scalar_validity.cc" - "${LIBRARY_DIR}/compute/kernels/scalar_if_else.cc" - "${LIBRARY_DIR}/compute/kernels/scalar_string_ascii.cc" - "${LIBRARY_DIR}/compute/kernels/scalar_string_utf8.cc" "${LIBRARY_DIR}/compute/kernels/util_internal.cc" "${LIBRARY_DIR}/compute/kernels/vector_array_sort.cc" "${LIBRARY_DIR}/compute/kernels/vector_cumulative_ops.cc" "${LIBRARY_DIR}/compute/kernels/vector_hash.cc" - 
"${LIBRARY_DIR}/compute/kernels/vector_rank.cc" - "${LIBRARY_DIR}/compute/kernels/vector_select_k.cc" "${LIBRARY_DIR}/compute/kernels/vector_nested.cc" + "${LIBRARY_DIR}/compute/kernels/vector_rank.cc" "${LIBRARY_DIR}/compute/kernels/vector_replace.cc" + "${LIBRARY_DIR}/compute/kernels/vector_select_k.cc" "${LIBRARY_DIR}/compute/kernels/vector_selection.cc" "${LIBRARY_DIR}/compute/kernels/vector_sort.cc" - "${LIBRARY_DIR}/compute/kernels/row_encoder.cc" - "${LIBRARY_DIR}/compute/exec/union_node.cc" - "${LIBRARY_DIR}/compute/exec/key_hash.cc" - "${LIBRARY_DIR}/compute/exec/key_map.cc" - "${LIBRARY_DIR}/compute/exec/util.cc" - "${LIBRARY_DIR}/compute/exec/hash_join_dict.cc" - "${LIBRARY_DIR}/compute/exec/hash_join.cc" - "${LIBRARY_DIR}/compute/exec/hash_join_node.cc" - "${LIBRARY_DIR}/compute/exec/task_util.cc" + "${LIBRARY_DIR}/compute/light_array.cc" + "${LIBRARY_DIR}/compute/registry.cc" + "${LIBRARY_DIR}/compute/row/compare_internal.cc" "${LIBRARY_DIR}/compute/row/encode_internal.cc" "${LIBRARY_DIR}/compute/row/grouper.cc" - "${LIBRARY_DIR}/compute/row/compare_internal.cc" "${LIBRARY_DIR}/compute/row/row_internal.cc" "${LIBRARY_DIR}/ipc/dictionary.cc" @@ -502,9 +512,10 @@ target_include_directories(_parquet SYSTEM BEFORE "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src" "${CMAKE_CURRENT_SOURCE_DIR}/cpp/src") target_link_libraries(_parquet - PUBLIC _arrow - PRIVATE + PUBLIC + _arrow ch_contrib::thrift + PRIVATE boost::headers_only boost::regex OpenSSL::Crypto OpenSSL::SSL) diff --git a/contrib/azure-cmake/CMakeLists.txt b/contrib/azure-cmake/CMakeLists.txt index 1e2a4c97824f..7aba81259d3a 100644 --- a/contrib/azure-cmake/CMakeLists.txt +++ b/contrib/azure-cmake/CMakeLists.txt @@ -1,6 +1,6 @@ option (ENABLE_AZURE_BLOB_STORAGE "Enable Azure blob storage" ${ENABLE_LIBRARIES}) -if (NOT ENABLE_AZURE_BLOB_STORAGE OR BUILD_STANDALONE_KEEPER OR OS_FREEBSD) +if (NOT ENABLE_AZURE_BLOB_STORAGE OR OS_FREEBSD) message(STATUS "Not using Azure blob storage") return() endif() diff --git a/contrib/base64 b/contrib/base64 deleted file mode 160000 index 9499e0c49455..000000000000 --- a/contrib/base64 +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 9499e0c4945589973b9ea1bc927377cfbc84aa46 diff --git a/contrib/base64-cmake/CMakeLists.txt b/contrib/base64-cmake/CMakeLists.txt deleted file mode 100644 index 333e0a96a0ba..000000000000 --- a/contrib/base64-cmake/CMakeLists.txt +++ /dev/null @@ -1,60 +0,0 @@ -if(ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_PPC64LE OR ARCH_S390X) - option (ENABLE_BASE64 "Enable base64" ${ENABLE_LIBRARIES}) -elseif(ENABLE_BASE64) - message (${RECONFIGURE_MESSAGE_LEVEL} "base64 library is only supported on x86_64 and aarch64") -endif() - -if (NOT ENABLE_BASE64) - message(STATUS "Not using base64") - return() -endif() - -SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/base64") - -add_library(_base64_scalar OBJECT "${LIBRARY_DIR}/turbob64c.c" "${LIBRARY_DIR}/turbob64d.c") -add_library(_base64_ssse3 OBJECT "${LIBRARY_DIR}/turbob64sse.c") # This file also contains code for ARM NEON - -if (ARCH_AMD64) - add_library(_base64_avx OBJECT "${LIBRARY_DIR}/turbob64sse.c") # This is not a mistake. One file is compiled twice. 
-    add_library(_base64_avx2 OBJECT "${LIBRARY_DIR}/turbob64avx2.c")
-endif ()
-
-target_compile_options(_base64_scalar PRIVATE -falign-loops)
-
-if (ARCH_AMD64)
-    target_compile_options(_base64_ssse3 PRIVATE -mno-avx -mno-avx2 -mssse3 -falign-loops)
-    target_compile_options(_base64_avx PRIVATE -falign-loops -mavx)
-    target_compile_options(_base64_avx2 PRIVATE -falign-loops -mavx2)
-else ()
-    if (ARCH_PPC64LE)
-        target_compile_options(_base64_ssse3 PRIVATE -D__SSSE3__ -falign-loops)
-    else()
-        target_compile_options(_base64_ssse3 PRIVATE -falign-loops)
-    endif()
-endif ()
-
-if (ARCH_AMD64)
-    add_library(_base64
-        $<TARGET_OBJECTS:_base64_scalar>
-        $<TARGET_OBJECTS:_base64_ssse3>
-        $<TARGET_OBJECTS:_base64_avx>
-        $<TARGET_OBJECTS:_base64_avx2>)
-else ()
-    add_library(_base64
-        $<TARGET_OBJECTS:_base64_scalar>
-        $<TARGET_OBJECTS:_base64_ssse3>)
-endif ()
-
-target_include_directories(_base64 SYSTEM PUBLIC ${LIBRARY_DIR})
-
-if (XCODE OR XCODE_VERSION)
-    # https://gitlab.kitware.com/cmake/cmake/issues/17457
-    # Some native build systems may not like targets that have only object files, so consider adding at least one real source file
-    # This applies to Xcode.
-    if (NOT EXISTS "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
-        file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c" "")
-    endif ()
-    target_sources(_base64 PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/dummy.c")
-endif ()
-
-add_library(ch_contrib::base64 ALIAS _base64)
diff --git a/contrib/boost b/contrib/boost
index aec12eea7fc7..ae94606a70f1 160000
--- a/contrib/boost
+++ b/contrib/boost
@@ -1 +1 @@
-Subproject commit aec12eea7fc762721ae16943d1361340c66c9c17
+Subproject commit ae94606a70f1e298ce2a5718db858079185c4d9c
diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt
index 6f9dce0b042d..343e863e4968 100644
--- a/contrib/boost-cmake/CMakeLists.txt
+++ b/contrib/boost-cmake/CMakeLists.txt
@@ -19,18 +19,22 @@ add_library (_boost_filesystem ${SRCS_FILESYSTEM})
 add_library (boost::filesystem ALIAS _boost_filesystem)
 target_include_directories (_boost_filesystem SYSTEM BEFORE PUBLIC ${LIBRARY_DIR})
 
-# headers-only
+if (OS_LINUX)
+    target_compile_definitions (_boost_filesystem PRIVATE
+        BOOST_FILESYSTEM_HAS_POSIX_AT_APIS=1
+    )
+endif ()
+
+# headers-only
 add_library (_boost_headers_only INTERFACE)
 add_library (boost::headers_only ALIAS _boost_headers_only)
 target_include_directories (_boost_headers_only SYSTEM BEFORE INTERFACE ${LIBRARY_DIR})
 
-# asio
-
 target_compile_definitions (_boost_headers_only INTERFACE
     BOOST_ASIO_STANDALONE=1
-    # Avoid using of deprecated in c++ > 17 std::result_of
-    BOOST_ASIO_HAS_STD_INVOKE_RESULT=1
+    BOOST_ASIO_HAS_STD_INVOKE_RESULT=1 # Avoid using std::result_of, deprecated in C++ > 17
+    BOOST_TIMER_ENABLE_DEPRECATED=1 # wordnet-blast (enabled via USE_NLP) uses Boost legacy timer classes
 )
 
 # iostreams
@@ -172,9 +176,9 @@ endif()
 
 # coroutine
 set (SRCS_COROUTINE
-    "${LIBRARY_DIR}/libs/coroutine/detail/coroutine_context.cpp"
-    "${LIBRARY_DIR}/libs/coroutine/exceptions.cpp"
-    "${LIBRARY_DIR}/libs/coroutine/posix/stack_traits.cpp"
+    "${LIBRARY_DIR}/libs/coroutine/src/detail/coroutine_context.cpp"
+    "${LIBRARY_DIR}/libs/coroutine/src/exceptions.cpp"
+    "${LIBRARY_DIR}/libs/coroutine/src/posix/stack_traits.cpp"
 )
 add_library (_boost_coroutine ${SRCS_COROUTINE})
 add_library (boost::coroutine ALIAS _boost_coroutine)
diff --git a/contrib/cctz b/contrib/cctz
index 5e05432420f9..8529bcef5cd9 160000
--- a/contrib/cctz
+++ b/contrib/cctz
@@ -1 +1 @@
-Subproject commit 5e05432420f9692418e2e12aff09859e420b14a2
+Subproject commit 8529bcef5cd996b7c0f4d7475286b76b5d126c4c
diff --git a/contrib/cctz-cmake/CMakeLists.txt b/contrib/cctz-cmake/CMakeLists.txt
index 10070fbd9494..7161f743de15 100644
--- a/contrib/cctz-cmake/CMakeLists.txt
+++ b/contrib/cctz-cmake/CMakeLists.txt
@@ -1,4 +1,3 @@
-include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
 set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz")
 
 set (SRCS
@@ -23,12 +22,10 @@ if (OS_FREEBSD)
 endif ()
 
 # Related to time_zones table:
-# StorageSystemTimeZones.generated.cpp is autogenerated each time during a build
-# data in this file will be used to populate the system.time_zones table, this is specific to OS_LINUX
-# as the library that's built using embedded tzdata is also specific to OS_LINUX
-set(SYSTEM_STORAGE_TZ_FILE "${PROJECT_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
+# TimeZones.generated.cpp is autogenerated each time during a build
+set(TIMEZONES_FILE "${CMAKE_CURRENT_BINARY_DIR}/TimeZones.generated.cpp")
 # remove existing copies so that it's generated fresh on each build.
-file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
+file(REMOVE ${TIMEZONES_FILE})
 
 # get the list of timezones from tzdata shipped with cctz
 set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo")
@@ -36,28 +33,44 @@ file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION)
 set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}")
 message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}")
 
-set(TIMEZONE_RESOURCE_FILES)
-
 # each file in that dir (except tab and localtime) stores the info about a timezone
 execute_process(COMMAND
     bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | LC_ALL=C sort | paste -sd ';' -"
     OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE TIMEZONES)
 
-file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
-file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {\n" )
+file(APPEND ${TIMEZONES_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
+file(APPEND ${TIMEZONES_FILE} "#include <incbin.h>\n")
+
+set (COUNTER 1)
 foreach(TIMEZONE ${TIMEZONES})
-    file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "    \"${TIMEZONE}\",\n")
-    list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}")
+    file(APPEND ${TIMEZONES_FILE} "INCBIN(resource_timezone${COUNTER}, \"${TZDIR}/${TIMEZONE}\");\n")
+    MATH(EXPR COUNTER "${COUNTER}+1")
 endforeach(TIMEZONE)
-file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "    nullptr};\n")
-clickhouse_embed_binaries(
-    TARGET tzdata
-    RESOURCE_DIR "${TZDIR}"
-    RESOURCES ${TIMEZONE_RESOURCE_FILES}
-)
-add_dependencies(_cctz tzdata)
-target_link_libraries(_cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:tzdata> -Wl,${NO_WHOLE_ARCHIVE}")
+
+file(APPEND ${TIMEZONES_FILE} "const char * auto_time_zones[] {\n" )
+
+foreach(TIMEZONE ${TIMEZONES})
+    file(APPEND ${TIMEZONES_FILE} "    \"${TIMEZONE}\",\n")
+    MATH(EXPR COUNTER "${COUNTER}+1")
+endforeach(TIMEZONE)
+
+file(APPEND ${TIMEZONES_FILE} "    nullptr\n};\n\n")
+
+file(APPEND ${TIMEZONES_FILE} "#include <string_view>\n\n")
+file(APPEND ${TIMEZONES_FILE} "std::string_view getTimeZone(const char * name)\n{\n" )
+
+set (COUNTER 1)
+foreach(TIMEZONE ${TIMEZONES})
+    file(APPEND ${TIMEZONES_FILE} "    if (std::string_view(\"${TIMEZONE}\") == name) return { reinterpret_cast<const char *>(gresource_timezone${COUNTER}Data), gresource_timezone${COUNTER}Size };\n")
+    MATH(EXPR COUNTER "${COUNTER}+1")
+endforeach(TIMEZONE)
+
+file(APPEND ${TIMEZONES_FILE} "    return {};\n")
+file(APPEND ${TIMEZONES_FILE} "}\n")
+
+add_library (tzdata ${TIMEZONES_FILE})
+target_link_libraries(tzdata ch_contrib::incbin)
+target_link_libraries(_cctz tzdata)
 
 add_library(ch_contrib::cctz ALIAS _cctz)
diff --git
a/contrib/cityhash102/include/city.h b/contrib/cityhash102/include/city.h index 87363d164442..c98eb7e3585b 100644 --- a/contrib/cityhash102/include/city.h +++ b/contrib/cityhash102/include/city.h @@ -73,8 +73,8 @@ struct uint128 uint128() = default; uint128(uint64 low64_, uint64 high64_) : low64(low64_), high64(high64_) {} - friend bool operator ==(const uint128 & x, const uint128 & y) { return (x.low64 == y.low64) && (x.high64 == y.high64); } - friend bool operator !=(const uint128 & x, const uint128 & y) { return !(x == y); } + + friend auto operator<=>(const uint128 &, const uint128 &) = default; }; inline uint64 Uint128Low64(const uint128 & x) { return x.low64; } diff --git a/contrib/corrosion-cmake/CMakeLists.txt b/contrib/corrosion-cmake/CMakeLists.txt index ea8f191564d7..8adc2c0b23a3 100644 --- a/contrib/corrosion-cmake/CMakeLists.txt +++ b/contrib/corrosion-cmake/CMakeLists.txt @@ -17,20 +17,22 @@ endif() message(STATUS "Checking Rust toolchain for current target") -if(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") - set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu") -endif() +# See https://doc.rust-lang.org/nightly/rustc/platform-support.html -if(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") +if((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) + set(Rust_CARGO_TARGET "x86_64-unknown-linux-musl") +elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-x86_64") + set(Rust_CARGO_TARGET "x86_64-unknown-linux-gnu") +elseif((CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") AND (CMAKE_TOOLCHAIN_FILE MATCHES "musl")) + set(Rust_CARGO_TARGET "aarch64-unknown-linux-musl") +elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-aarch64") set(Rust_CARGO_TARGET "aarch64-unknown-linux-gnu") -endif() - -if((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) +elseif((CMAKE_TOOLCHAIN_FILE MATCHES "darwin") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) set(Rust_CARGO_TARGET "x86_64-apple-darwin") -endif() - -if((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) +elseif((CMAKE_TOOLCHAIN_FILE MATCHES "freebsd") AND (CMAKE_TOOLCHAIN_FILE MATCHES "x86_64")) set(Rust_CARGO_TARGET "x86_64-unknown-freebsd") +elseif(CMAKE_TOOLCHAIN_FILE MATCHES "linux/toolchain-riscv64") + set(Rust_CARGO_TARGET "riscv64gc-unknown-linux-gnu") endif() if(CMAKE_TOOLCHAIN_FILE MATCHES "ppc64le") diff --git a/contrib/croaring b/contrib/croaring index f40ed52bcdd6..e4a7ad554274 160000 --- a/contrib/croaring +++ b/contrib/croaring @@ -1 +1 @@ -Subproject commit f40ed52bcdd635840a79877cef4857315dba817c +Subproject commit e4a7ad5542746103e71ca8b5e56225baf0014c87 diff --git a/contrib/croaring-cmake/CMakeLists.txt b/contrib/croaring-cmake/CMakeLists.txt index 794c0426b969..5f3b90e0f7c0 100644 --- a/contrib/croaring-cmake/CMakeLists.txt +++ b/contrib/croaring-cmake/CMakeLists.txt @@ -2,23 +2,25 @@ set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/croaring") set(SRCS "${LIBRARY_DIR}/src/array_util.c" + "${LIBRARY_DIR}/src/bitset.c" "${LIBRARY_DIR}/src/bitset_util.c" + "${LIBRARY_DIR}/src/isadetection.c" + "${LIBRARY_DIR}/src/memory.c" + "${LIBRARY_DIR}/src/roaring.c" + "${LIBRARY_DIR}/src/roaring_array.c" + "${LIBRARY_DIR}/src/roaring_priority_queue.c" "${LIBRARY_DIR}/src/containers/array.c" "${LIBRARY_DIR}/src/containers/bitset.c" "${LIBRARY_DIR}/src/containers/containers.c" "${LIBRARY_DIR}/src/containers/convert.c" - "${LIBRARY_DIR}/src/containers/mixed_intersection.c" - 
"${LIBRARY_DIR}/src/containers/mixed_union.c" + "${LIBRARY_DIR}/src/containers/mixed_andnot.c" "${LIBRARY_DIR}/src/containers/mixed_equal.c" - "${LIBRARY_DIR}/src/containers/mixed_subset.c" + "${LIBRARY_DIR}/src/containers/mixed_intersection.c" "${LIBRARY_DIR}/src/containers/mixed_negation.c" + "${LIBRARY_DIR}/src/containers/mixed_subset.c" + "${LIBRARY_DIR}/src/containers/mixed_union.c" "${LIBRARY_DIR}/src/containers/mixed_xor.c" - "${LIBRARY_DIR}/src/containers/mixed_andnot.c" - "${LIBRARY_DIR}/src/containers/run.c" - "${LIBRARY_DIR}/src/roaring.c" - "${LIBRARY_DIR}/src/roaring_priority_queue.c" - "${LIBRARY_DIR}/src/roaring_array.c" - "${LIBRARY_DIR}/src/memory.c") + "${LIBRARY_DIR}/src/containers/run.c") add_library(_roaring ${SRCS}) diff --git a/contrib/curl b/contrib/curl index b0edf0b7dae4..eb3b049df526 160000 --- a/contrib/curl +++ b/contrib/curl @@ -1 +1 @@ -Subproject commit b0edf0b7dae44d9e66f270a257cf654b35d5263d +Subproject commit eb3b049df526bf125eda23218e680ce7fa9ec46c diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt index 70d9c2816dc1..733865d5101f 100644 --- a/contrib/curl-cmake/CMakeLists.txt +++ b/contrib/curl-cmake/CMakeLists.txt @@ -8,125 +8,122 @@ endif() set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/curl") set (SRCS - "${LIBRARY_DIR}/lib/fopen.c" - "${LIBRARY_DIR}/lib/noproxy.c" - "${LIBRARY_DIR}/lib/idn.c" - "${LIBRARY_DIR}/lib/cfilters.c" - "${LIBRARY_DIR}/lib/cf-socket.c" + "${LIBRARY_DIR}/lib/altsvc.c" + "${LIBRARY_DIR}/lib/amigaos.c" + "${LIBRARY_DIR}/lib/asyn-thread.c" + "${LIBRARY_DIR}/lib/base64.c" + "${LIBRARY_DIR}/lib/bufq.c" + "${LIBRARY_DIR}/lib/bufref.c" + "${LIBRARY_DIR}/lib/cf-h1-proxy.c" "${LIBRARY_DIR}/lib/cf-haproxy.c" "${LIBRARY_DIR}/lib/cf-https-connect.c" - "${LIBRARY_DIR}/lib/file.c" - "${LIBRARY_DIR}/lib/timeval.c" - "${LIBRARY_DIR}/lib/base64.c" - "${LIBRARY_DIR}/lib/hostip.c" - "${LIBRARY_DIR}/lib/progress.c" - "${LIBRARY_DIR}/lib/formdata.c" + "${LIBRARY_DIR}/lib/cf-socket.c" + "${LIBRARY_DIR}/lib/cfilters.c" + "${LIBRARY_DIR}/lib/conncache.c" + "${LIBRARY_DIR}/lib/connect.c" + "${LIBRARY_DIR}/lib/content_encoding.c" "${LIBRARY_DIR}/lib/cookie.c" - "${LIBRARY_DIR}/lib/http.c" - "${LIBRARY_DIR}/lib/sendf.c" - "${LIBRARY_DIR}/lib/url.c" + "${LIBRARY_DIR}/lib/curl_addrinfo.c" + "${LIBRARY_DIR}/lib/curl_des.c" + "${LIBRARY_DIR}/lib/curl_endian.c" + "${LIBRARY_DIR}/lib/curl_fnmatch.c" + "${LIBRARY_DIR}/lib/curl_get_line.c" + "${LIBRARY_DIR}/lib/curl_gethostname.c" + "${LIBRARY_DIR}/lib/curl_gssapi.c" + "${LIBRARY_DIR}/lib/curl_memrchr.c" + "${LIBRARY_DIR}/lib/curl_multibyte.c" + "${LIBRARY_DIR}/lib/curl_ntlm_core.c" + "${LIBRARY_DIR}/lib/curl_ntlm_wb.c" + "${LIBRARY_DIR}/lib/curl_path.c" + "${LIBRARY_DIR}/lib/curl_range.c" + "${LIBRARY_DIR}/lib/curl_rtmp.c" + "${LIBRARY_DIR}/lib/curl_sasl.c" + "${LIBRARY_DIR}/lib/curl_sspi.c" + "${LIBRARY_DIR}/lib/curl_threads.c" + "${LIBRARY_DIR}/lib/curl_trc.c" "${LIBRARY_DIR}/lib/dict.c" - "${LIBRARY_DIR}/lib/if2ip.c" - "${LIBRARY_DIR}/lib/speedcheck.c" - "${LIBRARY_DIR}/lib/ldap.c" - "${LIBRARY_DIR}/lib/version.c" - "${LIBRARY_DIR}/lib/getenv.c" - "${LIBRARY_DIR}/lib/escape.c" - "${LIBRARY_DIR}/lib/mprintf.c" - "${LIBRARY_DIR}/lib/telnet.c" - "${LIBRARY_DIR}/lib/netrc.c" - "${LIBRARY_DIR}/lib/getinfo.c" - "${LIBRARY_DIR}/lib/transfer.c" - "${LIBRARY_DIR}/lib/strcase.c" + "${LIBRARY_DIR}/lib/doh.c" + "${LIBRARY_DIR}/lib/dynbuf.c" + "${LIBRARY_DIR}/lib/dynhds.c" "${LIBRARY_DIR}/lib/easy.c" - "${LIBRARY_DIR}/lib/curl_fnmatch.c" - "${LIBRARY_DIR}/lib/curl_log.c" + 
"${LIBRARY_DIR}/lib/escape.c" + "${LIBRARY_DIR}/lib/file.c" "${LIBRARY_DIR}/lib/fileinfo.c" - "${LIBRARY_DIR}/lib/krb5.c" - "${LIBRARY_DIR}/lib/memdebug.c" - "${LIBRARY_DIR}/lib/http_chunks.c" - "${LIBRARY_DIR}/lib/strtok.c" - "${LIBRARY_DIR}/lib/connect.c" - "${LIBRARY_DIR}/lib/llist.c" + "${LIBRARY_DIR}/lib/fopen.c" + "${LIBRARY_DIR}/lib/formdata.c" + "${LIBRARY_DIR}/lib/getenv.c" + "${LIBRARY_DIR}/lib/getinfo.c" + "${LIBRARY_DIR}/lib/gopher.c" "${LIBRARY_DIR}/lib/hash.c" - "${LIBRARY_DIR}/lib/multi.c" - "${LIBRARY_DIR}/lib/content_encoding.c" - "${LIBRARY_DIR}/lib/share.c" - "${LIBRARY_DIR}/lib/http_digest.c" - "${LIBRARY_DIR}/lib/md4.c" - "${LIBRARY_DIR}/lib/md5.c" - "${LIBRARY_DIR}/lib/http_negotiate.c" - "${LIBRARY_DIR}/lib/inet_pton.c" - "${LIBRARY_DIR}/lib/strtoofft.c" - "${LIBRARY_DIR}/lib/strerror.c" - "${LIBRARY_DIR}/lib/amigaos.c" + "${LIBRARY_DIR}/lib/headers.c" + "${LIBRARY_DIR}/lib/hmac.c" "${LIBRARY_DIR}/lib/hostasyn.c" + "${LIBRARY_DIR}/lib/hostip.c" "${LIBRARY_DIR}/lib/hostip4.c" "${LIBRARY_DIR}/lib/hostip6.c" "${LIBRARY_DIR}/lib/hostsyn.c" + "${LIBRARY_DIR}/lib/hsts.c" + "${LIBRARY_DIR}/lib/http.c" + "${LIBRARY_DIR}/lib/http2.c" + "${LIBRARY_DIR}/lib/http_aws_sigv4.c" + "${LIBRARY_DIR}/lib/http_chunks.c" + "${LIBRARY_DIR}/lib/http_digest.c" + "${LIBRARY_DIR}/lib/http_negotiate.c" + "${LIBRARY_DIR}/lib/http_ntlm.c" + "${LIBRARY_DIR}/lib/http_proxy.c" + "${LIBRARY_DIR}/lib/idn.c" + "${LIBRARY_DIR}/lib/if2ip.c" + "${LIBRARY_DIR}/lib/imap.c" "${LIBRARY_DIR}/lib/inet_ntop.c" + "${LIBRARY_DIR}/lib/inet_pton.c" + "${LIBRARY_DIR}/lib/krb5.c" + "${LIBRARY_DIR}/lib/ldap.c" + "${LIBRARY_DIR}/lib/llist.c" + "${LIBRARY_DIR}/lib/md4.c" + "${LIBRARY_DIR}/lib/md5.c" + "${LIBRARY_DIR}/lib/memdebug.c" + "${LIBRARY_DIR}/lib/mime.c" + "${LIBRARY_DIR}/lib/mprintf.c" + "${LIBRARY_DIR}/lib/mqtt.c" + "${LIBRARY_DIR}/lib/multi.c" + "${LIBRARY_DIR}/lib/netrc.c" + "${LIBRARY_DIR}/lib/nonblock.c" + "${LIBRARY_DIR}/lib/noproxy.c" + "${LIBRARY_DIR}/lib/openldap.c" "${LIBRARY_DIR}/lib/parsedate.c" + "${LIBRARY_DIR}/lib/pingpong.c" + "${LIBRARY_DIR}/lib/pop3.c" + "${LIBRARY_DIR}/lib/progress.c" + "${LIBRARY_DIR}/lib/psl.c" + "${LIBRARY_DIR}/lib/rand.c" + "${LIBRARY_DIR}/lib/rename.c" + "${LIBRARY_DIR}/lib/rtsp.c" "${LIBRARY_DIR}/lib/select.c" - "${LIBRARY_DIR}/lib/splay.c" - "${LIBRARY_DIR}/lib/strdup.c" + "${LIBRARY_DIR}/lib/sendf.c" + "${LIBRARY_DIR}/lib/setopt.c" + "${LIBRARY_DIR}/lib/sha256.c" + "${LIBRARY_DIR}/lib/share.c" + "${LIBRARY_DIR}/lib/slist.c" + "${LIBRARY_DIR}/lib/smb.c" + "${LIBRARY_DIR}/lib/smtp.c" + "${LIBRARY_DIR}/lib/socketpair.c" "${LIBRARY_DIR}/lib/socks.c" - "${LIBRARY_DIR}/lib/curl_addrinfo.c" "${LIBRARY_DIR}/lib/socks_gssapi.c" "${LIBRARY_DIR}/lib/socks_sspi.c" - "${LIBRARY_DIR}/lib/curl_sspi.c" - "${LIBRARY_DIR}/lib/slist.c" - "${LIBRARY_DIR}/lib/nonblock.c" - "${LIBRARY_DIR}/lib/curl_memrchr.c" - "${LIBRARY_DIR}/lib/imap.c" - "${LIBRARY_DIR}/lib/pop3.c" - "${LIBRARY_DIR}/lib/smtp.c" - "${LIBRARY_DIR}/lib/pingpong.c" - "${LIBRARY_DIR}/lib/rtsp.c" - "${LIBRARY_DIR}/lib/curl_threads.c" - "${LIBRARY_DIR}/lib/warnless.c" - "${LIBRARY_DIR}/lib/hmac.c" - "${LIBRARY_DIR}/lib/curl_rtmp.c" - "${LIBRARY_DIR}/lib/openldap.c" - "${LIBRARY_DIR}/lib/curl_gethostname.c" - "${LIBRARY_DIR}/lib/gopher.c" - "${LIBRARY_DIR}/lib/http_proxy.c" - "${LIBRARY_DIR}/lib/asyn-thread.c" - "${LIBRARY_DIR}/lib/curl_gssapi.c" - "${LIBRARY_DIR}/lib/http_ntlm.c" - "${LIBRARY_DIR}/lib/curl_ntlm_wb.c" - "${LIBRARY_DIR}/lib/curl_ntlm_core.c" - "${LIBRARY_DIR}/lib/curl_sasl.c" - "${LIBRARY_DIR}/lib/rand.c" - 
"${LIBRARY_DIR}/lib/curl_multibyte.c" - "${LIBRARY_DIR}/lib/conncache.c" - "${LIBRARY_DIR}/lib/cf-h1-proxy.c" - "${LIBRARY_DIR}/lib/http2.c" - "${LIBRARY_DIR}/lib/smb.c" - "${LIBRARY_DIR}/lib/curl_endian.c" - "${LIBRARY_DIR}/lib/curl_des.c" + "${LIBRARY_DIR}/lib/speedcheck.c" + "${LIBRARY_DIR}/lib/splay.c" + "${LIBRARY_DIR}/lib/strcase.c" + "${LIBRARY_DIR}/lib/strdup.c" + "${LIBRARY_DIR}/lib/strerror.c" + "${LIBRARY_DIR}/lib/strtok.c" + "${LIBRARY_DIR}/lib/strtoofft.c" "${LIBRARY_DIR}/lib/system_win32.c" - "${LIBRARY_DIR}/lib/mime.c" - "${LIBRARY_DIR}/lib/sha256.c" - "${LIBRARY_DIR}/lib/setopt.c" - "${LIBRARY_DIR}/lib/curl_path.c" - "${LIBRARY_DIR}/lib/curl_range.c" - "${LIBRARY_DIR}/lib/psl.c" - "${LIBRARY_DIR}/lib/doh.c" - "${LIBRARY_DIR}/lib/urlapi.c" - "${LIBRARY_DIR}/lib/curl_get_line.c" - "${LIBRARY_DIR}/lib/altsvc.c" - "${LIBRARY_DIR}/lib/socketpair.c" - "${LIBRARY_DIR}/lib/bufref.c" - "${LIBRARY_DIR}/lib/bufq.c" - "${LIBRARY_DIR}/lib/dynbuf.c" - "${LIBRARY_DIR}/lib/dynhds.c" - "${LIBRARY_DIR}/lib/hsts.c" - "${LIBRARY_DIR}/lib/http_aws_sigv4.c" - "${LIBRARY_DIR}/lib/mqtt.c" - "${LIBRARY_DIR}/lib/rename.c" - "${LIBRARY_DIR}/lib/headers.c" + "${LIBRARY_DIR}/lib/telnet.c" "${LIBRARY_DIR}/lib/timediff.c" - "${LIBRARY_DIR}/lib/vauth/vauth.c" + "${LIBRARY_DIR}/lib/timeval.c" + "${LIBRARY_DIR}/lib/transfer.c" + "${LIBRARY_DIR}/lib/url.c" + "${LIBRARY_DIR}/lib/urlapi.c" "${LIBRARY_DIR}/lib/vauth/cleartext.c" "${LIBRARY_DIR}/lib/vauth/cram.c" "${LIBRARY_DIR}/lib/vauth/digest.c" @@ -138,23 +135,24 @@ set (SRCS "${LIBRARY_DIR}/lib/vauth/oauth2.c" "${LIBRARY_DIR}/lib/vauth/spnego_gssapi.c" "${LIBRARY_DIR}/lib/vauth/spnego_sspi.c" + "${LIBRARY_DIR}/lib/vauth/vauth.c" + "${LIBRARY_DIR}/lib/version.c" "${LIBRARY_DIR}/lib/vquic/vquic.c" - "${LIBRARY_DIR}/lib/vtls/openssl.c" + "${LIBRARY_DIR}/lib/vssh/libssh.c" + "${LIBRARY_DIR}/lib/vssh/libssh2.c" + "${LIBRARY_DIR}/lib/vtls/bearssl.c" "${LIBRARY_DIR}/lib/vtls/gtls.c" - "${LIBRARY_DIR}/lib/vtls/vtls.c" - "${LIBRARY_DIR}/lib/vtls/nss.c" - "${LIBRARY_DIR}/lib/vtls/wolfssl.c" + "${LIBRARY_DIR}/lib/vtls/hostcheck.c" + "${LIBRARY_DIR}/lib/vtls/keylog.c" + "${LIBRARY_DIR}/lib/vtls/mbedtls.c" + "${LIBRARY_DIR}/lib/vtls/openssl.c" "${LIBRARY_DIR}/lib/vtls/schannel.c" "${LIBRARY_DIR}/lib/vtls/schannel_verify.c" "${LIBRARY_DIR}/lib/vtls/sectransp.c" - "${LIBRARY_DIR}/lib/vtls/gskit.c" - "${LIBRARY_DIR}/lib/vtls/mbedtls.c" - "${LIBRARY_DIR}/lib/vtls/bearssl.c" - "${LIBRARY_DIR}/lib/vtls/keylog.c" + "${LIBRARY_DIR}/lib/vtls/vtls.c" + "${LIBRARY_DIR}/lib/vtls/wolfssl.c" "${LIBRARY_DIR}/lib/vtls/x509asn1.c" - "${LIBRARY_DIR}/lib/vtls/hostcheck.c" - "${LIBRARY_DIR}/lib/vssh/libssh2.c" - "${LIBRARY_DIR}/lib/vssh/libssh.c" + "${LIBRARY_DIR}/lib/warnless.c" ) add_library (_curl ${SRCS}) diff --git a/contrib/googletest b/contrib/googletest index 71140c3ca7a8..e47544ad31cb 160000 --- a/contrib/googletest +++ b/contrib/googletest @@ -1 +1 @@ -Subproject commit 71140c3ca7a87bb1b5b9c9f1500fea8858cce344 +Subproject commit e47544ad31cb3ceecd04cc13e8fe556f8df9fe0b diff --git a/contrib/hashidsxx b/contrib/hashidsxx deleted file mode 160000 index 783f6911ccfd..000000000000 --- a/contrib/hashidsxx +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 783f6911ccfdaca83e3cfac084c4aad888a80cee diff --git a/contrib/hashidsxx-cmake/CMakeLists.txt b/contrib/hashidsxx-cmake/CMakeLists.txt deleted file mode 100644 index 17f3888bd94d..000000000000 --- a/contrib/hashidsxx-cmake/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/hashidsxx") - -set 
(SRCS
-    "${LIBRARY_DIR}/hashids.cpp"
-)
-
-set (HDRS
-    "${LIBRARY_DIR}/hashids.h"
-)
-
-add_library(_hashidsxx ${SRCS} ${HDRS})
-target_include_directories(_hashidsxx SYSTEM PUBLIC "${LIBRARY_DIR}")
-
-add_library(ch_contrib::hashidsxx ALIAS _hashidsxx)
diff --git a/contrib/idxd-config b/contrib/idxd-config
index f6605c41a735..a836ce0e4205 160000
--- a/contrib/idxd-config
+++ b/contrib/idxd-config
@@ -1 +1 @@
-Subproject commit f6605c41a735e3fdfef2d2d18655a33af6490b99
+Subproject commit a836ce0e42052a69bffbbc14239ab4097f3b77f1
diff --git a/contrib/incbin b/contrib/incbin
new file mode 160000
index 000000000000..6e576cae5ab5
--- /dev/null
+++ b/contrib/incbin
@@ -0,0 +1 @@
+Subproject commit 6e576cae5ab5810f25e2631f2e0b80cbe7dc8cbf
diff --git a/contrib/incbin-cmake/CMakeLists.txt b/contrib/incbin-cmake/CMakeLists.txt
new file mode 100644
index 000000000000..5778cf83c221
--- /dev/null
+++ b/contrib/incbin-cmake/CMakeLists.txt
@@ -0,0 +1,8 @@
+set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/incbin")
+add_library(_incbin INTERFACE)
+target_include_directories(_incbin SYSTEM INTERFACE ${LIBRARY_DIR})
+add_library(ch_contrib::incbin ALIAS _incbin)
+
+# Warning "incbin is incompatible with bitcode. Using the library will break upload to App Store if you have bitcode enabled.
+# Add `#define INCBIN_SILENCE_BITCODE_WARNING` before including this header to silence this warning."
+target_compile_definitions(_incbin INTERFACE INCBIN_SILENCE_BITCODE_WARNING)
diff --git a/contrib/isa-l-cmake/CMakeLists.txt b/contrib/isa-l-cmake/CMakeLists.txt
index d4d6d648268b..10f7d7bad64d 100644
--- a/contrib/isa-l-cmake/CMakeLists.txt
+++ b/contrib/isa-l-cmake/CMakeLists.txt
@@ -1,6 +1,7 @@
 option(ENABLE_ISAL_LIBRARY "Enable ISA-L library" ${ENABLE_LIBRARIES})
-if (ARCH_AARCH64)
-    # Disable ISA-L libray on aarch64.
+
+# ISA-L is only available for x86-64, so it shall be disabled for other platforms
+if (NOT ARCH_AMD64)
     set (ENABLE_ISAL_LIBRARY OFF)
 endif ()
diff --git a/contrib/jemalloc-cmake/CMakeLists.txt b/contrib/jemalloc-cmake/CMakeLists.txt
index 97f723bb5400..15e965ed841a 100644
--- a/contrib/jemalloc-cmake/CMakeLists.txt
+++ b/contrib/jemalloc-cmake/CMakeLists.txt
@@ -1,5 +1,5 @@
 if (SANITIZE OR NOT (
-    ((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_PPC64LE OR ARCH_RISCV64)) OR
+    ((OS_LINUX OR OS_FREEBSD) AND (ARCH_AMD64 OR ARCH_AARCH64 OR ARCH_PPC64LE OR ARCH_RISCV64 OR ARCH_S390X)) OR
     (OS_DARWIN AND (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" OR CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG"))
 ))
     if (ENABLE_JEMALLOC)
@@ -17,17 +17,17 @@ if (NOT ENABLE_JEMALLOC)
 endif ()
 
 if (NOT OS_LINUX)
-    message (WARNING "jemalloc support on non-linux is EXPERIMENTAL")
+    message (WARNING "jemalloc support on non-Linux is EXPERIMENTAL")
 endif()
 
 if (OS_LINUX)
-    # ThreadPool select job randomly, and there can be some threads that had been
-    # performed some memory heavy task before and will be inactive for some time,
-    # but until it will became active again, the memory will not be freed since by
-    # default each thread has it's own arena, but there should be not more then
+    # ThreadPool selects jobs randomly, and there can be some threads that have
+    # performed some memory-heavy tasks before and will be inactive for some time,
+    # but until they become active again, the memory will not be freed since, by
+    # default, each thread has its own arena, but there should be no more than
     # 4*CPU arenas (see opt.narenas description).
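     # (Illustration: with the 4*CPU default, a 16-core machine can keep up to
     # 64 arenas alive, each holding on to memory freed by the threads mapped to it.)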
     #
-    # By enabling percpu_arena number of arenas limited to number of CPUs and hence
+    # By enabling percpu_arena, the number of arenas is limited to the number of CPUs, and hence
     # this problem should go away.
     #
     # muzzy_decay_ms -- use MADV_FREE when available on newer Linuxes, to
@@ -38,7 +38,7 @@ if (OS_LINUX)
 else()
     set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000")
 endif()
-# CACHE variable is empty, to allow changing defaults without necessity
+# CACHE variable is empty to allow changing defaults without the necessity
 # to purge cache
 set (JEMALLOC_CONFIG_MALLOC_CONF_OVERRIDE "" CACHE STRING "Change default configuration string of JEMalloc" )
 if (JEMALLOC_CONFIG_MALLOC_CONF_OVERRIDE)
@@ -148,6 +148,8 @@ elseif (ARCH_PPC64LE)
     set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_ppc64le")
 elseif (ARCH_RISCV64)
     set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_riscv64")
+elseif (ARCH_S390X)
+    set(JEMALLOC_INCLUDE_PREFIX "${JEMALLOC_INCLUDE_PREFIX}_s390x")
 else ()
     message (FATAL_ERROR "internal jemalloc: This arch is not supported")
 endif ()
@@ -170,16 +172,13 @@ endif ()
 
 target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_PROF=1)
 
-if (USE_UNWIND)
-    # jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++.
-    # The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`.
-    # At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracing.
-
-    # ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1).
-
-    target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1)
-    target_link_libraries (_jemalloc PRIVATE unwind)
-endif ()
+# jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++.
+# The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`.
+# At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracing.
+#
+# ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1).
+target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1)
+target_link_libraries (_jemalloc PRIVATE unwind)
 
 # for RTLD_NEXT
 target_compile_options(_jemalloc PRIVATE -D_GNU_SOURCE)
diff --git a/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in b/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in
new file mode 100644
index 000000000000..531f2bca0c21
--- /dev/null
+++ b/contrib/jemalloc-cmake/include_linux_s390x/jemalloc/internal/jemalloc_internal_defs.h.in
@@ -0,0 +1,435 @@
+/* include/jemalloc/internal/jemalloc_internal_defs.h. Generated from jemalloc_internal_defs.h.in by configure. */
+#ifndef JEMALLOC_INTERNAL_DEFS_H_
+#define JEMALLOC_INTERNAL_DEFS_H_
+/*
+ * If JEMALLOC_PREFIX is defined via --with-jemalloc-prefix, it will cause all
+ * public APIs to be prefixed. This makes it possible, with some care, to use
+ * multiple allocators simultaneously.
+ */
+/* #undef JEMALLOC_PREFIX */
+/* #undef JEMALLOC_CPREFIX */
+
+/*
+ * Define overrides for non-standard allocator-related functions if they are
+ * present on the system.
+ */ +#define JEMALLOC_OVERRIDE___LIBC_CALLOC +#define JEMALLOC_OVERRIDE___LIBC_FREE +#define JEMALLOC_OVERRIDE___LIBC_MALLOC +#define JEMALLOC_OVERRIDE___LIBC_MEMALIGN +#define JEMALLOC_OVERRIDE___LIBC_REALLOC +#define JEMALLOC_OVERRIDE___LIBC_VALLOC +#define JEMALLOC_OVERRIDE___LIBC_PVALLOC +/* #undef JEMALLOC_OVERRIDE___POSIX_MEMALIGN */ + +/* + * JEMALLOC_PRIVATE_NAMESPACE is used as a prefix for all library-private APIs. + * For shared libraries, symbol visibility mechanisms prevent these symbols + * from being exported, but for static libraries, naming collisions are a real + * possibility. + */ +#define JEMALLOC_PRIVATE_NAMESPACE je_ + +/* + * Hyper-threaded CPUs may need a special instruction inside spin loops in + * order to yield to another virtual CPU. + */ +#define CPU_SPINWAIT +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#define HAVE_CPU_SPINWAIT 0 + +/* + * Number of significant bits in virtual addresses. This may be less than the + * total number of bits in a pointer, e.g. on x64, for which the uppermost 16 + * bits are the same as bit 47. + */ +#define LG_VADDR 64 + +/* Defined if C11 atomics are available. */ +#define JEMALLOC_C11_ATOMICS + +/* Defined if GCC __atomic atomics are available. */ +#define JEMALLOC_GCC_ATOMIC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_ATOMIC_ATOMICS + +/* Defined if GCC __sync atomics are available. */ +#define JEMALLOC_GCC_SYNC_ATOMICS +/* and the 8-bit variant support. */ +#define JEMALLOC_GCC_U8_SYNC_ATOMICS + +/* + * Defined if __builtin_clz() and __builtin_clzl() are available. + */ +#define JEMALLOC_HAVE_BUILTIN_CLZ + +/* + * Defined if os_unfair_lock_*() functions are available, as provided by Darwin. + */ +/* #undef JEMALLOC_OS_UNFAIR_LOCK */ + +/* Defined if syscall(2) is usable. */ +#define JEMALLOC_USE_SYSCALL + +/* + * Defined if secure_getenv(3) is available. + */ +#define JEMALLOC_HAVE_SECURE_GETENV + +/* + * Defined if issetugid(2) is available. + */ +/* #undef JEMALLOC_HAVE_ISSETUGID */ + +/* Defined if pthread_atfork(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_ATFORK + +/* Defined if pthread_setname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_SETNAME_NP + +/* Defined if pthread_getname_np(3) is available. */ +#define JEMALLOC_HAVE_PTHREAD_GETNAME_NP + +/* Defined if pthread_get_name_np(3) is available. */ +/* #undef JEMALLOC_HAVE_PTHREAD_GET_NAME_NP */ + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC_COARSE, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC_COARSE + +/* + * Defined if clock_gettime(CLOCK_MONOTONIC, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_MONOTONIC + +/* + * Defined if mach_absolute_time() is available. + */ +/* #undef JEMALLOC_HAVE_MACH_ABSOLUTE_TIME */ + +/* + * Defined if clock_gettime(CLOCK_REALTIME, ...) is available. + */ +#define JEMALLOC_HAVE_CLOCK_REALTIME + +/* + * Defined if _malloc_thread_cleanup() exists. At least in the case of + * FreeBSD, pthread_key_create() allocates, which if used during malloc + * bootstrapping will cause recursion into the pthreads library. Therefore, if + * _malloc_thread_cleanup() exists, use it as the basis for thread cleanup in + * malloc_tsd. + */ +/* #undef JEMALLOC_MALLOC_THREAD_CLEANUP */ + +/* + * Defined if threaded initialization is known to be safe on this platform. + * Among other things, it must be possible to initialize a mutex without + * triggering allocation in order for threaded allocation to be safe. 
+ */ +#define JEMALLOC_THREADED_INIT + +/* + * Defined if the pthreads implementation defines + * _pthread_mutex_init_calloc_cb(), in which case the function is used in order + * to avoid recursive allocation during mutex initialization. + */ +/* #undef JEMALLOC_MUTEX_INIT_CB */ + +/* Non-empty if the tls_model attribute is supported. */ +#define JEMALLOC_TLS_MODEL __attribute__((tls_model("initial-exec"))) + +/* + * JEMALLOC_DEBUG enables assertions and other sanity checks, and disables + * inline functions. + */ +/* #undef JEMALLOC_DEBUG */ + +/* JEMALLOC_STATS enables statistics calculation. */ +#define JEMALLOC_STATS + +/* JEMALLOC_EXPERIMENTAL_SMALLOCX_API enables experimental smallocx API. */ +/* #undef JEMALLOC_EXPERIMENTAL_SMALLOCX_API */ + +/* JEMALLOC_PROF enables allocation profiling. */ +/* #undef JEMALLOC_PROF */ + +/* Use libunwind for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBUNWIND */ + +/* Use libgcc for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_LIBGCC */ + +/* Use gcc intrinsics for profile backtracing if defined. */ +/* #undef JEMALLOC_PROF_GCC */ + +/* JEMALLOC_PAGEID enabled page id */ +/* #undef JEMALLOC_PAGEID */ + +/* JEMALLOC_HAVE_PRCTL checks prctl */ +#define JEMALLOC_HAVE_PRCTL + +/* + * JEMALLOC_DSS enables use of sbrk(2) to allocate extents from the data storage + * segment (DSS). + */ +#define JEMALLOC_DSS + +/* Support memory filling (junk/zero). */ +#define JEMALLOC_FILL + +/* Support utrace(2)-based tracing. */ +/* #undef JEMALLOC_UTRACE */ + +/* Support utrace(2)-based tracing (label based signature). */ +/* #undef JEMALLOC_UTRACE_LABEL */ + +/* Support optional abort() on OOM. */ +/* #undef JEMALLOC_XMALLOC */ + +/* Support lazy locking (avoid locking unless a second thread is launched). */ +/* #undef JEMALLOC_LAZY_LOCK */ + +/* + * Minimum allocation alignment is 2^LG_QUANTUM bytes (ignoring tiny size + * classes). + */ +/* #undef LG_QUANTUM */ + +/* One page is 2^LG_PAGE bytes. */ +#define LG_PAGE 12 + +/* Maximum number of regions in a slab. */ +/* #undef CONFIG_LG_SLAB_MAXREGS */ + +/* + * One huge page is 2^LG_HUGEPAGE bytes. Note that this is defined even if the + * system does not explicitly support huge pages; system calls that require + * explicit huge page support are separately configured. + */ +#define LG_HUGEPAGE 20 + +/* + * If defined, adjacent virtual memory mappings with identical attributes + * automatically coalesce, and they fragment when changes are made to subranges. + * This is the normal order of things for mmap()/munmap(), but on Windows + * VirtualAlloc()/VirtualFree() operations must be precisely matched, i.e. + * mappings do *not* coalesce/fragment. + */ +#define JEMALLOC_MAPS_COALESCE + +/* + * If defined, retain memory for later reuse by default rather than using e.g. + * munmap() to unmap freed extents. This is enabled on 64-bit Linux because + * common sequences of mmap()/munmap() calls will cause virtual memory map + * holes. + */ +#define JEMALLOC_RETAIN + +/* TLS is used to map arenas and magazine caches to threads. */ +#define JEMALLOC_TLS + +/* + * Used to mark unreachable code to quiet "end of non-void" compiler warnings. + * Don't use this directly; instead use unreachable() from util.h + */ +#define JEMALLOC_INTERNAL_UNREACHABLE __builtin_unreachable + +/* + * ffs*() functions to use for bitmapping. Don't use these directly; instead, + * use ffs_*() from util.h. 
+ */ +#define JEMALLOC_INTERNAL_FFSLL __builtin_ffsll +#define JEMALLOC_INTERNAL_FFSL __builtin_ffsl +#define JEMALLOC_INTERNAL_FFS __builtin_ffs + +/* + * popcount*() functions to use for bitmapping. + */ +#define JEMALLOC_INTERNAL_POPCOUNTL __builtin_popcountl +#define JEMALLOC_INTERNAL_POPCOUNT __builtin_popcount + +/* + * If defined, explicitly attempt to more uniformly distribute large allocation + * pointer alignments across all cache indices. + */ +#define JEMALLOC_CACHE_OBLIVIOUS + +/* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + +/* + * If defined, use readlinkat() (instead of readlink()) to follow + * /etc/malloc_conf. + */ +/* #undef JEMALLOC_READLINKAT */ + +/* + * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. + */ +/* #undef JEMALLOC_ZONE */ + +/* + * Methods for determining whether the OS overcommits. + * JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY: Linux's + * /proc/sys/vm.overcommit_memory file. + * JEMALLOC_SYSCTL_VM_OVERCOMMIT: FreeBSD's vm.overcommit sysctl. + */ +/* #undef JEMALLOC_SYSCTL_VM_OVERCOMMIT */ +#define JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY + +/* Defined if madvise(2) is available. */ +#define JEMALLOC_HAVE_MADVISE + +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +#define JEMALLOC_HAVE_MADVISE_HUGE + +/* + * Methods for purging unused pages differ between operating systems. + * + * madvise(..., MADV_FREE) : This marks pages as being unused, such that they + * will be discarded rather than swapped out. + * madvise(..., MADV_DONTNEED) : If JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS is + * defined, this immediately discards pages, + * such that new pages will be demand-zeroed if + * the address region is later touched; + * otherwise this behaves similarly to + * MADV_FREE, though typically with higher + * system overhead. + */ +#define JEMALLOC_PURGE_MADVISE_FREE +#define JEMALLOC_PURGE_MADVISE_DONTNEED +#define JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS + +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +/* #undef JEMALLOC_DEFINE_MADVISE_FREE */ + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +#define JEMALLOC_MADVISE_DONTDUMP + +/* + * Defined if MADV_[NO]CORE is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_NOCORE */ + +/* Defined if mprotect(2) is available. */ +#define JEMALLOC_HAVE_MPROTECT + +/* + * Defined if transparent huge pages (THPs) are supported via the + * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. + */ +/* #undef JEMALLOC_THP */ + +/* Defined if posix_madvise is available. */ +/* #undef JEMALLOC_HAVE_POSIX_MADVISE */ + +/* + * Method for purging unused pages using posix_madvise. + * + * posix_madvise(..., POSIX_MADV_DONTNEED) + */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED */ +/* #undef JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS */ + +/* + * Defined if memcntl page admin call is supported + */ +/* #undef JEMALLOC_HAVE_MEMCNTL */ + +/* + * Defined if malloc_size is supported + */ +/* #undef JEMALLOC_HAVE_MALLOC_SIZE */ + +/* Define if operating system has alloca.h header. */ +#define JEMALLOC_HAS_ALLOCA_H + +/* C99 restrict keyword supported. */ +#define JEMALLOC_HAS_RESTRICT + +/* For use by hash code. */ +#define JEMALLOC_BIG_ENDIAN + +/* sizeof(int) == 2^LG_SIZEOF_INT. 
*/ +#define LG_SIZEOF_INT 2 + +/* sizeof(long) == 2^LG_SIZEOF_LONG. */ +#define LG_SIZEOF_LONG 3 + +/* sizeof(long long) == 2^LG_SIZEOF_LONG_LONG. */ +#define LG_SIZEOF_LONG_LONG 3 + +/* sizeof(intmax_t) == 2^LG_SIZEOF_INTMAX_T. */ +#define LG_SIZEOF_INTMAX_T 3 + +/* glibc malloc hooks (__malloc_hook, __realloc_hook, __free_hook). */ +/* #undef JEMALLOC_GLIBC_MALLOC_HOOK */ + +/* glibc memalign hook. */ +/* #undef JEMALLOC_GLIBC_MEMALIGN_HOOK */ + +/* pthread support */ +#define JEMALLOC_HAVE_PTHREAD + +/* dlsym() support */ +#define JEMALLOC_HAVE_DLSYM + +/* Adaptive mutex support in pthreads. */ +#define JEMALLOC_HAVE_PTHREAD_MUTEX_ADAPTIVE_NP + +/* GNU specific sched_getcpu support */ +#define JEMALLOC_HAVE_SCHED_GETCPU + +/* GNU specific sched_setaffinity support */ +#define JEMALLOC_HAVE_SCHED_SETAFFINITY + +/* + * If defined, all the features necessary for background threads are present. + */ +#define JEMALLOC_BACKGROUND_THREAD + +/* + * If defined, jemalloc symbols are not exported (doesn't work when + * JEMALLOC_PREFIX is not defined). + */ +/* #undef JEMALLOC_EXPORT */ + +/* config.malloc_conf options string. */ +#define JEMALLOC_CONFIG_MALLOC_CONF "" + +/* If defined, jemalloc takes the malloc/free/etc. symbol names. */ +#define JEMALLOC_IS_MALLOC + +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. + */ +#define JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE + +/* Performs additional safety checks when defined. */ +/* #undef JEMALLOC_OPT_SAFETY_CHECKS */ + +/* Is C++ support being built? */ +#define JEMALLOC_ENABLE_CXX + +/* Performs additional size checks when defined. */ +/* #undef JEMALLOC_OPT_SIZE_CHECKS */ + +/* Allows sampled junk and stash for checking use-after-free when defined. */ +/* #undef JEMALLOC_UAF_DETECTION */ + +/* Darwin VM_MAKE_TAG support */ +/* #undef JEMALLOC_HAVE_VM_MAKE_TAG */ + +/* If defined, realloc(ptr, 0) defaults to "free" instead of "alloc". 
*/ +#define JEMALLOC_ZERO_REALLOC_DEFAULT_FREE + +#endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/krb5 b/contrib/krb5 index b56ce6ba690e..71b06c227600 160000 --- a/contrib/krb5 +++ b/contrib/krb5 @@ -1 +1 @@ -Subproject commit b56ce6ba690e1f320df1a64afa34980c3e462617 +Subproject commit 71b06c2276009ae649c7703019f3b4605f66fd3d diff --git a/contrib/libarchive b/contrib/libarchive new file mode 160000 index 000000000000..ee4579617132 --- /dev/null +++ b/contrib/libarchive @@ -0,0 +1 @@ +Subproject commit ee45796171324519f0c0bfd012018dd099296336 diff --git a/contrib/libarchive-cmake/CMakeLists.txt b/contrib/libarchive-cmake/CMakeLists.txt new file mode 100644 index 000000000000..cd5658b7086b --- /dev/null +++ b/contrib/libarchive-cmake/CMakeLists.txt @@ -0,0 +1,182 @@ +set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/libarchive") + +set(SRCS + "${LIBRARY_DIR}/libarchive/archive_acl.c" + "${LIBRARY_DIR}/libarchive/archive_blake2sp_ref.c" + "${LIBRARY_DIR}/libarchive/archive_blake2s_ref.c" + "${LIBRARY_DIR}/libarchive/archive_check_magic.c" + "${LIBRARY_DIR}/libarchive/archive_cmdline.c" + "${LIBRARY_DIR}/libarchive/archive_cryptor.c" + "${LIBRARY_DIR}/libarchive/archive_digest.c" + "${LIBRARY_DIR}/libarchive/archive_disk_acl_darwin.c" + "${LIBRARY_DIR}/libarchive/archive_disk_acl_freebsd.c" + "${LIBRARY_DIR}/libarchive/archive_disk_acl_linux.c" + "${LIBRARY_DIR}/libarchive/archive_disk_acl_sunos.c" + "${LIBRARY_DIR}/libarchive/archive_entry.c" + "${LIBRARY_DIR}/libarchive/archive_entry_copy_bhfi.c" + "${LIBRARY_DIR}/libarchive/archive_entry_copy_stat.c" + "${LIBRARY_DIR}/libarchive/archive_entry_link_resolver.c" + "${LIBRARY_DIR}/libarchive/archive_entry_sparse.c" + "${LIBRARY_DIR}/libarchive/archive_entry_stat.c" + "${LIBRARY_DIR}/libarchive/archive_entry_strmode.c" + "${LIBRARY_DIR}/libarchive/archive_entry_xattr.c" + "${LIBRARY_DIR}/libarchive/archive_getdate.c" + "${LIBRARY_DIR}/libarchive/archive_hmac.c" + "${LIBRARY_DIR}/libarchive/archive_match.c" + "${LIBRARY_DIR}/libarchive/archive_options.c" + "${LIBRARY_DIR}/libarchive/archive_pack_dev.c" + "${LIBRARY_DIR}/libarchive/archive_pathmatch.c" + "${LIBRARY_DIR}/libarchive/archive_ppmd7.c" + "${LIBRARY_DIR}/libarchive/archive_ppmd8.c" + "${LIBRARY_DIR}/libarchive/archive_random.c" + "${LIBRARY_DIR}/libarchive/archive_rb.c" + "${LIBRARY_DIR}/libarchive/archive_read_add_passphrase.c" + "${LIBRARY_DIR}/libarchive/archive_read_append_filter.c" + "${LIBRARY_DIR}/libarchive/archive_read.c" + "${LIBRARY_DIR}/libarchive/archive_read_data_into_fd.c" + "${LIBRARY_DIR}/libarchive/archive_read_disk_entry_from_file.c" + "${LIBRARY_DIR}/libarchive/archive_read_disk_posix.c" + "${LIBRARY_DIR}/libarchive/archive_read_disk_set_standard_lookup.c" + "${LIBRARY_DIR}/libarchive/archive_read_disk_windows.c" + "${LIBRARY_DIR}/libarchive/archive_read_extract2.c" + "${LIBRARY_DIR}/libarchive/archive_read_extract.c" + "${LIBRARY_DIR}/libarchive/archive_read_open_fd.c" + "${LIBRARY_DIR}/libarchive/archive_read_open_file.c" + "${LIBRARY_DIR}/libarchive/archive_read_open_filename.c" + "${LIBRARY_DIR}/libarchive/archive_read_open_memory.c" + "${LIBRARY_DIR}/libarchive/archive_read_set_format.c" + "${LIBRARY_DIR}/libarchive/archive_read_set_options.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_all.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_by_code.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_bzip2.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_compress.c" + 
"${LIBRARY_DIR}/libarchive/archive_read_support_filter_grzip.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_gzip.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_lrzip.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_lz4.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_lzop.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_none.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_program.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_rpm.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_uu.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_xz.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_filter_zstd.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_7zip.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_all.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_ar.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_by_code.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_cab.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_cpio.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_empty.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_iso9660.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_lha.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_mtree.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_rar5.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_rar.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_raw.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_tar.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_warc.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_xar.c" + "${LIBRARY_DIR}/libarchive/archive_read_support_format_zip.c" + "${LIBRARY_DIR}/libarchive/archive_string.c" + "${LIBRARY_DIR}/libarchive/archive_string_sprintf.c" + "${LIBRARY_DIR}/libarchive/archive_util.c" + "${LIBRARY_DIR}/libarchive/archive_version_details.c" + "${LIBRARY_DIR}/libarchive/archive_virtual.c" + "${LIBRARY_DIR}/libarchive/archive_windows.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_b64encode.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_by_name.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_bzip2.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_compress.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_grzip.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_gzip.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_lrzip.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_lz4.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_lzop.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_none.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_program.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_uuencode.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_xz.c" + "${LIBRARY_DIR}/libarchive/archive_write_add_filter_zstd.c" + "${LIBRARY_DIR}/libarchive/archive_write.c" + "${LIBRARY_DIR}/libarchive/archive_write_disk_posix.c" + "${LIBRARY_DIR}/libarchive/archive_write_disk_set_standard_lookup.c" + "${LIBRARY_DIR}/libarchive/archive_write_disk_windows.c" + "${LIBRARY_DIR}/libarchive/archive_write_open_fd.c" + "${LIBRARY_DIR}/libarchive/archive_write_open_file.c" + "${LIBRARY_DIR}/libarchive/archive_write_open_filename.c" + "${LIBRARY_DIR}/libarchive/archive_write_open_memory.c" + 
"${LIBRARY_DIR}/libarchive/archive_write_set_format_7zip.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_ar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_by_name.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_binary.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_newc.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_cpio_odc.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_filter_by_ext.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_gnutar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_iso9660.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_mtree.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_pax.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_raw.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_shar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_ustar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_v7tar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_warc.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_xar.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_format_zip.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_options.c" + "${LIBRARY_DIR}/libarchive/archive_write_set_passphrase.c" + "${LIBRARY_DIR}/libarchive/filter_fork_posix.c" + "${LIBRARY_DIR}/libarchive/filter_fork_windows.c" + "${LIBRARY_DIR}/libarchive/xxhash.c" +) + +add_library(_libarchive ${SRCS}) +target_include_directories(_libarchive PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} + "${LIBRARY_DIR}/libarchive" +) + +target_compile_definitions(_libarchive PUBLIC + HAVE_CONFIG_H +) + +target_compile_options(_libarchive PRIVATE "-Wno-reserved-macro-identifier") + +if (TARGET ch_contrib::xz) + target_compile_definitions(_libarchive PUBLIC HAVE_LZMA_H=1 HAVE_LIBLZMA=1) + target_link_libraries(_libarchive PRIVATE ch_contrib::xz) +endif() + +if (TARGET ch_contrib::zlib) + target_compile_definitions(_libarchive PUBLIC HAVE_ZLIB_H=1) + target_link_libraries(_libarchive PRIVATE ch_contrib::zlib) +endif() + +if (TARGET ch_contrib::zstd) + target_compile_definitions(_libarchive PUBLIC HAVE_ZSTD_H=1 HAVE_LIBZSTD=1) + target_link_libraries(_libarchive PRIVATE ch_contrib::zstd) +endif() + +if (TARGET ch_contrib::bzip2) + target_compile_definitions(_libarchive PUBLIC HAVE_BZLIB_H=1) + target_link_libraries(_libarchive PRIVATE ch_contrib::bzip2) +endif() + +if (OS_LINUX) + target_compile_definitions( + _libarchive PUBLIC + MAJOR_IN_SYSMACROS=1 + HAVE_LINUX_FS_H=1 + HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC=1 + HAVE_LINUX_TYPES_H=1 + HAVE_SYS_STATFS_H=1 + HAVE_FUTIMESAT=1 + HAVE_ICONV=1 + ) +endif() + +add_library(ch_contrib::libarchive ALIAS _libarchive) \ No newline at end of file diff --git a/contrib/libarchive-cmake/config.h b/contrib/libarchive-cmake/config.h new file mode 100644 index 000000000000..0b0cab47a52a --- /dev/null +++ b/contrib/libarchive-cmake/config.h @@ -0,0 +1,1391 @@ +/* config.h. Generated from build/cmake/config.h.in by cmake configure */ +#define __LIBARCHIVE_CONFIG_H_INCLUDED 1 + +/* + * Ensure we have C99-style int64_t, etc, all defined. + */ + +/* First, we need to know if the system has already defined them. 
*/ +#define HAVE_INT16_T +#define HAVE_INT32_T +#define HAVE_INT64_T +#define HAVE_INTMAX_T + +#define HAVE_UINT8_T +#define HAVE_UINT16_T +#define HAVE_UINT32_T +#define HAVE_UINT64_T +#define HAVE_UINTMAX_T + +/* We might have the types we want under other spellings. */ +/* #undef HAVE___INT64 */ +/* #undef HAVE_U_INT64_T */ +/* #undef HAVE_UNSIGNED___INT64 */ + +/* The sizes of various standard integer types. */ +#define SIZEOF_SHORT 2 +#define SIZEOF_INT 4 +#define SIZEOF_LONG 8 +#define SIZEOF_LONG_LONG 8 +#define SIZEOF_UNSIGNED_SHORT 2 +#define SIZEOF_UNSIGNED 4 +#define SIZEOF_UNSIGNED_LONG 8 +#define SIZEOF_UNSIGNED_LONG_LONG 8 + +/* + * If we lack int64_t, define it to the first of __int64, int, long, and long long + * that exists and is the right size. + */ +#if !defined(HAVE_INT64_T) && defined(HAVE___INT64) +typedef __int64 int64_t; +#define HAVE_INT64_T +#endif + +#if !defined(HAVE_INT64_T) && SIZEOF_INT == 8 +typedef int int64_t; +#define HAVE_INT64_T +#endif + +#if !defined(HAVE_INT64_T) && SIZEOF_LONG == 8 +typedef long int64_t; +#define HAVE_INT64_T +#endif + +#if !defined(HAVE_INT64_T) && SIZEOF_LONG_LONG == 8 +typedef long long int64_t; +#define HAVE_INT64_T +#endif + +#if !defined(HAVE_INT64_T) +#error No 64-bit integer type was found. +#endif + +/* + * Similarly for int32_t + */ +#if !defined(HAVE_INT32_T) && SIZEOF_INT == 4 +typedef int int32_t; +#define HAVE_INT32_T +#endif + +#if !defined(HAVE_INT32_T) && SIZEOF_LONG == 4 +typedef long int32_t; +#define HAVE_INT32_T +#endif + +#if !defined(HAVE_INT32_T) +#error No 32-bit integer type was found. +#endif + +/* + * Similarly for int16_t + */ +#if !defined(HAVE_INT16_T) && SIZEOF_INT == 2 +typedef int int16_t; +#define HAVE_INT16_T +#endif + +#if !defined(HAVE_INT16_T) && SIZEOF_SHORT == 2 +typedef short int16_t; +#define HAVE_INT16_T +#endif + +#if !defined(HAVE_INT16_T) +#error No 16-bit integer type was found. +#endif + +/* + * Similarly for uint64_t + */ +#if !defined(HAVE_UINT64_T) && defined(HAVE_UNSIGNED___INT64) +typedef unsigned __int64 uint64_t; +#define HAVE_UINT64_T +#endif + +#if !defined(HAVE_UINT64_T) && SIZEOF_UNSIGNED == 8 +typedef unsigned uint64_t; +#define HAVE_UINT64_T +#endif + +#if !defined(HAVE_UINT64_T) && SIZEOF_UNSIGNED_LONG == 8 +typedef unsigned long uint64_t; +#define HAVE_UINT64_T +#endif + +#if !defined(HAVE_UINT64_T) && SIZEOF_UNSIGNED_LONG_LONG == 8 +typedef unsigned long long uint64_t; +#define HAVE_UINT64_T +#endif + +#if !defined(HAVE_UINT64_T) +#error No 64-bit unsigned integer type was found. +#endif + + +/* + * Similarly for uint32_t + */ +#if !defined(HAVE_UINT32_T) && SIZEOF_UNSIGNED == 4 +typedef unsigned uint32_t; +#define HAVE_UINT32_T +#endif + +#if !defined(HAVE_UINT32_T) && SIZEOF_UNSIGNED_LONG == 4 +typedef unsigned long uint32_t; +#define HAVE_UINT32_T +#endif + +#if !defined(HAVE_UINT32_T) +#error No 32-bit unsigned integer type was found. +#endif + +/* + * Similarly for uint16_t + */ +#if !defined(HAVE_UINT16_T) && SIZEOF_UNSIGNED == 2 +typedef unsigned uint16_t; +#define HAVE_UINT16_T +#endif + +#if !defined(HAVE_UINT16_T) && SIZEOF_UNSIGNED_SHORT == 2 +typedef unsigned short uint16_t; +#define HAVE_UINT16_T +#endif + +#if !defined(HAVE_UINT16_T) +#error No 16-bit unsigned integer type was found. +#endif + +/* + * Similarly for uint8_t + */ +#if !defined(HAVE_UINT8_T) +typedef unsigned char uint8_t; +#define HAVE_UINT8_T +#endif + +#if !defined(HAVE_UINT8_T) +#error No 8-bit unsigned integer type was found. 
+#endif + +/* Define intmax_t and uintmax_t if they are not already defined. */ +#if !defined(HAVE_INTMAX_T) +typedef int64_t intmax_t; +#endif + +#if !defined(HAVE_UINTMAX_T) +typedef uint64_t uintmax_t; +#endif + +/* Define ZLIB_WINAPI if zlib was built on Visual Studio. */ +/* #undef ZLIB_WINAPI */ + +/* Darwin ACL support */ +/* #undef ARCHIVE_ACL_DARWIN */ + +/* FreeBSD ACL support */ +/* #undef ARCHIVE_ACL_FREEBSD */ + +/* FreeBSD NFSv4 ACL support */ +/* #undef ARCHIVE_ACL_FREEBSD_NFS4 */ + +/* Linux POSIX.1e ACL support via libacl */ +/* #undef ARCHIVE_ACL_LIBACL */ + +/* Linux NFSv4 ACL support via librichacl */ +/* #undef ARCHIVE_ACL_LIBRICHACL */ + +/* Solaris ACL support */ +/* #undef ARCHIVE_ACL_SUNOS */ + +/* Solaris NFSv4 ACL support */ +/* #undef ARCHIVE_ACL_SUNOS_NFS4 */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_LIBC */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_LIBSYSTEM supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_LIBSYSTEM */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_MBEDTLS */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_NETTLE */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_OPENSSL */ + +/* MD5 via ARCHIVE_CRYPTO_MD5_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_MD5_WIN */ + +/* RMD160 via ARCHIVE_CRYPTO_RMD160_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_RMD160_LIBC */ + +/* RMD160 via ARCHIVE_CRYPTO_RMD160_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_RMD160_NETTLE */ + +/* RMD160 via ARCHIVE_CRYPTO_RMD160_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_RMD160_MBEDTLS */ + +/* RMD160 via ARCHIVE_CRYPTO_RMD160_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_RMD160_OPENSSL */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_LIBC */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_LIBSYSTEM supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_LIBSYSTEM */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_MBEDTLS */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_NETTLE */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_OPENSSL */ + +/* SHA1 via ARCHIVE_CRYPTO_SHA1_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_SHA1_WIN */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_LIBC */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBC2 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_LIBC2 */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBC3 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_LIBC3 */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_LIBSYSTEM supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_LIBSYSTEM */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_MBEDTLS */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_NETTLE */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_OPENSSL */ + +/* SHA256 via ARCHIVE_CRYPTO_SHA256_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_SHA256_WIN */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_LIBC */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBC2 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_LIBC2 */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBC3 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_LIBC3 */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_LIBSYSTEM supported. 
*/ +/* #undef ARCHIVE_CRYPTO_SHA384_LIBSYSTEM */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_MBEDTLS */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_NETTLE */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_OPENSSL */ + +/* SHA384 via ARCHIVE_CRYPTO_SHA384_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_SHA384_WIN */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBC supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_LIBC */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBC2 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_LIBC2 */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBC3 supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_LIBC3 */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_LIBSYSTEM supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_LIBSYSTEM */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_MBEDTLS supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_MBEDTLS */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_NETTLE supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_NETTLE */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_OPENSSL supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_OPENSSL */ + +/* SHA512 via ARCHIVE_CRYPTO_SHA512_WIN supported. */ +/* #undef ARCHIVE_CRYPTO_SHA512_WIN */ + +/* AIX xattr support */ +/* #undef ARCHIVE_XATTR_AIX */ + +/* Darwin xattr support */ +/* #undef ARCHIVE_XATTR_DARWIN */ + +/* FreeBSD xattr support */ +/* #undef ARCHIVE_XATTR_FREEBSD */ + +/* Linux xattr support */ +/* #undef ARCHIVE_XATTR_LINUX */ + +/* Version number of bsdcpio */ +#define BSDCPIO_VERSION_STRING "3.7.0" + +/* Version number of bsdtar */ +#define BSDTAR_VERSION_STRING "3.7.0" + +/* Version number of bsdcat */ +#define BSDCAT_VERSION_STRING "3.7.0" + +/* Define to 1 if you have the `acl_create_entry' function. */ +/* #undef HAVE_ACL_CREATE_ENTRY */ + +/* Define to 1 if you have the `acl_get_fd_np' function. */ +/* #undef HAVE_ACL_GET_FD_NP */ + +/* Define to 1 if you have the `acl_get_link' function. */ +/* #undef HAVE_ACL_GET_LINK */ + +/* Define to 1 if you have the `acl_get_link_np' function. */ +/* #undef HAVE_ACL_GET_LINK_NP */ + +/* Define to 1 if you have the `acl_get_perm' function. */ +/* #undef HAVE_ACL_GET_PERM */ + +/* Define to 1 if you have the `acl_get_perm_np' function. */ +/* #undef HAVE_ACL_GET_PERM_NP */ + +/* Define to 1 if you have the `acl_init' function. */ +/* #undef HAVE_ACL_INIT */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ACL_LIBACL_H */ + +/* Define to 1 if the system has the type `acl_permset_t'. */ +/* #undef HAVE_ACL_PERMSET_T */ + +/* Define to 1 if you have the `acl_set_fd' function. */ +/* #undef HAVE_ACL_SET_FD */ + +/* Define to 1 if you have the `acl_set_fd_np' function. */ +/* #undef HAVE_ACL_SET_FD_NP */ + +/* Define to 1 if you have the `acl_set_file' function. */ +/* #undef HAVE_ACL_SET_FILE */ + +/* Define to 1 if you have the `arc4random_buf' function. */ +/* #undef HAVE_ARC4RANDOM_BUF */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ATTR_XATTR_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_BCRYPT_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_BSDXML_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_BZLIB_H */ + +/* Define to 1 if you have the `chflags' function. */ +/* #undef HAVE_CHFLAGS */ + +/* Define to 1 if you have the `chown' function. */ +#define HAVE_CHOWN 1 + +/* Define to 1 if you have the `chroot' function. 
*/ +#define HAVE_CHROOT 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_COPYFILE_H */ + +/* Define to 1 if you have the `ctime_r' function. */ +#define HAVE_CTIME_R 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_CTYPE_H 1 + +/* Define to 1 if you have the `cygwin_conv_path' function. */ +/* #undef HAVE_CYGWIN_CONV_PATH */ + +/* Define to 1 if you have the declaration of `ACE_GETACL', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACE_GETACL */ + +/* Define to 1 if you have the declaration of `ACE_GETACLCNT', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACE_GETACLCNT */ + +/* Define to 1 if you have the declaration of `ACE_SETACL', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACE_SETACL */ + +/* Define to 1 if you have the declaration of `ACL_SYNCHRONIZE', and to 0 if + you don't. */ +/* #undef HAVE_DECL_ACL_SYNCHRONIZE */ + +/* Define to 1 if you have the declaration of `ACL_TYPE_EXTENDED', and to 0 if + you don't. */ +/* #undef HAVE_DECL_ACL_TYPE_EXTENDED */ + +/* Define to 1 if you have the declaration of `ACL_TYPE_NFS4', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACL_TYPE_NFS4 */ + +/* Define to 1 if you have the declaration of `ACL_USER', and to 0 if you + don't. */ +/* #undef HAVE_DECL_ACL_USER */ + +/* Define to 1 if you have the declaration of `INT32_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_INT32_MAX 1 + +/* Define to 1 if you have the declaration of `INT32_MIN', and to 0 if you + don't. */ +#define HAVE_DECL_INT32_MIN 1 + +/* Define to 1 if you have the declaration of `INT64_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_INT64_MAX 1 + +/* Define to 1 if you have the declaration of `INT64_MIN', and to 0 if you + don't. */ +#define HAVE_DECL_INT64_MIN 1 + +/* Define to 1 if you have the declaration of `INTMAX_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_INTMAX_MAX 1 + +/* Define to 1 if you have the declaration of `INTMAX_MIN', and to 0 if you + don't. */ +#define HAVE_DECL_INTMAX_MIN 1 + +/* Define to 1 if you have the declaration of `SETACL', and to 0 if you don't. + */ +/* #undef HAVE_DECL_SETACL */ + +/* Define to 1 if you have the declaration of `SIZE_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_SIZE_MAX 1 + +/* Define to 1 if you have the declaration of `SSIZE_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_SSIZE_MAX 1 + +/* Define to 1 if you have the declaration of `strerror_r', and to 0 if you + don't. */ +#define HAVE_DECL_STRERROR_R 1 + +/* Define to 1 if you have the declaration of `UINT32_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_UINT32_MAX 1 + +/* Define to 1 if you have the declaration of `UINT64_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_UINT64_MAX 1 + +/* Define to 1 if you have the declaration of `UINTMAX_MAX', and to 0 if you + don't. */ +#define HAVE_DECL_UINTMAX_MAX 1 + +/* Define to 1 if you have the declaration of `XATTR_NOFOLLOW', and to 0 if + you don't. */ +/* #undef HAVE_DECL_XATTR_NOFOLLOW */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_DIRECT_H */ + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +#define HAVE_DIRENT_H 1 + +/* Define to 1 if you have the `dirfd' function. */ +#define HAVE_DIRFD 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DLFCN_H 1 + +/* Define to 1 if you don't have `vprintf' but do have `_doprnt.' 
*/ +/* #undef HAVE_DOPRNT */ + +/* Define to 1 if nl_langinfo supports D_MD_ORDER */ +/* #undef HAVE_D_MD_ORDER */ + +/* A possible errno value for invalid file format errors */ +/* #undef HAVE_EFTYPE */ + +/* A possible errno value for invalid file format errors */ +#define HAVE_EILSEQ 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_ERRNO_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_EXPAT_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_EXT2FS_EXT2_FS_H */ + +/* Define to 1 if you have the `extattr_get_file' function. */ +/* #undef HAVE_EXTATTR_GET_FILE */ + +/* Define to 1 if you have the `extattr_list_file' function. */ +/* #undef HAVE_EXTATTR_LIST_FILE */ + +/* Define to 1 if you have the `extattr_set_fd' function. */ +/* #undef HAVE_EXTATTR_SET_FD */ + +/* Define to 1 if you have the `extattr_set_file' function. */ +/* #undef HAVE_EXTATTR_SET_FILE */ + +/* Define to 1 if EXTATTR_NAMESPACE_USER is defined in sys/extattr.h. */ +/* #undef HAVE_DECL_EXTATTR_NAMESPACE_USER */ + +/* Define to 1 if you have the declaration of `GETACL', and to 0 if you don't. + */ +/* #undef HAVE_DECL_GETACL */ + +/* Define to 1 if you have the declaration of `GETACLCNT', and to 0 if you + don't. */ +/* #undef HAVE_DECL_GETACLCNT */ + +/* Define to 1 if you have the `fchdir' function. */ +#define HAVE_FCHDIR 1 + +/* Define to 1 if you have the `fchflags' function. */ +/* #undef HAVE_FCHFLAGS */ + +/* Define to 1 if you have the `fchmod' function. */ +#define HAVE_FCHMOD 1 + +/* Define to 1 if you have the `fchown' function. */ +#define HAVE_FCHOWN 1 + +/* Define to 1 if you have the `fcntl' function. */ +#define HAVE_FCNTL 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_FCNTL_H 1 + +/* Define to 1 if you have the `fdopendir' function. */ +#define HAVE_FDOPENDIR 1 + +/* Define to 1 if you have the `fgetea' function. */ +/* #undef HAVE_FGETEA */ + +/* Define to 1 if you have the `fgetxattr' function. */ +/* #undef HAVE_FGETXATTR */ + +/* Define to 1 if you have the `flistea' function. */ +/* #undef HAVE_FLISTEA */ + +/* Define to 1 if you have the `flistxattr' function. */ +#define HAVE_FLISTXATTR 1 + +/* Define to 1 if you have the `fnmatch' function. */ +#define HAVE_FNMATCH 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_FNMATCH_H 1 + +/* Define to 1 if you have the `fork' function. */ +#define HAVE_FORK 1 + +/* Define to 1 if fseeko (and presumably ftello) exists and is declared. */ +#define HAVE_FSEEKO 1 + +/* Define to 1 if you have the `fsetea' function. */ +/* #undef HAVE_FSETEA */ + +/* Define to 1 if you have the `fsetxattr' function. */ +/* #undef HAVE_FSETXATTR */ + +/* Define to 1 if you have the `fstat' function. */ +#define HAVE_FSTAT 1 + +/* Define to 1 if you have the `fstatat' function. */ +#define HAVE_FSTATAT 1 + +/* Define to 1 if you have the `fstatfs' function. */ +#define HAVE_FSTATFS 1 + +/* Define to 1 if you have the `fstatvfs' function. */ +#define HAVE_FSTATVFS 1 + +/* Define to 1 if you have the `ftruncate' function. */ +#define HAVE_FTRUNCATE 1 + +/* Define to 1 if you have the `futimens' function. */ +#define HAVE_FUTIMENS 1 + +/* Define to 1 if you have the `futimes' function. */ +#define HAVE_FUTIMES 1 + +/* Define to 1 if you have the `futimesat' function. */ +/* #undef HAVE_FUTIMESAT */ + +/* Define to 1 if you have the `getea' function. */ +/* #undef HAVE_GETEA */ + +/* Define to 1 if you have the `geteuid' function. 
*/ +#define HAVE_GETEUID 1 + +/* Define to 1 if you have the `getgrgid_r' function. */ +#define HAVE_GETGRGID_R 1 + +/* Define to 1 if you have the `getgrnam_r' function. */ +#define HAVE_GETGRNAM_R 1 + +/* Define to 1 if platform uses `optreset` to reset `getopt` */ +#define HAVE_GETOPT_OPTRESET 1 + +/* Define to 1 if you have the `getpid' function. */ +#define HAVE_GETPID 1 + +/* Define to 1 if you have the `getpwnam_r' function. */ +#define HAVE_GETPWNAM_R 1 + +/* Define to 1 if you have the `getpwuid_r' function. */ +#define HAVE_GETPWUID_R 1 + +/* Define to 1 if you have the `getvfsbyname' function. */ +/* #undef HAVE_GETVFSBYNAME */ + +/* Define to 1 if you have the `getxattr' function. */ +#define HAVE_GETXATTR 1 + +/* Define to 1 if you have the `gmtime_r' function. */ +#define HAVE_GMTIME_R 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_GRP_H 1 + +/* Define to 1 if you have the `iconv' function. */ +/* #undef HAVE_ICONV */ + +/* Define to 1 if you have the header file. */ +#define HAVE_ICONV_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_IO_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_LANGINFO_H 1 + +/* Define to 1 if you have the `lchflags' function. */ +/* #undef HAVE_LCHFLAGS */ + +/* Define to 1 if you have the `lchmod' function. */ +#define HAVE_LCHMOD 1 + +/* Define to 1 if you have the `lchown' function. */ +#define HAVE_LCHOWN 1 + +/* Define to 1 if you have the `lgetea' function. */ +/* #undef HAVE_LGETEA */ + +/* Define to 1 if you have the `lgetxattr' function. */ +#define HAVE_LGETXATTR 1 + +/* Define to 1 if you have the `acl' library (-lacl). */ +/* #undef HAVE_LIBACL */ + +/* Define to 1 if you have the `attr' library (-lattr). */ +/* #undef HAVE_LIBATTR */ + +/* Define to 1 if you have the `bsdxml' library (-lbsdxml). */ +/* #undef HAVE_LIBBSDXML */ + +/* Define to 1 if you have the `bz2' library (-lbz2). */ +/* #undef HAVE_LIBBZ2 */ + +/* Define to 1 if you have the `b2' library (-lb2). */ +/* #undef HAVE_LIBB2 */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_BLAKE2_H */ + +/* Define to 1 if you have the `charset' library (-lcharset). */ +/* #undef HAVE_LIBCHARSET */ + +/* Define to 1 if you have the `crypto' library (-lcrypto). */ +/* #undef HAVE_LIBCRYPTO */ + +/* Define to 1 if you have the `expat' library (-lexpat). */ +/* #undef HAVE_LIBEXPAT */ + +/* Define to 1 if you have the `gcc' library (-lgcc). */ +/* #undef HAVE_LIBGCC */ + +/* Define to 1 if you have the `lz4' library (-llz4). */ +/* #undef HAVE_LIBLZ4 */ + +/* Define to 1 if you have the `lzma' library (-llzma). */ +/* #undef HAVE_LIBLZMA */ + +/* Define to 1 if you have the `lzmadec' library (-llzmadec). */ +/* #undef HAVE_LIBLZMADEC */ + +/* Define to 1 if you have the `lzo2' library (-llzo2). */ +/* #undef HAVE_LIBLZO2 */ + +/* Define to 1 if you have the `mbedcrypto' library (-lmbedcrypto). */ +/* #undef HAVE_LIBMBEDCRYPTO */ + +/* Define to 1 if you have the `nettle' library (-lnettle). */ +/* #undef HAVE_LIBNETTLE */ + +/* Define to 1 if you have the `pcre' library (-lpcre). */ +/* #undef HAVE_LIBPCRE */ + +/* Define to 1 if you have the `pcreposix' library (-lpcreposix). */ +/* #undef HAVE_LIBPCREPOSIX */ + +/* Define to 1 if you have the `xml2' library (-lxml2). */ +#define HAVE_LIBXML2 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LIBXML_XMLREADER_H */ + +/* Define to 1 if you have the header file. 
*/ +/* #undef HAVE_LIBXML_XMLWRITER_H */ + +/* Define to 1 if you have the `z' library (-lz). */ +/* #undef HAVE_LIBZ */ + +/* Define to 1 if you have the `zstd' library (-lzstd). */ +/* #undef HAVE_LIBZSTD */ + +/* Define to 1 if you have the `zstd' library (-lzstd) with compression + support. */ +/* #undef HAVE_LIBZSTD_COMPRESSOR */ + +/* Define to 1 if you have the header file. */ +#define HAVE_LIMITS_H 1 + +/* Define to 1 if you have the `link' function. */ +#define HAVE_LINK 1 + +/* Define to 1 if you have the `linkat' function. */ +#define HAVE_LINKAT 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LINUX_FIEMAP_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LINUX_FS_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LINUX_MAGIC_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LINUX_TYPES_H */ + +/* Define to 1 if you have the `listea' function. */ +/* #undef HAVE_LISTEA */ + +/* Define to 1 if you have the `listxattr' function. */ +#define HAVE_LISTXATTR 1 + +/* Define to 1 if you have the `llistea' function. */ +/* #undef HAVE_LLISTEA */ + +/* Define to 1 if you have the `llistxattr' function. */ +#define HAVE_LLISTXATTR 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LOCALCHARSET_H */ + +/* Define to 1 if you have the `locale_charset' function. */ +/* #undef HAVE_LOCALE_CHARSET */ + +/* Define to 1 if you have the header file. */ +#define HAVE_LOCALE_H 1 + +/* Define to 1 if you have the `localtime_r' function. */ +#define HAVE_LOCALTIME_R 1 + +/* Define to 1 if the system has the type `long long int'. */ +/* #undef HAVE_LONG_LONG_INT */ + +/* Define to 1 if you have the `lsetea' function. */ +/* #undef HAVE_LSETEA */ + +/* Define to 1 if you have the `lsetxattr' function. */ +#define HAVE_LSETXATTR 1 + +/* Define to 1 if you have the `lstat' function. */ +#define HAVE_LSTAT 1 + +/* Define to 1 if `lstat' has the bug that it succeeds when given the + zero-length file name argument. */ +/* #undef HAVE_LSTAT_EMPTY_STRING_BUG */ + +/* Define to 1 if you have the `lutimes' function. */ +#define HAVE_LUTIMES 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZ4HC_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZ4_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZMADEC_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZMA_H */ + +/* Define to 1 if you have a working `lzma_stream_encoder_mt' function. */ +/* #undef HAVE_LZMA_STREAM_ENCODER_MT */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZO_LZO1X_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_LZO_LZOCONF_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MBEDTLS_AES_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MBEDTLS_MD_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MBEDTLS_PKCS5_H */ + +/* Define to 1 if you have the `mbrtowc' function. */ +/* #undef HAVE_MBRTOWC */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_MEMBERSHIP_H */ + +/* Define to 1 if you have the `memmove' function. */ +#define HAVE_MEMMOVE 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the `mkdir' function. */ +#define HAVE_MKDIR 1 + +/* Define to 1 if you have the `mkfifo' function. */ +#define HAVE_MKFIFO 1 + +/* Define to 1 if you have the `mknod' function. 
*/ +#define HAVE_MKNOD 1 + +/* Define to 1 if you have the `mkstemp' function. */ +#define HAVE_MKSTEMP 1 + +/* Define to 1 if you have the header file, and it defines `DIR'. */ +/* #undef HAVE_NDIR_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_AES_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_HMAC_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_MD5_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_PBKDF2_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_RIPEMD160_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_NETTLE_SHA_H */ + +/* Define to 1 if you have the `nl_langinfo' function. */ +/* #undef HAVE_NL_LANGINFO */ + +/* Define to 1 if you have the `openat' function. */ +#define HAVE_OPENAT 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_OPENSSL_EVP_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_PATHS_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_PCREPOSIX_H */ + +/* Define to 1 if you have the `pipe' function. */ +#define HAVE_PIPE 1 + +/* Define to 1 if you have the `PKCS5_PBKDF2_HMAC_SHA1' function. */ +/* #undef HAVE_PKCS5_PBKDF2_HMAC_SHA1 */ + +/* Define to 1 if you have the `poll' function. */ +#define HAVE_POLL 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_POLL_H 1 + +/* Define to 1 if you have the `posix_spawnp' function. */ +#define HAVE_POSIX_SPAWNP 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_PROCESS_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_PTHREAD_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_PWD_H 1 + +/* Define to 1 if you have the `readdir_r' function. */ +#define HAVE_READDIR_R 1 + +/* Define to 1 if you have the `readlink' function. */ +#define HAVE_READLINK 1 + +/* Define to 1 if you have the `readlinkat' function. */ +#define HAVE_READLINKAT 1 + +/* Define to 1 if you have the `readpassphrase' function. */ +/* #undef HAVE_READPASSPHRASE */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_READPASSPHRASE_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_REGEX_H 1 + +/* Define to 1 if you have the `select' function. */ +#define HAVE_SELECT 1 + +/* Define to 1 if you have the `setenv' function. */ +#define HAVE_SETENV 1 + +/* Define to 1 if you have the `setlocale' function. */ +#define HAVE_SETLOCALE 1 + +/* Define to 1 if you have the `sigaction' function. */ +#define HAVE_SIGACTION 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SIGNAL_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SPAWN_H 1 + +/* Define to 1 if you have the `statfs' function. */ +#define HAVE_STATFS 1 + +/* Define to 1 if you have the `statvfs' function. */ +#define HAVE_STATVFS 1 + +/* Define to 1 if `stat' has the bug that it succeeds when given the + zero-length file name argument. */ +/* #undef HAVE_STAT_EMPTY_STRING_BUG */ + +/* Define to 1 if you have the header file. */ +#define HAVE_STDARG_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the `strchr' function. */ +#define HAVE_STRCHR 1 + +/* Define to 1 if you have the `strnlen' function. */ +#define HAVE_STRNLEN 1 + +/* Define to 1 if you have the `strdup' function. 
*/ +#define HAVE_STRDUP 1 + +/* Define to 1 if you have the `strerror' function. */ +#define HAVE_STRERROR 1 + +/* Define to 1 if you have the `strerror_r' function. */ +#define HAVE_STRERROR_R 1 + +/* Define to 1 if you have the `strftime' function. */ +#define HAVE_STRFTIME 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the `strrchr' function. */ +#define HAVE_STRRCHR 1 + +/* Define to 1 if `f_namemax' is a member of `struct statfs'. */ +/* #undef HAVE_STRUCT_STATFS_F_NAMEMAX */ + +/* Define to 1 if `f_iosize' is a member of `struct statvfs'. */ +/* #undef HAVE_STRUCT_STATVFS_F_IOSIZE */ + +/* Define to 1 if `st_birthtime' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_BIRTHTIME */ + +/* Define to 1 if `st_birthtimespec.tv_nsec' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_BIRTHTIMESPEC_TV_NSEC */ + +/* Define to 1 if `st_blksize' is a member of `struct stat'. */ +#define HAVE_STRUCT_STAT_ST_BLKSIZE 1 + +/* Define to 1 if `st_flags' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_FLAGS */ + +/* Define to 1 if `st_mtimespec.tv_nsec' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC */ + +/* Define to 1 if `st_mtime_n' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_MTIME_N */ + +/* Define to 1 if `st_mtime_usec' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_MTIME_USEC */ + +/* Define to 1 if `st_mtim.tv_nsec' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC */ + +/* Define to 1 if `st_umtime' is a member of `struct stat'. */ +/* #undef HAVE_STRUCT_STAT_ST_UMTIME */ + +/* Define to 1 if `tm_gmtoff' is a member of `struct tm'. */ +#define HAVE_STRUCT_TM_TM_GMTOFF 1 + +/* Define to 1 if `__tm_gmtoff' is a member of `struct tm'. */ +/* #undef HAVE_STRUCT_TM___TM_GMTOFF */ + +/* Define to 1 if you have `struct vfsconf'. */ +/* #undef HAVE_STRUCT_VFSCONF */ + +/* Define to 1 if you have `struct xvfsconf'. */ +/* #undef HAVE_STRUCT_XVFSCONF */ + +/* Define to 1 if you have the `symlink' function. */ +#define HAVE_SYMLINK 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_ACL_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_CDEFS_H */ + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +/* #undef HAVE_SYS_DIR_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_EA_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_EXTATTR_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_IOCTL_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_MKDEV_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_MOUNT_H */ + +/* Define to 1 if you have the header file, and it defines `DIR'. + */ +/* #undef HAVE_SYS_NDIR_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_PARAM_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_POLL_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_QUEUE_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_RICHACL_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_SELECT_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_STATFS_H */ + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_SYS_STATVFS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_SYSMACROS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TIME_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_UTIME_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_UTSNAME_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_VFS_H 1 + +/* Define to 1 if you have that is POSIX.1 compatible. */ +#define HAVE_SYS_WAIT_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_SYS_XATTR_H */ + +/* Define to 1 if you have the `timegm' function. */ +#define HAVE_TIMEGM 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_TIME_H 1 + +/* Define to 1 if you have the `tzset' function. */ +#define HAVE_TZSET 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to 1 if you have the `unlinkat' function. */ +#define HAVE_UNLINKAT 1 + +/* Define to 1 if you have the `unsetenv' function. */ +#define HAVE_UNSETENV 1 + +/* Define to 1 if the system has the type `unsigned long long'. */ +/* #undef HAVE_UNSIGNED_LONG_LONG */ + +/* Define to 1 if the system has the type `unsigned long long int'. */ +/* #undef HAVE_UNSIGNED_LONG_LONG_INT */ + +/* Define to 1 if you have the `utime' function. */ +#define HAVE_UTIME 1 + +/* Define to 1 if you have the `utimensat' function. */ +#define HAVE_UTIMENSAT 1 + +/* Define to 1 if you have the `utimes' function. */ +#define HAVE_UTIMES 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UTIME_H 1 + +/* Define to 1 if you have the `vfork' function. */ +#define HAVE_VFORK 1 + +/* Define to 1 if you have the `vprintf' function. */ +#define HAVE_VPRINTF 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_WCHAR_H 1 + +/* Define to 1 if the system has the type `wchar_t'. */ +#define HAVE_WCHAR_T 1 + +/* Define to 1 if you have the `wcrtomb' function. */ +#define HAVE_WCRTOMB 1 + +/* Define to 1 if you have the `wcscmp' function. */ +#define HAVE_WCSCMP 1 + +/* Define to 1 if you have the `wcscpy' function. */ +#define HAVE_WCSCPY 1 + +/* Define to 1 if you have the `wcslen' function. */ +#define HAVE_WCSLEN 1 + +/* Define to 1 if you have the `wctomb' function. */ +#define HAVE_WCTOMB 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_WCTYPE_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_WINCRYPT_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_WINDOWS_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_WINIOCTL_H */ + +/* Define to 1 if you have _CrtSetReportMode in */ +/* #undef HAVE__CrtSetReportMode */ + +/* Define to 1 if you have the `wmemcmp' function. */ +#define HAVE_WMEMCMP 1 + +/* Define to 1 if you have the `wmemcpy' function. */ +#define HAVE_WMEMCPY 1 + +/* Define to 1 if you have the `wmemmove' function. */ +#define HAVE_WMEMMOVE 1 + +/* Define to 1 if you have a working EXT2_IOC_GETFLAGS */ +/* #undef HAVE_WORKING_EXT2_IOC_GETFLAGS */ + +/* Define to 1 if you have a working FS_IOC_GETFLAGS */ +#define HAVE_WORKING_FS_IOC_GETFLAGS 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ZLIB_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ZSTD_H */ + +/* Define to 1 if you have the `ctime_s' function. 
*/ +/* #undef HAVE_CTIME_S */ + +/* Define to 1 if you have the `_fseeki64' function. */ +/* #undef HAVE__FSEEKI64 */ + +/* Define to 1 if you have the `_get_timezone' function. */ +/* #undef HAVE__GET_TIMEZONE */ + +/* Define to 1 if you have the `gmtime_s' function. */ +/* #undef HAVE_GMTIME_S */ + +/* Define to 1 if you have the `localtime_s' function. */ +/* #undef HAVE_LOCALTIME_S */ + +/* Define to 1 if you have the `_mkgmtime' function. */ +/* #undef HAVE__MKGMTIME */ + +/* Define as const if the declaration of iconv() needs const. */ +#define ICONV_CONST + +/* Version number of libarchive as a single integer */ +#define LIBARCHIVE_VERSION_NUMBER "3007000" + +/* Version number of libarchive */ +#define LIBARCHIVE_VERSION_STRING "3.7.0" + +/* Define to 1 if `lstat' dereferences a symlink specified with a trailing + slash. */ +/* #undef LSTAT_FOLLOWS_SLASHED_SYMLINK */ + +/* Define to 1 if `major', `minor', and `makedev' are declared in . + */ +/* #undef MAJOR_IN_MKDEV */ + +/* Define to 1 if `major', `minor', and `makedev' are declared in + . */ +/* #undef MAJOR_IN_SYSMACROS */ + +/* Define to 1 if your C compiler doesn't accept -c and -o together. */ +/* #undef NO_MINUS_C_MINUS_O */ + +/* The size of `wchar_t', as computed by sizeof. */ +#define SIZEOF_WCHAR_T 4 + +/* Define to 1 if strerror_r returns char *. */ +/* #undef STRERROR_R_CHAR_P */ + +/* Define to 1 if you can safely include both and . */ +/* #undef TIME_WITH_SYS_TIME */ + +/* + * Some platform requires a macro to use extension functions. + */ +#define SAFE_TO_DEFINE_EXTENSIONS 1 +#ifdef SAFE_TO_DEFINE_EXTENSIONS +/* Enable extensions on AIX 3, Interix. */ +#ifndef _ALL_SOURCE +# define _ALL_SOURCE 1 +#endif +/* Enable GNU extensions on systems that have them. */ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif +/* Enable threading extensions on Solaris. */ +#ifndef _POSIX_PTHREAD_SEMANTICS +# define _POSIX_PTHREAD_SEMANTICS 1 +#endif +/* Enable extensions on HP NonStop. */ +#ifndef _TANDEM_SOURCE +# define _TANDEM_SOURCE 1 +#endif +/* Enable general extensions on Solaris. */ +#ifndef __EXTENSIONS__ +# define __EXTENSIONS__ 1 +#endif +#endif /* SAFE_TO_DEFINE_EXTENSIONS */ + +/* Version number of package */ +#define VERSION "3.7.0" + +/* Number of bits in a file offset, on hosts where this is settable. */ +/* #undef _FILE_OFFSET_BITS */ + +/* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */ +/* #undef _LARGEFILE_SOURCE */ + +/* Define for large files, on AIX-style hosts. */ +/* #undef _LARGE_FILES */ + +/* Define to control Windows SDK version */ +#ifndef NTDDI_VERSION +/* #undef NTDDI_VERSION */ +#endif // NTDDI_VERSION + +#ifndef _WIN32_WINNT +/* #undef _WIN32_WINNT */ +#endif // _WIN32_WINNT + +#ifndef WINVER +/* #undef WINVER */ +#endif // WINVER + +/* Define to empty if `const' does not conform to ANSI C. */ +/* #undef const */ + +/* Define to `int' if doesn't define. */ +/* #undef gid_t */ + +/* Define to `unsigned long' if does not define. */ +/* #undef id_t */ + +/* Define to `int' if does not define. */ +/* #undef mode_t */ + +/* Define to `long long' if does not define. */ +/* #undef off_t */ + +/* Define to `int' if doesn't define. */ +/* #undef pid_t */ + +/* Define to `unsigned int' if does not define. */ +/* #undef size_t */ + +/* Define to `int' if does not define. */ +/* #undef ssize_t */ + +/* Define to `int' if doesn't define. */ +/* #undef uid_t */ + +/* Define to `int' if does not define. */ +/* #undef intptr_t */ + +/* Define to `unsigned int' if does not define. 
*/ +/* #undef uintptr_t */ diff --git a/contrib/libcxx-cmake/CMakeLists.txt b/contrib/libcxx-cmake/CMakeLists.txt index a13e4f0f60a6..b7e59e2c9a3b 100644 --- a/contrib/libcxx-cmake/CMakeLists.txt +++ b/contrib/libcxx-cmake/CMakeLists.txt @@ -61,9 +61,7 @@ target_include_directories(cxx SYSTEM BEFORE PUBLIC $<$:$ target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI) # Enable capturing stack traces for all exceptions. -if (USE_UNWIND) - target_compile_definitions(cxx PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) -endif () +target_compile_definitions(cxx PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) if (USE_MUSL) target_compile_definitions(cxx PUBLIC -D_LIBCPP_HAS_MUSL_LIBC=1) diff --git a/contrib/libcxxabi-cmake/CMakeLists.txt b/contrib/libcxxabi-cmake/CMakeLists.txt index 0473527912ea..c7ee34e6e287 100644 --- a/contrib/libcxxabi-cmake/CMakeLists.txt +++ b/contrib/libcxxabi-cmake/CMakeLists.txt @@ -35,12 +35,10 @@ target_include_directories(cxxabi SYSTEM BEFORE ) target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY) target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast. -target_link_libraries(cxxabi PUBLIC ${EXCEPTION_HANDLING_LIBRARY}) +target_link_libraries(cxxabi PUBLIC unwind) # Enable capturing stack traces for all exceptions. -if (USE_UNWIND) - target_compile_definitions(cxxabi PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) -endif () +target_compile_definitions(cxxabi PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1) install( TARGETS cxxabi diff --git a/contrib/libhdfs3 b/contrib/libhdfs3 index 164b89253fad..377220ef351a 160000 --- a/contrib/libhdfs3 +++ b/contrib/libhdfs3 @@ -1 +1 @@ -Subproject commit 164b89253fad7991bce77882f01b51ab81d19f3d +Subproject commit 377220ef351ae24994a5fcd2b5fa3930d00c4db0 diff --git a/contrib/libmetrohash/src/platform.h b/contrib/libmetrohash/src/platform.h index bc00e5a286be..9e83d11cb7ce 100644 --- a/contrib/libmetrohash/src/platform.h +++ b/contrib/libmetrohash/src/platform.h @@ -17,7 +17,8 @@ #ifndef METROHASH_PLATFORM_H #define METROHASH_PLATFORM_H -#include <stdint.h> +#include <bit> +#include <cstdint> #include <string.h> // rotate right idiom recognized by most compilers @@ -33,6 +34,11 @@ inline static uint64_t read_u64(const void * const ptr) // so we use memcpy() which is the most portable. clang & gcc usually translates `memcpy()` into a single `load` instruction // when hardware supports it, so using memcpy() is efficient too.
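// Note on the change below (an annotation, not part of the upstream patch):
// the memcpy performs an alignment-safe load, and on big-endian targets the
// added #if block byte-reverses the loaded value, since MetroHash's
// arithmetic assumes little-endian input. std::byteswap is the C++23
// facility from <bit>; on pre-C++23 toolchains the GCC/Clang builtins
// __builtin_bswap64/32/16 would be the usual equivalent (an assumption
// about available compilers, not something this patch relies on).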
memcpy(&result, ptr, sizeof(result)); + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + result = std::byteswap(result); +#endif + return result; } @@ -40,6 +46,11 @@ inline static uint64_t read_u32(const void * const ptr) { uint32_t result; memcpy(&result, ptr, sizeof(result)); + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + result = std::byteswap(result); +#endif + return result; } @@ -47,6 +58,11 @@ inline static uint64_t read_u16(const void * const ptr) { uint16_t result; memcpy(&result, ptr, sizeof(result)); + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + result = std::byteswap(result); +#endif + return result; } diff --git a/contrib/libpqxx b/contrib/libpqxx index bdd6540fb95f..791d68fd8990 160000 --- a/contrib/libpqxx +++ b/contrib/libpqxx @@ -1 +1 @@ -Subproject commit bdd6540fb95ff56c813691ceb5da5a3266cf235d +Subproject commit 791d68fd89902835133c50435e380ec7a73271b7 diff --git a/contrib/libssh b/contrib/libssh new file mode 160000 index 000000000000..2c76332ef56d --- /dev/null +++ b/contrib/libssh @@ -0,0 +1 @@ +Subproject commit 2c76332ef56d90f55965ab24da6b6dbcbef29c4c diff --git a/contrib/libssh-cmake/CMakeLists.txt b/contrib/libssh-cmake/CMakeLists.txt new file mode 100644 index 000000000000..58db81cf3526 --- /dev/null +++ b/contrib/libssh-cmake/CMakeLists.txt @@ -0,0 +1,74 @@ +set(LIB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libssh") +set(LIB_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/libssh") +# Specify search path for CMake modules to be loaded by include() +# and find_package() +list(APPEND CMAKE_MODULE_PATH "${LIB_SOURCE_DIR}/cmake/Modules") + +include(DefineCMakeDefaults) +include(DefineCompilerFlags) + +project(libssh VERSION 0.9.7 LANGUAGES C) + +# global needed variable +set(APPLICATION_NAME ${PROJECT_NAME}) + +# SOVERSION scheme: CURRENT.AGE.REVISION +# If there was an incompatible interface change: +# Increment CURRENT. Set AGE and REVISION to 0 +# If there was a compatible interface change: +# Increment AGE. Set REVISION to 0 +# If the source code was changed, but there were no interface changes: +# Increment REVISION. 
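# To make the scheme above concrete (an illustrative reading, not part of
# the upstream file): starting from the 4.8.7 set just below, an incompatible
# interface change would bump the version to 5.0.0, a backward-compatible
# interface addition to 4.9.0, and a source-only change to 4.8.8.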
+set(LIBRARY_VERSION "4.8.7") +set(LIBRARY_SOVERSION "4") + +# where to look first for cmake modules, before ${CMAKE_ROOT}/Modules/ is checked + +# add definitions + +include(DefinePlatformDefaults) + +# Copy library files to a lib sub-directory +set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${LIB_BINARY_DIR}/lib") + +set(CMAKE_THREAD_PREFER_PTHREADS ON) +set(THREADS_PREFER_PTHREAD_FLAG ON) + +set(WITH_ZLIB OFF) +set(WITH_SYMBOL_VERSIONING OFF) +set(WITH_SERVER ON) + +include(IncludeSources.cmake) +if (OS_LINUX) + if (ARCH_AMD64) + if (USE_MUSL) + target_include_directories(_ssh PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/libssh-cmake/linux/x86-64-musl") + else() + target_include_directories(_ssh PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/libssh-cmake/linux/x86-64") + endif () + elseif (ARCH_AARCH64) + if (USE_MUSL) + target_include_directories(_ssh PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/libssh-cmake/linux/aarch64-musl") + else() + target_include_directories(_ssh PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/libssh-cmake/linux/aarch64") + endif () + elseif (ARCH_PPC64LE) + target_include_directories(_ssh PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/libssh-cmake/linux/ppc64le") + elseif (ARCH_S390X) + target_include_directories(_ssh PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/libssh-cmake/linux/s390x") + elseif (ARCH_RISCV64) + target_include_directories(_ssh PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/libssh-cmake/linux/riscv64") + else () + message(FATAL_ERROR "Platform is not supported") + endif () +elseif (OS_DARWIN) + target_include_directories(_ssh PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/libssh-cmake/darwin") +elseif (OS_FREEBSD) + target_include_directories(_ssh PRIVATE "${ClickHouse_SOURCE_DIR}/contrib/libssh-cmake/freebsd") +else () + message(FATAL_ERROR "Platform is not supported") +endif() + +configure_file(${LIB_SOURCE_DIR}/include/libssh/libssh_version.h.cmake + ${LIB_BINARY_DIR}/include/libssh/libssh_version.h + @ONLY) diff --git a/contrib/libssh-cmake/IncludeSources.cmake b/contrib/libssh-cmake/IncludeSources.cmake new file mode 100644 index 000000000000..d72cf11da1f3 --- /dev/null +++ b/contrib/libssh-cmake/IncludeSources.cmake @@ -0,0 +1,140 @@ +set(LIBSSH_LINK_LIBRARIES + ${LIBSSH_REQUIRED_LIBRARIES} +) + + +set(LIBSSH_LINK_LIBRARIES + ${LIBSSH_LINK_LIBRARIES} + OpenSSL::Crypto +) + +if (MINGW AND Threads_FOUND) + set(LIBSSH_LINK_LIBRARIES + ${LIBSSH_LINK_LIBRARIES} + Threads::Threads + ) +endif() + +set(libssh_SRCS + ${LIB_SOURCE_DIR}/src/agent.c + ${LIB_SOURCE_DIR}/src/auth.c + ${LIB_SOURCE_DIR}/src/base64.c + ${LIB_SOURCE_DIR}/src/bignum.c + ${LIB_SOURCE_DIR}/src/buffer.c + ${LIB_SOURCE_DIR}/src/callbacks.c + ${LIB_SOURCE_DIR}/src/channels.c + ${LIB_SOURCE_DIR}/src/client.c + ${LIB_SOURCE_DIR}/src/config.c + ${LIB_SOURCE_DIR}/src/connect.c + ${LIB_SOURCE_DIR}/src/connector.c + ${LIB_SOURCE_DIR}/src/curve25519.c + ${LIB_SOURCE_DIR}/src/dh.c + ${LIB_SOURCE_DIR}/src/ecdh.c + ${LIB_SOURCE_DIR}/src/error.c + ${LIB_SOURCE_DIR}/src/getpass.c + ${LIB_SOURCE_DIR}/src/init.c + ${LIB_SOURCE_DIR}/src/kdf.c + ${LIB_SOURCE_DIR}/src/kex.c + ${LIB_SOURCE_DIR}/src/known_hosts.c + ${LIB_SOURCE_DIR}/src/knownhosts.c + ${LIB_SOURCE_DIR}/src/legacy.c + ${LIB_SOURCE_DIR}/src/log.c + ${LIB_SOURCE_DIR}/src/match.c + ${LIB_SOURCE_DIR}/src/messages.c + ${LIB_SOURCE_DIR}/src/misc.c + ${LIB_SOURCE_DIR}/src/options.c + ${LIB_SOURCE_DIR}/src/packet.c + ${LIB_SOURCE_DIR}/src/packet_cb.c + ${LIB_SOURCE_DIR}/src/packet_crypt.c + ${LIB_SOURCE_DIR}/src/pcap.c + ${LIB_SOURCE_DIR}/src/pki.c + 
${LIB_SOURCE_DIR}/src/pki_container_openssh.c + ${LIB_SOURCE_DIR}/src/poll.c + ${LIB_SOURCE_DIR}/src/session.c + ${LIB_SOURCE_DIR}/src/scp.c + ${LIB_SOURCE_DIR}/src/socket.c + ${LIB_SOURCE_DIR}/src/string.c + ${LIB_SOURCE_DIR}/src/threads.c + ${LIB_SOURCE_DIR}/src/wrapper.c + ${LIB_SOURCE_DIR}/src/external/bcrypt_pbkdf.c + ${LIB_SOURCE_DIR}/src/external/blowfish.c + ${LIB_SOURCE_DIR}/src/external/chacha.c + ${LIB_SOURCE_DIR}/src/external/poly1305.c + ${LIB_SOURCE_DIR}/src/chachapoly.c + ${LIB_SOURCE_DIR}/src/config_parser.c + ${LIB_SOURCE_DIR}/src/token.c + ${LIB_SOURCE_DIR}/src/pki_ed25519_common.c +) + +if (DEFAULT_C_NO_DEPRECATION_FLAGS) + set_source_files_properties(known_hosts.c + PROPERTIES + COMPILE_FLAGS ${DEFAULT_C_NO_DEPRECATION_FLAGS}) +endif() + +if (CMAKE_USE_PTHREADS_INIT) + set(libssh_SRCS + ${libssh_SRCS} + ${LIB_SOURCE_DIR}/src/threads/noop.c + ${LIB_SOURCE_DIR}/src/threads/pthread.c + ) +elseif (CMAKE_USE_WIN32_THREADS_INIT) + set(libssh_SRCS + ${libssh_SRCS} + ${LIB_SOURCE_DIR}/src/threads/noop.c + ${LIB_SOURCE_DIR}/src/threads/winlocks.c + ) +else() + set(libssh_SRCS + ${libssh_SRCS} + ${LIB_SOURCE_DIR}/src/threads/noop.c + ) +endif() + +# LIBCRYPT specific +set(libssh_SRCS + ${libssh_SRCS} + ${LIB_SOURCE_DIR}/src/threads/libcrypto.c + ${LIB_SOURCE_DIR}/src/pki_crypto.c + ${LIB_SOURCE_DIR}/src/ecdh_crypto.c + ${LIB_SOURCE_DIR}/src/libcrypto.c + ${LIB_SOURCE_DIR}/src/dh_crypto.c +) + +if (NOT (ENABLE_OPENSSL OR ENABLE_OPENSSL_DYNAMIC)) + add_compile_definitions(USE_BORINGSSL=1) +endif() + +set(libssh_SRCS +${libssh_SRCS} +${LIB_SOURCE_DIR}/src/options.c +${LIB_SOURCE_DIR}/src/server.c +${LIB_SOURCE_DIR}/src/bind.c +${LIB_SOURCE_DIR}/src/bind_config.c +) + + +add_library(_ssh STATIC ${libssh_SRCS}) + +target_include_directories(_ssh PRIVATE ${LIB_BINARY_DIR}) +target_include_directories(_ssh PUBLIC "${LIB_SOURCE_DIR}/include" "${LIB_BINARY_DIR}/include") +target_link_libraries(_ssh + PRIVATE ${LIBSSH_LINK_LIBRARIES}) + +add_library(ch_contrib::ssh ALIAS _ssh) + +target_compile_options(_ssh + PRIVATE + ${DEFAULT_C_COMPILE_FLAGS} + -D_GNU_SOURCE) + + +set_target_properties(_ssh + PROPERTIES + VERSION + ${LIBRARY_VERSION} + SOVERSION + ${LIBRARY_SOVERSION} + DEFINE_SYMBOL + LIBSSH_EXPORTS +) diff --git a/contrib/libssh-cmake/darwin/config.h b/contrib/libssh-cmake/darwin/config.h new file mode 100644 index 000000000000..12378a64ceaa --- /dev/null +++ b/contrib/libssh-cmake/darwin/config.h @@ -0,0 +1,287 @@ +/* Name of package */ +#define PACKAGE "libssh" + +/* Version number of package */ +#define VERSION "0.9.7" + +#define SYSCONFDIR "etc" +#define BINARYDIR "/home/ubuntu/workdir/ClickHouse/build/darwin" +#define SOURCEDIR "/home/ubuntu/workdir/ClickHouse" + +/* Global bind configuration file path */ +#define GLOBAL_BIND_CONFIG "/etc/ssh/libssh_server_config" + +/* Global client configuration file path */ +#define GLOBAL_CLIENT_CONFIG "/etc/ssh/ssh_config" + +/************************** HEADER FILES *************************/ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_ARGP_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_ARPA_INET_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_GLOB_H 1 + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_VALGRIND_VALGRIND_H */ + +/* Define to 1 if you have the header file. */ +/* #undef HAVE_PTY_H */ + +/* Define to 1 if you have the header file. */ +#define HAVE_UTMP_H 1 + +/* Define to 1 if you have the header file. 
diff --git a/contrib/libssh-cmake/darwin/config.h b/contrib/libssh-cmake/darwin/config.h
new file mode 100644
index 000000000000..12378a64ceaa
--- /dev/null
+++ b/contrib/libssh-cmake/darwin/config.h
@@ -0,0 +1,287 @@
+/* Name of package */
+#define PACKAGE "libssh"
+
+/* Version number of package */
+#define VERSION "0.9.7"
+
+#define SYSCONFDIR "etc"
+#define BINARYDIR "/home/ubuntu/workdir/ClickHouse/build/darwin"
+#define SOURCEDIR "/home/ubuntu/workdir/ClickHouse"
+
+/* Global bind configuration file path */
+#define GLOBAL_BIND_CONFIG "/etc/ssh/libssh_server_config"
+
+/* Global client configuration file path */
+#define GLOBAL_CLIENT_CONFIG "/etc/ssh/ssh_config"
+
+/************************** HEADER FILES *************************/
+
+/* Define to 1 if you have the <argp.h> header file. */
+/* #undef HAVE_ARGP_H */
+
+/* Define to 1 if you have the <arpa/inet.h> header file. */
+#define HAVE_ARPA_INET_H 1
+
+/* Define to 1 if you have the <glob.h> header file. */
+#define HAVE_GLOB_H 1
+
+/* Define to 1 if you have the <valgrind/valgrind.h> header file. */
+/* #undef HAVE_VALGRIND_VALGRIND_H */
+
+/* Define to 1 if you have the <pty.h> header file. */
+/* #undef HAVE_PTY_H */
+
+/* Define to 1 if you have the <utmp.h> header file. */
+#define HAVE_UTMP_H 1
+
+/* Define to 1 if you have the <util.h> header file. */
+#define HAVE_UTIL_H 1
+
+/* Define to 1 if you have the <libutil.h> header file. */
+/* #undef HAVE_LIBUTIL_H */
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#define HAVE_SYS_TIME_H 1
+
+/* Define to 1 if you have the <sys/utime.h> header file. */
+/* #undef HAVE_SYS_UTIME_H */
+
+/* Define to 1 if you have the <io.h> header file. */
+/* #undef HAVE_IO_H */
+
+/* Define to 1 if you have the <termios.h> header file. */
+#define HAVE_TERMIOS_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <openssl/aes.h> header file. */
+#define HAVE_OPENSSL_AES_H 1
+
+/* Define to 1 if you have the <wspiapi.h> header file. */
+/* #undef HAVE_WSPIAPI_H */
+
+/* Define to 1 if you have the <openssl/blowfish.h> header file. */
+/* #undef HAVE_OPENSSL_BLOWFISH_H */
+
+/* Define to 1 if you have the <openssl/des.h> header file. */
+#define HAVE_OPENSSL_DES_H 1
+
+/* Define to 1 if you have the <openssl/ecdh.h> header file. */
+#define HAVE_OPENSSL_ECDH_H 1
+
+/* Define to 1 if you have the <openssl/ec.h> header file. */
+#define HAVE_OPENSSL_EC_H 1
+
+/* Define to 1 if you have the <openssl/ecdsa.h> header file. */
+#define HAVE_OPENSSL_ECDSA_H 1
+
+/* Define to 1 if you have the <pthread.h> header file. */
+#define HAVE_PTHREAD_H 1
+
+/* Define to 1 if you have elliptic curve cryptography in openssl */
+#define HAVE_OPENSSL_ECC 1
+
+/* Define to 1 if you have elliptic curve cryptography in gcrypt */
+/* #undef HAVE_GCRYPT_ECC */
+
+/* Define to 1 if you have elliptic curve cryptography */
+#define HAVE_ECC 1
+
+/* Define to 1 if you have DSA */
+/* #undef HAVE_DSA */
+
+/* Define to 1 if you have gl_flags as a glob_t struct member */
+#define HAVE_GLOB_GL_FLAGS_MEMBER 1
+
+/* Define to 1 if you have OpenSSL with Ed25519 support */
+#define HAVE_OPENSSL_ED25519 1
+
+/* Define to 1 if you have OpenSSL with X25519 support */
+#define HAVE_OPENSSL_X25519 1
+
+/*************************** FUNCTIONS ***************************/
+
+/* Define to 1 if you have the `EVP_aes128_ctr' function. */
+#define HAVE_OPENSSL_EVP_AES_CTR 1
+
+/* Define to 1 if you have the `EVP_aes128_cbc' function. */
+#define HAVE_OPENSSL_EVP_AES_CBC 1
+
+/* Define to 1 if you have the `EVP_aes128_gcm' function. */
+/* #undef HAVE_OPENSSL_EVP_AES_GCM */
+
+/* Define to 1 if you have the `CRYPTO_THREADID_set_callback' function. */
+#define HAVE_OPENSSL_CRYPTO_THREADID_SET_CALLBACK 1
+
+/* Define to 1 if you have the `CRYPTO_ctr128_encrypt' function. */
+#define HAVE_OPENSSL_CRYPTO_CTR128_ENCRYPT 1
+
+/* Define to 1 if you have the `EVP_CIPHER_CTX_new' function. */
+#define HAVE_OPENSSL_EVP_CIPHER_CTX_NEW 1
+
+/* Define to 1 if you have the `EVP_KDF_CTX_new_id' function. */
+/* #undef HAVE_OPENSSL_EVP_KDF_CTX_NEW_ID */
+
+/* Define to 1 if you have the `FIPS_mode' function. */
+#if USE_BORINGSSL
+#define HAVE_OPENSSL_FIPS_MODE 1
+#endif
+
+/* Define to 1 if you have the `EVP_DigestSign' function. */
+#define HAVE_OPENSSL_EVP_DIGESTSIGN 1
+
+/* Define to 1 if you have the `EVP_DigestVerify' function. */
+#define HAVE_OPENSSL_EVP_DIGESTVERIFY 1
+
+/* Define to 1 if you have the `OPENSSL_ia32cap_loc' function. */
+/* #undef HAVE_OPENSSL_IA32CAP_LOC */
+
+/* Define to 1 if you have the `snprintf' function. */
+#define HAVE_SNPRINTF 1
+
+/* Define to 1 if you have the `_snprintf' function. */
+/* #undef HAVE__SNPRINTF */
+
+/* Define to 1 if you have the `_snprintf_s' function. */
+/* #undef HAVE__SNPRINTF_S */
+
+/* Define to 1 if you have the `vsnprintf' function. */
+#define HAVE_VSNPRINTF 1
+
+/* Define to 1 if you have the `_vsnprintf' function. */
+/* #undef HAVE__VSNPRINTF */
+
+/* Define to 1 if you have the `_vsnprintf_s' function. */
+/* #undef HAVE__VSNPRINTF_S */
+
+/* Define to 1 if you have the `isblank' function. */
+#define HAVE_ISBLANK 1
+
+/* Define to 1 if you have the `strncpy' function. */
+#define HAVE_STRNCPY 1
+
+/* Define to 1 if you have the `strndup' function. */
+#define HAVE_STRNDUP 1
+
+/* Define to 1 if you have the `cfmakeraw' function. */
+/* #undef HAVE_CFMAKERAW */
+
+/* Define to 1 if you have the `getaddrinfo' function. */
+#define HAVE_GETADDRINFO 1
+
+/* Define to 1 if you have the `poll' function. */
+#define HAVE_POLL 1
+
+/* Define to 1 if you have the `select' function. */
+#define HAVE_SELECT 1
+
+/* Define to 1 if you have the `clock_gettime' function. */
+/* #undef HAVE_CLOCK_GETTIME */
+
+/* Define to 1 if you have the `ntohll' function. */
+#define HAVE_NTOHLL 1
+
+/* Define to 1 if you have the `htonll' function. */
+#define HAVE_HTONLL 1
+
+/* Define to 1 if you have the `strtoull' function. */
+#define HAVE_STRTOULL 1
+
+/* Define to 1 if you have the `__strtoull' function. */
+/* #undef HAVE___STRTOULL */
+
+/* Define to 1 if you have the `_strtoui64' function. */
+/* #undef HAVE__STRTOUI64 */
+
+/* Define to 1 if you have the `glob' function. */
+#define HAVE_GLOB 1
+
+/* Define to 1 if you have the `explicit_bzero' function. */
+/* #undef HAVE_EXPLICIT_BZERO */
+
+/* Define to 1 if you have the `memset_s' function. */
+#define HAVE_MEMSET_S 1
+
+/* Define to 1 if you have the `SecureZeroMemory' function. */
+/* #undef HAVE_SECURE_ZERO_MEMORY */
+
+/* Define to 1 if you have the `cmocka_set_test_filter' function. */
+/* #undef HAVE_CMOCKA_SET_TEST_FILTER */
+
+/*************************** LIBRARIES ***************************/
+
+/* Define to 1 if you have the `crypto' library (-lcrypto). */
+#define HAVE_LIBCRYPTO 1
+
+/* Define to 1 if you have the `gcrypt' library (-lgcrypt). */
+/* #undef HAVE_LIBGCRYPT */
+
+/* Define to 1 if you have the 'mbedTLS' library (-lmbedtls). */
+/* #undef HAVE_LIBMBEDCRYPTO */
+
+/* Define to 1 if you have the `pthread' library (-lpthread). */
+#define HAVE_PTHREAD 1
+
+/* Define to 1 if you have the `cmocka' library (-lcmocka). */
+/* #undef HAVE_CMOCKA */
+
+/**************************** OPTIONS ****************************/
+
+#define HAVE_GCC_THREAD_LOCAL_STORAGE 1
+/* #undef HAVE_MSC_THREAD_LOCAL_STORAGE */
+
+#define HAVE_FALLTHROUGH_ATTRIBUTE 1
+#define HAVE_UNUSED_ATTRIBUTE 1
+
+#define HAVE_CONSTRUCTOR_ATTRIBUTE 1
+#define HAVE_DESTRUCTOR_ATTRIBUTE 1
+
+#define HAVE_GCC_VOLATILE_MEMORY_PROTECTION 1
+
+#define HAVE_COMPILER__FUNC__ 1
+#define HAVE_COMPILER__FUNCTION__ 1
+
+/* #undef HAVE_GCC_BOUNDED_ATTRIBUTE */
+
+/* Define to 1 if you want to enable GSSAPI */
+/* #undef WITH_GSSAPI */
+
+/* Define to 1 if you want to enable ZLIB */
+/* #undef WITH_ZLIB */
+
+/* Define to 1 if you want to enable SFTP */
+/* #undef WITH_SFTP */
+
+/* Define to 1 if you want to enable server support */
+#define WITH_SERVER 1
+
+/* Define to 1 if you want to enable DH group exchange algorithms */
+/* #undef WITH_GEX */
+
+/* Define to 1 if you want to enable blowfish cipher support */
+/* #undef WITH_BLOWFISH_CIPHER */
+
+/* Define to 1 if you want to enable debug output for crypto functions */
+/* #undef DEBUG_CRYPTO */
+
+/* Define to 1 if you want to enable debug output for packet functions */
+/* #undef DEBUG_PACKET */
+
+/* Define to 1 if you want to enable pcap output support (experimental) */
+/* #undef WITH_PCAP */
+
+/* Define to 1 if you want to enable calltrace debug output */
+/* #undef DEBUG_CALLTRACE */
+
+/* Define to 1 if you want to enable NaCl support */
+/* #undef WITH_NACL */
+
+/*************************** ENDIAN *****************************/
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+/* #undef WORDS_BIGENDIAN */
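The darwin header above and the freebsd one below disagree exactly where the libc APIs differ: macOS advertises memset_s (and 64-bit ntohll/htonll), while FreeBSD and the Linux glibc targets advertise explicit_bzero instead. A sketch of the usual consumption pattern for the zeroing macros; illustrative only, `burn_buffer` is a hypothetical name rather than libssh's own API:

#define __STDC_WANT_LIB_EXT1__ 1  /* must precede <string.h> to expose memset_s (C11 Annex K) */
#include "config.h"
#include <string.h>
#ifdef HAVE_EXPLICIT_BZERO
#include <strings.h>              /* FreeBSD declares explicit_bzero here */
#endif

/* Zero sensitive data in a way the optimiser may not elide,
   picking whichever primitive this platform's config.h advertises. */
static void burn_buffer(void *ptr, size_t len)
{
#if defined(HAVE_MEMSET_S)
    memset_s(ptr, len, 0, len);
#elif defined(HAVE_EXPLICIT_BZERO)
    explicit_bzero(ptr, len);
#else
    volatile unsigned char *p = (volatile unsigned char *)ptr;
    while (len--)
        *p++ = 0;
#endif
}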
diff --git a/contrib/libssh-cmake/freebsd/config.h b/contrib/libssh-cmake/freebsd/config.h
new file mode 100644
index 000000000000..8a70acb473c0
--- /dev/null
+++ b/contrib/libssh-cmake/freebsd/config.h
@@ -0,0 +1,287 @@
+/* Name of package */
+#define PACKAGE "libssh"
+
+/* Version number of package */
+#define VERSION "0.9.7"
+
+#define SYSCONFDIR "etc"
+#define BINARYDIR "/home/ubuntu/workdir/ClickHouse/build/freebsd"
+#define SOURCEDIR "/home/ubuntu/workdir/ClickHouse"
+
+/* Global bind configuration file path */
+#define GLOBAL_BIND_CONFIG "/etc/ssh/libssh_server_config"
+
+/* Global client configuration file path */
+#define GLOBAL_CLIENT_CONFIG "/etc/ssh/ssh_config"
+
+/************************** HEADER FILES *************************/
+
+/* Define to 1 if you have the <argp.h> header file. */
+/* #undef HAVE_ARGP_H */
+
+/* Define to 1 if you have the <arpa/inet.h> header file. */
+#define HAVE_ARPA_INET_H 1
+
+/* Define to 1 if you have the <glob.h> header file. */
+#define HAVE_GLOB_H 1
+
+/* Define to 1 if you have the <valgrind/valgrind.h> header file. */
+/* #undef HAVE_VALGRIND_VALGRIND_H */
+
+/* Define to 1 if you have the <pty.h> header file. */
+/* #undef HAVE_PTY_H */
+
+/* Define to 1 if you have the <utmp.h> header file. */
+/* #undef HAVE_UTMP_H */
+
+/* Define to 1 if you have the <util.h> header file. */
+/* #undef HAVE_UTIL_H */
+
+/* Define to 1 if you have the <libutil.h> header file. */
+/* #undef HAVE_LIBUTIL_H */
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#define HAVE_SYS_TIME_H 1
+
+/* Define to 1 if you have the <sys/utime.h> header file. */
+/* #undef HAVE_SYS_UTIME_H */
+
+/* Define to 1 if you have the <io.h> header file. */
+/* #undef HAVE_IO_H */
+
+/* Define to 1 if you have the <termios.h> header file. */
+#define HAVE_TERMIOS_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <openssl/aes.h> header file. */
+#define HAVE_OPENSSL_AES_H 1
+
+/* Define to 1 if you have the <wspiapi.h> header file. */
+/* #undef HAVE_WSPIAPI_H */
+
+/* Define to 1 if you have the <openssl/blowfish.h> header file. */
+/* #undef HAVE_OPENSSL_BLOWFISH_H */
+
+/* Define to 1 if you have the <openssl/des.h> header file. */
+#define HAVE_OPENSSL_DES_H 1
+
+/* Define to 1 if you have the <openssl/ecdh.h> header file. */
+#define HAVE_OPENSSL_ECDH_H 1
+
+/* Define to 1 if you have the <openssl/ec.h> header file. */
+#define HAVE_OPENSSL_EC_H 1
+
+/* Define to 1 if you have the <openssl/ecdsa.h> header file. */
+#define HAVE_OPENSSL_ECDSA_H 1
+
+/* Define to 1 if you have the <pthread.h> header file. */
+#define HAVE_PTHREAD_H 1
+
+/* Define to 1 if you have elliptic curve cryptography in openssl */
+#define HAVE_OPENSSL_ECC 1
+
+/* Define to 1 if you have elliptic curve cryptography in gcrypt */
+/* #undef HAVE_GCRYPT_ECC */
+
+/* Define to 1 if you have elliptic curve cryptography */
+#define HAVE_ECC 1
+
+/* Define to 1 if you have DSA */
+/* #undef HAVE_DSA */
+
+/* Define to 1 if you have gl_flags as a glob_t struct member */
+#define HAVE_GLOB_GL_FLAGS_MEMBER 1
+
+/* Define to 1 if you have OpenSSL with Ed25519 support */
+#define HAVE_OPENSSL_ED25519 1
+
+/* Define to 1 if you have OpenSSL with X25519 support */
+#define HAVE_OPENSSL_X25519 1
+
+/*************************** FUNCTIONS ***************************/
+
+/* Define to 1 if you have the `EVP_aes128_ctr' function. */
+#define HAVE_OPENSSL_EVP_AES_CTR 1
+
+/* Define to 1 if you have the `EVP_aes128_cbc' function. */
+#define HAVE_OPENSSL_EVP_AES_CBC 1
+
+/* Define to 1 if you have the `EVP_aes128_gcm' function. */
+/* #undef HAVE_OPENSSL_EVP_AES_GCM */
+
+/* Define to 1 if you have the `CRYPTO_THREADID_set_callback' function. */
+#define HAVE_OPENSSL_CRYPTO_THREADID_SET_CALLBACK 1
+
+/* Define to 1 if you have the `CRYPTO_ctr128_encrypt' function. */
+#define HAVE_OPENSSL_CRYPTO_CTR128_ENCRYPT 1
+
+/* Define to 1 if you have the `EVP_CIPHER_CTX_new' function. */
+#define HAVE_OPENSSL_EVP_CIPHER_CTX_NEW 1
+
+/* Define to 1 if you have the `EVP_KDF_CTX_new_id' function. */
+/* #undef HAVE_OPENSSL_EVP_KDF_CTX_NEW_ID */
+
+/* Define to 1 if you have the `FIPS_mode' function. */
+#if USE_BORINGSSL
+#define HAVE_OPENSSL_FIPS_MODE 1
+#endif
+
+/* Define to 1 if you have the `EVP_DigestSign' function. */
+#define HAVE_OPENSSL_EVP_DIGESTSIGN 1
+
+/* Define to 1 if you have the `EVP_DigestVerify' function. */
+#define HAVE_OPENSSL_EVP_DIGESTVERIFY 1
+
+/* Define to 1 if you have the `OPENSSL_ia32cap_loc' function. */
+/* #undef HAVE_OPENSSL_IA32CAP_LOC */
+
+/* Define to 1 if you have the `snprintf' function. */
+#define HAVE_SNPRINTF 1
+
+/* Define to 1 if you have the `_snprintf' function. */
+/* #undef HAVE__SNPRINTF */
+
+/* Define to 1 if you have the `_snprintf_s' function. */
+/* #undef HAVE__SNPRINTF_S */
+
+/* Define to 1 if you have the `vsnprintf' function. */
+#define HAVE_VSNPRINTF 1
+
+/* Define to 1 if you have the `_vsnprintf' function. */
+/* #undef HAVE__VSNPRINTF */
+
+/* Define to 1 if you have the `_vsnprintf_s' function. */
+/* #undef HAVE__VSNPRINTF_S */
+
+/* Define to 1 if you have the `isblank' function. */
+#define HAVE_ISBLANK 1
+
+/* Define to 1 if you have the `strncpy' function. */
+#define HAVE_STRNCPY 1
+
+/* Define to 1 if you have the `strndup' function. */
+#define HAVE_STRNDUP 1
+
+/* Define to 1 if you have the `cfmakeraw' function. */
+/* #undef HAVE_CFMAKERAW */
+
+/* Define to 1 if you have the `getaddrinfo' function. */
+#define HAVE_GETADDRINFO 1
+
+/* Define to 1 if you have the `poll' function. */
+#define HAVE_POLL 1
+
+/* Define to 1 if you have the `select' function. */
+#define HAVE_SELECT 1
+
+/* Define to 1 if you have the `clock_gettime' function. */
+/* #undef HAVE_CLOCK_GETTIME */
+
+/* Define to 1 if you have the `ntohll' function. */
+/* #undef HAVE_NTOHLL */
+
+/* Define to 1 if you have the `htonll' function. */
+/* #undef HAVE_HTONLL */
+
+/* Define to 1 if you have the `strtoull' function. */
+#define HAVE_STRTOULL 1
+
+/* Define to 1 if you have the `__strtoull' function. */
+/* #undef HAVE___STRTOULL */
+
+/* Define to 1 if you have the `_strtoui64' function. */
+/* #undef HAVE__STRTOUI64 */
+
+/* Define to 1 if you have the `glob' function. */
+#define HAVE_GLOB 1
+
+/* Define to 1 if you have the `explicit_bzero' function. */
+#define HAVE_EXPLICIT_BZERO 1
+
+/* Define to 1 if you have the `memset_s' function. */
+/* #undef HAVE_MEMSET_S */
+
+/* Define to 1 if you have the `SecureZeroMemory' function. */
+/* #undef HAVE_SECURE_ZERO_MEMORY */
+
+/* Define to 1 if you have the `cmocka_set_test_filter' function. */
+/* #undef HAVE_CMOCKA_SET_TEST_FILTER */
+
+/*************************** LIBRARIES ***************************/
+
+/* Define to 1 if you have the `crypto' library (-lcrypto). */
+#define HAVE_LIBCRYPTO 1
+
+/* Define to 1 if you have the `gcrypt' library (-lgcrypt). */
+/* #undef HAVE_LIBGCRYPT */
+
+/* Define to 1 if you have the 'mbedTLS' library (-lmbedtls). */
+/* #undef HAVE_LIBMBEDCRYPTO */
+
+/* Define to 1 if you have the `pthread' library (-lpthread). */
+#define HAVE_PTHREAD 1
+
+/* Define to 1 if you have the `cmocka' library (-lcmocka). */
+/* #undef HAVE_CMOCKA */
+
+/**************************** OPTIONS ****************************/
+
+#define HAVE_GCC_THREAD_LOCAL_STORAGE 1
+/* #undef HAVE_MSC_THREAD_LOCAL_STORAGE */
+
+#define HAVE_FALLTHROUGH_ATTRIBUTE 1
+#define HAVE_UNUSED_ATTRIBUTE 1
+
+#define HAVE_CONSTRUCTOR_ATTRIBUTE 1
+#define HAVE_DESTRUCTOR_ATTRIBUTE 1
+
+#define HAVE_GCC_VOLATILE_MEMORY_PROTECTION 1
+
+#define HAVE_COMPILER__FUNC__ 1
+#define HAVE_COMPILER__FUNCTION__ 1
+
+/* #undef HAVE_GCC_BOUNDED_ATTRIBUTE */
+
+/* Define to 1 if you want to enable GSSAPI */
+/* #undef WITH_GSSAPI */
+
+/* Define to 1 if you want to enable ZLIB */
+/* #undef WITH_ZLIB */
+
+/* Define to 1 if you want to enable SFTP */
+/* #undef WITH_SFTP */
+
+/* Define to 1 if you want to enable server support */
+#define WITH_SERVER 1
+
+/* Define to 1 if you want to enable DH group exchange algorithms */
+/* #undef WITH_GEX */
+
+/* Define to 1 if you want to enable blowfish cipher support */
+/* #undef WITH_BLOWFISH_CIPHER */
+
+/* Define to 1 if you want to enable debug output for crypto functions */
+/* #undef DEBUG_CRYPTO */
+
+/* Define to 1 if you want to enable debug output for packet functions */
+/* #undef DEBUG_PACKET */
+
+/* Define to 1 if you want to enable pcap output support (experimental) */
+/* #undef WITH_PCAP */
+
+/* Define to 1 if you want to enable calltrace debug output */
+/* #undef DEBUG_CALLTRACE */
+
+/* Define to 1 if you want to enable NaCl support */
+/* #undef WITH_NACL */
+
+/*************************** ENDIAN *****************************/
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+/* #undef WORDS_BIGENDIAN */
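Only the darwin config defines HAVE_NTOHLL and HAVE_HTONLL; every other target leaves them undefined, so the library has to synthesise 64-bit byte-order conversion itself. A sketch of the conventional fallback, shown here as an illustration under stated assumptions rather than libssh's exact macro (note that the s390x config further down, the one big-endian target, would take the no-op branch via WORDS_BIGENDIAN):

#include "config.h"
#include <stdint.h>
#include <arpa/inet.h>   /* ntohl(); present per HAVE_ARPA_INET_H */

#ifndef HAVE_NTOHLL
#ifdef WORDS_BIGENDIAN
#define ntohll(x) (x)    /* already in network byte order */
#else
/* Swap a 64-bit value as two 32-bit halves. */
#define ntohll(x) \
    ((((uint64_t)ntohl((uint32_t)((x) & 0xFFFFFFFFu))) << 32) | \
     (uint64_t)ntohl((uint32_t)((x) >> 32)))
#endif
#endif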
diff --git a/contrib/libssh-cmake/linux/aarch64-musl/config.h b/contrib/libssh-cmake/linux/aarch64-musl/config.h
new file mode 100644
index 000000000000..15236527fdf8
--- /dev/null
+++ b/contrib/libssh-cmake/linux/aarch64-musl/config.h
@@ -0,0 +1,287 @@
+/* Name of package */
+#define PACKAGE "libssh"
+
+/* Version number of package */
+#define VERSION "0.9.7"
+
+#define SYSCONFDIR "etc"
+#define BINARYDIR "/home/ubuntu/workdir/ClickHouse/build/aarch64-musl"
+#define SOURCEDIR "/home/ubuntu/workdir/ClickHouse"
+
+/* Global bind configuration file path */
+#define GLOBAL_BIND_CONFIG "/etc/ssh/libssh_server_config"
+
+/* Global client configuration file path */
+#define GLOBAL_CLIENT_CONFIG "/etc/ssh/ssh_config"
+
+/************************** HEADER FILES *************************/
+
+/* Define to 1 if you have the <argp.h> header file. */
+/* #undef HAVE_ARGP_H */
+
+/* Define to 1 if you have the <arpa/inet.h> header file. */
+#define HAVE_ARPA_INET_H 1
+
+/* Define to 1 if you have the <glob.h> header file. */
+#define HAVE_GLOB_H 1
+
+/* Define to 1 if you have the <valgrind/valgrind.h> header file. */
+/* #undef HAVE_VALGRIND_VALGRIND_H */
+
+/* Define to 1 if you have the <pty.h> header file. */
+/* #undef HAVE_PTY_H */
+
+/* Define to 1 if you have the <utmp.h> header file. */
+/* #undef HAVE_UTMP_H */
+
+/* Define to 1 if you have the <util.h> header file. */
+/* #undef HAVE_UTIL_H */
+
+/* Define to 1 if you have the <libutil.h> header file. */
+/* #undef HAVE_LIBUTIL_H */
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#define HAVE_SYS_TIME_H 1
+
+/* Define to 1 if you have the <sys/utime.h> header file. */
+/* #undef HAVE_SYS_UTIME_H */
+
+/* Define to 1 if you have the <io.h> header file. */
+/* #undef HAVE_IO_H */
+
+/* Define to 1 if you have the <termios.h> header file. */
+#define HAVE_TERMIOS_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <openssl/aes.h> header file. */
+#define HAVE_OPENSSL_AES_H 1
+
+/* Define to 1 if you have the <wspiapi.h> header file. */
+/* #undef HAVE_WSPIAPI_H */
+
+/* Define to 1 if you have the <openssl/blowfish.h> header file. */
+/* #undef HAVE_OPENSSL_BLOWFISH_H */
+
+/* Define to 1 if you have the <openssl/des.h> header file. */
+#define HAVE_OPENSSL_DES_H 1
+
+/* Define to 1 if you have the <openssl/ecdh.h> header file. */
+#define HAVE_OPENSSL_ECDH_H 1
+
+/* Define to 1 if you have the <openssl/ec.h> header file. */
+#define HAVE_OPENSSL_EC_H 1
+
+/* Define to 1 if you have the <openssl/ecdsa.h> header file. */
+#define HAVE_OPENSSL_ECDSA_H 1
+
+/* Define to 1 if you have the <pthread.h> header file. */
+#define HAVE_PTHREAD_H 1
+
+/* Define to 1 if you have elliptic curve cryptography in openssl */
+#define HAVE_OPENSSL_ECC 1
+
+/* Define to 1 if you have elliptic curve cryptography in gcrypt */
+/* #undef HAVE_GCRYPT_ECC */
+
+/* Define to 1 if you have elliptic curve cryptography */
+#define HAVE_ECC 1
+
+/* Define to 1 if you have DSA */
+/* #undef HAVE_DSA */
+
+/* Define to 1 if you have gl_flags as a glob_t struct member */
+/* #undef HAVE_GLOB_GL_FLAGS_MEMBER */
+
+/* Define to 1 if you have OpenSSL with Ed25519 support */
+#define HAVE_OPENSSL_ED25519 1
+
+/* Define to 1 if you have OpenSSL with X25519 support */
+#define HAVE_OPENSSL_X25519 1
+
+/*************************** FUNCTIONS ***************************/
+
+/* Define to 1 if you have the `EVP_aes128_ctr' function. */
+#define HAVE_OPENSSL_EVP_AES_CTR 1
+
+/* Define to 1 if you have the `EVP_aes128_cbc' function. */
+#define HAVE_OPENSSL_EVP_AES_CBC 1
+
+/* Define to 1 if you have the `EVP_aes128_gcm' function. */
+/* #undef HAVE_OPENSSL_EVP_AES_GCM */
+
+/* Define to 1 if you have the `CRYPTO_THREADID_set_callback' function. */
+#define HAVE_OPENSSL_CRYPTO_THREADID_SET_CALLBACK 1
+
+/* Define to 1 if you have the `CRYPTO_ctr128_encrypt' function. */
+#define HAVE_OPENSSL_CRYPTO_CTR128_ENCRYPT 1
+
+/* Define to 1 if you have the `EVP_CIPHER_CTX_new' function. */
+#define HAVE_OPENSSL_EVP_CIPHER_CTX_NEW 1
+
+/* Define to 1 if you have the `EVP_KDF_CTX_new_id' function. */
+/* #undef HAVE_OPENSSL_EVP_KDF_CTX_NEW_ID */
+
+/* Define to 1 if you have the `FIPS_mode' function. */
+#if USE_BORINGSSL
+#define HAVE_OPENSSL_FIPS_MODE 1
+#endif
+
+/* Define to 1 if you have the `EVP_DigestSign' function. */
+#define HAVE_OPENSSL_EVP_DIGESTSIGN 1
+
+/* Define to 1 if you have the `EVP_DigestVerify' function. */
+#define HAVE_OPENSSL_EVP_DIGESTVERIFY 1
+
+/* Define to 1 if you have the `OPENSSL_ia32cap_loc' function. */
+/* #undef HAVE_OPENSSL_IA32CAP_LOC */
+
+/* Define to 1 if you have the `snprintf' function. */
+#define HAVE_SNPRINTF 1
+
+/* Define to 1 if you have the `_snprintf' function. */
+/* #undef HAVE__SNPRINTF */
+
+/* Define to 1 if you have the `_snprintf_s' function. */
+/* #undef HAVE__SNPRINTF_S */
+
+/* Define to 1 if you have the `vsnprintf' function. */
+#define HAVE_VSNPRINTF 1
+
+/* Define to 1 if you have the `_vsnprintf' function. */
+/* #undef HAVE__VSNPRINTF */
+
+/* Define to 1 if you have the `_vsnprintf_s' function. */
+/* #undef HAVE__VSNPRINTF_S */
+
+/* Define to 1 if you have the `isblank' function. */
+#define HAVE_ISBLANK 1
+
+/* Define to 1 if you have the `strncpy' function. */
+#define HAVE_STRNCPY 1
+
+/* Define to 1 if you have the `strndup' function. */
+#define HAVE_STRNDUP 1
+
+/* Define to 1 if you have the `cfmakeraw' function. */
+/* #undef HAVE_CFMAKERAW */
+
+/* Define to 1 if you have the `getaddrinfo' function. */
+#define HAVE_GETADDRINFO 1
+
+/* Define to 1 if you have the `poll' function. */
+#define HAVE_POLL 1
+
+/* Define to 1 if you have the `select' function. */
+#define HAVE_SELECT 1
+
+/* Define to 1 if you have the `clock_gettime' function. */
+/* #undef HAVE_CLOCK_GETTIME */
+
+/* Define to 1 if you have the `ntohll' function. */
+/* #undef HAVE_NTOHLL */
+
+/* Define to 1 if you have the `htonll' function. */
+/* #undef HAVE_HTONLL */
+
+/* Define to 1 if you have the `strtoull' function. */
+#define HAVE_STRTOULL 1
+
+/* Define to 1 if you have the `__strtoull' function. */
+/* #undef HAVE___STRTOULL */
+
+/* Define to 1 if you have the `_strtoui64' function. */
+/* #undef HAVE__STRTOUI64 */
+
+/* Define to 1 if you have the `glob' function. */
+#define HAVE_GLOB 1
+
+/* Define to 1 if you have the `explicit_bzero' function. */
+#define HAVE_EXPLICIT_BZERO 1
+
+/* Define to 1 if you have the `memset_s' function. */
+/* #undef HAVE_MEMSET_S */
+
+/* Define to 1 if you have the `SecureZeroMemory' function. */
+/* #undef HAVE_SECURE_ZERO_MEMORY */
+
+/* Define to 1 if you have the `cmocka_set_test_filter' function. */
+/* #undef HAVE_CMOCKA_SET_TEST_FILTER */
+
+/*************************** LIBRARIES ***************************/
+
+/* Define to 1 if you have the `crypto' library (-lcrypto). */
+#define HAVE_LIBCRYPTO 1
+
+/* Define to 1 if you have the `gcrypt' library (-lgcrypt). */
+/* #undef HAVE_LIBGCRYPT */
+
+/* Define to 1 if you have the 'mbedTLS' library (-lmbedtls). */
+/* #undef HAVE_LIBMBEDCRYPTO */
+
+/* Define to 1 if you have the `pthread' library (-lpthread). */
+#define HAVE_PTHREAD 1
+
+/* Define to 1 if you have the `cmocka' library (-lcmocka). */
+/* #undef HAVE_CMOCKA */
+
+/**************************** OPTIONS ****************************/
+
+#define HAVE_GCC_THREAD_LOCAL_STORAGE 1
+/* #undef HAVE_MSC_THREAD_LOCAL_STORAGE */
+
+#define HAVE_FALLTHROUGH_ATTRIBUTE 1
+#define HAVE_UNUSED_ATTRIBUTE 1
+
+#define HAVE_CONSTRUCTOR_ATTRIBUTE 1
+#define HAVE_DESTRUCTOR_ATTRIBUTE 1
+
+#define HAVE_GCC_VOLATILE_MEMORY_PROTECTION 1
+
+#define HAVE_COMPILER__FUNC__ 1
+#define HAVE_COMPILER__FUNCTION__ 1
+
+/* #undef HAVE_GCC_BOUNDED_ATTRIBUTE */
+
+/* Define to 1 if you want to enable GSSAPI */
+/* #undef WITH_GSSAPI */
+
+/* Define to 1 if you want to enable ZLIB */
+/* #undef WITH_ZLIB */
+
+/* Define to 1 if you want to enable SFTP */
+/* #undef WITH_SFTP */
+
+/* Define to 1 if you want to enable server support */
+#define WITH_SERVER 1
+
+/* Define to 1 if you want to enable DH group exchange algorithms */
+/* #undef WITH_GEX */
+
+/* Define to 1 if you want to enable blowfish cipher support */
+/* #undef WITH_BLOWFISH_CIPHER */
+
+/* Define to 1 if you want to enable debug output for crypto functions */
+/* #undef DEBUG_CRYPTO */
+
+/* Define to 1 if you want to enable debug output for packet functions */
+/* #undef DEBUG_PACKET */
+
+/* Define to 1 if you want to enable pcap output support (experimental) */
+/* #undef WITH_PCAP */
+
+/* Define to 1 if you want to enable calltrace debug output */
+/* #undef DEBUG_CALLTRACE */
+
+/* Define to 1 if you want to enable NaCl support */
+/* #undef WITH_NACL */
+
+/*************************** ENDIAN *****************************/
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+/* #undef WORDS_BIGENDIAN */
diff --git a/contrib/libssh-cmake/linux/aarch64/config.h b/contrib/libssh-cmake/linux/aarch64/config.h
new file mode 100644
index 000000000000..e65ccb8ba3e6
--- /dev/null
+++ b/contrib/libssh-cmake/linux/aarch64/config.h
@@ -0,0 +1,287 @@
+/* Name of package */
+#define PACKAGE "libssh"
+
+/* Version number of package */
+#define VERSION "0.9.7"
+
+#define SYSCONFDIR "etc"
+#define BINARYDIR "/home/ubuntu/workdir/ClickHouse/build/aarch64"
+#define SOURCEDIR "/home/ubuntu/workdir/ClickHouse"
+
+/* Global bind configuration file path */
+#define GLOBAL_BIND_CONFIG "/etc/ssh/libssh_server_config"
+
+/* Global client configuration file path */
+#define GLOBAL_CLIENT_CONFIG "/etc/ssh/ssh_config"
+
+/************************** HEADER FILES *************************/
+
+/* Define to 1 if you have the <argp.h> header file. */
+/* #undef HAVE_ARGP_H */
+
+/* Define to 1 if you have the <arpa/inet.h> header file. */
+#define HAVE_ARPA_INET_H 1
+
+/* Define to 1 if you have the <glob.h> header file. */
+#define HAVE_GLOB_H 1
+
+/* Define to 1 if you have the <valgrind/valgrind.h> header file. */
+/* #undef HAVE_VALGRIND_VALGRIND_H */
+
+/* Define to 1 if you have the <pty.h> header file. */
+/* #undef HAVE_PTY_H */
+
+/* Define to 1 if you have the <utmp.h> header file. */
+/* #undef HAVE_UTMP_H */
+
+/* Define to 1 if you have the <util.h> header file. */
+/* #undef HAVE_UTIL_H */
+
+/* Define to 1 if you have the <libutil.h> header file. */
+/* #undef HAVE_LIBUTIL_H */
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#define HAVE_SYS_TIME_H 1
+
+/* Define to 1 if you have the <sys/utime.h> header file. */
+/* #undef HAVE_SYS_UTIME_H */
+
+/* Define to 1 if you have the <io.h> header file. */
+/* #undef HAVE_IO_H */
+
+/* Define to 1 if you have the <termios.h> header file. */
+#define HAVE_TERMIOS_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <openssl/aes.h> header file. */
+#define HAVE_OPENSSL_AES_H 1
+
+/* Define to 1 if you have the <wspiapi.h> header file. */
+/* #undef HAVE_WSPIAPI_H */
+
+/* Define to 1 if you have the <openssl/blowfish.h> header file. */
+/* #undef HAVE_OPENSSL_BLOWFISH_H */
+
+/* Define to 1 if you have the <openssl/des.h> header file. */
+#define HAVE_OPENSSL_DES_H 1
+
+/* Define to 1 if you have the <openssl/ecdh.h> header file. */
+#define HAVE_OPENSSL_ECDH_H 1
+
+/* Define to 1 if you have the <openssl/ec.h> header file. */
+#define HAVE_OPENSSL_EC_H 1
+
+/* Define to 1 if you have the <openssl/ecdsa.h> header file. */
+#define HAVE_OPENSSL_ECDSA_H 1
+
+/* Define to 1 if you have the <pthread.h> header file. */
+#define HAVE_PTHREAD_H 1
+
+/* Define to 1 if you have elliptic curve cryptography in openssl */
+#define HAVE_OPENSSL_ECC 1
+
+/* Define to 1 if you have elliptic curve cryptography in gcrypt */
+/* #undef HAVE_GCRYPT_ECC */
+
+/* Define to 1 if you have elliptic curve cryptography */
+#define HAVE_ECC 1
+
+/* Define to 1 if you have DSA */
+/* #undef HAVE_DSA */
+
+/* Define to 1 if you have gl_flags as a glob_t struct member */
+#define HAVE_GLOB_GL_FLAGS_MEMBER 1
+
+/* Define to 1 if you have OpenSSL with Ed25519 support */
+#define HAVE_OPENSSL_ED25519 1
+
+/* Define to 1 if you have OpenSSL with X25519 support */
+#define HAVE_OPENSSL_X25519 1
+
+/*************************** FUNCTIONS ***************************/
+
+/* Define to 1 if you have the `EVP_aes128_ctr' function. */
+#define HAVE_OPENSSL_EVP_AES_CTR 1
+
+/* Define to 1 if you have the `EVP_aes128_cbc' function. */
+#define HAVE_OPENSSL_EVP_AES_CBC 1
+
+/* Define to 1 if you have the `EVP_aes128_gcm' function. */
+/* #undef HAVE_OPENSSL_EVP_AES_GCM */
+
+/* Define to 1 if you have the `CRYPTO_THREADID_set_callback' function. */
+#define HAVE_OPENSSL_CRYPTO_THREADID_SET_CALLBACK 1
+
+/* Define to 1 if you have the `CRYPTO_ctr128_encrypt' function. */
+#define HAVE_OPENSSL_CRYPTO_CTR128_ENCRYPT 1
+
+/* Define to 1 if you have the `EVP_CIPHER_CTX_new' function. */
+#define HAVE_OPENSSL_EVP_CIPHER_CTX_NEW 1
+
+/* Define to 1 if you have the `EVP_KDF_CTX_new_id' function. */
+/* #undef HAVE_OPENSSL_EVP_KDF_CTX_NEW_ID */
+
+/* Define to 1 if you have the `FIPS_mode' function. */
+#if USE_BORINGSSL
+#define HAVE_OPENSSL_FIPS_MODE 1
+#endif
+
+/* Define to 1 if you have the `EVP_DigestSign' function. */
+#define HAVE_OPENSSL_EVP_DIGESTSIGN 1
+
+/* Define to 1 if you have the `EVP_DigestVerify' function. */
+#define HAVE_OPENSSL_EVP_DIGESTVERIFY 1
+
+/* Define to 1 if you have the `OPENSSL_ia32cap_loc' function. */
+/* #undef HAVE_OPENSSL_IA32CAP_LOC */
+
+/* Define to 1 if you have the `snprintf' function. */
+#define HAVE_SNPRINTF 1
+
+/* Define to 1 if you have the `_snprintf' function. */
+/* #undef HAVE__SNPRINTF */
+
+/* Define to 1 if you have the `_snprintf_s' function. */
+/* #undef HAVE__SNPRINTF_S */
+
+/* Define to 1 if you have the `vsnprintf' function. */
+#define HAVE_VSNPRINTF 1
+
+/* Define to 1 if you have the `_vsnprintf' function. */
+/* #undef HAVE__VSNPRINTF */
+
+/* Define to 1 if you have the `_vsnprintf_s' function. */
+/* #undef HAVE__VSNPRINTF_S */
+
+/* Define to 1 if you have the `isblank' function. */
+#define HAVE_ISBLANK 1
+
+/* Define to 1 if you have the `strncpy' function. */
+#define HAVE_STRNCPY 1
+
+/* Define to 1 if you have the `strndup' function. */
+#define HAVE_STRNDUP 1
+
+/* Define to 1 if you have the `cfmakeraw' function. */
+/* #undef HAVE_CFMAKERAW */
+
+/* Define to 1 if you have the `getaddrinfo' function. */
+#define HAVE_GETADDRINFO 1
+
+/* Define to 1 if you have the `poll' function. */
+#define HAVE_POLL 1
+
+/* Define to 1 if you have the `select' function. */
+#define HAVE_SELECT 1
+
+/* Define to 1 if you have the `clock_gettime' function. */
+/* #undef HAVE_CLOCK_GETTIME */
+
+/* Define to 1 if you have the `ntohll' function. */
+/* #undef HAVE_NTOHLL */
+
+/* Define to 1 if you have the `htonll' function. */
+/* #undef HAVE_HTONLL */
+
+/* Define to 1 if you have the `strtoull' function. */
+#define HAVE_STRTOULL 1
+
+/* Define to 1 if you have the `__strtoull' function. */
+/* #undef HAVE___STRTOULL */
+
+/* Define to 1 if you have the `_strtoui64' function. */
+/* #undef HAVE__STRTOUI64 */
+
+/* Define to 1 if you have the `glob' function. */
+#define HAVE_GLOB 1
+
+/* Define to 1 if you have the `explicit_bzero' function. */
+#define HAVE_EXPLICIT_BZERO 1
+
+/* Define to 1 if you have the `memset_s' function. */
+#define HAVE_MEMSET_S 1
+
+/* Define to 1 if you have the `SecureZeroMemory' function. */
+/* #undef HAVE_SECURE_ZERO_MEMORY */
+
+/* Define to 1 if you have the `cmocka_set_test_filter' function. */
+/* #undef HAVE_CMOCKA_SET_TEST_FILTER */
+
+/*************************** LIBRARIES ***************************/
+
+/* Define to 1 if you have the `crypto' library (-lcrypto). */
+#define HAVE_LIBCRYPTO 1
+
+/* Define to 1 if you have the `gcrypt' library (-lgcrypt). */
+/* #undef HAVE_LIBGCRYPT */
+
+/* Define to 1 if you have the 'mbedTLS' library (-lmbedtls). */
+/* #undef HAVE_LIBMBEDCRYPTO */
+
+/* Define to 1 if you have the `pthread' library (-lpthread). */
+#define HAVE_PTHREAD 1
+
+/* Define to 1 if you have the `cmocka' library (-lcmocka). */
+/* #undef HAVE_CMOCKA */
+
+/**************************** OPTIONS ****************************/
+
+#define HAVE_GCC_THREAD_LOCAL_STORAGE 1
+/* #undef HAVE_MSC_THREAD_LOCAL_STORAGE */
+
+#define HAVE_FALLTHROUGH_ATTRIBUTE 1
+#define HAVE_UNUSED_ATTRIBUTE 1
+
+#define HAVE_CONSTRUCTOR_ATTRIBUTE 1
+#define HAVE_DESTRUCTOR_ATTRIBUTE 1
+
+#define HAVE_GCC_VOLATILE_MEMORY_PROTECTION 1
+
+#define HAVE_COMPILER__FUNC__ 1
+#define HAVE_COMPILER__FUNCTION__ 1
+
+/* #undef HAVE_GCC_BOUNDED_ATTRIBUTE */
+
+/* Define to 1 if you want to enable GSSAPI */
+/* #undef WITH_GSSAPI */
+
+/* Define to 1 if you want to enable ZLIB */
+/* #undef WITH_ZLIB */
+
+/* Define to 1 if you want to enable SFTP */
+/* #undef WITH_SFTP */
+
+/* Define to 1 if you want to enable server support */
+#define WITH_SERVER 1
+
+/* Define to 1 if you want to enable DH group exchange algorithms */
+/* #undef WITH_GEX */
+
+/* Define to 1 if you want to enable blowfish cipher support */
+/* #undef WITH_BLOWFISH_CIPHER */
+
+/* Define to 1 if you want to enable debug output for crypto functions */
+/* #undef DEBUG_CRYPTO */
+
+/* Define to 1 if you want to enable debug output for packet functions */
+/* #undef DEBUG_PACKET */
+
+/* Define to 1 if you want to enable pcap output support (experimental) */
+/* #undef WITH_PCAP */
+
+/* Define to 1 if you want to enable calltrace debug output */
+/* #undef DEBUG_CALLTRACE */
+
+/* Define to 1 if you want to enable NaCl support */
+/* #undef WITH_NACL */
+
+/*************************** ENDIAN *****************************/
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+/* #undef WORDS_BIGENDIAN */
diff --git a/contrib/libssh-cmake/linux/ppc64le/config.h b/contrib/libssh-cmake/linux/ppc64le/config.h
new file mode 100644
index 000000000000..c56b1ad03344
--- /dev/null
+++ b/contrib/libssh-cmake/linux/ppc64le/config.h
@@ -0,0 +1,287 @@
+/* Name of package */
+#define PACKAGE "libssh"
+
+/* Version number of package */
+#define VERSION "0.9.7"
+
+#define SYSCONFDIR "etc"
+#define BINARYDIR "/home/ubuntu/workdir/ClickHouse/build/ppc64le"
+#define SOURCEDIR "/home/ubuntu/workdir/ClickHouse"
+
+/* Global bind configuration file path */
+#define GLOBAL_BIND_CONFIG "/etc/ssh/libssh_server_config"
+
+/* Global client configuration file path */
+#define GLOBAL_CLIENT_CONFIG "/etc/ssh/ssh_config"
+
+/************************** HEADER FILES *************************/
+
+/* Define to 1 if you have the <argp.h> header file. */
+/* #undef HAVE_ARGP_H */
+
+/* Define to 1 if you have the <arpa/inet.h> header file. */
+#define HAVE_ARPA_INET_H 1
+
+/* Define to 1 if you have the <glob.h> header file. */
+#define HAVE_GLOB_H 1
+
+/* Define to 1 if you have the <valgrind/valgrind.h> header file. */
+/* #undef HAVE_VALGRIND_VALGRIND_H */
+
+/* Define to 1 if you have the <pty.h> header file. */
+/* #undef HAVE_PTY_H */
+
+/* Define to 1 if you have the <utmp.h> header file. */
+/* #undef HAVE_UTMP_H */
+
+/* Define to 1 if you have the <util.h> header file. */
+/* #undef HAVE_UTIL_H */
+
+/* Define to 1 if you have the <libutil.h> header file. */
+/* #undef HAVE_LIBUTIL_H */
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#define HAVE_SYS_TIME_H 1
+
+/* Define to 1 if you have the <sys/utime.h> header file. */
+/* #undef HAVE_SYS_UTIME_H */
+
+/* Define to 1 if you have the <io.h> header file. */
+/* #undef HAVE_IO_H */
+
+/* Define to 1 if you have the <termios.h> header file. */
+#define HAVE_TERMIOS_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <openssl/aes.h> header file. */
+#define HAVE_OPENSSL_AES_H 1
+
+/* Define to 1 if you have the <wspiapi.h> header file. */
+/* #undef HAVE_WSPIAPI_H */
+
+/* Define to 1 if you have the <openssl/blowfish.h> header file. */
+/* #undef HAVE_OPENSSL_BLOWFISH_H */
+
+/* Define to 1 if you have the <openssl/des.h> header file. */
+#define HAVE_OPENSSL_DES_H 1
+
+/* Define to 1 if you have the <openssl/ecdh.h> header file. */
+#define HAVE_OPENSSL_ECDH_H 1
+
+/* Define to 1 if you have the <openssl/ec.h> header file. */
+#define HAVE_OPENSSL_EC_H 1
+
+/* Define to 1 if you have the <openssl/ecdsa.h> header file. */
+#define HAVE_OPENSSL_ECDSA_H 1
+
+/* Define to 1 if you have the <pthread.h> header file. */
+#define HAVE_PTHREAD_H 1
+
+/* Define to 1 if you have elliptic curve cryptography in openssl */
+#define HAVE_OPENSSL_ECC 1
+
+/* Define to 1 if you have elliptic curve cryptography in gcrypt */
+/* #undef HAVE_GCRYPT_ECC */
+
+/* Define to 1 if you have elliptic curve cryptography */
+#define HAVE_ECC 1
+
+/* Define to 1 if you have DSA */
+/* #undef HAVE_DSA */
+
+/* Define to 1 if you have gl_flags as a glob_t struct member */
+#define HAVE_GLOB_GL_FLAGS_MEMBER 1
+
+/* Define to 1 if you have OpenSSL with Ed25519 support */
+#define HAVE_OPENSSL_ED25519 1
+
+/* Define to 1 if you have OpenSSL with X25519 support */
+#define HAVE_OPENSSL_X25519 1
+
+/*************************** FUNCTIONS ***************************/
+
+/* Define to 1 if you have the `EVP_aes128_ctr' function. */
+#define HAVE_OPENSSL_EVP_AES_CTR 1
+
+/* Define to 1 if you have the `EVP_aes128_cbc' function. */
+#define HAVE_OPENSSL_EVP_AES_CBC 1
+
+/* Define to 1 if you have the `EVP_aes128_gcm' function. */
+/* #undef HAVE_OPENSSL_EVP_AES_GCM */
+
+/* Define to 1 if you have the `CRYPTO_THREADID_set_callback' function. */
+#define HAVE_OPENSSL_CRYPTO_THREADID_SET_CALLBACK 1
+
+/* Define to 1 if you have the `CRYPTO_ctr128_encrypt' function. */
+#define HAVE_OPENSSL_CRYPTO_CTR128_ENCRYPT 1
+
+/* Define to 1 if you have the `EVP_CIPHER_CTX_new' function. */
+#define HAVE_OPENSSL_EVP_CIPHER_CTX_NEW 1
+
+/* Define to 1 if you have the `EVP_KDF_CTX_new_id' function. */
+/* #undef HAVE_OPENSSL_EVP_KDF_CTX_NEW_ID */
+
+/* Define to 1 if you have the `FIPS_mode' function. */
+#if USE_BORINGSSL
+#define HAVE_OPENSSL_FIPS_MODE 1
+#endif
+
+/* Define to 1 if you have the `EVP_DigestSign' function. */
+#define HAVE_OPENSSL_EVP_DIGESTSIGN 1
+
+/* Define to 1 if you have the `EVP_DigestVerify' function. */
+#define HAVE_OPENSSL_EVP_DIGESTVERIFY 1
+
+/* Define to 1 if you have the `OPENSSL_ia32cap_loc' function. */
+/* #undef HAVE_OPENSSL_IA32CAP_LOC */
+
+/* Define to 1 if you have the `snprintf' function. */
+#define HAVE_SNPRINTF 1
+
+/* Define to 1 if you have the `_snprintf' function. */
+/* #undef HAVE__SNPRINTF */
+
+/* Define to 1 if you have the `_snprintf_s' function. */
+/* #undef HAVE__SNPRINTF_S */
+
+/* Define to 1 if you have the `vsnprintf' function. */
+#define HAVE_VSNPRINTF 1
+
+/* Define to 1 if you have the `_vsnprintf' function. */
+/* #undef HAVE__VSNPRINTF */
+
+/* Define to 1 if you have the `_vsnprintf_s' function. */
+/* #undef HAVE__VSNPRINTF_S */
+
+/* Define to 1 if you have the `isblank' function. */
+#define HAVE_ISBLANK 1
+
+/* Define to 1 if you have the `strncpy' function. */
+#define HAVE_STRNCPY 1
+
+/* Define to 1 if you have the `strndup' function. */
+#define HAVE_STRNDUP 1
+
+/* Define to 1 if you have the `cfmakeraw' function. */
+/* #undef HAVE_CFMAKERAW */
+
+/* Define to 1 if you have the `getaddrinfo' function. */
+#define HAVE_GETADDRINFO 1
+
+/* Define to 1 if you have the `poll' function. */
+#define HAVE_POLL 1
+
+/* Define to 1 if you have the `select' function. */
+#define HAVE_SELECT 1
+
+/* Define to 1 if you have the `clock_gettime' function. */
+/* #undef HAVE_CLOCK_GETTIME */
+
+/* Define to 1 if you have the `ntohll' function. */
+/* #undef HAVE_NTOHLL */
+
+/* Define to 1 if you have the `htonll' function. */
+/* #undef HAVE_HTONLL */
+
+/* Define to 1 if you have the `strtoull' function. */
+#define HAVE_STRTOULL 1
+
+/* Define to 1 if you have the `__strtoull' function. */
+/* #undef HAVE___STRTOULL */
+
+/* Define to 1 if you have the `_strtoui64' function. */
+/* #undef HAVE__STRTOUI64 */
+
+/* Define to 1 if you have the `glob' function. */
+#define HAVE_GLOB 1
+
+/* Define to 1 if you have the `explicit_bzero' function. */
+/* #undef HAVE_EXPLICIT_BZERO */
+
+/* Define to 1 if you have the `memset_s' function. */
+/* #undef HAVE_MEMSET_S */
+
+/* Define to 1 if you have the `SecureZeroMemory' function. */
+/* #undef HAVE_SECURE_ZERO_MEMORY */
+
+/* Define to 1 if you have the `cmocka_set_test_filter' function. */
+/* #undef HAVE_CMOCKA_SET_TEST_FILTER */
+
+/*************************** LIBRARIES ***************************/
+
+/* Define to 1 if you have the `crypto' library (-lcrypto). */
+#define HAVE_LIBCRYPTO 1
+
+/* Define to 1 if you have the `gcrypt' library (-lgcrypt). */
+/* #undef HAVE_LIBGCRYPT */
+
+/* Define to 1 if you have the 'mbedTLS' library (-lmbedtls). */
+/* #undef HAVE_LIBMBEDCRYPTO */
+
+/* Define to 1 if you have the `pthread' library (-lpthread). */
+#define HAVE_PTHREAD 1
+
+/* Define to 1 if you have the `cmocka' library (-lcmocka). */
+/* #undef HAVE_CMOCKA */
+
+/**************************** OPTIONS ****************************/
+
+#define HAVE_GCC_THREAD_LOCAL_STORAGE 1
+/* #undef HAVE_MSC_THREAD_LOCAL_STORAGE */
+
+#define HAVE_FALLTHROUGH_ATTRIBUTE 1
+#define HAVE_UNUSED_ATTRIBUTE 1
+
+#define HAVE_CONSTRUCTOR_ATTRIBUTE 1
+#define HAVE_DESTRUCTOR_ATTRIBUTE 1
+
+#define HAVE_GCC_VOLATILE_MEMORY_PROTECTION 1
+
+#define HAVE_COMPILER__FUNC__ 1
+#define HAVE_COMPILER__FUNCTION__ 1
+
+/* #undef HAVE_GCC_BOUNDED_ATTRIBUTE */
+
+/* Define to 1 if you want to enable GSSAPI */
+/* #undef WITH_GSSAPI */
+
+/* Define to 1 if you want to enable ZLIB */
+/* #undef WITH_ZLIB */
+
+/* Define to 1 if you want to enable SFTP */
+/* #undef WITH_SFTP */
+
+/* Define to 1 if you want to enable server support */
+#define WITH_SERVER 1
+
+/* Define to 1 if you want to enable DH group exchange algorithms */
+/* #undef WITH_GEX */
+
+/* Define to 1 if you want to enable blowfish cipher support */
+/* #undef WITH_BLOWFISH_CIPHER */
+
+/* Define to 1 if you want to enable debug output for crypto functions */
+/* #undef DEBUG_CRYPTO */
+
+/* Define to 1 if you want to enable debug output for packet functions */
+/* #undef DEBUG_PACKET */
+
+/* Define to 1 if you want to enable pcap output support (experimental) */
+/* #undef WITH_PCAP */
+
+/* Define to 1 if you want to enable calltrace debug output */
+/* #undef DEBUG_CALLTRACE */
+
+/* Define to 1 if you want to enable NaCl support */
+/* #undef WITH_NACL */
+
+/*************************** ENDIAN *****************************/
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+/* #undef WORDS_BIGENDIAN */
diff --git a/contrib/libssh-cmake/linux/riscv64/config.h b/contrib/libssh-cmake/linux/riscv64/config.h
new file mode 100644
index 000000000000..33c91bf542b9
--- /dev/null
+++ b/contrib/libssh-cmake/linux/riscv64/config.h
@@ -0,0 +1,287 @@
+/* Name of package */
+#define PACKAGE "libssh"
+
+/* Version number of package */
+#define VERSION "0.9.7"
+
+#define SYSCONFDIR "etc"
+#define BINARYDIR "/home/ubuntu/workdir/ClickHouse/build/riscv64"
+#define SOURCEDIR "/home/ubuntu/workdir/ClickHouse"
+
+/* Global bind configuration file path */
+#define GLOBAL_BIND_CONFIG "/etc/ssh/libssh_server_config"
+
+/* Global client configuration file path */
+#define GLOBAL_CLIENT_CONFIG "/etc/ssh/ssh_config"
+
+/************************** HEADER FILES *************************/
+
+/* Define to 1 if you have the <argp.h> header file. */
+/* #undef HAVE_ARGP_H */
+
+/* Define to 1 if you have the <arpa/inet.h> header file. */
+#define HAVE_ARPA_INET_H 1
+
+/* Define to 1 if you have the <glob.h> header file. */
+#define HAVE_GLOB_H 1
+
+/* Define to 1 if you have the <valgrind/valgrind.h> header file. */
+/* #undef HAVE_VALGRIND_VALGRIND_H */
+
+/* Define to 1 if you have the <pty.h> header file. */
+/* #undef HAVE_PTY_H */
+
+/* Define to 1 if you have the <utmp.h> header file. */
+/* #undef HAVE_UTMP_H */
+
+/* Define to 1 if you have the <util.h> header file. */
+/* #undef HAVE_UTIL_H */
+
+/* Define to 1 if you have the <libutil.h> header file. */
+/* #undef HAVE_LIBUTIL_H */
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#define HAVE_SYS_TIME_H 1
+
+/* Define to 1 if you have the <sys/utime.h> header file. */
+/* #undef HAVE_SYS_UTIME_H */
+
+/* Define to 1 if you have the <io.h> header file. */
+/* #undef HAVE_IO_H */
+
+/* Define to 1 if you have the <termios.h> header file. */
+#define HAVE_TERMIOS_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <openssl/aes.h> header file. */
+#define HAVE_OPENSSL_AES_H 1
+
+/* Define to 1 if you have the <wspiapi.h> header file. */
+/* #undef HAVE_WSPIAPI_H */
+
+/* Define to 1 if you have the <openssl/blowfish.h> header file. */
+/* #undef HAVE_OPENSSL_BLOWFISH_H */
+
+/* Define to 1 if you have the <openssl/des.h> header file. */
+#define HAVE_OPENSSL_DES_H 1
+
+/* Define to 1 if you have the <openssl/ecdh.h> header file. */
+#define HAVE_OPENSSL_ECDH_H 1
+
+/* Define to 1 if you have the <openssl/ec.h> header file. */
+#define HAVE_OPENSSL_EC_H 1
+
+/* Define to 1 if you have the <openssl/ecdsa.h> header file. */
+#define HAVE_OPENSSL_ECDSA_H 1
+
+/* Define to 1 if you have the <pthread.h> header file. */
+#define HAVE_PTHREAD_H 1
+
+/* Define to 1 if you have elliptic curve cryptography in openssl */
+#define HAVE_OPENSSL_ECC 1
+
+/* Define to 1 if you have elliptic curve cryptography in gcrypt */
+/* #undef HAVE_GCRYPT_ECC */
+
+/* Define to 1 if you have elliptic curve cryptography */
+#define HAVE_ECC 1
+
+/* Define to 1 if you have DSA */
+/* #undef HAVE_DSA */
+
+/* Define to 1 if you have gl_flags as a glob_t struct member */
+#define HAVE_GLOB_GL_FLAGS_MEMBER 1
+
+/* Define to 1 if you have OpenSSL with Ed25519 support */
+#define HAVE_OPENSSL_ED25519 1
+
+/* Define to 1 if you have OpenSSL with X25519 support */
+#define HAVE_OPENSSL_X25519 1
+
+/*************************** FUNCTIONS ***************************/
+
+/* Define to 1 if you have the `EVP_aes128_ctr' function. */
+#define HAVE_OPENSSL_EVP_AES_CTR 1
+
+/* Define to 1 if you have the `EVP_aes128_cbc' function. */
+#define HAVE_OPENSSL_EVP_AES_CBC 1
+
+/* Define to 1 if you have the `EVP_aes128_gcm' function. */
+/* #undef HAVE_OPENSSL_EVP_AES_GCM */
+
+/* Define to 1 if you have the `CRYPTO_THREADID_set_callback' function. */
+#define HAVE_OPENSSL_CRYPTO_THREADID_SET_CALLBACK 1
+
+/* Define to 1 if you have the `CRYPTO_ctr128_encrypt' function. */
+#define HAVE_OPENSSL_CRYPTO_CTR128_ENCRYPT 1
+
+/* Define to 1 if you have the `EVP_CIPHER_CTX_new' function. */
+#define HAVE_OPENSSL_EVP_CIPHER_CTX_NEW 1
+
+/* Define to 1 if you have the `EVP_KDF_CTX_new_id' function. */
+/* #undef HAVE_OPENSSL_EVP_KDF_CTX_NEW_ID */
+
+/* Define to 1 if you have the `FIPS_mode' function. */
+#if USE_BORINGSSL
+#define HAVE_OPENSSL_FIPS_MODE 1
+#endif
+
+/* Define to 1 if you have the `EVP_DigestSign' function. */
+#define HAVE_OPENSSL_EVP_DIGESTSIGN 1
+
+/* Define to 1 if you have the `EVP_DigestVerify' function. */
+#define HAVE_OPENSSL_EVP_DIGESTVERIFY 1
+
+/* Define to 1 if you have the `OPENSSL_ia32cap_loc' function. */
+/* #undef HAVE_OPENSSL_IA32CAP_LOC */
+
+/* Define to 1 if you have the `snprintf' function. */
+#define HAVE_SNPRINTF 1
+
+/* Define to 1 if you have the `_snprintf' function. */
+/* #undef HAVE__SNPRINTF */
+
+/* Define to 1 if you have the `_snprintf_s' function. */
+/* #undef HAVE__SNPRINTF_S */
+
+/* Define to 1 if you have the `vsnprintf' function. */
+#define HAVE_VSNPRINTF 1
+
+/* Define to 1 if you have the `_vsnprintf' function. */
+/* #undef HAVE__VSNPRINTF */
+
+/* Define to 1 if you have the `_vsnprintf_s' function. */
+/* #undef HAVE__VSNPRINTF_S */
+
+/* Define to 1 if you have the `isblank' function. */
+#define HAVE_ISBLANK 1
+
+/* Define to 1 if you have the `strncpy' function. */
+#define HAVE_STRNCPY 1
+
+/* Define to 1 if you have the `strndup' function. */
+#define HAVE_STRNDUP 1
+
+/* Define to 1 if you have the `cfmakeraw' function. */
+/* #undef HAVE_CFMAKERAW */
+
+/* Define to 1 if you have the `getaddrinfo' function. */
+#define HAVE_GETADDRINFO 1
+
+/* Define to 1 if you have the `poll' function. */
+#define HAVE_POLL 1
+
+/* Define to 1 if you have the `select' function. */
+#define HAVE_SELECT 1
+
+/* Define to 1 if you have the `clock_gettime' function. */
+/* #undef HAVE_CLOCK_GETTIME */
+
+/* Define to 1 if you have the `ntohll' function. */
+/* #undef HAVE_NTOHLL */
+
+/* Define to 1 if you have the `htonll' function. */
+/* #undef HAVE_HTONLL */
+
+/* Define to 1 if you have the `strtoull' function. */
+#define HAVE_STRTOULL 1
+
+/* Define to 1 if you have the `__strtoull' function. */
+/* #undef HAVE___STRTOULL */
+
+/* Define to 1 if you have the `_strtoui64' function. */
+/* #undef HAVE__STRTOUI64 */
+
+/* Define to 1 if you have the `glob' function. */
+#define HAVE_GLOB 1
+
+/* Define to 1 if you have the `explicit_bzero' function. */
+/* #undef HAVE_EXPLICIT_BZERO */
+
+/* Define to 1 if you have the `memset_s' function. */
+/* #undef HAVE_MEMSET_S */
+
+/* Define to 1 if you have the `SecureZeroMemory' function. */
+/* #undef HAVE_SECURE_ZERO_MEMORY */
+
+/* Define to 1 if you have the `cmocka_set_test_filter' function. */
+/* #undef HAVE_CMOCKA_SET_TEST_FILTER */
+
+/*************************** LIBRARIES ***************************/
+
+/* Define to 1 if you have the `crypto' library (-lcrypto). */
+#define HAVE_LIBCRYPTO 1
+
+/* Define to 1 if you have the `gcrypt' library (-lgcrypt). */
+/* #undef HAVE_LIBGCRYPT */
+
+/* Define to 1 if you have the 'mbedTLS' library (-lmbedtls). */
+/* #undef HAVE_LIBMBEDCRYPTO */
+
+/* Define to 1 if you have the `pthread' library (-lpthread). */
+#define HAVE_PTHREAD 1
+
+/* Define to 1 if you have the `cmocka' library (-lcmocka). */
+/* #undef HAVE_CMOCKA */
+
+/**************************** OPTIONS ****************************/
+
+#define HAVE_GCC_THREAD_LOCAL_STORAGE 1
+/* #undef HAVE_MSC_THREAD_LOCAL_STORAGE */
+
+#define HAVE_FALLTHROUGH_ATTRIBUTE 1
+#define HAVE_UNUSED_ATTRIBUTE 1
+
+#define HAVE_CONSTRUCTOR_ATTRIBUTE 1
+#define HAVE_DESTRUCTOR_ATTRIBUTE 1
+
+#define HAVE_GCC_VOLATILE_MEMORY_PROTECTION 1
+
+#define HAVE_COMPILER__FUNC__ 1
+#define HAVE_COMPILER__FUNCTION__ 1
+
+/* #undef HAVE_GCC_BOUNDED_ATTRIBUTE */
+
+/* Define to 1 if you want to enable GSSAPI */
+/* #undef WITH_GSSAPI */
+
+/* Define to 1 if you want to enable ZLIB */
+/* #undef WITH_ZLIB */
+
+/* Define to 1 if you want to enable SFTP */
+/* #undef WITH_SFTP */
+
+/* Define to 1 if you want to enable server support */
+#define WITH_SERVER 1
+
+/* Define to 1 if you want to enable DH group exchange algorithms */
+/* #undef WITH_GEX */
+
+/* Define to 1 if you want to enable blowfish cipher support */
+/* #undef WITH_BLOWFISH_CIPHER */
+
+/* Define to 1 if you want to enable debug output for crypto functions */
+/* #undef DEBUG_CRYPTO */
+
+/* Define to 1 if you want to enable debug output for packet functions */
+/* #undef DEBUG_PACKET */
+
+/* Define to 1 if you want to enable pcap output support (experimental) */
+/* #undef WITH_PCAP */
+
+/* Define to 1 if you want to enable calltrace debug output */
+/* #undef DEBUG_CALLTRACE */
+
+/* Define to 1 if you want to enable NaCl support */
+/* #undef WITH_NACL */
+
+/*************************** ENDIAN *****************************/
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+/* #undef WORDS_BIGENDIAN */
diff --git a/contrib/libssh-cmake/linux/s390x/config.h b/contrib/libssh-cmake/linux/s390x/config.h
new file mode 100644
index 000000000000..289a8cabc8e4
--- /dev/null
+++ b/contrib/libssh-cmake/linux/s390x/config.h
@@ -0,0 +1,287 @@
+/* Name of package */
+#define PACKAGE "libssh"
+
+/* Version number of package */
+#define VERSION "0.9.7"
+
+#define SYSCONFDIR "etc"
+#define BINARYDIR "/home/ubuntu/workdir/ClickHouse/build/s390x"
+#define SOURCEDIR "/home/ubuntu/workdir/ClickHouse"
+
+/* Global bind configuration file path */
+#define GLOBAL_BIND_CONFIG "/etc/ssh/libssh_server_config"
+
+/* Global client configuration file path */
+#define GLOBAL_CLIENT_CONFIG "/etc/ssh/ssh_config"
+
+/************************** HEADER FILES *************************/
+
+/* Define to 1 if you have the <argp.h> header file. */
+/* #undef HAVE_ARGP_H */
+
+/* Define to 1 if you have the <arpa/inet.h> header file. */
+#define HAVE_ARPA_INET_H 1
+
+/* Define to 1 if you have the <glob.h> header file. */
+#define HAVE_GLOB_H 1
+
+/* Define to 1 if you have the <valgrind/valgrind.h> header file. */
+/* #undef HAVE_VALGRIND_VALGRIND_H */
+
+/* Define to 1 if you have the <pty.h> header file. */
+/* #undef HAVE_PTY_H */
+
+/* Define to 1 if you have the <utmp.h> header file. */
+/* #undef HAVE_UTMP_H */
+
+/* Define to 1 if you have the <util.h> header file. */
+/* #undef HAVE_UTIL_H */
+
+/* Define to 1 if you have the <libutil.h> header file. */
+/* #undef HAVE_LIBUTIL_H */
+
+/* Define to 1 if you have the <sys/time.h> header file. */
+#define HAVE_SYS_TIME_H 1
+
+/* Define to 1 if you have the <sys/utime.h> header file. */
+/* #undef HAVE_SYS_UTIME_H */
+
+/* Define to 1 if you have the <io.h> header file. */
+/* #undef HAVE_IO_H */
+
+/* Define to 1 if you have the <termios.h> header file. */
+#define HAVE_TERMIOS_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <openssl/aes.h> header file. */
+#define HAVE_OPENSSL_AES_H 1
+
+/* Define to 1 if you have the <wspiapi.h> header file. */
+/* #undef HAVE_WSPIAPI_H */
+
+/* Define to 1 if you have the <openssl/blowfish.h> header file. */
+/* #undef HAVE_OPENSSL_BLOWFISH_H */
+
+/* Define to 1 if you have the <openssl/des.h> header file. */
+#define HAVE_OPENSSL_DES_H 1
+
+/* Define to 1 if you have the <openssl/ecdh.h> header file. */
+#define HAVE_OPENSSL_ECDH_H 1
+
+/* Define to 1 if you have the <openssl/ec.h> header file. */
+#define HAVE_OPENSSL_EC_H 1
+
+/* Define to 1 if you have the <openssl/ecdsa.h> header file. */
+#define HAVE_OPENSSL_ECDSA_H 1
+
+/* Define to 1 if you have the <pthread.h> header file. */
+#define HAVE_PTHREAD_H 1
+
+/* Define to 1 if you have elliptic curve cryptography in openssl */
+#define HAVE_OPENSSL_ECC 1
+
+/* Define to 1 if you have elliptic curve cryptography in gcrypt */
+/* #undef HAVE_GCRYPT_ECC */
+
+/* Define to 1 if you have elliptic curve cryptography */
+#define HAVE_ECC 1
+
+/* Define to 1 if you have DSA */
+/* #undef HAVE_DSA */
+
+/* Define to 1 if you have gl_flags as a glob_t struct member */
+#define HAVE_GLOB_GL_FLAGS_MEMBER 1
+
+/* Define to 1 if you have OpenSSL with Ed25519 support */
+#define HAVE_OPENSSL_ED25519 1
+
+/* Define to 1 if you have OpenSSL with X25519 support */
+#define HAVE_OPENSSL_X25519 1
+
+/*************************** FUNCTIONS ***************************/
+
+/* Define to 1 if you have the `EVP_aes128_ctr' function. */
+#define HAVE_OPENSSL_EVP_AES_CTR 1
+
+/* Define to 1 if you have the `EVP_aes128_cbc' function. */
+#define HAVE_OPENSSL_EVP_AES_CBC 1
+
+/* Define to 1 if you have the `EVP_aes128_gcm' function. */
+/* #undef HAVE_OPENSSL_EVP_AES_GCM */
+
+/* Define to 1 if you have the `CRYPTO_THREADID_set_callback' function. */
+#define HAVE_OPENSSL_CRYPTO_THREADID_SET_CALLBACK 1
+
+/* Define to 1 if you have the `CRYPTO_ctr128_encrypt' function. */
+#define HAVE_OPENSSL_CRYPTO_CTR128_ENCRYPT 1
+
+/* Define to 1 if you have the `EVP_CIPHER_CTX_new' function. */
+#define HAVE_OPENSSL_EVP_CIPHER_CTX_NEW 1
+
+/* Define to 1 if you have the `EVP_KDF_CTX_new_id' function. */
+/* #undef HAVE_OPENSSL_EVP_KDF_CTX_NEW_ID */
+
+/* Define to 1 if you have the `FIPS_mode' function. */
+#if USE_BORINGSSL
+#define HAVE_OPENSSL_FIPS_MODE 1
+#endif
+
+/* Define to 1 if you have the `EVP_DigestSign' function. */
+#define HAVE_OPENSSL_EVP_DIGESTSIGN 1
+
+/* Define to 1 if you have the `EVP_DigestVerify' function. */
+#define HAVE_OPENSSL_EVP_DIGESTVERIFY 1
+
+/* Define to 1 if you have the `OPENSSL_ia32cap_loc' function. */
+/* #undef HAVE_OPENSSL_IA32CAP_LOC */
+
+/* Define to 1 if you have the `snprintf' function. */
+#define HAVE_SNPRINTF 1
+
+/* Define to 1 if you have the `_snprintf' function. */
+/* #undef HAVE__SNPRINTF */
+
+/* Define to 1 if you have the `_snprintf_s' function. */
+/* #undef HAVE__SNPRINTF_S */
+
+/* Define to 1 if you have the `vsnprintf' function. */
+#define HAVE_VSNPRINTF 1
+
+/* Define to 1 if you have the `_vsnprintf' function. */
+/* #undef HAVE__VSNPRINTF */
+
+/* Define to 1 if you have the `_vsnprintf_s' function. */
+/* #undef HAVE__VSNPRINTF_S */
+
+/* Define to 1 if you have the `isblank' function. */
+#define HAVE_ISBLANK 1
+
+/* Define to 1 if you have the `strncpy' function. */
+#define HAVE_STRNCPY 1
+
+/* Define to 1 if you have the `strndup' function. */
+#define HAVE_STRNDUP 1
+
+/* Define to 1 if you have the `cfmakeraw' function. */
+/* #undef HAVE_CFMAKERAW */
+
+/* Define to 1 if you have the `getaddrinfo' function. */
+#define HAVE_GETADDRINFO 1
+
+/* Define to 1 if you have the `poll' function. */
+#define HAVE_POLL 1
+
+/* Define to 1 if you have the `select' function. */
+#define HAVE_SELECT 1
+
+/* Define to 1 if you have the `clock_gettime' function. */
+/* #undef HAVE_CLOCK_GETTIME */
+
+/* Define to 1 if you have the `ntohll' function. */
+/* #undef HAVE_NTOHLL */
+
+/* Define to 1 if you have the `htonll' function. */
+/* #undef HAVE_HTONLL */
+
+/* Define to 1 if you have the `strtoull' function. */
+#define HAVE_STRTOULL 1
+
+/* Define to 1 if you have the `__strtoull' function. */
+/* #undef HAVE___STRTOULL */
+
+/* Define to 1 if you have the `_strtoui64' function. */
+/* #undef HAVE__STRTOUI64 */
+
+/* Define to 1 if you have the `glob' function. */
+#define HAVE_GLOB 1
+
+/* Define to 1 if you have the `explicit_bzero' function. */
+/* #undef HAVE_EXPLICIT_BZERO */
+
+/* Define to 1 if you have the `memset_s' function. */
+/* #undef HAVE_MEMSET_S */
+
+/* Define to 1 if you have the `SecureZeroMemory' function. */
+/* #undef HAVE_SECURE_ZERO_MEMORY */
+
+/* Define to 1 if you have the `cmocka_set_test_filter' function. */
+/* #undef HAVE_CMOCKA_SET_TEST_FILTER */
+
+/*************************** LIBRARIES ***************************/
+
+/* Define to 1 if you have the `crypto' library (-lcrypto). */
+#define HAVE_LIBCRYPTO 1
+
+/* Define to 1 if you have the `gcrypt' library (-lgcrypt). */
+/* #undef HAVE_LIBGCRYPT */
+
+/* Define to 1 if you have the 'mbedTLS' library (-lmbedtls). */
+/* #undef HAVE_LIBMBEDCRYPTO */
+
+/* Define to 1 if you have the `pthread' library (-lpthread). */
+#define HAVE_PTHREAD 1
+
+/* Define to 1 if you have the `cmocka' library (-lcmocka). */
+/* #undef HAVE_CMOCKA */
+
+/**************************** OPTIONS ****************************/
+
+#define HAVE_GCC_THREAD_LOCAL_STORAGE 1
+/* #undef HAVE_MSC_THREAD_LOCAL_STORAGE */
+
+#define HAVE_FALLTHROUGH_ATTRIBUTE 1
+#define HAVE_UNUSED_ATTRIBUTE 1
+
+#define HAVE_CONSTRUCTOR_ATTRIBUTE 1
+#define HAVE_DESTRUCTOR_ATTRIBUTE 1
+
+#define HAVE_GCC_VOLATILE_MEMORY_PROTECTION 1
+
+#define HAVE_COMPILER__FUNC__ 1
+#define HAVE_COMPILER__FUNCTION__ 1
+
+/* #undef HAVE_GCC_BOUNDED_ATTRIBUTE */
+
+/* Define to 1 if you want to enable GSSAPI */
+/* #undef WITH_GSSAPI */
+
+/* Define to 1 if you want to enable ZLIB */
+/* #undef WITH_ZLIB */
+
+/* Define to 1 if you want to enable SFTP */
+/* #undef WITH_SFTP */
+
+/* Define to 1 if you want to enable server support */
+#define WITH_SERVER 1
+
+/* Define to 1 if you want to enable DH group exchange algorithms */
+/* #undef WITH_GEX */
+
+/* Define to 1 if you want to enable blowfish cipher support */
+/* #undef WITH_BLOWFISH_CIPHER */
+
+/* Define to 1 if you want to enable debug output for crypto functions */
+/* #undef DEBUG_CRYPTO */
+
+/* Define to 1 if you want to enable debug output for packet functions */
+/* #undef DEBUG_PACKET */
+
+/* Define to 1 if you want to enable pcap output support (experimental) */
+/* #undef WITH_PCAP */
+
+/* Define to 1 if you want to enable calltrace debug output */
+/* #undef DEBUG_CALLTRACE */
+
+/* Define to 1 if you want to enable NaCl support */
+/* #undef WITH_NACL */
+
+/*************************** ENDIAN *****************************/
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+#define WORDS_BIGENDIAN 1
*/ +/* #undef HAVE_CMOCKA */ + +/**************************** OPTIONS ****************************/ + +#define HAVE_GCC_THREAD_LOCAL_STORAGE 1 +/* #undef HAVE_MSC_THREAD_LOCAL_STORAGE */ + +#define HAVE_FALLTHROUGH_ATTRIBUTE 1 +#define HAVE_UNUSED_ATTRIBUTE 1 + +#define HAVE_CONSTRUCTOR_ATTRIBUTE 1 +#define HAVE_DESTRUCTOR_ATTRIBUTE 1 + +#define HAVE_GCC_VOLATILE_MEMORY_PROTECTION 1 + +#define HAVE_COMPILER__FUNC__ 1 +#define HAVE_COMPILER__FUNCTION__ 1 + +/* #undef HAVE_GCC_BOUNDED_ATTRIBUTE */ + +/* Define to 1 if you want to enable GSSAPI */ +/* #undef WITH_GSSAPI */ + +/* Define to 1 if you want to enable ZLIB */ +/* #undef WITH_ZLIB */ + +/* Define to 1 if you want to enable SFTP */ +/* #undef WITH_SFTP */ + +/* Define to 1 if you want to enable server support */ +#define WITH_SERVER 1 + +/* Define to 1 if you want to enable DH group exchange algorithms */ +/* #undef WITH_GEX */ + +/* Define to 1 if you want to enable blowfish cipher support */ +/* #undef WITH_BLOWFISH_CIPHER */ + +/* Define to 1 if you want to enable debug output for crypto functions */ +/* #undef DEBUG_CRYPTO */ + +/* Define to 1 if you want to enable debug output for packet functions */ +/* #undef DEBUG_PACKET */ + +/* Define to 1 if you want to enable pcap output support (experimental) */ +/* #undef WITH_PCAP */ + +/* Define to 1 if you want to enable calltrace debug output */ +/* #undef DEBUG_CALLTRACE */ + +/* Define to 1 if you want to enable NaCl support */ +/* #undef WITH_NACL */ + +/*************************** ENDIAN *****************************/ + +/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most + significant byte first (like Motorola and SPARC, unlike Intel). */ +#define WORDS_BIGENDIAN 1 diff --git a/contrib/libssh-cmake/linux/x86-64-musl/config.h b/contrib/libssh-cmake/linux/x86-64-musl/config.h new file mode 100644 index 000000000000..fd7c2e2b0c12 --- /dev/null +++ b/contrib/libssh-cmake/linux/x86-64-musl/config.h @@ -0,0 +1,287 @@ +/* Name of package */ +#define PACKAGE "libssh" + +/* Version number of package */ +#define VERSION "0.9.7" + +#define SYSCONFDIR "etc" +#define BINARYDIR "/home/ubuntu/workdir/ClickHouse/build/musl" +#define SOURCEDIR "/home/ubuntu/workdir/ClickHouse" + +/* Global bind configuration file path */ +#define GLOBAL_BIND_CONFIG "/etc/ssh/libssh_server_config" + +/* Global client configuration file path */ +#define GLOBAL_CLIENT_CONFIG "/etc/ssh/ssh_config" + +/************************** HEADER FILES *************************/ + +/* Define to 1 if you have the <argp.h> header file. */ +/* #undef HAVE_ARGP_H */ + +/* Define to 1 if you have the <arpa/inet.h> header file. */ +#define HAVE_ARPA_INET_H 1 + +/* Define to 1 if you have the <glob.h> header file. */ +#define HAVE_GLOB_H 1 + +/* Define to 1 if you have the <valgrind/valgrind.h> header file. */ +/* #undef HAVE_VALGRIND_VALGRIND_H */ + +/* Define to 1 if you have the <pty.h> header file. */ +/* #undef HAVE_PTY_H */ + +/* Define to 1 if you have the <utmp.h> header file. */ +/* #undef HAVE_UTMP_H */ + +/* Define to 1 if you have the <util.h> header file. */ +/* #undef HAVE_UTIL_H */ + +/* Define to 1 if you have the <libutil.h> header file. */ +/* #undef HAVE_LIBUTIL_H */ + +/* Define to 1 if you have the <sys/time.h> header file. */ +#define HAVE_SYS_TIME_H 1 + +/* Define to 1 if you have the <sys/utime.h> header file. */ +/* #undef HAVE_SYS_UTIME_H */ + +/* Define to 1 if you have the <io.h> header file. */ +/* #undef HAVE_IO_H */ + +/* Define to 1 if you have the <termios.h> header file. */ +#define HAVE_TERMIOS_H 1 + +/* Define to 1 if you have the <unistd.h> header file.
*/ +#define HAVE_UNISTD_H 1 + +/* Define to 1 if you have the <stdint.h> header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the <openssl/aes.h> header file. */ +#define HAVE_OPENSSL_AES_H 1 + +/* Define to 1 if you have the <wspiapi.h> header file. */ +/* #undef HAVE_WSPIAPI_H */ + +/* Define to 1 if you have the <openssl/blowfish.h> header file. */ +/* #undef HAVE_OPENSSL_BLOWFISH_H */ + +/* Define to 1 if you have the <openssl/des.h> header file. */ +#define HAVE_OPENSSL_DES_H 1 + +/* Define to 1 if you have the <openssl/ecdh.h> header file. */ +#define HAVE_OPENSSL_ECDH_H 1 + +/* Define to 1 if you have the <openssl/ec.h> header file. */ +#define HAVE_OPENSSL_EC_H 1 + +/* Define to 1 if you have the <openssl/ecdsa.h> header file. */ +#define HAVE_OPENSSL_ECDSA_H 1 + +/* Define to 1 if you have the <pthread.h> header file. */ +#define HAVE_PTHREAD_H 1 + +/* Define to 1 if you have elliptic curve cryptography in openssl */ +#define HAVE_OPENSSL_ECC 1 + +/* Define to 1 if you have elliptic curve cryptography in gcrypt */ +/* #undef HAVE_GCRYPT_ECC */ + +/* Define to 1 if you have elliptic curve cryptography */ +#define HAVE_ECC 1 + +/* Define to 1 if you have DSA */ +/* #undef HAVE_DSA */ + +/* Define to 1 if you have gl_flags as a glob_t struct member */ +/* #undef HAVE_GLOB_GL_FLAGS_MEMBER */ + +/* Define to 1 if you have OpenSSL with Ed25519 support */ +#define HAVE_OPENSSL_ED25519 1 + +/* Define to 1 if you have OpenSSL with X25519 support */ +#define HAVE_OPENSSL_X25519 1 + +/*************************** FUNCTIONS ***************************/ + +/* Define to 1 if you have the `EVP_aes128_ctr' function. */ +#define HAVE_OPENSSL_EVP_AES_CTR 1 + +/* Define to 1 if you have the `EVP_aes128_cbc' function. */ +#define HAVE_OPENSSL_EVP_AES_CBC 1 + +/* Define to 1 if you have the `EVP_aes128_gcm' function. */ +/* #undef HAVE_OPENSSL_EVP_AES_GCM */ + +/* Define to 1 if you have the `CRYPTO_THREADID_set_callback' function. */ +#define HAVE_OPENSSL_CRYPTO_THREADID_SET_CALLBACK 1 + +/* Define to 1 if you have the `CRYPTO_ctr128_encrypt' function. */ +#define HAVE_OPENSSL_CRYPTO_CTR128_ENCRYPT 1 + +/* Define to 1 if you have the `EVP_CIPHER_CTX_new' function. */ +#define HAVE_OPENSSL_EVP_CIPHER_CTX_NEW 1 + +/* Define to 1 if you have the `EVP_KDF_CTX_new_id' function. */ +/* #undef HAVE_OPENSSL_EVP_KDF_CTX_NEW_ID */ + +/* Define to 1 if you have the `FIPS_mode' function. */ +#if USE_BORINGSSL +#define HAVE_OPENSSL_FIPS_MODE 1 +#endif + +/* Define to 1 if you have the `EVP_DigestSign' function. */ +#define HAVE_OPENSSL_EVP_DIGESTSIGN 1 + +/* Define to 1 if you have the `EVP_DigestVerify' function. */ +#define HAVE_OPENSSL_EVP_DIGESTVERIFY 1 + +/* Define to 1 if you have the `OPENSSL_ia32cap_loc' function. */ +/* #undef HAVE_OPENSSL_IA32CAP_LOC */ + +/* Define to 1 if you have the `snprintf' function. */ +#define HAVE_SNPRINTF 1 + +/* Define to 1 if you have the `_snprintf' function. */ +/* #undef HAVE__SNPRINTF */ + +/* Define to 1 if you have the `_snprintf_s' function. */ +/* #undef HAVE__SNPRINTF_S */ + +/* Define to 1 if you have the `vsnprintf' function. */ +#define HAVE_VSNPRINTF 1 + +/* Define to 1 if you have the `_vsnprintf' function. */ +/* #undef HAVE__VSNPRINTF */ + +/* Define to 1 if you have the `_vsnprintf_s' function. */ +/* #undef HAVE__VSNPRINTF_S */ + +/* Define to 1 if you have the `isblank' function. */ +#define HAVE_ISBLANK 1 + +/* Define to 1 if you have the `strncpy' function. */ +#define HAVE_STRNCPY 1 + +/* Define to 1 if you have the `strndup' function. */ +#define HAVE_STRNDUP 1 + +/* Define to 1 if you have the `cfmakeraw' function.
*/ +/* #undef HAVE_CFMAKERAW */ + +/* Define to 1 if you have the `getaddrinfo' function. */ +#define HAVE_GETADDRINFO 1 + +/* Define to 1 if you have the `poll' function. */ +#define HAVE_POLL 1 + +/* Define to 1 if you have the `select' function. */ +#define HAVE_SELECT 1 + +/* Define to 1 if you have the `clock_gettime' function. */ +/* #undef HAVE_CLOCK_GETTIME */ + +/* Define to 1 if you have the `ntohll' function. */ +/* #undef HAVE_NTOHLL */ + +/* Define to 1 if you have the `htonll' function. */ +/* #undef HAVE_HTONLL */ + +/* Define to 1 if you have the `strtoull' function. */ +#define HAVE_STRTOULL 1 + +/* Define to 1 if you have the `__strtoull' function. */ +/* #undef HAVE___STRTOULL */ + +/* Define to 1 if you have the `_strtoui64' function. */ +/* #undef HAVE__STRTOUI64 */ + +/* Define to 1 if you have the `glob' function. */ +#define HAVE_GLOB 1 + +/* Define to 1 if you have the `explicit_bzero' function. */ +#define HAVE_EXPLICIT_BZERO 1 + +/* Define to 1 if you have the `memset_s' function. */ +/* #undef HAVE_MEMSET_S */ + +/* Define to 1 if you have the `SecureZeroMemory' function. */ +/* #undef HAVE_SECURE_ZERO_MEMORY */ + +/* Define to 1 if you have the `cmocka_set_test_filter' function. */ +/* #undef HAVE_CMOCKA_SET_TEST_FILTER */ + +/*************************** LIBRARIES ***************************/ + +/* Define to 1 if you have the `crypto' library (-lcrypto). */ +#define HAVE_LIBCRYPTO 1 + +/* Define to 1 if you have the `gcrypt' library (-lgcrypt). */ +/* #undef HAVE_LIBGCRYPT */ + +/* Define to 1 if you have the 'mbedTLS' library (-lmbedtls). */ +/* #undef HAVE_LIBMBEDCRYPTO */ + +/* Define to 1 if you have the `pthread' library (-lpthread). */ +#define HAVE_PTHREAD 1 + +/* Define to 1 if you have the `cmocka' library (-lcmocka). */ +/* #undef HAVE_CMOCKA */ + +/**************************** OPTIONS ****************************/ + +#define HAVE_GCC_THREAD_LOCAL_STORAGE 1 +/* #undef HAVE_MSC_THREAD_LOCAL_STORAGE */ + +#define HAVE_FALLTHROUGH_ATTRIBUTE 1 +#define HAVE_UNUSED_ATTRIBUTE 1 + +#define HAVE_CONSTRUCTOR_ATTRIBUTE 1 +#define HAVE_DESTRUCTOR_ATTRIBUTE 1 + +#define HAVE_GCC_VOLATILE_MEMORY_PROTECTION 1 + +#define HAVE_COMPILER__FUNC__ 1 +#define HAVE_COMPILER__FUNCTION__ 1 + +/* #undef HAVE_GCC_BOUNDED_ATTRIBUTE */ + +/* Define to 1 if you want to enable GSSAPI */ +/* #undef WITH_GSSAPI */ + +/* Define to 1 if you want to enable ZLIB */ +/* #undef WITH_ZLIB */ + +/* Define to 1 if you want to enable SFTP */ +/* #undef WITH_SFTP */ + +/* Define to 1 if you want to enable server support */ +#define WITH_SERVER 1 + +/* Define to 1 if you want to enable DH group exchange algorithms */ +/* #undef WITH_GEX */ + +/* Define to 1 if you want to enable blowfish cipher support */ +/* #undef WITH_BLOWFISH_CIPHER */ + +/* Define to 1 if you want to enable debug output for crypto functions */ +/* #undef DEBUG_CRYPTO */ + +/* Define to 1 if you want to enable debug output for packet functions */ +/* #undef DEBUG_PACKET */ + +/* Define to 1 if you want to enable pcap output support (experimental) */ +/* #undef WITH_PCAP */ + +/* Define to 1 if you want to enable calltrace debug output */ +/* #undef DEBUG_CALLTRACE */ + +/* Define to 1 if you want to enable NaCl support */ +/* #undef WITH_NACL */ + +/*************************** ENDIAN *****************************/ + +/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most + significant byte first (like Motorola and SPARC, unlike Intel). 
*/ +/* #undef WORDS_BIGENDIAN */ diff --git a/contrib/libssh-cmake/linux/x86-64/config.h b/contrib/libssh-cmake/linux/x86-64/config.h new file mode 100644 index 000000000000..4090c5a45ad1 --- /dev/null +++ b/contrib/libssh-cmake/linux/x86-64/config.h @@ -0,0 +1,287 @@ +/* Name of package */ +#define PACKAGE "libssh" + +/* Version number of package */ +#define VERSION "0.9.7" + +#define SYSCONFDIR "etc" +#define BINARYDIR "/home/ubuntu/workdir/ClickHouse/build/Debug" +#define SOURCEDIR "/home/ubuntu/workdir/ClickHouse" + +/* Global bind configuration file path */ +#define GLOBAL_BIND_CONFIG "/etc/ssh/libssh_server_config" + +/* Global client configuration file path */ +#define GLOBAL_CLIENT_CONFIG "/etc/ssh/ssh_config" + +/************************** HEADER FILES *************************/ + +/* Define to 1 if you have the <argp.h> header file. */ +/* #undef HAVE_ARGP_H */ + +/* Define to 1 if you have the <arpa/inet.h> header file. */ +#define HAVE_ARPA_INET_H 1 + +/* Define to 1 if you have the <glob.h> header file. */ +#define HAVE_GLOB_H 1 + +/* Define to 1 if you have the <valgrind/valgrind.h> header file. */ +/* #undef HAVE_VALGRIND_VALGRIND_H */ + +/* Define to 1 if you have the <pty.h> header file. */ +/* #undef HAVE_PTY_H */ + +/* Define to 1 if you have the <utmp.h> header file. */ +/* #undef HAVE_UTMP_H */ + +/* Define to 1 if you have the <util.h> header file. */ +/* #undef HAVE_UTIL_H */ + +/* Define to 1 if you have the <libutil.h> header file. */ +/* #undef HAVE_LIBUTIL_H */ + +/* Define to 1 if you have the <sys/time.h> header file. */ +#define HAVE_SYS_TIME_H 1 + +/* Define to 1 if you have the <sys/utime.h> header file. */ +/* #undef HAVE_SYS_UTIME_H */ + +/* Define to 1 if you have the <io.h> header file. */ +/* #undef HAVE_IO_H */ + +/* Define to 1 if you have the <termios.h> header file. */ +#define HAVE_TERMIOS_H 1 + +/* Define to 1 if you have the <unistd.h> header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to 1 if you have the <stdint.h> header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the <openssl/aes.h> header file. */ +#define HAVE_OPENSSL_AES_H 1 + +/* Define to 1 if you have the <wspiapi.h> header file. */ +/* #undef HAVE_WSPIAPI_H */ + +/* Define to 1 if you have the <openssl/blowfish.h> header file. */ +/* #undef HAVE_OPENSSL_BLOWFISH_H */ + +/* Define to 1 if you have the <openssl/des.h> header file. */ +#define HAVE_OPENSSL_DES_H 1 + +/* Define to 1 if you have the <openssl/ecdh.h> header file. */ +#define HAVE_OPENSSL_ECDH_H 1 + +/* Define to 1 if you have the <openssl/ec.h> header file. */ +#define HAVE_OPENSSL_EC_H 1 + +/* Define to 1 if you have the <openssl/ecdsa.h> header file. */ +#define HAVE_OPENSSL_ECDSA_H 1 + +/* Define to 1 if you have the <pthread.h> header file. */ +#define HAVE_PTHREAD_H 1 + +/* Define to 1 if you have elliptic curve cryptography in openssl */ +#define HAVE_OPENSSL_ECC 1 + +/* Define to 1 if you have elliptic curve cryptography in gcrypt */ +/* #undef HAVE_GCRYPT_ECC */ + +/* Define to 1 if you have elliptic curve cryptography */ +#define HAVE_ECC 1 + +/* Define to 1 if you have DSA */ +/* #undef HAVE_DSA */ + +/* Define to 1 if you have gl_flags as a glob_t struct member */ +#define HAVE_GLOB_GL_FLAGS_MEMBER 1 + +/* Define to 1 if you have OpenSSL with Ed25519 support */ +#define HAVE_OPENSSL_ED25519 1 + +/* Define to 1 if you have OpenSSL with X25519 support */ +#define HAVE_OPENSSL_X25519 1 + +/*************************** FUNCTIONS ***************************/ + +/* Define to 1 if you have the `EVP_aes128_ctr' function. */ +#define HAVE_OPENSSL_EVP_AES_CTR 1 + +/* Define to 1 if you have the `EVP_aes128_cbc' function. */ +#define HAVE_OPENSSL_EVP_AES_CBC 1 + +/* Define to 1 if you have the `EVP_aes128_gcm' function.
*/ +/* #undef HAVE_OPENSSL_EVP_AES_GCM */ + +/* Define to 1 if you have the `CRYPTO_THREADID_set_callback' function. */ +#define HAVE_OPENSSL_CRYPTO_THREADID_SET_CALLBACK 1 + +/* Define to 1 if you have the `CRYPTO_ctr128_encrypt' function. */ +#define HAVE_OPENSSL_CRYPTO_CTR128_ENCRYPT 1 + +/* Define to 1 if you have the `EVP_CIPHER_CTX_new' function. */ +#define HAVE_OPENSSL_EVP_CIPHER_CTX_NEW 1 + +/* Define to 1 if you have the `EVP_KDF_CTX_new_id' function. */ +/* #undef HAVE_OPENSSL_EVP_KDF_CTX_NEW_ID */ + +/* Define to 1 if you have the `FIPS_mode' function. */ +#if USE_BORINGSSL +#define HAVE_OPENSSL_FIPS_MODE 1 +#endif + +/* Define to 1 if you have the `EVP_DigestSign' function. */ +#define HAVE_OPENSSL_EVP_DIGESTSIGN 1 + +/* Define to 1 if you have the `EVP_DigestVerify' function. */ +#define HAVE_OPENSSL_EVP_DIGESTVERIFY 1 + +/* Define to 1 if you have the `OPENSSL_ia32cap_loc' function. */ +/* #undef HAVE_OPENSSL_IA32CAP_LOC */ + +/* Define to 1 if you have the `snprintf' function. */ +#define HAVE_SNPRINTF 1 + +/* Define to 1 if you have the `_snprintf' function. */ +/* #undef HAVE__SNPRINTF */ + +/* Define to 1 if you have the `_snprintf_s' function. */ +/* #undef HAVE__SNPRINTF_S */ + +/* Define to 1 if you have the `vsnprintf' function. */ +#define HAVE_VSNPRINTF 1 + +/* Define to 1 if you have the `_vsnprintf' function. */ +/* #undef HAVE__VSNPRINTF */ + +/* Define to 1 if you have the `_vsnprintf_s' function. */ +/* #undef HAVE__VSNPRINTF_S */ + +/* Define to 1 if you have the `isblank' function. */ +#define HAVE_ISBLANK 1 + +/* Define to 1 if you have the `strncpy' function. */ +#define HAVE_STRNCPY 1 + +/* Define to 1 if you have the `strndup' function. */ +#define HAVE_STRNDUP 1 + +/* Define to 1 if you have the `cfmakeraw' function. */ +/* #undef HAVE_CFMAKERAW */ + +/* Define to 1 if you have the `getaddrinfo' function. */ +#define HAVE_GETADDRINFO 1 + +/* Define to 1 if you have the `poll' function. */ +#define HAVE_POLL 1 + +/* Define to 1 if you have the `select' function. */ +#define HAVE_SELECT 1 + +/* Define to 1 if you have the `clock_gettime' function. */ +/* #undef HAVE_CLOCK_GETTIME */ + +/* Define to 1 if you have the `ntohll' function. */ +/* #undef HAVE_NTOHLL */ + +/* Define to 1 if you have the `htonll' function. */ +/* #undef HAVE_HTONLL */ + +/* Define to 1 if you have the `strtoull' function. */ +#define HAVE_STRTOULL 1 + +/* Define to 1 if you have the `__strtoull' function. */ +/* #undef HAVE___STRTOULL */ + +/* Define to 1 if you have the `_strtoui64' function. */ +/* #undef HAVE__STRTOUI64 */ + +/* Define to 1 if you have the `glob' function. */ +#define HAVE_GLOB 1 + +/* Define to 1 if you have the `explicit_bzero' function. */ +#define HAVE_EXPLICIT_BZERO 1 + +/* Define to 1 if you have the `memset_s' function. */ +#define HAVE_MEMSET_S 1 + +/* Define to 1 if you have the `SecureZeroMemory' function. */ +/* #undef HAVE_SECURE_ZERO_MEMORY */ + +/* Define to 1 if you have the `cmocka_set_test_filter' function. */ +/* #undef HAVE_CMOCKA_SET_TEST_FILTER */ + +/*************************** LIBRARIES ***************************/ + +/* Define to 1 if you have the `crypto' library (-lcrypto). */ +#define HAVE_LIBCRYPTO 1 + +/* Define to 1 if you have the `gcrypt' library (-lgcrypt). */ +/* #undef HAVE_LIBGCRYPT */ + +/* Define to 1 if you have the 'mbedTLS' library (-lmbedtls). */ +/* #undef HAVE_LIBMBEDCRYPTO */ + +/* Define to 1 if you have the `pthread' library (-lpthread). 
*/ +#define HAVE_PTHREAD 1 + +/* Define to 1 if you have the `cmocka' library (-lcmocka). */ +/* #undef HAVE_CMOCKA */ + +/**************************** OPTIONS ****************************/ + +#define HAVE_GCC_THREAD_LOCAL_STORAGE 1 +/* #undef HAVE_MSC_THREAD_LOCAL_STORAGE */ + +#define HAVE_FALLTHROUGH_ATTRIBUTE 1 +#define HAVE_UNUSED_ATTRIBUTE 1 + +#define HAVE_CONSTRUCTOR_ATTRIBUTE 1 +#define HAVE_DESTRUCTOR_ATTRIBUTE 1 + +#define HAVE_GCC_VOLATILE_MEMORY_PROTECTION 1 + +#define HAVE_COMPILER__FUNC__ 1 +#define HAVE_COMPILER__FUNCTION__ 1 + +/* #undef HAVE_GCC_BOUNDED_ATTRIBUTE */ + +/* Define to 1 if you want to enable GSSAPI */ +/* #undef WITH_GSSAPI */ + +/* Define to 1 if you want to enable ZLIB */ +/* #undef WITH_ZLIB */ + +/* Define to 1 if you want to enable SFTP */ +/* #undef WITH_SFTP */ + +/* Define to 1 if you want to enable server support */ +#define WITH_SERVER 1 + +/* Define to 1 if you want to enable DH group exchange algorithms */ +/* #undef WITH_GEX */ + +/* Define to 1 if you want to enable blowfish cipher support */ +/* #undef WITH_BLOWFISH_CIPHER */ + +/* Define to 1 if you want to enable debug output for crypto functions */ +/* #undef DEBUG_CRYPTO */ + +/* Define to 1 if you want to enable debug output for packet functions */ +/* #undef DEBUG_PACKET */ + +/* Define to 1 if you want to enable pcap output support (experimental) */ +/* #undef WITH_PCAP */ + +/* Define to 1 if you want to enable calltrace debug output */ +/* #undef DEBUG_CALLTRACE */ + +/* Define to 1 if you want to enable NaCl support */ +/* #undef WITH_NACL */ + +/*************************** ENDIAN *****************************/ + +/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most + significant byte first (like Motorola and SPARC, unlike Intel). 
*/ +/* #undef WORDS_BIGENDIAN */ diff --git a/contrib/libunwind b/contrib/libunwind index e48aa13f67dc..30cc1d3fd365 160000 --- a/contrib/libunwind +++ b/contrib/libunwind @@ -1 +1 @@ -Subproject commit e48aa13f67dc722511b5af33a32ba9b7748176b5 +Subproject commit 30cc1d3fd3655a5cfa0ab112fe320fb9fc0a8344 diff --git a/contrib/llvm-project b/contrib/llvm-project index d857c707fccd..e7b8befca85c 160000 --- a/contrib/llvm-project +++ b/contrib/llvm-project @@ -1 +1 @@ -Subproject commit d857c707fccd50423bea1c4710dc469cf89607a9 +Subproject commit e7b8befca85c8b847614432dba250c22d35fbae0 diff --git a/contrib/llvm-project-cmake/CMakeLists.txt b/contrib/llvm-project-cmake/CMakeLists.txt index fe6cffd33e22..ce82a10d3eb6 100644 --- a/contrib/llvm-project-cmake/CMakeLists.txt +++ b/contrib/llvm-project-cmake/CMakeLists.txt @@ -1,18 +1,16 @@ -if (APPLE OR NOT ARCH_AMD64 OR SANITIZE STREQUAL "undefined") +if (APPLE OR SANITIZE STREQUAL "undefined" OR SANITIZE STREQUAL "memory") set (ENABLE_EMBEDDED_COMPILER_DEFAULT OFF) else() set (ENABLE_EMBEDDED_COMPILER_DEFAULT ON) endif() -option (ENABLE_EMBEDDED_COMPILER "Enable support for 'compile_expressions' option for query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT}) +option (ENABLE_EMBEDDED_COMPILER "Enable support for JIT compilation during query execution" ${ENABLE_EMBEDDED_COMPILER_DEFAULT}) if (NOT ENABLE_EMBEDDED_COMPILER) message(STATUS "Not using LLVM") return() endif() -# TODO: Enable compilation on AArch64 - set (LLVM_VERSION "15.0.0bundled") set (LLVM_INCLUDE_DIRS "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm/include" @@ -58,18 +56,30 @@ set (REQUIRED_LLVM_LIBRARIES LLVMDemangle ) -# if (ARCH_AMD64) +if (ARCH_AMD64) + set (LLVM_TARGETS_TO_BUILD "X86" CACHE INTERNAL "") list(APPEND REQUIRED_LLVM_LIBRARIES LLVMX86Info LLVMX86Desc LLVMX86CodeGen) -# elseif (ARCH_AARCH64) -# list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen) -# endif () +elseif (ARCH_AARCH64) + set (LLVM_TARGETS_TO_BUILD "AArch64" CACHE INTERNAL "") + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMAArch64Info LLVMAArch64Desc LLVMAArch64CodeGen) +elseif (ARCH_PPC64LE) + set (LLVM_TARGETS_TO_BUILD "PowerPC" CACHE INTERNAL "") + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMPowerPCInfo LLVMPowerPCDesc LLVMPowerPCCodeGen) +elseif (ARCH_S390X) + set (LLVM_TARGETS_TO_BUILD "SystemZ" CACHE INTERNAL "") + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMSystemZInfo LLVMSystemZDesc LLVMSystemZCodeGen) +elseif (ARCH_RISCV64) + set (LLVM_TARGETS_TO_BUILD "RISCV" CACHE INTERNAL "") + list(APPEND REQUIRED_LLVM_LIBRARIES LLVMRISCVInfo LLVMRISCVDesc LLVMRISCVCodeGen) +endif () + +message (STATUS "LLVM TARGETS TO BUILD ${LLVM_TARGETS_TO_BUILD}") set (CMAKE_INSTALL_RPATH "ON") # Do not adjust RPATH in llvm, since then it will not be able to find libcxx/libcxxabi/libunwind set (LLVM_COMPILER_CHECKED 1 CACHE INTERNAL "") # Skip internal compiler selection set (LLVM_ENABLE_EH 1 CACHE INTERNAL "") # With exception handling set (LLVM_ENABLE_RTTI 1 CACHE INTERNAL "") set (LLVM_ENABLE_PIC 0 CACHE INTERNAL "") -set (LLVM_TARGETS_TO_BUILD "X86" CACHE STRING "") # for x86 + ARM: "X86;AArch64" # Omit unnecessary stuff (just the options which are ON by default) set(LLVM_ENABLE_BACKTRACES 0 CACHE INTERNAL "") @@ -99,15 +109,12 @@ set(LLVM_ENABLE_BINDINGS 0 CACHE INTERNAL "") set (LLVM_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/llvm-project/llvm") set (LLVM_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/llvm-project/llvm") -# Since we always use toolchain files to generate hermatic builds, 
cmake will -# think it's a cross compilation, and LLVM will try to configure NATIVE LLVM -# targets with all tests enabled, which will slow down cmake configuration and -# compilation (You'll see Building native llvm-tblgen...). Let's disable the -# cross compiling indicator for now. -# -# TODO We should let cmake know whether it's indeed a cross compilation in the -# first place. -set (CMAKE_CROSSCOMPILING 0) +message (STATUS "LLVM CMAKE CROSS COMPILING ${CMAKE_CROSSCOMPILING}") +if (CMAKE_CROSSCOMPILING) + set (LLVM_HOST_TRIPLE "${CMAKE_C_COMPILER_TARGET}" CACHE INTERNAL "") + message (STATUS "CROSS COMPILING SET LLVM HOST TRIPLE ${LLVM_HOST_TRIPLE}") +endif() + add_subdirectory ("${LLVM_SOURCE_DIR}" "${LLVM_BINARY_DIR}") set_directory_properties (PROPERTIES diff --git a/contrib/lz4 b/contrib/lz4 index e82198428c80..92ebf1870b9a 160000 --- a/contrib/lz4 +++ b/contrib/lz4 @@ -1 +1 @@ -Subproject commit e82198428c8061372d5adef1f9bfff4203f6081e +Subproject commit 92ebf1870b9acbefc0e7970409a181954a10ff40 diff --git a/contrib/lz4-cmake/CMakeLists.txt b/contrib/lz4-cmake/CMakeLists.txt index c0fd574134f5..0f37022d5154 100644 --- a/contrib/lz4-cmake/CMakeLists.txt +++ b/contrib/lz4-cmake/CMakeLists.txt @@ -13,6 +13,11 @@ add_library (ch_contrib::lz4 ALIAS _lz4) target_compile_definitions (_lz4 PUBLIC LZ4_DISABLE_DEPRECATE_WARNINGS=1) target_compile_definitions (_lz4 PUBLIC LZ4_FAST_DEC_LOOP=1) + +if(ARCH_S390X) + target_compile_definitions(_lz4 PRIVATE LZ4_STATIC_LINKING_ONLY_ENDIANNESS_INDEPENDENT_OUTPUT) +endif() + if (SANITIZE STREQUAL "undefined") target_compile_options (_lz4 PRIVATE -fno-sanitize=undefined) endif () diff --git a/contrib/nlp-data-cmake/CMakeLists.txt b/contrib/nlp-data-cmake/CMakeLists.txt deleted file mode 100644 index 5380269c4799..000000000000 --- a/contrib/nlp-data-cmake/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake) - -set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/nlp-data") - -add_library (_nlp_data INTERFACE) - -clickhouse_embed_binaries( - TARGET nlp_dictionaries - RESOURCE_DIR "${LIBRARY_DIR}" - RESOURCES charset.zst tonality_ru.zst programming.zst -) - -add_dependencies(_nlp_data nlp_dictionaries) -target_link_libraries(_nlp_data INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:nlp_dictionaries> -Wl,${NO_WHOLE_ARCHIVE}") -add_library(ch_contrib::nlp_data ALIAS _nlp_data) diff --git a/contrib/openldap b/contrib/openldap index 8688afe6bc95..5671b80e369d 160000 --- a/contrib/openldap +++ b/contrib/openldap @@ -1 +1 @@ -Subproject commit 8688afe6bc95ebcd20edf4578c536362218cb70a +Subproject commit 5671b80e369df2caf5f34e02924316205a43c895 diff --git a/contrib/openldap-cmake/CMakeLists.txt b/contrib/openldap-cmake/CMakeLists.txt index 7af07d5f553c..c7d4b5a2ca22 100644 --- a/contrib/openldap-cmake/CMakeLists.txt +++ b/contrib/openldap-cmake/CMakeLists.txt @@ -96,71 +96,82 @@ target_compile_definitions(_lber ) set(_ldap_srcs + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/abandon.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/add.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/addentry.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/assertion.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/avl.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/bind.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/open.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/result.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/error.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/cancel.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/charray.c"
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/compare.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/search.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/controls.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/messages.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/references.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/extended.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/cyrus.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/modify.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/add.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/modrdn.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/dds.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/delete.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/abandon.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sasl.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sbind.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/unbind.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/cancel.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/deref.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/dnssrv.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/error.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/extended.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/fetch.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/filter.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/free.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sort.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/passwd.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/whoami.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/vc.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/getattr.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/getdn.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/getentry.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/getattr.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/getvalues.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/addentry.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/request.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/os-ip.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/url.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/pagectrl.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sortctrl.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/vlvctrl.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/init.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/lbase64.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldap_sync.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldif.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldifutil.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/messages.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/modify.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/modrdn.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/msctrl.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/open.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/options.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/os-ip.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/os-local.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/pagectrl.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/passwd.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ppolicy.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/print.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/string.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/util-int.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/psearchctrl.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/rdwr.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/references.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/request.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/result.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/rq.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sasl.c" + 
"${OPENLDAP_SOURCE_DIR}/libraries/libldap/sbind.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/schema.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/charray.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/os-local.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/dnssrv.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/utf-8.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/utf-8-conv.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/search.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sort.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/sortctrl.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/stctrl.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/string.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tavl.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_debug.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_nt.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_posix.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_pth.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/thr_thr.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/threads.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls2.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls_o.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls_g.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tls_o.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/tpool.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/turn.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ppolicy.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/dds.c" "${OPENLDAP_SOURCE_DIR}/libraries/libldap/txn.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldap_sync.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/stctrl.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/assertion.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/deref.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldifutil.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/ldif.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/fetch.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/lbase64.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/msctrl.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap/psearchctrl.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/unbind.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/url.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/utf-8-conv.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/utf-8.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/util-int.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/vc.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/vlvctrl.c" + "${OPENLDAP_SOURCE_DIR}/libraries/libldap/whoami.c" ) mkversion(ldap) @@ -185,43 +196,5 @@ target_compile_definitions(_ldap PRIVATE LDAP_LIBRARY ) -set(_ldap_r_specific_srcs - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/threads.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/rdwr.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/tpool.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/rq.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_posix.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_thr.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_nt.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_pth.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_stub.c" - "${OPENLDAP_SOURCE_DIR}/libraries/libldap_r/thr_debug.c" -) - -mkversion(ldap_r) - -add_library(_ldap_r - ${_ldap_r_specific_srcs} - ${_ldap_srcs} - "${CMAKE_CURRENT_BINARY_DIR}/ldap_r-version.c" -) - -target_link_libraries(_ldap_r - PRIVATE _lber - PRIVATE OpenSSL::Crypto OpenSSL::SSL -) - -target_include_directories(_ldap_r SYSTEM - PUBLIC ${_extra_build_dir}/include - PUBLIC "${OPENLDAP_SOURCE_DIR}/include" - PRIVATE 
"${OPENLDAP_SOURCE_DIR}/libraries/libldap_r" - PRIVATE "${OPENLDAP_SOURCE_DIR}/libraries/libldap" -) - -target_compile_definitions(_ldap_r - PRIVATE LDAP_R_COMPILE - PRIVATE LDAP_LIBRARY -) - -add_library(ch_contrib::ldap ALIAS _ldap_r) +add_library(ch_contrib::ldap ALIAS _ldap) add_library(ch_contrib::lber ALIAS _lber) diff --git a/contrib/openssl b/contrib/openssl index 19cc035b6c6f..245cb0291e0d 160000 --- a/contrib/openssl +++ b/contrib/openssl @@ -1 +1 @@ -Subproject commit 19cc035b6c6f2283573d29c7ea7f7d675cf750ce +Subproject commit 245cb0291e0db99d9ccf3692fa76f440b2b054c2 diff --git a/contrib/openssl-cmake/CMakeLists.txt b/contrib/openssl-cmake/CMakeLists.txt index 92739ff36083..980a12e03656 100644 --- a/contrib/openssl-cmake/CMakeLists.txt +++ b/contrib/openssl-cmake/CMakeLists.txt @@ -126,7 +126,7 @@ if(ENABLE_OPENSSL_DYNAMIC OR ENABLE_OPENSSL) elseif(ARCH_PPC64LE) macro(perl_generate_asm FILE_IN FILE_OUT) add_custom_command(OUTPUT ${FILE_OUT} - COMMAND /usr/bin/env perl ${FILE_IN} "linux64" ${FILE_OUT}) + COMMAND /usr/bin/env perl ${FILE_IN} "linux64v2" ${FILE_OUT}) endmacro() perl_generate_asm(${OPENSSL_SOURCE_DIR}/crypto/aes/asm/aes-ppc.pl ${OPENSSL_BINARY_DIR}/crypto/aes/aes-ppc.s) diff --git a/contrib/openssl-cmake/linux_aarch64/include/openssl/cmp.h b/contrib/openssl-cmake/linux_aarch64/include/openssl/cmp.h index 2476042c531a..49825570d8c3 100644 --- a/contrib/openssl-cmake/linux_aarch64/include/openssl/cmp.h +++ b/contrib/openssl-cmake/linux_aarch64/include/openssl/cmp.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/cmp.h.in + * Generated by Makefile from include/openssl/cmp.h.in * - * Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved. 
* Copyright Nokia 2007-2019 * Copyright Siemens AG 2015-2019 * @@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ -# define OSSL_CMP_PKISTATUS_accepted 0 -# define OSSL_CMP_PKISTATUS_grantedWithMods 1 -# define OSSL_CMP_PKISTATUS_rejection 2 -# define OSSL_CMP_PKISTATUS_waiting 3 -# define OSSL_CMP_PKISTATUS_revocationWarning 4 +# define OSSL_CMP_PKISTATUS_request -3 +# define OSSL_CMP_PKISTATUS_trans -2 +# define OSSL_CMP_PKISTATUS_unspecified -1 +# define OSSL_CMP_PKISTATUS_accepted 0 +# define OSSL_CMP_PKISTATUS_grantedWithMods 1 +# define OSSL_CMP_PKISTATUS_rejection 2 +# define OSSL_CMP_PKISTATUS_waiting 3 +# define OSSL_CMP_PKISTATUS_revocationWarning 4 # define OSSL_CMP_PKISTATUS_revocationNotification 5 -# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 +# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 typedef ASN1_INTEGER OSSL_CMP_PKISTATUS; DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS) @@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted, int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey); int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx, const unsigned char *ref, int len); -int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec, - const int len); +int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, + const unsigned char *sec, int len); /* CMP message header and extra certificates: */ int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name); int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); +int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx); int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx, STACK_OF(X509) *extraCertsOut); /* certificate template: */ @@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr); OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid); OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx, const char *propq); diff --git a/contrib/openssl-cmake/linux_aarch64/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_aarch64/include/openssl/opensslv.h index 81c1b93afaa3..3c221e1ac231 100644 --- a/contrib/openssl-cmake/linux_aarch64/include/openssl/opensslv.h +++ b/contrib/openssl-cmake/linux_aarch64/include/openssl/opensslv.h @@ -1,6 +1,6 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/opensslv.h.in + * Generated by Makefile from include/openssl/opensslv.h.in * * Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved. * @@ -29,7 +29,7 @@ extern "C" { */ # define OPENSSL_VERSION_MAJOR 3 # define OPENSSL_VERSION_MINOR 0 -# define OPENSSL_VERSION_PATCH 7 +# define OPENSSL_VERSION_PATCH 10 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. */ -# define OPENSSL_VERSION_STR "3.0.7" -# define OPENSSL_FULL_VERSION_STR "3.0.7" +# define OPENSSL_VERSION_STR "3.0.10" +# define OPENSSL_FULL_VERSION_STR "3.0.10" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. 
*/ -# define OPENSSL_RELEASE_DATE "1 Nov 2022" +# define OPENSSL_RELEASE_DATE "1 Aug 2023" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/linux_aarch64/include/openssl/x509v3.h b/contrib/openssl-cmake/linux_aarch64/include/openssl/x509v3.h index fb4b49ca3497..20b67455f206 100644 --- a/contrib/openssl-cmake/linux_aarch64/include/openssl/x509v3.h +++ b/contrib/openssl-cmake/linux_aarch64/include/openssl/x509v3.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/x509v3.h.in + * Generated by Makefile from include/openssl/x509v3.h.in * - * Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st { OTHERNAME *otherName; /* otherName */ ASN1_IA5STRING *rfc822Name; ASN1_IA5STRING *dNSName; - ASN1_TYPE *x400Address; + ASN1_STRING *x400Address; X509_NAME *directoryName; EDIPARTYNAME *ediPartyName; ASN1_IA5STRING *uniformResourceIdentifier; diff --git a/contrib/openssl-cmake/linux_ppc64le/include/openssl/cmp.h b/contrib/openssl-cmake/linux_ppc64le/include/openssl/cmp.h index 2476042c531a..49825570d8c3 100644 --- a/contrib/openssl-cmake/linux_ppc64le/include/openssl/cmp.h +++ b/contrib/openssl-cmake/linux_ppc64le/include/openssl/cmp.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/cmp.h.in + * Generated by Makefile from include/openssl/cmp.h.in * - * Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved. 
* Copyright Nokia 2007-2019 * Copyright Siemens AG 2015-2019 * @@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ -# define OSSL_CMP_PKISTATUS_accepted 0 -# define OSSL_CMP_PKISTATUS_grantedWithMods 1 -# define OSSL_CMP_PKISTATUS_rejection 2 -# define OSSL_CMP_PKISTATUS_waiting 3 -# define OSSL_CMP_PKISTATUS_revocationWarning 4 +# define OSSL_CMP_PKISTATUS_request -3 +# define OSSL_CMP_PKISTATUS_trans -2 +# define OSSL_CMP_PKISTATUS_unspecified -1 +# define OSSL_CMP_PKISTATUS_accepted 0 +# define OSSL_CMP_PKISTATUS_grantedWithMods 1 +# define OSSL_CMP_PKISTATUS_rejection 2 +# define OSSL_CMP_PKISTATUS_waiting 3 +# define OSSL_CMP_PKISTATUS_revocationWarning 4 # define OSSL_CMP_PKISTATUS_revocationNotification 5 -# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 +# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 typedef ASN1_INTEGER OSSL_CMP_PKISTATUS; DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS) @@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted, int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey); int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx, const unsigned char *ref, int len); -int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec, - const int len); +int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, + const unsigned char *sec, int len); /* CMP message header and extra certificates: */ int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name); int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); +int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx); int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx, STACK_OF(X509) *extraCertsOut); /* certificate template: */ @@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr); OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid); OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx, const char *propq); diff --git a/contrib/openssl-cmake/linux_ppc64le/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_ppc64le/include/openssl/opensslv.h index 81c1b93afaa3..3c221e1ac231 100644 --- a/contrib/openssl-cmake/linux_ppc64le/include/openssl/opensslv.h +++ b/contrib/openssl-cmake/linux_ppc64le/include/openssl/opensslv.h @@ -1,6 +1,6 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/opensslv.h.in + * Generated by Makefile from include/openssl/opensslv.h.in * * Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved. * @@ -29,7 +29,7 @@ extern "C" { */ # define OPENSSL_VERSION_MAJOR 3 # define OPENSSL_VERSION_MINOR 0 -# define OPENSSL_VERSION_PATCH 7 +# define OPENSSL_VERSION_PATCH 10 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. */ -# define OPENSSL_VERSION_STR "3.0.7" -# define OPENSSL_FULL_VERSION_STR "3.0.7" +# define OPENSSL_VERSION_STR "3.0.10" +# define OPENSSL_FULL_VERSION_STR "3.0.10" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. 
*/ -# define OPENSSL_RELEASE_DATE "1 Nov 2022" +# define OPENSSL_RELEASE_DATE "1 Aug 2023" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/linux_ppc64le/include/openssl/x509v3.h b/contrib/openssl-cmake/linux_ppc64le/include/openssl/x509v3.h index fb4b49ca3497..20b67455f206 100644 --- a/contrib/openssl-cmake/linux_ppc64le/include/openssl/x509v3.h +++ b/contrib/openssl-cmake/linux_ppc64le/include/openssl/x509v3.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/x509v3.h.in + * Generated by Makefile from include/openssl/x509v3.h.in * - * Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st { OTHERNAME *otherName; /* otherName */ ASN1_IA5STRING *rfc822Name; ASN1_IA5STRING *dNSName; - ASN1_TYPE *x400Address; + ASN1_STRING *x400Address; X509_NAME *directoryName; EDIPARTYNAME *ediPartyName; ASN1_IA5STRING *uniformResourceIdentifier; diff --git a/contrib/openssl-cmake/linux_s390x/include/openssl/cmp.h b/contrib/openssl-cmake/linux_s390x/include/openssl/cmp.h index 2476042c531a..49825570d8c3 100644 --- a/contrib/openssl-cmake/linux_s390x/include/openssl/cmp.h +++ b/contrib/openssl-cmake/linux_s390x/include/openssl/cmp.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/cmp.h.in + * Generated by Makefile from include/openssl/cmp.h.in * - * Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved. 
* Copyright Nokia 2007-2019 * Copyright Siemens AG 2015-2019 * @@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ -# define OSSL_CMP_PKISTATUS_accepted 0 -# define OSSL_CMP_PKISTATUS_grantedWithMods 1 -# define OSSL_CMP_PKISTATUS_rejection 2 -# define OSSL_CMP_PKISTATUS_waiting 3 -# define OSSL_CMP_PKISTATUS_revocationWarning 4 +# define OSSL_CMP_PKISTATUS_request -3 +# define OSSL_CMP_PKISTATUS_trans -2 +# define OSSL_CMP_PKISTATUS_unspecified -1 +# define OSSL_CMP_PKISTATUS_accepted 0 +# define OSSL_CMP_PKISTATUS_grantedWithMods 1 +# define OSSL_CMP_PKISTATUS_rejection 2 +# define OSSL_CMP_PKISTATUS_waiting 3 +# define OSSL_CMP_PKISTATUS_revocationWarning 4 # define OSSL_CMP_PKISTATUS_revocationNotification 5 -# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 +# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 typedef ASN1_INTEGER OSSL_CMP_PKISTATUS; DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS) @@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted, int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey); int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx, const unsigned char *ref, int len); -int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec, - const int len); +int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, + const unsigned char *sec, int len); /* CMP message header and extra certificates: */ int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name); int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); +int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx); int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx, STACK_OF(X509) *extraCertsOut); /* certificate template: */ @@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr); OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid); OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx, const char *propq); diff --git a/contrib/openssl-cmake/linux_s390x/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_s390x/include/openssl/opensslv.h index 81c1b93afaa3..3c221e1ac231 100644 --- a/contrib/openssl-cmake/linux_s390x/include/openssl/opensslv.h +++ b/contrib/openssl-cmake/linux_s390x/include/openssl/opensslv.h @@ -1,6 +1,6 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/opensslv.h.in + * Generated by Makefile from include/openssl/opensslv.h.in * * Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved. * @@ -29,7 +29,7 @@ extern "C" { */ # define OPENSSL_VERSION_MAJOR 3 # define OPENSSL_VERSION_MINOR 0 -# define OPENSSL_VERSION_PATCH 7 +# define OPENSSL_VERSION_PATCH 10 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. */ -# define OPENSSL_VERSION_STR "3.0.7" -# define OPENSSL_FULL_VERSION_STR "3.0.7" +# define OPENSSL_VERSION_STR "3.0.10" +# define OPENSSL_FULL_VERSION_STR "3.0.10" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. 
*/ -# define OPENSSL_RELEASE_DATE "1 Nov 2022" +# define OPENSSL_RELEASE_DATE "1 Aug 2023" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/linux_s390x/include/openssl/x509v3.h b/contrib/openssl-cmake/linux_s390x/include/openssl/x509v3.h index fb4b49ca3497..20b67455f206 100644 --- a/contrib/openssl-cmake/linux_s390x/include/openssl/x509v3.h +++ b/contrib/openssl-cmake/linux_s390x/include/openssl/x509v3.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/x509v3.h.in + * Generated by Makefile from include/openssl/x509v3.h.in * - * Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st { OTHERNAME *otherName; /* otherName */ ASN1_IA5STRING *rfc822Name; ASN1_IA5STRING *dNSName; - ASN1_TYPE *x400Address; + ASN1_STRING *x400Address; X509_NAME *directoryName; EDIPARTYNAME *ediPartyName; ASN1_IA5STRING *uniformResourceIdentifier; diff --git a/contrib/openssl-cmake/linux_x86_64/include/openssl/cmp.h b/contrib/openssl-cmake/linux_x86_64/include/openssl/cmp.h index 2476042c531a..49825570d8c3 100644 --- a/contrib/openssl-cmake/linux_x86_64/include/openssl/cmp.h +++ b/contrib/openssl-cmake/linux_x86_64/include/openssl/cmp.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/cmp.h.in + * Generated by Makefile from include/openssl/cmp.h.in * - * Copyright 2007-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 2007-2023 The OpenSSL Project Authors. All Rights Reserved. 
* Copyright Nokia 2007-2019 * Copyright Siemens AG 2015-2019 * @@ -193,13 +193,16 @@ typedef ASN1_BIT_STRING OSSL_CMP_PKIFAILUREINFO; * -- CertReqMsg * } */ -# define OSSL_CMP_PKISTATUS_accepted 0 -# define OSSL_CMP_PKISTATUS_grantedWithMods 1 -# define OSSL_CMP_PKISTATUS_rejection 2 -# define OSSL_CMP_PKISTATUS_waiting 3 -# define OSSL_CMP_PKISTATUS_revocationWarning 4 +# define OSSL_CMP_PKISTATUS_request -3 +# define OSSL_CMP_PKISTATUS_trans -2 +# define OSSL_CMP_PKISTATUS_unspecified -1 +# define OSSL_CMP_PKISTATUS_accepted 0 +# define OSSL_CMP_PKISTATUS_grantedWithMods 1 +# define OSSL_CMP_PKISTATUS_rejection 2 +# define OSSL_CMP_PKISTATUS_waiting 3 +# define OSSL_CMP_PKISTATUS_revocationWarning 4 # define OSSL_CMP_PKISTATUS_revocationNotification 5 -# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 +# define OSSL_CMP_PKISTATUS_keyUpdateWarning 6 typedef ASN1_INTEGER OSSL_CMP_PKISTATUS; DECLARE_ASN1_ITEM(OSSL_CMP_PKISTATUS) @@ -439,11 +442,12 @@ int OSSL_CMP_CTX_build_cert_chain(OSSL_CMP_CTX *ctx, X509_STORE *own_trusted, int OSSL_CMP_CTX_set1_pkey(OSSL_CMP_CTX *ctx, EVP_PKEY *pkey); int OSSL_CMP_CTX_set1_referenceValue(OSSL_CMP_CTX *ctx, const unsigned char *ref, int len); -int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, const unsigned char *sec, - const int len); +int OSSL_CMP_CTX_set1_secretValue(OSSL_CMP_CTX *ctx, + const unsigned char *sec, int len); /* CMP message header and extra certificates: */ int OSSL_CMP_CTX_set1_recipient(OSSL_CMP_CTX *ctx, const X509_NAME *name); int OSSL_CMP_CTX_push0_geninfo_ITAV(OSSL_CMP_CTX *ctx, OSSL_CMP_ITAV *itav); +int OSSL_CMP_CTX_reset_geninfo_ITAVs(OSSL_CMP_CTX *ctx); int OSSL_CMP_CTX_set1_extraCertsOut(OSSL_CMP_CTX *ctx, STACK_OF(X509) *extraCertsOut); /* certificate template: */ @@ -499,6 +503,7 @@ ASN1_OCTET_STRING *OSSL_CMP_HDR_get0_recipNonce(const OSSL_CMP_PKIHEADER *hdr); OSSL_CMP_PKIHEADER *OSSL_CMP_MSG_get0_header(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_get_bodytype(const OSSL_CMP_MSG *msg); int OSSL_CMP_MSG_update_transactionID(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); +int OSSL_CMP_MSG_update_recipNonce(OSSL_CMP_CTX *ctx, OSSL_CMP_MSG *msg); OSSL_CRMF_MSG *OSSL_CMP_CTX_setup_CRM(OSSL_CMP_CTX *ctx, int for_KUR, int rid); OSSL_CMP_MSG *OSSL_CMP_MSG_read(const char *file, OSSL_LIB_CTX *libctx, const char *propq); diff --git a/contrib/openssl-cmake/linux_x86_64/include/openssl/opensslv.h b/contrib/openssl-cmake/linux_x86_64/include/openssl/opensslv.h index 81c1b93afaa3..3c221e1ac231 100644 --- a/contrib/openssl-cmake/linux_x86_64/include/openssl/opensslv.h +++ b/contrib/openssl-cmake/linux_x86_64/include/openssl/opensslv.h @@ -1,6 +1,6 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/opensslv.h.in + * Generated by Makefile from include/openssl/opensslv.h.in * * Copyright 1999-2020 The OpenSSL Project Authors. All Rights Reserved. * @@ -29,7 +29,7 @@ extern "C" { */ # define OPENSSL_VERSION_MAJOR 3 # define OPENSSL_VERSION_MINOR 0 -# define OPENSSL_VERSION_PATCH 7 +# define OPENSSL_VERSION_PATCH 10 /* * Additional version information @@ -74,21 +74,21 @@ extern "C" { * longer variant with OPENSSL_VERSION_PRE_RELEASE_STR and * OPENSSL_VERSION_BUILD_METADATA_STR appended. */ -# define OPENSSL_VERSION_STR "3.0.7" -# define OPENSSL_FULL_VERSION_STR "3.0.7" +# define OPENSSL_VERSION_STR "3.0.10" +# define OPENSSL_FULL_VERSION_STR "3.0.10" /* * SECTION 3: ADDITIONAL METADATA * * These strings are defined separately to allow them to be parsable. 
*/ -# define OPENSSL_RELEASE_DATE "1 Nov 2022" +# define OPENSSL_RELEASE_DATE "1 Aug 2023" /* * SECTION 4: BACKWARD COMPATIBILITY */ -# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.7 1 Nov 2022" +# define OPENSSL_VERSION_TEXT "OpenSSL 3.0.10 1 Aug 2023" /* Synthesize OPENSSL_VERSION_NUMBER with the layout 0xMNN00PPSL */ # ifdef OPENSSL_VERSION_PRE_RELEASE diff --git a/contrib/openssl-cmake/linux_x86_64/include/openssl/x509v3.h b/contrib/openssl-cmake/linux_x86_64/include/openssl/x509v3.h index fb4b49ca3497..20b67455f206 100644 --- a/contrib/openssl-cmake/linux_x86_64/include/openssl/x509v3.h +++ b/contrib/openssl-cmake/linux_x86_64/include/openssl/x509v3.h @@ -1,8 +1,8 @@ /* * WARNING: do not edit! - * Generated by Makefile from ../include/openssl/x509v3.h.in + * Generated by Makefile from include/openssl/x509v3.h.in * - * Copyright 1999-2021 The OpenSSL Project Authors. All Rights Reserved. + * Copyright 1999-2023 The OpenSSL Project Authors. All Rights Reserved. * * Licensed under the Apache License 2.0 (the "License"). You may not use * this file except in compliance with the License. You can obtain a copy @@ -177,7 +177,7 @@ typedef struct GENERAL_NAME_st { OTHERNAME *otherName; /* otherName */ ASN1_IA5STRING *rfc822Name; ASN1_IA5STRING *dNSName; - ASN1_TYPE *x400Address; + ASN1_STRING *x400Address; X509_NAME *directoryName; EDIPARTYNAME *ediPartyName; ASN1_IA5STRING *uniformResourceIdentifier; diff --git a/contrib/orc b/contrib/orc index 568d1d60c250..a20d1d9d7ad4 160000 --- a/contrib/orc +++ b/contrib/orc @@ -1 +1 @@ -Subproject commit 568d1d60c250af1890f226c182bc15bd8cc94cf1 +Subproject commit a20d1d9d7ad4a4be7b7ba97588e16ca8b9abb2b6 diff --git a/contrib/pdqsort/pdqsort.h b/contrib/pdqsort/pdqsort.h index 01e82b710ee0..cbfc82a4f41e 100644 --- a/contrib/pdqsort/pdqsort.h +++ b/contrib/pdqsort/pdqsort.h @@ -54,8 +54,10 @@ namespace pdqsort_detail { block_size = 64, // Cacheline size, assumes power of two. - cacheline_size = 64 + cacheline_size = 64, + /// Try sort allowed iterations + try_sort_iterations = 3, }; #if __cplusplus >= 201103L @@ -501,6 +503,167 @@ namespace pdqsort_detail { leftmost = false; } } + + template + inline bool pdqsort_try_sort_loop(Iter begin, + Iter end, + Compare comp, + size_t bad_allowed, + size_t iterations_allowed, + bool force_sort = false, + bool leftmost = true) { + typedef typename std::iterator_traits::difference_type diff_t; + + // Use a while loop for tail recursion elimination. + while (true) { + if (!force_sort && iterations_allowed == 0) { + return false; + } + + diff_t size = end - begin; + + // Insertion sort is faster for small arrays. + if (size < insertion_sort_threshold) { + if (leftmost) insertion_sort(begin, end, comp); + else unguarded_insertion_sort(begin, end, comp); + + return true; + } + + // Choose pivot as median of 3 or pseudomedian of 9. + diff_t s2 = size / 2; + if (size > ninther_threshold) { + sort3(begin, begin + s2, end - 1, comp); + sort3(begin + 1, begin + (s2 - 1), end - 2, comp); + sort3(begin + 2, begin + (s2 + 1), end - 3, comp); + sort3(begin + (s2 - 1), begin + s2, begin + (s2 + 1), comp); + std::iter_swap(begin, begin + s2); + } else sort3(begin + s2, begin, end - 1, comp); + + // If *(begin - 1) is the end of the right partition of a previous partition operation + // there is no element in [begin, end) that is smaller than *(begin - 1). Then if our + // pivot compares equal to *(begin - 1) we change strategy, putting equal elements in + // the left partition, greater elements in the right partition. 
We do not have to + recurse on the left partition, since it's sorted (all equal). + if (!leftmost && !comp(*(begin - 1), *begin)) { + begin = partition_left(begin, end, comp) + 1; + continue; + } + + // Partition and get results. + std::pair<Iter, bool> part_result = + Branchless ? partition_right_branchless(begin, end, comp) + : partition_right(begin, end, comp); + Iter pivot_pos = part_result.first; + bool already_partitioned = part_result.second; + + // Check for a highly unbalanced partition. + diff_t l_size = pivot_pos - begin; + diff_t r_size = end - (pivot_pos + 1); + bool highly_unbalanced = l_size < size / 8 || r_size < size / 8; + + // If we got a highly unbalanced partition we shuffle elements to break many patterns. + if (highly_unbalanced) { + if (!force_sort) { + return false; + } + + // If we had too many bad partitions, switch to heapsort to guarantee O(n log n). + if (--bad_allowed == 0) { + std::make_heap(begin, end, comp); + std::sort_heap(begin, end, comp); + return true; + } + + if (l_size >= insertion_sort_threshold) { + std::iter_swap(begin, begin + l_size / 4); + std::iter_swap(pivot_pos - 1, pivot_pos - l_size / 4); + + if (l_size > ninther_threshold) { + std::iter_swap(begin + 1, begin + (l_size / 4 + 1)); + std::iter_swap(begin + 2, begin + (l_size / 4 + 2)); + std::iter_swap(pivot_pos - 2, pivot_pos - (l_size / 4 + 1)); + std::iter_swap(pivot_pos - 3, pivot_pos - (l_size / 4 + 2)); + } + } + + if (r_size >= insertion_sort_threshold) { + std::iter_swap(pivot_pos + 1, pivot_pos + (1 + r_size / 4)); + std::iter_swap(end - 1, end - r_size / 4); + + if (r_size > ninther_threshold) { + std::iter_swap(pivot_pos + 2, pivot_pos + (2 + r_size / 4)); + std::iter_swap(pivot_pos + 3, pivot_pos + (3 + r_size / 4)); + std::iter_swap(end - 2, end - (1 + r_size / 4)); + std::iter_swap(end - 3, end - (2 + r_size / 4)); + } + } + } else { + // If we were decently balanced and we tried to sort an already partitioned + // sequence try to use insertion sort. + if (already_partitioned && partial_insertion_sort(begin, pivot_pos, comp) + && partial_insertion_sort(pivot_pos + 1, end, comp)) { + return true; + } + } + + // Sort the left partition first using recursion and do tail recursion elimination for + // the right-hand partition.
+ if (pdqsort_try_sort_loop<Iter, Compare, Branchless>(begin, + pivot_pos, + comp, + bad_allowed, + iterations_allowed - 1, + force_sort, + leftmost)) { + force_sort = true; + } else { + return false; + } + + --iterations_allowed; + begin = pivot_pos + 1; + leftmost = false; + } + + return false; + } + + template<class Iter, class Compare, bool Branchless> + inline bool pdqsort_try_sort_impl(Iter begin, Iter end, Compare comp, size_t bad_allowed) + { + typedef typename std::iterator_traits<Iter>::difference_type diff_t; + + static constexpr size_t iterations_allowed = pdqsort_detail::try_sort_iterations; + static constexpr size_t num_to_try = 16; + + diff_t size = end - begin; + + if (size > num_to_try * 10) + { + size_t out_of_order_elements = 0; + + for (size_t i = 1; i < num_to_try; ++i) + { + diff_t offset = size / num_to_try; + + diff_t prev_position = offset * (i - 1); + diff_t curr_position = offset * i; + diff_t next_position = offset * (i + 1) - 1; + + bool prev_less_than_curr = comp(*(begin + prev_position), *(begin + curr_position)); + bool curr_less_than_next = comp(*(begin + curr_position), *(begin + next_position)); + if ((prev_less_than_curr && curr_less_than_next) || (!prev_less_than_curr && !curr_less_than_next)) + continue; + + ++out_of_order_elements; + if (out_of_order_elements > iterations_allowed) + return false; + } + } + + return pdqsort_try_sort_loop<Iter, Compare, Branchless>(begin, end, comp, bad_allowed, iterations_allowed); + } } @@ -538,6 +701,41 @@ inline void pdqsort_branchless(Iter begin, Iter end) { pdqsort_branchless(begin, end, std::less<T>()); } +template<class Iter, class Compare> +inline bool pdqsort_try_sort(Iter begin, Iter end, Compare comp) { + if (begin == end) return true; + +#if __cplusplus >= 201103L + return pdqsort_detail::pdqsort_try_sort_impl<Iter, Compare, + pdqsort_detail::is_default_compare<typename std::decay<Compare>::type>::value && + std::is_arithmetic<typename std::iterator_traits<Iter>::value_type>::value>( + begin, end, comp, pdqsort_detail::log2(end - begin)); +#else + return pdqsort_detail::pdqsort_try_sort_impl<Iter, Compare, false>( + begin, end, comp, pdqsort_detail::log2(end - begin)); +#endif +} + +template<class Iter> +inline bool pdqsort_try_sort(Iter begin, Iter end) { + typedef typename std::iterator_traits<Iter>::value_type T; + return pdqsort_try_sort(begin, end, std::less<T>()); +} + +template<class Iter, class Compare> +inline bool pdqsort_try_sort_branchless(Iter begin, Iter end, Compare comp) { + if (begin == end) return true; + + return pdqsort_detail::pdqsort_try_sort_impl<Iter, Compare, true>( + begin, end, comp, pdqsort_detail::log2(end - begin)); +} + +template<class Iter> +inline bool pdqsort_try_sort_branchless(Iter begin, Iter end) { + typedef typename std::iterator_traits<Iter>::value_type T; + return pdqsort_try_sort_branchless(begin, end, std::less<T>()); +} + #undef PDQSORT_PREFER_MOVE diff --git a/contrib/qpl b/contrib/qpl index 3f8f5cea2773..faaf19350459 160000 --- a/contrib/qpl +++ b/contrib/qpl @@ -1 +1 @@ -Subproject commit 3f8f5cea27739f5261e8fd577dc233ffe88bf679 +Subproject commit faaf19350459c076e66bb5df11743c3fade59b73 diff --git a/contrib/re2 b/contrib/re2 index 13ebb377c6ad..a807e8a3aac2 160000 --- a/contrib/re2 +++ b/contrib/re2 @@ -1 +1 @@ -Subproject commit 13ebb377c6ad763ca61d12dd6f88b1126bd0b911 +Subproject commit a807e8a3aac2cc33c77b7071efea54fcabe38e0c diff --git a/contrib/re2-cmake/CMakeLists.txt b/contrib/re2-cmake/CMakeLists.txt index 19939c11ebf4..e72b5e1fca89 100644 --- a/contrib/re2-cmake/CMakeLists.txt +++ b/contrib/re2-cmake/CMakeLists.txt @@ -1,17 +1,7 @@ -# Copyright 2015 The RE2 Authors. All Rights Reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file.
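The pdqsort_try_sort entry points added above give up early unless the input looks nearly sorted: pdqsort_try_sort_impl probes num_to_try evenly spaced triples and bails out once more than try_sort_iterations of them are out of order. A toy, language-agnostic rendering of that sampling heuristic in Python (function and parameter names here are illustrative, not the header's API):

```python
import random

def looks_nearly_sorted(data, num_to_try=16, max_out_of_order=3):
    """Probe evenly spaced triples, mirroring the sampling check in pdqsort_try_sort_impl."""
    n = len(data)
    if n <= num_to_try * 10:
        return True  # small inputs skip sampling and go straight to the sort attempt
    offset = n // num_to_try
    out_of_order = 0
    for i in range(1, num_to_try):
        prev_less_than_curr = data[offset * (i - 1)] < data[offset * i]
        curr_less_than_next = data[offset * i] < data[offset * (i + 1) - 1]
        # A probe passes when both comparisons point the same way.
        if prev_less_than_curr != curr_less_than_next:
            out_of_order += 1
            if out_of_order > max_out_of_order:
                return False
    return True

print(looks_nearly_sorted(list(range(10_000))))  # True: every probe is monotone
data = list(range(10_000))
random.shuffle(data)
print(looks_nearly_sorted(data))                 # almost certainly False
```

When the probe fails, the caller falls back to a full sort, which is why the functions return a bool instead of sorting unconditionally.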
- -# This file was edited for ClickHouse - -string(FIND ${CMAKE_CURRENT_BINARY_DIR} " " _have_space) -if(_have_space GREATER 0) - message(FATAL_ERROR "Using spaces in build path [${CMAKE_CURRENT_BINARY_DIR}] highly not recommended. Library re2st will be disabled.") -endif() - set(SRC_DIR "${ClickHouse_SOURCE_DIR}/contrib/re2") set(RE2_SOURCES + ${SRC_DIR}/re2/bitmap256.cc ${SRC_DIR}/re2/bitstate.cc ${SRC_DIR}/re2/compile.cc ${SRC_DIR}/re2/dfa.cc @@ -28,49 +18,15 @@ set(RE2_SOURCES ${SRC_DIR}/re2/regexp.cc ${SRC_DIR}/re2/set.cc ${SRC_DIR}/re2/simplify.cc - ${SRC_DIR}/re2/stringpiece.cc ${SRC_DIR}/re2/tostring.cc ${SRC_DIR}/re2/unicode_casefold.cc ${SRC_DIR}/re2/unicode_groups.cc ${SRC_DIR}/util/rune.cc ${SRC_DIR}/util/strutil.cc ) -add_library(re2 ${RE2_SOURCES}) -target_include_directories(re2 PUBLIC "${SRC_DIR}") - -# Building re2 which is thread-safe and re2_st which is not. -# re2 changes its state during matching of regular expression, e.g. creates temporary DFA. -# It uses RWLock to process the same regular expression object from different threads. -# In order to avoid redundant locks in some cases, we use not thread-safe version of the library (re2_st). - -add_library(re2_st ${RE2_SOURCES}) -target_compile_definitions (re2_st PRIVATE NDEBUG NO_THREADS re2=re2_st) -target_include_directories (re2_st PRIVATE .) -target_include_directories (re2_st SYSTEM PUBLIC ${CMAKE_CURRENT_BINARY_DIR}) -target_include_directories (re2_st SYSTEM BEFORE PUBLIC ${SRC_DIR}) - -file (MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/re2_st) -foreach (FILENAME filtered_re2.h re2.h set.h stringpiece.h) - add_custom_command (OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/re2_st/${FILENAME}" - COMMAND ${CMAKE_COMMAND} -DSOURCE_FILENAME="${SRC_DIR}/re2/${FILENAME}" - -DTARGET_FILENAME="${CMAKE_CURRENT_BINARY_DIR}/re2_st/${FILENAME}" - -P "${CMAKE_CURRENT_SOURCE_DIR}/re2_transform.cmake" - COMMENT "Creating ${FILENAME} for re2_st library.") - add_custom_target (transform_${FILENAME} DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/re2_st/${FILENAME}") - add_dependencies (re2_st transform_${FILENAME}) -endforeach () -file (MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/util) -foreach (FILENAME mutex.h) - add_custom_command (OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/util/${FILENAME}" - COMMAND ${CMAKE_COMMAND} -DSOURCE_FILENAME="${SRC_DIR}/util/${FILENAME}" - -DTARGET_FILENAME="${CMAKE_CURRENT_BINARY_DIR}/util/${FILENAME}" - -P "${CMAKE_CURRENT_SOURCE_DIR}/re2_transform.cmake" - COMMENT "Creating ${FILENAME} for re2_st library.") - add_custom_target (transform_${FILENAME} DEPENDS "${CMAKE_CURRENT_BINARY_DIR}/util/${FILENAME}") - add_dependencies (re2_st transform_${FILENAME}) -endforeach () +add_library(_re2 ${RE2_SOURCES}) +target_include_directories(_re2 PUBLIC "${SRC_DIR}") +target_link_libraries(_re2 ch_contrib::abseil_str_format) -# NOTE: you should not change name of library here, since it is used to generate required header (see above) -add_library(ch_contrib::re2 ALIAS re2) -add_library(ch_contrib::re2_st ALIAS re2_st) +add_library(ch_contrib::re2 ALIAS _re2) diff --git a/contrib/re2-cmake/re2_transform.cmake b/contrib/re2-cmake/re2_transform.cmake deleted file mode 100644 index 56a96f456301..000000000000 --- a/contrib/re2-cmake/re2_transform.cmake +++ /dev/null @@ -1,10 +0,0 @@ -file (READ ${SOURCE_FILENAME} CONTENT) -string (REGEX REPLACE "using re2::RE2;" "" CONTENT "${CONTENT}") -string (REGEX REPLACE "using re2::LazyRE2;" "" CONTENT "${CONTENT}") -string (REGEX REPLACE "namespace re2 {" "namespace re2_st {" CONTENT "${CONTENT}") -string 
(REGEX REPLACE "re2::" "re2_st::" CONTENT "${CONTENT}") -string (REGEX REPLACE "\"re2/" "\"re2_st/" CONTENT "${CONTENT}") -string (REGEX REPLACE "(.\\*?_H)" "\\1_ST" CONTENT "${CONTENT}") -string (REGEX REPLACE "#define MUTEX_IS_PTHREAD_RWLOCK" "#undef MUTEX_IS_PTHREAD_RWLOCK" CONTENT "${CONTENT}") -string (REGEX REPLACE "typedef std::mutex MutexType;" "struct MutexType { void lock() {} void unlock() {} };" CONTENT "${CONTENT}") -file (WRITE ${TARGET_FILENAME} "${CONTENT}") diff --git a/contrib/robin-map b/contrib/robin-map new file mode 160000 index 000000000000..851a59e0e306 --- /dev/null +++ b/contrib/robin-map @@ -0,0 +1 @@ +Subproject commit 851a59e0e3063ee0e23089062090a73fd3de482d diff --git a/contrib/robin-map-cmake/CMakeLists.txt b/contrib/robin-map-cmake/CMakeLists.txt new file mode 100644 index 000000000000..f82ad705dcc7 --- /dev/null +++ b/contrib/robin-map-cmake/CMakeLists.txt @@ -0,0 +1 @@ +# See contrib/usearch-cmake/CMakeLists.txt diff --git a/contrib/s2geometry b/contrib/s2geometry index 4a7ebd5da04c..0547c3837177 160000 --- a/contrib/s2geometry +++ b/contrib/s2geometry @@ -1 +1 @@ -Subproject commit 4a7ebd5da04cb6c9ea38bbf5914a9f8f3c768564 +Subproject commit 0547c38371777a1c1c8be263a6f05c3bf71bb05b diff --git a/contrib/s2geometry-cmake/CMakeLists.txt b/contrib/s2geometry-cmake/CMakeLists.txt index 102ceb0db3c0..6632f9c27d52 100644 --- a/contrib/s2geometry-cmake/CMakeLists.txt +++ b/contrib/s2geometry-cmake/CMakeLists.txt @@ -7,12 +7,6 @@ endif() set(S2_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/s2geometry/src") -set(ABSL_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/abseil-cpp") -if(NOT EXISTS "${ABSL_SOURCE_DIR}/CMakeLists.txt") - message(FATAL_ERROR " submodule contrib/abseil-cpp is missing. To fix try run: \n git submodule update --init --recursive") -endif() - - set(S2_SRCS "${S2_SOURCE_DIR}/s2/encoded_s2cell_id_vector.cc" "${S2_SOURCE_DIR}/s2/encoded_s2point_vector.cc" @@ -58,7 +52,9 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2edge_crossings.cc" "${S2_SOURCE_DIR}/s2/s2edge_distances.cc" "${S2_SOURCE_DIR}/s2/s2edge_tessellator.cc" + "${S2_SOURCE_DIR}/s2/s2error.cc" "${S2_SOURCE_DIR}/s2/s2furthest_edge_query.cc" + "${S2_SOURCE_DIR}/s2/s2hausdorff_distance_query.cc" "${S2_SOURCE_DIR}/s2/s2latlng.cc" "${S2_SOURCE_DIR}/s2/s2latlng_rect.cc" "${S2_SOURCE_DIR}/s2/s2latlng_rect_bounder.cc" @@ -93,59 +89,58 @@ set(S2_SRCS "${S2_SOURCE_DIR}/s2/s2shape_index_buffered_region.cc" "${S2_SOURCE_DIR}/s2/s2shape_index_measures.cc" "${S2_SOURCE_DIR}/s2/s2shape_measures.cc" + "${S2_SOURCE_DIR}/s2/s2shape_nesting_query.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_build_polygon_boundaries.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_coding.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_contains_brute_force.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_conversion.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_edge_iterator.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_get_reference_point.cc" - "${S2_SOURCE_DIR}/s2/s2shapeutil_range_iterator.cc" "${S2_SOURCE_DIR}/s2/s2shapeutil_visit_crossing_edge_pairs.cc" "${S2_SOURCE_DIR}/s2/s2text_format.cc" "${S2_SOURCE_DIR}/s2/s2wedge_relations.cc" "${S2_SOURCE_DIR}/s2/s2winding_operation.cc" - "${S2_SOURCE_DIR}/s2/strings/serialize.cc" "${S2_SOURCE_DIR}/s2/util/bits/bit-interleave.cc" - "${S2_SOURCE_DIR}/s2/util/bits/bits.cc" "${S2_SOURCE_DIR}/s2/util/coding/coder.cc" "${S2_SOURCE_DIR}/s2/util/coding/varint.cc" "${S2_SOURCE_DIR}/s2/util/math/exactfloat/exactfloat.cc" "${S2_SOURCE_DIR}/s2/util/math/mathutil.cc" "${S2_SOURCE_DIR}/s2/util/units/length-units.cc" - ) add_library(_s2 ${S2_SRCS}) 
add_library(ch_contrib::s2 ALIAS _s2) -set_property(TARGET _s2 PROPERTY CXX_STANDARD 17) - if (TARGET OpenSSL::SSL) target_link_libraries(_s2 PRIVATE OpenSSL::Crypto OpenSSL::SSL) endif() # Copied from contrib/s2geometry/CMakeLists target_link_libraries(_s2 PRIVATE - absl::base - absl::btree - absl::config - absl::core_headers - absl::dynamic_annotations - absl::endian - absl::fixed_array - absl::flat_hash_map - absl::flat_hash_set - absl::hash - absl::inlined_vector - absl::int128 - absl::log_severity - absl::memory - absl::span - absl::str_format - absl::strings - absl::type_traits - absl::utility - ) + absl::base + absl::btree + absl::check + absl::config + absl::core_headers + absl::dynamic_annotations + absl::endian + absl::fixed_array + absl::flags + absl::flat_hash_map + absl::flat_hash_set + absl::hash + absl::inlined_vector + absl::int128 + absl::log + absl::log_severity + absl::memory + absl::span + absl::status + absl::str_format + absl::strings + absl::type_traits + absl::utility +) target_include_directories(_s2 SYSTEM BEFORE PUBLIC "${S2_SOURCE_DIR}/") target_include_directories(_s2 SYSTEM PUBLIC "${ABSL_SOURCE_DIR}") diff --git a/contrib/snappy b/contrib/snappy index fb057edfed82..6ebb5b1ab880 160000 --- a/contrib/snappy +++ b/contrib/snappy @@ -1 +1 @@ -Subproject commit fb057edfed820212076239fd32cb2ff23e9016bf +Subproject commit 6ebb5b1ab8801ea3fde103c5c29f5ab86df5fe7a diff --git a/contrib/sparse-checkout/update-boost.sh b/contrib/sparse-checkout/update-boost.sh index 9bd1f6c17964..04a5c0a1f6c5 100755 --- a/contrib/sparse-checkout/update-boost.sh +++ b/contrib/sparse-checkout/update-boost.sh @@ -20,6 +20,7 @@ echo '/boost/context/*' >> $FILES_TO_CHECKOUT echo '/boost/convert/*' >> $FILES_TO_CHECKOUT echo '/boost/coroutine/*' >> $FILES_TO_CHECKOUT echo '/boost/core/*' >> $FILES_TO_CHECKOUT +echo '/boost/describe/*' >> $FILES_TO_CHECKOUT echo '/boost/detail/*' >> $FILES_TO_CHECKOUT echo '/boost/dynamic_bitset/*' >> $FILES_TO_CHECKOUT echo '/boost/exception/*' >> $FILES_TO_CHECKOUT @@ -82,4 +83,4 @@ echo '/libs/*' >> $FILES_TO_CHECKOUT git config core.sparsecheckout true git checkout $1 -git read-tree -mu HEAD \ No newline at end of file +git read-tree -mu HEAD diff --git a/contrib/sysroot b/contrib/sysroot index e0d1b64da666..b5fcabb24d28 160000 --- a/contrib/sysroot +++ b/contrib/sysroot @@ -1 +1 @@ -Subproject commit e0d1b64da666afbfaa6f1ee0487c33f3fd2cd5cb +Subproject commit b5fcabb24d28fc33024291b2c6c1abd807c7dba8 diff --git a/contrib/usearch b/contrib/usearch new file mode 160000 index 000000000000..955c6f9c11ad --- /dev/null +++ b/contrib/usearch @@ -0,0 +1 @@ +Subproject commit 955c6f9c11adfd89c912e0d1643d160b4e9e543f diff --git a/contrib/usearch-cmake/CMakeLists.txt b/contrib/usearch-cmake/CMakeLists.txt new file mode 100644 index 000000000000..29fbe57106c7 --- /dev/null +++ b/contrib/usearch-cmake/CMakeLists.txt @@ -0,0 +1,17 @@ +set(USEARCH_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/usearch") +set(USEARCH_SOURCE_DIR "${USEARCH_PROJECT_DIR}/include") + +set(FP16_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/FP16") +set(ROBIN_MAP_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/robin-map") +set(SIMSIMD_PROJECT_DIR "${ClickHouse_SOURCE_DIR}/contrib/SimSIMD-map") + +add_library(_usearch INTERFACE) + +target_include_directories(_usearch SYSTEM INTERFACE + ${FP16_PROJECT_DIR}/include + ${ROBIN_MAP_PROJECT_DIR}/include + ${SIMSIMD_PROJECT_DIR}/include + ${USEARCH_SOURCE_DIR}) + +add_library(ch_contrib::usearch ALIAS _usearch) +target_compile_definitions(_usearch INTERFACE 
ENABLE_USEARCH) diff --git a/docker/README.md b/docker/README.md index ec52ddd143e5..c1bb3b49f009 100644 --- a/docker/README.md +++ b/docker/README.md @@ -1,5 +1,5 @@ ## ClickHouse Dockerfiles -This directory contain Dockerfiles for `clickhouse-client` and `clickhouse-server`. They are updated in each release. +This directory contains Dockerfiles for `clickhouse-server`. They are updated in each release. -Also there is bunch of images for testing and CI. They are listed in `images.json` file and updated on each commit to master. If you need to add another image, place information about it into `images.json`. +Also, there is a bunch of images for testing and CI. They are listed in the `images.json` file and updated on each commit to master. If you need to add another image, place information about it into `images.json`. diff --git a/docker/client/Dockerfile b/docker/client/Dockerfile deleted file mode 100644 index 1c185daec758..000000000000 --- a/docker/client/Dockerfile +++ /dev/null @@ -1,34 +0,0 @@ -FROM ubuntu:18.04 - -# ARG for quick switch to a given ubuntu mirror -ARG apt_archive="http://archive.ubuntu.com" -RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list - -ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/" -ARG version=22.1.1.* - -RUN apt-get update \ - && apt-get install --yes --no-install-recommends \ - apt-transport-https \ - ca-certificates \ - dirmngr \ - gnupg \ - && mkdir -p /etc/apt/sources.list.d \ - && apt-key adv --keyserver keyserver.ubuntu.com --recv E0C56BD4 \ - && echo $repository > /etc/apt/sources.list.d/clickhouse.list \ - && apt-get update \ - && env DEBIAN_FRONTEND=noninteractive \ - apt-get install --allow-unauthenticated --yes --no-install-recommends \ - clickhouse-client=$version \ - clickhouse-common-static=$version \ - locales \ - tzdata \ - && rm -rf /var/lib/apt/lists/* /var/cache/debconf \ - && apt-get clean - -RUN locale-gen en_US.UTF-8 -ENV LANG en_US.UTF-8 -ENV LANGUAGE en_US:en -ENV LC_ALL en_US.UTF-8 - -ENTRYPOINT ["/usr/bin/clickhouse-client"] diff --git a/docker/client/README.md b/docker/client/README.md deleted file mode 100644 index bbcc7d607940..000000000000 --- a/docker/client/README.md +++ /dev/null @@ -1,7 +0,0 @@ -# ClickHouse Client Docker Image - -For more information see [ClickHouse Server Docker Image](https://hub.docker.com/r/clickhouse/clickhouse-server/). - -## License - -View [license information](https://github.com/ClickHouse/ClickHouse/blob/master/LICENSE) for the software contained in this image.
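With the standalone client image retired here, the client is still available from the server image: the server Dockerfiles later in this change install `PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"`. A hedged sketch of the equivalent invocation (the host is a placeholder; the command is printed rather than executed):

```python
# The clickhouse-client binary now comes from the server image
# (https://hub.docker.com/r/clickhouse/clickhouse-server/).
cmd = [
    "docker", "run", "--rm", "-it",
    "clickhouse/clickhouse-server",                       # image that bundles the client
    "clickhouse-client", "--host", "clickhouse.example",  # placeholder host
]
print(" ".join(cmd))
```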
diff --git a/docker/images.json b/docker/images.json index b4f3e755bd1f..1535715648c0 100644 --- a/docker/images.json +++ b/docker/images.json @@ -1,9 +1,7 @@ { "docker/packager/binary": { "name": "clickhouse/binary-builder", - "dependent": [ - "docker/test/codebrowser" - ] + "dependent": [] }, "docker/test/compatibility/centos": { "name": "clickhouse/test-old-centos", @@ -21,6 +19,10 @@ "name": "clickhouse/fuzzer", "dependent": [] }, + "docker/test/libfuzzer": { + "name": "clickhouse/libfuzzer", + "dependent": [] + }, "docker/test/performance-comparison": { "name": "clickhouse/performance-comparison", "dependent": [] @@ -59,10 +61,6 @@ "name": "clickhouse/upgrade-check", "dependent": [] }, - "docker/test/codebrowser": { - "name": "clickhouse/codebrowser", - "dependent": [] - }, "docker/test/integration/runner": { "only_amd64": true, "name": "clickhouse/integration-tests-runner", @@ -120,11 +118,14 @@ "docker/test/base": { "name": "clickhouse/test-base", "dependent": [ - "docker/test/stateless", - "docker/test/integration/base", "docker/test/fuzzer", + "docker/test/libfuzzer", + "docker/test/integration/base", "docker/test/keeper-jepsen", - "docker/test/server-jepsen" + "docker/test/server-jepsen", + "docker/test/sqllogic", + "docker/test/sqltest", + "docker/test/stateless" ] }, "docker/test/integration/kerberized_hadoop": { @@ -154,11 +155,18 @@ }, "docker/docs/builder": { "name": "clickhouse/docs-builder", - "dependent": [ - ] + "dependent": [] }, "docker/test/sqllogic": { "name": "clickhouse/sqllogic-test", "dependent": [] + }, + "docker/test/sqltest": { + "name": "clickhouse/sqltest", + "dependent": [] + }, + "docker/test/integration/nginx_dav": { + "name": "clickhouse/nginx-dav", + "dependent": [] } } diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 367f6043b90a..0e10068d79a2 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \ esac ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release" -ARG VERSION="23.5.3.24" +ARG VERSION="23.9.1.1854" ARG PACKAGES="clickhouse-keeper" # user/group precreated explicitly with fixed uid/gid on purpose. 
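Each entry in `images.json` names the image built from a directory, and `dependent` lists the directories whose images build on top of it, which is how CI decides what to rebuild when a base image changes. A minimal sketch of resolving that fan-out (the traversal below is illustrative, not the actual CI code):

```python
import json

def images_to_rebuild(images: dict, changed: str, seen=None) -> list:
    """Collect the changed image and, transitively, everything that depends on it."""
    seen = seen if seen is not None else set()
    if changed in seen:
        return []
    seen.add(changed)
    names = [images[changed]["name"]]
    for dependent_dir in images[changed].get("dependent", []):
        # Dependents are keyed by directory, the same as top-level entries.
        names += images_to_rebuild(images, dependent_dir, seen)
    return names

with open("docker/images.json") as f:
    images = json.load(f)

# e.g. a change to test-base triggers fuzzer, libfuzzer, stateless, ...
print(images_to_rebuild(images, "docker/test/base"))
```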
diff --git a/docker/packager/README.md b/docker/packager/README.md index a78feb8d7fcb..3a91f9a63f0a 100644 --- a/docker/packager/README.md +++ b/docker/packager/README.md @@ -6,7 +6,7 @@ Usage: Build deb package with `clang-14` in `debug` mode: ``` $ mkdir deb/test_output -$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --build-type=debug +$ ./packager --output-dir deb/test_output/ --package-type deb --compiler=clang-14 --debug-build $ ls -l deb/test_output -rw-r--r-- 1 root root 3730 clickhouse-client_22.2.2+debug_all.deb -rw-r--r-- 1 root root 84221888 clickhouse-common-static_22.2.2+debug_amd64.deb diff --git a/docker/packager/binary/Dockerfile b/docker/packager/binary/Dockerfile index e824161a688e..fb033e289595 100644 --- a/docker/packager/binary/Dockerfile +++ b/docker/packager/binary/Dockerfile @@ -49,14 +49,18 @@ ENV CARGO_HOME=/rust/cargo ENV PATH="/rust/cargo/bin:${PATH}" RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \ chmod 777 -R /rust && \ - rustup toolchain install nightly && \ - rustup default nightly && \ + rustup toolchain install nightly-2023-07-04 && \ + rustup default nightly-2023-07-04 && \ rustup component add rust-src && \ + rustup target add x86_64-unknown-linux-gnu && \ rustup target add aarch64-unknown-linux-gnu && \ rustup target add x86_64-apple-darwin && \ rustup target add x86_64-unknown-freebsd && \ rustup target add aarch64-apple-darwin && \ - rustup target add powerpc64le-unknown-linux-gnu + rustup target add powerpc64le-unknown-linux-gnu && \ + rustup target add x86_64-unknown-linux-musl && \ + rustup target add aarch64-unknown-linux-musl && \ + rustup target add riscv64gc-unknown-linux-gnu # NOTE: Seems like gcc-11 is too new for ubuntu20 repository # A cross-linker for RISC-V 64 (we need it, because LLVM's LLD does not work): @@ -74,12 +78,21 @@ RUN add-apt-repository ppa:ubuntu-toolchain-r/test --yes \ python3-boto3 \ yasm \ zstd \ + zip \ && apt-get clean \ && rm -rf /var/lib/apt/lists # Download toolchain and SDK for Darwin RUN curl -sL -O https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz +# Download and install mold 2.0 for s390x build +RUN curl -Lo /tmp/mold.tar.gz "https://github.com/rui314/mold/releases/download/v2.0.0/mold-2.0.0-x86_64-linux.tar.gz" \ + && mkdir /tmp/mold \ + && tar -xzf /tmp/mold.tar.gz -C /tmp/mold \ + && cp -r /tmp/mold/mold*/* /usr \ + && rm -rf /tmp/mold \ + && rm /tmp/mold.tar.gz + # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH ARG NFPM_VERSION=2.20.0 diff --git a/docker/packager/binary/build.sh b/docker/packager/binary/build.sh index c0803c741475..6a27e7adcebd 100755 --- a/docker/packager/binary/build.sh +++ b/docker/packager/binary/build.sh @@ -15,6 +15,11 @@ if [ "$EXTRACT_TOOLCHAIN_DARWIN" = "1" ]; then mkdir -p /build/cmake/toolchain/darwin-x86_64 tar xJf /MacOSX11.0.sdk.tar.xz -C /build/cmake/toolchain/darwin-x86_64 --strip-components=1 ln -sf darwin-x86_64 /build/cmake/toolchain/darwin-aarch64 + + if [ "$EXPORT_SOURCES_WITH_SUBMODULES" = "1" ]; then + cd /build + tar --exclude-vcs-ignores --exclude-vcs --exclude build --exclude build_docker --exclude debian --exclude .git --exclude .github --exclude .cache --exclude docs --exclude tests/integration -c . | pigz -9 > /output/source_sub.tar.gz + fi fi # Uncomment to debug ccache. 
Don't put ccache log in /output right away, or it @@ -26,9 +31,6 @@ fi mkdir -p /build/build_docker cd /build/build_docker rm -f CMakeCache.txt -# Read cmake arguments into array (possibly empty) -read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" -env if [ -n "$MAKE_DEB" ]; then rm -rf /build/packages/root @@ -55,16 +57,41 @@ ccache_status # clear cache stats ccache --zero-stats ||: +function check_prebuild_exists() { + local path="$1" + [ -d "$path" ] && [ "$(ls -A "$path")" ] +} + +# Check whether the directory with pre-build scripts exists and is not empty. +if check_prebuild_exists /build/packages/pre-build +then + # Execute all commands + for file in /build/packages/pre-build/*.sh ; + do + # The script may want to modify environment variables. Why not allow it to do so? + # shellcheck disable=SC1090 + source "$file" + done +else + echo "There are no subcommands to execute :)" +fi + +# Read cmake arguments into array (possibly empty) +# The name of the local variable has to be different from the name of the environment variable +# so as not to override it, keeping the original usable for other processes. +read -ra CMAKE_FLAGS_ARRAY <<< "${CMAKE_FLAGS:-}" +env + if [ "$BUILD_MUSL_KEEPER" == "1" ] then # build keeper with musl separately # and without rust bindings - cmake --debug-trycompile -DENABLE_RUST=OFF -DBUILD_STANDALONE_KEEPER=1 -DENABLE_CLICKHOUSE_KEEPER=1 -DCMAKE_VERBOSE_MAKEFILE=1 -DUSE_MUSL=1 -LA -DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-x86_64-musl.cmake "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. + cmake --debug-trycompile -DENABLE_RUST=OFF -DBUILD_STANDALONE_KEEPER=1 -DENABLE_CLICKHOUSE_KEEPER=1 -DCMAKE_VERBOSE_MAKEFILE=1 -DUSE_MUSL=1 -LA -DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-x86_64-musl.cmake "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS_ARRAY[@]}" .. # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. ninja $NINJA_FLAGS clickhouse-keeper ls -la ./programs/ - ldd ./programs/clickhouse-keeper + ldd ./programs/clickhouse-keeper ||: if [ -n "$MAKE_DEB" ]; then # No quotes because I want it to expand to nothing if empty. @@ -73,26 +100,16 @@ then fi rm -f CMakeCache.txt - # Build the rest of binaries - cmake --debug-trycompile -DBUILD_STANDALONE_KEEPER=0 -DCREATE_KEEPER_SYMLINK=0 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. -else - # Build everything - cmake --debug-trycompile -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" .. + # Modify CMake flags, so we won't overwrite standalone keeper with symlinks + CMAKE_FLAGS_ARRAY+=(-DBUILD_STANDALONE_KEEPER=0 -DCREATE_KEEPER_SYMLINK=0) fi -if [ "coverity" == "$COMBINED_OUTPUT" ] -then - mkdir -p /workdir/cov-analysis - - wget --post-data "token=$COVERITY_TOKEN&project=ClickHouse%2FClickHouse" -qO- https://scan.coverity.com/download/linux64 | tar xz -C /workdir/cov-analysis --strip-components 1 - export PATH=$PATH:/workdir/cov-analysis/bin - cov-configure --config ./coverity.config --template --comptype clangcc --compiler "$CC" - SCAN_WRAPPER="cov-build --config ./coverity.config --dir cov-int" -fi +# Build everything +cmake --debug-trycompile -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS_ARRAY[@]}" ..
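The `read -ra CMAKE_FLAGS_ARRAY <<< "${CMAKE_FLAGS:-}"` line above performs the word splitting exactly once, turning the single CMAKE_FLAGS string into separate argv entries while leaving the environment variable itself intact for other processes. For comparison, the same idea in Python (illustrative, not part of the build):

```python
import shlex

cmake_flags = '-DENABLE_TESTS=0 "-DCMAKE_BUILD_TYPE=None"'

# shlex.split honours quoting the way a shell would, so each flag becomes
# its own argv element instead of one opaque blob (or being re-split on
# every expansion, as an unquoted $CMAKE_FLAGS would be).
argv = ["cmake", *shlex.split(cmake_flags), ".."]
print(argv)  # ['cmake', '-DENABLE_TESTS=0', '-DCMAKE_BUILD_TYPE=None', '..']
```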
# No quotes because I want it to expand to nothing if empty. # shellcheck disable=SC2086 # No quotes because I want it to expand to nothing if empty. -$SCAN_WRAPPER ninja $NINJA_FLAGS $BUILD_TARGET +ninja $NINJA_FLAGS $BUILD_TARGET ls -la ./programs @@ -107,9 +124,10 @@ if [ -n "$MAKE_DEB" ]; then bash -x /build/packages/build fi -mv ./programs/clickhouse* /output +mv ./programs/clickhouse* /output || mv ./programs/*_fuzzer /output [ -x ./programs/self-extracting/clickhouse ] && mv ./programs/self-extracting/clickhouse /output mv ./src/unit_tests_dbms /output ||: # may not exist for some binary builds +mv ./programs/*.dict ./programs/*.options ./programs/*_seed_corpus.zip /output ||: # libFuzzer oss-fuzz compatible infrastructure prepare_combined_output () { local OUTPUT @@ -175,13 +193,6 @@ then mv "$COMBINED_OUTPUT.tar.zst" /output fi -if [ "coverity" == "$COMBINED_OUTPUT" ] -then - # Coverity does not understand ZSTD. - tar -cvz -f "coverity-scan.tar.gz" cov-int - mv "coverity-scan.tar.gz" /output -fi - ccache_status ccache --evict-older-than 1d diff --git a/docker/packager/packager b/docker/packager/packager index 1b3df858cd2e..e63a4912e7cc 100755 --- a/docker/packager/packager +++ b/docker/packager/packager @@ -22,7 +22,7 @@ def check_image_exists_locally(image_name: str) -> bool: output = subprocess.check_output( f"docker images -q {image_name} 2> /dev/null", shell=True ) - return output != "" + return output != b"" except subprocess.CalledProcessError: return False @@ -46,7 +46,7 @@ def build_image(image_name: str, filepath: Path) -> None: ) -def pre_build(repo_path: Path, env_variables: List[str]): +def pre_build(repo_path: Path, env_variables: List[str]) -> None: if "WITH_PERFORMANCE=1" in env_variables: current_branch = subprocess.check_output( "git branch --show-current", shell=True, encoding="utf-8" @@ -80,9 +80,12 @@ def run_docker_image_with_env( output_dir: Path, env_variables: List[str], ch_root: Path, + cargo_cache_dir: Path, ccache_dir: Optional[Path], -): +) -> None: output_dir.mkdir(parents=True, exist_ok=True) + cargo_cache_dir.mkdir(parents=True, exist_ok=True) + env_part = " -e ".join(env_variables) if env_part: env_part = " -e " + env_part @@ -102,9 +105,9 @@ def run_docker_image_with_env( ccache_mount = "" cmd = ( - f"docker run --network=host --user={user} --rm {ccache_mount}" + f"docker run --network=host --user={user} --rm {ccache_mount} " f"--volume={output_dir}:/output --volume={ch_root}:/build {env_part} " - f"{interactive} {image_name}" + f"--volume={cargo_cache_dir}:/rust/cargo/registry {interactive} {image_name}" ) logging.info("Will build ClickHouse pkg with cmd: '%s'", cmd) @@ -112,12 +115,12 @@ def run_docker_image_with_env( subprocess.check_call(cmd, shell=True) -def is_release_build(build_type: str, package_type: str, sanitizer: str) -> bool: - return build_type == "" and package_type == "deb" and sanitizer == "" +def is_release_build(debug_build: bool, package_type: str, sanitizer: str) -> bool: + return not debug_build and package_type == "deb" and sanitizer == "" def parse_env_variables( - build_type: str, + debug_build: bool, compiler: str, sanitizer: str, package_type: str, @@ -129,27 +132,35 @@ def parse_env_variables( version: str, official: bool, additional_pkgs: bool, + with_profiler: bool, with_coverage: bool, with_binaries: str, -): +) -> List[str]: DARWIN_SUFFIX = "-darwin" DARWIN_ARM_SUFFIX = "-darwin-aarch64" ARM_SUFFIX = "-aarch64" ARM_V80COMPAT_SUFFIX = "-aarch64-v80compat" FREEBSD_SUFFIX = "-freebsd" PPC_SUFFIX = "-ppc64le" + 
RISCV_SUFFIX = "-riscv64" + S390X_SUFFIX = "-s390x" AMD64_COMPAT_SUFFIX = "-amd64-compat" result = [] result.append("OUTPUT_DIR=/output") cmake_flags = ["$CMAKE_FLAGS"] - build_target = "clickhouse-bundle" + if package_type == "fuzzers": + build_target = "fuzzers" + else: + build_target = "clickhouse-bundle" is_cross_darwin = compiler.endswith(DARWIN_SUFFIX) is_cross_darwin_arm = compiler.endswith(DARWIN_ARM_SUFFIX) is_cross_arm = compiler.endswith(ARM_SUFFIX) is_cross_arm_v80compat = compiler.endswith(ARM_V80COMPAT_SUFFIX) is_cross_ppc = compiler.endswith(PPC_SUFFIX) + is_cross_riscv = compiler.endswith(RISCV_SUFFIX) + is_cross_s390x = compiler.endswith(S390X_SUFFIX) is_cross_freebsd = compiler.endswith(FREEBSD_SUFFIX) is_amd64_compat = compiler.endswith(AMD64_COMPAT_SUFFIX) @@ -168,6 +179,7 @@ "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/darwin/toolchain-x86_64.cmake" ) result.append("EXTRACT_TOOLCHAIN_DARWIN=1") + result.append("EXPORT_SOURCES_WITH_SUBMODULES=1") elif is_cross_darwin_arm: cc = compiler[: -len(DARWIN_ARM_SUFFIX)] cmake_flags.append("-DCMAKE_AR:FILEPATH=/cctools/bin/aarch64-apple-darwin-ar") @@ -206,6 +218,16 @@ cmake_flags.append( "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-ppc64le.cmake" ) + elif is_cross_riscv: + cc = compiler[: -len(RISCV_SUFFIX)] + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-riscv64.cmake" + ) + elif is_cross_s390x: + cc = compiler[: -len(S390X_SUFFIX)] + cmake_flags.append( + "-DCMAKE_TOOLCHAIN_FILE=/build/cmake/linux/toolchain-s390x.cmake" + ) elif is_amd64_compat: cc = compiler[: -len(AMD64_COMPAT_SUFFIX)] result.append("DEB_ARCH=amd64") @@ -233,28 +255,34 @@ build_target = ( f"{build_target} clickhouse-odbc-bridge clickhouse-library-bridge" ) - if is_release_build(build_type, package_type, sanitizer): + if is_release_build(debug_build, package_type, sanitizer): cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON") result.append("WITH_PERFORMANCE=1") if is_cross_arm: cmake_flags.append("-DBUILD_STANDALONE_KEEPER=1") else: result.append("BUILD_MUSL_KEEPER=1") + elif package_type == "fuzzers": + cmake_flags.append("-DENABLE_FUZZING=1") + cmake_flags.append("-DENABLE_PROTOBUF=1") + cmake_flags.append("-DUSE_INTERNAL_PROTOBUF_LIBRARY=1") + cmake_flags.append("-DWITH_COVERAGE=1") + cmake_flags.append("-DCMAKE_AUTOGEN_VERBOSE=ON") + # cmake_flags.append("-DCMAKE_INSTALL_PREFIX=/usr") + # cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc") + # cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var") + # Reduce linking and building time by avoiding the *install/all dependencies + cmake_flags.append("-DCMAKE_SKIP_INSTALL_ALL_DEPENDENCY=ON") result.append(f"CC={cc}") result.append(f"CXX={cxx}") cmake_flags.append(f"-DCMAKE_C_COMPILER={cc}") cmake_flags.append(f"-DCMAKE_CXX_COMPILER={cxx}")
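A note on the `output != b""` fix to check_image_exists_locally earlier in this packager diff: subprocess.check_output returns bytes, and in Python 3 bytes never compare equal to str, so the old `output != ""` was always True and every image looked locally present. A quick demonstration:

```python
import subprocess

out = subprocess.check_output(["printf", ""])  # a command with empty output
print(type(out))    # <class 'bytes'>
print(out != "")    # True even though out is empty: bytes never equal str
print(out != b"")   # False: the correct emptiness check
```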
- # Create combined output archive for performance tests. - if package_type == "coverity": - result.append("COMBINED_OUTPUT=coverity") - result.append('COVERITY_TOKEN="$COVERITY_TOKEN"') - if sanitizer: result.append(f"SANITIZER={sanitizer}") - if build_type: - result.append(f"BUILD_TYPE={build_type.capitalize()}") + if debug_build: + result.append("BUILD_TYPE=Debug") else: result.append("BUILD_TYPE=None") @@ -298,7 +326,6 @@ if additional_pkgs: # NOTE: These are the envs for the packages/build script - result.append("MAKE_APK=true") result.append("MAKE_RPM=true") result.append("MAKE_TGZ=true") @@ -320,6 +347,9 @@ # utils are not included into clickhouse-bundle, so build everything build_target = "all" + if with_profiler: + cmake_flags.append("-DENABLE_BUILD_PROFILING=1") + if with_coverage: cmake_flags.append("-DWITH_COVERAGE=1") @@ -349,7 +379,7 @@ ) parser.add_argument( "--package-type", - choices=["deb", "binary", "coverity"], + choices=["deb", "binary", "fuzzers"], required=True, ) parser.add_argument( @@ -359,21 +389,23 @@ help="ClickHouse git repository", ) parser.add_argument("--output-dir", type=dir_name, required=True) - parser.add_argument("--build-type", choices=("debug", ""), default="") + parser.add_argument("--debug-build", action="store_true") parser.add_argument( "--compiler", choices=( - "clang-16", - "clang-16-darwin", - "clang-16-darwin-aarch64", - "clang-16-aarch64", - "clang-16-aarch64-v80compat", - "clang-16-ppc64le", - "clang-16-amd64-compat", - "clang-16-freebsd", + "clang-17", + "clang-17-darwin", + "clang-17-darwin-aarch64", + "clang-17-aarch64", + "clang-17-aarch64-v80compat", + "clang-17-ppc64le", + "clang-17-riscv64", + "clang-17-s390x", + "clang-17-amd64-compat", + "clang-17-freebsd", ), - default="clang-16", + default="clang-17", help="a compiler to use", ) parser.add_argument( @@ -409,10 +441,18 @@ action="store_true", help="if set, the build fails on errors writing cache to S3", ) + parser.add_argument( + "--cargo-cache-dir", + default=Path(os.getenv("CARGO_HOME", "") or Path.home() / ".cargo") + / "registry", + type=dir_name, + help="a directory to preserve the rust cargo crates", + ) parser.add_argument("--force-build-image", action="store_true") parser.add_argument("--version") parser.add_argument("--official", action="store_true") parser.add_argument("--additional-pkgs", action="store_true") + parser.add_argument("--with-profiler", action="store_true") parser.add_argument("--with-coverage", action="store_true") parser.add_argument( "--with-binaries", choices=("programs", "tests", ""), default="" @@ -448,7 +488,7 @@ return args -def main(): +def main() -> None: logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") args = parse_args() @@ -464,7 +504,7 @@ build_image(image_with_version, dockerfile) env_prepared = parse_env_variables( - args.build_type, + args.debug_build, args.compiler, args.sanitizer, args.package_type, @@ -476,6 +516,7 @@ args.version, args.official, args.additional_pkgs, + args.with_profiler, args.with_coverage, args.with_binaries, ) @@ -487,6 +528,7 @@ args.output_dir, env_prepared, ch_root, + args.cargo_cache_dir, args.ccache_dir, ) logging.info("Output placed into %s", args.output_dir) diff --git a/docker/server/Dockerfile.alpine b/docker/server/Dockerfile.alpine index e7e879fa95fd..afe1be75bedf 100644 ---
a/docker/server/Dockerfile.alpine +++ b/docker/server/Dockerfile.alpine @@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \ # lts / testing / prestable / etc ARG REPO_CHANNEL="stable" ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}" -ARG VERSION="23.5.3.24" +ARG VERSION="23.9.1.1854" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # user/group precreated explicitly with fixed uid/gid on purpose. diff --git a/docker/server/Dockerfile.ubuntu b/docker/server/Dockerfile.ubuntu index 42ae81655d25..402168c3e157 100644 --- a/docker/server/Dockerfile.ubuntu +++ b/docker/server/Dockerfile.ubuntu @@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list ARG REPO_CHANNEL="stable" ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main" -ARG VERSION="23.5.3.24" +ARG VERSION="23.9.1.1854" ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static" # set non-empty deb_location_url url to create a docker image diff --git a/docker/server/README.md b/docker/server/README.md index 67646a262f52..6200acbd30c9 100644 --- a/docker/server/README.md +++ b/docker/server/README.md @@ -97,8 +97,8 @@ docker run -d \ You may also want to mount: -* `/etc/clickhouse-server/config.d/*.xml` - files with server configuration adjustmenets -* `/etc/clickhouse-server/users.d/*.xml` - files with user settings adjustmenets +* `/etc/clickhouse-server/config.d/*.xml` - files with server configuration adjustments +* `/etc/clickhouse-server/users.d/*.xml` - files with user settings adjustments * `/docker-entrypoint-initdb.d/` - folder with database initialization scripts (see below). ### Linux capabilities diff --git a/docker/test/base/Dockerfile b/docker/test/base/Dockerfile index f68368044541..b55baa0e0fc3 100644 --- a/docker/test/base/Dockerfile +++ b/docker/test/base/Dockerfile @@ -11,6 +11,7 @@ RUN apt-get update \ pv \ ripgrep \ zstd \ + locales \ --yes --no-install-recommends # Sanitizer options for services (clickhouse-server) @@ -18,17 +19,23 @@ RUN apt-get update \ # and MEMORY_LIMIT_EXCEEDED exceptions in Functional tests (total memory limit in Functional tests is ~55.24 GiB). # TSAN will flush shadow memory when reaching this limit. # It may cause false-negatives, but it's better than OOM. 
-RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment +RUN echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1'" >> /etc/environment RUN echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment RUN echo "MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'" >> /etc/environment RUN echo "LSAN_OPTIONS='suppressions=/usr/share/clickhouse-test/config/lsan_suppressions.txt'" >> /etc/environment # Sanitizer options for current shell (not current, but the one that will be spawned on "docker run") # (but w/o verbosity for TSAN, otherwise test.reference will not match) -ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' +ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' -ENV TZ=Europe/Moscow +RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 +ENV LC_ALL en_US.UTF-8 + +ENV TZ=Europe/Amsterdam RUN ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone +# This script is used to set up real-time export of server logs from the CI into an external ClickHouse cluster: +COPY setup_export_logs.sh / + CMD sleep 1 diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh new file mode 100755 index 000000000000..0d6c0e368db0 --- /dev/null +++ b/docker/test/base/setup_export_logs.sh @@ -0,0 +1,170 @@ +#!/bin/bash + +# This script sets up export of system log tables to a remote server. +# Remote tables are created if they do not exist, and augmented with extra columns, +# and their names will contain a hash of the table structure, +# which allows exporting tables from servers of different versions. + +# The config file contains KEY=VALUE pairs with any necessary parameters like: +# CLICKHOUSE_CI_LOGS_HOST - remote host +# CLICKHOUSE_CI_LOGS_USER - user for the connection +# CLICKHOUSE_CI_LOGS_PASSWORD - password for user +CLICKHOUSE_CI_LOGS_CREDENTIALS=${CLICKHOUSE_CI_LOGS_CREDENTIALS:-/tmp/export-logs-config.sh} +CLICKHOUSE_CI_LOGS_USER=${CLICKHOUSE_CI_LOGS_USER:-ci} + +# Pre-configured destination cluster to export the data to +CLICKHOUSE_CI_LOGS_CLUSTER=${CLICKHOUSE_CI_LOGS_CLUSTER:-system_logs_export} + +EXTRA_COLUMNS=${EXTRA_COLUMNS:-"pull_request_number UInt32, commit_sha String, check_start_time DateTime, check_name LowCardinality(String), instance_type LowCardinality(String), instance_id String, "} +EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:-"0 AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type, '' AS instance_id"} +EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:-"check_name, "} + +function __set_connection_args +{ + # It's impossible to use a plain $CONNECTION_ARGS string, it's unsafe from a word-splitting perspective.
+ # That's why we must stick to the array of options generated here + CONNECTION_ARGS=( + --receive_timeout=45 --send_timeout=45 --secure + --user "${CLICKHOUSE_CI_LOGS_USER}" --host "${CLICKHOUSE_CI_LOGS_HOST}" + --password "${CLICKHOUSE_CI_LOGS_PASSWORD}" + ) +} + +function __shadow_credentials +{ + # The function completely hijacks the output; it shouldn't be used in normal functions, only in () + # The only way to substitute the env as plain text is using perl 's/\Qsomething\E/another/' + exec &> >(perl -pe ' + s(\Q$ENV{CLICKHOUSE_CI_LOGS_HOST}\E)[CLICKHOUSE_CI_LOGS_HOST]g; + s(\Q$ENV{CLICKHOUSE_CI_LOGS_USER}\E)[CLICKHOUSE_CI_LOGS_USER]g; + s(\Q$ENV{CLICKHOUSE_CI_LOGS_PASSWORD}\E)[CLICKHOUSE_CI_LOGS_PASSWORD]g; + ') +} + +function check_logs_credentials +( + # The function connects with the given credentials, and if it's unable to execute the simplest query, returns a non-zero exit code + + # First, check that all necessary parameters are set + set +x + for parameter in CLICKHOUSE_CI_LOGS_HOST CLICKHOUSE_CI_LOGS_USER CLICKHOUSE_CI_LOGS_PASSWORD; do + export -p | grep -q "$parameter" || { + echo "Credentials parameter $parameter is unset" + return 1 + } + done + + __shadow_credentials + __set_connection_args + local code + # Catch both success and error to not fail on `set -e` + clickhouse-client "${CONNECTION_ARGS[@]}" -q 'SELECT 1 FORMAT Null' && return 0 || code=$? + if [ "$code" != 0 ]; then + echo 'Failed to connect to CI Logs cluster' + return $code + fi +) + +function config_logs_export_cluster +( + # The function is launched in a separate shell instance to not expose the + # exported values from CLICKHOUSE_CI_LOGS_CREDENTIALS + set +x + if ! [ -r "${CLICKHOUSE_CI_LOGS_CREDENTIALS}" ]; then + echo "File $CLICKHOUSE_CI_LOGS_CREDENTIALS does not exist, skipping setup" + return + fi + set -a + # shellcheck disable=SC1090 + source "${CLICKHOUSE_CI_LOGS_CREDENTIALS}" + set +a + __shadow_credentials + echo "Checking if the credentials work" + check_logs_credentials || return 0 + cluster_config="${1:-/etc/clickhouse-server/config.d/system_logs_export.yaml}" + mkdir -p "$(dirname "$cluster_config")" + echo "remote_servers: + ${CLICKHOUSE_CI_LOGS_CLUSTER}: + shard: + replica: + secure: 1 + user: '${CLICKHOUSE_CI_LOGS_USER}' + host: '${CLICKHOUSE_CI_LOGS_HOST}' + port: 9440 + password: '${CLICKHOUSE_CI_LOGS_PASSWORD}' +" > "$cluster_config" + echo "Cluster ${CLICKHOUSE_CI_LOGS_CLUSTER} is configured in ${cluster_config}" +) + +function setup_logs_replication +( + # The function is launched in a separate shell instance to not expose the + # exported values from CLICKHOUSE_CI_LOGS_CREDENTIALS + set +x + # disable output + if ! [ -r "${CLICKHOUSE_CI_LOGS_CREDENTIALS}" ]; then + echo "File $CLICKHOUSE_CI_LOGS_CREDENTIALS does not exist, skipping setup" + return 0 + fi + set -a + # shellcheck disable=SC1090 + source "${CLICKHOUSE_CI_LOGS_CREDENTIALS}" + set +a + __shadow_credentials + echo "Checking if the credentials work" + check_logs_credentials || return 0 + __set_connection_args + + echo 'Create all configured system logs' + clickhouse-client --query "SYSTEM FLUSH LOGS" + + # It doesn't make sense to try creating tables if SYNC fails + echo "SYSTEM SYNC DATABASE REPLICA default" | clickhouse-client "${CONNECTION_ARGS[@]}" || return 0 + + # For each system log table: + echo 'Create %_log tables' + clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table + do + # Calculate hash of its structure.
Note: 1 is the version of extra columns - increment it if extra columns are changed: + hash=$(clickhouse-client --query " + SELECT sipHash64(1, groupArray((name, type))) + FROM (SELECT name, type FROM system.columns + WHERE database = 'system' AND table = '$table' + ORDER BY position) + ") + + # Create the destination table with adapted name and structure: + statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e ' + s/^\($/('"$EXTRA_COLUMNS"'/; + s/ORDER BY \(/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"'/; + s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/; + /^TTL /d + ') + + echo -e "Creating remote destination table ${table}_${hash} with statement:\n${statement}" >&2 + + echo "$statement" | clickhouse-client --database_replicated_initial_query_timeout_sec=10 \ + --distributed_ddl_task_timeout=30 \ + "${CONNECTION_ARGS[@]}" || continue + + echo "Creating table system.${table}_sender" >&2 + + # Create a Distributed table and a materialized view to watch the original table: + clickhouse-client --query " + CREATE TABLE system.${table}_sender + ENGINE = Distributed(${CLICKHOUSE_CI_LOGS_CLUSTER}, default, ${table}_${hash}) + SETTINGS flush_on_detach=0 + EMPTY AS + SELECT ${EXTRA_COLUMNS_EXPRESSION}, * + FROM system.${table} + " || continue + + echo "Creating materialized view system.${table}_watcher" >&2 + + clickhouse-client --query " + CREATE MATERIALIZED VIEW system.${table}_watcher TO system.${table}_sender AS + SELECT ${EXTRA_COLUMNS_EXPRESSION}, * + FROM system.${table} + " || continue + done +) diff --git a/docker/test/codebrowser/Dockerfile b/docker/test/codebrowser/Dockerfile deleted file mode 100644 index 8136fd1fbbcd..000000000000 --- a/docker/test/codebrowser/Dockerfile +++ /dev/null @@ -1,30 +0,0 @@ -# rebuild in #33610 -# docker build --network=host -t clickhouse/codebrowser . -# docker run --volume=path_to_repo:/repo_folder --volume=path_to_result:/test_output clickhouse/codebrowser -ARG FROM_TAG=latest -FROM clickhouse/binary-builder:$FROM_TAG - -# ARG for quick switch to a given ubuntu mirror -ARG apt_archive="http://archive.ubuntu.com" -RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list - -RUN apt-get update && apt-get --yes --allow-unauthenticated install libclang-${LLVM_VERSION}-dev libmlir-${LLVM_VERSION}-dev - -ARG TARGETARCH -RUN arch=${TARGETARCH:-amd64} \ - && case $arch in \ - amd64) rarch=x86_64 ;; \ - arm64) rarch=aarch64 ;; \ - *) exit 1 ;; \ - esac - -# repo versions doesn't work correctly with C++17 -# also we push reports to s3, so we add index.html to subfolder urls -# https://github.com/ClickHouse/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b -RUN git clone --branch=master --depth=1 https://github.com/ClickHouse/woboq_codebrowser /woboq_codebrowser \ - && cd /woboq_codebrowser \ - && cmake .
-G Ninja -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-${LLVM_VERSION} -DCMAKE_C_COMPILER=clang-${LLVM_VERSION} -DCLANG_BUILTIN_HEADERS_DIR=/usr/lib/llvm-${LLVM_VERSION}/lib/clang/${LLVM_VERSION}/include \ - && ninja - -COPY build.sh / -CMD ["bash", "-c", "/build.sh 2>&1"] diff --git a/docker/test/codebrowser/build.sh b/docker/test/codebrowser/build.sh deleted file mode 100755 index d76d0c3a039f..000000000000 --- a/docker/test/codebrowser/build.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash - -set -x -e - - -STATIC_DATA=${STATIC_DATA:-/woboq_codebrowser/data} -SOURCE_DIRECTORY=${SOURCE_DIRECTORY:-/build} -BUILD_DIRECTORY=${BUILD_DIRECTORY:-/workdir/build} -OUTPUT_DIRECTORY=${OUTPUT_DIRECTORY:-/workdir/output} -HTML_RESULT_DIRECTORY=${HTML_RESULT_DIRECTORY:-$OUTPUT_DIRECTORY/html_report} -SHA=${SHA:-nosha} -DATA=${DATA:-https://s3.amazonaws.com/clickhouse-test-reports/codebrowser/data} -nproc=$(($(nproc) + 2)) # increase parallelism - -read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}" - -mkdir -p "$BUILD_DIRECTORY" && cd "$BUILD_DIRECTORY" -cmake "$SOURCE_DIRECTORY" -DCMAKE_CXX_COMPILER="/usr/bin/clang++-${LLVM_VERSION}" -DCMAKE_C_COMPILER="/usr/bin/clang-${LLVM_VERSION}" -DENABLE_WOBOQ_CODEBROWSER=ON "${CMAKE_FLAGS[@]}" -mkdir -p "$HTML_RESULT_DIRECTORY" -echo 'Filter out too noisy "Error: filename" lines and keep them in full codebrowser_generator.log' -/woboq_codebrowser/generator/codebrowser_generator -b "$BUILD_DIRECTORY" -a \ - -o "$HTML_RESULT_DIRECTORY" --execute-concurrency="$nproc" -p "ClickHouse:$SOURCE_DIRECTORY:$SHA" \ - -d "$DATA" \ - |& ts '%Y-%m-%d %H:%M:%S' \ - | tee "$OUTPUT_DIRECTORY/codebrowser_generator.log" \ - | grep --line-buffered -v ':[0-9]* Error: ' -cp -r "$STATIC_DATA" "$HTML_RESULT_DIRECTORY/" -/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator "$HTML_RESULT_DIRECTORY" \ - -d "$DATA" |& ts '%Y-%m-%d %H:%M:%S' diff --git a/docker/test/fasttest/Dockerfile b/docker/test/fasttest/Dockerfile index ffb13fc774d4..a38f59dacac4 100644 --- a/docker/test/fasttest/Dockerfile +++ b/docker/test/fasttest/Dockerfile @@ -9,6 +9,7 @@ RUN apt-get update \ expect \ file \ lsof \ + odbcinst \ psmisc \ python3 \ python3-lxml \ @@ -17,6 +18,7 @@ RUN apt-get update \ python3-termcolor \ unixodbc \ pv \ + jq \ zstd \ --yes --no-install-recommends @@ -29,9 +31,13 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && cp /tmp/clickhouse-odbc-tmp/lib64/*.so /usr/local/lib/ \ && odbcinst -i -d -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbcinst.ini.sample \ && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ - && rm -rf /tmp/clickhouse-odbc-tmp + && rm -rf /tmp/clickhouse-odbc-tmp \ + && mkdir -p /var/lib/clickhouse \ + && chmod 777 /var/lib/clickhouse -ENV TZ=Europe/Moscow +# chmod 777 to make the container user independent + +ENV TZ=Europe/Amsterdam RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV COMMIT_SHA='' diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index dab873377ce5..57789f003f90 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -9,7 +9,7 @@ trap 'kill $(jobs -pr) ||:' EXIT stage=${stage:-} # Compiler version, normally set by Dockerfile -export LLVM_VERSION=${LLVM_VERSION:-16} +export LLVM_VERSION=${LLVM_VERSION:-17} # A variable to pass additional flags to CMake. 
# Here we explicitly default it to nothing so that bash doesn't complain about @@ -28,6 +28,12 @@ FASTTEST_BUILD=$(readlink -f "${FASTTEST_BUILD:-${BUILD:-$FASTTEST_WORKSPACE/bui FASTTEST_DATA=$(readlink -f "${FASTTEST_DATA:-$FASTTEST_WORKSPACE/db-fasttest}") FASTTEST_OUTPUT=$(readlink -f "${FASTTEST_OUTPUT:-$FASTTEST_WORKSPACE}") PATH="$FASTTEST_BUILD/programs:$FASTTEST_SOURCE/tests:$PATH" +# Workaround for a non-existent user +if [ "$HOME" == "/" ]; then + HOME="$FASTTEST_WORKSPACE/user-home" + mkdir -p "$HOME" + export HOME +fi # Export these variables, so that all subsequent invocations of the script # use them, and not try to guess them anew, which leads to weird effects. @@ -80,7 +86,7 @@ function start_server function clone_root { - git config --global --add safe.directory "$FASTTEST_SOURCE" + [ "$UID" -eq 0 ] && git config --global --add safe.directory "$FASTTEST_SOURCE" git clone --depth 1 https://github.com/ClickHouse/ClickHouse.git -- "$FASTTEST_SOURCE" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/clone_log.txt" ( @@ -120,7 +126,7 @@ function clone_submodules contrib/libxml2 contrib/libunwind contrib/fmtlib - contrib/base64 + contrib/aklomp-base64 contrib/cctz contrib/libcpuid contrib/libdivide @@ -141,17 +147,22 @@ function clone_submodules contrib/jemalloc contrib/replxx contrib/wyhash - contrib/hashidsxx contrib/c-ares contrib/morton-nd contrib/xxHash contrib/simdjson contrib/liburing contrib/libfiu + contrib/incbin + contrib/yaml-cpp ) git submodule sync - git submodule update --jobs=16 --depth 1 --init "${SUBMODULES_TO_UPDATE[@]}" + git submodule init + # --jobs is not as fast as running the updates in parallel ourselves + printf '%s\0' "${SUBMODULES_TO_UPDATE[@]}" | \ + xargs --max-procs=100 --null --no-run-if-empty --max-args=1 \ + git submodule update --depth 1 --single-branch git submodule foreach git reset --hard git submodule foreach git checkout @ -f git submodule foreach git clean -xfd @@ -166,11 +177,11 @@ function run_cmake "-DENABLE_UTILS=0" "-DENABLE_EMBEDDED_COMPILER=0" "-DENABLE_THINLTO=0" - "-DUSE_UNWIND=1" "-DENABLE_NURAFT=1" "-DENABLE_SIMDJSON=1" "-DENABLE_JEMALLOC=1" "-DENABLE_LIBURING=1" + "-DENABLE_YAML_CPP=1" ) export CCACHE_DIR="$FASTTEST_WORKSPACE/ccache" @@ -202,10 +213,11 @@ function build | ts '%Y-%m-%d %H:%M:%S' \ | tee "$FASTTEST_OUTPUT/test_result.txt" if [ "$COPY_CLICKHOUSE_BINARY_TO_OUTPUT" -eq "1" ]; then - cp programs/clickhouse "$FASTTEST_OUTPUT/clickhouse" + mkdir -p "$FASTTEST_OUTPUT/binaries/" + cp programs/clickhouse "$FASTTEST_OUTPUT/binaries/clickhouse" - strip programs/clickhouse -o "$FASTTEST_OUTPUT/clickhouse-stripped" - zstd --threads=0 "$FASTTEST_OUTPUT/clickhouse-stripped" + strip programs/clickhouse -o programs/clickhouse-stripped + zstd --threads=0 programs/clickhouse-stripped -o "$FASTTEST_OUTPUT/binaries/clickhouse-stripped.zst" fi ccache_status ccache --evict-older-than 1d ||: @@ -269,34 +281,12 @@ case "$stage" in ;& "clone_root") clone_root - - # Pass control to the script from cloned sources, unless asked otherwise. - if ! [ -v FASTTEST_LOCAL_SCRIPT ] - then - # 'run' stage is deprecated, used for compatibility with old scripts. - # Replace with 'clone_submodules' after Nov 1, 2020. - # cd and CLICKHOUSE_DIR are also a setup for old scripts, remove as well. - # In modern script we undo it by changing back into workspace dir right - # away, see below. Remove that as well. - cd "$FASTTEST_SOURCE" - CLICKHOUSE_DIR=$(pwd) - export CLICKHOUSE_DIR - stage=run "$FASTTEST_SOURCE/docker/test/fasttest/run.sh" - exit $?
- fi - ;& -"run") - # A deprecated stage that is called by old script and equivalent to everything - # after cloning root, starting with cloning submodules. ;& "clone_submodules") - # Recover after being called from the old script that changes into source directory. - # See the compatibility hacks in `clone_root` stage above. Remove at the same time, - # after Nov 1, 2020. - cd "$FASTTEST_WORKSPACE" clone_submodules 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/submodule_log.txt" ;& "run_cmake") + cd "$FASTTEST_WORKSPACE" run_cmake ;& "build") diff --git a/docker/test/fuzzer/Dockerfile b/docker/test/fuzzer/Dockerfile index aa71074c02a7..0bc0fb06633b 100644 --- a/docker/test/fuzzer/Dockerfile +++ b/docker/test/fuzzer/Dockerfile @@ -8,7 +8,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list ENV LANG=C.UTF-8 -ENV TZ=Europe/Moscow +ENV TZ=Europe/Amsterdam RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone RUN apt-get update \ diff --git a/docker/test/fuzzer/run-fuzzer.sh b/docker/test/fuzzer/run-fuzzer.sh index d2c8de7a211c..e56fe6ca83ab 100755 --- a/docker/test/fuzzer/run-fuzzer.sh +++ b/docker/test/fuzzer/run-fuzzer.sh @@ -1,6 +1,8 @@ #!/bin/bash # shellcheck disable=SC2086,SC2001,SC2046,SC2030,SC2031,SC2010,SC2015 +# shellcheck disable=SC1091 +source /setup_export_logs.sh set -x # core.COMM.PID-TID @@ -15,7 +17,7 @@ stage=${stage:-} script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" echo "$script_dir" repo_dir=ch -BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-16_debug_none_unsplitted_disable_False_binary"} +BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-17_debug_none_unsplitted_disable_False_binary"} BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"} function git_clone_with_retry @@ -122,6 +124,8 @@ EOL $PWD EOL + + config_logs_export_cluster db/config.d/system_logs_export.yaml } function filter_exists_and_template @@ -223,7 +227,9 @@ quit done clickhouse-client --query "select 1" # This checks that the server is responding kill -0 $server_pid # This checks that it is our server that is started and not some other one - echo Server started and responded + echo 'Server started and responded' + + setup_logs_replication # SC2012: Use find instead of ls to better handle non-alphanumeric filenames. They are all alphanumeric. # SC2046: Quote this to prevent word splitting. Actually I need word splitting. @@ -291,7 +297,7 @@ quit if [ "$server_died" == 1 ] then # The server has died. - if ! rg --text -o 'Received signal.*|Logical error.*|Assertion.*failed|Failed assertion.*|.*runtime error: .*|.*is located.*|(SUMMARY|ERROR): [a-zA-Z]+Sanitizer:.*|.*_LIBCPP_ASSERT.*' server.log > description.txt + if ! rg --text -o 'Received signal.*|Logical error.*|Assertion.*failed|Failed assertion.*|.*runtime error: .*|.*is located.*|(SUMMARY|ERROR): [a-zA-Z]+Sanitizer:.*|.*_LIBCPP_ASSERT.*|.*Child process was terminated by signal 9.*' server.log > description.txt then echo "Lost connection to server. See the logs." 
> description.txt fi diff --git a/docker/test/install/deb/Dockerfile b/docker/test/install/deb/Dockerfile index 9614473c69b4..e9c928b1fe7e 100644 --- a/docker/test/install/deb/Dockerfile +++ b/docker/test/install/deb/Dockerfile @@ -12,6 +12,7 @@ ENV \ # install systemd packages RUN apt-get update && \ apt-get install -y --no-install-recommends \ + sudo \ systemd \ && \ apt-get clean && \ diff --git a/docker/test/integration/base/Dockerfile b/docker/test/integration/base/Dockerfile index de8efa20af4e..270b40e23a6d 100644 --- a/docker/test/integration/base/Dockerfile +++ b/docker/test/integration/base/Dockerfile @@ -46,12 +46,13 @@ RUN arch=${TARGETARCH:-amd64} \ arm64) rarch=aarch64 ;; \ esac \ && cd /tmp \ - && curl -o mysql-odbc.rpm "https://cdn.mysql.com/archives/mysql-connector-odbc-8.0/mysql-connector-odbc-8.0.27-1.el8.${rarch}.rpm" \ + && curl -o mysql-odbc.rpm "https://cdn.mysql.com/archives/mysql-connector-odbc-8.0/mysql-connector-odbc-8.0.32-1.el9.${rarch}.rpm" \ && rpm2archive mysql-odbc.rpm \ && tar xf mysql-odbc.rpm.tgz -C / ./usr/lib64/ \ - && LINK_DIR=$(dpkg -L libodbc1 | rg '^/usr/lib/.*-linux-gnu/odbc$') \ - && ln -s /usr/lib64/libmyodbc8a.so "$LINK_DIR" \ - && ln -s /usr/lib64/libmyodbc8a.so "$LINK_DIR"/libmyodbc.so + && rm mysql-odbc.rpm mysql-odbc.rpm.tgz \ + && ODBC_DIR=$(dpkg -L odbc-postgresql | rg '^/usr/lib/.*-linux-gnu/odbc$') \ + && ln -s /usr/lib64/libmyodbc8a.so "$ODBC_DIR" \ + && ln -s /usr/lib64/libmyodbc8a.so "$ODBC_DIR"/libmyodbc.so # Unfortunately this is required for a single test for converting data from zookeeper to clickhouse-keeper. # ZooKeeper is not started by default, but consumes some space in containers. diff --git a/docker/test/integration/helper_container/Dockerfile b/docker/test/integration/helper_container/Dockerfile index 6a093081bf2c..60adaea17961 100644 --- a/docker/test/integration/helper_container/Dockerfile +++ b/docker/test/integration/helper_container/Dockerfile @@ -2,4 +2,7 @@ # Helper docker container to run iptables without sudo FROM alpine -RUN apk add -U iproute2 +RUN apk add --no-cache -U iproute2 \ + && for bin in iptables iptables-restore iptables-save; \ + do ln -sf xtables-nft-multi "/sbin/$bin"; \ + done diff --git a/docker/test/integration/hive_server/Dockerfile b/docker/test/integration/hive_server/Dockerfile index b06a0dcc8304..e37e28005578 100644 --- a/docker/test/integration/hive_server/Dockerfile +++ b/docker/test/integration/hive_server/Dockerfile @@ -6,7 +6,7 @@ RUN apt-get install -y wget openjdk-8-jre RUN wget https://archive.apache.org/dist/hadoop/common/hadoop-3.1.0/hadoop-3.1.0.tar.gz && \ tar -xf hadoop-3.1.0.tar.gz && rm -rf hadoop-3.1.0.tar.gz -RUN wget https://dlcdn.apache.org/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz && \ +RUN wget https://archive.apache.org/dist/hive/hive-2.3.9/apache-hive-2.3.9-bin.tar.gz && \ tar -xf apache-hive-2.3.9-bin.tar.gz && rm -rf apache-hive-2.3.9-bin.tar.gz RUN apt install -y vim diff --git a/docker/test/integration/mysql_java_client/Dockerfile b/docker/test/integration/mysql_java_client/Dockerfile index 0abf50cd4937..83c175514814 100644 --- a/docker/test/integration/mysql_java_client/Dockerfile +++ b/docker/test/integration/mysql_java_client/Dockerfile @@ -1,21 +1,15 @@ # docker build -t clickhouse/mysql-java-client .
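(The helper container above routes the iptables tools through the nft backend via the xtables-nft-multi multicall binary. A quick sanity check, sketched here; the image name and tag are assumptions:

    docker run --rm clickhouse/integration-helper:latest iptables --version
    # with the symlinks in place this should report something like "iptables v1.8.x (nf_tables)"
)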
# MySQL Java client docker container -FROM ubuntu:18.04 +FROM openjdk:8-jdk-alpine -RUN apt-get update && \ - apt-get install -y software-properties-common build-essential openjdk-8-jdk libmysql-java curl +RUN apk --no-cache add curl -RUN rm -rf \ - /var/lib/apt/lists/* \ - /var/cache/debconf \ - /tmp/* \ -RUN apt-get clean - -ARG ver=5.1.46 -RUN curl -L -o /mysql-connector-java-${ver}.jar https://repo1.maven.org/maven2/mysql/mysql-connector-java/${ver}/mysql-connector-java-${ver}.jar -ENV CLASSPATH=$CLASSPATH:/mysql-connector-java-${ver}.jar +ARG ver=8.1.0 +RUN curl -L -o /mysql-connector-j-${ver}.jar https://repo1.maven.org/maven2/com/mysql/mysql-connector-j/${ver}/mysql-connector-j-${ver}.jar +ENV CLASSPATH=$CLASSPATH:/mysql-connector-j-${ver}.jar WORKDIR /jdbc COPY Test.java Test.java -RUN javac Test.java +COPY PreparedStatementsTest.java PreparedStatementsTest.java +RUN javac Test.java PreparedStatementsTest.java diff --git a/docker/test/integration/mysql_java_client/PreparedStatementsTest.java b/docker/test/integration/mysql_java_client/PreparedStatementsTest.java new file mode 100644 index 000000000000..1ffa945718d7 --- /dev/null +++ b/docker/test/integration/mysql_java_client/PreparedStatementsTest.java @@ -0,0 +1,193 @@ +import com.mysql.cj.MysqlType; + +import java.sql.*; + +public class PreparedStatementsTest { + public static void main(String[] args) { + int i = 0; + String host = "127.0.0.1"; + String port = "9004"; + String user = "default"; + String password = ""; + String database = "default"; + while (i < args.length) { + switch (args[i]) { + case "--host": + host = args[++i]; + break; + case "--port": + port = args[++i]; + break; + case "--user": + user = args[++i]; + break; + case "--password": + password = args[++i]; + break; + case "--database": + database = args[++i]; + break; + default: + i++; + break; + } + } + + // useServerPrepStmts uses COM_STMT_PREPARE and COM_STMT_EXECUTE + // instead of COM_QUERY which allows us to test the binary protocol + String jdbcUrl = String.format("jdbc:mysql://%s:%s/%s?useSSL=false&useServerPrepStmts=true", + host, port, database); + + try { + Class.forName("com.mysql.cj.jdbc.Driver"); + Connection conn = DriverManager.getConnection(jdbcUrl, user, password); + testSimpleDataTypes(conn); + testStringTypes(conn); + testLowCardinalityAndNullableTypes(conn); + testDecimalTypes(conn); + testMiscTypes(conn); + testDateTypes(conn); + testUnusualDateTime64Scales(conn); + testDateTimeTimezones(conn); + conn.close(); + } catch (Exception e) { + e.printStackTrace(); + System.exit(1); + } + } + + private static void testSimpleDataTypes(Connection conn) throws SQLException { + System.out.println("### testSimpleDataTypes"); + ResultSet rs = conn.prepareStatement("SELECT * FROM ps_simple_data_types").executeQuery(); + int rowNum = 1; + while (rs.next()) { + System.out.printf("Row #%d\n", rowNum++); + System.out.printf("%s, value: %d\n", getMysqlType(rs, "i8"), rs.getInt("i8")); + System.out.printf("%s, value: %d\n", getMysqlType(rs, "i16"), rs.getInt("i16")); + System.out.printf("%s, value: %d\n", getMysqlType(rs, "i32"), rs.getInt("i32")); + System.out.printf("%s, value: %d\n", getMysqlType(rs, "i64"), rs.getLong("i64")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "i128"), rs.getString("i128")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "i256"), rs.getString("i256")); + System.out.printf("%s, value: %d\n", getMysqlType(rs, "ui8"), rs.getInt("ui8")); + System.out.printf("%s, value: %d\n", getMysqlType(rs, "ui16"), 
rs.getInt("ui16")); + System.out.printf("%s, value: %d\n", getMysqlType(rs, "ui32"), rs.getLong("ui32")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "ui64"), rs.getString("ui64")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "ui128"), rs.getString("ui128")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "ui256"), rs.getString("ui256")); + System.out.printf("%s, value: %f\n", getMysqlType(rs, "f32"), rs.getFloat("f32")); + System.out.printf("%s, value: %f\n", getMysqlType(rs, "f64"), rs.getFloat("f64")); + System.out.printf("%s, value: %b\n", getMysqlType(rs, "b"), rs.getBoolean("b")); + } + System.out.println(); + } + + private static void testStringTypes(Connection conn) throws SQLException { + System.out.println("### testStringTypes"); + ResultSet rs = conn.prepareStatement("SELECT * FROM ps_string_types").executeQuery(); + int rowNum = 1; + while (rs.next()) { + System.out.printf("Row #%d\n", rowNum++); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "s"), rs.getString("s")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "sn"), rs.getString("sn")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "lc"), rs.getString("lc")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "nlc"), rs.getString("nlc")); + } + System.out.println(); + } + + private static void testLowCardinalityAndNullableTypes(Connection conn) throws SQLException { + System.out.println("### testLowCardinalityAndNullableTypes"); + ResultSet rs = conn.prepareStatement("SELECT * FROM ps_low_cardinality_and_nullable_types").executeQuery(); + int rowNum = 1; + while (rs.next()) { + System.out.printf("Row #%d\n", rowNum++); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "ilc"), rs.getInt("ilc")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "dlc"), rs.getDate("dlc")); + // NULL int is represented as zero + System.out.printf("%s, value: %s\n", getMysqlType(rs, "ni"), rs.getInt("ni")); + } + System.out.println(); + } + + private static void testDecimalTypes(Connection conn) throws SQLException { + System.out.println("### testDecimalTypes"); + ResultSet rs = conn.prepareStatement("SELECT * FROM ps_decimal_types").executeQuery(); + int rowNum = 1; + while (rs.next()) { + System.out.printf("Row #%d\n", rowNum++); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "d32"), rs.getBigDecimal("d32").toPlainString()); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "d64"), rs.getBigDecimal("d64").toPlainString()); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "d128_native"), + rs.getBigDecimal("d128_native").toPlainString()); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "d128_text"), rs.getString("d128_text")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "d256"), rs.getString("d256")); + } + System.out.println(); + } + + private static void testDateTypes(Connection conn) throws SQLException { + System.out.println("### testDateTypes"); + ResultSet rs = conn.prepareStatement("SELECT * FROM ps_date_types").executeQuery(); + int rowNum = 1; + while (rs.next()) { + System.out.printf("Row #%d\n", rowNum++); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "d"), rs.getDate("d")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "d32"), rs.getDate("d32")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt"), rs.getTimestamp("dt")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_3"), rs.getTimestamp("dt64_3")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_6"), 
rs.getTimestamp("dt64_6")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_9"), rs.getTimestamp("dt64_9")); + } + System.out.println(); + } + + private static void testUnusualDateTime64Scales(Connection conn) throws SQLException { + System.out.println("### testUnusualDateTime64Scales"); + ResultSet rs = conn.prepareStatement("SELECT * FROM ps_unusual_datetime64_scales").executeQuery(); + int rowNum = 1; + while (rs.next()) { + System.out.printf("Row #%d\n", rowNum++); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_0"), rs.getTimestamp("dt64_0")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_1"), rs.getTimestamp("dt64_1")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_2"), rs.getTimestamp("dt64_2")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_4"), rs.getTimestamp("dt64_4")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_5"), rs.getTimestamp("dt64_5")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_7"), rs.getTimestamp("dt64_7")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_8"), rs.getTimestamp("dt64_8")); + } + System.out.println(); + } + + private static void testDateTimeTimezones(Connection conn) throws SQLException { + System.out.println("### testDateTimeTimezones"); + ResultSet rs = conn.prepareStatement("SELECT * FROM ps_datetime_timezones").executeQuery(); + int rowNum = 1; + while (rs.next()) { + System.out.printf("Row #%d\n", rowNum++); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt"), rs.getTimestamp("dt")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "dt64_3"), rs.getTimestamp("dt64_3")); + } + System.out.println(); + } + + private static void testMiscTypes(Connection conn) throws SQLException { + System.out.println("### testMiscTypes"); + ResultSet rs = conn.prepareStatement("SELECT * FROM ps_misc_types").executeQuery(); + int rowNum = 1; + while (rs.next()) { + System.out.printf("Row #%d\n", rowNum++); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "a"), rs.getString("a")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "u"), rs.getString("u")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "t"), rs.getString("t")); + System.out.printf("%s, value: %s\n", getMysqlType(rs, "m"), rs.getString("m")); + } + System.out.println(); + } + + private static String getMysqlType(ResultSet rs, String columnLabel) throws SQLException { + ResultSetMetaData meta = rs.getMetaData(); + return String.format("%s type is %s", columnLabel, + MysqlType.getByJdbcType(meta.getColumnType(rs.findColumn(columnLabel)))); + } + +} diff --git a/docker/test/integration/mysql_java_client/Test.java b/docker/test/integration/mysql_java_client/Test.java index 2e256d5dc446..752441718a98 100644 --- a/docker/test/integration/mysql_java_client/Test.java +++ b/docker/test/integration/mysql_java_client/Test.java @@ -46,6 +46,7 @@ public static void main(String[] args) { Connection conn = null; Statement stmt = null; try { + Class.forName("com.mysql.cj.jdbc.Driver"); conn = DriverManager.getConnection(jdbcUrl, user, password); stmt = conn.createStatement(); stmt.executeUpdate(CREATE_TABLE_SQL); @@ -69,7 +70,7 @@ public static void main(String[] args) { stmt.close(); conn.close(); - } catch (SQLException e) { + } catch (Exception e) { e.printStackTrace(); System.exit(1); } diff --git a/docker/test/integration/mysql_php_client/Dockerfile b/docker/test/integration/mysql_php_client/Dockerfile index 55db4d15a7f3..0e11ae023e63 100644 --- 
a/docker/test/integration/mysql_php_client/Dockerfile +++ b/docker/test/integration/mysql_php_client/Dockerfile @@ -1,7 +1,7 @@ # docker build -t clickhouse/mysql-php-client . # MySQL PHP client docker container -FROM php:8.0.18-cli +FROM php:8-cli-alpine COPY ./client.crt client.crt COPY ./client.key client.key diff --git a/docker/test/integration/nginx_dav/Dockerfile b/docker/test/integration/nginx_dav/Dockerfile new file mode 100644 index 000000000000..42c1244f6dcd --- /dev/null +++ b/docker/test/integration/nginx_dav/Dockerfile @@ -0,0 +1,6 @@ +FROM nginx:alpine-slim + +COPY default.conf /etc/nginx/conf.d/ + +RUN mkdir /usr/share/nginx/files/ \ + && chown nginx: /usr/share/nginx/files/ -R diff --git a/docker/test/integration/nginx_dav/default.conf b/docker/test/integration/nginx_dav/default.conf new file mode 100644 index 000000000000..466d0584a2d2 --- /dev/null +++ b/docker/test/integration/nginx_dav/default.conf @@ -0,0 +1,25 @@ +server { + listen 80; + + #root /usr/share/nginx/test.com; + index index.html index.htm; + + server_name test.com localhost; + + location / { + expires max; + root /usr/share/nginx/files; + client_max_body_size 20m; + client_body_temp_path /usr/share/nginx/tmp; + dav_methods PUT; # Allowed methods, only PUT is necessary + + create_full_put_path on; # nginx automatically creates nested directories + dav_access user:rw group:r all:r; # access permissions for files + + limit_except GET { + allow all; + } + } + + error_page 405 =200 $uri; +} diff --git a/docker/test/integration/runner/Dockerfile b/docker/test/integration/runner/Dockerfile index 14c97e479f6d..8345e3d57912 100644 --- a/docker/test/integration/runner/Dockerfile +++ b/docker/test/integration/runner/Dockerfile @@ -1,5 +1,5 @@ # docker build -t clickhouse/integration-tests-runner . 
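(The nginx WebDAV config above accepts uploads with plain PUT. A smoke test against a running container, assuming its port 80 is published locally:

    curl -T local.txt http://localhost/a/b/c/remote.txt   # PUT; create_full_put_path makes a/b/c on the fly
    curl http://localhost/a/b/c/remote.txt                # read it back over GET
)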
-FROM ubuntu:20.04 +FROM ubuntu:22.04 # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" @@ -47,26 +47,30 @@ ENV TZ=Etc/UTC RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV DOCKER_CHANNEL stable +# Unpin the docker version after the release 24.0.3 is released +# https://github.com/moby/moby/issues/45770#issuecomment-1618255130 RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add - \ && add-apt-repository "deb https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}" \ && apt-get update \ && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ - docker-ce \ + docker-ce='5:23.*' \ && rm -rf \ /var/lib/apt/lists/* \ /var/cache/debconf \ /tmp/* \ - && apt-get clean + && apt-get clean \ + && dockerd --version; docker --version -RUN dockerd --version; docker --version RUN python3 -m pip install --no-cache-dir \ PyMySQL \ - aerospike==4.0.0 \ - avro==1.10.2 \ + aerospike==11.1.0 \ asyncio \ + avro==1.10.2 \ + azure-storage-blob \ cassandra-driver \ - confluent-kafka==1.5.0 \ + confluent-kafka==1.9.2 \ + delta-spark==2.3.0 \ dict2xml \ dicttoxml \ docker \ @@ -79,51 +83,60 @@ RUN python3 -m pip install --no-cache-dir \ minio \ nats-py \ protobuf \ - psycopg2-binary==2.8.6 \ + psycopg2-binary==2.9.6 \ + pyhdfs \ pymongo==3.11.0 \ + pyspark==3.3.2 \ pytest \ pytest-order==1.0.0 \ - pytest-timeout \ pytest-random \ - pytest-xdist \ pytest-repeat \ + pytest-timeout \ + pytest-xdist \ pytz \ + pyyaml==5.3.1 \ redis \ - tzlocal==2.1 \ - urllib3 \ requests-kerberos \ - pyspark==3.3.2 \ - delta-spark==2.2.0 \ - pyhdfs \ - azure-storage-blob \ - meilisearch==0.18.3 - -COPY modprobe.sh /usr/local/bin/modprobe -COPY dockerd-entrypoint.sh /usr/local/bin/ -COPY compose/ /compose/ -COPY misc/ /misc/ + tzlocal==2.1 \ + retry \ + bs4 \ + lxml \ + urllib3 +# bs4, lxml are for cloud tests, do not delete -RUN curl -fsSL -O https://dlcdn.apache.org/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \ +# Hudi supports only spark 3.3.*, not 3.4 +RUN curl -fsSL -O https://archive.apache.org/dist/spark/spark-3.3.2/spark-3.3.2-bin-hadoop3.tgz \ && tar xzvf spark-3.3.2-bin-hadoop3.tgz -C / \ && rm spark-3.3.2-bin-hadoop3.tgz # download spark and packages # if you change packages, don't forget to update them in tests/integration/helpers/cluster.py -RUN echo ":quit" | /spark-3.3.2-bin-hadoop3/bin/spark-shell --packages "org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0,io.delta:delta-core_2.12:2.2.0,org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0" > /dev/null +RUN packages="org.apache.hudi:hudi-spark3.3-bundle_2.12:0.13.0,\ +io.delta:delta-core_2.12:2.3.0,\ +org.apache.iceberg:iceberg-spark-runtime-3.3_2.12:1.1.0" \ + && /spark-3.3.2-bin-hadoop3/bin/spark-shell --packages "$packages" > /dev/null \ + && find /root/.ivy2/ -name '*.jar' -exec ln -sf {} /spark-3.3.2-bin-hadoop3/jars/ \; RUN set -x \ && addgroup --system dockremap \ - && adduser --system dockremap \ + && adduser --system dockremap \ && adduser dockremap dockremap \ && echo 'dockremap:165536:65536' >> /etc/subuid \ - && echo 'dockremap:165536:65536' >> /etc/subgid + && echo 'dockremap:165536:65536' >> /etc/subgid + +COPY modprobe.sh /usr/local/bin/modprobe +COPY dockerd-entrypoint.sh /usr/local/bin/ +COPY compose/ /compose/ +COPY misc/ /misc/ + # Same options as in test/base/Dockerfile # (in case you need to override them in tests) -ENV TSAN_OPTIONS='halt_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' +ENV 
TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_mb=46080 second_deadlock_stack=1' ENV UBSAN_OPTIONS='print_stacktrace=1' ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1' EXPOSE 2375 ENTRYPOINT ["dockerd-entrypoint.sh"] -CMD ["sh", "-c", "pytest $PYTEST_OPTS"] +# To pass additional arguments (i.e. list of tests) use PYTEST_ADDOPTS +CMD ["sh", "-c", "pytest"] diff --git a/docker/test/integration/runner/compose/docker_compose_coredns.yml b/docker/test/integration/runner/compose/docker_compose_coredns.yml index b329d4e0a466..e4736e04846b 100644 --- a/docker/test/integration/runner/compose/docker_compose_coredns.yml +++ b/docker/test/integration/runner/compose/docker_compose_coredns.yml @@ -2,7 +2,7 @@ version: "2.3" services: coredns: - image: coredns/coredns:latest + image: coredns/coredns:1.9.3 # :latest broke this test restart: always volumes: - ${COREDNS_CONFIG_DIR}/example.com:/example.com diff --git a/docker/test/integration/runner/compose/docker_compose_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_hdfs.yml index f83eb93fea70..1cae54ad9e1a 100644 --- a/docker/test/integration/runner/compose/docker_compose_hdfs.yml +++ b/docker/test/integration/runner/compose/docker_compose_hdfs.yml @@ -12,3 +12,5 @@ services: - type: ${HDFS_FS:-tmpfs} source: ${HDFS_LOGS:-} target: /usr/local/hadoop/logs + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' diff --git a/docker/test/integration/runner/compose/docker_compose_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kafka.yml index 7e34f4c114d2..4ae3de3cbc74 100644 --- a/docker/test/integration/runner/compose/docker_compose_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kafka.yml @@ -4,6 +4,8 @@ services: kafka_zookeeper: image: zookeeper:3.4.9 hostname: kafka_zookeeper + ports: + - 2181:2181 environment: ZOO_MY_ID: 1 ZOO_PORT: 2181 @@ -15,33 +17,59 @@ services: image: confluentinc/cp-kafka:5.2.0 hostname: kafka1 ports: - - ${KAFKA_EXTERNAL_PORT:-8081}:${KAFKA_EXTERNAL_PORT:-8081} + - ${KAFKA_EXTERNAL_PORT}:${KAFKA_EXTERNAL_PORT} environment: KAFKA_ADVERTISED_LISTENERS: INSIDE://localhost:${KAFKA_EXTERNAL_PORT},OUTSIDE://kafka1:19092 KAFKA_ADVERTISED_HOST_NAME: kafka1 - KAFKA_LISTENERS: INSIDE://0.0.0.0:${KAFKA_EXTERNAL_PORT},OUTSIDE://0.0.0.0:19092 KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: INSIDE:PLAINTEXT,OUTSIDE:PLAINTEXT KAFKA_INTER_BROKER_LISTENER_NAME: INSIDE KAFKA_BROKER_ID: 1 - KAFKA_ZOOKEEPER_CONNECT: "kafka_zookeeper:2181" + KAFKA_ZOOKEEPER_CONNECT: kafka_zookeeper:2181 KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO" KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 depends_on: - kafka_zookeeper security_opt: - label:disable + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' schema-registry: image: confluentinc/cp-schema-registry:5.2.0 hostname: schema-registry ports: - - ${SCHEMA_REGISTRY_EXTERNAL_PORT:-12313}:${SCHEMA_REGISTRY_INTERNAL_PORT:-12313} + - ${SCHEMA_REGISTRY_EXTERNAL_PORT}:${SCHEMA_REGISTRY_EXTERNAL_PORT} environment: SCHEMA_REGISTRY_HOST_NAME: schema-registry - SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL: PLAINTEXT SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092 + SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_EXTERNAL_PORT} + SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: noauth + depends_on: + - kafka_zookeeper + - kafka1 + restart: always + security_opt: + - label:disable + + schema-registry-auth: + image: 
confluentinc/cp-schema-registry:5.2.0 + hostname: schema-registry-auth + ports: + - ${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT}:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT} + environment: + SCHEMA_REGISTRY_HOST_NAME: schema-registry-auth + SCHEMA_REGISTRY_LISTENERS: http://0.0.0.0:${SCHEMA_REGISTRY_AUTH_EXTERNAL_PORT} + SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: PLAINTEXT://kafka1:19092 + SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC + SCHEMA_REGISTRY_AUTHENTICATION_ROLES: user + SCHEMA_REGISTRY_AUTHENTICATION_REALM: RealmFooBar + SCHEMA_REGISTRY_OPTS: "-Djava.security.auth.login.config=/etc/schema-registry/secrets/schema_registry_jaas.conf" + SCHEMA_REGISTRY_SCHEMA_REGISTRY_GROUP_ID: auth + volumes: + - ${SCHEMA_REGISTRY_DIR:-}/secrets:/etc/schema-registry/secrets depends_on: - kafka_zookeeper - kafka1 + restart: always security_opt: - label:disable diff --git a/docker/test/integration/runner/compose/docker_compose_keeper.yml b/docker/test/integration/runner/compose/docker_compose_keeper.yml index 8524823ed87c..91010c4aa83d 100644 --- a/docker/test/integration/runner/compose/docker_compose_keeper.yml +++ b/docker/test/integration/runner/compose/docker_compose_keeper.yml @@ -20,6 +20,9 @@ services: - type: ${keeper_fs:-tmpfs} source: ${keeper_db_dir1:-} target: /var/lib/clickhouse-keeper + - type: ${keeper_fs:-tmpfs} + source: ${keeper_db_dir1:-} + target: /var/lib/clickhouse entrypoint: "${keeper_cmd_prefix:-clickhouse keeper} --config=/etc/clickhouse-keeper/keeper_config1.xml --log-file=/var/log/clickhouse-keeper/clickhouse-keeper.log --errorlog-file=/var/log/clickhouse-keeper/clickhouse-keeper.err.log" cap_add: - SYS_PTRACE @@ -53,6 +56,9 @@ services: - type: ${keeper_fs:-tmpfs} source: ${keeper_db_dir2:-} target: /var/lib/clickhouse-keeper + - type: ${keeper_fs:-tmpfs} + source: ${keeper_db_dir2:-} + target: /var/lib/clickhouse entrypoint: "${keeper_cmd_prefix:-clickhouse keeper} --config=/etc/clickhouse-keeper/keeper_config2.xml --log-file=/var/log/clickhouse-keeper/clickhouse-keeper.log --errorlog-file=/var/log/clickhouse-keeper/clickhouse-keeper.err.log" cap_add: - SYS_PTRACE @@ -86,6 +92,9 @@ services: - type: ${keeper_fs:-tmpfs} source: ${keeper_db_dir3:-} target: /var/lib/clickhouse-keeper + - type: ${keeper_fs:-tmpfs} + source: ${keeper_db_dir3:-} + target: /var/lib/clickhouse entrypoint: "${keeper_cmd_prefix:-clickhouse keeper} --config=/etc/clickhouse-keeper/keeper_config3.xml --log-file=/var/log/clickhouse-keeper/clickhouse-keeper.log --errorlog-file=/var/log/clickhouse-keeper/clickhouse-keeper.err.log" cap_add: - SYS_PTRACE diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml index 1160192696dd..e955a14eb3df 100644 --- a/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml +++ b/docker/test/integration/runner/compose/docker_compose_kerberized_hdfs.yml @@ -20,6 +20,8 @@ services: depends_on: - hdfskerberos entrypoint: /etc/bootstrap.sh -d + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' hdfskerberos: image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} @@ -29,3 +31,5 @@ services: - ${KERBERIZED_HDFS_DIR}/../../kerberos_image_config.sh:/config.sh - /dev/urandom:/dev/random expose: [88, 749] + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' diff --git a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml index
86e920ff573b..49d4c1db90fe 100644 --- a/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml +++ b/docker/test/integration/runner/compose/docker_compose_kerberized_kafka.yml @@ -48,6 +48,8 @@ services: - kafka_kerberos security_opt: - label:disable + sysctls: + net.ipv4.ip_local_port_range: '55000 65535' kafka_kerberos: image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} diff --git a/docker/test/integration/runner/compose/docker_compose_ldap.yml b/docker/test/integration/runner/compose/docker_compose_ldap.yml new file mode 100644 index 000000000000..857f8fdce624 --- /dev/null +++ b/docker/test/integration/runner/compose/docker_compose_ldap.yml @@ -0,0 +1,16 @@ +version: '2.3' +services: + openldap: + image: bitnami/openldap:2.6.6 + restart: always + environment: + LDAP_ROOT: dc=example,dc=org + LDAP_ADMIN_DN: cn=admin,dc=example,dc=org + LDAP_ADMIN_USERNAME: admin + LDAP_ADMIN_PASSWORD: clickhouse + LDAP_USER_DC: users + LDAP_USERS: janedoe,johndoe + LDAP_PASSWORDS: qwerty,qwertz + LDAP_PORT_NUMBER: ${LDAP_INTERNAL_PORT:-1389} + ports: + - ${LDAP_EXTERNAL_PORT:-1389}:${LDAP_INTERNAL_PORT:-1389} diff --git a/docker/test/integration/runner/compose/docker_compose_meili.yml b/docker/test/integration/runner/compose/docker_compose_meili.yml deleted file mode 100644 index c734c43b4c64..000000000000 --- a/docker/test/integration/runner/compose/docker_compose_meili.yml +++ /dev/null @@ -1,16 +0,0 @@ -version: '2.3' -services: - meili1: - image: getmeili/meilisearch:v0.27.0 - restart: always - ports: - - ${MEILI_EXTERNAL_PORT:-7700}:${MEILI_INTERNAL_PORT:-7700} - - meili_secure: - image: getmeili/meilisearch:v0.27.0 - restart: always - ports: - - ${MEILI_SECURE_EXTERNAL_PORT:-7700}:${MEILI_SECURE_INTERNAL_PORT:-7700} - environment: - MEILI_MASTER_KEY: "password" - diff --git a/docker/test/integration/runner/compose/docker_compose_minio.yml b/docker/test/integration/runner/compose/docker_compose_minio.yml index 3eaf891ff8e8..45e55e7a79cf 100644 --- a/docker/test/integration/runner/compose/docker_compose_minio.yml +++ b/docker/test/integration/runner/compose/docker_compose_minio.yml @@ -2,9 +2,7 @@ version: '2.3' services: minio1: - # Newer version of minio results in such errors: - # "AWSErrorMarshaller: Encountered AWSError 'InternalError': We encountered an internal error, please try again" - image: minio/minio:RELEASE.2021-09-23T04-46-24Z + image: minio/minio:RELEASE.2023-09-30T07-02-29Z volumes: - data1-1:/data1 - ${MINIO_CERTS_DIR:-}:/certs @@ -14,7 +12,7 @@ services: MINIO_ACCESS_KEY: minio MINIO_SECRET_KEY: minio123 MINIO_PROMETHEUS_AUTH_TYPE: public - command: server --address :9001 --certs-dir /certs /data1-1 + command: server --console-address 127.0.0.1:19001 --address :9001 --certs-dir /certs /data1-1 depends_on: - proxy1 - proxy2 diff --git a/docker/test/integration/runner/compose/docker_compose_mysql.yml b/docker/test/integration/runner/compose/docker_compose_mysql.yml index 6b98a372bd05..103fe2769e93 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql.yml @@ -9,10 +9,10 @@ services: DATADIR: /mysql/ expose: - ${MYSQL_PORT:-3306} - command: --server_id=100 - --log-bin='mysql-bin-1.log' - --default-time-zone='+3:00' - --gtid-mode="ON" + command: --server_id=100 + --log-bin='mysql-bin-1.log' + --default-time-zone='+3:00' + --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/error.log @@ -21,4 +21,4 @@ services: volumes: - type: 
${MYSQL_LOGS_FS:-tmpfs} source: ${MYSQL_LOGS:-} - target: /mysql/ \ No newline at end of file + target: /mysql/ diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml b/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml index d5fb5a53aafd..9c9c7430ceca 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_8_0.yml @@ -9,9 +9,9 @@ services: DATADIR: /mysql/ expose: - ${MYSQL8_PORT:-3306} - command: --server_id=100 --log-bin='mysql-bin-1.log' - --default_authentication_plugin='mysql_native_password' - --default-time-zone='+3:00' --gtid-mode="ON" + command: --server_id=100 --log-bin='mysql-bin-1.log' + --default_authentication_plugin='mysql_native_password' + --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/error.log @@ -20,4 +20,4 @@ services: volumes: - type: ${MYSQL8_LOGS_FS:-tmpfs} source: ${MYSQL8_LOGS:-} - target: /mysql/ \ No newline at end of file + target: /mysql/ diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml b/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml index 8e145a3b408a..73f9e39f0d6a 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_cluster.yml @@ -9,10 +9,10 @@ services: DATADIR: /mysql/ expose: - ${MYSQL_CLUSTER_PORT:-3306} - command: --server_id=100 - --log-bin='mysql-bin-2.log' - --default-time-zone='+3:00' - --gtid-mode="ON" + command: --server_id=100 + --log-bin='mysql-bin-2.log' + --default-time-zone='+3:00' + --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/2_error.log @@ -31,10 +31,10 @@ services: DATADIR: /mysql/ expose: - ${MYSQL_CLUSTER_PORT:-3306} - command: --server_id=100 - --log-bin='mysql-bin-3.log' - --default-time-zone='+3:00' - --gtid-mode="ON" + command: --server_id=100 + --log-bin='mysql-bin-3.log' + --default-time-zone='+3:00' + --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/3_error.log @@ -53,10 +53,10 @@ services: DATADIR: /mysql/ expose: - ${MYSQL_CLUSTER_PORT:-3306} - command: --server_id=100 - --log-bin='mysql-bin-4.log' - --default-time-zone='+3:00' - --gtid-mode="ON" + command: --server_id=100 + --log-bin='mysql-bin-4.log' + --default-time-zone='+3:00' + --gtid-mode="ON" --enforce-gtid-consistency --log-error-verbosity=3 --log-error=/mysql/4_error.log @@ -65,4 +65,4 @@ services: volumes: - type: ${MYSQL_CLUSTER_LOGS_FS:-tmpfs} source: ${MYSQL_CLUSTER_LOGS:-} - target: /mysql/ \ No newline at end of file + target: /mysql/ diff --git a/docker/test/integration/runner/compose/docker_compose_mysql_java_client.yml b/docker/test/integration/runner/compose/docker_compose_mysql_java_client.yml index eb5ffb01baa2..529974dd4bfe 100644 --- a/docker/test/integration/runner/compose/docker_compose_mysql_java_client.yml +++ b/docker/test/integration/runner/compose/docker_compose_mysql_java_client.yml @@ -3,4 +3,4 @@ services: java1: image: clickhouse/mysql-java-client:${DOCKER_MYSQL_JAVA_CLIENT_TAG:-latest} # to keep container running - command: sleep infinity + command: sleep 1d diff --git a/docker/test/integration/runner/compose/docker_compose_nginx.yml b/docker/test/integration/runner/compose/docker_compose_nginx.yml index d0fb9fc1ff42..38d2a6d84c84 100644 --- a/docker/test/integration/runner/compose/docker_compose_nginx.yml 
+++ b/docker/test/integration/runner/compose/docker_compose_nginx.yml @@ -5,7 +5,7 @@ services: # Files will be put into /usr/share/nginx/files. nginx: - image: kssenii/nginx-test:1.1 + image: clickhouse/nginx-dav:${DOCKER_NGINX_DAV_TAG:-latest} restart: always ports: - 80:80 diff --git a/docker/test/integration/runner/compose/docker_compose_postgres.yml b/docker/test/integration/runner/compose/docker_compose_postgres.yml index 1fb6b7a14108..2ef7eb173952 100644 --- a/docker/test/integration/runner/compose/docker_compose_postgres.yml +++ b/docker/test/integration/runner/compose/docker_compose_postgres.yml @@ -12,9 +12,9 @@ services: timeout: 5s retries: 5 networks: - default: - aliases: - - postgre-sql.local + default: + aliases: + - postgre-sql.local environment: POSTGRES_HOST_AUTH_METHOD: "trust" POSTGRES_PASSWORD: mysecretpassword diff --git a/docker/test/integration/runner/compose/docker_compose_rabbitmq.yml b/docker/test/integration/runner/compose/docker_compose_rabbitmq.yml index 0190516728cf..2db9fb589d2a 100644 --- a/docker/test/integration/runner/compose/docker_compose_rabbitmq.yml +++ b/docker/test/integration/runner/compose/docker_compose_rabbitmq.yml @@ -2,15 +2,13 @@ version: '2.3' services: rabbitmq1: - image: rabbitmq:3.8-management-alpine + image: rabbitmq:3.12.6-management-alpine hostname: rabbitmq1 expose: - ${RABBITMQ_PORT:-5672} - environment: - RABBITMQ_DEFAULT_USER: "root" - RABBITMQ_DEFAULT_PASS: "clickhouse" - RABBITMQ_LOG_BASE: /rabbitmq_logs/ volumes: - type: ${RABBITMQ_LOGS_FS:-tmpfs} source: ${RABBITMQ_LOGS:-} target: /rabbitmq_logs/ + - "${RABBITMQ_COOKIE_FILE}:/var/lib/rabbitmq/.erlang.cookie" + - /misc/rabbitmq.conf:/etc/rabbitmq/rabbitmq.conf \ No newline at end of file diff --git a/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml b/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml index 7a1c32e00232..b5dbae423b2d 100644 --- a/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml +++ b/docker/test/integration/runner/compose/docker_compose_zookeeper_secure.yml @@ -12,7 +12,7 @@ services: command: ["zkServer.sh", "start-foreground"] entrypoint: /zookeeper-ssl-entrypoint.sh volumes: - - type: bind + - type: bind source: /misc/zookeeper-ssl-entrypoint.sh target: /zookeeper-ssl-entrypoint.sh - type: bind @@ -37,7 +37,7 @@ services: command: ["zkServer.sh", "start-foreground"] entrypoint: /zookeeper-ssl-entrypoint.sh volumes: - - type: bind + - type: bind source: /misc/zookeeper-ssl-entrypoint.sh target: /zookeeper-ssl-entrypoint.sh - type: bind @@ -61,7 +61,7 @@ services: command: ["zkServer.sh", "start-foreground"] entrypoint: /zookeeper-ssl-entrypoint.sh volumes: - - type: bind + - type: bind source: /misc/zookeeper-ssl-entrypoint.sh target: /zookeeper-ssl-entrypoint.sh - type: bind diff --git a/docker/test/integration/runner/dockerd-entrypoint.sh b/docker/test/integration/runner/dockerd-entrypoint.sh index fe47fc909513..b05aef76faf8 100755 --- a/docker/test/integration/runner/dockerd-entrypoint.sh +++ b/docker/test/integration/runner/dockerd-entrypoint.sh @@ -12,6 +12,17 @@ echo '{ "registry-mirrors" : ["http://dockerhub-proxy.dockerhub-proxy-zone:5000"] }' | dd of=/etc/docker/daemon.json 2>/dev/null +if [ -f /sys/fs/cgroup/cgroup.controllers ]; then + # move the processes from the root group to the /init group, + # otherwise writing subtree_control fails with EBUSY. + # An error during moving non-existent process (i.e., "cat") is ignored. 
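# (Aside: the sed pipeline just below turns the space-separated controller list into
# the "+controller" form that cgroup.subtree_control expects. A sketch, with assumed
# controller names:
#   echo 'cpuset cpu io memory pids' | sed -e 's/ / +/g' -e 's/^/+/'
#   # prints: +cpuset +cpu +io +memory +pids
# )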
+ mkdir -p /sys/fs/cgroup/init + xargs -rn1 < /sys/fs/cgroup/cgroup.procs > /sys/fs/cgroup/init/cgroup.procs || : + # enable controllers + sed -e 's/ / +/g' -e 's/^/+/' < /sys/fs/cgroup/cgroup.controllers \ + > /sys/fs/cgroup/cgroup.subtree_control +fi + # If a test hangs, it is convenient to use pytest --pdb to debug it, # and on a hang you can simply press Ctrl-C and it will spawn a python pdb, # but on SIGINT dockerd will exit, so ignore it to preserve the daemon. @@ -52,14 +63,17 @@ export CLICKHOUSE_TESTS_BASE_CONFIG_DIR=/clickhouse-config export CLICKHOUSE_ODBC_BRIDGE_BINARY_PATH=/clickhouse-odbc-bridge export CLICKHOUSE_LIBRARY_BRIDGE_BINARY_PATH=/clickhouse-library-bridge -export DOCKER_MYSQL_GOLANG_CLIENT_TAG=${DOCKER_MYSQL_GOLANG_CLIENT_TAG:=latest} +export DOCKER_BASE_TAG=${DOCKER_BASE_TAG:=latest} export DOCKER_DOTNET_CLIENT_TAG=${DOCKER_DOTNET_CLIENT_TAG:=latest} +export DOCKER_HELPER_TAG=${DOCKER_HELPER_TAG:=latest} +export DOCKER_KERBERIZED_HADOOP_TAG=${DOCKER_KERBERIZED_HADOOP_TAG:=latest} +export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest} +export DOCKER_MYSQL_GOLANG_CLIENT_TAG=${DOCKER_MYSQL_GOLANG_CLIENT_TAG:=latest} export DOCKER_MYSQL_JAVA_CLIENT_TAG=${DOCKER_MYSQL_JAVA_CLIENT_TAG:=latest} export DOCKER_MYSQL_JS_CLIENT_TAG=${DOCKER_MYSQL_JS_CLIENT_TAG:=latest} export DOCKER_MYSQL_PHP_CLIENT_TAG=${DOCKER_MYSQL_PHP_CLIENT_TAG:=latest} +export DOCKER_NGINX_DAV_TAG=${DOCKER_NGINX_DAV_TAG:=latest} export DOCKER_POSTGRESQL_JAVA_CLIENT_TAG=${DOCKER_POSTGRESQL_JAVA_CLIENT_TAG:=latest} -export DOCKER_KERBEROS_KDC_TAG=${DOCKER_KERBEROS_KDC_TAG:=latest} -export DOCKER_KERBERIZED_HADOOP_TAG=${DOCKER_KERBERIZED_HADOOP_TAG:=latest} cd /ClickHouse/tests/integration exec "$@" diff --git a/docker/test/integration/runner/misc/rabbitmq.conf b/docker/test/integration/runner/misc/rabbitmq.conf new file mode 100644 index 000000000000..3527c83880b8 --- /dev/null +++ b/docker/test/integration/runner/misc/rabbitmq.conf @@ -0,0 +1,8 @@ +loopback_users.guest = false +listeners.tcp.default = 5672 +default_pass = clickhouse +default_user = root +management.tcp.port = 15672 + +log.file = /rabbitmq_logs/rabbit.log +log.file.level = debug diff --git a/docker/test/keeper-jepsen/run.sh b/docker/test/keeper-jepsen/run.sh index 694d7fcd9167..576a0f0ef8e6 100644 --- a/docker/test/keeper-jepsen/run.sh +++ b/docker/test/keeper-jepsen/run.sh @@ -2,7 +2,7 @@ set -euo pipefail -CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-16_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} +CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-17_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""} diff --git a/docker/test/libfuzzer/Dockerfile b/docker/test/libfuzzer/Dockerfile new file mode 100644 index 000000000000..081cf5473f8f --- /dev/null +++ b/docker/test/libfuzzer/Dockerfile @@ -0,0 +1,43 @@ +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG + +# ARG for quick switch to a given ubuntu mirror +ARG apt_archive="http://archive.ubuntu.com" +RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list + +ENV LANG=C.UTF-8 +ENV TZ=Europe/Amsterdam +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +RUN apt-get update \ + && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends
\ + ca-certificates \ + libc6-dbg \ + moreutils \ + ncdu \ + p7zip-full \ + parallel \ + psmisc \ + python3 \ + python3-pip \ + rsync \ + tree \ + tzdata \ + vim \ + wget \ + && apt-get autoremove --yes \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN pip3 install Jinja2 + +COPY * / + +ENV FUZZER_ARGS="-max_total_time=60" + +SHELL ["/bin/bash", "-c"] +CMD set -o pipefail \ + && timeout -s 9 1h /run_libfuzzer.py 2>&1 | ts "$(printf '%%Y-%%m-%%d %%H:%%M:%%S\t')" | tee main.log + +# docker run --network=host --volume :/workspace -e PR_TO_TEST=<> -e SHA_TO_TEST=<> clickhouse/libfuzzer + diff --git a/docker/test/libfuzzer/run_libfuzzer.py b/docker/test/libfuzzer/run_libfuzzer.py new file mode 100755 index 000000000000..5ed019490d51 --- /dev/null +++ b/docker/test/libfuzzer/run_libfuzzer.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 + +import configparser +import logging +import os +from pathlib import Path +import subprocess + +DEBUGGER = os.getenv("DEBUGGER", "") +FUZZER_ARGS = os.getenv("FUZZER_ARGS", "") + + +def run_fuzzer(fuzzer: Path): + logging.info(f"Running fuzzer {fuzzer}...") + + corpus_dir = f"{fuzzer}.in" + # pathlib.Path is not a context manager; a plain instance is enough here + path = Path(corpus_dir) + if not path.is_dir(): + corpus_dir = "" + + options_file = f"{fuzzer}.options" + custom_libfuzzer_options = "" + + path = Path(options_file) + if path.is_file(): + parser = configparser.ConfigParser() + parser.read(path) + + if parser.has_section("asan"): + os.environ[ + "ASAN_OPTIONS" + ] = f"{os.environ.get('ASAN_OPTIONS', '')}:{':'.join('%s=%s' % (key, value) for key, value in parser['asan'].items())}" + + if parser.has_section("msan"): + os.environ[ + "MSAN_OPTIONS" + ] = f"{os.environ.get('MSAN_OPTIONS', '')}:{':'.join('%s=%s' % (key, value) for key, value in parser['msan'].items())}" + + if parser.has_section("ubsan"): + os.environ[ + "UBSAN_OPTIONS" + ] = f"{os.environ.get('UBSAN_OPTIONS', '')}:{':'.join('%s=%s' % (key, value) for key, value in parser['ubsan'].items())}" + + if parser.has_section("libfuzzer"): + custom_libfuzzer_options = " ".join( + "-%s=%s" % (key, value) + for key, value in parser["libfuzzer"].items() + ) + + cmd_line = f"{DEBUGGER} ./{fuzzer} {FUZZER_ARGS} {corpus_dir}" + if custom_libfuzzer_options: + cmd_line += f" {custom_libfuzzer_options}" + + if "-dict=" not in cmd_line and Path(f"{fuzzer}.dict").exists(): + cmd_line += f" -dict={fuzzer}.dict" + + cmd_line += " < /dev/null" + + logging.info(f"...will execute: {cmd_line}") + subprocess.check_call(cmd_line, shell=True) + + +def main(): + logging.basicConfig(level=logging.INFO) + + subprocess.check_call("ls -al", shell=True) + + current = Path() + for fuzzer in current.iterdir(): + if fuzzer.is_file() and os.access(fuzzer, os.X_OK): + run_fuzzer(fuzzer) + + exit(0) + + +if __name__ == "__main__": + main() diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile index ab9f1f8a2e30..d31663f90711 100644 --- a/docker/test/performance-comparison/Dockerfile +++ b/docker/test/performance-comparison/Dockerfile @@ -1,18 +1,7 @@ # docker build -t clickhouse/performance-comparison .
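(For reference, a hypothetical my_fuzzer.options file in the format run_libfuzzer.py above parses; all names and values here are made up:

    [libfuzzer]
    max_len = 1024
    timeout = 30

    [asan]
    detect_leaks = 0

Keys under [libfuzzer] become command-line flags, "-max_len=1024 -timeout=30", while [asan]/[msan]/[ubsan] keys are appended to the corresponding *_OPTIONS environment variables. A my_fuzzer.dict file next to the binary would likewise be picked up as -dict=my_fuzzer.dict.)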
-# Using ubuntu:22.04 over 20.04 as all other images, since: -# a) ubuntu 20.04 has too old parallel, and does not support --memsuspend -# b) anyway for perf tests it should not be important (backward compatiblity -# with older ubuntu had been checked lots of times in various tests) -FROM ubuntu:22.04 - -# ARG for quick switch to a given ubuntu mirror -ARG apt_archive="http://archive.ubuntu.com" -RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list - -ENV LANG=C.UTF-8 -ENV TZ=Europe/Moscow -RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG RUN apt-get update \ && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \ @@ -56,10 +45,9 @@ COPY * / # node #0 should be less stable because of system interruptions. We bind # randomly to node 1 or 0 to gather some statistics on that. We have to bind # both servers and the tmpfs on which the database is stored. How to do it -# through Yandex Sandbox API is unclear, but by default tmpfs uses +# is unclear, but by default tmpfs uses # 'process allocation policy', not sure which process but hopefully the one that -# writes to it, so just bind the downloader script as well. We could also try to -# remount it with proper options in Sandbox task. +# writes to it, so just bind the downloader script as well. # https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt # Double-escaped backslashes are a tribute to the engineering wonder of docker -- # it gives '/bin/sh: 1: [bash,: not found' otherwise. diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 798d2a40b123..24df1b9c6b17 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -90,7 +90,7 @@ function configure set +m wait_for_server $LEFT_SERVER_PORT $left_pid - echo Server for setup started + echo "Server for setup started" clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||: clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||: @@ -156,9 +156,9 @@ function restart wait_for_server $RIGHT_SERVER_PORT $right_pid echo right ok - clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.tables where database != 'system'" + clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.tables where database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')" clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.build_options" - clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.tables where database != 'system'" + clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.tables where database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')" clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.build_options" # Check again that both servers we started are running -- this is important @@ -352,14 +352,12 @@ function get_profiles wait clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > left-query-log.tsv ||: & - clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > left-query-thread-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from 
system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: & clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > left-async-metric-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > right-query-log.tsv ||: & - clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > right-query-thread-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: & clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > right-metric-log.tsv ||: & @@ -396,7 +394,7 @@ do done # for each query run, prepare array of metrics from query log -clickhouse-local --query " +clickhouse-local --multiquery --query " create view query_runs as select * from file('analyze/query-runs.tsv', TSV, 'test text, query_index int, query_id text, version UInt8, time float'); @@ -553,7 +551,7 @@ numactl --cpunodebind=all --membind=all numactl --show # If the available memory falls below 2 * size, GNU parallel will suspend some of the running jobs. numactl --cpunodebind=all --membind=all parallel -v --joblog analyze/parallel-log.txt --memsuspend 15G --null < analyze/commands.txt 2>> analyze/errors.log -clickhouse-local --query " +clickhouse-local --multiquery --query " -- Join the metric names back to the metric statistics we've calculated, and make -- a denormalized table of them -- statistics for all metrics for all queries. -- The WITH, ARRAY JOIN and CROSS JOIN do not like each other: @@ -646,12 +644,12 @@ function report rm -r report ||: mkdir report report/tmp ||: -rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv slow-on-client.tsv all-queries.tsv run-errors.tsv ||: +rm ./*.{rep,svg} test-times.tsv test-dump.tsv unstable.tsv unstable-query-ids.tsv unstable-query-metrics.tsv changed-perf.tsv unstable-tests.tsv unstable-queries.tsv bad-tests.tsv all-queries.tsv run-errors.tsv ||: cat analyze/errors.log >> report/errors.log ||: cat profile-errors.log >> report/errors.log ||: -clickhouse-local --query " +clickhouse-local --multiquery --query " create view query_display_names as select * from file('analyze/query-display-names.tsv', TSV, 'test text, query_index int, query_display_name text') @@ -665,9 +663,8 @@ create view partial_query_times as select * from -- Report for backward-incompatible ('partial') queries that we could only run on the new server (e.g. 
-- queries with new functions added in the tested PR). create table partial_queries_report engine File(TSV, 'report/partial-queries-report.tsv') - settings output_format_decimal_trailing_zeros = 1 - as select toDecimal64(time_median, 3) time, - toDecimal64(time_stddev / time_median, 3) relative_time_stddev, + as select round(time_median, 3) time, + round(time_stddev / time_median, 3) relative_time_stddev, test, query_index, query_display_name from partial_query_times join query_display_names using (test, query_index) @@ -739,28 +736,26 @@ create table queries engine File(TSVWithNamesAndTypes, 'report/queries.tsv') ; create table changed_perf_report engine File(TSV, 'report/changed-perf.tsv') - settings output_format_decimal_trailing_zeros = 1 as with -- server_time is sometimes reported as zero (if it's less than 1 ms), -- so we have to work around this to not get an error about conversion -- of NaN to decimal. (left > right ? left / right : right / left) as times_change_float, isFinite(times_change_float) as times_change_finite, - toDecimal64(times_change_finite ? times_change_float : 1., 3) as times_change_decimal, + round(times_change_finite ? times_change_float : 1., 3) as times_change_decimal, times_change_finite ? (left > right ? '-' : '+') || toString(times_change_decimal) || 'x' : '--' as times_change_str select - toDecimal64(left, 3), toDecimal64(right, 3), times_change_str, - toDecimal64(diff, 3), toDecimal64(stat_threshold, 3), + round(left, 3), round(right, 3), times_change_str, + round(diff, 3), round(stat_threshold, 3), changed_fail, test, query_index, query_display_name from queries where changed_show order by abs(diff) desc; create table unstable_queries_report engine File(TSV, 'report/unstable-queries.tsv') - settings output_format_decimal_trailing_zeros = 1 as select - toDecimal64(left, 3), toDecimal64(right, 3), toDecimal64(diff, 3), - toDecimal64(stat_threshold, 3), unstable_fail, test, query_index, query_display_name + round(left, 3), round(right, 3), round(diff, 3), + round(stat_threshold, 3), unstable_fail, test, query_index, query_display_name from queries where unstable_show order by stat_threshold desc; @@ -789,11 +784,10 @@ create view total_speedup as ; create table test_perf_changes_report engine File(TSV, 'report/test-perf-changes.tsv') - settings output_format_decimal_trailing_zeros = 1 as with (times_speedup >= 1 - ? '-' || toString(toDecimal64(times_speedup, 3)) || 'x' - : '+' || toString(toDecimal64(1 / times_speedup, 3)) || 'x') + ? 
'-' || toString(round(times_speedup, 3)) || 'x' + : '+' || toString(round(1 / times_speedup, 3)) || 'x') as times_speedup_str select test, times_speedup_str, queries, bad, changed, unstable -- Not sure what's the precedence of UNION ALL vs WHERE & ORDER BY, hence all @@ -816,13 +810,6 @@ create view total_client_time_per_query as select * from file('analyze/client-times.tsv', TSV, 'test text, query_index int, client float, server float'); -create table slow_on_client_report engine File(TSV, 'report/slow-on-client.tsv') - settings output_format_decimal_trailing_zeros = 1 - as select client, server, toDecimal64(client/server, 3) p, - test, query_display_name - from total_client_time_per_query left join query_display_names using (test, query_index) - where p > toDecimal64(1.02, 3) order by p desc; - create table wall_clock_time_per_test engine Memory as select * from file('wall-clock-times.tsv', TSV, 'test text, real float, user float, system float'); @@ -899,15 +886,14 @@ create view test_times_view_total as ; create table test_times_report engine File(TSV, 'report/test-times.tsv') - settings output_format_decimal_trailing_zeros = 1 as select test, - toDecimal64(real, 3), - toDecimal64(total_client_time, 3), + round(real, 3), + round(total_client_time, 3), queries, - toDecimal64(query_max, 3), - toDecimal64(avg_real_per_query, 3), - toDecimal64(query_min, 3), + round(query_max, 3), + round(avg_real_per_query, 3), + round(query_min, 3), runs from ( select * from test_times_view @@ -919,21 +905,20 @@ create table test_times_report engine File(TSV, 'report/test-times.tsv') -- report for all queries page, only main metric create table all_tests_report engine File(TSV, 'report/all-queries.tsv') - settings output_format_decimal_trailing_zeros = 1 as with -- server_time is sometimes reported as zero (if it's less than 1 ms), -- so we have to work around this to not get an error about conversion -- of NaN to decimal. (left > right ? left / right : right / left) as times_change_float, isFinite(times_change_float) as times_change_finite, - toDecimal64(times_change_finite ? times_change_float : 1., 3) as times_change_decimal, + round(times_change_finite ? times_change_float : 1., 3) as times_change_decimal, times_change_finite ? (left > right ? '-' : '+') || toString(times_change_decimal) || 'x' : '--' as times_change_str select changed_fail, unstable_fail, - toDecimal64(left, 3), toDecimal64(right, 3), times_change_str, - toDecimal64(isFinite(diff) ? diff : 0, 3), - toDecimal64(isFinite(stat_threshold) ? stat_threshold : 0, 3), + round(left, 3), round(right, 3), times_change_str, + round(isFinite(diff) ? diff : 0, 3), + round(isFinite(stat_threshold) ? 
stat_threshold : 0, 3), test, query_index, query_display_name from queries order by test, query_index; @@ -965,7 +950,7 @@ create table all_query_metrics_tsv engine File(TSV, 'report/all-query-metrics.ts for version in {right,left} do rm -rf data - clickhouse-local --query " + clickhouse-local --multiquery --query " create view query_profiles as with 0 as left, 1 as right select * from file('analyze/query-profiles.tsv', TSV, @@ -1044,27 +1029,6 @@ create table unstable_run_traces engine File(TSVWithNamesAndTypes, order by count() desc ; -create table metric_devation engine File(TSVWithNamesAndTypes, - 'report/metric-deviation.$version.tsv') - settings output_format_decimal_trailing_zeros = 1 - -- first goes the key used to split the file with grep - as select test, query_index, query_display_name, - toDecimal64(d, 3) d, q, metric - from ( - select - test, query_index, - (q[3] - q[1])/q[2] d, - quantilesExact(0, 0.5, 1)(value) q, metric - from (select * from unstable_run_metrics - union all select * from unstable_run_traces - union all select * from unstable_run_metrics_2) mm - group by test, query_index, metric - having isFinite(d) and d > 0.5 and q[3] > 5 - ) metrics - left join query_display_names using (test, query_index) - order by test, query_index, d desc - ; - create table stacks engine File(TSV, 'report/stacks.$version.tsv') as select -- first goes the key used to split the file with grep @@ -1156,7 +1120,7 @@ function report_metrics rm -rf metrics ||: mkdir metrics -clickhouse-local --query " +clickhouse-local --multiquery --query " create view right_async_metric_log as select * from file('right-async-metric-log.tsv', TSVWithNamesAndTypes) ; @@ -1173,9 +1137,8 @@ create table metrics engine File(TSV, 'metrics/metrics.tsv') as -- Show metrics that have changed create table changes engine File(TSV, 'metrics/changes.tsv') - settings output_format_decimal_trailing_zeros = 1 as select metric, left, right, - toDecimal64(diff, 3), toDecimal64(times_diff, 3) + round(diff, 3), round(times_diff, 3) from ( select metric, median(left) as left, median(right) as right, (right - left) / left diff, @@ -1217,7 +1180,7 @@ function upload_results # Prepare info for the CI checks table. rm -f ci-checks.tsv - clickhouse-local --query " + clickhouse-local --multiquery --query " create view queries as select * from file('report/queries.tsv', TSVWithNamesAndTypes); create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv') @@ -1226,7 +1189,6 @@ create table ci_checks engine File(TSVWithNamesAndTypes, 'ci-checks.tsv') '$SHA_TO_TEST' :: LowCardinality(String) AS commit_sha, '${CLICKHOUSE_PERFORMANCE_COMPARISON_CHECK_NAME:-Performance}' :: LowCardinality(String) AS check_name, '$(sed -n 's/.*/\1/p' report.html)' :: LowCardinality(String) AS check_status, - -- TODO toDateTime() can't parse output of 'date', so no time for now. 
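-- (Aside on the toDecimal64 -> round switch above, a sketch: round() returns a Float64,
--  which prints without trailing zeros, e.g. SELECT round(1.5, 3) gives 1.5, whereas
--  toDecimal64(1.5, 3) prints 1.500 only under output_format_decimal_trailing_zeros = 1,
--  presumably why that setting could be dropped from these reports.)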
(($(date +%s) - $CHPC_CHECK_START_TIMESTAMP) * 1000) :: UInt64 AS check_duration_ms, fromUnixTimestamp($CHPC_CHECK_START_TIMESTAMP) check_start_time, test_name :: LowCardinality(String) AS test_name , diff --git a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml index 39c29bb61ca1..292665c4f68e 100644 --- a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml +++ b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml @@ -19,31 +19,6 @@ - - - ENGINE = Memory - - - - ENGINE = Memory - - - - ENGINE = Memory - - - - ENGINE = Memory - - - - ENGINE = Memory - - - 1000000000 10 diff --git a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml index 093834943a3f..cb591f1a184b 100644 --- a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml +++ b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml @@ -3,7 +3,7 @@ 1 1 - 1 + 0 0 0 + 0 60 diff --git a/docker/test/performance-comparison/download.sh b/docker/test/performance-comparison/download.sh index aee110300685..cb243b655c6b 100755 --- a/docker/test/performance-comparison/download.sh +++ b/docker/test/performance-comparison/download.sh @@ -31,8 +31,6 @@ function download # Test all of them. declare -a urls_to_try=( "$S3_URL/PRs/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst" - "$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst" - "$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tgz" ) for path in "${urls_to_try[@]}" diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh index 74571777be07..fb5e6bd2a7af 100755 --- a/docker/test/performance-comparison/entrypoint.sh +++ b/docker/test/performance-comparison/entrypoint.sh @@ -130,7 +130,7 @@ then git -C right/ch diff --name-only "$base" pr -- :!tests/performance :!docker/test/performance-comparison | tee other-changed-files.txt fi -# Set python output encoding so that we can print queries with Russian letters. +# Set python output encoding so that we can print queries with non-ASCII letters. export PYTHONIOENCODING=utf-8 # By default, use the main comparison script from the tested package, so that we @@ -151,11 +151,7 @@ export PATH export REF_PR export REF_SHA -# Try to collect some core dumps. I've seen two patterns in Sandbox: -# 1) |/home/zomb-sandbox/venv/bin/python /home/zomb-sandbox/client/sandbox/bin/coredumper.py %e %p %g %u %s %P %c -# Not sure what this script does (puts them to sandbox resources, logs some messages?), -# and it's not accessible from inside docker anyway. -# 2) something like %e.%p.core.dmp. The dump should end up in the workspace directory. +# Try to collect some core dumps. # At least we remove the ulimit and then try to pack some common file names into output. 
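A minimal standalone sketch of the core-dump collection idea described above (the `core.%e.%p` pattern and the packing loop are illustrative assumptions, not part of this patch):

```bash
# Illustrative sketch only: raise the core-size limit, choose a predictable
# dump name, and pack any dumps left behind into the test output.
ulimit -c unlimited
echo 'core.%e.%p' | sudo tee /proc/sys/kernel/core_pattern   # core.<binary>.<pid>
# ... run the workload, then collect whatever was dumped ...
for core in core.*; do
    [ -e "$core" ] || continue
    zstd --threads=0 "$core" -o "/test_output/${core}.zst"
done
```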
ulimit -c unlimited cat /proc/sys/kernel/core_pattern diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py index 7a4e6386d0df..d23a9ac61c19 100755 --- a/docker/test/performance-comparison/perf.py +++ b/docker/test/performance-comparison/perf.py @@ -369,6 +369,7 @@ def do_create(connection, index, queries): "max_execution_time": args.prewarm_max_query_seconds, "query_profiler_real_time_period_ns": 10000000, "query_profiler_cpu_time_period_ns": 10000000, + "metrics_perf_events_enabled": 1, "memory_profiler_step": "4Mi", }, ) @@ -503,6 +504,7 @@ def do_create(connection, index, queries): settings={ "query_profiler_real_time_period_ns": 10000000, "query_profiler_cpu_time_period_ns": 10000000, + "metrics_perf_events_enabled": 1, }, ) print( diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index a1f2eb9d9ecd..7da30ba7a083 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -364,20 +364,6 @@ def add_errors_explained(): ] ) - slow_on_client_rows = tsvRows("report/slow-on-client.tsv") - error_tests += len(slow_on_client_rows) - addSimpleTable( - "Slow on Client", - ["Client time, s", "Server time, s", "Ratio", "Test", "Query"], - slow_on_client_rows, - ) - if slow_on_client_rows: - errors_explained.append( - [ - f'Some queries are taking noticeable time client-side (missing `FORMAT Null`?)' - ] - ) - def add_backward_incompatible(): rows = tsvRows("report/partial-queries-report.tsv") if not rows: diff --git a/docker/test/server-jepsen/run.sh b/docker/test/server-jepsen/run.sh index 0c3768df813c..81e442e65b6e 100644 --- a/docker/test/server-jepsen/run.sh +++ b/docker/test/server-jepsen/run.sh @@ -2,7 +2,7 @@ set -euo pipefail -CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-16_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} +CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-17_relwithdebuginfo_none_unsplitted_disable_False_binary/clickhouse"} CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""} diff --git a/docker/test/sqllogic/Dockerfile b/docker/test/sqllogic/Dockerfile index 83dcf7e1f56a..5cf71e4d3f84 100644 --- a/docker/test/sqllogic/Dockerfile +++ b/docker/test/sqllogic/Dockerfile @@ -13,6 +13,7 @@ RUN apt-get update --yes \ sqlite3 \ unixodbc \ unixodbc-dev \ + odbcinst \ sudo \ && apt-get clean diff --git a/docker/test/sqllogic/run.sh b/docker/test/sqllogic/run.sh index 8d0252e3c989..db828741b0d7 100755 --- a/docker/test/sqllogic/run.sh +++ b/docker/test/sqllogic/run.sh @@ -1,4 +1,5 @@ #!/bin/bash + set -exu trap "exit" INT TERM @@ -92,9 +93,8 @@ sudo clickhouse stop ||: for _ in $(seq 1 60); do if [[ $(wget --timeout=1 -q 'localhost:8123' -O-) == 'Ok.' 
]]; then sleep 1 ; else break; fi ; done -grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||: -pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz & +rg -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||: +zstd < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & # Compressed (FIXME: remove once only github actions will be left) -rm /var/log/clickhouse-server/clickhouse-server.log mv /var/log/clickhouse-server/stderr.log /test_output/ ||: diff --git a/docker/test/sqltest/Dockerfile b/docker/test/sqltest/Dockerfile new file mode 100644 index 000000000000..437677f4fd1f --- /dev/null +++ b/docker/test/sqltest/Dockerfile @@ -0,0 +1,30 @@ +# docker build -t clickhouse/sqltest . +ARG FROM_TAG=latest +FROM clickhouse/test-base:$FROM_TAG + +RUN apt-get update --yes \ + && env DEBIAN_FRONTEND=noninteractive \ + apt-get install --yes --no-install-recommends \ + wget \ + git \ + python3 \ + python3-dev \ + python3-pip \ + sudo \ + && apt-get clean + +RUN pip3 install \ + pyyaml \ + clickhouse-driver + +ARG sqltest_repo="https://github.com/elliotchance/sqltest/" + +RUN git clone ${sqltest_repo} + +ENV TZ=UTC +ENV MAX_RUN_TIME=900 +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + +COPY run.sh / +COPY test.py / +CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/sqltest/run.sh b/docker/test/sqltest/run.sh new file mode 100755 index 000000000000..1d939805c7b1 --- /dev/null +++ b/docker/test/sqltest/run.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# shellcheck disable=SC2015 + +set -x +set -e +set -u +set -o pipefail + +BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-17_debug_none_unsplitted_disable_False_binary"} +BINARY_URL_TO_DOWNLOAD=${BINARY_URL_TO_DOWNLOAD:="https://clickhouse-builds.s3.amazonaws.com/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"} + +function wget_with_retry +{ + for _ in 1 2 3 4; do + if wget -nv -nd -c "$1";then + return 0 + else + sleep 0.5 + fi + done + return 1 +} + +wget_with_retry "$BINARY_URL_TO_DOWNLOAD" +chmod +x clickhouse +./clickhouse install --noninteractive + +echo " +users: + default: + access_management: 1" > /etc/clickhouse-server/users.d/access_management.yaml + +clickhouse start + +# Wait for start +for _ in {1..100} +do + clickhouse-client --query "SELECT 1" && break ||: + sleep 1 +done + +# Run the test +pushd sqltest/standards/2016/ +/test.py +mv report.html test.log /workspace +popd + +zstd --threads=0 /var/log/clickhouse-server/clickhouse-server.log +zstd --threads=0 /var/log/clickhouse-server/clickhouse-server.err.log + +mv /var/log/clickhouse-server/clickhouse-server.log.zst /var/log/clickhouse-server/clickhouse-server.err.log.zst /workspace diff --git a/docker/test/sqltest/test.py b/docker/test/sqltest/test.py new file mode 100755 index 000000000000..5807ca79b026 --- /dev/null +++ b/docker/test/sqltest/test.py @@ -0,0 +1,148 @@ +#!/usr/bin/env python3 + +import os +import yaml +import html +import random +import string +from clickhouse_driver import Client + + +client = Client(host="localhost", port=9000) +settings = { + "default_table_engine": "Memory", + "union_default_mode": "DISTINCT", + "calculate_text_stack_trace": 0, +} + +database_name = "sqltest_" + "".join( + random.choice(string.ascii_lowercase) for _ in range(10) +) + +client.execute(f"DROP DATABASE IF EXISTS {database_name}", settings=settings) +client.execute(f"CREATE DATABASE {database_name}", settings=settings) + 
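The harness above drops and recreates a uniquely named scratch database so that repeated runs never collide. A rough shell equivalent of that setup, with illustrative names, would be:

```bash
# Illustrative sketch only: the same throwaway-database pattern via clickhouse-client.
db="sqltest_$(tr -dc 'a-z' < /dev/urandom | head -c 10)"
clickhouse-client --query "DROP DATABASE IF EXISTS $db"
clickhouse-client --query "CREATE DATABASE $db"
# ... run the conformance queries against $db ...
clickhouse-client --query "DROP DATABASE $db"
```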
+client = Client(host="localhost", port=9000, database=database_name) + +summary = {"success": 0, "total": 0, "results": {}} + +log_file = open("test.log", "w") +report_html_file = open("report.html", "w") + +with open("features.yml", "r") as file: + yaml_content = yaml.safe_load(file) + + for category in yaml_content: + log_file.write(category.capitalize() + " features:\n") + summary["results"][category] = {"success": 0, "total": 0, "results": {}} + + for test in yaml_content[category]: + log_file.write(test + ": " + yaml_content[category][test] + "\n") + summary["results"][category]["results"][test] = { + "success": 0, + "total": 0, + "description": yaml_content[category][test], + } + + test_path = test[0] + "/" + test + ".tests.yml" + if os.path.exists(test_path): + with open(test_path, "r") as test_file: + test_yaml_content = yaml.load_all(test_file, Loader=yaml.FullLoader) + + for test_case in test_yaml_content: + queries = test_case["sql"] + if not isinstance(queries, list): + queries = [queries] + + for query in queries: + # Example: E011-01 + test_group = "" + if "-" in test: + test_group = test.split("-", 1)[0] + summary["results"][category]["results"][test_group][ + "total" + ] += 1 + summary["results"][category]["results"][test]["total"] += 1 + summary["results"][category]["total"] += 1 + summary["total"] += 1 + + log_file.write(query + "\n") + + try: + result = client.execute(query, settings=settings) + log_file.write(str(result) + "\n") + + if test_group: + summary["results"][category]["results"][test_group][ + "success" + ] += 1 + summary["results"][category]["results"][test][ + "success" + ] += 1 + summary["results"][category]["success"] += 1 + summary["success"] += 1 + + except Exception as e: + log_file.write(f"Error occurred: {str(e)}\n") + +client.execute(f"DROP DATABASE {database_name}", settings=settings) + + +def enable_color(ratio): + if ratio == 0: + return "<b style='color: red;'>" + elif ratio < 0.5: + return "<b style='color: orange;'>" + elif ratio < 1: + return "<b style='color: gray;'>" + else: + return "<b style='color: green;'>" + + +reset_color = "</b>" + + +def print_ratio(indent, name, success, total, description): + report_html_file.write( + "{}{}: {}{} / {} ({:.1%}){}{}\n".format( + " " * indent, + name.capitalize(), + enable_color(success / total), + success, + total, + success / total, + reset_color, + f" - " + html.escape(description) if description else "", + ) + ) + + +report_html_file.write( + "<html><body><pre>\n"
+)
+
+print_ratio(0, "Total", summary["success"], summary["total"], "")
+
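The coloring used by `print_ratio` buckets the pass ratio into four bands: zero, below one half, below one, and complete. The same thresholds, sketched in shell for illustration (the band names are assumptions matching `enable_color` above):

```bash
# Illustrative sketch only: classify a success ratio into the report's bands.
ratio_band() {  # usage: ratio_band <success> <total>
    awk -v s="$1" -v t="$2" 'BEGIN {
        r = (t > 0) ? s / t : 0
        if (r == 0)       print "red"
        else if (r < 0.5) print "orange"
        else if (r < 1)   print "gray"
        else              print "green"
    }'
}
ratio_band 3 10   # prints "orange"
```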
+for category in summary["results"]:
+    cat_summary = summary["results"][category]
+
+    if cat_summary["total"] == 0:
+        continue
+
+    print_ratio(2, category, cat_summary["success"], cat_summary["total"], "")
+
+    for test in summary["results"][category]["results"]:
+        test_summary = summary["results"][category]["results"][test]
+
+        if test_summary["total"] == 0:
+            continue
+
+        print_ratio(
+            6 if "-" in test else 4,
+            test,
+            test_summary["success"],
+            test_summary["total"],
+            test_summary["description"],
+        )
+
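Each summary line is plain indented text inside the report's `<pre>` block; the equivalent of one `print_ratio` line can be produced in the shell (values illustrative):

```bash
# Illustrative sketch only: one summary line in print_ratio's format.
indent=2; name="Aggregate"; success=75; total=100
printf '%*s%s: %d / %d (%.1f%%)\n' "$indent" '' "$name" "$success" "$total" \
    "$(awk -v s="$success" -v t="$total" 'BEGIN { printf "%.4f", 100 * s / t }')"
# ->   Aggregate: 75 / 100 (75.0%)
```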
+report_html_file.write("
\n") diff --git a/docker/test/stateful/Dockerfile b/docker/test/stateful/Dockerfile index 71a2e92e3a86..f513735a2d0a 100644 --- a/docker/test/stateful/Dockerfile +++ b/docker/test/stateful/Dockerfile @@ -16,8 +16,9 @@ COPY s3downloader /s3downloader ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com" ENV DATASETS="hits visits" -RUN npm install -g azurite -RUN npm install tslib +# The following is already done in clickhouse/stateless-test +# RUN npm install -g azurite +# RUN npm install tslib COPY run.sh / CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/stateful/run.sh b/docker/test/stateful/run.sh index c973b6c6ec6f..ad3c3477b373 100755 --- a/docker/test/stateful/run.sh +++ b/docker/test/stateful/run.sh @@ -1,5 +1,7 @@ #!/bin/bash +# shellcheck disable=SC1091 +source /setup_export_logs.sh set -e -x # Choose random timezone for this test run @@ -20,6 +22,8 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & ./setup_minio.sh stateful +config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml + function start() { if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then @@ -65,6 +69,9 @@ function start() } start + +setup_logs_replication + # shellcheck disable=SC2086 # No quotes because I want to split it into words. /s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS chmod 777 -R /var/lib/clickhouse diff --git a/docker/test/stateless/Dockerfile b/docker/test/stateless/Dockerfile index 40109255a7e5..6f593a5bd1d2 100644 --- a/docker/test/stateless/Dockerfile +++ b/docker/test/stateless/Dockerfile @@ -20,6 +20,7 @@ RUN apt-get update -y \ netcat-openbsd \ nodejs \ npm \ + odbcinst \ openjdk-11-jre-headless \ openssl \ postgresql-client \ @@ -32,7 +33,6 @@ RUN apt-get update -y \ qemu-user-static \ sqlite3 \ sudo \ - telnet \ tree \ unixodbc \ wget \ @@ -40,7 +40,10 @@ RUN apt-get update -y \ cargo \ zstd \ file \ + jq \ pv \ + zip \ + p7zip-full \ && apt-get clean RUN pip3 install numpy scipy pandas Jinja2 @@ -52,7 +55,7 @@ RUN mkdir -p /tmp/clickhouse-odbc-tmp \ && odbcinst -i -s -l -f /tmp/clickhouse-odbc-tmp/share/doc/clickhouse-odbc/config/odbc.ini.sample \ && rm -rf /tmp/clickhouse-odbc-tmp -ENV TZ=Europe/Moscow +ENV TZ=Europe/Amsterdam RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone ENV NUM_TRIES=1 @@ -71,7 +74,7 @@ RUN arch=${TARGETARCH:-amd64} \ && chmod +x ./mc ./minio -RUN wget 'https://dlcdn.apache.org/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \ +RUN wget --no-verbose 'https://archive.apache.org/dist/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz' \ && tar -xvf hadoop-3.3.1.tar.gz \ && rm -rf hadoop-3.3.1.tar.gz @@ -79,10 +82,16 @@ ENV MINIO_ROOT_USER="clickhouse" ENV MINIO_ROOT_PASSWORD="clickhouse" ENV EXPORT_S3_STORAGE_POLICIES=1 -RUN npm install -g azurite -RUN npm install tslib +RUN npm install -g azurite \ + && npm install -g tslib COPY run.sh / COPY setup_minio.sh / COPY setup_hdfs_minicluster.sh / +COPY attach_gdb.lib / +COPY utils.lib / + +# We store stress_tests.lib in stateless image to avoid duplication of this file in stress and upgrade tests +COPY stress_tests.lib / + CMD ["/bin/bash", "/run.sh"] diff --git a/tests/ci/attach_gdb.lib b/docker/test/stateless/attach_gdb.lib similarity index 90% rename from tests/ci/attach_gdb.lib rename to docker/test/stateless/attach_gdb.lib index 2df6243f7966..f4738cdc3334 100644 --- a/tests/ci/attach_gdb.lib +++ 
b/docker/test/stateless/attach_gdb.lib @@ -1,5 +1,7 @@ #!/bin/bash +source /utils.lib + function attach_gdb_to_clickhouse() { # Set follow-fork-mode to parent, because we attach to clickhouse-server, not to watchdog @@ -38,5 +40,7 @@ quit gdb -batch -command script.gdb -p "$(cat /var/run/clickhouse-server/clickhouse-server.pid)" | ts '%Y-%m-%d %H:%M:%S' >> /test_output/gdb.log & sleep 5 # gdb will send SIGSTOP, spend some time loading debug info and then send SIGCONT, wait for it (up to send_timeout, 300s) - time clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" ||: + run_with_retry 60 clickhouse-client --query "SELECT 'Connected to clickhouse-server after attaching gdb'" } + +# vi: ft=bash diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 21cb31680835..34fc12d1a72c 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -1,9 +1,15 @@ #!/bin/bash +# shellcheck disable=SC1091 +source /setup_export_logs.sh + # fail on errors, verbose and export all env variables set -e -x -a # Choose random timezone for this test run. +# +# NOTE: clickhouse-test will also randomize session_timezone by itself +# (it will choose between default server timezone and something specific). TZ="$(rg -v '#' /usr/share/zoneinfo/zone.tab | awk '{print $3}' | shuf | head -n1)" echo "Chosen random timezone $TZ" ln -snf "/usr/share/zoneinfo/$TZ" /etc/localtime && echo "$TZ" > /etc/timezone @@ -16,7 +22,10 @@ dpkg -i package_folder/clickhouse-client_*.deb ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test # shellcheck disable=SC1091 -source /usr/share/clickhouse-test/ci/attach_gdb.lib || true # FIXME: to not break old builds, clean on 2023-09-01 +source /attach_gdb.lib + +# shellcheck disable=SC1091 +source /utils.lib # install test configs /usr/share/clickhouse-test/config/install.sh @@ -30,6 +39,8 @@ fi ./setup_minio.sh stateless ./setup_hdfs_minicluster.sh +config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml + # For flaky check we also enable thread fuzzer if [ "$NUM_TRIES" -gt "1" ]; then export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 @@ -58,6 +69,16 @@ else fi if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then + sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \ + | sed "s|/var/lib/clickhouse/filesystem_caches/|/var/lib/clickhouse/filesystem_caches_1/|" \ + > /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp + mv /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server1/config.d/filesystem_caches_path.xml + + sudo cat /etc/clickhouse-server2/config.d/filesystem_caches_path.xml \ + | sed "s|/var/lib/clickhouse/filesystem_caches/|/var/lib/clickhouse/filesystem_caches_2/|" \ + > /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp + mv /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server2/config.d/filesystem_caches_path.xml + mkdir -p /var/run/clickhouse-server1 sudo chown clickhouse:clickhouse /var/run/clickhouse-server1 sudo -E -u clickhouse /usr/bin/clickhouse server --config /etc/clickhouse-server1/config.xml --daemon \ @@ -86,10 +107,34 @@ if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]] MAX_RUN_TIME=$((MAX_RUN_TIME != 0 ? MAX_RUN_TIME : 9000)) # set to 2.5 hours if 0 (unlimited) fi -sleep 5 + +# Wait for the server to start, but not for too long.
+for _ in {1..100} +do + clickhouse-client --query "SELECT 1" && break + sleep 1 +done + +setup_logs_replication attach_gdb_to_clickhouse || true # FIXME: to not break old builds, clean on 2023-09-01 +function fn_exists() { + declare -F "$1" > /dev/null; +} + +# FIXME: to not break old builds, clean on 2023-09-01 +function try_run_with_retry() { + local total_retries="$1" + shift + + if fn_exists run_with_retry; then + run_with_retry "$total_retries" "$@" + else + "$@" + fi +} + function run_tests() { set -x @@ -137,8 +182,7 @@ function run_tests() ADDITIONAL_OPTIONS+=('--report-logs-stats') - clickhouse-test "00001_select_1" > /dev/null ||: - clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" ||: + try_run_with_retry 10 clickhouse-client -q "insert into system.zookeeper (name, path, value) values ('auxiliary_zookeeper2', '/test/chroot/', '')" set +e clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ @@ -181,6 +225,12 @@ rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server.log ||: rg -A50 -Fa "============" /var/log/clickhouse-server/stderr.log ||: zstd --threads=0 < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst & +data_path_config="--path=/var/lib/clickhouse/" +if [[ -n "$USE_S3_STORAGE_FOR_MERGE_TREE" ]] && [[ "$USE_S3_STORAGE_FOR_MERGE_TREE" -eq 1 ]]; then + # We need s3 storage configuration (but it's more likely that clickhouse-local will fail for some reason) + data_path_config="--config-file=/etc/clickhouse-server/config.xml" +fi + # Compress tables. # # NOTE: # for files >64MB, we want these files to be compressed explicitly for table in query_log zookeeper_log trace_log transactions_info_log do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: + clickhouse-local "$data_path_config" --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then clickhouse-local --path /var/lib/clickhouse1/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.1.tsv.zst ||: clickhouse-local --path /var/lib/clickhouse2/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.2.tsv.zst ||: @@ -200,7 +250,7 @@ done # Also export trace log in flamegraph-friendly format.
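The loop that follows flattens the trace log into semicolon-joined stacks with sample counts, which is the input shape expected by common flamegraph tooling. Once exported, such a file could be rendered roughly like this (the output filename is cut off in this hunk, so the name below is an assumption):

```bash
# Illustrative sketch only: render an exported stacks file as an SVG flamegraph.
# Assumes Brendan Gregg's flamegraph.pl is on PATH and that the export loop
# wrote /test_output/trace-log-CPU-flamegraph.tsv.zst (filename assumed).
zstd -dc /test_output/trace-log-CPU-flamegraph.tsv.zst \
    | awk -F'\t' '{ print $1, $2 }' \
    | flamegraph.pl --title "CPU trace" > cpu-flamegraph.svg
```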
for trace_type in CPU Memory Real do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q " + clickhouse-local "$data_path_config" --only-system-tables -q " select arrayStringConcat((arrayMap(x -> concat(splitByChar('/', addressToLine(x))[-1], '#', demangle(addressToSymbol(x)) ), trace)), ';') AS stack, count(*) AS samples diff --git a/tests/ci/stress_tests.lib b/docker/test/stateless/stress_tests.lib similarity index 92% rename from tests/ci/stress_tests.lib rename to docker/test/stateless/stress_tests.lib index 2b8ac77b9523..11945b68f700 100644 --- a/tests/ci/stress_tests.lib +++ b/docker/test/stateless/stress_tests.lib @@ -9,8 +9,6 @@ FAIL="\tFAIL\t\\N\t" FAILURE_CONTEXT_LINES=100 FAILURE_CONTEXT_MAX_LINE_WIDTH=300 -source attach_gdb.lib - function escaped() { # That's the simplest way I found to escape a string in bash. Yep, bash is the most convenient programming language. @@ -54,6 +52,21 @@ function configure() | sed "s|100000|10000|" \ > /etc/clickhouse-server/config.d/keeper_port.xml.tmp sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml + + function randomize_config_boolean_value { + value=$(($RANDOM % 2)) + sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ + | sed "s|<$1>[01]|<$1>$value|" \ + > /etc/clickhouse-server/config.d/keeper_port.xml.tmp + sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml + } + + # Randomize all Keeper feature flags + randomize_config_boolean_value filtered_list + randomize_config_boolean_value multi_read + randomize_config_boolean_value check_not_exists + randomize_config_boolean_value create_if_not_exists + sudo chown clickhouse /etc/clickhouse-server/config.d/keeper_port.xml sudo chgrp clickhouse /etc/clickhouse-server/config.d/keeper_port.xml @@ -243,7 +256,7 @@ function check_logs_for_critical_errors() # Remove file fatal_messages.txt if it's empty [ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt - rg -Fa "########################################" /test_output/* > /dev/null \ + rg -Faz "########################################" /test_output/* > /dev/null \ && echo -e "Killed by signal (output files)$FAIL" >> /test_output/test_results.tsv function get_gdb_log_context() @@ -266,7 +279,7 @@ function collect_query_and_trace_logs() { for table in query_log trace_log do - clickhouse-local --path /var/lib/clickhouse/ --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: + clickhouse-local --config-file=/etc/clickhouse-server/config.xml --only-system-tables -q "select * from system.$table format TSVWithNamesAndTypes" | zstd --threads=0 > /test_output/$table.tsv.zst ||: done } @@ -277,3 +290,5 @@ function collect_core_dumps() mv $core.zst /test_output/ done } + +# vi: ft=bash diff --git a/docker/test/stateless/utils.lib b/docker/test/stateless/utils.lib new file mode 100644 index 000000000000..1204434d853c --- /dev/null +++ b/docker/test/stateless/utils.lib @@ -0,0 +1,38 @@ +#!/bin/bash + +function run_with_retry() +{ + if [[ $- =~ e ]]; then + set_e=true + else + set_e=false + fi + set +e + + local total_retries="$1" + shift + + local retry=0 + + until [ "$retry" -ge "$total_retries" ] + do + if "$@"; then + if $set_e; then + set -e + fi + return + else + retry=$((retry + 1)) + sleep 5 + fi + done + + echo "Command '$*' failed after $total_retries retries, exiting" + exit 1 +} + +function fn_exists() { + declare 
-F "$1" > /dev/null; +} + +# vi: ft=bash diff --git a/docker/test/stress/Dockerfile b/docker/test/stress/Dockerfile index e9712f430fd2..eddeb04758ba 100644 --- a/docker/test/stress/Dockerfile +++ b/docker/test/stress/Dockerfile @@ -8,8 +8,6 @@ RUN apt-get update -y \ apt-get install --yes --no-install-recommends \ bash \ tzdata \ - fakeroot \ - debhelper \ parallel \ expect \ python3 \ @@ -20,7 +18,6 @@ RUN apt-get update -y \ sudo \ openssl \ netcat-openbsd \ - telnet \ brotli \ && apt-get clean diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index 4926967d2d2a..b5092fd40dfa 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -5,6 +5,8 @@ # Avoid overlaps with previous runs dmesg --clear +# shellcheck disable=SC1091 +source /setup_export_logs.sh set -x @@ -14,7 +16,8 @@ ln -s /usr/share/clickhouse-test/clickhouse-test /usr/bin/clickhouse-test # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. See tests/ci/stress_tests.lib -source /usr/share/clickhouse-test/ci/stress_tests.lib +source /attach_gdb.lib +source /stress_tests.lib install_packages package_folder @@ -50,9 +53,13 @@ configure azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & ./setup_minio.sh stateless # to have a proper environment +config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml + start -shellcheck disable=SC2086 # No quotes because I want to split it into words. +setup_logs_replication + +# shellcheck disable=SC2086 # No quotes because I want to split it into words. /s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS chmod 777 -R /var/lib/clickhouse clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary" @@ -179,6 +186,11 @@ mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/cli sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml +sudo cat /etc/clickhouse-server/config.d/logger_trace.xml \ + | sed "s|trace|test|" \ + > /etc/clickhouse-server/config.d/logger_trace.xml.tmp +mv /etc/clickhouse-server/config.d/logger_trace.xml.tmp /etc/clickhouse-server/config.d/logger_trace.xml + start stress --hung-check --drop-databases --output-folder test_output --skip-func-tests "$SKIP_TESTS_OPTION" --global-time-limit 1200 \ @@ -232,4 +244,10 @@ rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv +# But OOMs in stress test are allowed +if rg 'OOM in dmesg|Signal 9' /test_output/check_status.tsv +then + sed -i 's/failure/success/' /test_output/check_status.tsv +fi + collect_core_dumps diff --git a/docker/test/style/Dockerfile b/docker/test/style/Dockerfile index 746cc7bb2d5b..a4feae27c675 100644 --- a/docker/test/style/Dockerfile +++ b/docker/test/style/Dockerfile @@ -1,5 +1,5 @@ # docker build -t clickhouse/style-test . 
-FROM ubuntu:20.04 +FROM ubuntu:22.04 ARG ACT_VERSION=0.2.33 ARG ACTIONLINT_VERSION=1.6.22 @@ -18,9 +18,13 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \ python3-pip \ shellcheck \ yamllint \ - && pip3 install black==23.1.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \ + locales \ + && pip3 install black==23.1.0 boto3 codespell==2.2.1 mypy==1.3.0 PyGithub unidiff pylint==2.6.2 \ && apt-get clean \ - && rm -rf /root/.cache/pip + && rm -rf /root/.cache/pip + +RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8 +ENV LC_ALL en_US.UTF-8 # Architecture of the image when BuildKit/buildx is used ARG TARGETARCH diff --git a/docker/test/unit/Dockerfile b/docker/test/unit/Dockerfile index b75bfb6661cc..cf5ba1eec7fa 100644 --- a/docker/test/unit/Dockerfile +++ b/docker/test/unit/Dockerfile @@ -6,5 +6,4 @@ FROM clickhouse/stateless-test:$FROM_TAG RUN apt-get install gdb COPY run.sh / -COPY process_unit_tests_result.py / CMD ["/bin/bash", "/run.sh"] diff --git a/docker/test/unit/process_unit_tests_result.py b/docker/test/unit/process_unit_tests_result.py deleted file mode 100755 index 0550edc7c251..000000000000 --- a/docker/test/unit/process_unit_tests_result.py +++ /dev/null @@ -1,102 +0,0 @@ -#!/usr/bin/env python3 - -import os -import logging -import argparse -import csv - -OK_SIGN = "OK ]" -FAILED_SIGN = "FAILED ]" -SEGFAULT = "Segmentation fault" -SIGNAL = "received signal SIG" -PASSED = "PASSED" - - -def get_test_name(line): - elements = reversed(line.split(" ")) - for element in elements: - if "(" not in element and ")" not in element: - return element - raise Exception("No test name in line '{}'".format(line)) - - -def process_result(result_folder): - summary = [] - total_counter = 0 - failed_counter = 0 - result_log_path = "{}/test_result.txt".format(result_folder) - if not os.path.exists(result_log_path): - logging.info("No output log on path %s", result_log_path) - return "exception", "No output log", [] - - status = "success" - description = "" - passed = False - with open(result_log_path, "r") as test_result: - for line in test_result: - if OK_SIGN in line: - logging.info("Found ok line: '%s'", line) - test_name = get_test_name(line.strip()) - logging.info("Test name: '%s'", test_name) - summary.append((test_name, "OK")) - total_counter += 1 - elif FAILED_SIGN in line and "listed below" not in line and "ms)" in line: - logging.info("Found fail line: '%s'", line) - test_name = get_test_name(line.strip()) - logging.info("Test name: '%s'", test_name) - summary.append((test_name, "FAIL")) - total_counter += 1 - failed_counter += 1 - elif SEGFAULT in line: - logging.info("Found segfault line: '%s'", line) - status = "failure" - description += "Segmentation fault. " - break - elif SIGNAL in line: - logging.info("Received signal line: '%s'", line) - status = "failure" - description += "Exit on signal. " - break - elif PASSED in line: - logging.info("PASSED record found: '%s'", line) - passed = True - - if not passed: - status = "failure" - description += "PASSED record not found. 
" - - if failed_counter != 0: - status = "failure" - - if not description: - description += "fail: {}, passed: {}".format( - failed_counter, total_counter - failed_counter - ) - - return status, description, summary - - -def write_results(results_file, status_file, results, status): - with open(results_file, "w") as f: - out = csv.writer(f, delimiter="\t") - out.writerows(results) - with open(status_file, "w") as f: - out = csv.writer(f, delimiter="\t") - out.writerow(status) - - -if __name__ == "__main__": - logging.basicConfig(level=logging.INFO, format="%(asctime)s %(message)s") - parser = argparse.ArgumentParser( - description="ClickHouse script for parsing results of unit tests" - ) - parser.add_argument("--in-results-dir", default="/test_output/") - parser.add_argument("--out-results-file", default="/test_output/test_results.tsv") - parser.add_argument("--out-status-file", default="/test_output/check_status.tsv") - args = parser.parse_args() - - state, description, test_results = process_result(args.in_results_dir) - logging.info("Result parsed") - status = (state, description) - write_results(args.out_results_file, args.out_status_file, test_results, status) - logging.info("Result written") diff --git a/docker/test/unit/run.sh b/docker/test/unit/run.sh index a4784466e27b..e87432214d84 100644 --- a/docker/test/unit/run.sh +++ b/docker/test/unit/run.sh @@ -3,5 +3,4 @@ set -x service zookeeper start && sleep 7 && /usr/share/zookeeper/bin/zkCli.sh -server localhost:2181 -create create /clickhouse_test ''; -timeout 40m gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt -./process_unit_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv +timeout 40m gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms --gtest_output='json:test_output/test_result.json' | tee test_output/test_result.txt diff --git a/docker/test/upgrade/Dockerfile b/docker/test/upgrade/Dockerfile index 8e5890b81a0c..9152230af1cf 100644 --- a/docker/test/upgrade/Dockerfile +++ b/docker/test/upgrade/Dockerfile @@ -8,8 +8,6 @@ RUN apt-get update -y \ apt-get install --yes --no-install-recommends \ bash \ tzdata \ - fakeroot \ - debhelper \ parallel \ expect \ python3 \ @@ -20,7 +18,6 @@ RUN apt-get update -y \ sudo \ openssl \ netcat-openbsd \ - telnet \ brotli \ && apt-get clean diff --git a/docker/test/upgrade/run.sh b/docker/test/upgrade/run.sh index e72c28b7167b..c69d90b9af07 100644 --- a/docker/test/upgrade/run.sh +++ b/docker/test/upgrade/run.sh @@ -16,7 +16,8 @@ ln -s /usr/share/clickhouse-test/ci/get_previous_release_tag.py /usr/bin/get_pre # Stress tests and upgrade check uses similar code that was placed # in a separate bash library. 
See tests/ci/stress_tests.lib -source /usr/share/clickhouse-test/ci/stress_tests.lib +source /attach_gdb.lib +source /stress_tests.lib azurite-blob --blobHost 0.0.0.0 --blobPort 10000 --debug /azurite_log & ./setup_minio.sh stateless # to have a proper environment @@ -59,36 +60,59 @@ install_packages previous_release_package_folder # available for dump via clickhouse-local configure +function remove_keeper_config() +{ + sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ + | sed "/<$1>$2<\/$1>/d" \ + > /etc/clickhouse-server/config.d/keeper_port.xml.tmp + sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml +} + +# async_replication setting doesn't exist on some older versions +remove_keeper_config "async_replication" "1" + +# create_if_not_exists feature flag doesn't exist on some older versions +remove_keeper_config "create_if_not_exists" "[01]" + # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/config.d/merge_tree.xml +rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml start stop mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log +# Start server from previous release +# Let's enable S3 storage by default +export USE_S3_STORAGE_FOR_MERGE_TREE=1 +# Previous version may not be ready for fault injections +export ZOOKEEPER_FAULT_INJECTION=0 +configure + # force_sync=false doesn't work correctly on some older versions sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \ | sed "s|<force_sync>false</force_sync>|<force_sync>true</force_sync>|" \ > /etc/clickhouse-server/config.d/keeper_port.xml.tmp sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml +# async_replication setting doesn't exist on some older versions +remove_keeper_config "async_replication" "1" + +# create_if_not_exists feature flag doesn't exist on some older versions +remove_keeper_config "create_if_not_exists" "[01]" + # But we still need default disk because some tables loaded only into it sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \ + | sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \ - > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml + > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp +mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml -# Start server from previous release -# Let's enable S3 storage by default -export USE_S3_STORAGE_FOR_MERGE_TREE=1 -# Previous version may not be ready for fault injections -export ZOOKEEPER_FAULT_INJECTION=0 -configure - # it contains some new settings, but we can safely remove it rm /etc/clickhouse-server/config.d/merge_tree.xml +rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml rm /etc/clickhouse-server/users.d/nonconst_timezone.xml start @@ -125,6 +149,7 @@ sudo cat /etc/clickhouse-server/config.d/lost_forever_check.xml \ | sed "s|>1<|>0<|g" \ > /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp sudo mv /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp /etc/clickhouse-server/config.d/lost_forever_check.xml +rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml start 500 clickhouse-client --query "SELECT 'Server successfully started', 'OK', NULL, ''" >> /test_output/test_results.tsv \ @@ -189,6 +214,7 @@ rg -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \ -e "Authentication failed" \ -e "Cannot flush" \ -e "Container already exists" \ + -e "doesn't have metadata version on disk" \ clickhouse-server.upgrade.log \ | grep -av -e "_repl_01111_.*Mapping for table with UUID" \ | zgrep -Fa "<Error>" > /test_output/upgrade_error_messages.txt \ @@ -226,4 +252,10 @@ rowNumberInAllBlocks() LIMIT 1" < /test_output/test_results.tsv > /test_output/check_status.tsv || echo "failure\tCannot parse test_results.tsv" > /test_output/check_status.tsv [ -s /test_output/check_status.tsv ] || echo -e "success\tNo errors found" > /test_output/check_status.tsv +# But OOMs in stress test are allowed +if rg 'OOM in dmesg|Signal 9' /test_output/check_status.tsv +then + sed -i 's/failure/success/' /test_output/check_status.tsv +fi + collect_core_dumps diff --git a/docker/test/util/Dockerfile b/docker/test/util/Dockerfile index a49278e960b3..eb5abce280ac 100644 --- a/docker/test/util/Dockerfile +++ b/docker/test/util/Dockerfile @@ -1,12 +1,12 @@ # docker build -t clickhouse/test-util . -FROM ubuntu:20.04 +FROM ubuntu:22.04 # ARG for quick switch to a given ubuntu mirror ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list # 15.0.2 -ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=16 +ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=17 RUN apt-get update \ && apt-get install \ @@ -44,7 +44,6 @@ RUN apt-get update \ clang-${LLVM_VERSION} \ clang-tidy-${LLVM_VERSION} \ cmake \ - fakeroot \ gdb \ git \ gperf \ @@ -94,7 +93,10 @@ RUN mkdir /tmp/ccache \ && rm -rf /tmp/ccache ARG TARGETARCH -ARG SCCACHE_VERSION=v0.4.1 +ARG SCCACHE_VERSION=v0.5.4 +ENV SCCACHE_IGNORE_SERVER_IO_ERROR=1 +# sccache requires a value for the region.
So by default we use The Default Region +ENV SCCACHE_REGION=us-east-1 RUN arch=${TARGETARCH:-amd64} \ && case $arch in \ amd64) rarch=x86_64 ;; \ diff --git a/docs/README.md b/docs/README.md index 0cd35a4e3ec1..d12603121661 100644 --- a/docs/README.md +++ b/docs/README.md @@ -200,8 +200,8 @@ Templates: - [Server Setting](_description_templates/template-server-setting.md) - [Database or Table engine](_description_templates/template-engine.md) - [System table](_description_templates/template-system-table.md) -- [Data type](_description_templates/data-type.md) -- [Statement](_description_templates/statement.md) +- [Data type](_description_templates/template-data-type.md) +- [Statement](_description_templates/template-statement.md) diff --git a/docs/_description_templates/template-data-type.md b/docs/_description_templates/template-data-type.md new file mode 100644 index 000000000000..239edb2808b8 --- /dev/null +++ b/docs/_description_templates/template-data-type.md @@ -0,0 +1,29 @@ +--- +toc_priority: +toc_title: +--- + +# data_type_name {#data_type-name} + +Description. + +**Parameters** (Optional) + +- `x` — Description. [Type name](relative/path/to/type/dscr.md#type). +- `y` — Description. [Type name](relative/path/to/type/dscr.md#type). + +**Examples** + +```sql + +``` + +## Additional Info {#additional-info} (Optional) + +The name of an additional section can be any, for example, **Usage**. + +**See Also** (Optional) + +- [link](#) + +[Original article](https://clickhouse.com/docs/en/data-types//) diff --git a/docs/_description_templates/template-engine.md b/docs/_description_templates/template-engine.md new file mode 100644 index 000000000000..392bc59ed331 --- /dev/null +++ b/docs/_description_templates/template-engine.md @@ -0,0 +1,63 @@ +# EngineName {#enginename} + +- What the Database/Table engine does. +- Relations with other engines if they exist. + +## Creating a Database {#creating-a-database} +``` sql + CREATE DATABASE ... +``` +or + +## Creating a Table {#creating-a-table} +``` sql + CREATE TABLE ... +``` + +**Engine Parameters** + +**Query Clauses** (for Table engines only) + +## Virtual columns {#virtual-columns} (for Table engines only) + +List and virtual columns with description, if they exist. + +## Data Types Support {#data_types-support} (for Database engines only) + +| EngineName | ClickHouse | +|-----------------------|------------------------------------| +| NativeDataTypeName | [ClickHouseDataTypeName](link#) | + + +## Specifics and recommendations {#specifics-and-recommendations} + +Algorithms +Specifics of read and write processes +Examples of tasks +Recommendations for usage +Specifics of data storage + +## Usage Example {#usage-example} + +The example must show usage and use cases. The following text contains the recommended parts of this section. + +Input table: + +``` text +``` + +Query: + +``` sql +``` + +Result: + +``` text +``` + +Follow up with any text to clarify the example. + +**See Also** + +- [link](#) diff --git a/docs/_description_templates/template-function.md b/docs/_description_templates/template-function.md new file mode 100644 index 000000000000..6bdc764c449d --- /dev/null +++ b/docs/_description_templates/template-function.md @@ -0,0 +1,51 @@ +## functionName {#functionname-in-lower-case} + +Short description. + +**Syntax** (without SELECT) + +``` sql + +``` + +Alias: ``. (Optional) + +More text (Optional). + +**Arguments** (Optional) + +- `x` — Description. Optional (only for optional arguments). Possible values: . Default value: . 
[Type name](relative/path/to/type/dscr.md#type). +- `y` — Description. Optional (only for optional arguments). Possible values: .Default value: . [Type name](relative/path/to/type/dscr.md#type). + +**Parameters** (Optional, only for parametric aggregate functions) + +- `z` — Description. Optional (only for optional parameters). Possible values: . Default value: . [Type name](relative/path/to/type/dscr.md#type). + +**Returned value(s)** + +- Returned values list. + +Type: [Type name](relative/path/to/type/dscr.md#type). + +**Example** + +The example must show usage and/or a use cases. The following text contains recommended parts of an example. + +Input table (Optional): + +``` text +``` + +Query: + +``` sql +``` + +Result: + +``` text +``` + +**See Also** (Optional) + +- [link](#) diff --git a/docs/_description_templates/template-server-setting.md b/docs/_description_templates/template-server-setting.md new file mode 100644 index 000000000000..0b37d46cf418 --- /dev/null +++ b/docs/_description_templates/template-server-setting.md @@ -0,0 +1,33 @@ +## server_setting_name {#server_setting_name} + +Description. + +Describe what is configured in this section of settings. + +Possible value: ... + +Default value: ... + +**Settings** (Optional) + +If the section contains several settings, list them here. Specify possible values and default values: + +- setting_1 — Description. +- setting_2 — Description. + +**Example** + +```xml + + ... + ... + +``` + +**Additional Info** (Optional) + +The name of an additional section can be any, for example, **Usage**. + +**See Also** (Optional) + +- [link](#) diff --git a/docs/_description_templates/template-setting.md b/docs/_description_templates/template-setting.md new file mode 100644 index 000000000000..fc912aba3e1d --- /dev/null +++ b/docs/_description_templates/template-setting.md @@ -0,0 +1,27 @@ +## setting_name {#setting_name} + +Description. + +For the switch setting, use the typical phrase: “Enables or disables something …”. + +Possible values: + +*For switcher setting:* + +- 0 — Disabled. +- 1 — Enabled. + +*For another setting (typical phrases):* + +- Positive integer. +- 0 — Disabled or unlimited or something else. + +Default value: `value`. + +**Additional Info** (Optional) + +The name of an additional section can be any, for example, **Usage**. + +**See Also** (Optional) + +- [link](#) diff --git a/docs/_description_templates/template-statement.md b/docs/_description_templates/template-statement.md new file mode 100644 index 000000000000..238570c22175 --- /dev/null +++ b/docs/_description_templates/template-statement.md @@ -0,0 +1,24 @@ +# Statement name (for example, SHOW USER) {#statement-name-in-lower-case} + +Brief description of what the statement does. + +**Syntax** + +```sql +Syntax of the statement. +``` + +## Other necessary sections of the description (Optional) {#anchor} + +Examples of descriptions with a complicated structure: + +- https://clickhouse.com/docs/en/sql-reference/statements/grant/ +- https://clickhouse.com/docs/en/sql-reference/statements/revoke/ +- https://clickhouse.com/docs/en/sql-reference/statements/select/join/ + + +**See Also** (Optional) + +Links to related topics as a list. 
+ +- [link](#) diff --git a/docs/_description_templates/template-system-table.md b/docs/_description_templates/template-system-table.md new file mode 100644 index 000000000000..f2decc4bb6d5 --- /dev/null +++ b/docs/_description_templates/template-system-table.md @@ -0,0 +1,25 @@ +# system.table_name {#system-tables_table-name} + +Description. + +Columns: + +- `column_name` ([data_type_name](path/to/data_type.md)) — Description. + +**Example** + +Query: + +``` sql +SELECT * FROM system.table_name +``` + +Result: + +``` text +Some output. It shouldn't be too long. +``` + +**See Also** + +- [Article name](path/to/article_name.md) — Some words about referenced information. diff --git a/docs/_includes/install/universal.sh b/docs/_includes/install/universal.sh index 1699be138c8a..0ae77f464eba 100755 --- a/docs/_includes/install/universal.sh +++ b/docs/_includes/install/universal.sh @@ -33,6 +33,12 @@ then elif [ "${ARCH}" = "powerpc64le" -o "${ARCH}" = "ppc64le" ] then DIR="powerpc64le" + elif [ "${ARCH}" = "riscv64" ] + then + DIR="riscv64" + elif [ "${ARCH}" = "s390x" ] + then + DIR="s390x" fi elif [ "${OS}" = "FreeBSD" ] then diff --git a/docs/changelogs/v22.8.20.11-lts.md b/docs/changelogs/v22.8.20.11-lts.md new file mode 100644 index 000000000000..bd45ce9319a5 --- /dev/null +++ b/docs/changelogs/v22.8.20.11-lts.md @@ -0,0 +1,20 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.8.20.11-lts (c9ca79e24e8) FIXME as compared to v22.8.19.10-lts (989bc2fe8b0) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix broken index analysis when binary operator contains a null constant argument [#50177](https://github.com/ClickHouse/ClickHouse/pull/50177) ([Amos Bird](https://github.com/amosbird)). +* Fix incorrect constant folding [#50536](https://github.com/ClickHouse/ClickHouse/pull/50536) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix fuzzer failure in ActionsDAG [#51301](https://github.com/ClickHouse/ClickHouse/pull/51301) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix segfault in MathUnary [#51499](https://github.com/ClickHouse/ClickHouse/pull/51499) ([Ilya Yatsishin](https://github.com/qoega)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Decoupled commits from [#51180](https://github.com/ClickHouse/ClickHouse/issues/51180) for backports [#51561](https://github.com/ClickHouse/ClickHouse/pull/51561) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v22.8.21.38-lts.md b/docs/changelogs/v22.8.21.38-lts.md new file mode 100644 index 000000000000..fc919b25735e --- /dev/null +++ b/docs/changelogs/v22.8.21.38-lts.md @@ -0,0 +1,36 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v22.8.21.38-lts (70872e9859e) FIXME as compared to v22.8.20.11-lts (c9ca79e24e8) + +#### Build/Testing/Packaging Improvement +* Backported in [#53017](https://github.com/ClickHouse/ClickHouse/issues/53017): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#53459](https://github.com/ClickHouse/ClickHouse/issues/53459): Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). 
[#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). +* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). +* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). +* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix broken `02862_sorted_distinct_sparse_fix` [#53738](https://github.com/ClickHouse/ClickHouse/pull/53738) ([Antonio Andelic](https://github.com/antonio2368)). +* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ diff --git a/docs/changelogs/v23.3.10.5-lts.md b/docs/changelogs/v23.3.10.5-lts.md new file mode 100644 index 000000000000..61c477477091 --- /dev/null +++ b/docs/changelogs/v23.3.10.5-lts.md @@ -0,0 +1,14 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.10.5-lts (d8737007f9e) FIXME as compared to v23.3.9.55-lts (b9c5c8622d3) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). + diff --git a/docs/changelogs/v23.3.11.5-lts.md b/docs/changelogs/v23.3.11.5-lts.md new file mode 100644 index 000000000000..b671c7e5bb6a --- /dev/null +++ b/docs/changelogs/v23.3.11.5-lts.md @@ -0,0 +1,17 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.11.5-lts (5762a23a76d) FIXME as compared to v23.3.10.5-lts (d8737007f9e) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.3.12.11-lts.md b/docs/changelogs/v23.3.12.11-lts.md new file mode 100644 index 000000000000..2eaaa575f60b --- /dev/null +++ b/docs/changelogs/v23.3.12.11-lts.md @@ -0,0 +1,20 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.12.11-lts (414317bed21) FIXME as compared to v23.3.11.5-lts (5762a23a76d) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix async insert with deduplication for ReplicatedMergeTree using merging algorithms [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix deadlock on DatabaseCatalog shutdown [#51908](https://github.com/ClickHouse/ClickHouse/pull/51908) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix crash in join on sparse column [#53548](https://github.com/ClickHouse/ClickHouse/pull/53548) ([vdimir](https://github.com/vdimir)). +* Fix rows_before_limit_at_least for DelayedSource. [#54122](https://github.com/ClickHouse/ClickHouse/pull/54122) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix broken `02862_sorted_distinct_sparse_fix` [#53738](https://github.com/ClickHouse/ClickHouse/pull/53738) ([Antonio Andelic](https://github.com/antonio2368)). + diff --git a/docs/changelogs/v23.3.13.6-lts.md b/docs/changelogs/v23.3.13.6-lts.md new file mode 100644 index 000000000000..bcc087218066 --- /dev/null +++ b/docs/changelogs/v23.3.13.6-lts.md @@ -0,0 +1,13 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.13.6-lts (25635e27551) FIXME as compared to v23.3.12.11-lts (414317bed21) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Backport moving/ part checking code from master [#54157](https://github.com/ClickHouse/ClickHouse/pull/54157) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.3.7.5-lts.md b/docs/changelogs/v23.3.7.5-lts.md new file mode 100644 index 000000000000..7a5fd5a19b6f --- /dev/null +++ b/docs/changelogs/v23.3.7.5-lts.md @@ -0,0 +1,16 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.7.5-lts (bc683c11c92) FIXME as compared to v23.3.6.7-lts (7e3f0a271b7) + +#### Build/Testing/Packaging Improvement +* Backported in [#51568](https://github.com/ClickHouse/ClickHouse/issues/51568): This a follow-up for [#51504](https://github.com/ClickHouse/ClickHouse/issues/51504), the cleanup was lost during refactoring. [#51564](https://github.com/ClickHouse/ClickHouse/pull/51564) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix fuzzer failure in ActionsDAG [#51301](https://github.com/ClickHouse/ClickHouse/pull/51301) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v23.3.8.21-lts.md b/docs/changelogs/v23.3.8.21-lts.md new file mode 100644 index 000000000000..83b5070ef525 --- /dev/null +++ b/docs/changelogs/v23.3.8.21-lts.md @@ -0,0 +1,23 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.8.21-lts (1675f2264f3) FIXME as compared to v23.3.7.5-lts (bc683c11c92) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix backward compatibility for IP types hashing in aggregate functions [#50551](https://github.com/ClickHouse/ClickHouse/pull/50551) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix segfault in MathUnary [#51499](https://github.com/ClickHouse/ClickHouse/pull/51499) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)). +* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)). +* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)). +* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Decoupled commits from [#51180](https://github.com/ClickHouse/ClickHouse/issues/51180) for backports [#51561](https://github.com/ClickHouse/ClickHouse/pull/51561) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix MergeTreeMarksLoader segfaulting if marks file is longer than expected [#51636](https://github.com/ClickHouse/ClickHouse/pull/51636) ([Michael Kolupaev](https://github.com/al13n321)). + diff --git a/docs/changelogs/v23.3.9.55-lts.md b/docs/changelogs/v23.3.9.55-lts.md new file mode 100644 index 000000000000..a08070892b52 --- /dev/null +++ b/docs/changelogs/v23.3.9.55-lts.md @@ -0,0 +1,45 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.3.9.55-lts (b9c5c8622d3) FIXME as compared to v23.3.8.21-lts (1675f2264f3) + +#### Performance Improvement +* Backported in [#52213](https://github.com/ClickHouse/ClickHouse/issues/52213): Do not store blocks in `ANY` hash join if nothing is inserted. 
[#48633](https://github.com/ClickHouse/ClickHouse/pull/48633) ([vdimir](https://github.com/vdimir)). +* Backported in [#52826](https://github.com/ClickHouse/ClickHouse/issues/52826): Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1`. This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823). This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173). [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)). + +#### Build/Testing/Packaging Improvement +* Backported in [#53019](https://github.com/ClickHouse/ClickHouse/issues/53019): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#53288](https://github.com/ClickHouse/ClickHouse/issues/53288): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud; the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#53461](https://github.com/ClickHouse/ClickHouse/issues/53461): Preserve environment parameters in the `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)). +* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Init and destroy the ares channel on demand. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)).
+* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). +* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). +* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). +* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). +* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix wrong columns order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)). + diff --git a/docs/changelogs/v23.4.5.22-stable.md b/docs/changelogs/v23.4.5.22-stable.md new file mode 100644 index 000000000000..2d61f5b11cf3 --- /dev/null +++ b/docs/changelogs/v23.4.5.22-stable.md @@ -0,0 +1,27 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.4.5.22-stable (0ced5d6a8da) FIXME as compared to v23.4.4.16-stable (747ba4fc6a0) + +#### Build/Testing/Packaging Improvement +* Backported in [#51530](https://github.com/ClickHouse/ClickHouse/issues/51530): Split huge `RUN` in Dockerfile into smaller conditional. Install the necessary tools on demand in the same `RUN` layer, and remove them after that. Upgrade the OS only once at the beginning. Use a modern way to check the signed repository. Downgrade the base repo to ubuntu:20.04 to address the issues on older docker versions. Upgrade golang version to address golang vulnerabilities. [#51504](https://github.com/ClickHouse/ClickHouse/pull/51504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#51570](https://github.com/ClickHouse/ClickHouse/issues/51570): This a follow-up for [#51504](https://github.com/ClickHouse/ClickHouse/issues/51504), the cleanup was lost during refactoring. [#51564](https://github.com/ClickHouse/ClickHouse/pull/51564) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix broken index analysis when binary operator contains a null constant argument [#50177](https://github.com/ClickHouse/ClickHouse/pull/50177) ([Amos Bird](https://github.com/amosbird)). +* Fix reconnecting of HTTPS session when target host IP was changed [#50240](https://github.com/ClickHouse/ClickHouse/pull/50240) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Fix incorrect constant folding [#50536](https://github.com/ClickHouse/ClickHouse/pull/50536) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix type of LDAP server params hash in cache entry [#50865](https://github.com/ClickHouse/ClickHouse/pull/50865) ([Julian Maicher](https://github.com/jmaicher)). +* Fallback to parsing big integer from String instead of exception in Parquet format [#50873](https://github.com/ClickHouse/ClickHouse/pull/50873) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not apply projection if read-in-order was enabled. [#50923](https://github.com/ClickHouse/ClickHouse/pull/50923) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix fuzzer failure in ActionsDAG [#51301](https://github.com/ClickHouse/ClickHouse/pull/51301) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Increase max array size in group bitmap [#50620](https://github.com/ClickHouse/ClickHouse/pull/50620) ([Kruglov Pavel](https://github.com/Avogar)). + diff --git a/docs/changelogs/v23.4.6.25-stable.md b/docs/changelogs/v23.4.6.25-stable.md new file mode 100644 index 000000000000..01a9c06f3e91 --- /dev/null +++ b/docs/changelogs/v23.4.6.25-stable.md @@ -0,0 +1,26 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.4.6.25-stable (a06848b1770) FIXME as compared to v23.4.5.22-stable (0ced5d6a8da) + +#### Improvement +* Backported in [#51234](https://github.com/ClickHouse/ClickHouse/issues/51234): Improve the progress bar for file/s3/hdfs/url table functions by using chunk size from source data and using incremental total size counting in each thread. Fix the progress bar for *Cluster functions. This closes [#47250](https://github.com/ClickHouse/ClickHouse/issues/47250). [#51088](https://github.com/ClickHouse/ClickHouse/pull/51088) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix backward compatibility for IP types hashing in aggregate functions [#50551](https://github.com/ClickHouse/ClickHouse/pull/50551) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix segfault in MathUnary [#51499](https://github.com/ClickHouse/ClickHouse/pull/51499) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)). +* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)). +* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Decoupled commits from [#51180](https://github.com/ClickHouse/ClickHouse/issues/51180) for backports [#51561](https://github.com/ClickHouse/ClickHouse/pull/51561) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Fix MergeTreeMarksLoader segfaulting if marks file is longer than expected [#51636](https://github.com/ClickHouse/ClickHouse/pull/51636) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix source image for sqllogic [#51728](https://github.com/ClickHouse/ClickHouse/pull/51728) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.5.4.25-stable.md b/docs/changelogs/v23.5.4.25-stable.md new file mode 100644 index 000000000000..53d3a7c9c0ad --- /dev/null +++ b/docs/changelogs/v23.5.4.25-stable.md @@ -0,0 +1,31 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.5.4.25-stable (190f962abcf) FIXME as compared to v23.5.3.24-stable (76f54616d3b) + +#### Improvement +* Backported in [#51235](https://github.com/ClickHouse/ClickHouse/issues/51235): Improve the progress bar for file/s3/hdfs/url table functions by using chunk size from source data and using incremental total size counting in each thread. Fix the progress bar for *Cluster functions. This closes [#47250](https://github.com/ClickHouse/ClickHouse/issues/47250). [#51088](https://github.com/ClickHouse/ClickHouse/pull/51088) ([Kruglov Pavel](https://github.com/Avogar)). +* Backported in [#51255](https://github.com/ClickHouse/ClickHouse/issues/51255): Disable cache setting `do_not_evict_index_and_mark_files` (Was enabled in `23.5`). [#51222](https://github.com/ClickHouse/ClickHouse/pull/51222) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Build/Testing/Packaging Improvement +* Backported in [#51531](https://github.com/ClickHouse/ClickHouse/issues/51531): Split huge `RUN` in Dockerfile into smaller conditional. Install the necessary tools on demand in the same `RUN` layer, and remove them after that. Upgrade the OS only once at the beginning. Use a modern way to check the signed repository. Downgrade the base repo to ubuntu:20.04 to address the issues on older docker versions. Upgrade golang version to address golang vulnerabilities. [#51504](https://github.com/ClickHouse/ClickHouse/pull/51504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#51572](https://github.com/ClickHouse/ClickHouse/issues/51572): This a follow-up for [#51504](https://github.com/ClickHouse/ClickHouse/issues/51504), the cleanup was lost during refactoring. [#51564](https://github.com/ClickHouse/ClickHouse/pull/51564) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Query Cache: Try to fix bad cast from ColumnConst to ColumnVector [#50704](https://github.com/ClickHouse/ClickHouse/pull/50704) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix type of LDAP server params hash in cache entry [#50865](https://github.com/ClickHouse/ClickHouse/pull/50865) ([Julian Maicher](https://github.com/jmaicher)). +* Fallback to parsing big integer from String instead of exception in Parquet format [#50873](https://github.com/ClickHouse/ClickHouse/pull/50873) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not apply projection if read-in-order was enabled. [#50923](https://github.com/ClickHouse/ClickHouse/pull/50923) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix race azure blob storage iterator [#50936](https://github.com/ClickHouse/ClickHouse/pull/50936) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). 
+* Fix ineffective query cache for SELECTs with subqueries [#51132](https://github.com/ClickHouse/ClickHouse/pull/51132) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix fuzzer failure in ActionsDAG [#51301](https://github.com/ClickHouse/ClickHouse/pull/51301) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Fix ParallelReadBuffer seek [#50820](https://github.com/ClickHouse/ClickHouse/pull/50820) ([Michael Kolupaev](https://github.com/al13n321)). + diff --git a/docs/changelogs/v23.5.5.92-stable.md b/docs/changelogs/v23.5.5.92-stable.md new file mode 100644 index 000000000000..ade39b7545d1 --- /dev/null +++ b/docs/changelogs/v23.5.5.92-stable.md @@ -0,0 +1,62 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.5.5.92-stable (557edaddace) FIXME as compared to v23.5.4.25-stable (190f962abcf) + +#### Performance Improvement +* Backported in [#52749](https://github.com/ClickHouse/ClickHouse/issues/52749): Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1`. This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823). This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173). [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)). + +#### Build/Testing/Packaging Improvement +* Backported in [#51886](https://github.com/ClickHouse/ClickHouse/issues/51886): Update cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)). +* Backported in [#52909](https://github.com/ClickHouse/ClickHouse/issues/52909): Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#53021](https://github.com/ClickHouse/ClickHouse/issues/53021): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Backported in [#53289](https://github.com/ClickHouse/ClickHouse/issues/53289): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud; the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#53463](https://github.com/ClickHouse/ClickHouse/issues/53463): Preserve environment parameters in the `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix backward compatibility for IP types hashing in aggregate functions [#50551](https://github.com/ClickHouse/ClickHouse/pull/50551) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix segfault in MathUnary [#51499](https://github.com/ClickHouse/ClickHouse/pull/51499) ([Ilya Yatsishin](https://github.com/qoega)).
+* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)). +* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)). +* Fix async connect to hosts with multiple ips [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)). +* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). +* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)). +* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). +* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). +* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). +* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). +* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix: interpolate expression takes source column instead of same name aliased from select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). 
+* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix fuzzer crash in parseDateTime() [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Decoupled commits from [#51180](https://github.com/ClickHouse/ClickHouse/issues/51180) for backports [#51561](https://github.com/ClickHouse/ClickHouse/pull/51561) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix MergeTreeMarksLoader segfaulting if marks file is longer than expected [#51636](https://github.com/ClickHouse/ClickHouse/pull/51636) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix source image for sqllogic [#51728](https://github.com/ClickHouse/ClickHouse/pull/51728) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Pin rust nightly (to make it stable) [#51903](https://github.com/ClickHouse/ClickHouse/pull/51903) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Less replication errors [#52382](https://github.com/ClickHouse/ClickHouse/pull/52382) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Improve logging macros [#52519](https://github.com/ClickHouse/ClickHouse/pull/52519) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.6.1.1524-stable.md b/docs/changelogs/v23.6.1.1524-stable.md new file mode 100644 index 000000000000..6d295d61ef44 --- /dev/null +++ b/docs/changelogs/v23.6.1.1524-stable.md @@ -0,0 +1,301 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.6.1.1524-stable (d1c7e13d088) FIXME as compared to v23.5.1.3174-stable (2fec796e73e) + +#### Backward Incompatible Change +* Delete feature `do_not_evict_index_and_mark_files` in the fs cache. This feature was only making things worse. [#51253](https://github.com/ClickHouse/ClickHouse/pull/51253) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove ALTER support for experimental LIVE VIEW. [#51287](https://github.com/ClickHouse/ClickHouse/pull/51287) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### New Feature +* Add setting `session_timezone`, it is used as default timezone for session when not explicitly specified. [#44149](https://github.com/ClickHouse/ClickHouse/pull/44149) ([Andrey Zvonov](https://github.com/zvonand)). 
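The `session_timezone` entry above is purely a settings-level change; here is a minimal SQL sketch of how it is meant to be used, assuming the setting behaves as described in [#44149](https://github.com/ClickHouse/ClickHouse/pull/44149). The timezone value and the queries are illustrative, not taken from the PR.

```sql
-- Assumed usage of the session_timezone setting from #44149:
-- it becomes the default timezone wherever one is not given explicitly.
SET session_timezone = 'Asia/Novosibirsk';

-- timeZone() should now report the session default rather than the server timezone.
SELECT timeZone();

-- The literal below is parsed in the session timezone, not the server one.
SELECT toDateTime('2023-06-01 12:00:00');
```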
+* Added the overlay database engine and representation of a directory as a database. This adds four databases: 1. DatabaseOverlay: implements the IDatabase interface and allows combining multiple databases, such as FileSystem and Memory. Internally, it stores a vector with other database pointers and proxies requests to them in turn until one executes successfully. 2. DatabaseFilesystem: allows read-only interaction with files stored on the file system. Internally, it uses TableFunctionFile to implicitly load a file when a user requests the table; the result of the TableFunctionFile call is cached to provide quick access. 3. DatabaseS3: allows read-only interaction with S3 storage; it uses TableFunctionS3 to implicitly load a table from S3. 4. DatabaseHDFS: allows interaction with HDFS storage; it uses TableFunctionHDFS to implicitly load a table from HDFS. [#48821](https://github.com/ClickHouse/ClickHouse/pull/48821) ([alekseygolub](https://github.com/alekseygolub)). +* Add a new setting named `use_mysql_types_in_show_columns` to alter the `SHOW COLUMNS` SQL statement to display MySQL-equivalent types when a client is connected via the MySQL compatibility port. [#49577](https://github.com/ClickHouse/ClickHouse/pull/49577) ([Thomas Panetti](https://github.com/tpanetti)). +* Added option `--rename_files_after_processing`. This closes [#34207](https://github.com/ClickHouse/ClickHouse/issues/34207). [#49626](https://github.com/ClickHouse/ClickHouse/pull/49626) ([alekseygolub](https://github.com/alekseygolub)). +* Add `TableFunctionRedis` and a Redis table engine, add `RedisCommon`, which contains Redis-related tools and types, and support `equals` and `in` filter push-down into Redis. [#50150](https://github.com/ClickHouse/ClickHouse/pull/50150) ([JackyWoo](https://github.com/JackyWoo)). +* Allow skipping empty files in the file/s3/url/hdfs table functions using the settings `s3_skip_empty_files`, `hdfs_skip_empty_files`, `engine_file_skip_empty_files`, `engine_url_skip_empty_files`. [#50364](https://github.com/ClickHouse/ClickHouse/pull/50364) ([Kruglov Pavel](https://github.com/Avogar)). +* clickhouse-client can now be called with a connection instead of "--host", "--port", "--user", etc. [#50689](https://github.com/ClickHouse/ClickHouse/pull/50689) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Codec DEFLATE_QPL is now controlled via the server setting "enable_deflate_qpl_codec" (default: false) instead of the setting "allow_experimental_codecs". This marks DEFLATE_QPL non-experimental. [#50775](https://github.com/ClickHouse/ClickHouse/pull/50775) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Performance Improvement +* Improve performance when QueryProfiler is enabled by using a thread-local timer_id instead of a global object. [#48778](https://github.com/ClickHouse/ClickHouse/pull/48778) ([Jiebin Sun](https://github.com/jiebinn)). +* Rewrite the CapnProto input/output format to improve its performance. Map column names and CapnProto fields case-insensitively; fix reading/writing of nested structure fields. [#49752](https://github.com/ClickHouse/ClickHouse/pull/49752) ([Kruglov Pavel](https://github.com/Avogar)). +* Optimize Parquet write performance for parallel threads. [#50102](https://github.com/ClickHouse/ClickHouse/pull/50102) ([Hongbin Ma](https://github.com/binmahone)). +* Disable `parallelize_output_from_storages` for processing MATERIALIZED VIEWs and storages with one block only.
[#50214](https://github.com/ClickHouse/ClickHouse/pull/50214) ([Azat Khuzhin](https://github.com/azat)). +* Avoid processing already sorted data: skip block permutation during sort if the block is already sorted (merges https://github.com/ClickHouse/ClickHouse/pull/46558). [#50697](https://github.com/ClickHouse/ClickHouse/pull/50697) ([Maksim Kita](https://github.com/kitaisreal)). +* In the earlier PRs ([#50062](https://github.com/ClickHouse/ClickHouse/issues/50062), [#50307](https://github.com/ClickHouse/ClickHouse/issues/50307)), we proposed an optimization pattern which transforms predicates with toYear/toYYYYMM into an equivalent but converter-free form. This transformation could bring a significant performance benefit to some workloads, such as SSB. However, as issue [#50628](https://github.com/ClickHouse/ClickHouse/issues/50628) indicated, these two PRs introduced issues which may result in incomplete query results; as a result, they were reverted by [#50629](https://github.com/ClickHouse/ClickHouse/issues/50629). [#50951](https://github.com/ClickHouse/ClickHouse/pull/50951) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Make multiple list requests to ZooKeeper in parallel to speed up reading from the system.zookeeper table. [#51042](https://github.com/ClickHouse/ClickHouse/pull/51042) ([Alexander Gololobov](https://github.com/davenger)). +* Speed up initialization of DateTime lookup tables for time zones. This should reduce the startup/connect time of clickhouse-client, especially in debug builds where it is rather heavy. [#51347](https://github.com/ClickHouse/ClickHouse/pull/51347) ([Alexander Gololobov](https://github.com/davenger)). + +#### Improvement +* Allow casting IPv6 to an IPv4 address for the CIDR ::ffff:0:0/96 (IPv4-mapped addresses). [#49759](https://github.com/ClickHouse/ClickHouse/pull/49759) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Update the MongoDB protocol to support MongoDB 5.1 and newer. Support for versions with the old protocol (<3.6) is preserved. Closes [#45621](https://github.com/ClickHouse/ClickHouse/issues/45621), [#49879](https://github.com/ClickHouse/ClickHouse/issues/49879). [#50061](https://github.com/ClickHouse/ClickHouse/pull/50061) ([Nikolay Degterinsky](https://github.com/evillique)). +* Improved scheduling of merge selecting and cleanup tasks in `ReplicatedMergeTree`. The tasks will not be executed too frequently when there's nothing to merge or clean up. Added settings `max_merge_selecting_sleep_ms`, `merge_selecting_sleep_slowdown_factor`, `max_cleanup_delay_period` and `cleanup_thread_preferred_points_per_iteration`. It should close [#31919](https://github.com/ClickHouse/ClickHouse/issues/31919). [#50107](https://github.com/ClickHouse/ClickHouse/pull/50107) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Support parallel replicas with the analyzer. [#50441](https://github.com/ClickHouse/ClickHouse/pull/50441) ([Raúl Marín](https://github.com/Algunenano)). +* Add setting `input_format_max_bytes_to_read_for_schema_inference` to limit the number of bytes to read in schema inference. Closes [#50577](https://github.com/ClickHouse/ClickHouse/issues/50577). [#50592](https://github.com/ClickHouse/ClickHouse/pull/50592) ([Kruglov Pavel](https://github.com/Avogar)). +* Respect setting input_format_as_default in schema inference. [#50602](https://github.com/ClickHouse/ClickHouse/pull/50602) ([Kruglov Pavel](https://github.com/Avogar)). +* Make filter push down through cross join.
[#50605](https://github.com/ClickHouse/ClickHouse/pull/50605) ([Han Fei](https://github.com/hanfei1991)). +* The actual lz4 version is used now. [#50621](https://github.com/ClickHouse/ClickHouse/pull/50621) ([Nikita Taranov](https://github.com/nickitat)). +* Allow skipping trailing empty lines in CSV/TSV/CustomSeparated formats via the settings `input_format_csv_skip_trailing_empty_lines`, `input_format_tsv_skip_trailing_empty_lines` and `input_format_custom_skip_trailing_empty_lines` (disabled by default; see the usage sketch below). Closes [#49315](https://github.com/ClickHouse/ClickHouse/issues/49315). [#50635](https://github.com/ClickHouse/ClickHouse/pull/50635) ([Kruglov Pavel](https://github.com/Avogar)). +* Functions "toDateOrDefault|OrNull()" and "accurateCast[OrDefault|OrNull]()" now correctly parse numeric arguments. [#50709](https://github.com/ClickHouse/ClickHouse/pull/50709) ([Dmitry Kardymon](https://github.com/kardymonds)). +* The CSV input format can now parse CSV files with a whitespace or \t field delimiter (these delimiters are supported in Spark). [#50712](https://github.com/ClickHouse/ClickHouse/pull/50712) ([KevinyhZou](https://github.com/KevinyhZou)). +* Settings `number_of_mutations_to_delay` and `number_of_mutations_to_throw` are enabled by default now with values 500 and 1000 respectively. [#50726](https://github.com/ClickHouse/ClickHouse/pull/50726) ([Anton Popov](https://github.com/CurtizJ)). +* Keeper improvement: add feature flags for the Keeper API. Each feature flag can be disabled or enabled by defining it under the `keeper_server.feature_flags` config. E.g. to enable the `CheckNotExists` request, `keeper_server.feature_flags.check_not_exists` should be set to `1` on Keeper. [#50796](https://github.com/ClickHouse/ClickHouse/pull/50796) ([Antonio Andelic](https://github.com/antonio2368)). +* The dashboard correctly shows missing values. This closes [#50831](https://github.com/ClickHouse/ClickHouse/issues/50831). [#50832](https://github.com/ClickHouse/ClickHouse/pull/50832) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* CGroups metrics related to CPU are replaced with one metric, `CGroupMaxCPU`, for better usability. The `Normalized` CPU usage metrics will be normalized to CGroups limits instead of the total number of CPUs when they are set. This closes [#50836](https://github.com/ClickHouse/ClickHouse/issues/50836). [#50835](https://github.com/ClickHouse/ClickHouse/pull/50835) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Relax the thresholds for "too many parts" to be more modern. Return the backpressure during long-running insert queries. [#50856](https://github.com/ClickHouse/ClickHouse/pull/50856) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added the possibility to use date and time arguments in the syslog timestamp format in the functions parseDateTimeBestEffort*() and parseDateTime64BestEffort*(). [#50925](https://github.com/ClickHouse/ClickHouse/pull/50925) ([Victor Krasnov](https://github.com/sirvickr)). +* Suggest using `APPEND` or `TRUNCATE` for `INTO OUTFILE` when the file exists. [#50950](https://github.com/ClickHouse/ClickHouse/pull/50950) ([alekar](https://github.com/alekar)). +* Add an embedded keeper-client to the standalone keeper binary. [#50964](https://github.com/ClickHouse/ClickHouse/pull/50964) ([pufit](https://github.com/pufit)). +* The command line parameter "--password" in clickhouse-client can now be specified only once. [#50966](https://github.com/ClickHouse/ClickHouse/pull/50966) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
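As flagged in the trailing-empty-lines entry above, a short usage sketch may help. The setting names are quoted verbatim from [#50635](https://github.com/ClickHouse/ClickHouse/pull/50635) and [#50364](https://github.com/ClickHouse/ClickHouse/pull/50364); the file name and schema are hypothetical examples, not taken from either PR.

```sql
-- Hedged sketch: 'data.csv' and its 'id UInt32, name String' schema are made up.
-- Ignore empty lines at the end of a CSV input (#50635):
SET input_format_csv_skip_trailing_empty_lines = 1;
-- Skip zero-byte files when reading through the s3 table function (#50364):
SET s3_skip_empty_files = 1;

SELECT * FROM file('data.csv', 'CSV', 'id UInt32, name String');
```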
+* Fix data lakes slowness because of synchronous head requests. (Related to Iceberg/Deltalake/Hudi being slow with a lot of files). [#50976](https://github.com/ClickHouse/ClickHouse/pull/50976) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Use `hash_of_all_files` from `system.parts` to check identity of parts during on-cluster backups. [#50997](https://github.com/ClickHouse/ClickHouse/pull/50997) ([Vitaly Baranov](https://github.com/vitlibar)). +* The system table zookeeper_connection connected_time identifies the time when the connection is established (standard format), and session_uptime_elapsed_seconds is added, which labels the duration of the established connection session (in seconds). [#51026](https://github.com/ClickHouse/ClickHouse/pull/51026) ([郭小龙](https://github.com/guoxiaolongzte)). +* Show halves of checksums in `system.parts`, `system.projection_parts` and in error messages in the correct order. [#51040](https://github.com/ClickHouse/ClickHouse/pull/51040) ([Vitaly Baranov](https://github.com/vitlibar)). +* Do not replicate `ALTER PARTITION` queries and mutations through `Replicated` database if it has only one shard and the underlying table is `ReplicatedMergeTree`. [#51049](https://github.com/ClickHouse/ClickHouse/pull/51049) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Improve the progress bar for file/s3/hdfs/url table functions by using chunk size from source data and using incremental total size counting in each thread. Fix the progress bar for *Cluster functions. This closes [#47250](https://github.com/ClickHouse/ClickHouse/issues/47250). [#51088](https://github.com/ClickHouse/ClickHouse/pull/51088) ([Kruglov Pavel](https://github.com/Avogar)). +* Add total_bytes_to_read to Progress packet in TCP protocol for better Progress bar. [#51158](https://github.com/ClickHouse/ClickHouse/pull/51158) ([Kruglov Pavel](https://github.com/Avogar)). +* Better checking of data parts on disks with filesystem cache. [#51164](https://github.com/ClickHouse/ClickHouse/pull/51164) ([Anton Popov](https://github.com/CurtizJ)). +* Disable cache setting `do_not_evict_index_and_mark_files` (Was enabled in `23.5`). [#51222](https://github.com/ClickHouse/ClickHouse/pull/51222) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix sometimes not correct current_elements_num in fs cache. [#51242](https://github.com/ClickHouse/ClickHouse/pull/51242) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add random sleep before merges/mutations execution to split load more evenly between replicas in case of zero-copy replication. [#51282](https://github.com/ClickHouse/ClickHouse/pull/51282) ([alesapin](https://github.com/alesapin)). +* The function `transform` as well as `CASE` with value matching started to support all data types. This closes [#29730](https://github.com/ClickHouse/ClickHouse/issues/29730). This closes [#32387](https://github.com/ClickHouse/ClickHouse/issues/32387). This closes [#50827](https://github.com/ClickHouse/ClickHouse/issues/50827). This closes [#31336](https://github.com/ClickHouse/ClickHouse/issues/31336). This closes [#40493](https://github.com/ClickHouse/ClickHouse/issues/40493). [#51351](https://github.com/ClickHouse/ClickHouse/pull/51351) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* We have found a bug in LLVM that makes the usage of `compile_expressions` setting unsafe. It is disabled by default. [#51368](https://github.com/ClickHouse/ClickHouse/pull/51368) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
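For the `transform`/`CASE` value-matching entry above ([#51351](https://github.com/ClickHouse/ClickHouse/pull/51351)), a small illustrative query; this is a hedged sketch of the feature, not an example from the PR itself.

```sql
-- transform(x, from, to, default) and CASE value matching express the same mapping;
-- per #51351, both now accept all data types for the matched values.
SELECT
    transform(number, [0, 1], ['zero', 'one'], 'many') AS via_transform,
    CASE number WHEN 0 THEN 'zero' WHEN 1 THEN 'one' ELSE 'many' END AS via_case
FROM system.numbers
LIMIT 4;
```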
+* Issue [#50220](https://github.com/ClickHouse/ClickHouse/issues/50220) reports a core dump in the `grace_hash` join. We finally reproduced the exception locally and found that the issue is related to a failure to create a temporary file. Somehow this is triggered in https://github.com/ClickHouse/ClickHouse/pull/49816 and https://github.com/ClickHouse/ClickHouse/pull/49483. [#51382](https://github.com/ClickHouse/ClickHouse/pull/51382) ([lgbo](https://github.com/lgbo-ustc)). + +#### Build/Testing/Packaging Improvement +* Update contrib/re2 to 2023-06-02. [#50949](https://github.com/ClickHouse/ClickHouse/pull/50949) ([Yuriy Chernyshov](https://github.com/georgthegreat)). +* ClickHouse server will print the list of changed settings on fatal errors. This closes [#51137](https://github.com/ClickHouse/ClickHouse/issues/51137). [#51138](https://github.com/ClickHouse/ClickHouse/pull/51138) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* In https://github.com/ClickHouse/ClickHouse/pull/51143 the fast tests failed, but the status wasn't created because of the chown `file not found` error. This addresses it. Decrease the default values for `http-max-field-value-size` and `http_max_field_name_size` to 128K. [#51163](https://github.com/ClickHouse/ClickHouse/pull/51163) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update the Ubuntu version in docker containers. [#51180](https://github.com/ClickHouse/ClickHouse/pull/51180) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Allow building ClickHouse with clang-17. [#51300](https://github.com/ClickHouse/ClickHouse/pull/51300) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The [SQLancer](https://github.com/sqlancer/sqlancer) check is considered stable as the bugs that were triggered by it are fixed. Failures of the SQLancer check will now be reported as a failed check status. [#51340](https://github.com/ClickHouse/ClickHouse/pull/51340) ([Ilya Yatsishin](https://github.com/qoega)). +* Making our CI even better. [#51494](https://github.com/ClickHouse/ClickHouse/pull/51494) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Split the huge `RUN` in the Dockerfile into smaller conditional steps. Install the necessary tools on demand in the same `RUN` layer, and remove them after that. Upgrade the OS only once at the beginning. Use a modern way to check the signed repository. Downgrade the base repo to ubuntu:20.04 to address the issues on older docker versions. Upgrade the golang version to address golang vulnerabilities. [#51504](https://github.com/ClickHouse/ClickHouse/pull/51504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* This is a follow-up for [#51504](https://github.com/ClickHouse/ClickHouse/issues/51504); the cleanup was lost during refactoring. [#51564](https://github.com/ClickHouse/ClickHouse/pull/51564) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Report loading status for executable dictionaries correctly [#48775](https://github.com/ClickHouse/ClickHouse/pull/48775) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Proper mutation of skip indices and projections [#50104](https://github.com/ClickHouse/ClickHouse/pull/50104) ([Amos Bird](https://github.com/amosbird)). +* Cleanup moving parts [#50489](https://github.com/ClickHouse/ClickHouse/pull/50489) ([vdimir](https://github.com/vdimir)).
+* Fix backward compatibility for IP types hashing in aggregate functions [#50551](https://github.com/ClickHouse/ClickHouse/pull/50551) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix Log family table return wrong rows count after truncate [#50585](https://github.com/ClickHouse/ClickHouse/pull/50585) ([flynn](https://github.com/ucasfl)). +* Fix bug in `uniqExact` parallel merging [#50590](https://github.com/ClickHouse/ClickHouse/pull/50590) ([Nikita Taranov](https://github.com/nickitat)). +* Revert recent grace hash join changes [#50699](https://github.com/ClickHouse/ClickHouse/pull/50699) ([vdimir](https://github.com/vdimir)). +* Query Cache: Try to fix bad cast from ColumnConst to ColumnVector [#50704](https://github.com/ClickHouse/ClickHouse/pull/50704) ([Robert Schulze](https://github.com/rschu1ze)). +* Do not read all the columns from right GLOBAL JOIN table. [#50721](https://github.com/ClickHouse/ClickHouse/pull/50721) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Avoid storing logs in Keeper containing unknown operation [#50751](https://github.com/ClickHouse/ClickHouse/pull/50751) ([Antonio Andelic](https://github.com/antonio2368)). +* SummingMergeTree support for DateTime64 [#50797](https://github.com/ClickHouse/ClickHouse/pull/50797) ([Jordi Villar](https://github.com/jrdi)). +* Add compat setting for non-const timezones [#50834](https://github.com/ClickHouse/ClickHouse/pull/50834) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix type of LDAP server params hash in cache entry [#50865](https://github.com/ClickHouse/ClickHouse/pull/50865) ([Julian Maicher](https://github.com/jmaicher)). +* Fallback to parsing big integer from String instead of exception in Parquet format [#50873](https://github.com/ClickHouse/ClickHouse/pull/50873) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix checking the lock file too often while writing a backup [#50889](https://github.com/ClickHouse/ClickHouse/pull/50889) ([Vitaly Baranov](https://github.com/vitlibar)). +* Do not apply projection if read-in-order was enabled. [#50923](https://github.com/ClickHouse/ClickHouse/pull/50923) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix race azure blob storage iterator [#50936](https://github.com/ClickHouse/ClickHouse/pull/50936) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix erroneous `sort_description` propagation in `CreatingSets` [#50955](https://github.com/ClickHouse/ClickHouse/pull/50955) ([Nikita Taranov](https://github.com/nickitat)). +* Fix iceberg V2 optional metadata parsing [#50974](https://github.com/ClickHouse/ClickHouse/pull/50974) ([Kseniia Sumarokova](https://github.com/kssenii)). +* MaterializedMySQL: Keep parentheses for empty table overrides [#50977](https://github.com/ClickHouse/ClickHouse/pull/50977) ([Val Doroshchuk](https://github.com/valbok)). +* Fix crash in BackupCoordinationStageSync::setError() [#51012](https://github.com/ClickHouse/ClickHouse/pull/51012) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix subtly broken copy-on-write of ColumnLowCardinality dictionary [#51064](https://github.com/ClickHouse/ClickHouse/pull/51064) ([Michael Kolupaev](https://github.com/al13n321)). +* Generate safe IVs [#51086](https://github.com/ClickHouse/ClickHouse/pull/51086) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Fix ineffective query cache for SELECTs with subqueries [#51132](https://github.com/ClickHouse/ClickHouse/pull/51132) ([Robert Schulze](https://github.com/rschu1ze)). 
+* Fix Set index with constant nullable comparison. [#51205](https://github.com/ClickHouse/ClickHouse/pull/51205) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix a crash in s3 and s3Cluster functions [#51209](https://github.com/ClickHouse/ClickHouse/pull/51209) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix core dump when compile expression [#51231](https://github.com/ClickHouse/ClickHouse/pull/51231) ([LiuNeng](https://github.com/liuneng1994)). +* Fix use-after-free in StorageURL when switching URLs [#51260](https://github.com/ClickHouse/ClickHouse/pull/51260) ([Michael Kolupaev](https://github.com/al13n321)). +* Updated check for parameterized view [#51272](https://github.com/ClickHouse/ClickHouse/pull/51272) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix multiple writing of same file to backup [#51299](https://github.com/ClickHouse/ClickHouse/pull/51299) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix fuzzer failure in ActionsDAG [#51301](https://github.com/ClickHouse/ClickHouse/pull/51301) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove garbage from function `transform` [#51350](https://github.com/ClickHouse/ClickHouse/pull/51350) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix MSan report in lowerUTF8/upperUTF8 [#51371](https://github.com/ClickHouse/ClickHouse/pull/51371) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* fs cache: fix a bit incorrect use_count after [#44985](https://github.com/ClickHouse/ClickHouse/issues/44985) [#51406](https://github.com/ClickHouse/ClickHouse/pull/51406) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix segfault in MathUnary [#51499](https://github.com/ClickHouse/ClickHouse/pull/51499) ([Ilya Yatsishin](https://github.com/qoega)). +* Fix logical assert in `tupleElement()` with default values [#51534](https://github.com/ClickHouse/ClickHouse/pull/51534) ([Robert Schulze](https://github.com/rschu1ze)). +* fs cache: remove file from opened file cache immediately when evicting file [#51596](https://github.com/ClickHouse/ClickHouse/pull/51596) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Deprecate delete-on-destroy.txt [#49181](https://github.com/ClickHouse/ClickHouse/pull/49181) ([Alexander Gololobov](https://github.com/davenger)). +* Attempt to increase the general runners' survival rate [#49283](https://github.com/ClickHouse/ClickHouse/pull/49283) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Refactor subqueries for IN [#49570](https://github.com/ClickHouse/ClickHouse/pull/49570) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Test plan optimization analyzer [#50095](https://github.com/ClickHouse/ClickHouse/pull/50095) ([Igor Nikonov](https://github.com/devcrafter)). +* Implement endianness-independent serialization for quantileTiming [#50324](https://github.com/ClickHouse/ClickHouse/pull/50324) ([ltrk2](https://github.com/ltrk2)). +* require `finalize()` call before d-tor for all writes buffers [#50395](https://github.com/ClickHouse/ClickHouse/pull/50395) ([Sema Checherinda](https://github.com/CheSema)). +* Implement big-endian support for the deterministic reservoir sampler [#50405](https://github.com/ClickHouse/ClickHouse/pull/50405) ([ltrk2](https://github.com/ltrk2)). +* Fix compilation error on big-endian platforms [#50406](https://github.com/ClickHouse/ClickHouse/pull/50406) ([ltrk2](https://github.com/ltrk2)). 
+* Attach gdb in stateless tests [#50487](https://github.com/ClickHouse/ClickHouse/pull/50487) ([Kruglov Pavel](https://github.com/Avogar)). +* JIT infrastructure refactoring [#50531](https://github.com/ClickHouse/ClickHouse/pull/50531) ([Maksim Kita](https://github.com/kitaisreal)). +* Analyzer: Do not apply Query Tree optimizations on shards [#50584](https://github.com/ClickHouse/ClickHouse/pull/50584) ([Dmitry Novik](https://github.com/novikd)). +* Increase max array size in group bitmap [#50620](https://github.com/ClickHouse/ClickHouse/pull/50620) ([Kruglov Pavel](https://github.com/Avogar)). +* Misc Annoy index improvements [#50661](https://github.com/ClickHouse/ClickHouse/pull/50661) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix reading negative decimals in Avro format [#50668](https://github.com/ClickHouse/ClickHouse/pull/50668) ([Kruglov Pavel](https://github.com/Avogar)). +* Unify priorities for connection pools [#50675](https://github.com/ClickHouse/ClickHouse/pull/50675) ([Sergei Trifonov](https://github.com/serxa)). +* Postpone check of outdated parts [#50676](https://github.com/ClickHouse/ClickHouse/pull/50676) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Unify priorities: `IExecutableTask`s [#50677](https://github.com/ClickHouse/ClickHouse/pull/50677) ([Sergei Trifonov](https://github.com/serxa)). +* Disable grace_hash join in stress tests [#50693](https://github.com/ClickHouse/ClickHouse/pull/50693) ([vdimir](https://github.com/vdimir)). +* ReverseTransform small improvement [#50698](https://github.com/ClickHouse/ClickHouse/pull/50698) ([Maksim Kita](https://github.com/kitaisreal)). +* Support OPTIMIZE for temporary tables [#50710](https://github.com/ClickHouse/ClickHouse/pull/50710) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Refactor reading from object storages [#50711](https://github.com/ClickHouse/ClickHouse/pull/50711) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix data race in log message of cached buffer [#50723](https://github.com/ClickHouse/ClickHouse/pull/50723) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add new keywords into projections documentation [#50743](https://github.com/ClickHouse/ClickHouse/pull/50743) ([YalalovSM](https://github.com/YalalovSM)). +* Fix build for aarch64 (temporarily disable azure) [#50770](https://github.com/ClickHouse/ClickHouse/pull/50770) ([alesapin](https://github.com/alesapin)). +* Update version after release [#50772](https://github.com/ClickHouse/ClickHouse/pull/50772) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v23.5.1.3174-stable [#50774](https://github.com/ClickHouse/ClickHouse/pull/50774) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update CHANGELOG.md [#50788](https://github.com/ClickHouse/ClickHouse/pull/50788) ([Ilya Yatsishin](https://github.com/qoega)). +* Update version_date.tsv and changelogs after v23.2.7.32-stable [#50809](https://github.com/ClickHouse/ClickHouse/pull/50809) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Desctructing --> Destructing [#50810](https://github.com/ClickHouse/ClickHouse/pull/50810) ([Robert Schulze](https://github.com/rschu1ze)). +* Don't mark a part as broken on `Poco::TimeoutException` [#50811](https://github.com/ClickHouse/ClickHouse/pull/50811) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Rename azure_blob_storage to azureBlobStorage [#50812](https://github.com/ClickHouse/ClickHouse/pull/50812) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix ParallelReadBuffer seek [#50820](https://github.com/ClickHouse/ClickHouse/pull/50820) ([Michael Kolupaev](https://github.com/al13n321)). +* [RFC] Print git hash when crashing [#50823](https://github.com/ClickHouse/ClickHouse/pull/50823) ([Michael Kolupaev](https://github.com/al13n321)). +* Add tests for function "transform" [#50833](https://github.com/ClickHouse/ClickHouse/pull/50833) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v23.5.2.7-stable [#50844](https://github.com/ClickHouse/ClickHouse/pull/50844) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Updated changelog with azureBlobStorage table function & engine entry [#50850](https://github.com/ClickHouse/ClickHouse/pull/50850) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Update easy_tasks_sorted_ru.md [#50853](https://github.com/ClickHouse/ClickHouse/pull/50853) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Document x86 / ARM prerequisites for Docker image [#50867](https://github.com/ClickHouse/ClickHouse/pull/50867) ([Robert Schulze](https://github.com/rschu1ze)). +* MaterializedMySQL: Add test_named_collections [#50874](https://github.com/ClickHouse/ClickHouse/pull/50874) ([Val Doroshchuk](https://github.com/valbok)). +* Update version_date.tsv and changelogs after v22.8.18.31-lts [#50881](https://github.com/ClickHouse/ClickHouse/pull/50881) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.3.3.52-lts [#50882](https://github.com/ClickHouse/ClickHouse/pull/50882) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.4.3.48-stable [#50883](https://github.com/ClickHouse/ClickHouse/pull/50883) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* MaterializedMySQL: Add additional test case to insert_with_modify_binlog_checksum [#50884](https://github.com/ClickHouse/ClickHouse/pull/50884) ([Val Doroshchuk](https://github.com/valbok)). +* Update broken tests list [#50886](https://github.com/ClickHouse/ClickHouse/pull/50886) ([Dmitry Novik](https://github.com/novikd)). +* Fix LOGICAL_ERROR in snowflakeToDateTime*() [#50893](https://github.com/ClickHouse/ClickHouse/pull/50893) ([Robert Schulze](https://github.com/rschu1ze)). +* Tests with parallel replicas are no more "always green" [#50896](https://github.com/ClickHouse/ClickHouse/pull/50896) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Slightly more information in error message about cached disk [#50897](https://github.com/ClickHouse/ClickHouse/pull/50897) ([Michael Kolupaev](https://github.com/al13n321)). +* do not call finalize after exception [#50907](https://github.com/ClickHouse/ClickHouse/pull/50907) ([Sema Checherinda](https://github.com/CheSema)). +* Update Annoy docs [#50912](https://github.com/ClickHouse/ClickHouse/pull/50912) ([Robert Schulze](https://github.com/rschu1ze)). +* A bit safer UserDefinedSQLFunctionVisitor [#50913](https://github.com/ClickHouse/ClickHouse/pull/50913) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update contribe/orc in .gitmodules [#50920](https://github.com/ClickHouse/ClickHouse/pull/50920) ([San](https://github.com/santrancisco)). 
+* MaterializedMySQL: Add missing DROP DATABASE for tests [#50924](https://github.com/ClickHouse/ClickHouse/pull/50924) ([Val Doroshchuk](https://github.com/valbok)).
+* Fix 'Illegal column timezone' in stress tests [#50929](https://github.com/ClickHouse/ClickHouse/pull/50929) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix tests sanity checks and avoid dropping system.query_log table [#50934](https://github.com/ClickHouse/ClickHouse/pull/50934) ([Azat Khuzhin](https://github.com/azat)).
+* Fix tests for throttling by allowing more margin of error for the throttling event [#50935](https://github.com/ClickHouse/ClickHouse/pull/50935) ([Azat Khuzhin](https://github.com/azat)).
+* 01746_convert_type_with_default: Temporarily disable flaky test [#50937](https://github.com/ClickHouse/ClickHouse/pull/50937) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix the stateless tests image for old commits [#50947](https://github.com/ClickHouse/ClickHouse/pull/50947) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix logic in `AsynchronousBoundedReadBuffer::seek` [#50952](https://github.com/ClickHouse/ClickHouse/pull/50952) ([Nikita Taranov](https://github.com/nickitat)).
+* Uncomment flaky test (01746_convert_type_with_default) [#50954](https://github.com/ClickHouse/ClickHouse/pull/50954) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Fix keeper-client help message [#50965](https://github.com/ClickHouse/ClickHouse/pull/50965) ([pufit](https://github.com/pufit)).
+* Fix build issue on clang 15 [#50967](https://github.com/ClickHouse/ClickHouse/pull/50967) ([Chang chen](https://github.com/baibaichen)).
+* Docs: Fix embedded video link [#50972](https://github.com/ClickHouse/ClickHouse/pull/50972) ([Robert Schulze](https://github.com/rschu1ze)).
+* Change submodule capnproto to its fork in ClickHouse [#50987](https://github.com/ClickHouse/ClickHouse/pull/50987) ([Kruglov Pavel](https://github.com/Avogar)).
+* Attempt to make 01281_group_by_limit_memory_tracking not flaky [#50995](https://github.com/ClickHouse/ClickHouse/pull/50995) ([Dmitry Novik](https://github.com/novikd)).
+* Fix flaky 02561_null_as_default_more_formats [#51001](https://github.com/ClickHouse/ClickHouse/pull/51001) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix flaky test_seekable_formats [#51002](https://github.com/ClickHouse/ClickHouse/pull/51002) ([Kruglov Pavel](https://github.com/Avogar)).
+* Follow-up to [#50448](https://github.com/ClickHouse/ClickHouse/issues/50448) [#51006](https://github.com/ClickHouse/ClickHouse/pull/51006) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix the versions' tweak for tagged commits, improve version_helper [#51035](https://github.com/ClickHouse/ClickHouse/pull/51035) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* SQLancer has changed master to main [#51060](https://github.com/ClickHouse/ClickHouse/pull/51060) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Do not spam the SQLancer build log [#51061](https://github.com/ClickHouse/ClickHouse/pull/51061) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Refactor IColumn::forEachSubcolumn to make it slightly harder to implement incorrectly [#51072](https://github.com/ClickHouse/ClickHouse/pull/51072) ([Michael Kolupaev](https://github.com/al13n321)).
+* MaterializedMySQL: Rename materialize_with_ddl.py -> materialized_with_ddl [#51074](https://github.com/ClickHouse/ClickHouse/pull/51074) ([Val Doroshchuk](https://github.com/valbok)).
+* Improve woboq browser report [#51077](https://github.com/ClickHouse/ClickHouse/pull/51077) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Fix for part_names_mutex used after destruction [#51099](https://github.com/ClickHouse/ClickHouse/pull/51099) ([Alexander Gololobov](https://github.com/davenger)).
+* Fix ColumnConst::forEachSubcolumn missing from previous PR [#51102](https://github.com/ClickHouse/ClickHouse/pull/51102) ([Michael Kolupaev](https://github.com/al13n321)).
+* Fix the test 02783_parsedatetimebesteffort_syslog flakiness [#51112](https://github.com/ClickHouse/ClickHouse/pull/51112) ([Victor Krasnov](https://github.com/sirvickr)).
+* Compatibility with clang-17 [#51114](https://github.com/ClickHouse/ClickHouse/pull/51114) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Make more parallel get requests to ZooKeeper in system.zookeeper [#51118](https://github.com/ClickHouse/ClickHouse/pull/51118) ([Alexander Gololobov](https://github.com/davenger)).
+* Fix 02703_max_local_write_bandwidth flakiness [#51120](https://github.com/ClickHouse/ClickHouse/pull/51120) ([Azat Khuzhin](https://github.com/azat)).
+* Update version_date.tsv and changelogs after v23.5.3.24-stable [#51121](https://github.com/ClickHouse/ClickHouse/pull/51121) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Update version_date.tsv and changelogs after v23.4.4.16-stable [#51122](https://github.com/ClickHouse/ClickHouse/pull/51122) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Update version_date.tsv and changelogs after v23.3.4.17-lts [#51123](https://github.com/ClickHouse/ClickHouse/pull/51123) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Update version_date.tsv and changelogs after v22.8.19.10-lts [#51124](https://github.com/ClickHouse/ClickHouse/pull/51124) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Fix typo [#51126](https://github.com/ClickHouse/ClickHouse/pull/51126) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Slightly better diagnostics [#51127](https://github.com/ClickHouse/ClickHouse/pull/51127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Small fix in `MergeTreePrefetchedReadPool` [#51131](https://github.com/ClickHouse/ClickHouse/pull/51131) ([Nikita Taranov](https://github.com/nickitat)).
+* Don't report table function accesses to system.errors [#51147](https://github.com/ClickHouse/ClickHouse/pull/51147) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix SQLancer branch name [#51148](https://github.com/ClickHouse/ClickHouse/pull/51148) ([Ilya Yatsishin](https://github.com/qoega)).
+* Revert "Added ability to implicitly use file/hdfs/s3 table functions in clickhouse-local" [#51149](https://github.com/ClickHouse/ClickHouse/pull/51149) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* More profile events for fs cache [#51161](https://github.com/ClickHouse/ClickHouse/pull/51161) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Don't forget to pass the callback to readBigAt() in ParallelReadBuffer [#51165](https://github.com/ClickHouse/ClickHouse/pull/51165) ([Michael Kolupaev](https://github.com/al13n321)).
+* Update README.md [#51179](https://github.com/ClickHouse/ClickHouse/pull/51179) ([Tyler Hannan](https://github.com/tylerhannan)).
+* Update exception message [#51187](https://github.com/ClickHouse/ClickHouse/pull/51187) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Split long test 02149_schema_inference_formats_with_schema into several tests to avoid timeout in debug [#51197](https://github.com/ClickHouse/ClickHouse/pull/51197) ([Kruglov Pavel](https://github.com/Avogar)).
+* Avoid initializing DateLUT from emptyArray function registration [#51199](https://github.com/ClickHouse/ClickHouse/pull/51199) ([Alexander Gololobov](https://github.com/davenger)).
+* Suppress check for covered parts in ZooKeeper [#51207](https://github.com/ClickHouse/ClickHouse/pull/51207) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* One more profile event for fs cache [#51223](https://github.com/ClickHouse/ClickHouse/pull/51223) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Typo: passowrd_sha256_hex --> password_sha256_hex [#51233](https://github.com/ClickHouse/ClickHouse/pull/51233) ([Robert Schulze](https://github.com/rschu1ze)).
+* Introduce settings enum field with auto-generated values list [#51237](https://github.com/ClickHouse/ClickHouse/pull/51237) ([Sergei Trifonov](https://github.com/serxa)).
+* Drop session if we fail to get Keeper API version [#51238](https://github.com/ClickHouse/ClickHouse/pull/51238) ([Alexander Gololobov](https://github.com/davenger)).
+* Revert "Fix a crash in s3 and s3Cluster functions" [#51239](https://github.com/ClickHouse/ClickHouse/pull/51239) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix flaky `AsyncLoader` destructor [#51245](https://github.com/ClickHouse/ClickHouse/pull/51245) ([Sergei Trifonov](https://github.com/serxa)).
+* Docs: little cleanup of configuration-files.md [#51249](https://github.com/ClickHouse/ClickHouse/pull/51249) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix a stupid bug on Replicated database recovery [#51252](https://github.com/ClickHouse/ClickHouse/pull/51252) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* FileCache: tryReserve() slight improvement [#51259](https://github.com/ClickHouse/ClickHouse/pull/51259) ([Igor Nikonov](https://github.com/devcrafter)).
+* Ugly hotfix for "terminate on uncaught exception" in WriteBufferFromOStream [#51265](https://github.com/ClickHouse/ClickHouse/pull/51265) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Avoid too many calls to Poco::Logger::get [#51266](https://github.com/ClickHouse/ClickHouse/pull/51266) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Update version_date.tsv and changelogs after v23.3.5.9-lts [#51269](https://github.com/ClickHouse/ClickHouse/pull/51269) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Better reporting of broken parts [#51270](https://github.com/ClickHouse/ClickHouse/pull/51270) ([Anton Popov](https://github.com/CurtizJ)).
+* Update ext-dict-functions.md [#51283](https://github.com/ClickHouse/ClickHouse/pull/51283) ([Mike Kot](https://github.com/myrrc)).
+* Disable table structure check for secondary queries from Replicated db [#51284](https://github.com/ClickHouse/ClickHouse/pull/51284) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Define Thrift version for parquet and use correct arrow version [#51285](https://github.com/ClickHouse/ClickHouse/pull/51285) ([Kruglov Pavel](https://github.com/Avogar)).
+* Restore Azure build on ARM [#51288](https://github.com/ClickHouse/ClickHouse/pull/51288) ([Robert Schulze](https://github.com/rschu1ze)).
+* Query Cache: Un-comment settings in server cfg [#51294](https://github.com/ClickHouse/ClickHouse/pull/51294) ([Robert Schulze](https://github.com/rschu1ze)).
+* Require more checks [#51295](https://github.com/ClickHouse/ClickHouse/pull/51295) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix metadata loading test [#51297](https://github.com/ClickHouse/ClickHouse/pull/51297) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Scratch the strange Python code [#51302](https://github.com/ClickHouse/ClickHouse/pull/51302) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#47865](https://github.com/ClickHouse/ClickHouse/issues/47865) [#51306](https://github.com/ClickHouse/ClickHouse/pull/51306) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#48894](https://github.com/ClickHouse/ClickHouse/issues/48894) [#51307](https://github.com/ClickHouse/ClickHouse/pull/51307) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for [#48676](https://github.com/ClickHouse/ClickHouse/issues/48676) [#51308](https://github.com/ClickHouse/ClickHouse/pull/51308) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix long test `functions_bad_arguments` [#51310](https://github.com/ClickHouse/ClickHouse/pull/51310) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Unify merge predicate [#51344](https://github.com/ClickHouse/ClickHouse/pull/51344) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix using locks in ProcessList [#51348](https://github.com/ClickHouse/ClickHouse/pull/51348) ([Vitaly Baranov](https://github.com/vitlibar)). +* Add a test for [#42631](https://github.com/ClickHouse/ClickHouse/issues/42631) [#51353](https://github.com/ClickHouse/ClickHouse/pull/51353) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix performance tests due to warnings from jemalloc about Per-CPU arena disabled [#51362](https://github.com/ClickHouse/ClickHouse/pull/51362) ([Azat Khuzhin](https://github.com/azat)). +* Fix "merge_truncate_long" test [#51369](https://github.com/ClickHouse/ClickHouse/pull/51369) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Increase timeout of Fast Test [#51372](https://github.com/ClickHouse/ClickHouse/pull/51372) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad tests for DNS [#51374](https://github.com/ClickHouse/ClickHouse/pull/51374) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Attempt to fix the `relax_too_many_parts` test [#51375](https://github.com/ClickHouse/ClickHouse/pull/51375) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix MySQL test in Debug mode [#51376](https://github.com/ClickHouse/ClickHouse/pull/51376) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad test `01018_Distributed__shard_num` [#51377](https://github.com/ClickHouse/ClickHouse/pull/51377) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix "logical error" in addressToLineWithInlines [#51379](https://github.com/ClickHouse/ClickHouse/pull/51379) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test 01280_ttl_where_group_by [#51380](https://github.com/ClickHouse/ClickHouse/pull/51380) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Attempt to fix `test_ssl_cert_authentication` [#51384](https://github.com/ClickHouse/ClickHouse/pull/51384) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Revert "Merge pull request [#50951](https://github.com/ClickHouse/ClickHouse/issues/50951) from ZhiguoZh/20230607-toyear-fix" [#51390](https://github.com/ClickHouse/ClickHouse/pull/51390) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Two tests are twice longer in average with Analyzer and sometimes failing [#51391](https://github.com/ClickHouse/ClickHouse/pull/51391) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix 00899_long_attach_memory_limit [#51395](https://github.com/ClickHouse/ClickHouse/pull/51395) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test 01293_optimize_final_force [#51396](https://github.com/ClickHouse/ClickHouse/pull/51396) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test 02481_parquet_list_monotonically_increasing_offsets [#51397](https://github.com/ClickHouse/ClickHouse/pull/51397) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test 02497_trace_events_stress_long [#51398](https://github.com/ClickHouse/ClickHouse/pull/51398) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix broken labeling for `manual approve` [#51405](https://github.com/ClickHouse/ClickHouse/pull/51405) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fix parts lifetime in `MergeTreeTransaction` [#51407](https://github.com/ClickHouse/ClickHouse/pull/51407) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix flaky test test_skip_empty_files [#51409](https://github.com/ClickHouse/ClickHouse/pull/51409) ([Kruglov Pavel](https://github.com/Avogar)). +* fix flacky test test_profile_events_s3 [#51412](https://github.com/ClickHouse/ClickHouse/pull/51412) ([Sema Checherinda](https://github.com/CheSema)). +* Update README.md [#51413](https://github.com/ClickHouse/ClickHouse/pull/51413) ([Tyler Hannan](https://github.com/tylerhannan)). +* Replace try/catch logic in hasTokenOrNull() by something more lightweight [#51425](https://github.com/ClickHouse/ClickHouse/pull/51425) ([Robert Schulze](https://github.com/rschu1ze)). +* Add retries to `tlsv1_3` tests [#51434](https://github.com/ClickHouse/ClickHouse/pull/51434) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Update exception message [#51440](https://github.com/ClickHouse/ClickHouse/pull/51440) ([Kseniia Sumarokova](https://github.com/kssenii)). +* fs cache: add check for intersecting ranges [#51444](https://github.com/ClickHouse/ClickHouse/pull/51444) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Slightly better code around packets for parallel replicas [#51451](https://github.com/ClickHouse/ClickHouse/pull/51451) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Update system_warnings test [#51453](https://github.com/ClickHouse/ClickHouse/pull/51453) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Many fixes [#51455](https://github.com/ClickHouse/ClickHouse/pull/51455) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test 01605_adaptive_granularity_block_borders [#51457](https://github.com/ClickHouse/ClickHouse/pull/51457) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Try fix flaky 02497_storage_file_reader_selection [#51468](https://github.com/ClickHouse/ClickHouse/pull/51468) ([Kruglov Pavel](https://github.com/Avogar)). +* Try making Keeper in `DatabaseReplicated` tests more stable [#51473](https://github.com/ClickHouse/ClickHouse/pull/51473) ([Antonio Andelic](https://github.com/antonio2368)). 
+* Convert 02003_memory_limit_in_client from expect to sh test (to fix flakiness) [#51475](https://github.com/ClickHouse/ClickHouse/pull/51475) ([Azat Khuzhin](https://github.com/azat)).
+* Fix test_disk_over_web_server [#51476](https://github.com/ClickHouse/ClickHouse/pull/51476) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Delay shutdown of system and temporary databases [#51479](https://github.com/ClickHouse/ClickHouse/pull/51479) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix memory leak in CompressionCodecDeflateQpl [#51480](https://github.com/ClickHouse/ClickHouse/pull/51480) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Increase retries in test_multiple_disks/test.py::test_start_stop_moves [#51482](https://github.com/ClickHouse/ClickHouse/pull/51482) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix race in BoundedReadBuffer [#51484](https://github.com/ClickHouse/ClickHouse/pull/51484) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix flaky unit test [#51485](https://github.com/ClickHouse/ClickHouse/pull/51485) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix flaky test `test_host_regexp_multiple_ptr_records` [#51506](https://github.com/ClickHouse/ClickHouse/pull/51506) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Add a comment [#51517](https://github.com/ClickHouse/ClickHouse/pull/51517) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Make `test_ssl_cert_authentication` similar to `test_tlsv1_3` [#51520](https://github.com/ClickHouse/ClickHouse/pull/51520) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Fix duplicate storage set logical error. [#51521](https://github.com/ClickHouse/ClickHouse/pull/51521) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Update test_storage_postgresql/test.py::test_concurrent_queries [#51523](https://github.com/ClickHouse/ClickHouse/pull/51523) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix FATAL: query context is not detached from thread group [#51540](https://github.com/ClickHouse/ClickHouse/pull/51540) ([Igor Nikonov](https://github.com/devcrafter)).
+* Update version_date.tsv and changelogs after v23.3.6.7-lts [#51548](https://github.com/ClickHouse/ClickHouse/pull/51548) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Decoupled commits from [#51180](https://github.com/ClickHouse/ClickHouse/issues/51180) for backports [#51561](https://github.com/ClickHouse/ClickHouse/pull/51561) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Try to fix deadlock in ZooKeeper client [#51563](https://github.com/ClickHouse/ClickHouse/pull/51563) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Retry chroot creation in ZK before stateless tests [#51585](https://github.com/ClickHouse/ClickHouse/pull/51585) ([Antonio Andelic](https://github.com/antonio2368)).
+* Use timeout instead of trap in 01443_merge_truncate_long.sh [#51593](https://github.com/ClickHouse/ClickHouse/pull/51593) ([Sema Checherinda](https://github.com/CheSema)).
+* Update version_date.tsv and changelogs after v23.5.4.25-stable [#51604](https://github.com/ClickHouse/ClickHouse/pull/51604) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Fix MergeTreeMarksLoader segfaulting if marks file is longer than expected [#51636](https://github.com/ClickHouse/ClickHouse/pull/51636) ([Michael Kolupaev](https://github.com/al13n321)).
+* Update version_date.tsv and changelogs after v23.4.5.22-stable [#51638](https://github.com/ClickHouse/ClickHouse/pull/51638) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.3.7.5-lts [#51639](https://github.com/ClickHouse/ClickHouse/pull/51639) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update parts.md [#51643](https://github.com/ClickHouse/ClickHouse/pull/51643) ([Ramazan Polat](https://github.com/ramazanpolat)). + diff --git a/docs/changelogs/v23.6.2.18-stable.md b/docs/changelogs/v23.6.2.18-stable.md new file mode 100644 index 000000000000..1f872a190ba7 --- /dev/null +++ b/docs/changelogs/v23.6.2.18-stable.md @@ -0,0 +1,25 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.6.2.18-stable (89f39a7ccfe) FIXME as compared to v23.6.1.1524-stable (d1c7e13d088) + +#### Build/Testing/Packaging Improvement +* Backported in [#51888](https://github.com/ClickHouse/ClickHouse/issues/51888): Update cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)). +* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Remove the usage of Analyzer setting in the client [#51578](https://github.com/ClickHouse/ClickHouse/pull/51578) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix 02116_tuple_element with Analyzer [#51669](https://github.com/ClickHouse/ClickHouse/pull/51669) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix SQLLogic docker images [#51719](https://github.com/ClickHouse/ClickHouse/pull/51719) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix source image for sqllogic [#51728](https://github.com/ClickHouse/ClickHouse/pull/51728) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Pin for docker-ce [#51743](https://github.com/ClickHouse/ClickHouse/pull/51743) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.6.3.87-stable.md b/docs/changelogs/v23.6.3.87-stable.md new file mode 100644 index 000000000000..8db499f308ab --- /dev/null +++ b/docs/changelogs/v23.6.3.87-stable.md @@ -0,0 +1,58 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.6.3.87-stable (36911c17d0f) FIXME as compared to v23.6.2.18-stable (89f39a7ccfe) + +#### Performance Improvement +* Backported in [#52751](https://github.com/ClickHouse/ClickHouse/issues/52751): Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1` . This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823) . This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173) . [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)). + +#### Build/Testing/Packaging Improvement +* Backported in [#52911](https://github.com/ClickHouse/ClickHouse/issues/52911): Add `clickhouse-keeper-client` symlink to the clickhouse-server package. 
[#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Backported in [#53023](https://github.com/ClickHouse/ClickHouse/issues/53023): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's safe to remove. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Backported in [#53290](https://github.com/ClickHouse/ClickHouse/issues/53290): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud; this is the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Backported in [#53465](https://github.com/ClickHouse/ClickHouse/issues/53465): Preserve environment parameters in the `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)).
+* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)).
+* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)).
+* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)).
+* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)).
+* Init and destroy ares channel on demand. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)).
+* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)).
+* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)).
+* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)).
+* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)).
+* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Fix: the INTERPOLATE expression took the source column instead of the same-name alias from the SELECT expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix fuzzer crash in parseDateTime() [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Fix: logical error in grace hash join [#51737](https://github.com/ClickHouse/ClickHouse/pull/51737) ([Igor Nikonov](https://github.com/devcrafter)).
+* Pin rust nightly (to make it stable) [#51903](https://github.com/ClickHouse/ClickHouse/pull/51903) ([Azat Khuzhin](https://github.com/azat)).
+* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fewer replication errors [#52382](https://github.com/ClickHouse/ClickHouse/pull/52382) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Improve logging macros [#52519](https://github.com/ClickHouse/ClickHouse/pull/52519) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Increase min protocol version for sparse serialization [#52835](https://github.com/ClickHouse/ClickHouse/pull/52835) ([Anton Popov](https://github.com/CurtizJ)).
+* Docker improvements [#52869](https://github.com/ClickHouse/ClickHouse/pull/52869) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
diff --git a/docs/changelogs/v23.7.1.2470-stable.md b/docs/changelogs/v23.7.1.2470-stable.md
new file mode 100644
index 000000000000..a77078cb6532
--- /dev/null
+++ b/docs/changelogs/v23.7.1.2470-stable.md
@@ -0,0 +1,452 @@
+---
+sidebar_position: 1
+sidebar_label: 2023
+---
+
+# 2023 Changelog
+
+### ClickHouse release v23.7.1.2470-stable (a70127baecc) FIXME as compared to v23.6.1.1524-stable (d1c7e13d088)
+
+#### Backward Incompatible Change
+* Add `NAMED COLLECTION` access type (aliases `USE NAMED COLLECTION`, `NAMED COLLECTION USAGE`). This PR is backward incompatible because this access type is disabled by default (its parent access type `NAMED COLLECTION ADMIN` is disabled by default as well). Proposed in [#50277](https://github.com/ClickHouse/ClickHouse/issues/50277). To grant it, use `GRANT NAMED COLLECTION ON collection_name TO user` or `GRANT NAMED COLLECTION ON * TO user`; to be able to give these grants, `named_collection_admin` is required in the config (it was previously named `named_collection_control`, which remains as an alias). See the example below. [#50625](https://github.com/ClickHouse/ClickHouse/pull/50625) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fixed a typo in the `system.parts` column name `last_removal_attemp_time`: it is now named `last_removal_attempt_time`. [#52104](https://github.com/ClickHouse/ClickHouse/pull/52104) ([filimonov](https://github.com/filimonov)).
+* Bump the default value of `distributed_ddl_entry_format_version` to 5 (enables OpenTelemetry and `initial_query_id` pass-through). This makes it impossible to process existing entries for distributed DDL after a **downgrade** (but note that usually there should be no such unprocessed entries). [#52128](https://github.com/ClickHouse/ClickHouse/pull/52128) ([Azat Khuzhin](https://github.com/azat)).
+* Check projection metadata the same way we check ordinary metadata. This change may prevent the server from starting in case there was a table with an invalid projection. An example is a projection that created positional columns in PK (e.g. `projection p (select * order by 1, 4)`, which is not allowed in a table PK and can cause a crash during insert/merge). Drop such projections before the update. Fixes [#52353](https://github.com/ClickHouse/ClickHouse/issues/52353). [#52361](https://github.com/ClickHouse/ClickHouse/pull/52361) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* The experimental feature `hashid` is removed due to a bug. The quality of implementation was questionable at the start, and it didn't get through the experimental status. This closes [#52406](https://github.com/ClickHouse/ClickHouse/issues/52406). [#52449](https://github.com/ClickHouse/ClickHouse/pull/52449) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The function `toDecimalString` is removed due to subpar implementation quality. This closes [#52407](https://github.com/ClickHouse/ClickHouse/issues/52407). [#52450](https://github.com/ClickHouse/ClickHouse/pull/52450) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### New Feature
+* Implement KQL-style formatting for Interval. [#45671](https://github.com/ClickHouse/ClickHouse/pull/45671) ([ltrk2](https://github.com/ltrk2)).
+* Support ZooKeeper `reconfig` command for CH Keeper with incremental reconfiguration, which can be enabled via the `keeper_server.enable_reconfiguration` setting. Support adding servers, removing servers, and changing server priorities. [#49450](https://github.com/ClickHouse/ClickHouse/pull/49450) ([Mike Kot](https://github.com/myrrc)).
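+
+A hedged sketch of the new access type above (the collection name `s3_creds`, its keys, and the user `alice` are hypothetical):
+
+```sql
+-- As an administrator (requires named_collection_admin in the config):
+CREATE NAMED COLLECTION s3_creds AS access_key_id = 'key', secret_access_key = 'secret';
+-- The new access type is disabled by default and must be granted explicitly:
+GRANT NAMED COLLECTION ON s3_creds TO alice;
+-- Or for all collections:
+GRANT NAMED COLLECTION ON * TO alice;
+```
+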
+* Kafka connector can fetch Avro schema from the schema registry with basic authentication using URL-encoded credentials. [#49664](https://github.com/ClickHouse/ClickHouse/pull/49664) ([Ilya Golshtein](https://github.com/ilejn)).
+* Add function `arrayJaccardIndex` which computes the Jaccard similarity between two arrays (example below). [#50076](https://github.com/ClickHouse/ClickHouse/pull/50076) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
+* Added support for PRQL as a query language. [#50686](https://github.com/ClickHouse/ClickHouse/pull/50686) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
+* Add a column `is_obsolete` to `system.settings` and similar tables. Closes [#50819](https://github.com/ClickHouse/ClickHouse/issues/50819). [#50826](https://github.com/ClickHouse/ClickHouse/pull/50826) ([flynn](https://github.com/ucasfl)).
+* Implement support for encrypted elements in the configuration file: added the possibility to use encrypted text in leaf elements of the configuration file. The text is encrypted using encryption codecs from the corresponding configuration section. [#50986](https://github.com/ClickHouse/ClickHouse/pull/50986) ([Roman Vasin](https://github.com/rvasin)).
+* Just a new request of [#49483](https://github.com/ClickHouse/ClickHouse/issues/49483). [#51013](https://github.com/ClickHouse/ClickHouse/pull/51013) ([lgbo](https://github.com/lgbo-ustc)).
+* Add the SYSTEM STOP LISTEN query. Closes [#47972](https://github.com/ClickHouse/ClickHouse/issues/47972). [#51016](https://github.com/ClickHouse/ClickHouse/pull/51016) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Add the `input_format_csv_allow_variable_number_of_columns` option. [#51273](https://github.com/ClickHouse/ClickHouse/pull/51273) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Another boring feature: add function `substring_index`, as in Spark or MySQL (example below). [#51472](https://github.com/ClickHouse/ClickHouse/pull/51472) ([李扬](https://github.com/taiyang-li)).
+* Show stats for jemalloc bins. Example: `SELECT *, size * (nmalloc - ndalloc) AS allocated_bytes FROM system.jemalloc_bins WHERE allocated_bytes > 0 ORDER BY allocated_bytes DESC LIMIT 10`. [#51674](https://github.com/ClickHouse/ClickHouse/pull/51674) ([Alexander Gololobov](https://github.com/davenger)).
+* Add the RowBinaryWithDefaults format with an extra byte before each column that indicates whether the column default value should be used. Closes [#50854](https://github.com/ClickHouse/ClickHouse/issues/50854). [#51695](https://github.com/ClickHouse/ClickHouse/pull/51695) ([Kruglov Pavel](https://github.com/Avogar)).
+* Added the `default_temporary_table_engine` setting. Same as `default_table_engine` but for temporary tables. [#51292](https://github.com/ClickHouse/ClickHouse/issues/51292). [#51708](https://github.com/ClickHouse/ClickHouse/pull/51708) ([velavokr](https://github.com/velavokr)).
+* Added new initcap / initcapUTF8 functions which convert the first letter of each word to upper case and the rest to lower case. [#51735](https://github.com/ClickHouse/ClickHouse/pull/51735) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* CREATE TABLE now supports `PRIMARY KEY` syntax in the column definition. Columns are added to the primary index in the same order they are defined (example below). [#51881](https://github.com/ClickHouse/ClickHouse/pull/51881) ([Ilya Yatsishin](https://github.com/qoega)).
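+
+A few illustrative queries for the new functions and syntax above (a minimal sketch; the results in the comments follow the documented semantics and are not verified output):
+
+```sql
+-- Jaccard similarity: |{2}| / |{1, 2, 3}| = 0.333...
+SELECT arrayJaccardIndex([1, 2], [2, 3]);
+-- As in Spark/MySQL: the part of the string before the 2nd '.' delimiter:
+SELECT substring_index('www.clickhouse.com', '.', 2);  -- 'www.clickhouse'
+-- PRIMARY KEY directly in a column definition (the table name is hypothetical):
+CREATE TABLE pk_demo (id UInt64 PRIMARY KEY, s String) ENGINE = MergeTree;
+```
+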
+* Added the possibility to use date and time format specifiers in log and error log file names, either in config files (`log` and `errorlog` tags) or command line arguments (`--log-file` and `--errorlog-file`). [#51945](https://github.com/ClickHouse/ClickHouse/pull/51945) ([Victor Krasnov](https://github.com/sirvickr)).
+* Added Peak Memory Usage (for query) to client final statistics, and to the HTTP header. [#51946](https://github.com/ClickHouse/ClickHouse/pull/51946) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Added new hasSubsequence() functions (plus CaseInsensitive and UTF8 versions); see the examples below. [#52050](https://github.com/ClickHouse/ClickHouse/pull/52050) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Add `array_agg` as alias of `groupArray` for PostgreSQL compatibility. Closes [#52100](https://github.com/ClickHouse/ClickHouse/issues/52100). [#52135](https://github.com/ClickHouse/ClickHouse/pull/52135) ([flynn](https://github.com/ucasfl)).
+* Add `any_value` as a compatibility alias for the `any` aggregate function. Closes [#52140](https://github.com/ClickHouse/ClickHouse/issues/52140). [#52147](https://github.com/ClickHouse/ClickHouse/pull/52147) ([flynn](https://github.com/ucasfl)).
+* Add aggregate function `array_concat_agg` for compatibility with BigQuery; it's an alias of `groupArrayArray`. Closes [#52139](https://github.com/ClickHouse/ClickHouse/issues/52139). [#52149](https://github.com/ClickHouse/ClickHouse/pull/52149) ([flynn](https://github.com/ucasfl)).
+* Add `OCTET_LENGTH` as an alias to `length`. Closes [#52153](https://github.com/ClickHouse/ClickHouse/issues/52153). [#52176](https://github.com/ClickHouse/ClickHouse/pull/52176) ([FFFFFFFHHHHHHH](https://github.com/FFFFFFFHHHHHHH)).
+* Re-add SipHash keyed functions. [#52206](https://github.com/ClickHouse/ClickHouse/pull/52206) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
+* Added the `firstLine` function to extract the first line from a multi-line string. This closes [#51172](https://github.com/ClickHouse/ClickHouse/issues/51172). [#52209](https://github.com/ClickHouse/ClickHouse/pull/52209) ([Mikhail Koviazin](https://github.com/mkmkme)).
+
+#### Performance Improvement
+* Enable `move_all_conditions_to_prewhere` and `enable_multiple_prewhere_read_steps` settings by default. [#46365](https://github.com/ClickHouse/ClickHouse/pull/46365) ([Alexander Gololobov](https://github.com/davenger)).
+* Improve performance of some queries by tuning the allocator. [#46416](https://github.com/ClickHouse/ClickHouse/pull/46416) ([Azat Khuzhin](https://github.com/azat)).
+* Writing parquet files is 10x faster; it's multi-threaded now. Almost the same speed as reading. [#49367](https://github.com/ClickHouse/ClickHouse/pull/49367) ([Michael Kolupaev](https://github.com/al13n321)).
+* Enable automatic selection of the sparse serialization format by default. It improves performance. The format is supported since version 22.1. After this change, downgrading to versions older than 22.1 might not be possible. You can turn off the usage of the sparse serialization format by providing the `ratio_of_defaults_for_sparse_serialization = 1` setting for your MergeTree tables. [#49631](https://github.com/ClickHouse/ClickHouse/pull/49631) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
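+
+A quick sketch of the new string helpers and aliases above (return values in the comments follow the entries' descriptions; treat them as expected rather than verified output):
+
+```sql
+SELECT hasSubsequence('garbage', 'arg');    -- 1: 'a', 'r', 'g' occur in order
+SELECT initcap('hello world');              -- 'Hello World'
+SELECT firstLine('foo\nbar\nbaz');          -- 'foo'
+SELECT OCTET_LENGTH('text');                -- 4, alias of length()
+SELECT any_value(number) FROM numbers(3);   -- same as any(number)
+```
+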
+* Now we use fixed-size tasks in `MergeTreePrefetchedReadPool`, as in `MergeTreeReadPool`. Also, from now on we use a connection pool for S3 requests. [#49732](https://github.com/ClickHouse/ClickHouse/pull/49732) ([Nikita Taranov](https://github.com/nickitat)).
+* More pushdown to the right side of join. [#50532](https://github.com/ClickHouse/ClickHouse/pull/50532) ([Nikita Taranov](https://github.com/nickitat)).
+* Improve grace_hash join by reserving the hash table's size (resubmit). [#50875](https://github.com/ClickHouse/ClickHouse/pull/50875) ([lgbo](https://github.com/lgbo-ustc)).
+* Waiting on a lock in `OpenedFileCache` could be noticeable sometimes. We sharded it into multiple sub-maps (each with its own lock) to avoid contention. [#51341](https://github.com/ClickHouse/ClickHouse/pull/51341) ([Nikita Taranov](https://github.com/nickitat)).
+* Remove duplicate condition in functionunixtimestamp64.h. [#51857](https://github.com/ClickHouse/ClickHouse/pull/51857) ([lcjh](https://github.com/ljhcage)).
+* When moving conditions to PREWHERE, deprioritize conditions on primary key columns: they are likely to be used in PK analysis anyway and will not contribute much more to PREWHERE filtering. [#51958](https://github.com/ClickHouse/ClickHouse/pull/51958) ([Alexander Gololobov](https://github.com/davenger)).
+* Add a rewriter for both the old and new analyzer, and add the setting `optimize_uniq_to_count` (0, i.e. disabled, by default; see the sketch below). [#52004](https://github.com/ClickHouse/ClickHouse/pull/52004) ([JackyWoo](https://github.com/JackyWoo)).
+* Performance experiments with the **OnTime** benchmark on an ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of **11.6%** to the QPS of query **Q8** while having no impact on the others. [#52036](https://github.com/ClickHouse/ClickHouse/pull/52036) ([Zhiguo Zhou](https://github.com/ZhiguoZh)).
+* Enable `allow_vertical_merges_from_compact_to_wide_parts` by default. It will save memory usage during merges. [#52295](https://github.com/ClickHouse/ClickHouse/pull/52295) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix incorrect projection analysis which invalidates primary keys. This issue only exists when `query_plan_optimize_primary_key = 1, query_plan_optimize_projection = 1`. This fixes [#48823](https://github.com/ClickHouse/ClickHouse/issues/48823). This fixes [#51173](https://github.com/ClickHouse/ClickHouse/issues/51173). [#52308](https://github.com/ClickHouse/ClickHouse/pull/52308) ([Amos Bird](https://github.com/amosbird)).
+* Reduce the number of syscalls in FileCache::loadMetadata. [#52435](https://github.com/ClickHouse/ClickHouse/pull/52435) ([Raúl Marín](https://github.com/Algunenano)).
+
+#### Improvement
+* Added query `SYSTEM FLUSH ASYNC INSERT QUEUE` which flushes all pending asynchronous inserts to the destination tables. Added a server-side setting `async_insert_queue_flush_on_shutdown` (`true` by default) which determines whether to flush the queue of asynchronous inserts on graceful shutdown. Setting `async_insert_threads` is now a server-side setting. [#49160](https://github.com/ClickHouse/ClickHouse/pull/49160) ([Anton Popov](https://github.com/CurtizJ)).
+* Don't show messages about `16 EiB` free space in logs, as they don't make sense. This closes [#49320](https://github.com/ClickHouse/ClickHouse/issues/49320). [#49342](https://github.com/ClickHouse/ClickHouse/pull/49342) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
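+
+A sketch of the `optimize_uniq_to_count` rewrite above (the table `t` is hypothetical; the exact set of rewritten patterns is an assumption based on the setting's description):
+
+```sql
+SET optimize_uniq_to_count = 1;
+-- Counting unique values of an already-distinct subquery can be
+-- rewritten to a plain count() over the subquery:
+SELECT uniq(x) FROM (SELECT DISTINCT x FROM t);
+```
+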
+* Properly check the limit for the `sleepEachRow` function. Add a setting `function_sleep_max_microseconds_per_block`. This is needed for the generic query fuzzer. [#49343](https://github.com/ClickHouse/ClickHouse/pull/49343) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix two issues in queries like `select geohashEncode(120.2, number::Float64) from numbers(10);`. [#50066](https://github.com/ClickHouse/ClickHouse/pull/50066) ([李扬](https://github.com/taiyang-li)).
+* Add support for external disks in Keeper for storing snapshots and logs. [#50098](https://github.com/ClickHouse/ClickHouse/pull/50098) ([Antonio Andelic](https://github.com/antonio2368)).
+* Add support for multi-directory selection (`{}`) globs. [#50559](https://github.com/ClickHouse/ClickHouse/pull/50559) ([Andrey Zvonov](https://github.com/zvonand)).
+* Allow to have a strict lower boundary for file segment size by downloading remaining data in the background. The minimum size of a file segment (if the actual file size is bigger) is configured via the cache configuration setting `boundary_alignment`, by default `4Mi`. The number of background threads is configured via the cache configuration setting `background_download_threads`, by default `2`. Also, `max_file_segment_size` was increased from `8Mi` to `32Mi` in this PR. [#51000](https://github.com/ClickHouse/ClickHouse/pull/51000) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Allow filtering HTTP headers with the `http_forbid_headers` section in config. Both exact matching and regexp filters are available. [#51038](https://github.com/ClickHouse/ClickHouse/pull/51038) ([Nikolay Degterinsky](https://github.com/evillique)).
+* New alias for the function `current_database`, and added a new function `current_schemas` ([#50727](https://github.com/ClickHouse/ClickHouse/issues/50727)). [#51076](https://github.com/ClickHouse/ClickHouse/pull/51076) ([Pedro Riera](https://github.com/priera)).
+* Log async insert flush queries into system.query_log. [#51160](https://github.com/ClickHouse/ClickHouse/pull/51160) ([Raúl Marín](https://github.com/Algunenano)).
+* Decreased default timeouts for S3 from 30 seconds to 3 seconds, and for other HTTP from 180 seconds to 30 seconds. [#51171](https://github.com/ClickHouse/ClickHouse/pull/51171) ([Michael Kolupaev](https://github.com/al13n321)).
+* Use read_bytes/total_bytes_to_read for the progress bar in s3/file/url/... table functions for better progress indication. [#51286](https://github.com/ClickHouse/ClickHouse/pull/51286) ([Kruglov Pavel](https://github.com/Avogar)).
+* Functions `date_diff()` and `age()` now support millisecond/microsecond units and work with microsecond precision. [#51291](https://github.com/ClickHouse/ClickHouse/pull/51291) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Allow SQL standard `FETCH` without `OFFSET` (see the example below). See https://antonz.org/sql-fetch/. [#51293](https://github.com/ClickHouse/ClickHouse/pull/51293) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Improve parsing of path in clickhouse-keeper-client. [#51359](https://github.com/ClickHouse/ClickHouse/pull/51359) ([Azat Khuzhin](https://github.com/azat)).
+* A third-party product depending on ClickHouse (Gluten: Plugin to Double SparkSQL's Performance) had a bug. This fix avoids heap overflow in that third-party product while reading from HDFS. [#51386](https://github.com/ClickHouse/ClickHouse/pull/51386) ([李扬](https://github.com/taiyang-li)).
+* Fix a checking error caused by uninitialized class members. [#51418](https://github.com/ClickHouse/ClickHouse/pull/51418) ([李扬](https://github.com/taiyang-li)).
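+
+For the standard-SQL `FETCH` support above, a minimal example (equivalent to `LIMIT 10` here):
+
+```sql
+-- FETCH no longer requires a preceding OFFSET clause:
+SELECT number FROM numbers(100) ORDER BY number FETCH FIRST 10 ROWS ONLY;
+```
+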
+* Add the ability to disable native copy for S3 (setting `allow_s3_native_copy` for BACKUP/RESTORE, and `s3_allow_native_copy` for `s3`/`s3_plain` disks). [#51448](https://github.com/ClickHouse/ClickHouse/pull/51448) ([Azat Khuzhin](https://github.com/azat)).
+* Add column `primary_key_size` to the `system.parts` table to show the compressed primary key size on disk (see the sketch below). Closes [#51400](https://github.com/ClickHouse/ClickHouse/issues/51400). [#51496](https://github.com/ClickHouse/ClickHouse/pull/51496) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
+* Allow running `clickhouse-local` without procfs, without home directory existing, and without name resolution plugins from glibc. [#51518](https://github.com/ClickHouse/ClickHouse/pull/51518) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Correct the message shown when modifying a storage policy (https://github.com/clickhouse/clickhouse/issues/51516). [#51519](https://github.com/ClickHouse/ClickHouse/pull/51519) ([xiaolei565](https://github.com/xiaolei565)).
+* Support `DROP FILESYSTEM CACHE KEY [ OFFSET ]`. [#51547](https://github.com/ClickHouse/ClickHouse/pull/51547) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Allow adding a disk name for custom disks. Previously, custom disks would use an internally generated disk name. Now it is possible with `disk = disk_<name>(...)` (e.g. the disk will have the name `name`). [#51552](https://github.com/ClickHouse/ClickHouse/pull/51552) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add placeholder `%a` for the full filename in the rename_files_after_processing setting. [#51603](https://github.com/ClickHouse/ClickHouse/pull/51603) ([Kruglov Pavel](https://github.com/Avogar)).
+* Add column modification time to system.parts_columns. [#51685](https://github.com/ClickHouse/ClickHouse/pull/51685) ([Azat Khuzhin](https://github.com/azat)).
+* Add new setting `input_format_csv_use_default_on_bad_values` to the CSV format that allows inserting the default value when parsing of a single field failed. [#51716](https://github.com/ClickHouse/ClickHouse/pull/51716) ([KevinyhZou](https://github.com/KevinyhZou)).
+* Added a crash log flush to the disk after an unexpected crash. [#51720](https://github.com/ClickHouse/ClickHouse/pull/51720) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
+* Fix behavior in the dashboard page where errors unrelated to authentication were not shown. Also fix 'overlapping' chart behavior. [#51744](https://github.com/ClickHouse/ClickHouse/pull/51744) ([Zach Naimon](https://github.com/ArctypeZach)).
+* Allow UUID to UInt128 conversion. [#51765](https://github.com/ClickHouse/ClickHouse/pull/51765) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Added support for Nullable arguments in function `range`. [#51767](https://github.com/ClickHouse/ClickHouse/pull/51767) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Convert conditions like `toYear(x) = c` to `c1 <= x < c2` (see the sketch below). [#51795](https://github.com/ClickHouse/ClickHouse/pull/51795) ([Han Fei](https://github.com/hanfei1991)).
+* Improve MySQL compatibility of statement SHOW INDEX. [#51796](https://github.com/ClickHouse/ClickHouse/pull/51796) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix `use_structure_from_insertion_table_in_table_functions` not working with `MATERIALIZED` and `ALIAS` columns. Closes [#51817](https://github.com/ClickHouse/ClickHouse/issues/51817). Closes [#51019](https://github.com/ClickHouse/ClickHouse/issues/51019). [#51825](https://github.com/ClickHouse/ClickHouse/pull/51825) ([flynn](https://github.com/ucasfl)).
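+
+Two small sketches for the entries above (the table `t` and column `d` are hypothetical; the rewritten range form is paraphrased from the `toYear` entry):
+
+```sql
+-- A condition like toYear(d) = 2023 can now be rewritten internally to a
+-- range condition, roughly d >= '2023-01-01' AND d < '2024-01-01',
+-- which lets the primary key index be used:
+SELECT count() FROM t WHERE toYear(d) = 2023;
+
+-- The new column in system.parts showing compressed primary key size on disk:
+SELECT name, primary_key_size FROM system.parts WHERE table = 't' AND active;
+```
+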
+* Introduce a table setting `wait_for_unique_parts_send_before_shutdown_ms` which specifies the amount of time a replica will wait before closing the interserver handler for replicated sends. Also fix an inconsistency between the shutdown of tables and interserver handlers: now the server shuts down tables first, and only after that shuts down interserver handlers. [#51851](https://github.com/ClickHouse/ClickHouse/pull/51851) ([alesapin](https://github.com/alesapin)).
+* CacheDictionary now requests only unique keys from the source. Closes [#51762](https://github.com/ClickHouse/ClickHouse/issues/51762). [#51853](https://github.com/ClickHouse/ClickHouse/pull/51853) ([Maksim Kita](https://github.com/kitaisreal)).
+* Fixed settings not being applied for EXPLAIN queries when a format is provided. [#51859](https://github.com/ClickHouse/ClickHouse/pull/51859) ([Nikita Taranov](https://github.com/nickitat)).
+* Allow SETTINGS before FORMAT in a DESCRIBE TABLE query, for compatibility with the SELECT query (example below). Closes [#51544](https://github.com/ClickHouse/ClickHouse/issues/51544). [#51899](https://github.com/ClickHouse/ClickHouse/pull/51899) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Var-int encoded integers (e.g. used by the native protocol) can now use the full 64-bit range. 3rd party clients are advised to update their var-int code accordingly. [#51905](https://github.com/ClickHouse/ClickHouse/pull/51905) ([Robert Schulze](https://github.com/rschu1ze)).
+* Update certificates when they change, without the need to manually run SYSTEM RELOAD CONFIG. [#52030](https://github.com/ClickHouse/ClickHouse/pull/52030) ([Mike Kot](https://github.com/myrrc)).
+* Added the `allow_create_index_without_type` setting that allows ignoring `ADD INDEX` queries without a specified `TYPE`. Standard SQL queries will just succeed without changing the table schema. [#52056](https://github.com/ClickHouse/ClickHouse/pull/52056) ([Ilya Yatsishin](https://github.com/qoega)).
+* Fixed a crash when mysqlxx::Pool::Entry was used after it was disconnected. [#52063](https://github.com/ClickHouse/ClickHouse/pull/52063) ([Val Doroshchuk](https://github.com/valbok)).
+* CREATE TABLE ... AS SELECT ... is now supported in MaterializedMySQL. [#52067](https://github.com/ClickHouse/ClickHouse/pull/52067) ([Val Doroshchuk](https://github.com/valbok)).
+* Introduced automatic conversion of text types to UTF-8 for MaterializedMySQL. [#52084](https://github.com/ClickHouse/ClickHouse/pull/52084) ([Val Doroshchuk](https://github.com/valbok)).
+* Add aliases for the functions `today` (now also available under the `curdate`/`current_date` names) and `now` (`current_timestamp`); see the examples below. [#52106](https://github.com/ClickHouse/ClickHouse/pull/52106) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)).
+* Log messages are written to text_log from the beginning. [#52113](https://github.com/ClickHouse/ClickHouse/pull/52113) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Previously, in cases where the HTTP endpoint had multiple IP addresses and the first of them was unreachable, a timeout exception was thrown. Session creation now handles all resolved endpoints. [#52116](https://github.com/ClickHouse/ClickHouse/pull/52116) ([Aleksei Filatov](https://github.com/aalexfvk)).
+* Support async_deduplication_token for async insert. [#52136](https://github.com/ClickHouse/ClickHouse/pull/52136) ([Han Fei](https://github.com/hanfei1991)).
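+
+A minimal sketch of the DESCRIBE improvement and the new date/time aliases above (the table `t` is hypothetical; `max_threads` is just an arbitrary example setting):
+
+```sql
+-- SETTINGS may now appear before FORMAT, as in SELECT queries:
+DESCRIBE TABLE t SETTINGS max_threads = 1 FORMAT TSV;
+-- MySQL/PostgreSQL-style aliases of today() and now():
+SELECT curdate(), current_timestamp();
+```
+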
+* The Avro input format now supports Union with a single type. Closes [#52131](https://github.com/ClickHouse/ClickHouse/issues/52131). [#52137](https://github.com/ClickHouse/ClickHouse/pull/52137) ([flynn](https://github.com/ucasfl)).
+* Add setting `optimize_use_implicit_projections` to disable implicit projections (currently only the `min_max_count` projection). It defaults to false until [#52075](https://github.com/ClickHouse/ClickHouse/issues/52075) is fixed. [#52152](https://github.com/ClickHouse/ClickHouse/pull/52152) ([Amos Bird](https://github.com/amosbird)).
+* It was possible to make the function `hasToken` run in an infinite loop. This possibility has been removed. This closes [#52156](https://github.com/ClickHouse/ClickHouse/issues/52156). [#52160](https://github.com/ClickHouse/ClickHouse/pull/52160) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Upgrade Intel QPL from v1.1.0 to v1.2.0 and Intel accel-config from v3.5 to v4.0, and fix an issue where Device IOTLB misses had a big performance impact for IAA accelerators. [#52180](https://github.com/ClickHouse/ClickHouse/pull/52180) ([jasperzhu](https://github.com/jinjunzh)).
+* Functions `date_diff()` and `age()` now support millisecond/microsecond units and work with microsecond precision. [#52181](https://github.com/ClickHouse/ClickHouse/pull/52181) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Create ZK ancestors optimistically. [#52195](https://github.com/ClickHouse/ClickHouse/pull/52195) ([Raúl Marín](https://github.com/Algunenano)).
+* Fix [#50582](https://github.com/ClickHouse/ClickHouse/issues/50582). Avoid the `Not found column ... in block` error in some cases of reading in-order and constants. [#52259](https://github.com/ClickHouse/ClickHouse/pull/52259) ([Chen768959](https://github.com/Chen768959)).
+* Check whether S2 geo primitives are invalid as early as possible on the ClickHouse side. This closes: [#27090](https://github.com/ClickHouse/ClickHouse/issues/27090). [#52260](https://github.com/ClickHouse/ClickHouse/pull/52260) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Now unquoted UTF-8 strings are supported in DDL for MaterializedMySQL. [#52318](https://github.com/ClickHouse/ClickHouse/pull/52318) ([Val Doroshchuk](https://github.com/valbok)).
+* Add back missing projection QueryAccessInfo when `query_plan_optimize_projection = 1`. This fixes [#50183](https://github.com/ClickHouse/ClickHouse/issues/50183). This fixes [#50093](https://github.com/ClickHouse/ClickHouse/issues/50093). [#52327](https://github.com/ClickHouse/ClickHouse/pull/52327) ([Amos Bird](https://github.com/amosbird)).
+* Add a new setting `disable_url_encoding` that allows disabling decoding/encoding of the path in the URI in the URL engine (see the sketch below). [#52337](https://github.com/ClickHouse/ClickHouse/pull/52337) ([Kruglov Pavel](https://github.com/Avogar)).
+* When `ZooKeeperRetriesControl` rethrows an error, it's more useful to see its original stack trace, not the one from `ZooKeeperRetriesControl` itself. [#52347](https://github.com/ClickHouse/ClickHouse/pull/52347) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Now double quoted comments are supported in MaterializedMySQL. [#52355](https://github.com/ClickHouse/ClickHouse/pull/52355) ([Val Doroshchuk](https://github.com/valbok)).
+* Wait for the zero copy replication lock even if some disks don't support it. [#52376](https://github.com/ClickHouse/ClickHouse/pull/52376) ([Raúl Marín](https://github.com/Algunenano)).
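+
+A sketch of the new `disable_url_encoding` setting above (the URL and column structure are hypothetical):
+
+```sql
+-- With the setting enabled, the path in the URI is passed through verbatim,
+-- without decoding/encoding:
+SELECT * FROM url('https://example.com/a%20b.csv', 'CSV', 'c String')
+SETTINGS disable_url_encoding = 1;
+```
+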
+* The `session_timezone` setting is demoted to experimental. [#52445](https://github.com/ClickHouse/ClickHouse/pull/52445) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The interserver port is now closed only after tables are shut down. [#52498](https://github.com/ClickHouse/ClickHouse/pull/52498) ([alesapin](https://github.com/alesapin)).
+* Added the field `refcount` to the `system.remote_data_paths` table. [#52518](https://github.com/ClickHouse/ClickHouse/pull/52518) ([Anton Popov](https://github.com/CurtizJ)).
+* Added the new setting `merge_tree_determine_task_size_by_prewhere_columns`. If set to `true`, only the sizes of the columns from the `PREWHERE` section are considered when determining the reading task size; otherwise, all columns from the query are considered. [#52606](https://github.com/ClickHouse/ClickHouse/pull/52606) ([Nikita Taranov](https://github.com/nickitat)).
+
+#### Build/Testing/Packaging Improvement
+* Add experimental ClickHouse builds for Linux RISC-V 64 to CI. [#31398](https://github.com/ClickHouse/ClickHouse/pull/31398) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fixed a CRC32(WeakHash32) issue for s390x. [#50365](https://github.com/ClickHouse/ClickHouse/pull/50365) ([Harry Lee](https://github.com/HarryLeeIBM)).
+* Add an integration test check with the analyzer enabled. [#50926](https://github.com/ClickHouse/ClickHouse/pull/50926) ([Dmitry Novik](https://github.com/novikd)).
+* Update cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)).
+* Fixed several issues found by OSS-Fuzz. [#51736](https://github.com/ClickHouse/ClickHouse/pull/51736) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* There were a couple of build failures, presumably because of S3 availability; sccache has a feature of failing over to local compilation. [#51893](https://github.com/ClickHouse/ClickHouse/pull/51893) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* The 02242_delete_user_race and 02243_drop_user_grant_race tests have been corrected. [#51923](https://github.com/ClickHouse/ClickHouse/pull/51923) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
+* Make the function `CHColumnToArrowColumn::fillArrowArrayWithArrayColumnData` work with nullable arrays, which are not possible in ClickHouse but are needed for Gluten. [#52112](https://github.com/ClickHouse/ClickHouse/pull/52112) ([李扬](https://github.com/taiyang-li)).
+* We've updated the CCTZ library to master, but there are no user-visible changes. [#52124](https://github.com/ClickHouse/ClickHouse/pull/52124) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* The `system.licenses` table now includes the hard-forked library Poco. This closes [#52066](https://github.com/ClickHouse/ClickHouse/issues/52066). [#52127](https://github.com/ClickHouse/ClickHouse/pull/52127) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Follow-up to [#50926](https://github.com/ClickHouse/ClickHouse/issues/50926): add the integration tests check with the enabled analyzer to master. [#52210](https://github.com/ClickHouse/ClickHouse/pull/52210) ([Dmitry Novik](https://github.com/novikd)).
+* Reproducible builds for Rust. [#52395](https://github.com/ClickHouse/ClickHouse/pull/52395) ([Azat Khuzhin](https://github.com/azat)).
+* Improve the startup time of `clickhouse-client` and `clickhouse-local` in debug and sanitizer builds.
This closes [#52228](https://github.com/ClickHouse/ClickHouse/issues/52228). [#52489](https://github.com/ClickHouse/ClickHouse/pull/52489) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Check that there are no cases of bad punctuation: whitespace before a comma like `Hello ,world` instead of `Hello, world`. [#52549](https://github.com/ClickHouse/ClickHouse/pull/52549) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Fix materialised pg syncTables [#49698](https://github.com/ClickHouse/ClickHouse/pull/49698) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Fix projection with optimize_aggregators_of_group_by_keys [#49709](https://github.com/ClickHouse/ClickHouse/pull/49709) ([Amos Bird](https://github.com/amosbird)).
+* Fix optimize_skip_unused_shards with JOINs [#51037](https://github.com/ClickHouse/ClickHouse/pull/51037) ([Azat Khuzhin](https://github.com/azat)).
+* Fix formatDateTime() with fractional negative datetime64 [#51290](https://github.com/ClickHouse/ClickHouse/pull/51290) ([Dmitry Kardymon](https://github.com/kardymonds)).
+* Functions `hasToken*` were totally wrong. Add a test for [#43358](https://github.com/ClickHouse/ClickHouse/issues/43358) [#51378](https://github.com/ClickHouse/ClickHouse/pull/51378) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix optimization to move functions before sorting. [#51481](https://github.com/ClickHouse/ClickHouse/pull/51481) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Fix Block structure mismatch in Pipe::unitePipes for FINAL [#51492](https://github.com/ClickHouse/ClickHouse/pull/51492) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix SIGSEGV for clusters with zero weight across all shards (fixes INSERT INTO FUNCTION clusterAllReplicas()) [#51545](https://github.com/ClickHouse/ClickHouse/pull/51545) ([Azat Khuzhin](https://github.com/azat)).
+* Fix timeout for hedged requests [#51582](https://github.com/ClickHouse/ClickHouse/pull/51582) ([Azat Khuzhin](https://github.com/azat)).
+* Fix logical error in ANTI join with NULL [#51601](https://github.com/ClickHouse/ClickHouse/pull/51601) ([vdimir](https://github.com/vdimir)).
+* Fix for moving 'IN' conditions to PREWHERE [#51610](https://github.com/ClickHouse/ClickHouse/pull/51610) ([Alexander Gololobov](https://github.com/davenger)).
+* Do not apply PredicateExpressionsOptimizer for ASOF/ANTI join [#51633](https://github.com/ClickHouse/ClickHouse/pull/51633) ([vdimir](https://github.com/vdimir)).
+* Fix async insert with deduplication for ReplicatedMergeTree using merging algorithms [#51676](https://github.com/ClickHouse/ClickHouse/pull/51676) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)).
+* Fix segfault when creating an invalid EmbeddedRocksDB table [#51847](https://github.com/ClickHouse/ClickHouse/pull/51847) ([Duc Canh Le](https://github.com/canhld94)).
+* Fix inserts into MongoDB tables [#51876](https://github.com/ClickHouse/ClickHouse/pull/51876) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Fix deadlock on DatabaseCatalog shutdown [#51908](https://github.com/ClickHouse/ClickHouse/pull/51908) ([Alexander Tokmakov](https://github.com/tavplubix)).
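+
+For reference on the `formatDateTime()` fix above: the affected inputs are fractional `DateTime64` values before the 1970 epoch, i.e. with a negative underlying value. A sketch of that call shape (not the PR's own test):
+
+```sql
+SELECT formatDateTime(toDateTime64('1969-12-31 23:59:59.123', 3, 'UTC'), '%F %T');
+```
+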
+* Fix error in subquery operators [#51922](https://github.com/ClickHouse/ClickHouse/pull/51922) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix async connect to hosts with multiple ips [#51934](https://github.com/ClickHouse/ClickHouse/pull/51934) ([Kruglov Pavel](https://github.com/Avogar)). +* Do not remove inputs after ActionsDAG::merge [#51947](https://github.com/ClickHouse/ClickHouse/pull/51947) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Check refcount in `RemoveManyObjectStorageOperation::finalize` instead of `execute` [#51954](https://github.com/ClickHouse/ClickHouse/pull/51954) ([vdimir](https://github.com/vdimir)). +* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Small fix for toDateTime64() for dates after 2283-12-31 [#52130](https://github.com/ClickHouse/ClickHouse/pull/52130) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix ORDER BY tuple of WINDOW functions [#52145](https://github.com/ClickHouse/ClickHouse/pull/52145) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect projection analysis when aggregation expression contains monotonic functions [#52151](https://github.com/ClickHouse/ClickHouse/pull/52151) ([Amos Bird](https://github.com/amosbird)). +* Fix error in `groupArrayMoving` functions [#52161](https://github.com/ClickHouse/ClickHouse/pull/52161) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable direct join for range dictionary [#52187](https://github.com/ClickHouse/ClickHouse/pull/52187) ([Duc Canh Le](https://github.com/canhld94)). +* Fix sticky mutations test (and extremely rare race condition) [#52197](https://github.com/ClickHouse/ClickHouse/pull/52197) ([alesapin](https://github.com/alesapin)). +* Fix race in Web disk [#52211](https://github.com/ClickHouse/ClickHouse/pull/52211) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix data race in Connection::setAsyncCallback on unknown packet from server [#52219](https://github.com/ClickHouse/ClickHouse/pull/52219) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix temp data deletion on startup, add test [#52275](https://github.com/ClickHouse/ClickHouse/pull/52275) ([vdimir](https://github.com/vdimir)). +* Don't use minmax_count projections when counting nullable columns [#52297](https://github.com/ClickHouse/ClickHouse/pull/52297) ([Amos Bird](https://github.com/amosbird)). +* MergeTree/ReplicatedMergeTree should use server timezone for log entries [#52325](https://github.com/ClickHouse/ClickHouse/pull/52325) ([Azat Khuzhin](https://github.com/azat)). +* Fix parameterized view with cte and multiple usage [#52328](https://github.com/ClickHouse/ClickHouse/pull/52328) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Disable expression templates for time intervals [#52335](https://github.com/ClickHouse/ClickHouse/pull/52335) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `apply_snapshot` in Keeper [#52358](https://github.com/ClickHouse/ClickHouse/pull/52358) ([Antonio Andelic](https://github.com/antonio2368)). +* Update build-osx.md [#52377](https://github.com/ClickHouse/ClickHouse/pull/52377) ([AlexBykovski](https://github.com/AlexBykovski)). +* Fix `countSubstrings()` hang with empty needle and a column haystack [#52409](https://github.com/ClickHouse/ClickHouse/pull/52409) ([Sergei Trifonov](https://github.com/serxa)). 
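+
+The `countSubstrings()` entry above fixes a hang; the previously problematic call shape was an empty needle with a non-constant haystack, sketched here with `materialize` standing in for a real column:
+
+```sql
+SELECT countSubstrings(materialize('ClickHouse'), '');
+```
+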
+* Fix normal projection with merge table [#52432](https://github.com/ClickHouse/ClickHouse/pull/52432) ([Amos Bird](https://github.com/amosbird)). +* Fix possible double-free in Aggregator [#52439](https://github.com/ClickHouse/ClickHouse/pull/52439) ([Nikita Taranov](https://github.com/nickitat)). +* Fixed inserting into Buffer engine [#52440](https://github.com/ClickHouse/ClickHouse/pull/52440) ([Vasily Nemkov](https://github.com/Enmk)). +* The implementation of AnyHash was non-conformant. [#52448](https://github.com/ClickHouse/ClickHouse/pull/52448) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Check recursion depth in OptimizedRegularExpression [#52451](https://github.com/ClickHouse/ClickHouse/pull/52451) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix data-race DatabaseReplicated::startupTables()/canExecuteReplicatedMetadataAlter() [#52490](https://github.com/ClickHouse/ClickHouse/pull/52490) ([Azat Khuzhin](https://github.com/azat)). +* Fix abort in function `transform` [#52513](https://github.com/ClickHouse/ClickHouse/pull/52513) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix lightweight delete after drop of projection [#52517](https://github.com/ClickHouse/ClickHouse/pull/52517) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible error "Cannot drain connections: cancel first" [#52585](https://github.com/ClickHouse/ClickHouse/pull/52585) ([Kruglov Pavel](https://github.com/Avogar)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Add documentation for building in docker"'. [#51773](https://github.com/ClickHouse/ClickHouse/pull/51773) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Fix build"'. [#51911](https://github.com/ClickHouse/ClickHouse/pull/51911) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Millisecond and microsecond support in date_diff / age functions"'. [#52129](https://github.com/ClickHouse/ClickHouse/pull/52129) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Re-add SipHash keyed functions"'. [#52466](https://github.com/ClickHouse/ClickHouse/pull/52466) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add an ability to specify allocations size for sampling memory profiler"'. [#52496](https://github.com/ClickHouse/ClickHouse/pull/52496) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Rewrite uniq to count"'. [#52576](https://github.com/ClickHouse/ClickHouse/pull/52576) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Remove duplicate_order_by_and_distinct optimization [#47135](https://github.com/ClickHouse/ClickHouse/pull/47135) ([Igor Nikonov](https://github.com/devcrafter)). +* Update sort desc in ReadFromMergeTree after applying PREWHERE info [#48669](https://github.com/ClickHouse/ClickHouse/pull/48669) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix `BindException: Address already in use` in HDFS integration tests [#49428](https://github.com/ClickHouse/ClickHouse/pull/49428) ([Nikita Taranov](https://github.com/nickitat)). +* Force libunwind usage (removes gcc_eh support) [#49438](https://github.com/ClickHouse/ClickHouse/pull/49438) ([Azat Khuzhin](https://github.com/azat)). +* Cleanup `storage_conf.xml` [#49557](https://github.com/ClickHouse/ClickHouse/pull/49557) ([Kseniia Sumarokova](https://github.com/kssenii)). 
+* Fix flaky tests caused by OPTIMIZE FINAL failing memory budget check [#49764](https://github.com/ClickHouse/ClickHouse/pull/49764) ([Michael Kolupaev](https://github.com/al13n321)).
+* Remove unstable queries from performance/join_set_filter [#50235](https://github.com/ClickHouse/ClickHouse/pull/50235) ([vdimir](https://github.com/vdimir)).
+* More accurate DNS resolve for the keeper connection [#50738](https://github.com/ClickHouse/ClickHouse/pull/50738) ([pufit](https://github.com/pufit)).
+* Try to fix some trash in Disks and part moves [#51135](https://github.com/ClickHouse/ClickHouse/pull/51135) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Add jemalloc support for s390x [#51186](https://github.com/ClickHouse/ClickHouse/pull/51186) ([Boris Kuschel](https://github.com/bkuschel)).
+* Resubmit [#48821](https://github.com/ClickHouse/ClickHouse/issues/48821) [#51208](https://github.com/ClickHouse/ClickHouse/pull/51208) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Test for [#36894](https://github.com/ClickHouse/ClickHouse/issues/36894) [#51274](https://github.com/ClickHouse/ClickHouse/pull/51274) ([Denny Crane](https://github.com/den-crane)).
+* external_aggregation_fix for big endian machines [#51280](https://github.com/ClickHouse/ClickHouse/pull/51280) ([Sanjam Panda](https://github.com/saitama951)).
+* Fix: Invalid number of rows in Chunk column Object [#51296](https://github.com/ClickHouse/ClickHouse/pull/51296) ([Igor Nikonov](https://github.com/devcrafter)).
+* Add a test for [#44816](https://github.com/ClickHouse/ClickHouse/issues/44816) [#51305](https://github.com/ClickHouse/ClickHouse/pull/51305) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a test for the `calculate_text_stack_trace` setting [#51311](https://github.com/ClickHouse/ClickHouse/pull/51311) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Decrease log level, make logs shorter [#51320](https://github.com/ClickHouse/ClickHouse/pull/51320) ([Sema Checherinda](https://github.com/CheSema)).
+* Collect stack traces from job's scheduling and print along with exception's stack trace. [#51349](https://github.com/ClickHouse/ClickHouse/pull/51349) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Add a test for [#42691](https://github.com/ClickHouse/ClickHouse/issues/42691) [#51352](https://github.com/ClickHouse/ClickHouse/pull/51352) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a test for [#32474](https://github.com/ClickHouse/ClickHouse/issues/32474) [#51354](https://github.com/ClickHouse/ClickHouse/pull/51354) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a test for [#41727](https://github.com/ClickHouse/ClickHouse/issues/41727) [#51355](https://github.com/ClickHouse/ClickHouse/pull/51355) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a test for [#35801](https://github.com/ClickHouse/ClickHouse/issues/35801) [#51356](https://github.com/ClickHouse/ClickHouse/pull/51356) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a test for [#34626](https://github.com/ClickHouse/ClickHouse/issues/34626) [#51357](https://github.com/ClickHouse/ClickHouse/pull/51357) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Initialize text_log earlier to capture table startup messages [#51360](https://github.com/ClickHouse/ClickHouse/pull/51360) ([Azat Khuzhin](https://github.com/azat)).
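+
+The text_log entries above (earlier initialization so that table startup messages are captured) can be verified with a query like the following sketch; it assumes the `text_log` table is enabled in the server configuration:
+
+```sql
+SELECT event_time, level, logger_name, message
+FROM system.text_log
+ORDER BY event_time ASC
+LIMIT 10;
+```
+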
+* Use separate default settings for clickhouse-local [#51363](https://github.com/ClickHouse/ClickHouse/pull/51363) ([Azat Khuzhin](https://github.com/azat)).
+* Attempt to remove wrong code (catch/throw in Functions) [#51367](https://github.com/ClickHouse/ClickHouse/pull/51367) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove suspicious code [#51383](https://github.com/ClickHouse/ClickHouse/pull/51383) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Disable hedged requests under TSan [#51392](https://github.com/ClickHouse/ClickHouse/pull/51392) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* No finalize in d-tor WriteBufferFromOStream [#51404](https://github.com/ClickHouse/ClickHouse/pull/51404) ([Sema Checherinda](https://github.com/CheSema)).
+* Better diagnostics for 01193_metadata_loading [#51414](https://github.com/ClickHouse/ClickHouse/pull/51414) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix attaching gdb in stress tests [#51445](https://github.com/ClickHouse/ClickHouse/pull/51445) ([Kruglov Pavel](https://github.com/Avogar)).
+* Merging [#36384](https://github.com/ClickHouse/ClickHouse/issues/36384) [#51458](https://github.com/ClickHouse/ClickHouse/pull/51458) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix possible race on shutdown wait [#51497](https://github.com/ClickHouse/ClickHouse/pull/51497) ([Sergei Trifonov](https://github.com/serxa)).
+* Fix `test_alter_moving_garbage`: lock between getActiveContainingPart and swapActivePart in parts mover [#51498](https://github.com/ClickHouse/ClickHouse/pull/51498) ([vdimir](https://github.com/vdimir)).
+* Fix a logical error on mutation [#51502](https://github.com/ClickHouse/ClickHouse/pull/51502) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix running integration tests with spaces in their names [#51514](https://github.com/ClickHouse/ClickHouse/pull/51514) ([Azat Khuzhin](https://github.com/azat)).
+* Fix flaky test 00417_kill_query [#51522](https://github.com/ClickHouse/ClickHouse/pull/51522) ([Nikolay Degterinsky](https://github.com/evillique)).
+* fs cache: add some checks [#51536](https://github.com/ClickHouse/ClickHouse/pull/51536) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Don't run 02782_uniq_exact_parallel_merging_bug in parallel with other tests [#51549](https://github.com/ClickHouse/ClickHouse/pull/51549) ([Nikita Taranov](https://github.com/nickitat)).
+* 00900_orc_load: lift kill timeout [#51559](https://github.com/ClickHouse/ClickHouse/pull/51559) ([Robert Schulze](https://github.com/rschu1ze)).
+* Add retries to 00416_pocopatch_progress_in_http_headers [#51575](https://github.com/ClickHouse/ClickHouse/pull/51575) ([Nikolay Degterinsky](https://github.com/evillique)).
+* Remove the usage of Analyzer setting in the client [#51578](https://github.com/ClickHouse/ClickHouse/pull/51578) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix merge_selecting_task scheduling [#51591](https://github.com/ClickHouse/ClickHouse/pull/51591) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Add hex functions for cityhash [#51595](https://github.com/ClickHouse/ClickHouse/pull/51595) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Remove `unset CLICKHOUSE_LOG_COMMENT` from tests [#51623](https://github.com/ClickHouse/ClickHouse/pull/51623) ([Nikita Taranov](https://github.com/nickitat)).
+* Implement endianness-independent serialization [#51637](https://github.com/ClickHouse/ClickHouse/pull/51637) ([ltrk2](https://github.com/ltrk2)). +* Ignore APPEND and TRUNCATE modifiers if file does not exist. [#51640](https://github.com/ClickHouse/ClickHouse/pull/51640) ([alekar](https://github.com/alekar)). +* Try to fix flaky 02210_processors_profile_log [#51641](https://github.com/ClickHouse/ClickHouse/pull/51641) ([Igor Nikonov](https://github.com/devcrafter)). +* Make common macros extendable [#51646](https://github.com/ClickHouse/ClickHouse/pull/51646) ([Amos Bird](https://github.com/amosbird)). +* Correct an exception message in src/Functions/nested.cpp [#51651](https://github.com/ClickHouse/ClickHouse/pull/51651) ([Alex Cheng](https://github.com/Alex-Cheng)). +* tests: fix 02050_client_profile_events flakiness [#51653](https://github.com/ClickHouse/ClickHouse/pull/51653) ([Azat Khuzhin](https://github.com/azat)). +* Minor follow-up to re2 update to 2023-06-02 ([#50949](https://github.com/ClickHouse/ClickHouse/issues/50949)) [#51655](https://github.com/ClickHouse/ClickHouse/pull/51655) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix 02116_tuple_element with Analyzer [#51669](https://github.com/ClickHouse/ClickHouse/pull/51669) ([Robert Schulze](https://github.com/rschu1ze)). +* Update timeouts in tests for transactions [#51683](https://github.com/ClickHouse/ClickHouse/pull/51683) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove unused code [#51684](https://github.com/ClickHouse/ClickHouse/pull/51684) ([Sergei Trifonov](https://github.com/serxa)). +* Remove `mmap/mremap/munmap` from Allocator.h [#51686](https://github.com/ClickHouse/ClickHouse/pull/51686) ([alesapin](https://github.com/alesapin)). +* SonarCloud: Add C++23 Experimental Flag [#51687](https://github.com/ClickHouse/ClickHouse/pull/51687) ([Julio Jimenez](https://github.com/juliojimenez)). +* Wait with retries when attaching GDB in tests [#51688](https://github.com/ClickHouse/ClickHouse/pull/51688) ([Antonio Andelic](https://github.com/antonio2368)). +* Update version_date.tsv and changelogs after v23.6.1.1524-stable [#51691](https://github.com/ClickHouse/ClickHouse/pull/51691) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* fix write to finalized buffer [#51696](https://github.com/ClickHouse/ClickHouse/pull/51696) ([Sema Checherinda](https://github.com/CheSema)). +* do not log exception aborted for pending mutate/merge entries when shutdown [#51697](https://github.com/ClickHouse/ClickHouse/pull/51697) ([Sema Checherinda](https://github.com/CheSema)). +* Fix race in ContextAccess [#51704](https://github.com/ClickHouse/ClickHouse/pull/51704) ([Vitaly Baranov](https://github.com/vitlibar)). +* Make test scripts backwards compatible [#51707](https://github.com/ClickHouse/ClickHouse/pull/51707) ([Antonio Andelic](https://github.com/antonio2368)). +* test for full join and null predicate [#51709](https://github.com/ClickHouse/ClickHouse/pull/51709) ([Denny Crane](https://github.com/den-crane)). +* A cmake warning on job limits underutilizing CPU [#51710](https://github.com/ClickHouse/ClickHouse/pull/51710) ([velavokr](https://github.com/velavokr)). +* Fix SQLLogic docker images [#51719](https://github.com/ClickHouse/ClickHouse/pull/51719) ([Antonio Andelic](https://github.com/antonio2368)). +* Added ASK_PASSWORD client constant instead of hardcoded '\n' [#51723](https://github.com/ClickHouse/ClickHouse/pull/51723) ([Alexey Gerasimchuck](https://github.com/Demilivor)). 
+* Update README.md [#51726](https://github.com/ClickHouse/ClickHouse/pull/51726) ([Tyler Hannan](https://github.com/tylerhannan)).
+* Fix source image for sqllogic [#51728](https://github.com/ClickHouse/ClickHouse/pull/51728) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Remove MemoryPool from Poco because it's useless [#51732](https://github.com/ClickHouse/ClickHouse/pull/51732) ([alesapin](https://github.com/alesapin)).
+* Fix: logical error in grace hash join [#51737](https://github.com/ClickHouse/ClickHouse/pull/51737) ([Igor Nikonov](https://github.com/devcrafter)).
+* Update 01320_create_sync_race_condition_zookeeper.sh [#51742](https://github.com/ClickHouse/ClickHouse/pull/51742) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Pin for docker-ce [#51743](https://github.com/ClickHouse/ClickHouse/pull/51743) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Revert "Fix: Invalid number of rows in Chunk column Object" [#51750](https://github.com/ClickHouse/ClickHouse/pull/51750) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Add SonarCloud to README [#51751](https://github.com/ClickHouse/ClickHouse/pull/51751) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix test `02789_object_type_invalid_num_of_rows` [#51754](https://github.com/ClickHouse/ClickHouse/pull/51754) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix (benign) data race in `transform` [#51755](https://github.com/ClickHouse/ClickHouse/pull/51755) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix flaky KeeperMap test [#51764](https://github.com/ClickHouse/ClickHouse/pull/51764) ([Antonio Andelic](https://github.com/antonio2368)).
+* Version mypy=1.4.1 falsely reports unused ignore comment [#51769](https://github.com/ClickHouse/ClickHouse/pull/51769) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Avoid keeping lock Context::getLock() while calculating access rights [#51772](https://github.com/ClickHouse/ClickHouse/pull/51772) ([Vitaly Baranov](https://github.com/vitlibar)).
+* Making stateless tests with timeout less flaky [#51774](https://github.com/ClickHouse/ClickHouse/pull/51774) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix after [#51000](https://github.com/ClickHouse/ClickHouse/issues/51000) [#51790](https://github.com/ClickHouse/ClickHouse/pull/51790) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Add assert in ThreadStatus destructor for correct current_thread [#51800](https://github.com/ClickHouse/ClickHouse/pull/51800) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix broken parts handling in `ReplicatedMergeTree` [#51801](https://github.com/ClickHouse/ClickHouse/pull/51801) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix tsan signal-unsafe call [#51802](https://github.com/ClickHouse/ClickHouse/pull/51802) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
+* Fix for parallel replicas not completely disabled by granule count threshold [#51805](https://github.com/ClickHouse/ClickHouse/pull/51805) ([Alexander Gololobov](https://github.com/davenger)).
+* Make sure that we don't attempt to serialize/deserialize block with 0 columns and non-zero rows [#51807](https://github.com/ClickHouse/ClickHouse/pull/51807) ([Alexander Gololobov](https://github.com/davenger)).
+* Fix rare bug in `DROP COLUMN` and enabled sparse columns (see the sketch below) [#51809](https://github.com/ClickHouse/ClickHouse/pull/51809) ([Anton Popov](https://github.com/CurtizJ)).
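+
+The last entry above concerns sparse-serialized columns; a hedged sketch of a table where sparse serialization kicks in (the table and column names are hypothetical, and the threshold shown is the MergeTree setting that controls sparse serialization):
+
+```sql
+CREATE TABLE t_sparse (key UInt64, value UInt64)
+ENGINE = MergeTree ORDER BY key
+SETTINGS ratio_of_defaults_for_sparse_serialization = 0.9;
+
+ALTER TABLE t_sparse DROP COLUMN value;
+```
+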
+* Fix flaky `test_multiple_disks` [#51821](https://github.com/ClickHouse/ClickHouse/pull/51821) ([Antonio Andelic](https://github.com/antonio2368)).
+* Follow up to [#51547](https://github.com/ClickHouse/ClickHouse/issues/51547) [#51822](https://github.com/ClickHouse/ClickHouse/pull/51822) ([Kseniia Sumarokova](https://github.com/kssenii)).
+* Correctly grep archives in stress tests [#51824](https://github.com/ClickHouse/ClickHouse/pull/51824) ([Antonio Andelic](https://github.com/antonio2368)).
+* Update analyzer_tech_debt.txt [#51836](https://github.com/ClickHouse/ClickHouse/pull/51836) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Remove unused code [#51837](https://github.com/ClickHouse/ClickHouse/pull/51837) ([flynn](https://github.com/ucasfl)).
+* Fix disk config for upgrade tests [#51839](https://github.com/ClickHouse/ClickHouse/pull/51839) ([Antonio Andelic](https://github.com/antonio2368)).
+* Remove Coverity from workflows, but leave in the code [#51842](https://github.com/ClickHouse/ClickHouse/pull/51842) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Many fixes [3] [#51848](https://github.com/ClickHouse/ClickHouse/pull/51848) ([Ilya Yatsishin](https://github.com/qoega)).
+* Change misleading name in joins: addJoinedBlock -> addBlockToJoin [#51852](https://github.com/ClickHouse/ClickHouse/pull/51852) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix: correct exception messages on policies comparison [#51854](https://github.com/ClickHouse/ClickHouse/pull/51854) ([Feng Kaiyu](https://github.com/fky2015)).
+* Update 02439_merge_selecting_partitions.sql [#51862](https://github.com/ClickHouse/ClickHouse/pull/51862) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Remove useless packages [#51863](https://github.com/ClickHouse/ClickHouse/pull/51863) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove useless logs [#51865](https://github.com/ClickHouse/ClickHouse/pull/51865) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix incorrect log level = warning [#51867](https://github.com/ClickHouse/ClickHouse/pull/51867) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix test_replicated_table_attach [#51868](https://github.com/ClickHouse/ClickHouse/pull/51868) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Better usability of a test [#51869](https://github.com/ClickHouse/ClickHouse/pull/51869) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove useless code [#51873](https://github.com/ClickHouse/ClickHouse/pull/51873) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Another fix for the upgrade check script [#51878](https://github.com/ClickHouse/ClickHouse/pull/51878) ([Antonio Andelic](https://github.com/antonio2368)).
+* Sqllogic improvements [#51883](https://github.com/ClickHouse/ClickHouse/pull/51883) ([Ilya Yatsishin](https://github.com/qoega)).
+* Disable ThinLTO on non-Linux [#51897](https://github.com/ClickHouse/ClickHouse/pull/51897) ([Robert Schulze](https://github.com/rschu1ze)).
+* Pin rust nightly (to make it stable) [#51903](https://github.com/ClickHouse/ClickHouse/pull/51903) ([Azat Khuzhin](https://github.com/azat)).
+* Fix build [#51909](https://github.com/ClickHouse/ClickHouse/pull/51909) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix build [#51910](https://github.com/ClickHouse/ClickHouse/pull/51910) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix flaky test `00175_partition_by_ignore` and move it to correct location [#51913](https://github.com/ClickHouse/ClickHouse/pull/51913) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test 02360_send_logs_level_colors: avoid usage of `file` tool [#51914](https://github.com/ClickHouse/ClickHouse/pull/51914) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Maybe better tests [#51916](https://github.com/ClickHouse/ClickHouse/pull/51916) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Revert system drop filesystem cache by key [#51917](https://github.com/ClickHouse/ClickHouse/pull/51917) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test `detach_attach_partition_race` [#51920](https://github.com/ClickHouse/ClickHouse/pull/51920) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Another fix for `02481_async_insert_race_long` [#51925](https://github.com/ClickHouse/ClickHouse/pull/51925) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix segfault caused by `ThreadStatus` [#51931](https://github.com/ClickHouse/ClickHouse/pull/51931) ([Antonio Andelic](https://github.com/antonio2368)). +* Print short fault info only from safe fields [#51932](https://github.com/ClickHouse/ClickHouse/pull/51932) ([Alexander Gololobov](https://github.com/davenger)). +* Fix typo in integration tests [#51944](https://github.com/ClickHouse/ClickHouse/pull/51944) ([Ilya Yatsishin](https://github.com/qoega)). +* Better logs on shutdown [#51951](https://github.com/ClickHouse/ClickHouse/pull/51951) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Filter databases list before querying potentially slow fields [#51955](https://github.com/ClickHouse/ClickHouse/pull/51955) ([Alexander Gololobov](https://github.com/davenger)). +* Fix some issues with transactions [#51959](https://github.com/ClickHouse/ClickHouse/pull/51959) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix unrelated messages from LSan in clickhouse-client [#51966](https://github.com/ClickHouse/ClickHouse/pull/51966) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow OOM in AST Fuzzer with Sanitizers [#51967](https://github.com/ClickHouse/ClickHouse/pull/51967) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable one test under Analyzer [#51968](https://github.com/ClickHouse/ClickHouse/pull/51968) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix Docker [#51969](https://github.com/ClickHouse/ClickHouse/pull/51969) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `01825_type_json_from_map` [#51970](https://github.com/ClickHouse/ClickHouse/pull/51970) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `02354_distributed_with_external_aggregation_memory_usage` [#51971](https://github.com/ClickHouse/ClickHouse/pull/51971) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix disaster in integration tests, part 2 [#51973](https://github.com/ClickHouse/ClickHouse/pull/51973) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* [RFC] Cleanup remote_servers in dist config.xml [#51985](https://github.com/ClickHouse/ClickHouse/pull/51985) ([Azat Khuzhin](https://github.com/azat)). +* Update version_date.tsv and changelogs after v23.6.2.18-stable [#51986](https://github.com/ClickHouse/ClickHouse/pull/51986) ([robot-clickhouse](https://github.com/robot-clickhouse)). 
+* Update version_date.tsv and changelogs after v22.8.20.11-lts [#51987](https://github.com/ClickHouse/ClickHouse/pull/51987) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Fix performance test for regexp cache [#51988](https://github.com/ClickHouse/ClickHouse/pull/51988) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Move a test to the right place [#51989](https://github.com/ClickHouse/ClickHouse/pull/51989) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add a check to validate that the stateful tests are stateful [#51990](https://github.com/ClickHouse/ClickHouse/pull/51990) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Check that functional tests cleanup their tables [#51991](https://github.com/ClickHouse/ClickHouse/pull/51991) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix test_extreme_deduplication [#51992](https://github.com/ClickHouse/ClickHouse/pull/51992) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Cleanup SymbolIndex after reload got removed [#51993](https://github.com/ClickHouse/ClickHouse/pull/51993) ([Azat Khuzhin](https://github.com/azat)).
+* Update CompletedPipelineExecutor exception log name [#52028](https://github.com/ClickHouse/ClickHouse/pull/52028) ([xiao](https://github.com/nicelulu)).
+* Fix `00502_custom_partitioning_replicated_zookeeper_long` [#52032](https://github.com/ClickHouse/ClickHouse/pull/52032) ([Antonio Andelic](https://github.com/antonio2368)).
+* Prohibit send_metadata for s3_plain disks [#52038](https://github.com/ClickHouse/ClickHouse/pull/52038) ([Azat Khuzhin](https://github.com/azat)).
+* Update version_date.tsv and changelogs after v23.4.6.25-stable [#52061](https://github.com/ClickHouse/ClickHouse/pull/52061) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Preparations for Trivial Support For Resharding (part1) [#52068](https://github.com/ClickHouse/ClickHouse/pull/52068) ([Azat Khuzhin](https://github.com/azat)).
+* Update version_date.tsv and changelogs after v23.3.8.21-lts [#52077](https://github.com/ClickHouse/ClickHouse/pull/52077) ([robot-clickhouse](https://github.com/robot-clickhouse)).
+* Fix test_keeper_s3_snapshot flakiness [#52083](https://github.com/ClickHouse/ClickHouse/pull/52083) ([Azat Khuzhin](https://github.com/azat)).
+* Fix test_extreme_deduplication flakiness [#52085](https://github.com/ClickHouse/ClickHouse/pull/52085) ([Azat Khuzhin](https://github.com/azat)).
+* Small docs update for toYearWeek() function [#52090](https://github.com/ClickHouse/ClickHouse/pull/52090) ([Andrey Zvonov](https://github.com/zvonand)).
+* Small docs update for DateTime, DateTime64 [#52094](https://github.com/ClickHouse/ClickHouse/pull/52094) ([Andrey Zvonov](https://github.com/zvonand)).
+* Add missing --force for docker network prune (otherwise it is noop on CI) [#52095](https://github.com/ClickHouse/ClickHouse/pull/52095) ([Azat Khuzhin](https://github.com/azat)).
+* tests: drop existing view in test_materialized_mysql_database [#52103](https://github.com/ClickHouse/ClickHouse/pull/52103) ([Azat Khuzhin](https://github.com/azat)).
+* Update README.md [#52115](https://github.com/ClickHouse/ClickHouse/pull/52115) ([Tyler Hannan](https://github.com/tylerhannan)).
+* Print Zxid in keeper stat command in hex (as ZooKeeper does) [#52122](https://github.com/ClickHouse/ClickHouse/pull/52122) ([Azat Khuzhin](https://github.com/azat)).
+* Skip protection from double decompression if inode from maps cannot be obtained [#52138](https://github.com/ClickHouse/ClickHouse/pull/52138) ([Azat Khuzhin](https://github.com/azat)).
+* There is no point in detecting flaky tests [#52142](https://github.com/ClickHouse/ClickHouse/pull/52142) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Remove default argument value [#52143](https://github.com/ClickHouse/ClickHouse/pull/52143) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix the "kill_mutation" test [#52144](https://github.com/ClickHouse/ClickHouse/pull/52144) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix ORDER BY tuple of WINDOW functions (and slightly more changes) [#52146](https://github.com/ClickHouse/ClickHouse/pull/52146) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix possible EADDRINUSE ("Address already in use") in integration tests [#52148](https://github.com/ClickHouse/ClickHouse/pull/52148) ([Azat Khuzhin](https://github.com/azat)).
+* Fix test 02497_storage_file_reader_selection [#52154](https://github.com/ClickHouse/ClickHouse/pull/52154) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix unexpected AST Set [#52158](https://github.com/ClickHouse/ClickHouse/pull/52158) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix crash in comparison functions due to incorrect query analysis [#52172](https://github.com/ClickHouse/ClickHouse/pull/52172) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix slow test `02317_distinct_in_order_optimization` [#52173](https://github.com/ClickHouse/ClickHouse/pull/52173) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add comments for https://github.com/ClickHouse/ClickHouse/pull/52112 [#52175](https://github.com/ClickHouse/ClickHouse/pull/52175) ([李扬](https://github.com/taiyang-li)).
+* Randomize timezone in tests across non-deterministic around 1970 and default [#52184](https://github.com/ClickHouse/ClickHouse/pull/52184) ([Azat Khuzhin](https://github.com/azat)).
+* Fix `test_multiple_disks/test.py::test_start_stop_moves` [#52189](https://github.com/ClickHouse/ClickHouse/pull/52189) ([Antonio Andelic](https://github.com/antonio2368)).
+* CMake: Simplify job limiting [#52196](https://github.com/ClickHouse/ClickHouse/pull/52196) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix self extracting binaries under qemu linux-user (qemu-$ARCH-static) [#52198](https://github.com/ClickHouse/ClickHouse/pull/52198) ([Azat Khuzhin](https://github.com/azat)).
+* Fix `Integration tests flaky check (asan)` [#52201](https://github.com/ClickHouse/ClickHouse/pull/52201) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix flaky test test_lost_part [#52202](https://github.com/ClickHouse/ClickHouse/pull/52202) ([alesapin](https://github.com/alesapin)).
+* MaterializedMySQL: Replace to_string by magic_enum::enum_name [#52204](https://github.com/ClickHouse/ClickHouse/pull/52204) ([Val Doroshchuk](https://github.com/valbok)).
+* MaterializedMySQL: Add tests to parse db and table names from DDL [#52208](https://github.com/ClickHouse/ClickHouse/pull/52208) ([Val Doroshchuk](https://github.com/valbok)).
+* Revert "Fixed several issues found by OSS-Fuzz" [#52216](https://github.com/ClickHouse/ClickHouse/pull/52216) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)).
+* Use one copy replication more aggressively [#52218](https://github.com/ClickHouse/ClickHouse/pull/52218) ([alesapin](https://github.com/alesapin)).
+* Fix flaky test `01076_parallel_alter_replicated_zookeeper` [#52221](https://github.com/ClickHouse/ClickHouse/pull/52221) ([alesapin](https://github.com/alesapin)).
+* Fix 01889_key_condition_function_chains for analyzer. [#52223](https://github.com/ClickHouse/ClickHouse/pull/52223) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Inhibit settings randomization in the test `json_ghdata` [#52226](https://github.com/ClickHouse/ClickHouse/pull/52226) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Slightly better diagnostics in a test [#52227](https://github.com/ClickHouse/ClickHouse/pull/52227) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Enable no-upgrade-check for 02273_full_sort_join [#52235](https://github.com/ClickHouse/ClickHouse/pull/52235) ([vdimir](https://github.com/vdimir)).
+* Fix network manager for integration tests [#52237](https://github.com/ClickHouse/ClickHouse/pull/52237) ([Azat Khuzhin](https://github.com/azat)).
+* List replication queue only for current test database [#52238](https://github.com/ClickHouse/ClickHouse/pull/52238) ([Alexander Gololobov](https://github.com/davenger)).
+* Attempt to fix assert in tsan with fibers [#52241](https://github.com/ClickHouse/ClickHouse/pull/52241) ([Kruglov Pavel](https://github.com/Avogar)).
+* Fix undefined behaviour in fuzzer [#52256](https://github.com/ClickHouse/ClickHouse/pull/52256) ([Antonio Andelic](https://github.com/antonio2368)).
+* Follow-up to [#51959](https://github.com/ClickHouse/ClickHouse/issues/51959) [#52261](https://github.com/ClickHouse/ClickHouse/pull/52261) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* More fair queue for `drop table sync` [#52276](https://github.com/ClickHouse/ClickHouse/pull/52276) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix `02497_trace_events_stress_long` [#52279](https://github.com/ClickHouse/ClickHouse/pull/52279) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix test `01111_create_drop_replicated_db_stress` [#52283](https://github.com/ClickHouse/ClickHouse/pull/52283) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix ugly code [#52284](https://github.com/ClickHouse/ClickHouse/pull/52284) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Add missing replica syncs in test_backup_restore_on_cluster [#52306](https://github.com/ClickHouse/ClickHouse/pull/52306) ([Michael Kolupaev](https://github.com/al13n321)).
+* Fix test_replicated_database 'node doesn't exist' flakiness [#52307](https://github.com/ClickHouse/ClickHouse/pull/52307) ([Michael Kolupaev](https://github.com/al13n321)).
+* Minor: Update description of events "QueryCacheHits/Misses" [#52309](https://github.com/ClickHouse/ClickHouse/pull/52309) ([Robert Schulze](https://github.com/rschu1ze)).
+* Beautify pretty-printing of the query string in SYSTEM.QUERY_CACHE [#52312](https://github.com/ClickHouse/ClickHouse/pull/52312) ([Robert Schulze](https://github.com/rschu1ze)).
+* Reduce dependencies for skim by avoiding default features [#52316](https://github.com/ClickHouse/ClickHouse/pull/52316) ([Azat Khuzhin](https://github.com/azat)).
+* Fix 02725_memory-for-merges [#52317](https://github.com/ClickHouse/ClickHouse/pull/52317) ([alesapin](https://github.com/alesapin)).
+* Skip unsupported disks in Keeper [#52321](https://github.com/ClickHouse/ClickHouse/pull/52321) ([Antonio Andelic](https://github.com/antonio2368)).
+* Revert "Improve CSVInputFormat to check and set default value to column if deserialize failed" [#52322](https://github.com/ClickHouse/ClickHouse/pull/52322) ([Kruglov Pavel](https://github.com/Avogar)). +* Resubmit [#51716](https://github.com/ClickHouse/ClickHouse/issues/51716) [#52323](https://github.com/ClickHouse/ClickHouse/pull/52323) ([Kruglov Pavel](https://github.com/Avogar)). +* Add logging about all found workflows for merge_pr.py [#52324](https://github.com/ClickHouse/ClickHouse/pull/52324) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Minor: Less awkward IAST::FormatSettings [#52332](https://github.com/ClickHouse/ClickHouse/pull/52332) ([Robert Schulze](https://github.com/rschu1ze)). +* Mark test 02125_many_mutations_2 as no-parallel to avoid flakiness [#52338](https://github.com/ClickHouse/ClickHouse/pull/52338) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix capabilities installed via systemd service (fixes netlink/IO priorities) [#52357](https://github.com/ClickHouse/ClickHouse/pull/52357) ([Azat Khuzhin](https://github.com/azat)). +* Update 01606_git_import.sh [#52360](https://github.com/ClickHouse/ClickHouse/pull/52360) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update ci-slack-bot.py [#52372](https://github.com/ClickHouse/ClickHouse/pull/52372) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `test_keeper_session` [#52373](https://github.com/ClickHouse/ClickHouse/pull/52373) ([Antonio Andelic](https://github.com/antonio2368)). +* Update ci-slack-bot.py [#52374](https://github.com/ClickHouse/ClickHouse/pull/52374) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable analyzer setting in backward_compatibility integration tests. [#52375](https://github.com/ClickHouse/ClickHouse/pull/52375) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* New metric - Filesystem cache size limit [#52378](https://github.com/ClickHouse/ClickHouse/pull/52378) ([Krzysztof Góralski](https://github.com/kgoralski)). +* Fix `test_replicated_merge_tree_encrypted_disk ` [#52379](https://github.com/ClickHouse/ClickHouse/pull/52379) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Fix `02122_parallel_formatting_XML ` [#52380](https://github.com/ClickHouse/ClickHouse/pull/52380) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Follow up to [#49698](https://github.com/ClickHouse/ClickHouse/issues/49698) [#52381](https://github.com/ClickHouse/ClickHouse/pull/52381) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Less replication errors [#52382](https://github.com/ClickHouse/ClickHouse/pull/52382) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Rename TaskStatsInfoGetter into NetlinkMetricsProvider [#52392](https://github.com/ClickHouse/ClickHouse/pull/52392) ([Azat Khuzhin](https://github.com/azat)). +* Fix `test_keeper_force_recovery` [#52408](https://github.com/ClickHouse/ClickHouse/pull/52408) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix flaky gtest_lru_file_cache.cpp [#52418](https://github.com/ClickHouse/ClickHouse/pull/52418) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix: remove redundant distinct with views [#52438](https://github.com/ClickHouse/ClickHouse/pull/52438) ([Igor Nikonov](https://github.com/devcrafter)). +* Add 02815_range_dict_no_direct_join to analyzer_tech_debt.txt [#52464](https://github.com/ClickHouse/ClickHouse/pull/52464) ([vdimir](https://github.com/vdimir)). 
+* Do not throw exception in OptimizedRegularExpressionImpl::analyze [#52467](https://github.com/ClickHouse/ClickHouse/pull/52467) ([Han Fei](https://github.com/hanfei1991)).
+* Remove skip_startup_tables from IDatabase::loadStoredObjects() [#52491](https://github.com/ClickHouse/ClickHouse/pull/52491) ([Azat Khuzhin](https://github.com/azat)).
+* Fix test_insert_same_partition_and_merge by increasing wait time [#52497](https://github.com/ClickHouse/ClickHouse/pull/52497) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Try to fix an ASan warning in HashJoin [#52499](https://github.com/ClickHouse/ClickHouse/pull/52499) ([Igor Nikonov](https://github.com/devcrafter)).
+* Replace with three way comparison [#52509](https://github.com/ClickHouse/ClickHouse/pull/52509) ([flynn](https://github.com/ucasfl)).
+* Fix flakiness of test_version_update_after_mutation by enabling force_remove_data_recursively_on_drop [#52514](https://github.com/ClickHouse/ClickHouse/pull/52514) ([Azat Khuzhin](https://github.com/azat)).
+* Fix `test_throttling` [#52515](https://github.com/ClickHouse/ClickHouse/pull/52515) ([Antonio Andelic](https://github.com/antonio2368)).
+* Improve logging macros [#52519](https://github.com/ClickHouse/ClickHouse/pull/52519) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* Fix `toDecimalString` function [#52520](https://github.com/ClickHouse/ClickHouse/pull/52520) ([Andrey Zvonov](https://github.com/zvonand)).
+* Remove unused code [#52527](https://github.com/ClickHouse/ClickHouse/pull/52527) ([Raúl Marín](https://github.com/Algunenano)).
+* Cancel execution in PipelineExecutor in case of exception in graph->updateNode [#52533](https://github.com/ClickHouse/ClickHouse/pull/52533) ([Kruglov Pavel](https://github.com/Avogar)).
+* Make 01951_distributed_push_down_limit analyzer agnostic [#52534](https://github.com/ClickHouse/ClickHouse/pull/52534) ([Igor Nikonov](https://github.com/devcrafter)).
+* Fix disallow_concurrency test for backup and restore [#52536](https://github.com/ClickHouse/ClickHouse/pull/52536) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)).
+* Update 02136_scalar_subquery_metrics.sql [#52537](https://github.com/ClickHouse/ClickHouse/pull/52537) ([Alexander Tokmakov](https://github.com/tavplubix)).
+* tests: fix 01035_avg_weighted_long flakiness [#52556](https://github.com/ClickHouse/ClickHouse/pull/52556) ([Azat Khuzhin](https://github.com/azat)).
+* tests: increase throttling for 01923_network_receive_time_metric_insert [#52557](https://github.com/ClickHouse/ClickHouse/pull/52557) ([Azat Khuzhin](https://github.com/azat)).
+* tests: fix 00719_parallel_ddl_table flakiness in debug builds [#52558](https://github.com/ClickHouse/ClickHouse/pull/52558) ([Azat Khuzhin](https://github.com/azat)).
+* tests: fix 01821_join_table_race_long flakiness [#52559](https://github.com/ClickHouse/ClickHouse/pull/52559) ([Azat Khuzhin](https://github.com/azat)).
+* Fix flaky `00995_exception_while_insert` [#52568](https://github.com/ClickHouse/ClickHouse/pull/52568) ([Antonio Andelic](https://github.com/antonio2368)).
+* MaterializedMySQL: Fix typos in tests [#52575](https://github.com/ClickHouse/ClickHouse/pull/52575) ([Val Doroshchuk](https://github.com/valbok)).
+* Fix `02497_trace_events_stress_long` again [#52587](https://github.com/ClickHouse/ClickHouse/pull/52587) ([Antonio Andelic](https://github.com/antonio2368)).
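+
+For the `toDecimalString` fix above: the function renders a number as a string with a caller-specified number of fractional digits. A minimal sketch:
+
+```sql
+SELECT toDecimalString(3.14159, 3);
+```
+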
+* Revert "Remove `mmap/mremap/munmap` from Allocator.h" [#52589](https://github.com/ClickHouse/ClickHouse/pull/52589) ([Nikita Taranov](https://github.com/nickitat)). +* Remove peak memory usage from the final message in the client [#52598](https://github.com/ClickHouse/ClickHouse/pull/52598) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* GinIndexStore: fix a bug when files are finalizated after first write, [#52602](https://github.com/ClickHouse/ClickHouse/pull/52602) ([Sema Checherinda](https://github.com/CheSema)). +* Fix deadlocks in StorageTableFunctionProxy [#52626](https://github.com/ClickHouse/ClickHouse/pull/52626) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix build with clang-15 [#52627](https://github.com/ClickHouse/ClickHouse/pull/52627) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix style [#52647](https://github.com/ClickHouse/ClickHouse/pull/52647) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix logging level of a noisy message [#52648](https://github.com/ClickHouse/ClickHouse/pull/52648) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Revert "Added field `refcount` to `system.remote_data_paths` table" [#52657](https://github.com/ClickHouse/ClickHouse/pull/52657) ([Alexander Tokmakov](https://github.com/tavplubix)). + diff --git a/docs/changelogs/v23.7.2.25-stable.md b/docs/changelogs/v23.7.2.25-stable.md new file mode 100644 index 000000000000..267083d8e039 --- /dev/null +++ b/docs/changelogs/v23.7.2.25-stable.md @@ -0,0 +1,31 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.7.2.25-stable (8dd1107b032) FIXME as compared to v23.7.1.2470-stable (a70127baecc) + +#### Backward Incompatible Change +* Backported in [#52850](https://github.com/ClickHouse/ClickHouse/issues/52850): If a dynamic disk contains a name, it should be specified as `disk = disk(name = 'disk_name'`, ...) in disk function arguments. In previous version it could be specified as `disk = disk_(...)`, which is no longer supported. [#52820](https://github.com/ClickHouse/ClickHouse/pull/52820) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### Build/Testing/Packaging Improvement +* Backported in [#52913](https://github.com/ClickHouse/ClickHouse/issues/52913): Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* init and destroy ares channel on demand.. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). +* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). +* Fix data race in Keeper reconfiguration [#52804](https://github.com/ClickHouse/ClickHouse/pull/52804) ([Antonio Andelic](https://github.com/antonio2368)). 
+* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Rename setting disable_url_encoding to enable_url_encoding and add a test [#52656](https://github.com/ClickHouse/ClickHouse/pull/52656) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix bugs and better test for SYSTEM STOP LISTEN [#52680](https://github.com/ClickHouse/ClickHouse/pull/52680) ([Nikolay Degterinsky](https://github.com/evillique)). +* Increase min protocol version for sparse serialization [#52835](https://github.com/ClickHouse/ClickHouse/pull/52835) ([Anton Popov](https://github.com/CurtizJ)). +* Docker improvements [#52869](https://github.com/ClickHouse/ClickHouse/pull/52869) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + diff --git a/docs/changelogs/v23.7.3.14-stable.md b/docs/changelogs/v23.7.3.14-stable.md new file mode 100644 index 000000000000..dbe76bd19e76 --- /dev/null +++ b/docs/changelogs/v23.7.3.14-stable.md @@ -0,0 +1,23 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.7.3.14-stable (bd9a510550c) FIXME as compared to v23.7.2.25-stable (8dd1107b032) + +#### Build/Testing/Packaging Improvement +* Backported in [#53025](https://github.com/ClickHouse/ClickHouse/issues/53025): Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix named collections on cluster 23.7 [#52687](https://github.com/ClickHouse/ClickHouse/pull/52687) ([Al Korgun](https://github.com/alkorgun)). +* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). +* Fix ZstdDeflatingWriteBuffer truncating the output sometimes [#53064](https://github.com/ClickHouse/ClickHouse/pull/53064) ([Michael Kolupaev](https://github.com/al13n321)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Suspicious DISTINCT crashes from sqlancer [#52636](https://github.com/ClickHouse/ClickHouse/pull/52636) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix Parquet stats for Float32 and Float64 [#53067](https://github.com/ClickHouse/ClickHouse/pull/53067) ([Michael Kolupaev](https://github.com/al13n321)). + diff --git a/docs/changelogs/v23.7.4.5-stable.md b/docs/changelogs/v23.7.4.5-stable.md new file mode 100644 index 000000000000..c7926d79bde3 --- /dev/null +++ b/docs/changelogs/v23.7.4.5-stable.md @@ -0,0 +1,17 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.7.4.5-stable (bd2fcd44553) FIXME as compared to v23.7.3.14-stable (bd9a510550c) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Disable the new parquet encoder [#53130](https://github.com/ClickHouse/ClickHouse/pull/53130) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Revert changes in `ZstdDeflatingAppendableWriteBuffer` [#53111](https://github.com/ClickHouse/ClickHouse/pull/53111) ([Antonio Andelic](https://github.com/antonio2368)). 
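+
+The `disable_url_encoding`/`enable_url_encoding` entries above control whether the path in a URI is decoded/encoded by the URL engine; a hedged sketch with a hypothetical URL (note the setting was renamed to `enable_url_encoding` in v23.7.2.25):
+
+```sql
+SELECT *
+FROM url('https://example.com/data%20set.csv', 'CSVWithNames')
+SETTINGS enable_url_encoding = 0;  -- keep the path exactly as written
+```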
+ diff --git a/docs/changelogs/v23.7.5.30-stable.md b/docs/changelogs/v23.7.5.30-stable.md new file mode 100644 index 000000000000..78bef9fb489b --- /dev/null +++ b/docs/changelogs/v23.7.5.30-stable.md @@ -0,0 +1,31 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.7.5.30-stable (e86c21fb922) FIXME as compared to v23.7.4.5-stable (bd2fcd44553) + +#### Build/Testing/Packaging Improvement +* Backported in [#53291](https://github.com/ClickHouse/ClickHouse/issues/53291): The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud; the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Backported in [#53467](https://github.com/ClickHouse/ClickHouse/issues/53467): Preserve environment parameters in the `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). +* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix wrong column order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix: the interpolate expression takes the source column instead of the same-name alias from the select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix fuzzer crash in parseDateTime() [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix broken `02862_sorted_distinct_sparse_fix` [#53738](https://github.com/ClickHouse/ClickHouse/pull/53738) ([Antonio Andelic](https://github.com/antonio2368)). +* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+ diff --git a/docs/changelogs/v23.8.1.2992-lts.md b/docs/changelogs/v23.8.1.2992-lts.md new file mode 100644 index 000000000000..e3e0e4f03443 --- /dev/null +++ b/docs/changelogs/v23.8.1.2992-lts.md @@ -0,0 +1,591 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.8.1.2992-lts (ebc7d9a9f3b) FIXME as compared to v23.7.1.2470-stable (a70127baecc) + +#### Backward Incompatible Change +* Deprecate the metadata cache feature. It is experimental and we have never used it. The feature is dangerous: [#51182](https://github.com/ClickHouse/ClickHouse/issues/51182). Remove the `system.merge_tree_metadata_cache` system table. The metadata cache is still available in this version but will be removed soon. This closes [#39197](https://github.com/ClickHouse/ClickHouse/issues/39197). [#51303](https://github.com/ClickHouse/ClickHouse/pull/51303) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* If a dynamic disk contains a name, it should be specified as `disk = disk(name = 'disk_name', ...)` in disk function arguments. In previous versions it could be specified as `disk = disk_(...)`, which is no longer supported. [#52820](https://github.com/ClickHouse/ClickHouse/pull/52820) ([Kseniia Sumarokova](https://github.com/kssenii)). +* `clickhouse-benchmark` will establish connections in parallel when invoked with `--concurrency` greater than one. Previously it was unusable if you ran it with 1000 concurrent connections from Europe to the US. Correct calculation of QPS for connections with high latency. Backward incompatible change: the option for JSON output of `clickhouse-benchmark` is removed. If you've used this option, you can also extract data from the `system.query_log` in JSON format as a workaround. [#53293](https://github.com/ClickHouse/ClickHouse/pull/53293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The `microseconds` column is removed from the `system.text_log`, and the `milliseconds` column is removed from the `system.metric_log`, because they are redundant in the presence of the `event_time_microseconds` column. [#53601](https://github.com/ClickHouse/ClickHouse/pull/53601) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Changed ZooKeeper paths for storage `S3Queue` metadata. [#54137](https://github.com/ClickHouse/ClickHouse/pull/54137) ([Kseniia Sumarokova](https://github.com/kssenii)). + +#### New Feature +* Add column `ptr` to `system.trace_log` for `trace_type = 'MemorySample'`. This column contains an address of allocation. Added function `flameGraph` which can build a flame graph containing allocated and not-released memory. Reworking of [#38391](https://github.com/ClickHouse/ClickHouse/issues/38391). [#45322](https://github.com/ClickHouse/ClickHouse/pull/45322) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Add setting `rewrite_count_distinct_if_with_count_distinct_implementation` to rewrite `countDistinctIf` with `count_distinct_implementation`. Closes [#30642](https://github.com/ClickHouse/ClickHouse/issues/30642). [#46051](https://github.com/ClickHouse/ClickHouse/pull/46051) ([flynn](https://github.com/ucasfl)). +* Add new table engine `S3Queue` for streaming data import from S3 (see the sketch below). Closes [#37012](https://github.com/ClickHouse/ClickHouse/issues/37012). [#49086](https://github.com/ClickHouse/ClickHouse/pull/49086) ([s-kat](https://github.com/s-kat)).
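+
+A minimal sketch of the `S3Queue` engine from the entry above. The table name, bucket URL and column set are hypothetical, and `mode = 'unordered'` is one of the engine's documented modes; in practice the queue is usually drained into a regular table via a materialized view:
+
+```sql
+-- Hypothetical names and bucket; streams *.json objects as they appear.
+CREATE TABLE s3queue_demo
+(
+    name String,
+    value UInt32
+)
+ENGINE = S3Queue('https://mybucket.s3.amazonaws.com/data/*.json', 'JSONEachRow')
+SETTINGS mode = 'unordered';
+```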
+* Added `SevenZipArchiveReader` and `TarArchiveReader`, the table function `file('path_to_archive :: filename')`, functional tests for it, and unit tests for `TarArchiveReader`/`SevenZipArchiveReader`. [#50321](https://github.com/ClickHouse/ClickHouse/pull/50321) ([nikitakeba](https://github.com/nikitakeba)). +* Added the `azureBlobStorageCluster` table function. The supported set of features is very similar to the `s3Cluster` table function. [#50795](https://github.com/ClickHouse/ClickHouse/pull/50795) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Allow using `cluster`, `clusterAllReplicas`, `remote`, `remoteRaw` and `remoteSecure` without a table name, as requested in issue [#50808](https://github.com/ClickHouse/ClickHouse/issues/50808). [#50848](https://github.com/ClickHouse/ClickHouse/pull/50848) ([Yangkuan Liu](https://github.com/LiuYangkuan)). +* Added a system table to monitor Kafka consumers. [#50999](https://github.com/ClickHouse/ClickHouse/pull/50999) ([Ilya Golshtein](https://github.com/ilejn)). +* Added the `max_sessions_for_user` setting. [#51724](https://github.com/ClickHouse/ClickHouse/pull/51724) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* ClickHouse did not have functions to convert a UTC timestamp to a timestamp in another time zone and back, unlike Spark; the functions `toUTCTimestamp/fromUTCTimestamp` were added to act the same as Spark's `to_utc_timestamp/from_utc_timestamp`. [#52117](https://github.com/ClickHouse/ClickHouse/pull/52117) ([KevinyhZou](https://github.com/KevinyhZou)). +* Add new functions `structureToCapnProtoSchema`/`structureToProtobufSchema` that convert a ClickHouse table structure to a CapnProto/Protobuf format schema. Allow input/output of data in CapnProto/Protobuf format without an external format schema, using a schema autogenerated from the table structure (controlled by the settings `format_capn_proto_use_autogenerated_schema`/`format_protobuf_use_autogenerated_schema`). Allow exporting the autogenerated schema during input/output using the setting `output_format_schema`. [#52278](https://github.com/ClickHouse/ClickHouse/pull/52278) ([Kruglov Pavel](https://github.com/Avogar)). +* A new field `query_cache_usage` in `system.query_log` now shows if and how the query cache was used. [#52384](https://github.com/ClickHouse/ClickHouse/pull/52384) ([Robert Schulze](https://github.com/rschu1ze)). +* Add new functions `startsWithUTF8` and `endsWithUTF8`. [#52555](https://github.com/ClickHouse/ClickHouse/pull/52555) ([李扬](https://github.com/taiyang-li)). +* Allow a variable number of columns in TSV/CustomSeparated/JSONCompactEachRow, and make schema inference work with a variable number of columns. Add settings `input_format_tsv_allow_variable_number_of_columns`, `input_format_custom_allow_variable_number_of_columns`, `input_format_json_compact_allow_variable_number_of_columns`. [#52692](https://github.com/ClickHouse/ClickHouse/pull/52692) ([Kruglov Pavel](https://github.com/Avogar)). +* Added `SYSTEM STOP/START PULLING REPLICATION LOG` queries (for testing `ReplicatedMergeTree`). [#52881](https://github.com/ClickHouse/ClickHouse/pull/52881) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Allow to execute constant non-deterministic functions in mutations on the initiator. [#53129](https://github.com/ClickHouse/ClickHouse/pull/53129) ([Anton Popov](https://github.com/CurtizJ)). +* Add input format `One` that doesn't read any data and always returns a single row with column `dummy` of type `UInt8` and value `0`, like `system.one`.
It can be used together with `_file/_path` virtual columns to list files in file/s3/url/hdfs/etc table functions without reading any data. [#53209](https://github.com/ClickHouse/ClickHouse/pull/53209) ([Kruglov Pavel](https://github.com/Avogar)). +* Add the `tupleConcat` function. Closes [#52759](https://github.com/ClickHouse/ClickHouse/issues/52759). [#53239](https://github.com/ClickHouse/ClickHouse/pull/53239) ([Nikolay Degterinsky](https://github.com/evillique)). +* Support the `TRUNCATE DATABASE` operation. [#53261](https://github.com/ClickHouse/ClickHouse/pull/53261) ([Bharat Nallan](https://github.com/bharatnc)). +* Add the `max_threads_for_indexes` setting to limit the number of threads used for primary key processing. [#53313](https://github.com/ClickHouse/ClickHouse/pull/53313) ([jorisgio](https://github.com/jorisgio)). +* Add experimental support for HNSW as an approximate nearest-neighbor search method. [#53447](https://github.com/ClickHouse/ClickHouse/pull/53447) ([Davit Vardanyan](https://github.com/davvard)). +* Re-add SipHash keyed functions. [#53525](https://github.com/ClickHouse/ClickHouse/pull/53525) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* ([#52755](https://github.com/ClickHouse/ClickHouse/issues/52755), [#52895](https://github.com/ClickHouse/ClickHouse/issues/52895)) Added functions `arrayRotateLeft`, `arrayRotateRight`, `arrayShiftLeft`, `arrayShiftRight` (see the sketch after this list). [#53557](https://github.com/ClickHouse/ClickHouse/pull/53557) ([Mikhail Koviazin](https://github.com/mkmkme)). +* Add column `name` to `system.clusters` as an alias to `cluster`. [#53605](https://github.com/ClickHouse/ClickHouse/pull/53605) ([irenjj](https://github.com/irenjj)). +* The advanced dashboard now allows mass editing (save/load). [#53608](https://github.com/ClickHouse/ClickHouse/pull/53608) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add support for plural units. [#53641](https://github.com/ClickHouse/ClickHouse/pull/53641) ([irenjj](https://github.com/irenjj)). +* Support the function `isNotDistinctFrom` in the JOIN ON section for null-safe comparison, ref [#53061](https://github.com/ClickHouse/ClickHouse/issues/53061). [#53755](https://github.com/ClickHouse/ClickHouse/pull/53755) ([vdimir](https://github.com/vdimir)). +* Added the "hide_in_preprocessed" attribute to ClickHouse's server configuration XML dialect. This is a mechanism to hide certain settings from appearing in preprocessed server configuration files. Useful e.g. for passwords or private keys that should not appear verbatim in files. [#53818](https://github.com/ClickHouse/ClickHouse/pull/53818) ([Roman Vasin](https://github.com/rvasin)). +* Added the server setting `validate_tcp_client_information`, which determines whether validation of client information is enabled when a query packet is received. [#53907](https://github.com/ClickHouse/ClickHouse/pull/53907) ([Alexey Gerasimchuck](https://github.com/Demilivor)). + +#### Performance Improvement +* Enable JIT compilation for AArch64, PowerPC, SystemZ, RISCV. [#38217](https://github.com/ClickHouse/ClickHouse/pull/38217) ([Maksim Kita](https://github.com/kitaisreal)). +* This patch provides a method to process all the hash sets in parallel before merging. [#50748](https://github.com/ClickHouse/ClickHouse/pull/50748) ([Jiebin Sun](https://github.com/jiebinn)). +* Optimize aggregation performance of a nullable string key when using `AggregationMethodSerialized`. [#51399](https://github.com/ClickHouse/ClickHouse/pull/51399) ([LiuNeng](https://github.com/liuneng1994)).
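+
+A short, hedged sketch of the `tupleConcat` and `arrayRotateLeft`/`arrayShiftLeft` functions listed above; the results in the comments follow the described semantics (rotation wraps around, shifting fills vacated positions with a supplied default) and are assumptions rather than output captured from a release build:
+
+```sql
+SELECT tupleConcat((1, 'a'), (true, 'b'));    -- (1, 'a', true, 'b')
+SELECT arrayRotateLeft([1, 2, 3, 4, 5], 2);   -- [3, 4, 5, 1, 2]
+SELECT arrayShiftLeft([1, 2, 3, 4, 5], 2, 0); -- [3, 4, 5, 0, 0]
+```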
+* The performance experiments of **SSB** on the ICX device (Intel Xeon Platinum 8380 CPU, 80 cores, 160 threads) show that this change could bring an improvement of **8.5%** to the **geomean QPS** when the experimental analyzer is enabled. The details are shown below: ![image](https://github.com/ClickHouse/ClickHouse/assets/26588299/4e58bf8b-d276-408d-ad45-38c82d3cb918). [#52091](https://github.com/ClickHouse/ClickHouse/pull/52091) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Parquet filter pushdown. I.e. when reading Parquet files, row groups (chunks of the file) are skipped based on the WHERE condition and the min/max values in each column. In particular, if the file is roughly sorted by some column, queries that filter by a short range of that column will be much faster. [#52951](https://github.com/ClickHouse/ClickHouse/pull/52951) ([Michael Kolupaev](https://github.com/al13n321)). +* Optimize the merge if all hash sets are single-level in `UniqExactSet`. [#52973](https://github.com/ClickHouse/ClickHouse/pull/52973) ([Jiebin Sun](https://github.com/jiebinn)). +* StorageJoin: do not create a clone hash join with all columns. [#53046](https://github.com/ClickHouse/ClickHouse/pull/53046) ([Duc Canh Le](https://github.com/canhld94)). +* Optimize reading small row groups by batching them together in Parquet. Closes [#53069](https://github.com/ClickHouse/ClickHouse/issues/53069). [#53281](https://github.com/ClickHouse/ClickHouse/pull/53281) ([Kruglov Pavel](https://github.com/Avogar)). +* Implement a native ORC input format without Arrow to improve performance. [#53324](https://github.com/ClickHouse/ClickHouse/pull/53324) ([李扬](https://github.com/taiyang-li)). +* The dashboard will tell the server to compress the data, which is useful for large time frames over slow internet connections. For example, one chart with 86400 points can be 1.5 MB uncompressed and 60 KB compressed with `br`. [#53569](https://github.com/ClickHouse/ClickHouse/pull/53569) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Optimize count from files in most input formats. Closes [#44334](https://github.com/ClickHouse/ClickHouse/issues/44334). [#53637](https://github.com/ClickHouse/ClickHouse/pull/53637) ([Kruglov Pavel](https://github.com/Avogar)). +* Better utilization of the thread pool for BACKUPs & RESTOREs. [#53649](https://github.com/ClickHouse/ClickHouse/pull/53649) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Remove a useless, slow performance check on the client. [#53695](https://github.com/ClickHouse/ClickHouse/pull/53695) ([Raúl Marín](https://github.com/Algunenano)). + +#### Improvement
+* Bloom filter indices are pruned so that they correlate with the cardinality of the data set they are tracking. [#35102](https://github.com/ClickHouse/ClickHouse/pull/35102) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Add `stderr_reaction` configuration/setting to control the reaction (none, log or throw) when an external command's stderr has data. This helps make debugging external commands easier. [#43210](https://github.com/ClickHouse/ClickHouse/pull/43210) ([Amos Bird](https://github.com/amosbird)). +* See https://github.com/ClickHouse/ClickHouse/issues/48720. @kgoralski helped with some thoughts about the `system.merges` part. [#48990](https://github.com/ClickHouse/ClickHouse/pull/48990) ([Jianfei Hu](https://github.com/incfly)). +* If a dictionary is created with a complex key, automatically choose the "complex key" layout variant.
[#49587](https://github.com/ClickHouse/ClickHouse/pull/49587) ([xiebin](https://github.com/xbthink)). +* Add setting `use_concurrency_control` for better testing of the new concurrency control feature. [#49618](https://github.com/ClickHouse/ClickHouse/pull/49618) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added suggestions for mistyped database and table names, with different scenarios commented. [#49801](https://github.com/ClickHouse/ClickHouse/pull/49801) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* While reading small files from HDFS via Gluten, we found that it cost more time compared to querying directly with Spark. [#50063](https://github.com/ClickHouse/ClickHouse/pull/50063) ([KevinyhZou](https://github.com/KevinyhZou)). +* Reduce the number of worthless error log messages after session expiration. [#50171](https://github.com/ClickHouse/ClickHouse/pull/50171) ([helifu](https://github.com/helifu)). +* Introduce fallback ZooKeeper sessions which are time-bound. Fixed the `index` column in `system.zookeeper_connection` for DNS addresses. [#50424](https://github.com/ClickHouse/ClickHouse/pull/50424) ([Anton Kozlov](https://github.com/tonickkozlov)). +* Add the ability to log when `max_partitions_per_insert_block` is reached ... [#50948](https://github.com/ClickHouse/ClickHouse/pull/50948) ([Sean Haynes](https://github.com/seandhaynes)). +* Added a bunch of custom commands (mostly to make ClickHouse debugging easier). [#51117](https://github.com/ClickHouse/ClickHouse/pull/51117) ([pufit](https://github.com/pufit)). +* Updated the check for `connection_string`, as a connection string with SAS does not always begin with `DefaultEndPoint`, and updated the connection URL to include the SAS token after adding the container to the URL. [#51141](https://github.com/ClickHouse/ClickHouse/pull/51141) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix the description of filtering sets in the `full_sorting_merge` join. [#51329](https://github.com/ClickHouse/ClickHouse/pull/51329) ([Tanay Tummalapalli](https://github.com/ttanay)). +* The sizes of the (index) uncompressed/mark, mmap and query caches can now be configured dynamically at runtime. [#51446](https://github.com/ClickHouse/ClickHouse/pull/51446) ([Robert Schulze](https://github.com/rschu1ze)). +* Fixed memory consumption in `Aggregator` when `max_block_size` is huge. [#51566](https://github.com/ClickHouse/ClickHouse/pull/51566) ([Nikita Taranov](https://github.com/nickitat)). +* Add `SYSTEM SYNC FILESYSTEM CACHE` command. It will compare in-memory state of filesystem cache with what it has on disk and fix in-memory state if needed. [#51622](https://github.com/ClickHouse/ClickHouse/pull/51622) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Attempt to create a generic proxy resolver for ClickHouse while keeping backward compatibility with the existing S3 storage configuration proxy resolver. [#51749](https://github.com/ClickHouse/ClickHouse/pull/51749) ([Arthur Passos](https://github.com/arthurpassos)). +* Support reading tuple subcolumns from file/s3/hdfs/url/azureBlobStorage table functions. [#51806](https://github.com/ClickHouse/ClickHouse/pull/51806) ([Kruglov Pavel](https://github.com/Avogar)). +* Function `arrayIntersect` now returns the values sorted like the first argument (see the sketch below). Closes [#27622](https://github.com/ClickHouse/ClickHouse/issues/27622). [#51850](https://github.com/ClickHouse/ClickHouse/pull/51850) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
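+
+A small illustration of the `arrayIntersect` ordering change above; the result in the comment is an assumption based on the described behavior (intersection values follow the order of the first argument):
+
+```sql
+SELECT arrayIntersect([1, 2, 3], [4, 5, 3, 1]); -- [1, 3], ordered like the first argument
+```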
+* Add new queries that allow creating/dropping access entities in a specified access storage, or moving access entities from one access storage to another. [#51912](https://github.com/ClickHouse/ClickHouse/pull/51912) ([pufit](https://github.com/pufit)). +* `ALTER TABLE FREEZE` queries are not replicated in the `Replicated` database engine. [#52064](https://github.com/ClickHouse/ClickHouse/pull/52064) ([Mike Kot](https://github.com/myrrc)). +* Added the possibility to flush logs to disk on crash; added logs buffer configuration. [#52174](https://github.com/ClickHouse/ClickHouse/pull/52174) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix the S3 table function not working with pre-signed URLs. Closes [#50846](https://github.com/ClickHouse/ClickHouse/issues/50846). [#52310](https://github.com/ClickHouse/ClickHouse/pull/52310) ([chen](https://github.com/xiedeyantu)). +* The `system.events` and `system.metrics` tables add a column `name` as an alias to `event` and `metric`. Closes [#51257](https://github.com/ClickHouse/ClickHouse/issues/51257). [#52315](https://github.com/ClickHouse/ClickHouse/pull/52315) ([chen](https://github.com/xiedeyantu)). +* Added support of syntax `CREATE UNIQUE INDEX` in parser for better SQL compatibility. `UNIQUE` index is not supported. Set `create_index_ignore_unique=1` to ignore UNIQUE keyword in queries. [#52320](https://github.com/ClickHouse/ClickHouse/pull/52320) ([Ilya Yatsishin](https://github.com/qoega)). +* Add support of predefined macros (`{database}` and `{table}`) in some Kafka engine settings: topic, consumer, client_id, etc. [#52386](https://github.com/ClickHouse/ClickHouse/pull/52386) ([Yury Bogomolov](https://github.com/ybogo)). +* Disable updating the fs cache during backup/restore. The filesystem cache must not be updated during backup/restore; it seems it just slows down the process without any profit (because the BACKUP command can read a lot of data and it's no use to put all the data into the filesystem cache and immediately evict it). [#52402](https://github.com/ClickHouse/ClickHouse/pull/52402) ([Vitaly Baranov](https://github.com/vitlibar)). +* Updated the parameterized view implementation to create a new StorageView with substituted parameters for every SELECT query of a parameterized view. [#52569](https://github.com/ClickHouse/ClickHouse/pull/52569) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* The configuration of an S3 endpoint now allows using it from the root, and appends '/' automatically if needed. [#47809](https://github.com/ClickHouse/ClickHouse/issues/47809). [#52600](https://github.com/ClickHouse/ClickHouse/pull/52600) ([xiaolei565](https://github.com/xiaolei565)). +* Added support for adding and subtracting arrays: `[5,2] + [1,7]` (see the sketch below). Division and multiplication were not implemented due to confusion between pointwise multiplication and the scalar product of arguments. Closes [#49939](https://github.com/ClickHouse/ClickHouse/issues/49939). [#52625](https://github.com/ClickHouse/ClickHouse/pull/52625) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add support for string literals as table name. Closes [#52178](https://github.com/ClickHouse/ClickHouse/issues/52178). [#52635](https://github.com/ClickHouse/ClickHouse/pull/52635) ([hendrik-m](https://github.com/hendrik-m)). +* For clickhouse-local, allow positional options and populate global UDF settings (`user_scripts_path` and `user_defined_executable_functions_config`). [#52643](https://github.com/ClickHouse/ClickHouse/pull/52643) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)).
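+
+A quick sketch of the pointwise array addition/subtraction described above; the results in the comments are assumptions derived from the pointwise semantics:
+
+```sql
+SELECT [5, 2] + [1, 7]; -- [6, 9]
+SELECT [5, 2] - [1, 7]; -- [4, -5]
+```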
+* `system.asynchronous_metrics` now includes the metrics `QueryCacheEntries` and `QueryCacheBytes` to inspect the query cache. [#52650](https://github.com/ClickHouse/ClickHouse/pull/52650) ([Robert Schulze](https://github.com/rschu1ze)). +* Added the possibility to use the `s3_storage_class` parameter in the `SETTINGS` clause of the `BACKUP` statement for backups to S3. [#52658](https://github.com/ClickHouse/ClickHouse/pull/52658) ([Roman Vasin](https://github.com/rvasin)). +* Improve insert retries on Keeper session expiration. [#52688](https://github.com/ClickHouse/ClickHouse/pull/52688) ([Raúl Marín](https://github.com/Algunenano)). +* Add utility `print-backup-info.py` which parses a backup metadata file and prints information about the backup. [#52690](https://github.com/ClickHouse/ClickHouse/pull/52690) ([Vitaly Baranov](https://github.com/vitlibar)). +* Closes [#49510](https://github.com/ClickHouse/ClickHouse/issues/49510). Currently we have database and table names case-sensitive, but the tools query `information_schema` sometimes in lowercase, sometimes in uppercase. For this reason we have `information_schema` database, containing lowercase tables, such as `information_schema.tables` and `INFORMATION_SCHEMA` database, containing uppercase tables, such as `INFORMATION_SCHEMA.TABLES`. But some tools are querying `INFORMATION_SCHEMA.tables` and `information_schema.TABLES`. The proposed solution is to duplicate both lowercase and uppercase tables in both the lowercase and uppercase `information_schema` databases. [#52695](https://github.com/ClickHouse/ClickHouse/pull/52695) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* `GET_PART` and `ATTACH_PART` are almost identical, so they should use the same executor pool. [#52716](https://github.com/ClickHouse/ClickHouse/pull/52716) ([Duc Canh Le](https://github.com/canhld94)). +* Query `CHECK TABLE` has better performance and usability (sends progress updates, is cancellable). [#52745](https://github.com/ClickHouse/ClickHouse/pull/52745) ([vdimir](https://github.com/vdimir)). +* Add `modulo`, `intDiv`, `intDivOrZero` for tuples. [#52758](https://github.com/ClickHouse/ClickHouse/pull/52758) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Search for default `yaml` and `yml` configs in clickhouse-client after `xml`. [#52767](https://github.com/ClickHouse/ClickHouse/pull/52767) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* When merging into a non-'clickhouse'-rooted configuration, configs with a different root node name are just bypassed without an exception. [#52770](https://github.com/ClickHouse/ClickHouse/pull/52770) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Now it's possible to specify min (`memory_profiler_sample_min_allocation_size`) and max (`memory_profiler_sample_max_allocation_size`) size for allocations to be tracked with sampling memory profiler. [#52779](https://github.com/ClickHouse/ClickHouse/pull/52779) ([alesapin](https://github.com/alesapin)). +* Add `precise_float_parsing` setting to switch float parsing methods (fast/precise); see the sketch below. [#52791](https://github.com/ClickHouse/ClickHouse/pull/52791) ([Andrey Zvonov](https://github.com/zvonand)). +* Use the same default paths for `clickhouse_keeper` (symlink) as for `clickhouse_keeper` (executable). [#52861](https://github.com/ClickHouse/ClickHouse/pull/52861) ([Vitaly Baranov](https://github.com/vitlibar)). +* CVE-2016-2183: disable 3DES. [#52893](https://github.com/ClickHouse/ClickHouse/pull/52893) ([Kenji Noguchi](https://github.com/knoguchi)).
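+
+A hedged sketch of the `precise_float_parsing` setting above; the exact fast-path value in the first comment can vary by platform and only illustrates the kind of difference to expect:
+
+```sql
+SELECT toFloat64('1.7091') SETTINGS precise_float_parsing = 0; -- may print 1.7090999999999998
+SELECT toFloat64('1.7091') SETTINGS precise_float_parsing = 1; -- prints 1.7091
+```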
+* Load filesystem cache metadata on startup in parallel. Configured by `load_metadata_threads` (default: 1) cache config setting. Related to [#52037](https://github.com/ClickHouse/ClickHouse/issues/52037). [#52943](https://github.com/ClickHouse/ClickHouse/pull/52943) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Improve error message for table function remote. Closes [#40220](https://github.com/ClickHouse/ClickHouse/issues/40220). [#52959](https://github.com/ClickHouse/ClickHouse/pull/52959) ([jiyoungyoooo](https://github.com/jiyoungyoooo)). +* Added the possibility to specify custom storage policy in the `SETTINGS` clause of `RESTORE` queries. [#52970](https://github.com/ClickHouse/ClickHouse/pull/52970) ([Victor Krasnov](https://github.com/sirvickr)). +* Add the ability to throttle the S3 requests on backup operations (`BACKUP` and `RESTORE` commands now honor `s3_max_[get/put]_[rps/burst]`). [#52974](https://github.com/ClickHouse/ClickHouse/pull/52974) ([Daniel Pozo Escalona](https://github.com/danipozo)). +* Add settings to ignore ON CLUSTER clause in queries for management of replicated user-defined functions or access control entities with replicated storage. [#52975](https://github.com/ClickHouse/ClickHouse/pull/52975) ([Aleksei Filatov](https://github.com/aalexfvk)). +* Enable parallel reading from replicas over distributed table. Related to [#49708](https://github.com/ClickHouse/ClickHouse/issues/49708). [#53005](https://github.com/ClickHouse/ClickHouse/pull/53005) ([Igor Nikonov](https://github.com/devcrafter)). +* EXPLAIN actions for JOIN step. [#53006](https://github.com/ClickHouse/ClickHouse/pull/53006) ([Maksim Kita](https://github.com/kitaisreal)). +* Make `hasTokenOrNull` and `hasTokenCaseInsensitiveOrNull` return null for empty needles. [#53059](https://github.com/ClickHouse/ClickHouse/pull/53059) ([ltrk2](https://github.com/ltrk2)). +* Allow to restrict allowed paths for filesystem caches. Mainly useful for dynamic disks. If in server config `filesystem_caches_path` is specified, all filesystem caches' paths will be restricted to this directory. E.g. if the `path` in cache config is relative - it will be put in `filesystem_caches_path`; if `path` in cache config is absolute, it will be required to lie inside `filesystem_caches_path`. If `filesystem_caches_path` is not specified in config, then behaviour will be the same as in earlier versions. [#53124](https://github.com/ClickHouse/ClickHouse/pull/53124) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Added a bunch of custom commands (mostly to make ClickHouse debugging easier). [#53127](https://github.com/ClickHouse/ClickHouse/pull/53127) ([pufit](https://github.com/pufit)). +* Add diagnostic info about file name during schema inference - it helps when you process multiple files with globs. [#53135](https://github.com/ClickHouse/ClickHouse/pull/53135) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Client will load suggestions using the main connection if the second connection is not allowed to create a session. [#53177](https://github.com/ClickHouse/ClickHouse/pull/53177) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Add EXCEPT clause to `SYSTEM STOP/START LISTEN QUERIES [ALL/DEFAULT/CUSTOM]` query, for example `SYSTEM STOP LISTEN QUERIES ALL EXCEPT TCP, HTTP`. [#53280](https://github.com/ClickHouse/ClickHouse/pull/53280) ([Nikolay Degterinsky](https://github.com/evillique)). +* Change the default of `max_concurrent_queries` from 100 to 1000. 
It's ok to have many concurrent queries if they are not heavy, and mostly waiting for the network. Note: don't confuse concurrent queries and QPS: for example, ClickHouse server can do tens of thousands of QPS with less than 100 concurrent queries. [#53285](https://github.com/ClickHouse/ClickHouse/pull/53285) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add the ability to override credentials for accessing the base backup in S3 (since tokens may be expired). [#53326](https://github.com/ClickHouse/ClickHouse/pull/53326) ([Azat Khuzhin](https://github.com/azat)). +* Improve `move_primary_key_columns_to_end_of_prewhere`. [#53337](https://github.com/ClickHouse/ClickHouse/pull/53337) ([Han Fei](https://github.com/hanfei1991)). +* Limit the number of concurrent background partition optimize merges. [#53405](https://github.com/ClickHouse/ClickHouse/pull/53405) ([Duc Canh Le](https://github.com/canhld94)). +* Added a setting `allow_moving_table_directory_to_trash` that allows ignoring the `Directory for table data already exists` error when replicating/recovering a `Replicated` database. [#53425](https://github.com/ClickHouse/ClickHouse/pull/53425) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Server settings `asynchronous_metrics_update_period_s` and `asynchronous_heavy_metrics_update_period_s` configured to 0 now fail gracefully instead of crashing the server. [#53428](https://github.com/ClickHouse/ClickHouse/pull/53428) ([Robert Schulze](https://github.com/rschu1ze)). +* Previously, the caller could register the same watch callback multiple times. In that case each entry consumed memory and the same callback was called multiple times, which didn't make much sense. In order to avoid this, the caller could have some logic to not add the same watch multiple times. With this change, this deduplication is done internally if the watch callback is passed via shared_ptr. [#53452](https://github.com/ClickHouse/ClickHouse/pull/53452) ([Alexander Gololobov](https://github.com/davenger)). +* The ClickHouse server now respects memory limits changed via cgroups when reloading its configuration. [#53455](https://github.com/ClickHouse/ClickHouse/pull/53455) ([Robert Schulze](https://github.com/rschu1ze)). +* Add the ability to turn off the flush of Distributed tables on `DETACH`/`DROP`/server shutdown. [#53501](https://github.com/ClickHouse/ClickHouse/pull/53501) ([Azat Khuzhin](https://github.com/azat)). +* `domainRFC` now supports IPv6 (an IP literal within square brackets). [#53506](https://github.com/ClickHouse/ClickHouse/pull/53506) ([Chen768959](https://github.com/Chen768959)). +* Use the filter by file/path before reading in the url/file/hdfs table functions. [#53529](https://github.com/ClickHouse/ClickHouse/pull/53529) ([Kruglov Pavel](https://github.com/Avogar)). +* Use longer timeout for S3 CopyObject requests. [#53533](https://github.com/ClickHouse/ClickHouse/pull/53533) ([Michael Kolupaev](https://github.com/al13n321)). +* Added server setting `aggregate_function_group_array_max_element_size`. This setting is used to limit the array size for the `groupArray` function during serialization. The default value is `16777215`. [#53550](https://github.com/ClickHouse/ClickHouse/pull/53550) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* `SCHEMA()` was added as an alias for `DATABASE()` to improve MySQL compatibility. [#53587](https://github.com/ClickHouse/ClickHouse/pull/53587) ([Daniël van Eeden](https://github.com/dveeden)). +* Add asynchronous metrics about tables in the system database.
For example, `TotalBytesOfMergeTreeTablesSystem`. This closes [#53603](https://github.com/ClickHouse/ClickHouse/issues/53603). [#53604](https://github.com/ClickHouse/ClickHouse/pull/53604) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The SQL editor in the Play UI and Dashboard will not use Grammarly. [#53614](https://github.com/ClickHouse/ClickHouse/pull/53614) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The advanced dashboard now has an option to maximize charts and move them around. [#53622](https://github.com/ClickHouse/ClickHouse/pull/53622) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* As expert-level settings, it is now possible to 1. configure the size_ratio (i.e. the relative size of the protected queue) of the [index] mark/uncompressed caches, 2. configure the cache policy of the index mark and index uncompressed caches. [#53657](https://github.com/ClickHouse/ClickHouse/pull/53657) ([Robert Schulze](https://github.com/rschu1ze)). +* More careful thread management will improve the speed of the S3 table function over a large number of files by more than ~25%. [#53668](https://github.com/ClickHouse/ClickHouse/pull/53668) ([pufit](https://github.com/pufit)). +* Upgrade snappy to 1.1.10; ClickHouse may benefit from it. [#53672](https://github.com/ClickHouse/ClickHouse/pull/53672) ([李扬](https://github.com/taiyang-li)). +* Added client info validation to the query packet in TCPHandler. [#53673](https://github.com/ClickHouse/ClickHouse/pull/53673) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Cache the number of rows in files for count in file/s3/url/hdfs/azure functions. The cache can be enabled/disabled by setting `use_cache_for_count_from_files` (enabled by default). Continuation of https://github.com/ClickHouse/ClickHouse/pull/53637. [#53692](https://github.com/ClickHouse/ClickHouse/pull/53692) ([Kruglov Pavel](https://github.com/Avogar)). +* Updated to retry loading a part in case of `Azure::Core::Http::TransportException` (https://github.com/ClickHouse/ClickHouse/issues/39700#issuecomment-1686442785). [#53750](https://github.com/ClickHouse/ClickHouse/pull/53750) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Stack traces for exceptions; materialized view exceptions are propagated. [#53766](https://github.com/ClickHouse/ClickHouse/pull/53766) ([Ilya Golshtein](https://github.com/ilejn)). +* If no hostname or port was specified, the Keeper client will try to search for a connection string in ClickHouse's config.xml. [#53769](https://github.com/ClickHouse/ClickHouse/pull/53769) ([pufit](https://github.com/pufit)). +* Add profile event `PartsLockMicroseconds` which shows the number of microseconds we hold the data parts lock in the MergeTree table engine family. [#53797](https://github.com/ClickHouse/ClickHouse/pull/53797) ([alesapin](https://github.com/alesapin)). +* Make the reconnect limit in Raft limits configurable for Keeper. This configuration can help Keeper rebuild connections with peers more quickly if the current connection is broken. [#53817](https://github.com/ClickHouse/ClickHouse/pull/53817) ([Pengyuan Bian](https://github.com/bianpengyuan)). +* Support globs in `SELECT` from `file` in clickhouse-local (see the sketch below). [#53863](https://github.com/ClickHouse/ClickHouse/pull/53863) ([Alexey Gerasimchuck](https://github.com/Demilivor)).
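+
+A minimal sketch of glob support in `SELECT` from `file` in clickhouse-local; the path glob and format name here are hypothetical:
+
+```sql
+-- Run inside clickhouse-local; 'logs/*.csv' is a hypothetical set of CSV files.
+SELECT count() FROM file('logs/*.csv', 'CSVWithNames');
+```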
+* ...Ignore foreign keys in table definitions to improve compatibility with MySQL, so a user wouldn't need to rewrite the foreign-key part of their SQL, ref [#53380](https://github.com/ClickHouse/ClickHouse/issues/53380). [#53864](https://github.com/ClickHouse/ClickHouse/pull/53864) ([jsc0218](https://github.com/jsc0218)). +* 'from' is supported as an expression. [#53914](https://github.com/ClickHouse/ClickHouse/pull/53914) ([Chen768959](https://github.com/Chen768959)). +* Changes of the server configuration are now detected with high precision (milliseconds and less). [#54065](https://github.com/ClickHouse/ClickHouse/pull/54065) ([Mikhail Koviazin](https://github.com/mkmkme)). + +#### Build/Testing/Packaging Improvement
+* Don't expose symbols from the ClickHouse binary to the dynamic linker. It might fix [#43933](https://github.com/ClickHouse/ClickHouse/issues/43933). [#47475](https://github.com/ClickHouse/ClickHouse/pull/47475) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed endian issues in the native protocol. [#50267](https://github.com/ClickHouse/ClickHouse/pull/50267) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Build `clickhouse/nginx-dav` and use it in integration tests instead of `kssenii/nginx-test`. Addresses [#43182](https://github.com/ClickHouse/ClickHouse/issues/43182). [#51843](https://github.com/ClickHouse/ClickHouse/pull/51843) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Add `clickhouse-keeper-client` symlink to the clickhouse-server package. [#51882](https://github.com/ClickHouse/ClickHouse/pull/51882) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fixed ForEach aggregate function state for s390x. [#52040](https://github.com/ClickHouse/ClickHouse/pull/52040) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Add https://github.com/elliotchance/sqltest to CI to report the SQL 2016 conformance. [#52293](https://github.com/ClickHouse/ClickHouse/pull/52293) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fixed codec delta endian issue for s390x. [#52592](https://github.com/ClickHouse/ClickHouse/pull/52592) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Packing inline cache into docker images sometimes causes strange special effects. Since we don't use it at all, it's good to go. [#53008](https://github.com/ClickHouse/ClickHouse/pull/53008) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Upgrade PRQL to 0.9.3. [#53060](https://github.com/ClickHouse/ClickHouse/pull/53060) ([Maximilian Roos](https://github.com/max-sixty)). +* System tables from CI checks are exported to ClickHouse Cloud. [#53086](https://github.com/ClickHouse/ClickHouse/pull/53086) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud. [#53100](https://github.com/ClickHouse/ClickHouse/pull/53100) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up Debug and Tidy builds. [#53178](https://github.com/ClickHouse/ClickHouse/pull/53178) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Speed up the build by removing tons and tonnes of garbage. One of the frequently included headers was poisoned by boost. [#53180](https://github.com/ClickHouse/ClickHouse/pull/53180) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add ClickHouse builds for Linux s390x to CI. [#53181](https://github.com/ClickHouse/ClickHouse/pull/53181) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Remove even more garbage.
[#53182](https://github.com/ClickHouse/ClickHouse/pull/53182) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The function `arrayAUC` was using heavy C++ templates. [#53183](https://github.com/ClickHouse/ClickHouse/pull/53183) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Some translation units were always rebuilt regardless of ccache. The culprit is found and fixed. [#53184](https://github.com/ClickHouse/ClickHouse/pull/53184) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The compiler's profile data (`-ftime-trace`) is uploaded to ClickHouse Cloud; the second attempt after [#53100](https://github.com/ClickHouse/ClickHouse/issues/53100). [#53213](https://github.com/ClickHouse/ClickHouse/pull/53213) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Three tests were failing / flaky: 1. test_host_regexp_multiple_ptr_records 2. test_host_regexp_multiple_ptr_records_concurrent 3. test_reverse_dns_query. [#53286](https://github.com/ClickHouse/ClickHouse/pull/53286) ([Arthur Passos](https://github.com/arthurpassos)). +* Export logs from CI in stateful tests to ClickHouse Cloud. [#53351](https://github.com/ClickHouse/ClickHouse/pull/53351) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Export logs from CI in stress tests. [#53353](https://github.com/ClickHouse/ClickHouse/pull/53353) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Export logs from CI in fuzzer. [#53354](https://github.com/ClickHouse/ClickHouse/pull/53354) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Export logs from CI in performance test to ClickHouse Cloud. [#53355](https://github.com/ClickHouse/ClickHouse/pull/53355) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Preserve environment parameters in `clickhouse start` command. Fixes [#51962](https://github.com/ClickHouse/ClickHouse/issues/51962). [#53418](https://github.com/ClickHouse/ClickHouse/pull/53418) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Follow up for [#53418](https://github.com/ClickHouse/ClickHouse/issues/53418). Small improvements for install_check.py, adding tests for proper ENV parameters passing to the main process on `init.d start`. [#53457](https://github.com/ClickHouse/ClickHouse/pull/53457) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fixed base64 endian issue for s390x. [#53570](https://github.com/ClickHouse/ClickHouse/pull/53570) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Reorganize file management in CMake to prevent potential duplications. For instance, `indexHint.cpp` is duplicated in both `dbms_sources` and `clickhouse_functions_sources`. [#53621](https://github.com/ClickHouse/ClickHouse/pull/53621) ([Amos Bird](https://github.com/amosbird)). +* Fixed functional test 02354_distributed_with_external_aggregation_memory_usage on s390x. [#53648](https://github.com/ClickHouse/ClickHouse/pull/53648) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Skipped QPL functional test for s390x. [#53758](https://github.com/ClickHouse/ClickHouse/pull/53758) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Slightly improve cmake build by sanitizing some dependencies and removing some duplicates. Each commit includes a short description of the changes made. [#53759](https://github.com/ClickHouse/ClickHouse/pull/53759) ([Amos Bird](https://github.com/amosbird)). +* Fixed StripeLog storage endian issue on the s390x platform.
[#53902](https://github.com/ClickHouse/ClickHouse/pull/53902) ([Harry Lee](https://github.com/HarryLeeIBM)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Do not reset Annoy index during build-up with > 1 mark [#51325](https://github.com/ClickHouse/ClickHouse/pull/51325) ([Tian Xinhui](https://github.com/xinhuitian)). +* Fix usage of temporary directories during RESTORE [#51493](https://github.com/ClickHouse/ClickHouse/pull/51493) ([Azat Khuzhin](https://github.com/azat)). +* Fix binary arithmetic for Nullable(IPv4) [#51642](https://github.com/ClickHouse/ClickHouse/pull/51642) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Support IPv4 and IPv6 as dictionary attributes [#51756](https://github.com/ClickHouse/ClickHouse/pull/51756) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Bug fix for the checksum of compressed marks [#51777](https://github.com/ClickHouse/ClickHouse/pull/51777) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix a comma mistakenly being parsed as part of a datetime in CSV best-effort parsing [#51950](https://github.com/ClickHouse/ClickHouse/pull/51950) ([Kruglov Pavel](https://github.com/Avogar)). +* Don't throw an exception when an executable UDF has parameters [#51961](https://github.com/ClickHouse/ClickHouse/pull/51961) ([Nikita Taranov](https://github.com/nickitat)). +* Fix recalculation of skip indexes and projections in `ALTER DELETE` queries [#52530](https://github.com/ClickHouse/ClickHouse/pull/52530) ([Anton Popov](https://github.com/CurtizJ)). +* MaterializedMySQL: Fix the infinite loop in ReadBuffer::read [#52621](https://github.com/ClickHouse/ClickHouse/pull/52621) ([Val Doroshchuk](https://github.com/valbok)). +* Load suggestion only with `clickhouse` dialect [#52628](https://github.com/ClickHouse/ClickHouse/pull/52628) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Init and destroy the ares channel on demand. [#52634](https://github.com/ClickHouse/ClickHouse/pull/52634) ([Arthur Passos](https://github.com/arthurpassos)). +* RFC: Fix filtering by virtual columns with OR expression [#52653](https://github.com/ClickHouse/ClickHouse/pull/52653) ([Azat Khuzhin](https://github.com/azat)). +* Fix crash in function `tuple` with one sparse column argument [#52659](https://github.com/ClickHouse/ClickHouse/pull/52659) ([Anton Popov](https://github.com/CurtizJ)). +* Fix named collections on cluster 23.7 [#52687](https://github.com/ClickHouse/ClickHouse/pull/52687) ([Al Korgun](https://github.com/alkorgun)). +* Fix reading of unnecessary column in case of multistage `PREWHERE` [#52689](https://github.com/ClickHouse/ClickHouse/pull/52689) ([Anton Popov](https://github.com/CurtizJ)). +* Fix unexpected sort result on multiple columns with nulls-first direction [#52761](https://github.com/ClickHouse/ClickHouse/pull/52761) ([copperybean](https://github.com/copperybean)). +* Fix data race in Keeper reconfiguration [#52804](https://github.com/ClickHouse/ClickHouse/pull/52804) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix sorting of sparse columns with large limit [#52827](https://github.com/ClickHouse/ClickHouse/pull/52827) ([Anton Popov](https://github.com/CurtizJ)). +* clickhouse-keeper: fix implementation of server with poll() [#52833](https://github.com/ClickHouse/ClickHouse/pull/52833) ([Andy Fiddaman](https://github.com/citrus-it)). +* Make the regexp analyzer recognize named capturing groups [#52840](https://github.com/ClickHouse/ClickHouse/pull/52840) ([Han Fei](https://github.com/hanfei1991)).
+* Fix possible assert in ~PushingAsyncPipelineExecutor in clickhouse-local [#52862](https://github.com/ClickHouse/ClickHouse/pull/52862) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix reading of empty `Nested(Array(LowCardinality(...)))` [#52949](https://github.com/ClickHouse/ClickHouse/pull/52949) ([Anton Popov](https://github.com/CurtizJ)). +* Added new tests for session_log and fixed the inconsistency between login and logout. [#52958](https://github.com/ClickHouse/ClickHouse/pull/52958) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix password leak in show create mysql table [#52962](https://github.com/ClickHouse/ClickHouse/pull/52962) ([Duc Canh Le](https://github.com/canhld94)). +* Convert sparse to full in CreateSetAndFilterOnTheFlyStep [#53000](https://github.com/ClickHouse/ClickHouse/pull/53000) ([vdimir](https://github.com/vdimir)). +* Fix rare race condition with empty key prefix directory deletion in fs cache [#53055](https://github.com/ClickHouse/ClickHouse/pull/53055) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix ZstdDeflatingWriteBuffer truncating the output sometimes [#53064](https://github.com/ClickHouse/ClickHouse/pull/53064) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix query_id in part_log with async flush queries [#53103](https://github.com/ClickHouse/ClickHouse/pull/53103) ([Raúl Marín](https://github.com/Algunenano)). +* Fix possible error from cache "Read unexpected size" [#53121](https://github.com/ClickHouse/ClickHouse/pull/53121) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Disable the new parquet encoder [#53130](https://github.com/ClickHouse/ClickHouse/pull/53130) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Not-ready Set [#53162](https://github.com/ClickHouse/ClickHouse/pull/53162) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix character escaping in the PostgreSQL engine [#53250](https://github.com/ClickHouse/ClickHouse/pull/53250) ([Nikolay Degterinsky](https://github.com/evillique)). +* #2 Added new tests for session_log and fixed the inconsistency between login and logout. [#53255](https://github.com/ClickHouse/ClickHouse/pull/53255) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* #3 Fixed inconsistency between login success and logout [#53302](https://github.com/ClickHouse/ClickHouse/pull/53302) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix adding sub-second intervals to DateTime (see the sketch below) [#53309](https://github.com/ClickHouse/ClickHouse/pull/53309) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix "Context has expired" error in dictionaries [#53342](https://github.com/ClickHouse/ClickHouse/pull/53342) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix incorrect normal projection AST format [#53347](https://github.com/ClickHouse/ClickHouse/pull/53347) ([Amos Bird](https://github.com/amosbird)). +* Forbid `use_structure_from_insertion_table_in_table_functions` when executing scalars [#53348](https://github.com/ClickHouse/ClickHouse/pull/53348) ([flynn](https://github.com/ucasfl)). +* Fix loading lazy database during system.table select query [#53372](https://github.com/ClickHouse/ClickHouse/pull/53372) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fixed system.data_skipping_indices for MaterializedMySQL [#53381](https://github.com/ClickHouse/ClickHouse/pull/53381) ([Filipp Ozinov](https://github.com/bakwc)).
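+
+An illustration of the sub-second interval fix above; the value in the comment is an assumption from the described semantics, not captured output:
+
+```sql
+SELECT toDateTime64('2023-01-01 00:00:00', 3) + INTERVAL 250 MILLISECOND; -- 2023-01-01 00:00:00.250
+```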
+* Fix processing single carriage return in TSV file segmentation engine [#53407](https://github.com/ClickHouse/ClickHouse/pull/53407) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix 'Context has expired' error properly [#53433](https://github.com/ClickHouse/ClickHouse/pull/53433) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix timeout_overflow_mode when having subquery in the rhs of IN [#53439](https://github.com/ClickHouse/ClickHouse/pull/53439) ([Duc Canh Le](https://github.com/canhld94)). +* Fix an unexpected behavior in [#53152](https://github.com/ClickHouse/ClickHouse/issues/53152) [#53440](https://github.com/ClickHouse/ClickHouse/pull/53440) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). +* Fix `JSON_QUERY` function parse error when the path is all numbers [#53470](https://github.com/ClickHouse/ClickHouse/pull/53470) ([KevinyhZou](https://github.com/KevinyhZou)). +* Fix wrong column order for queries with parallel FINAL. [#53489](https://github.com/ClickHouse/ClickHouse/pull/53489) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fixed SELECTing from ReplacingMergeTree with do_not_merge_across_partitions_select_final [#53511](https://github.com/ClickHouse/ClickHouse/pull/53511) ([Vasily Nemkov](https://github.com/Enmk)). +* bugfix: Flush async insert queue first on shutdown [#53547](https://github.com/ClickHouse/ClickHouse/pull/53547) ([joelynch](https://github.com/joelynch)). +* Fix crash in join on sparse column [#53548](https://github.com/ClickHouse/ClickHouse/pull/53548) ([vdimir](https://github.com/vdimir)). +* Fix possible UB in Set skipping index for functions with incorrect args [#53559](https://github.com/ClickHouse/ClickHouse/pull/53559) ([Azat Khuzhin](https://github.com/azat)). +* Fix possible UB in inverted indexes (experimental feature) [#53560](https://github.com/ClickHouse/ClickHouse/pull/53560) ([Azat Khuzhin](https://github.com/azat)). +* Fix: the interpolate expression takes the source column instead of the same-name alias from the select expression. [#53572](https://github.com/ClickHouse/ClickHouse/pull/53572) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix the number of dropped granules in `EXPLAIN PLAN index = 1` [#53616](https://github.com/ClickHouse/ClickHouse/pull/53616) ([wangxiaobo](https://github.com/wzb5212)). +* Correctly handle totals and extremes with `DelayedSource` [#53644](https://github.com/ClickHouse/ClickHouse/pull/53644) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix prepared set cache stuck in the mutation pipeline [#53645](https://github.com/ClickHouse/ClickHouse/pull/53645) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix bug on mutations with subcolumns of type JSON in predicates of UPDATE and DELETE queries. [#53677](https://github.com/ClickHouse/ClickHouse/pull/53677) ([VanDarkholme7](https://github.com/VanDarkholme7)). +* Fix filter pushdown for full_sorting_merge join [#53699](https://github.com/ClickHouse/ClickHouse/pull/53699) ([vdimir](https://github.com/vdimir)). +* Try to fix bug with NULL::LowCardinality(Nullable(...)) NOT IN [#53706](https://github.com/ClickHouse/ClickHouse/pull/53706) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix: sorted distinct with sparse columns [#53711](https://github.com/ClickHouse/ClickHouse/pull/53711) ([Igor Nikonov](https://github.com/devcrafter)). +* transform: correctly handle default column with multiple rows (see the sketch below) [#53742](https://github.com/ClickHouse/ClickHouse/pull/53742) ([Salvatore Mesoraca](https://github.com/aiven-sal)).
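+
+A short sketch of the `transform` function whose default-column handling is fixed above; the expected output in the comment is an assumption from the function's documented behavior:
+
+```sql
+SELECT transform(number, [0, 1], ['zero', 'one'], 'other') FROM system.numbers LIMIT 4;
+-- zero, one, other, other
+```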
+* Fix fuzzer crash in parseDateTime() [#53764](https://github.com/ClickHouse/ClickHouse/pull/53764) ([Robert Schulze](https://github.com/rschu1ze)). +* Materialized postgres: fix uncaught exception in getCreateTableQueryImpl [#53832](https://github.com/ClickHouse/ClickHouse/pull/53832) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix possible segfault while using PostgreSQL engine [#53847](https://github.com/ClickHouse/ClickHouse/pull/53847) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix named_collection_admin alias [#54066](https://github.com/ClickHouse/ClickHouse/pull/54066) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix rows_before_limit_at_least for DelayedSource. [#54122](https://github.com/ClickHouse/ClickHouse/pull/54122) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). + +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Implementing new commands for keeper-client"'. [#52985](https://github.com/ClickHouse/ClickHouse/pull/52985) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Remove try/catch from DatabaseFilesystem"'. [#53044](https://github.com/ClickHouse/ClickHouse/pull/53044) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Upload build time-trace data to CI database"'. [#53210](https://github.com/ClickHouse/ClickHouse/pull/53210) ([Alexander Gololobov](https://github.com/davenger)). +* NO CL ENTRY: 'Revert "Added new tests for session_log and fixed the inconsistency between login and logout."'. [#53247](https://github.com/ClickHouse/ClickHouse/pull/53247) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Improve CHECK TABLE system query"'. [#53272](https://github.com/ClickHouse/ClickHouse/pull/53272) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "#2 Added new tests for session_log and fixed the inconsistency between login and logout."'. [#53294](https://github.com/ClickHouse/ClickHouse/pull/53294) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Documentation: add Ibis project to the integrations section"'. [#53374](https://github.com/ClickHouse/ClickHouse/pull/53374) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Planner prepare filters for analysis"'. [#53782](https://github.com/ClickHouse/ClickHouse/pull/53782) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "dateDiff: add support for plural units."'. [#53795](https://github.com/ClickHouse/ClickHouse/pull/53795) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Fixed wrong python test name pattern"'. [#53929](https://github.com/ClickHouse/ClickHouse/pull/53929) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Fix bug on mutations with subcolumns of type JSON in predicates of UPDATE and DELETE queries."'. [#54063](https://github.com/ClickHouse/ClickHouse/pull/54063) ([Alexander Tokmakov](https://github.com/tavplubix)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* clickhouse-copier: add a check for drop partition [#35263](https://github.com/ClickHouse/ClickHouse/pull/35263) ([sunny](https://github.com/sunny19930321)). +* Add more checks into ThreadStatus ctor. [#42019](https://github.com/ClickHouse/ClickHouse/pull/42019) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
+* Refactor Query Tree visitor [#46740](https://github.com/ClickHouse/ClickHouse/pull/46740) ([Dmitry Novik](https://github.com/novikd)). +* Revert "Revert "Randomize JIT settings in tests"" [#48282](https://github.com/ClickHouse/ClickHouse/pull/48282) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix outdated cache configuration in s3 tests: s3_storage_policy_by_defau… [#48424](https://github.com/ClickHouse/ClickHouse/pull/48424) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix IN with decimal in analyzer [#48754](https://github.com/ClickHouse/ClickHouse/pull/48754) ([vdimir](https://github.com/vdimir)). +* Some unclear change in StorageBuffer::reschedule() for something [#49723](https://github.com/ClickHouse/ClickHouse/pull/49723) ([DimasKovas](https://github.com/DimasKovas)). +* MergeTree & SipHash checksum big-endian support [#50276](https://github.com/ClickHouse/ClickHouse/pull/50276) ([ltrk2](https://github.com/ltrk2)). +* Maintain same aggregate function merge behavior for small and big endian machine [#50609](https://github.com/ClickHouse/ClickHouse/pull/50609) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Add a test to limit client max opening fd [#51213](https://github.com/ClickHouse/ClickHouse/pull/51213) ([Duc Canh Le](https://github.com/canhld94)). +* Add info about acquired space in cache to not enough space error [#51537](https://github.com/ClickHouse/ClickHouse/pull/51537) ([vdimir](https://github.com/vdimir)). +* KeeperDispatcher: remove redundant lock as the ConcurrentBoundedQueue is thread-safe [#51766](https://github.com/ClickHouse/ClickHouse/pull/51766) ([frinkr](https://github.com/frinkr)). +* Fix build type in packager [#51771](https://github.com/ClickHouse/ClickHouse/pull/51771) ([Antonio Andelic](https://github.com/antonio2368)). +* metrics_perf_events_enabled turn off in perf tests [#52072](https://github.com/ClickHouse/ClickHouse/pull/52072) ([Sema Checherinda](https://github.com/CheSema)). +* Remove try/catch from DatabaseFilesystem [#52155](https://github.com/ClickHouse/ClickHouse/pull/52155) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test that clickhouse-client or local do not throw/catch on startup [#52159](https://github.com/ClickHouse/ClickHouse/pull/52159) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Retry blob listing in test_alter_moving_garbage [#52193](https://github.com/ClickHouse/ClickHouse/pull/52193) ([vdimir](https://github.com/vdimir)). +* Try to make `test_kafka_formats_with_broken_message` and `test_kafka_formats` integration tests stable [#52273](https://github.com/ClickHouse/ClickHouse/pull/52273) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Kill the runner process with all subprocesses [#52277](https://github.com/ClickHouse/ClickHouse/pull/52277) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Implement endianness-independent support for MergeTree checksums [#52329](https://github.com/ClickHouse/ClickHouse/pull/52329) ([ltrk2](https://github.com/ltrk2)). +* add tests with connection reset by peer error, and retry it inside client [#52441](https://github.com/ClickHouse/ClickHouse/pull/52441) ([Sema Checherinda](https://github.com/CheSema)). +* Fix logging for asynchronous non-batched distributed sends [#52583](https://github.com/ClickHouse/ClickHouse/pull/52583) ([Azat Khuzhin](https://github.com/azat)). 
+* Follow-up to "Implement support of encrypted elements in configuration file" [#52609](https://github.com/ClickHouse/ClickHouse/pull/52609) ([Robert Schulze](https://github.com/rschu1ze)). +* Return zxid from TestKeeper and in multi responses [#52618](https://github.com/ClickHouse/ClickHouse/pull/52618) ([Alexander Gololobov](https://github.com/davenger)). +* Analyzer: Support ARRAY JOIN COLUMNS(...) syntax [#52622](https://github.com/ClickHouse/ClickHouse/pull/52622) ([Dmitry Novik](https://github.com/novikd)). +* Fix stress test: check if storage shutdown before we operate MergeTreeDeduplicationLog [#52623](https://github.com/ClickHouse/ClickHouse/pull/52623) ([Han Fei](https://github.com/hanfei1991)). +* Suspicious DISTINCT crashes from sqlancer [#52636](https://github.com/ClickHouse/ClickHouse/pull/52636) ([Igor Nikonov](https://github.com/devcrafter)). +* Partially fixed test 01747_system_session_log_long [#52640](https://github.com/ClickHouse/ClickHouse/pull/52640) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Check for unexpected Cyrillic [#52641](https://github.com/ClickHouse/ClickHouse/pull/52641) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `test_keeper_reconfig_replace_leader` [#52651](https://github.com/ClickHouse/ClickHouse/pull/52651) ([Antonio Andelic](https://github.com/antonio2368)). +* Rename setting disable_url_encoding to enable_url_encoding and add a test [#52656](https://github.com/ClickHouse/ClickHouse/pull/52656) ([Kruglov Pavel](https://github.com/Avogar)). +* Remove creation of an unnecessary temporary ContextAccess on login [#52660](https://github.com/ClickHouse/ClickHouse/pull/52660) ([Vitaly Baranov](https://github.com/vitlibar)). +* Update version after release [#52661](https://github.com/ClickHouse/ClickHouse/pull/52661) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v23.7.1.2470-stable [#52664](https://github.com/ClickHouse/ClickHouse/pull/52664) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix bugs and better test for SYSTEM STOP LISTEN [#52680](https://github.com/ClickHouse/ClickHouse/pull/52680) ([Nikolay Degterinsky](https://github.com/evillique)). +* Remove unneeded readBinary() specializations + update docs [#52683](https://github.com/ClickHouse/ClickHouse/pull/52683) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove remainders of legacy setting 'allow_experimental_query_cache' [#52685](https://github.com/ClickHouse/ClickHouse/pull/52685) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix 02417_opentelemetry_insert_on_distributed_table flakiness [#52691](https://github.com/ClickHouse/ClickHouse/pull/52691) ([Azat Khuzhin](https://github.com/azat)). +* Improvements to backup restore disallow_concurrency test [#52709](https://github.com/ClickHouse/ClickHouse/pull/52709) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Move UnlinkMetadataFileOperationOutcome to common header [#52710](https://github.com/ClickHouse/ClickHouse/pull/52710) ([Alexander Gololobov](https://github.com/davenger)). +* Improve endianness-independent support for hash functions [#52712](https://github.com/ClickHouse/ClickHouse/pull/52712) ([ltrk2](https://github.com/ltrk2)). +* Allow reading zero objects in CachedObjectStorage::readObjects() [#52733](https://github.com/ClickHouse/ClickHouse/pull/52733) ([Michael Kolupaev](https://github.com/al13n321)). 
+* Merging reading from archives [#50321](https://github.com/ClickHouse/ClickHouse/issues/50321) [#52734](https://github.com/ClickHouse/ClickHouse/pull/52734) ([Antonio Andelic](https://github.com/antonio2368)). +* Merging [#52640](https://github.com/ClickHouse/ClickHouse/issues/52640) [#52744](https://github.com/ClickHouse/ClickHouse/pull/52744) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Analyzer: fix 00979_set_index_not.sql [#52754](https://github.com/ClickHouse/ClickHouse/pull/52754) ([Igor Nikonov](https://github.com/devcrafter)). +* Planner prepare filters for analysis [#52762](https://github.com/ClickHouse/ClickHouse/pull/52762) ([Maksim Kita](https://github.com/kitaisreal)). +* Allow reading empty file with no blobs [#52763](https://github.com/ClickHouse/ClickHouse/pull/52763) ([Alexander Gololobov](https://github.com/davenger)). +* Fix: check correctly window frame bounds for RANGE [#52768](https://github.com/ClickHouse/ClickHouse/pull/52768) ([Igor Nikonov](https://github.com/devcrafter)). +* Numerical stability of the test for Polygons [#52769](https://github.com/ClickHouse/ClickHouse/pull/52769) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Change the default timezones in Docker test images [#52772](https://github.com/ClickHouse/ClickHouse/pull/52772) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Upload build statistics to the CI database [#52773](https://github.com/ClickHouse/ClickHouse/pull/52773) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `instance_type` information to the CI database [#52774](https://github.com/ClickHouse/ClickHouse/pull/52774) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove Coverity (part 2) [#52775](https://github.com/ClickHouse/ClickHouse/pull/52775) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a tool to upload `-ftime-trace` to ClickHouse [#52776](https://github.com/ClickHouse/ClickHouse/pull/52776) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Revert revert of system drop filesystem cache by key [#52778](https://github.com/ClickHouse/ClickHouse/pull/52778) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove obsolete part of a check name [#52793](https://github.com/ClickHouse/ClickHouse/pull/52793) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Maybe fix TLS tests [#52796](https://github.com/ClickHouse/ClickHouse/pull/52796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Allow OOM in Stress and Upgrade checks [#52807](https://github.com/ClickHouse/ClickHouse/pull/52807) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not test upper bounds for throttlers [#52821](https://github.com/ClickHouse/ClickHouse/pull/52821) ([Sergei Trifonov](https://github.com/serxa)). +* Add more logging and touch test for materialize mysql [#52822](https://github.com/ClickHouse/ClickHouse/pull/52822) ([alesapin](https://github.com/alesapin)). +* Try to remove more leftovers. [#52823](https://github.com/ClickHouse/ClickHouse/pull/52823) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update test_crash_log/test.py [#52825](https://github.com/ClickHouse/ClickHouse/pull/52825) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Don't report LOGICAL_ERROR if a file got truncated during read [#52828](https://github.com/ClickHouse/ClickHouse/pull/52828) ([Michael Kolupaev](https://github.com/al13n321)). +* Throw S3Exception whenever possible. 
[#52829](https://github.com/ClickHouse/ClickHouse/pull/52829) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Increase min protocol version for sparse serialization [#52835](https://github.com/ClickHouse/ClickHouse/pull/52835) ([Anton Popov](https://github.com/CurtizJ)). +* Cleanup localBackup [#52837](https://github.com/ClickHouse/ClickHouse/pull/52837) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Try to fix 02352_rwlock [#52852](https://github.com/ClickHouse/ClickHouse/pull/52852) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Disable a couple of long tests for debug build. [#52854](https://github.com/ClickHouse/ClickHouse/pull/52854) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix flaky tests in test_merge_tree_azure_blob_storage & test_storage_azure_blob_storage [#52855](https://github.com/ClickHouse/ClickHouse/pull/52855) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Cancel merges before renaming a system log table [#52858](https://github.com/ClickHouse/ClickHouse/pull/52858) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Try to fix a rare fail in 00612_http_max_query_size [#52859](https://github.com/ClickHouse/ClickHouse/pull/52859) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Remove duplicated dialect setting value [#52864](https://github.com/ClickHouse/ClickHouse/pull/52864) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Significant improvement of rust caching [#52865](https://github.com/ClickHouse/ClickHouse/pull/52865) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Docker improvements [#52869](https://github.com/ClickHouse/ClickHouse/pull/52869) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Try to continue clickhouse process in stress test after terminating gdb. [#52871](https://github.com/ClickHouse/ClickHouse/pull/52871) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* fix master ci for [#52091](https://github.com/ClickHouse/ClickHouse/issues/52091) [#52873](https://github.com/ClickHouse/ClickHouse/pull/52873) ([Han Fei](https://github.com/hanfei1991)). +* Fix the PR body check for `Reverts #number` [#52874](https://github.com/ClickHouse/ClickHouse/pull/52874) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Analyzer WITH statement references test [#52875](https://github.com/ClickHouse/ClickHouse/pull/52875) ([Maksim Kita](https://github.com/kitaisreal)). +* Disable more tests for debug. [#52878](https://github.com/ClickHouse/ClickHouse/pull/52878) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix assertion in mutations with transactions [#52894](https://github.com/ClickHouse/ClickHouse/pull/52894) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fixed test_profile_max_sessions_for_user test flakiness [#52897](https://github.com/ClickHouse/ClickHouse/pull/52897) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Use concepts to replace more std::enable_if_t [#52898](https://github.com/ClickHouse/ClickHouse/pull/52898) ([flynn](https://github.com/ucasfl)). +* Disable `test_reconfig_replace_leader_in_one_command` [#52901](https://github.com/ClickHouse/ClickHouse/pull/52901) ([Antonio Andelic](https://github.com/antonio2368)). +* tests: fix possible EADDRINUSE v2 [#52906](https://github.com/ClickHouse/ClickHouse/pull/52906) ([Azat Khuzhin](https://github.com/azat)). 
+* Merging [#52897](https://github.com/ClickHouse/ClickHouse/issues/52897) [#52907](https://github.com/ClickHouse/ClickHouse/pull/52907) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove obsolete `no-upgrade-check` tag [#52915](https://github.com/ClickHouse/ClickHouse/pull/52915) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix flaky test_storage_s3_queue::test_multiple_tables_streaming_sync_distributed [#52944](https://github.com/ClickHouse/ClickHouse/pull/52944) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Don't create empty parts on drop partition if we have a transaction [#52945](https://github.com/ClickHouse/ClickHouse/pull/52945) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Analyzer: fix WITH clause resolving [#52947](https://github.com/ClickHouse/ClickHouse/pull/52947) ([Dmitry Novik](https://github.com/novikd)). +* Refactor CI_CONFIG [#52948](https://github.com/ClickHouse/ClickHouse/pull/52948) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Try to fix assert in remove redundant sorting [#52950](https://github.com/ClickHouse/ClickHouse/pull/52950) ([Igor Nikonov](https://github.com/devcrafter)). +* Remove unused code in StorageSystemStackTrace [#52952](https://github.com/ClickHouse/ClickHouse/pull/52952) ([Azat Khuzhin](https://github.com/azat)). +* Fix wrong error code "BAD_GET" [#52954](https://github.com/ClickHouse/ClickHouse/pull/52954) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix some issues with databases [#52956](https://github.com/ClickHouse/ClickHouse/pull/52956) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix config update in HTTP Header Filtering [#52957](https://github.com/ClickHouse/ClickHouse/pull/52957) ([Nikolay Degterinsky](https://github.com/evillique)). +* Added peak_memory_usage to clickhouse-client final progress message [#52961](https://github.com/ClickHouse/ClickHouse/pull/52961) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* tests: fix 01293_client_interactive_vertical_multiline flakiness (increase timeout) [#52965](https://github.com/ClickHouse/ClickHouse/pull/52965) ([Azat Khuzhin](https://github.com/azat)). +* Added TSAN option report_atomic_races=0 for test_max_sessions_for_user [#52969](https://github.com/ClickHouse/ClickHouse/pull/52969) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* MaterializedMySQL: Add tests for unquoted utf8 column names in DML [#52971](https://github.com/ClickHouse/ClickHouse/pull/52971) ([Val Doroshchuk](https://github.com/valbok)). +* Update version_date.tsv and changelogs after v23.7.2.25-stable [#52976](https://github.com/ClickHouse/ClickHouse/pull/52976) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Decrease a num of tries for a couple of too slow tests for debug. [#52981](https://github.com/ClickHouse/ClickHouse/pull/52981) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix test `00061_storage_buffer` [#52983](https://github.com/ClickHouse/ClickHouse/pull/52983) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove `test_host_regexp_multiple_ptr_records_concurrent`, CC @arthurpassos [#52984](https://github.com/ClickHouse/ClickHouse/pull/52984) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `test_zookeeper_config` [#52988](https://github.com/ClickHouse/ClickHouse/pull/52988) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Remove assertion from test_no_ttl_merges_in_busy_pool [#52989](https://github.com/ClickHouse/ClickHouse/pull/52989) ([alesapin](https://github.com/alesapin)). +* Fix `test_dictionary_custom_settings` [#52990](https://github.com/ClickHouse/ClickHouse/pull/52990) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix flaky test [#53007](https://github.com/ClickHouse/ClickHouse/pull/53007) ([alesapin](https://github.com/alesapin)). +* Fix default port for Keeper Client [#53010](https://github.com/ClickHouse/ClickHouse/pull/53010) ([pufit](https://github.com/pufit)). +* Add a test to broken tests (Analyzer) [#53013](https://github.com/ClickHouse/ClickHouse/pull/53013) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Implement big-endian support for transform [#53015](https://github.com/ClickHouse/ClickHouse/pull/53015) ([ltrk2](https://github.com/ltrk2)). +* Fix completion for clickhouse-keeper-client [#53029](https://github.com/ClickHouse/ClickHouse/pull/53029) ([Azat Khuzhin](https://github.com/azat)). +* clickhouse-keeper-client: fix version parsing for set command [#53031](https://github.com/ClickHouse/ClickHouse/pull/53031) ([Azat Khuzhin](https://github.com/azat)). +* MaterializedMySQL: Add tests to alter named collections [#53032](https://github.com/ClickHouse/ClickHouse/pull/53032) ([Val Doroshchuk](https://github.com/valbok)). +* Fix description for 's3_upload_part_size_multiply_parts_count_threshold' setting [#53042](https://github.com/ClickHouse/ClickHouse/pull/53042) ([Elena Torró](https://github.com/elenatorro)). +* Update 01114_database_atomic.sh [#53043](https://github.com/ClickHouse/ClickHouse/pull/53043) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Revert revert of "Remove try/catch from DatabaseFilesystem" [#53045](https://github.com/ClickHouse/ClickHouse/pull/53045) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix cache related logical error in stress tests [#53047](https://github.com/ClickHouse/ClickHouse/pull/53047) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Remove upgrade checks with sanitizers [#53051](https://github.com/ClickHouse/ClickHouse/pull/53051) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Increase election timeout in integration tests [#53052](https://github.com/ClickHouse/ClickHouse/pull/53052) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Analyzer: do not enable it for old servers in tests [#53053](https://github.com/ClickHouse/ClickHouse/pull/53053) ([Dmitry Novik](https://github.com/novikd)). +* Try to make `01414_mutations_and_errors_zookeeper` less flaky [#53056](https://github.com/ClickHouse/ClickHouse/pull/53056) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix test `02434_cancel_insert_when_client_dies` [#53062](https://github.com/ClickHouse/ClickHouse/pull/53062) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add `abort_on_error=1` to `TSAN_OPTIONS` [#53065](https://github.com/ClickHouse/ClickHouse/pull/53065) ([Nikita Taranov](https://github.com/nickitat)). +* Fix Parquet stats for Float32 and Float64 [#53067](https://github.com/ClickHouse/ClickHouse/pull/53067) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix a comment [#53072](https://github.com/ClickHouse/ClickHouse/pull/53072) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix 02263_format_insert_settings flakiness [#53080](https://github.com/ClickHouse/ClickHouse/pull/53080) ([Azat Khuzhin](https://github.com/azat)). 
+* Something with tests [#53081](https://github.com/ClickHouse/ClickHouse/pull/53081) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v23.7.3.14-stable [#53084](https://github.com/ClickHouse/ClickHouse/pull/53084) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Simplify system logs creation [#53085](https://github.com/ClickHouse/ClickHouse/pull/53085) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix hung check in stress test [#53090](https://github.com/ClickHouse/ClickHouse/pull/53090) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add clusters for running tests locally easily [#53091](https://github.com/ClickHouse/ClickHouse/pull/53091) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wording [#53092](https://github.com/ClickHouse/ClickHouse/pull/53092) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update README.md [#53097](https://github.com/ClickHouse/ClickHouse/pull/53097) ([Tyler Hannan](https://github.com/tylerhannan)). +* Remove old util [#53099](https://github.com/ClickHouse/ClickHouse/pull/53099) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add optional parameters to Buffer Engine definition [#53102](https://github.com/ClickHouse/ClickHouse/pull/53102) ([Elena Torró](https://github.com/elenatorro)). +* Compatibility with clang-17 [#53104](https://github.com/ClickHouse/ClickHouse/pull/53104) ([Raúl Marín](https://github.com/Algunenano)). +* Remove duplicate test: `test_concurrent_alter_with_ttl_move` [#53107](https://github.com/ClickHouse/ClickHouse/pull/53107) ([alesapin](https://github.com/alesapin)). +* Relax flaky test `test_s3_engine_heavy_write_check_mem` [#53108](https://github.com/ClickHouse/ClickHouse/pull/53108) ([alesapin](https://github.com/alesapin)). +* Update PocoHTTPClient.cpp [#53109](https://github.com/ClickHouse/ClickHouse/pull/53109) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add hints for HTTP handlers [#53110](https://github.com/ClickHouse/ClickHouse/pull/53110) ([Ruslan Mardugalliamov](https://github.com/rmarduga)). +* Revert changes in `ZstdDeflatingAppendableWriteBuffer` [#53111](https://github.com/ClickHouse/ClickHouse/pull/53111) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix flaky test by using azure_query function [#53113](https://github.com/ClickHouse/ClickHouse/pull/53113) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Update `test_restore_replica` [#53119](https://github.com/ClickHouse/ClickHouse/pull/53119) ([Alexander Tokmakov](https://github.com/tavplubix)). +* do not fail if prctl is not allowed ([#43589](https://github.com/ClickHouse/ClickHouse/issues/43589)) [#53122](https://github.com/ClickHouse/ClickHouse/pull/53122) ([ekrasikov](https://github.com/ekrasikov)). +* Use more unique name for TemporaryFileOnDisk [#53123](https://github.com/ClickHouse/ClickHouse/pull/53123) ([Vitaly Baranov](https://github.com/vitlibar)). +* Update `Mergeable Check` at the finishing CI [#53126](https://github.com/ClickHouse/ClickHouse/pull/53126) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Added retry for TransportException in azure blob storage [#53128](https://github.com/ClickHouse/ClickHouse/pull/53128) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Small fix for HTTPHeaderFilter [#53146](https://github.com/ClickHouse/ClickHouse/pull/53146) ([San](https://github.com/santrancisco)). 
+* Added functions to disallow concurrency of backup restore test [#53150](https://github.com/ClickHouse/ClickHouse/pull/53150) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Attempt to fix test_insert_quorum by adding sync second replica [#53155](https://github.com/ClickHouse/ClickHouse/pull/53155) ([vdimir](https://github.com/vdimir)). +* fix mem leak in RegExpTreeDictionary [#53160](https://github.com/ClickHouse/ClickHouse/pull/53160) ([Han Fei](https://github.com/hanfei1991)). +* Fixes for detach/attach partition and broken detached parts cleanup [#53164](https://github.com/ClickHouse/ClickHouse/pull/53164) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update conftest.py [#53166](https://github.com/ClickHouse/ClickHouse/pull/53166) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Allow experimental features when recovering Replicated db replica [#53167](https://github.com/ClickHouse/ClickHouse/pull/53167) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update version_date.tsv and changelogs after v23.7.4.5-stable [#53169](https://github.com/ClickHouse/ClickHouse/pull/53169) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Analyzer: fix test_system_flush_logs [#53171](https://github.com/ClickHouse/ClickHouse/pull/53171) ([Dmitry Novik](https://github.com/novikd)). +* Fix warning in test_replicated_database [#53173](https://github.com/ClickHouse/ClickHouse/pull/53173) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix: 00838_unique_index test with analyzer [#53175](https://github.com/ClickHouse/ClickHouse/pull/53175) ([Igor Nikonov](https://github.com/devcrafter)). +* Improved efficiency for array operations [#53193](https://github.com/ClickHouse/ClickHouse/pull/53193) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Disable test_reverse_dns_query/test.py [#53195](https://github.com/ClickHouse/ClickHouse/pull/53195) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Improve reading from archives [#53198](https://github.com/ClickHouse/ClickHouse/pull/53198) ([Antonio Andelic](https://github.com/antonio2368)). +* Disable test_host_regexp_multiple_ptr_records/test.py [#53211](https://github.com/ClickHouse/ClickHouse/pull/53211) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Enable hedged requests under tsan [#53219](https://github.com/ClickHouse/ClickHouse/pull/53219) ([Kruglov Pavel](https://github.com/Avogar)). +* Remove garbage [#53241](https://github.com/ClickHouse/ClickHouse/pull/53241) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix LOGICAL_ERROR exception in ALTER query [#53242](https://github.com/ClickHouse/ClickHouse/pull/53242) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix bad test `00417_kill_query` [#53244](https://github.com/ClickHouse/ClickHouse/pull/53244) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test `02428_delete_with_settings` [#53246](https://github.com/ClickHouse/ClickHouse/pull/53246) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove unrecognizable garbage from the performance test [#53249](https://github.com/ClickHouse/ClickHouse/pull/53249) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable randomization in `02273_full_sort_join` [#53251](https://github.com/ClickHouse/ClickHouse/pull/53251) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Remove outdated Dockerfile [#53252](https://github.com/ClickHouse/ClickHouse/pull/53252) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve fs cache cleanup [#53273](https://github.com/ClickHouse/ClickHouse/pull/53273) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add garbage [#53279](https://github.com/ClickHouse/ClickHouse/pull/53279) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Inhibit randomization in `00906_low_cardinality_cache` [#53283](https://github.com/ClickHouse/ClickHouse/pull/53283) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix test 01169_old_alter_partition_isolation_stress [#53292](https://github.com/ClickHouse/ClickHouse/pull/53292) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove no-parallel tag from some tests [#53295](https://github.com/ClickHouse/ClickHouse/pull/53295) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix test `00002_log_and_exception_messages_formatting` [#53296](https://github.com/ClickHouse/ClickHouse/pull/53296) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `01485_256_bit_multiply` [#53297](https://github.com/ClickHouse/ClickHouse/pull/53297) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove flaky tests for the experimental `UNDROP` feature [#53298](https://github.com/ClickHouse/ClickHouse/pull/53298) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added test for session_log using remote and mysql sessions [#53304](https://github.com/ClickHouse/ClickHouse/pull/53304) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Added integration test for session_log using concurrent GRPC/PostgreSQL/MySQL sessions [#53305](https://github.com/ClickHouse/ClickHouse/pull/53305) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Added test for session_log using concurrent TCP/HTTP/MySQL sessions [#53306](https://github.com/ClickHouse/ClickHouse/pull/53306) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Added test for session_log dropping user/role/profile currently used in active session [#53307](https://github.com/ClickHouse/ClickHouse/pull/53307) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Added an integration test for client peak_memory_usage value [#53308](https://github.com/ClickHouse/ClickHouse/pull/53308) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix log message [#53339](https://github.com/ClickHouse/ClickHouse/pull/53339) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Analyzer: fix quotas for system tables [#53343](https://github.com/ClickHouse/ClickHouse/pull/53343) ([Dmitry Novik](https://github.com/novikd)). +* Relax mergeable check [#53344](https://github.com/ClickHouse/ClickHouse/pull/53344) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add clickhouse-keeper-client and clickhouse-keeper-converter symlinks to clickhouse-keeper package [#53357](https://github.com/ClickHouse/ClickHouse/pull/53357) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Add linux s390x to universal installer [#53358](https://github.com/ClickHouse/ClickHouse/pull/53358) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* Make one exception message longer [#53375](https://github.com/ClickHouse/ClickHouse/pull/53375) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix wrong query in log messages check [#53376](https://github.com/ClickHouse/ClickHouse/pull/53376) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* Non-significant changes [#53377](https://github.com/ClickHouse/ClickHouse/pull/53377) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Inhibit randomization in more tests [#53378](https://github.com/ClickHouse/ClickHouse/pull/53378) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Make some Keeper exceptions more structured [#53379](https://github.com/ClickHouse/ClickHouse/pull/53379) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Follow-up to [#52695](https://github.com/ClickHouse/ClickHouse/issues/52695): Move tests to a more appropriate place [#53400](https://github.com/ClickHouse/ClickHouse/pull/53400) ([Robert Schulze](https://github.com/rschu1ze)). +* Minor fixes (hints for wrong DB or table name) [#53402](https://github.com/ClickHouse/ClickHouse/pull/53402) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Quick fail undocumented features [#53413](https://github.com/ClickHouse/ClickHouse/pull/53413) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* boost getNestedColumnWithDefaultOnNull by insertManyDefaults [#53414](https://github.com/ClickHouse/ClickHouse/pull/53414) ([frinkr](https://github.com/frinkr)). +* Update test_distributed_inter_server_secret to pass with analyzer [#53416](https://github.com/ClickHouse/ClickHouse/pull/53416) ([vdimir](https://github.com/vdimir)). +* Parallel replicas: remove unnecessary code [#53419](https://github.com/ClickHouse/ClickHouse/pull/53419) ([Igor Nikonov](https://github.com/devcrafter)). +* Refactorings for configuration of in-memory caches [#53422](https://github.com/ClickHouse/ClickHouse/pull/53422) ([Robert Schulze](https://github.com/rschu1ze)). +* Fewer exceptions with runtime format string [#53424](https://github.com/ClickHouse/ClickHouse/pull/53424) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Analyzer: fix virtual columns in StorageDistributed [#53426](https://github.com/ClickHouse/ClickHouse/pull/53426) ([Dmitry Novik](https://github.com/novikd)). +* Fix creation of empty parts [#53429](https://github.com/ClickHouse/ClickHouse/pull/53429) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Merging [#53177](https://github.com/ClickHouse/ClickHouse/issues/53177) [#53430](https://github.com/ClickHouse/ClickHouse/pull/53430) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Merging [#53142](https://github.com/ClickHouse/ClickHouse/issues/53142) [#53431](https://github.com/ClickHouse/ClickHouse/pull/53431) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Do not send logs to CI if the credentials are not set [#53441](https://github.com/ClickHouse/ClickHouse/pull/53441) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Minor: Factorize constants in Annoy index [#53444](https://github.com/ClickHouse/ClickHouse/pull/53444) ([Robert Schulze](https://github.com/rschu1ze)). +* Restart killed PublishedReleaseCI workflows [#53445](https://github.com/ClickHouse/ClickHouse/pull/53445) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Follow-up: Do not send logs to CI if the credentials are not set [#53456](https://github.com/ClickHouse/ClickHouse/pull/53456) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Merging [#53307](https://github.com/ClickHouse/ClickHouse/issues/53307) [#53472](https://github.com/ClickHouse/ClickHouse/pull/53472) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Merging [#53306](https://github.com/ClickHouse/ClickHouse/issues/53306) [#53473](https://github.com/ClickHouse/ClickHouse/pull/53473) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Merging [#53304](https://github.com/ClickHouse/ClickHouse/issues/53304) [#53474](https://github.com/ClickHouse/ClickHouse/pull/53474) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Merging [#53373](https://github.com/ClickHouse/ClickHouse/issues/53373) [#53475](https://github.com/ClickHouse/ClickHouse/pull/53475) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix flaky test `02443_detach_attach_partition` [#53478](https://github.com/ClickHouse/ClickHouse/pull/53478) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove outdated code in ReplicatedMergeTreeQueue::initialize() [#53484](https://github.com/ClickHouse/ClickHouse/pull/53484) ([Azat Khuzhin](https://github.com/azat)). +* krb5: Fix CVE-2023-36054 [#53485](https://github.com/ClickHouse/ClickHouse/pull/53485) ([Robert Schulze](https://github.com/rschu1ze)). +* curl: update to latest master (fixes CVE-2023-32001) [#53487](https://github.com/ClickHouse/ClickHouse/pull/53487) ([Robert Schulze](https://github.com/rschu1ze)). +* Update boost to 1.79 [#53490](https://github.com/ClickHouse/ClickHouse/pull/53490) ([Robert Schulze](https://github.com/rschu1ze)). +* Get rid of secrets CLICKHOUSE_CI_LOGS [#53491](https://github.com/ClickHouse/ClickHouse/pull/53491) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update style checker [#53493](https://github.com/ClickHouse/ClickHouse/pull/53493) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update materialized_with_ddl.py [#53494](https://github.com/ClickHouse/ClickHouse/pull/53494) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix a race condition between RESTART REPLICAS and DROP DATABASE [#53495](https://github.com/ClickHouse/ClickHouse/pull/53495) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix tiny thing in Replicated database [#53496](https://github.com/ClickHouse/ClickHouse/pull/53496) ([Nikolay Degterinsky](https://github.com/evillique)). +* Simplify performance test [#53499](https://github.com/ClickHouse/ClickHouse/pull/53499) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added waiting for the PostgreSQL compatibility port to open in integration tests. [#53505](https://github.com/ClickHouse/ClickHouse/pull/53505) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Allow non-standalone keeper run in integration tests [#53512](https://github.com/ClickHouse/ClickHouse/pull/53512) ([Duc Canh Le](https://github.com/canhld94)). +* Make sending logs to the cloud less fragile (and fix an unrelated flaky test) [#53528](https://github.com/ClickHouse/ClickHouse/pull/53528) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update test.py [#53534](https://github.com/ClickHouse/ClickHouse/pull/53534) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `AddressSanitizer failed to allocate 0x0 (0) bytes of SetAlternateSignalStack` in integration tests [#53535](https://github.com/ClickHouse/ClickHouse/pull/53535) ([Nikita Taranov](https://github.com/nickitat)). +* Fix keeper default path check [#53539](https://github.com/ClickHouse/ClickHouse/pull/53539) ([pufit](https://github.com/pufit)). +* Follow-up to [#53528](https://github.com/ClickHouse/ClickHouse/issues/53528) [#53544](https://github.com/ClickHouse/ClickHouse/pull/53544) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Update 00002_log_and_exception_messages_formatting.sql [#53545](https://github.com/ClickHouse/ClickHouse/pull/53545) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update krb5 to 1.21.2 [#53552](https://github.com/ClickHouse/ClickHouse/pull/53552) ([Robert Schulze](https://github.com/rschu1ze)). +* Enable ISA-L on x86-64 only by default [#53553](https://github.com/ClickHouse/ClickHouse/pull/53553) ([ltrk2](https://github.com/ltrk2)). +* Change Big Endian-UUID to work the same as Little Endian-UUID [#53556](https://github.com/ClickHouse/ClickHouse/pull/53556) ([Austin Kothig](https://github.com/kothiga)). +* Bump openldap to LTS version (v2.5.16) [#53558](https://github.com/ClickHouse/ClickHouse/pull/53558) ([Robert Schulze](https://github.com/rschu1ze)). +* Update 02443_detach_attach_partition.sh [#53564](https://github.com/ClickHouse/ClickHouse/pull/53564) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Proper destruction of task in ShellCommandSource [#53573](https://github.com/ClickHouse/ClickHouse/pull/53573) ([Amos Bird](https://github.com/amosbird)). +* Fix for flaky test_ssl_cert_authentication [#53586](https://github.com/ClickHouse/ClickHouse/pull/53586) ([Pradeep Chhetri](https://github.com/chhetripradeep)). +* AARCH64 Neon memequal wide [#53588](https://github.com/ClickHouse/ClickHouse/pull/53588) ([Maksim Kita](https://github.com/kitaisreal)). +* Experiment Aggregator merge and destroy states in batch [#53589](https://github.com/ClickHouse/ClickHouse/pull/53589) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix test `02102_row_binary_with_names_and_types` [#53592](https://github.com/ClickHouse/ClickHouse/pull/53592) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove useless test [#53599](https://github.com/ClickHouse/ClickHouse/pull/53599) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Simplify test `01600_parts_types_metrics_long` [#53606](https://github.com/ClickHouse/ClickHouse/pull/53606) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* :lipstick: [S3::URI] Fix comment typos around versionId [#53607](https://github.com/ClickHouse/ClickHouse/pull/53607) ([Tomáš Hromada](https://github.com/gyfis)). +* Fix upgrade check [#53611](https://github.com/ClickHouse/ClickHouse/pull/53611) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Cleanup cluster test: remove unnecessary zookeeper [#53617](https://github.com/ClickHouse/ClickHouse/pull/53617) ([Igor Nikonov](https://github.com/devcrafter)). +* Bump boost to 1.80 [#53625](https://github.com/ClickHouse/ClickHouse/pull/53625) ([Robert Schulze](https://github.com/rschu1ze)). +* Update version_date.tsv and changelogs after v23.3.9.55-lts [#53626](https://github.com/ClickHouse/ClickHouse/pull/53626) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* CMake small refactoring [#53628](https://github.com/ClickHouse/ClickHouse/pull/53628) ([Maksim Kita](https://github.com/kitaisreal)). +* Fix data race of shell command [#53631](https://github.com/ClickHouse/ClickHouse/pull/53631) ([Amos Bird](https://github.com/amosbird)). +* Fix 02443_detach_attach_partition [#53633](https://github.com/ClickHouse/ClickHouse/pull/53633) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Add default timeout value for ClickHouseHelper [#53639](https://github.com/ClickHouse/ClickHouse/pull/53639) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Implement support for more aggregate functions on big-endian [#53650](https://github.com/ClickHouse/ClickHouse/pull/53650) ([ltrk2](https://github.com/ltrk2)). +* fix Logical Error in AsynchronousBoundedReadBuffer [#53651](https://github.com/ClickHouse/ClickHouse/pull/53651) ([Sema Checherinda](https://github.com/CheSema)). +* State of State and avg aggregation function fix for big endian [#53655](https://github.com/ClickHouse/ClickHouse/pull/53655) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Resubmit [#50171](https://github.com/ClickHouse/ClickHouse/issues/50171) [#53678](https://github.com/ClickHouse/ClickHouse/pull/53678) ([alesapin](https://github.com/alesapin)). +* Bump boost to 1.81 [#53679](https://github.com/ClickHouse/ClickHouse/pull/53679) ([Robert Schulze](https://github.com/rschu1ze)). +* Whitespaces [#53690](https://github.com/ClickHouse/ClickHouse/pull/53690) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove bad test [#53691](https://github.com/ClickHouse/ClickHouse/pull/53691) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix bad path format in logs [#53693](https://github.com/ClickHouse/ClickHouse/pull/53693) ([alesapin](https://github.com/alesapin)). +* Correct a functional test to not use endianness-specific input [#53697](https://github.com/ClickHouse/ClickHouse/pull/53697) ([ltrk2](https://github.com/ltrk2)). +* Fix running clickhouse-test with python 3.8 [#53700](https://github.com/ClickHouse/ClickHouse/pull/53700) ([Dmitry Novik](https://github.com/novikd)). +* refactor some old code [#53704](https://github.com/ClickHouse/ClickHouse/pull/53704) ([flynn](https://github.com/ucasfl)). +* Fixed wrong python test name pattern [#53713](https://github.com/ClickHouse/ClickHouse/pull/53713) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix flaky `shutdown_wait_unfinished_queries` integration test [#53714](https://github.com/ClickHouse/ClickHouse/pull/53714) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Update version_date.tsv and changelogs after v23.3.10.5-lts [#53733](https://github.com/ClickHouse/ClickHouse/pull/53733) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix flaky test_storage_s3_queue/test.py::test_delete_after_processing [#53736](https://github.com/ClickHouse/ClickHouse/pull/53736) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix broken `02862_sorted_distinct_sparse_fix` [#53738](https://github.com/ClickHouse/ClickHouse/pull/53738) ([Antonio Andelic](https://github.com/antonio2368)). +* Do not warn about arch_sys_counter clock [#53739](https://github.com/ClickHouse/ClickHouse/pull/53739) ([Artur Malchanau](https://github.com/Hexta)). +* Add some profile events [#53741](https://github.com/ClickHouse/ClickHouse/pull/53741) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Support clang-18 (Wmissing-field-initializers) [#53751](https://github.com/ClickHouse/ClickHouse/pull/53751) ([Raúl Marín](https://github.com/Algunenano)). +* Upgrade openSSL to v3.0.10 [#53756](https://github.com/ClickHouse/ClickHouse/pull/53756) ([bhavnajindal](https://github.com/bhavnajindal)). +* Improve JSON-handling on s390x [#53760](https://github.com/ClickHouse/ClickHouse/pull/53760) ([ltrk2](https://github.com/ltrk2)). +* Reduce API calls to SSM client [#53762](https://github.com/ClickHouse/ClickHouse/pull/53762) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+* Remove branch references from .gitmodules [#53763](https://github.com/ClickHouse/ClickHouse/pull/53763) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix reading from `url` with all filtered paths [#53796](https://github.com/ClickHouse/ClickHouse/pull/53796) ([Antonio Andelic](https://github.com/antonio2368)). +* Follow-up to [#53611](https://github.com/ClickHouse/ClickHouse/issues/53611) [#53799](https://github.com/ClickHouse/ClickHouse/pull/53799) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix a bug in attach partition [#53811](https://github.com/ClickHouse/ClickHouse/pull/53811) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Bump boost to 1.82 [#53812](https://github.com/ClickHouse/ClickHouse/pull/53812) ([Robert Schulze](https://github.com/rschu1ze)). +* Enable producing endianness-independent output in lz4 [#53816](https://github.com/ClickHouse/ClickHouse/pull/53816) ([ltrk2](https://github.com/ltrk2)). +* Fix typo in cluster name. [#53829](https://github.com/ClickHouse/ClickHouse/pull/53829) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Get rid of describe_parameters for the best robot token [#53833](https://github.com/ClickHouse/ClickHouse/pull/53833) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update 00002_log_and_exception_messages_formatting.sql [#53839](https://github.com/ClickHouse/ClickHouse/pull/53839) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix after [#51622](https://github.com/ClickHouse/ClickHouse/issues/51622) [#53840](https://github.com/ClickHouse/ClickHouse/pull/53840) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix segfault in `TableNameHints` (with `Lazy` database) [#53849](https://github.com/ClickHouse/ClickHouse/pull/53849) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Follow-up to [#53501](https://github.com/ClickHouse/ClickHouse/issues/53501) [#53851](https://github.com/ClickHouse/ClickHouse/pull/53851) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Follow-up to [#53528](https://github.com/ClickHouse/ClickHouse/issues/53528) [#53852](https://github.com/ClickHouse/ClickHouse/pull/53852) ([Alexander Tokmakov](https://github.com/tavplubix)). +* refactor some code [#53856](https://github.com/ClickHouse/ClickHouse/pull/53856) ([flynn](https://github.com/ucasfl)). +* Bump boost to 1.83 [#53859](https://github.com/ClickHouse/ClickHouse/pull/53859) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove unused parallel replicas coordinator in query info [#53862](https://github.com/ClickHouse/ClickHouse/pull/53862) ([Igor Nikonov](https://github.com/devcrafter)). +* Update version_date.tsv and changelogs after v23.7.5.30-stable [#53870](https://github.com/ClickHouse/ClickHouse/pull/53870) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.6.3.87-stable [#53872](https://github.com/ClickHouse/ClickHouse/pull/53872) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.3.11.5-lts [#53873](https://github.com/ClickHouse/ClickHouse/pull/53873) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Update version_date.tsv and changelogs after v23.5.5.92-stable [#53874](https://github.com/ClickHouse/ClickHouse/pull/53874) ([robot-clickhouse](https://github.com/robot-clickhouse)). 
+* Update version_date.tsv and changelogs after v22.8.21.38-lts [#53875](https://github.com/ClickHouse/ClickHouse/pull/53875) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix: USearch deserialize [#53876](https://github.com/ClickHouse/ClickHouse/pull/53876) ([Davit Vardanyan](https://github.com/davvard)). +* Improve schema inference for archives [#53880](https://github.com/ClickHouse/ClickHouse/pull/53880) ([Antonio Andelic](https://github.com/antonio2368)). +* Make UInt128TrivialHash endianness-independent [#53891](https://github.com/ClickHouse/ClickHouse/pull/53891) ([ltrk2](https://github.com/ltrk2)). +* Use iterators instead of std::ranges [#53893](https://github.com/ClickHouse/ClickHouse/pull/53893) ([ltrk2](https://github.com/ltrk2)). +* Finalize file descriptor in ~WriteBufferToFileSegment [#53895](https://github.com/ClickHouse/ClickHouse/pull/53895) ([vdimir](https://github.com/vdimir)). +* Fix: respect skip_unavailable_shards with parallel replicas [#53904](https://github.com/ClickHouse/ClickHouse/pull/53904) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix flakiness of 00514_interval_operators [#53906](https://github.com/ClickHouse/ClickHouse/pull/53906) ([Michael Kolupaev](https://github.com/al13n321)). +* Change IStorage interface by random walk, no goal in particular [#54009](https://github.com/ClickHouse/ClickHouse/pull/54009) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Refactor logic around async insert with deduplication [#54012](https://github.com/ClickHouse/ClickHouse/pull/54012) ([Antonio Andelic](https://github.com/antonio2368)). +* More assertive [#54044](https://github.com/ClickHouse/ClickHouse/pull/54044) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Correct doc for filesystem_prefetch_max_memory_usage [#54058](https://github.com/ClickHouse/ClickHouse/pull/54058) ([Raúl Marín](https://github.com/Algunenano)). +* Fix after [#52943](https://github.com/ClickHouse/ClickHouse/issues/52943) [#54064](https://github.com/ClickHouse/ClickHouse/pull/54064) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Parse IS NOT DISTINCT and <=> operators [#54067](https://github.com/ClickHouse/ClickHouse/pull/54067) ([vdimir](https://github.com/vdimir)). +* Replace dlcdn.apache.org by archive domain [#54081](https://github.com/ClickHouse/ClickHouse/pull/54081) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Increased log waiting timeout in test_profile_max_sessions_for_user [#54092](https://github.com/ClickHouse/ClickHouse/pull/54092) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Update Dockerfile [#54118](https://github.com/ClickHouse/ClickHouse/pull/54118) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Small improvements in `getAlterMutationCommandsForPart` [#54126](https://github.com/ClickHouse/ClickHouse/pull/54126) ([Anton Popov](https://github.com/CurtizJ)). +* Fix some more analyzer tests [#54128](https://github.com/ClickHouse/ClickHouse/pull/54128) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Disable `01600_parts_types_metrics_long` for asan [#54132](https://github.com/ClickHouse/ClickHouse/pull/54132) ([Antonio Andelic](https://github.com/antonio2368)). +* Fixing 01086_odbc_roundtrip with analyzer. [#54133](https://github.com/ClickHouse/ClickHouse/pull/54133) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). 
+* Add warnings about ingestion script speed and memory usage in Laion dataset instructions [#54153](https://github.com/ClickHouse/ClickHouse/pull/54153) ([Michael Kolupaev](https://github.com/al13n321)). +* tests: mark 02152_http_external_tables_memory_tracking as no-parallel [#54155](https://github.com/ClickHouse/ClickHouse/pull/54155) ([Azat Khuzhin](https://github.com/azat)). +* The external logs have had colliding arguments [#54165](https://github.com/ClickHouse/ClickHouse/pull/54165) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Rename macro [#54169](https://github.com/ClickHouse/ClickHouse/pull/54169) ([Kseniia Sumarokova](https://github.com/kssenii)). + diff --git a/docs/changelogs/v23.8.2.7-lts.md b/docs/changelogs/v23.8.2.7-lts.md new file mode 100644 index 000000000000..317e2c6d56a9 --- /dev/null +++ b/docs/changelogs/v23.8.2.7-lts.md @@ -0,0 +1,18 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.8.2.7-lts (f73c8f37874) FIXME as compared to v23.8.1.2992-lts (ebc7d9a9f3b) + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix: parallel replicas over distributed don't read from all replicas [#54199](https://github.com/ClickHouse/ClickHouse/pull/54199) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix: allow IPv6 for bloom filter [#54200](https://github.com/ClickHouse/ClickHouse/pull/54200) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* S3Queue is experimental [#54214](https://github.com/ClickHouse/ClickHouse/pull/54214) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/changelogs/v23.8.3.48-lts.md b/docs/changelogs/v23.8.3.48-lts.md new file mode 100644 index 000000000000..af669c5adc88 --- /dev/null +++ b/docs/changelogs/v23.8.3.48-lts.md @@ -0,0 +1,43 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.8.3.48-lts (ebe4eb3d23e) FIXME as compared to v23.8.2.7-lts (f73c8f37874) + +#### Improvement +* Backported in [#54287](https://github.com/ClickHouse/ClickHouse/issues/54287): Enable allow_remove_stale_moving_parts by default. [#54260](https://github.com/ClickHouse/ClickHouse/pull/54260) ([vdimir](https://github.com/vdimir)). +* Backported in [#55057](https://github.com/ClickHouse/ClickHouse/issues/55057): ProfileEvents added ContextLockWaitMicroseconds event. [#55029](https://github.com/ClickHouse/ClickHouse/pull/55029) ([Maksim Kita](https://github.com/kitaisreal)). + +#### Build/Testing/Packaging Improvement +* Backported in [#54702](https://github.com/ClickHouse/ClickHouse/issues/54702): Enrich `changed_images.json` with the latest tag from master for images that are not changed in the pull request. [#54369](https://github.com/ClickHouse/ClickHouse/pull/54369) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Backported in [#54743](https://github.com/ClickHouse/ClickHouse/issues/54743): Remove redundant `clickhouse-keeper-client` symlink. [#54587](https://github.com/ClickHouse/ClickHouse/pull/54587) ([Tomas Barton](https://github.com/deric)). +* Backported in [#54685](https://github.com/ClickHouse/ClickHouse/issues/54685): We build and upload them for every push, which isn't worth it. [#54675](https://github.com/ClickHouse/ClickHouse/pull/54675) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
+ +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Fix: moved to prewhere condition actions can lose column [#53492](https://github.com/ClickHouse/ClickHouse/pull/53492) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix: parallel replicas over distributed with prefer_localhost_replica=1 [#54334](https://github.com/ClickHouse/ClickHouse/pull/54334) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix possible error 'URI contains invalid characters' in s3 table function [#54373](https://github.com/ClickHouse/ClickHouse/pull/54373) ([Kruglov Pavel](https://github.com/Avogar)). +* Check for overflow before addition in `analysisOfVariance` function [#54385](https://github.com/ClickHouse/ClickHouse/pull/54385) ([Antonio Andelic](https://github.com/antonio2368)). +* reproduce and fix the bug in removeSharedRecursive [#54430](https://github.com/ClickHouse/ClickHouse/pull/54430) ([Sema Checherinda](https://github.com/CheSema)). +* Fix aggregate projections with normalized states [#54480](https://github.com/ClickHouse/ClickHouse/pull/54480) ([Amos Bird](https://github.com/amosbird)). +* Fix possible parsing error in WithNames formats with disabled input_format_with_names_use_header [#54513](https://github.com/ClickHouse/ClickHouse/pull/54513) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix zero copy garbage [#54550](https://github.com/ClickHouse/ClickHouse/pull/54550) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix race in `ColumnUnique` [#54575](https://github.com/ClickHouse/ClickHouse/pull/54575) ([Nikita Taranov](https://github.com/nickitat)). +* Fix serialization of `ColumnDecimal` [#54601](https://github.com/ClickHouse/ClickHouse/pull/54601) ([Nikita Taranov](https://github.com/nickitat)). +* Fix virtual columns having incorrect values after ORDER BY [#54811](https://github.com/ClickHouse/ClickHouse/pull/54811) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix Keeper segfault during shutdown [#54841](https://github.com/ClickHouse/ClickHouse/pull/54841) ([Antonio Andelic](https://github.com/antonio2368)). +* Rebuild minmax_count_projection when partition key gets modified [#54943](https://github.com/ClickHouse/ClickHouse/pull/54943) ([Amos Bird](https://github.com/amosbird)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* Test libunwind changes. [#51436](https://github.com/ClickHouse/ClickHouse/pull/51436) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Use pathlib.Path in S3Helper, rewrite build reports, improve small things [#54010](https://github.com/ClickHouse/ClickHouse/pull/54010) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Properly re-initialize ZooKeeper fault injection [#54251](https://github.com/ClickHouse/ClickHouse/pull/54251) ([Alexander Gololobov](https://github.com/davenger)). +* Fix segfault in system.zookeeper [#54326](https://github.com/ClickHouse/ClickHouse/pull/54326) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Update automated commit status comment [#54441](https://github.com/ClickHouse/ClickHouse/pull/54441) ([vdimir](https://github.com/vdimir)). +* Adjusting `num_streams` by expected work in StorageS3 [#54815](https://github.com/ClickHouse/ClickHouse/pull/54815) ([pufit](https://github.com/pufit)). 
+ diff --git a/docs/changelogs/v23.9.1.1854-stable.md b/docs/changelogs/v23.9.1.1854-stable.md new file mode 100644 index 000000000000..655dd54d81bb --- /dev/null +++ b/docs/changelogs/v23.9.1.1854-stable.md @@ -0,0 +1,381 @@ +--- +sidebar_position: 1 +sidebar_label: 2023 +--- + +# 2023 Changelog + +### ClickHouse release v23.9.1.1854-stable (8f9a227de1f) FIXME as compared to v23.8.1.2992-lts (ebc7d9a9f3b) + +#### Backward Incompatible Change +* Remove the `status_info` configuration option and dictionaries status from the default Prometheus handler. [#54090](https://github.com/ClickHouse/ClickHouse/pull/54090) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* The experimental parts metadata cache is removed from the codebase. [#54215](https://github.com/ClickHouse/ClickHouse/pull/54215) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Disable setting `input_format_json_try_infer_numbers_from_strings` by default, so we don't try to infer numbers from strings in JSON formats, to avoid possible parsing errors when sample data contains strings that look like numbers. [#55099](https://github.com/ClickHouse/ClickHouse/pull/55099) ([Kruglov Pavel](https://github.com/Avogar)). + +#### New Feature +* Added a new type of authentication based on SSH keys. It works only for the Native TCP protocol. [#41109](https://github.com/ClickHouse/ClickHouse/pull/41109) ([George Gamezardashvili](https://github.com/InfJoker)). +* Added IO Scheduling support for remote disks. Storage configuration for disk types `s3`, `s3_plain`, `hdfs` and `azure_blob_storage` can now contain `read_resource` and `write_resource` elements holding resource names. Scheduling policies for these resources can be configured in a separate server configuration section `resources`. Queries can be marked using setting `workload` and classified using server configuration section `workload_classifiers` to achieve diverse resource scheduling goals. More details in docs/en/operations/workload-scheduling.md. [#47009](https://github.com/ClickHouse/ClickHouse/pull/47009) ([Sergei Trifonov](https://github.com/serxa)). +* Added a new column `_block_number`. Resolves [#44532](https://github.com/ClickHouse/ClickHouse/issues/44532). [#47532](https://github.com/ClickHouse/ClickHouse/pull/47532) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Add options `partial_result_update_duration_ms` and `max_rows_in_partial_result` to show updates of a partial result of output table in real-time during query execution. [#48607](https://github.com/ClickHouse/ClickHouse/pull/48607) ([Alexey Perevyshin](https://github.com/alexX512)). +* Support case-insensitive and dot-all matching modes in RegExpTree dictionaries. [#50906](https://github.com/ClickHouse/ClickHouse/pull/50906) ([Johann Gan](https://github.com/johanngan)). +* Add support for `ALTER TABLE MODIFY COMMENT`. Note: something similar was added by an external contributor a long time ago, but the feature did not work at all and only confused users. This closes [#36377](https://github.com/ClickHouse/ClickHouse/issues/36377). [#51304](https://github.com/ClickHouse/ClickHouse/pull/51304) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Added "GCD" aka. "greatest common divisor" as a new data compression codec. The codec computes the GCD of all column values, and then divides each value by the GCD. The GCD codec is a data preparation codec (similar to Delta and DoubleDelta) and cannot be used stand-alone.
It works with integer, decimal and date/time data types. A viable use case for the GCD codec is column values that change (increase/decrease) in multiples of the GCD, e.g. 24 - 28 - 16 - 24 - 8 - 24 (assuming GCD = 4); see the usage sketch below. [#53149](https://github.com/ClickHouse/ClickHouse/pull/53149) ([Alexander Nam](https://github.com/seshWCS)). +* Two new type aliases "DECIMAL(P)" (as shortcut for "DECIMAL(P, 0)") and "DECIMAL" (as shortcut for "DECIMAL(10, 0)") were added. This makes ClickHouse more compatible with MySQL's SQL dialect. [#53328](https://github.com/ClickHouse/ClickHouse/pull/53328) ([Val Doroshchuk](https://github.com/valbok)). +* Added a new system log table `backup_log` to track all `BACKUP` and `RESTORE` operations. [#53638](https://github.com/ClickHouse/ClickHouse/pull/53638) ([Victor Krasnov](https://github.com/sirvickr)). +* Added a format setting "output_format_markdown_escape_special_characters" (default: false). The setting controls whether special characters like "!", "#", "$" etc. are escaped (i.e. prefixed by a backslash) in the "Markdown" output format. [#53860](https://github.com/ClickHouse/ClickHouse/pull/53860) ([irenjj](https://github.com/irenjj)). +* Add function `decodeHTMLComponent`. [#54097](https://github.com/ClickHouse/ClickHouse/pull/54097) ([Bharat Nallan](https://github.com/bharatnc)). +* Added peak_threads_usage to query_log table. [#54335](https://github.com/ClickHouse/ClickHouse/pull/54335) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Add SHOW FUNCTIONS support to clickhouse-client. [#54337](https://github.com/ClickHouse/ClickHouse/pull/54337) ([Julia Kartseva](https://github.com/wat-ze-hex)). +* This PR improves schema inference from JSON formats: 1) Now it's possible to infer named Tuples from JSON objects without the experimental JSON type, under a setting `input_format_json_try_infer_named_tuples_from_objects` in JSON formats. Previously, without the experimental JSON type we could only infer JSON objects as Strings or Maps; now we can infer named Tuples. The resulting Tuple type will contain all keys of objects that were read in the data sample during schema inference. It can be useful for reading structured JSON data without sparse objects. The setting is enabled by default. 2) Allow parsing JSON arrays into a column with type String under setting `input_format_json_read_arrays_as_strings`. It can help reading arrays with values of different types. 3) Allow using type String for JSON keys with unknown types (`null`/`[]`/`{}`) in sample data under setting `input_format_json_infer_incomplete_types_as_strings`. Now in JSON formats we can read any value into a String column, and we can avoid getting the error `Cannot determine type for column 'column_name' by first 25000 rows of data, most likely this column contains only Nulls or empty Arrays/Maps` during schema inference by using type String for unknown types, so the data will be read successfully. [#54427](https://github.com/ClickHouse/ClickHouse/pull/54427) ([Kruglov Pavel](https://github.com/Avogar)). +* Added function "toDaysSinceYearZero" with alias "TO_DAYS()" (for compatibility with MySQL) which returns the number of days passed since 0001-01-01. [#54479](https://github.com/ClickHouse/ClickHouse/pull/54479) ([Robert Schulze](https://github.com/rschu1ze)). +* Added functions YYYYMMDDtoDate(), YYYYMMDDtoDate32(), YYYYMMDDhhmmssToDateTime() and YYYYMMDDhhmmssToDateTime64(). They convert a date or date with time encoded as integer (e.g. 20230911) into a native date or date with time.
As such, they provide the opposite functionality of the existing functions toYYYYMMDD() and toYYYYMMDDhhmmss(). [#54509](https://github.com/ClickHouse/ClickHouse/pull/54509) ([Robert Schulze](https://github.com/rschu1ze)). +* Added "bandwidth_limit" IO scheduling node type. It allows you to specify `max_speed` and `max_burst` constraints on traffic passing through this node. More details in docs/en/operations/workload-scheduling.md. [#54618](https://github.com/ClickHouse/ClickHouse/pull/54618) ([Sergei Trifonov](https://github.com/serxa)). +* Function `toDaysSinceYearZero()` now supports arguments of type `DateTime` and `DateTime64`. [#54856](https://github.com/ClickHouse/ClickHouse/pull/54856) ([Serge Klochkov](https://github.com/slvrtrn)). +* Allow S3-style URLs for table functions `s3`, `gcs`, `oss`. The URL is automatically converted to HTTP. Example: `'s3://clickhouse-public-datasets/hits.csv'` is converted to `'https://clickhouse-public-datasets.s3.amazonaws.com/hits.csv'`. [#54931](https://github.com/ClickHouse/ClickHouse/pull/54931) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Add several string distance functions, including `byteHammingDistance`, `byteJaccardIndex`, `byteEditDistance`. [#54935](https://github.com/ClickHouse/ClickHouse/pull/54935) ([flynn](https://github.com/ucasfl)). +* Add new setting `print_pretty_type_names` to pretty-print deeply nested types like Tuples/Maps/Arrays. [#55095](https://github.com/ClickHouse/ClickHouse/pull/55095) ([Kruglov Pavel](https://github.com/Avogar)). + +#### Performance Improvement +* Improve performance of sorting for decimal columns. Improve performance of insertion into MergeTree if ORDER BY contains a Decimal column. Improve performance of sorting when data is already sorted or almost sorted. [#35961](https://github.com/ClickHouse/ClickHouse/pull/35961) ([Maksim Kita](https://github.com/kitaisreal)). +* Improve performance for huge query analysis. Fixes [#51224](https://github.com/ClickHouse/ClickHouse/issues/51224). [#51469](https://github.com/ClickHouse/ClickHouse/pull/51469) ([frinkr](https://github.com/frinkr)). +* Add a rewriter for the new analyzer. [#52082](https://github.com/ClickHouse/ClickHouse/pull/52082) ([JackyWoo](https://github.com/JackyWoo)). +* Add a rewriter for both the old and new analyzers, and add setting `optimize_uniq_to_count`. [#52645](https://github.com/ClickHouse/ClickHouse/pull/52645) ([JackyWoo](https://github.com/JackyWoo)). +* Remove manual calls to `mmap/mremap/munmap` and delegate all this work to `jemalloc`. [#52792](https://github.com/ClickHouse/ClickHouse/pull/52792) ([Nikita Taranov](https://github.com/nickitat)). +* Roaring bitmaps are now optimized before serialization. [#52842](https://github.com/ClickHouse/ClickHouse/pull/52842) ([UnamedRus](https://github.com/UnamedRus)). +* Optimize GROUP BY with constant keys. This will optimize queries with GROUP BY `_file`/`_path` after https://github.com/ClickHouse/ClickHouse/pull/53529. [#53549](https://github.com/ClickHouse/ClickHouse/pull/53549) ([Kruglov Pavel](https://github.com/Avogar)). +* Speed up reading from S3 by enabling prefetches by default. [#53709](https://github.com/ClickHouse/ClickHouse/pull/53709) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Do not implicitly read pk and version columns in lonely parts if unnecessary. [#53919](https://github.com/ClickHouse/ClickHouse/pull/53919) ([Duc Canh Le](https://github.com/canhld94)).
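The usage sketch referenced in the GCD codec entry above (table and column names are illustrative; per the entry, GCD only prepares the data and must be chained with a general-purpose codec):

```sql
-- Values moving in multiples of a common divisor compress well with GCD.
CREATE TABLE gcd_codec_demo
(
    ts     DateTime,
    amount UInt64 CODEC(GCD, LZ4)  -- GCD divides out the common divisor, LZ4 compresses the result
)
ENGINE = MergeTree
ORDER BY ts;
```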
+* Fixed high CPU consumption when working with NATS. [#54399](https://github.com/ClickHouse/ClickHouse/pull/54399) ([Vasilev Pyotr](https://github.com/vahpetr)). +* Since we use separate instructions for executing `toString()` with a datetime argument, it is possible to improve performance a bit for non-datetime arguments and have some parts of the code cleaner. Follows up [#53680](https://github.com/ClickHouse/ClickHouse/issues/53680). [#54443](https://github.com/ClickHouse/ClickHouse/pull/54443) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Instead of serializing JSON elements into a `std::stringstream`, this PR tries to put the serialization result into `ColumnString` directly. [#54613](https://github.com/ClickHouse/ClickHouse/pull/54613) ([lgbo](https://github.com/lgbo-ustc)). +* Enable ORDER BY optimization for reading data in corresponding order from a MergeTree table in case the table is behind a view. [#54628](https://github.com/ClickHouse/ClickHouse/pull/54628) ([Vitaly Baranov](https://github.com/vitlibar)). +* Improve JSON SQL functions by reusing `GeneratorJSONPath`. Since there are several `make_shared` calls in `GeneratorJSONPath`'s constructor, it has bad performance. [#54735](https://github.com/ClickHouse/ClickHouse/pull/54735) ([lgbo](https://github.com/lgbo-ustc)). + +#### Improvement +* Keeper improvement: Add a `createIfNotExists` Keeper command. [#48855](https://github.com/ClickHouse/ClickHouse/pull/48855) ([Konstantin Bogdanov](https://github.com/thevar1able)). +* Add IF EMPTY clause for DROP TABLE queries. [#48915](https://github.com/ClickHouse/ClickHouse/pull/48915) ([Pavel Novitskiy](https://github.com/pnovitskiy)). +* The Keeper dynamically adjusts log levels. [#50372](https://github.com/ClickHouse/ClickHouse/pull/50372) ([helifu](https://github.com/helifu)). +* Allow replacing long names of files of columns in `MergeTree` data parts with hashes of the names. It helps to avoid the `File name too long` error in some cases. [#50612](https://github.com/ClickHouse/ClickHouse/pull/50612) ([Anton Popov](https://github.com/CurtizJ)). +* Allow specifying the expiration date and, optionally, the time for user credentials with the `VALID UNTIL datetime` clause (see the sketch below). [#51261](https://github.com/ClickHouse/ClickHouse/pull/51261) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add setting `ignore_access_denied_multidirectory_globs`. [#52839](https://github.com/ClickHouse/ClickHouse/pull/52839) ([Andrey Zvonov](https://github.com/zvonand)). +* Output valid JSON/XML on exception during HTTP query execution. Add setting `http_write_exception_in_output_format` to enable/disable this behaviour (enabled by default). [#52853](https://github.com/ClickHouse/ClickHouse/pull/52853) ([Kruglov Pavel](https://github.com/Avogar)). +* More precise Integer type inference; fixes [#51236](https://github.com/ClickHouse/ClickHouse/issues/51236). [#53003](https://github.com/ClickHouse/ClickHouse/pull/53003) ([Chen768959](https://github.com/Chen768959)). +* Keeper tries to batch flush requests for better performance. [#53049](https://github.com/ClickHouse/ClickHouse/pull/53049) ([Antonio Andelic](https://github.com/antonio2368)). +* Introduced resolving of charsets in the string literals for MaterializedMySQL. [#53220](https://github.com/ClickHouse/ClickHouse/pull/53220) ([Val Doroshchuk](https://github.com/valbok)).
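The sketch referenced in the `VALID UNTIL` entry above, also covering the `DROP TABLE ... IF EMPTY` clause (user and table names are hypothetical):

```sql
-- Credential that stops working at the given datetime.
CREATE USER report_bot IDENTIFIED WITH sha256_password BY 'secret'
VALID UNTIL '2024-01-01 00:00:00';

-- Drops the table only if it contains no rows.
DROP TABLE IF EMPTY staging_events;
```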
+* Fix a subtle issue with a rarely used `EmbeddedRocksDB` table engine in an extremely rare scenario: sometimes the `EmbeddedRocksDB` table engine does not close files correctly in NFS after running `DROP TABLE`. [#53502](https://github.com/ClickHouse/ClickHouse/pull/53502) ([Mingliang Pan](https://github.com/liangliangpan)). +* SQL functions "toString(datetime)" and "formatDateTime()" now support non-constant timezone arguments. [#53680](https://github.com/ClickHouse/ClickHouse/pull/53680) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* `RESTORE TABLE ON CLUSTER` must create replicated tables with a matching UUID on hosts. Otherwise the macro `{uuid}` in ZooKeeper path can't work correctly after RESTORE. This PR implements that. [#53765](https://github.com/ClickHouse/ClickHouse/pull/53765) ([Vitaly Baranov](https://github.com/vitlibar)). +* Added restore setting `restore_broken_parts_as_detached`: if it's true the RESTORE process won't stop on broken parts while restoring; instead, all the broken parts will be copied to the `detached` folder with the prefix `broken-from-backup`. If it's false the RESTORE process will stop on the first broken part (if any). The default value is false. [#53877](https://github.com/ClickHouse/ClickHouse/pull/53877) ([Vitaly Baranov](https://github.com/vitlibar)). +* The creation of Annoy indexes can now be parallelized using setting `max_threads_for_annoy_index_creation`. [#54047](https://github.com/ClickHouse/ClickHouse/pull/54047) ([Robert Schulze](https://github.com/rschu1ze)). +* The MySQL interface gained a minimal implementation of prepared statements, just enough to allow a connection from Tableau Online to ClickHouse via the MySQL connector. [#54115](https://github.com/ClickHouse/ClickHouse/pull/54115) ([Serge Klochkov](https://github.com/slvrtrn)). +* Replaced the library to handle (encode/decode) base64 values from Turbo-Base64 to aklomp-base64. Both are SIMD-accelerated on x86 and ARM, but 1. the license of the latter (BSD-2) is more favorable for ClickHouse, and Turbo-Base64 switched in the meantime to GPL-3, 2. with more GitHub stars, aklomp-base64 seems more future-proof, 3. aklomp-base64 has a slightly nicer API (which is arguably subjective), and 4. aklomp-base64 does not require us to hack around bugs (like non-threadsafe initialization). Note: aklomp-base64 rejects unpadded base64 values whereas Turbo-Base64 decodes them on a best-effort basis. RFC-4648 leaves it open whether padding is mandatory or not, but depending on the context this may be a behavioral change to be aware of. [#54119](https://github.com/ClickHouse/ClickHouse/pull/54119) ([Mikhail Koviazin](https://github.com/mkmkme)). +* Add elapsed_ns to HTTP headers X-ClickHouse-Progress and X-ClickHouse-Summary. [#54179](https://github.com/ClickHouse/ClickHouse/pull/54179) ([joelynch](https://github.com/joelynch)). +* Implementation of `reconfig` (https://github.com/ClickHouse/ClickHouse/pull/49450), `sync`, and `exists` commands for keeper-client. [#54201](https://github.com/ClickHouse/ClickHouse/pull/54201) ([pufit](https://github.com/pufit)). +* "clickhouse-local" and "clickhouse-client" now allow specifying the "--query" parameter multiple times, e.g. './clickhouse-client --query "SELECT 1" --query "SELECT 2"'. This syntax is slightly more intuitive than `./clickhouse-client --multiquery "SELECT 1;SELECT 2"`, a bit easier to script (e.g. "queries.push_back('--query "$q"')") and more consistent with the behavior of the existing parameter "--queries-file" (e.g.
"./clickhouse client --queries-file queries1.sql --queries-file queries2.sql"). [#54249](https://github.com/ClickHouse/ClickHouse/pull/54249) ([Robert Schulze](https://github.com/rschu1ze)). +* Add sub-second precision to `formatReadableTimeDelta`. [#54250](https://github.com/ClickHouse/ClickHouse/pull/54250) ([Andrey Zvonov](https://github.com/zvonand)). +* Fix wrong reallocation in HashedArrayDictionary:. [#54254](https://github.com/ClickHouse/ClickHouse/pull/54254) ([Vitaly Baranov](https://github.com/vitlibar)). +* Enable allow_remove_stale_moving_parts by default. [#54260](https://github.com/ClickHouse/ClickHouse/pull/54260) ([vdimir](https://github.com/vdimir)). +* Fix using count from cache and improve progress bar for reading from archives. [#54271](https://github.com/ClickHouse/ClickHouse/pull/54271) ([Kruglov Pavel](https://github.com/Avogar)). +* Add support for S3 credentials using SSO. To define a profile to be used with SSO, set `AWS_PROFILE` environment variable. [#54347](https://github.com/ClickHouse/ClickHouse/pull/54347) ([Antonio Andelic](https://github.com/antonio2368)). +* Support NULL as default for nested types Array/Tuple/Map for input formats. Closes [#51100](https://github.com/ClickHouse/ClickHouse/issues/51100). [#54351](https://github.com/ClickHouse/ClickHouse/pull/54351) ([Kruglov Pavel](https://github.com/Avogar)). +* This is actually a bug fix, but not sure I'll be able to add a test to support the case, so I have put it as an improvement. This issue was introduced in https://github.com/ClickHouse/ClickHouse/pull/45878, which is when CH started reading arrow in batches. [#54370](https://github.com/ClickHouse/ClickHouse/pull/54370) ([Arthur Passos](https://github.com/arthurpassos)). +* Add STD alias to stddevPop function for MySQL compatibility. Closes [#54274](https://github.com/ClickHouse/ClickHouse/issues/54274). [#54382](https://github.com/ClickHouse/ClickHouse/pull/54382) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add `addDate` function for compatibility with MySQL and `subDate` for consistency. Reference [#54275](https://github.com/ClickHouse/ClickHouse/issues/54275). [#54400](https://github.com/ClickHouse/ClickHouse/pull/54400) ([Nikolay Degterinsky](https://github.com/evillique)). +* Parse data in JSON format as JSONEachRow if failed to parse metadata. It will allow to read files with `.json` extension even if real format is JSONEachRow. Closes [#45740](https://github.com/ClickHouse/ClickHouse/issues/45740). [#54405](https://github.com/ClickHouse/ClickHouse/pull/54405) ([Kruglov Pavel](https://github.com/Avogar)). +* Pass http retry timeout as milliseconds. [#54438](https://github.com/ClickHouse/ClickHouse/pull/54438) ([Duc Canh Le](https://github.com/canhld94)). +* Support SAMPLE BY for VIEW. [#54477](https://github.com/ClickHouse/ClickHouse/pull/54477) ([Azat Khuzhin](https://github.com/azat)). +* Add modification_time into system.detached_parts. [#54506](https://github.com/ClickHouse/ClickHouse/pull/54506) ([Azat Khuzhin](https://github.com/azat)). +* Added a setting "splitby_max_substrings_includes_remaining_string" which controls if functions "splitBy*()" with argument "max_substring" > 0 include the remaining string (if any) in the result array (Python/Spark semantics) or not. The default behavior does not change. [#54518](https://github.com/ClickHouse/ClickHouse/pull/54518) ([Robert Schulze](https://github.com/rschu1ze)). +* Now clickhouse-client process files in parallel in case of `INFILE 'glob_expression'`. 
Closes [#54218](https://github.com/ClickHouse/ClickHouse/issues/54218). [#54533](https://github.com/ClickHouse/ClickHouse/pull/54533) ([Max K.](https://github.com/mkaynov)). +* Allow using the primary key for the IN function where the primary key column types are different from the `IN` function's right-side column types. Example: `SELECT id FROM test_table WHERE id IN (SELECT '5')`. Closes [#48936](https://github.com/ClickHouse/ClickHouse/issues/48936). [#54544](https://github.com/ClickHouse/ClickHouse/pull/54544) ([Maksim Kita](https://github.com/kitaisreal)). +* Better integer type inference for Int64/UInt64 fields. Continuation of https://github.com/ClickHouse/ClickHouse/pull/53003. Now it also works for nested types like Arrays of Arrays and for functions like `map/tuple`. Issue: [#51236](https://github.com/ClickHouse/ClickHouse/issues/51236). [#54553](https://github.com/ClickHouse/ClickHouse/pull/54553) ([Kruglov Pavel](https://github.com/Avogar)). +* HashJoin tries to shrink internal buffers consuming half of maximal available memory (set by `max_bytes_in_join`). [#54584](https://github.com/ClickHouse/ClickHouse/pull/54584) ([vdimir](https://github.com/vdimir)). +* Added array operations for multiplying, dividing and modulo on scalar. It works both ways, for example `5 * [5, 5]` and `[5, 5] * 5` - both cases are possible. [#54608](https://github.com/ClickHouse/ClickHouse/pull/54608) ([Yarik Briukhovetskyi](https://github.com/yariks5s)). +* Added function `timestamp` for compatibility with MySQL. Closes [#54275](https://github.com/ClickHouse/ClickHouse/issues/54275). [#54639](https://github.com/ClickHouse/ClickHouse/pull/54639) ([Nikolay Degterinsky](https://github.com/evillique)). +* Respect max_block_size for array join to avoid possible OOM. Closes [#54290](https://github.com/ClickHouse/ClickHouse/issues/54290). [#54664](https://github.com/ClickHouse/ClickHouse/pull/54664) ([李扬](https://github.com/taiyang-li)). +* Add optional `version` argument to `rm` command in `keeper-client` to support safer deletes. [#54708](https://github.com/ClickHouse/ClickHouse/pull/54708) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Disable killing the server by systemd (that may lead to data loss when using Buffer tables). [#54744](https://github.com/ClickHouse/ClickHouse/pull/54744) ([Azat Khuzhin](https://github.com/azat)). +* Added field "is_deterministic" to system table "system.functions" which indicates whether the result of a function is stable between two invocations (given exactly the same inputs) or not. [#54766](https://github.com/ClickHouse/ClickHouse/pull/54766) ([Robert Schulze](https://github.com/rschu1ze)). +* Made the views in schema "information_schema" more compatible with the equivalent views in MySQL (i.e. modified and extended them) up to a point where Tableau Online is able to connect to ClickHouse. More specifically: 1. The type of field "information_schema.tables.table_type" changed from Enum8 to String. 2. Added fields "table_comment" and "table_collation" to view "information_schema.tables". 3. Added views "information_schema.key_column_usage" and "referential_constraints". 4. Replaced uppercase aliases in "information_schema" views with concrete uppercase columns. [#54773](https://github.com/ClickHouse/ClickHouse/pull/54773) ([Serge Klochkov](https://github.com/slvrtrn)). +* The query cache now returns an error if the user tries to cache the result of a query with a non-deterministic function such as "now()", "randomString()" and "dictGet()".
Compared to the previous behavior (silently not caching the result), this reduces confusion and surprise for users. [#54801](https://github.com/ClickHouse/ClickHouse/pull/54801) ([Robert Schulze](https://github.com/rschu1ze)). +* Forbid special columns for file/s3/url/... storages, fix insert into ephemeral columns from files. Closes [#53477](https://github.com/ClickHouse/ClickHouse/issues/53477). [#54803](https://github.com/ClickHouse/ClickHouse/pull/54803) ([Kruglov Pavel](https://github.com/Avogar)). +* Make collecting metadata for backup more configurable. [#54804](https://github.com/ClickHouse/ClickHouse/pull/54804) ([Vitaly Baranov](https://github.com/vitlibar)). +* `clickhouse-local`'s log file (if enabled with the --server_logs_file flag) will now prefix each line with a timestamp, thread id, etc, just like `clickhouse-server`. [#54807](https://github.com/ClickHouse/ClickHouse/pull/54807) ([Michael Kolupaev](https://github.com/al13n321)). +* Reuse HTTP connections in the s3 table function. [#54812](https://github.com/ClickHouse/ClickHouse/pull/54812) ([Michael Kolupaev](https://github.com/al13n321)). +* Avoid excessive calls to getifaddrs in isLocalAddress. [#54819](https://github.com/ClickHouse/ClickHouse/pull/54819) ([Duc Canh Le](https://github.com/canhld94)). +* Field "is_obsolete" in system.merge_tree_settings is now 1 for obsolete merge tree settings. Previously, only the description indicated that the setting is obsolete. [#54837](https://github.com/ClickHouse/ClickHouse/pull/54837) ([Robert Schulze](https://github.com/rschu1ze)). +* Make it possible to use plurals in interval literals: `INTERVAL 2 HOURS` is now equivalent to `INTERVAL 2 HOUR`. [#54860](https://github.com/ClickHouse/ClickHouse/pull/54860) ([Jordi Villar](https://github.com/jrdi)). +* Replace the linear method in `MergeTreeRangeReader::Stream::ceilRowsToCompleteGranules` with a binary search. [#54869](https://github.com/ClickHouse/ClickHouse/pull/54869) ([usurai](https://github.com/usurai)). +* Always allow the creation of a projection with `Nullable` PK. This fixes [#54814](https://github.com/ClickHouse/ClickHouse/issues/54814). [#54895](https://github.com/ClickHouse/ClickHouse/pull/54895) ([Amos Bird](https://github.com/amosbird)). +* Retry backup S3 operations after connection reset failure. [#54900](https://github.com/ClickHouse/ClickHouse/pull/54900) ([Vitaly Baranov](https://github.com/vitlibar)). +* Make the exception message exact in case the maximum value of a setting is less than the minimum value. [#54925](https://github.com/ClickHouse/ClickHouse/pull/54925) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* LIKE, match, and other regular expression matching functions now allow matching with patterns containing non-UTF-8 substrings by falling back to binary matching. Example: you can use `string LIKE '\xFE\xFF%'` to detect BOM. This closes [#54486](https://github.com/ClickHouse/ClickHouse/issues/54486). [#54942](https://github.com/ClickHouse/ClickHouse/pull/54942) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* ProfileEvents added ContextLockWaitMicroseconds event. [#55029](https://github.com/ClickHouse/ClickHouse/pull/55029) ([Maksim Kita](https://github.com/kitaisreal)). +* Added field "is_deterministic" to system table "system.functions" which indicates whether the result of a function is stable between two invocations (given exactly the same inputs) or not.
[#55035](https://github.com/ClickHouse/ClickHouse/pull/55035) ([Robert Schulze](https://github.com/rschu1ze)). +* View information_schema.tables now has a new field `data_length` which shows the approximate size of the data on disk. Required to run queries generated by Amazon QuickSight. [#55037](https://github.com/ClickHouse/ClickHouse/pull/55037) ([Robert Schulze](https://github.com/rschu1ze)). + +#### Build/Testing/Packaging Improvement +* ClickHouse is built with Musl instead of GLibc by default. [#52550](https://github.com/ClickHouse/ClickHouse/pull/52550) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* ClickHouse is built with Musl instead of GLibc. [#52721](https://github.com/ClickHouse/ClickHouse/pull/52721) ([Azat Khuzhin](https://github.com/azat)). +* Bumped the compiler of official and continuous integration builds of ClickHouse from Clang 16 to 17. [#53831](https://github.com/ClickHouse/ClickHouse/pull/53831) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix flaky test. The `wait_resolver` function was asserting the response to be == proxy1, but it might actually return proxy2. Account for that as well. [#54191](https://github.com/ClickHouse/ClickHouse/pull/54191) ([Arthur Passos](https://github.com/arthurpassos)). +* Regenerated tld data for lookups (`tldLookup.generated.cpp`). [#54269](https://github.com/ClickHouse/ClickHouse/pull/54269) ([Bharat Nallan](https://github.com/bharatnc)). +* Properly report the timeout for the check itself in `fast_test_check`/`stress_check`. [#54278](https://github.com/ClickHouse/ClickHouse/pull/54278) ([Igor Nikonov](https://github.com/devcrafter)). +* Suddenly, `test_host_regexp_multiple_ptr_records_concurrent` became flaky. [#54307](https://github.com/ClickHouse/ClickHouse/pull/54307) ([Arthur Passos](https://github.com/arthurpassos)). +* Fixed precise float parsing issue on s390x. [#54330](https://github.com/ClickHouse/ClickHouse/pull/54330) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Enrich `changed_images.json` with the latest tag from master for images that are not changed in the pull request. [#54369](https://github.com/ClickHouse/ClickHouse/pull/54369) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fixed endian issue in jemalloc_bins system table for s390x. [#54517](https://github.com/ClickHouse/ClickHouse/pull/54517) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Fixed random generation issue for UInt256 and IPv4 on s390x. [#54576](https://github.com/ClickHouse/ClickHouse/pull/54576) ([Harry Lee](https://github.com/HarryLeeIBM)). +* Remove redundant `clickhouse-keeper-client` symlink. [#54587](https://github.com/ClickHouse/ClickHouse/pull/54587) ([Tomas Barton](https://github.com/deric)). +* Use `/usr/bin/env` to resolve bash. [#54603](https://github.com/ClickHouse/ClickHouse/pull/54603) ([Fionera](https://github.com/fionera)). +* Move all `tests/ci/*.lib` files to the `stateless-tests` image. Closes [#54540](https://github.com/ClickHouse/ClickHouse/issues/54540). [#54668](https://github.com/ClickHouse/ClickHouse/pull/54668) ([Kruglov Pavel](https://github.com/Avogar)). +* We build and upload them for every push, which isn't worth it. [#54675](https://github.com/ClickHouse/ClickHouse/pull/54675) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Fixed SimHash function endian issue for s390x. [#54793](https://github.com/ClickHouse/ClickHouse/pull/54793) ([Harry Lee](https://github.com/HarryLeeIBM)).
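A quick look at the two introspection additions described above, assuming a v23.9 server (`is_deterministic` in `system.functions` and `data_length` in `information_schema.tables`):

```sql
-- Deterministic vs. non-deterministic functions.
SELECT name, is_deterministic
FROM system.functions
WHERE name IN ('abs', 'now', 'randomString');

-- Approximate on-disk data size per table, the field added for Amazon QuickSight.
SELECT table_name, data_length
FROM information_schema.tables
WHERE table_schema = currentDatabase();
```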
+* Do not clone the fast tests repo twice; parallelize submodules checkout; use the current user in the fast-tests container. [#54849](https://github.com/ClickHouse/ClickHouse/pull/54849) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Avoid running pull request ci workflow for fixes touching .md files only. [#54914](https://github.com/ClickHouse/ClickHouse/pull/54914) ([Max K.](https://github.com/mkaynov)). +* Added a CMake option `PROFILE_CPU`, needed to perform `perf record` without using the DWARF call graph. [#54917](https://github.com/ClickHouse/ClickHouse/pull/54917) ([Maksim Kita](https://github.com/kitaisreal)). +* Use `--gtest_output='json:'` to parse unit test results. [#54922](https://github.com/ClickHouse/ClickHouse/pull/54922) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Added support for additional scripts (you need to mount a volume) to extend the build process. [#55000](https://github.com/ClickHouse/ClickHouse/pull/55000) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* If the linker is different from LLD, stop with a fatal error. [#55036](https://github.com/ClickHouse/ClickHouse/pull/55036) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + +#### Bug Fix (user-visible misbehavior in an official stable release) + +* Store NULL in scalar result map for empty subquery result [#52240](https://github.com/ClickHouse/ClickHouse/pull/52240) ([vdimir](https://github.com/vdimir)). +* Fix misleading error message in OUTFILE with CapnProto/Protobuf [#52870](https://github.com/ClickHouse/ClickHouse/pull/52870) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix summary reporting with parallel replicas with LIMIT [#53050](https://github.com/ClickHouse/ClickHouse/pull/53050) ([Raúl Marín](https://github.com/Algunenano)). +* Fix throttling of BACKUPs from/to S3 (in case native copy was not used) and in some other places as well [#53336](https://github.com/ClickHouse/ClickHouse/pull/53336) ([Azat Khuzhin](https://github.com/azat)). +* Fix IO throttling during copying whole directories [#53338](https://github.com/ClickHouse/ClickHouse/pull/53338) ([Azat Khuzhin](https://github.com/azat)). +* Fix: moved to prewhere condition actions can lose column [#53492](https://github.com/ClickHouse/ClickHouse/pull/53492) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fixed internal error when replacing with byte-equal parts (issue 37737) [#53735](https://github.com/ClickHouse/ClickHouse/pull/53735) ([Pedro Riera](https://github.com/priera)). +* Fix: require columns participating in interpolate expression [#53754](https://github.com/ClickHouse/ClickHouse/pull/53754) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix cluster discovery initialization + setting up fail points in config [#54113](https://github.com/ClickHouse/ClickHouse/pull/54113) ([vdimir](https://github.com/vdimir)). +* Fix issues in accurateCastOrNull [#54136](https://github.com/ClickHouse/ClickHouse/pull/54136) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Fix nullable primary key in final [#54164](https://github.com/ClickHouse/ClickHouse/pull/54164) ([Amos Bird](https://github.com/amosbird)). +* Inserting only non-duplicate chunks in MV [#54184](https://github.com/ClickHouse/ClickHouse/pull/54184) ([Pedro Riera](https://github.com/priera)). +* Fix REPLACE/MOVE PARTITION with zero-copy replication [#54193](https://github.com/ClickHouse/ClickHouse/pull/54193) ([Alexander Tokmakov](https://github.com/tavplubix)).
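A minimal sketch of the situation addressed by the scalar-subquery fix above; the entry implies an empty scalar subquery should yield NULL in the cached scalar result map (the exact pre-fix failure mode is not spelled out there):

```sql
-- Scalar subquery over an empty result set; expected to evaluate to NULL.
SELECT (SELECT number FROM numbers(0)) AS empty_scalar;
```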
+* Fix: parallel replicas over distributed don't read from all replicas [#54199](https://github.com/ClickHouse/ClickHouse/pull/54199) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix: allow IPv6 for bloom filter [#54200](https://github.com/ClickHouse/ClickHouse/pull/54200) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* fix possible type mismatch with IPv4 [#54212](https://github.com/ClickHouse/ClickHouse/pull/54212) ([Bharat Nallan](https://github.com/bharatnc)). +* Fix system.data_skipping_indices for recreated indices [#54225](https://github.com/ClickHouse/ClickHouse/pull/54225) ([Artur Malchanau](https://github.com/Hexta)). +* fix name clash for multiple join rewriter v2 [#54240](https://github.com/ClickHouse/ClickHouse/pull/54240) ([Tao Wang](https://github.com/wangtZJU)). +* Fix unexpected errors in system.errors after join [#54306](https://github.com/ClickHouse/ClickHouse/pull/54306) ([vdimir](https://github.com/vdimir)). +* Fix isZeroOrNull(NULL) [#54316](https://github.com/ClickHouse/ClickHouse/pull/54316) ([flynn](https://github.com/ucasfl)). +* Fix: parallel replicas over distributed with prefer_localhost_replica=1 [#54334](https://github.com/ClickHouse/ClickHouse/pull/54334) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix logical error in vertical merge + replacing merge tree + optimize cleanup [#54368](https://github.com/ClickHouse/ClickHouse/pull/54368) ([alesapin](https://github.com/alesapin)). +* Fix possible error 'URI contains invalid characters' in s3 table function [#54373](https://github.com/ClickHouse/ClickHouse/pull/54373) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix segfault in AST optimization of `arrayExists` function [#54379](https://github.com/ClickHouse/ClickHouse/pull/54379) ([Nikolay Degterinsky](https://github.com/evillique)). +* Check for overflow before addition in `analysisOfVariance` function [#54385](https://github.com/ClickHouse/ClickHouse/pull/54385) ([Antonio Andelic](https://github.com/antonio2368)). +* reproduce and fix the bug in removeSharedRecursive [#54430](https://github.com/ClickHouse/ClickHouse/pull/54430) ([Sema Checherinda](https://github.com/CheSema)). +* Fix possible incorrect result with SimpleAggregateFunction in PREWHERE and FINAL [#54436](https://github.com/ClickHouse/ClickHouse/pull/54436) ([Azat Khuzhin](https://github.com/azat)). +* Fix filtering parts with indexHint for non analyzer [#54449](https://github.com/ClickHouse/ClickHouse/pull/54449) ([Azat Khuzhin](https://github.com/azat)). +* Fix aggregate projections with normalized states [#54480](https://github.com/ClickHouse/ClickHouse/pull/54480) ([Amos Bird](https://github.com/amosbird)). +* Bugfix/local multiquery parameter [#54498](https://github.com/ClickHouse/ClickHouse/pull/54498) ([CuiShuoGuo](https://github.com/bakam412)). +* clickhouse-local support --database command line argument [#54503](https://github.com/ClickHouse/ClickHouse/pull/54503) ([vdimir](https://github.com/vdimir)). +* Fix possible parsing error in WithNames formats with disabled input_format_with_names_use_header [#54513](https://github.com/ClickHouse/ClickHouse/pull/54513) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix rare case of CHECKSUM_DOESNT_MATCH error [#54549](https://github.com/ClickHouse/ClickHouse/pull/54549) ([alesapin](https://github.com/alesapin)). +* Fix zero copy garbage [#54550](https://github.com/ClickHouse/ClickHouse/pull/54550) ([Alexander Tokmakov](https://github.com/tavplubix)). 
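An illustrative schema for the bloom-filter-over-IPv6 fix above (table name and index parameters are hypothetical; the fix is what allows this combination):

```sql
-- Skipping index over an IPv6 column.
CREATE TABLE conn_log
(
    ts   DateTime,
    addr IPv6,
    INDEX addr_bf addr TYPE bloom_filter(0.01) GRANULARITY 4
)
ENGINE = MergeTree
ORDER BY ts;
```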
+* Fix sorting of UNION ALL of already sorted results [#54564](https://github.com/ClickHouse/ClickHouse/pull/54564) ([Vitaly Baranov](https://github.com/vitlibar)). +* Fix snapshot install in Keeper [#54572](https://github.com/ClickHouse/ClickHouse/pull/54572) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix race in `ColumnUnique` [#54575](https://github.com/ClickHouse/ClickHouse/pull/54575) ([Nikita Taranov](https://github.com/nickitat)). +* Annoy/Usearch index: Fix LOGICAL_ERROR during build-up with default values [#54600](https://github.com/ClickHouse/ClickHouse/pull/54600) ([Robert Schulze](https://github.com/rschu1ze)). +* Fix serialization of `ColumnDecimal` [#54601](https://github.com/ClickHouse/ClickHouse/pull/54601) ([Nikita Taranov](https://github.com/nickitat)). +* Fix schema inference for *Cluster functions for column names with spaces [#54635](https://github.com/ClickHouse/ClickHouse/pull/54635) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix using structure from insertion tables in case of defaults and explicit insert columns [#54655](https://github.com/ClickHouse/ClickHouse/pull/54655) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix: avoid using regex match, possibly containing alternation, as a key condition. [#54696](https://github.com/ClickHouse/ClickHouse/pull/54696) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Fix ReplacingMergeTree with vertical merge and cleanup [#54706](https://github.com/ClickHouse/ClickHouse/pull/54706) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix virtual columns having incorrect values after ORDER BY [#54811](https://github.com/ClickHouse/ClickHouse/pull/54811) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix filtering parts with indexHint for non analyzer (resubmit) [#54825](https://github.com/ClickHouse/ClickHouse/pull/54825) ([Azat Khuzhin](https://github.com/azat)). +* Fix Keeper segfault during shutdown [#54841](https://github.com/ClickHouse/ClickHouse/pull/54841) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix "Invalid number of rows in Chunk" in MaterializedPostgreSQL [#54844](https://github.com/ClickHouse/ClickHouse/pull/54844) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Move obsolete format settings to separate section [#54855](https://github.com/ClickHouse/ClickHouse/pull/54855) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix zero copy locks with hardlinks [#54859](https://github.com/ClickHouse/ClickHouse/pull/54859) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix `FINAL` produces invalid read ranges in a rare case [#54934](https://github.com/ClickHouse/ClickHouse/pull/54934) ([Nikita Taranov](https://github.com/nickitat)). +* Rebuild minmax_count_projection when partition key gets modified [#54943](https://github.com/ClickHouse/ClickHouse/pull/54943) ([Amos Bird](https://github.com/amosbird)). +* Fix bad cast to ColumnVector in function if [#55019](https://github.com/ClickHouse/ClickHouse/pull/55019) ([Kruglov Pavel](https://github.com/Avogar)). +* Fix: insert quorum w/o keeper retries [#55026](https://github.com/ClickHouse/ClickHouse/pull/55026) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix simple state with nullable [#55030](https://github.com/ClickHouse/ClickHouse/pull/55030) ([Pedro Riera](https://github.com/priera)). +* Prevent attaching parts from tables with different projections or indices [#55062](https://github.com/ClickHouse/ClickHouse/pull/55062) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). 
+ +#### NO CL ENTRY + +* NO CL ENTRY: 'Revert "Revert "Fixed wrong python test name pattern""'. [#54043](https://github.com/ClickHouse/ClickHouse/pull/54043) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* NO CL ENTRY: 'Revert "Fix: respect skip_unavailable_shards with parallel replicas"'. [#54189](https://github.com/ClickHouse/ClickHouse/pull/54189) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Add settings for real-time updates during query execution"'. [#54470](https://github.com/ClickHouse/ClickHouse/pull/54470) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Fix issues in accurateCastOrNull"'. [#54472](https://github.com/ClickHouse/ClickHouse/pull/54472) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Revert "Add settings for real-time updates during query execution""'. [#54476](https://github.com/ClickHouse/ClickHouse/pull/54476) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* NO CL ENTRY: 'Revert "add runOptimize call in bitmap write method"'. [#54528](https://github.com/ClickHouse/ClickHouse/pull/54528) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Optimize uniq to count"'. [#54566](https://github.com/ClickHouse/ClickHouse/pull/54566) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Add stateless test for clickhouse keeper-client --no-confirmation"'. [#54616](https://github.com/ClickHouse/ClickHouse/pull/54616) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* NO CL ENTRY: 'Revert "Remove flaky tests for the experimental `UNDROP` feature"'. [#54671](https://github.com/ClickHouse/ClickHouse/pull/54671) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Fix filtering parts with indexHint for non analyzer"'. [#54806](https://github.com/ClickHouse/ClickHouse/pull/54806) ([Azat Khuzhin](https://github.com/azat)). +* NO CL ENTRY: 'Revert "refine error code of duplicated index in create query"'. [#54840](https://github.com/ClickHouse/ClickHouse/pull/54840) ([Alexander Tokmakov](https://github.com/tavplubix)). +* NO CL ENTRY: 'Revert "Avoid excessive calls to getifaddrs in isLocalAddress"'. [#54893](https://github.com/ClickHouse/ClickHouse/pull/54893) ([Igor Nikonov](https://github.com/devcrafter)). +* NO CL ENTRY: 'Revert "Fix NATS high cpu usage"'. [#55005](https://github.com/ClickHouse/ClickHouse/pull/55005) ([Nikolay Degterinsky](https://github.com/evillique)). + +#### NOT FOR CHANGELOG / INSIGNIFICANT + +* libFuzzer: add CI fuzzers build, add tcp protocol fuzzer, fix other fuzzers. [#42599](https://github.com/ClickHouse/ClickHouse/pull/42599) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Add new exceptions to 4xx error [#50722](https://github.com/ClickHouse/ClickHouse/pull/50722) ([Boris Kuschel](https://github.com/bkuschel)). +* Test libunwind changes. [#51436](https://github.com/ClickHouse/ClickHouse/pull/51436) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix data race in copyFromIStreamWithProgressCallback [#51449](https://github.com/ClickHouse/ClickHouse/pull/51449) ([Michael Kolupaev](https://github.com/al13n321)). +* Abort on `std::logic_error` in CI [#51907](https://github.com/ClickHouse/ClickHouse/pull/51907) ([Alexander Tokmakov](https://github.com/tavplubix)). 
+* Unify setting http keep-alive timeout, increase default to 30s [#53068](https://github.com/ClickHouse/ClickHouse/pull/53068) ([Nikita Taranov](https://github.com/nickitat)). +* Add a regression test for broken Vertical merge after ADD+DROP COLUMN [#53214](https://github.com/ClickHouse/ClickHouse/pull/53214) ([Azat Khuzhin](https://github.com/azat)). +* Revert "Revert "dateDiff: add support for plural units."" [#53803](https://github.com/ClickHouse/ClickHouse/pull/53803) ([Han Fei](https://github.com/hanfei1991)). +* Fix some tests [#53892](https://github.com/ClickHouse/ClickHouse/pull/53892) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Refactoring of reading from `MergeTree` tables [#53931](https://github.com/ClickHouse/ClickHouse/pull/53931) ([Anton Popov](https://github.com/CurtizJ)). +* Use pathlib.Path in S3Helper, rewrite build reports, improve small things [#54010](https://github.com/ClickHouse/ClickHouse/pull/54010) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Correct UniquesHashSet to be endianness-independent. [#54045](https://github.com/ClickHouse/ClickHouse/pull/54045) ([Austin Kothig](https://github.com/kothiga)). +* Increase retries for test_merge_tree_azure_blob_storage [#54069](https://github.com/ClickHouse/ClickHouse/pull/54069) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). +* Fix SipHash128 reference for big-endian platforms [#54095](https://github.com/ClickHouse/ClickHouse/pull/54095) ([ltrk2](https://github.com/ltrk2)). +* Small usearch index improvements: metrics and configurable internal data type [#54103](https://github.com/ClickHouse/ClickHouse/pull/54103) ([Michael Kolupaev](https://github.com/al13n321)). +* Small refactoring for read from object storage [#54134](https://github.com/ClickHouse/ClickHouse/pull/54134) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Minor changes [#54171](https://github.com/ClickHouse/ClickHouse/pull/54171) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix hostname and co result constness in new analyzer [#54174](https://github.com/ClickHouse/ClickHouse/pull/54174) ([vdimir](https://github.com/vdimir)). +* Amend a confusing line of code in Loggers.cpp [#54183](https://github.com/ClickHouse/ClickHouse/pull/54183) ([Victor Krasnov](https://github.com/sirvickr)). +* Fix partition id pruning for analyzer. [#54185](https://github.com/ClickHouse/ClickHouse/pull/54185) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Update version after release [#54186](https://github.com/ClickHouse/ClickHouse/pull/54186) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Update version_date.tsv and changelogs after v23.8.1.2992-lts [#54188](https://github.com/ClickHouse/ClickHouse/pull/54188) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix pager in client/local interactive mode when not all data had been read [#54190](https://github.com/ClickHouse/ClickHouse/pull/54190) ([Azat Khuzhin](https://github.com/azat)). +* Fix flaky test `01099_operators_date_and_timestamp` [#54195](https://github.com/ClickHouse/ClickHouse/pull/54195) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Save system tables from s3_disk in the report [#54198](https://github.com/ClickHouse/ClickHouse/pull/54198) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix timezones in the CI Logs database [#54210](https://github.com/ClickHouse/ClickHouse/pull/54210) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
+* 2R: Fix: respect skip_unavailable_shards with parallel replicas [#54213](https://github.com/ClickHouse/ClickHouse/pull/54213) ([Igor Nikonov](https://github.com/devcrafter)). +* S3Queue is experimental [#54214](https://github.com/ClickHouse/ClickHouse/pull/54214) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improve vars with reference in Settings cpps [#54220](https://github.com/ClickHouse/ClickHouse/pull/54220) ([xuzifu666](https://github.com/xuzifu666)). +* Add ProfileEvents::Timer class [#54221](https://github.com/ClickHouse/ClickHouse/pull/54221) ([Stig Bakken](https://github.com/stigsb)). +* Test: extend cluster_all_replicas integration test with skip_unavailable_shards [#54223](https://github.com/ClickHouse/ClickHouse/pull/54223) ([Igor Nikonov](https://github.com/devcrafter)). +* remove semicolon [#54236](https://github.com/ClickHouse/ClickHouse/pull/54236) ([YinZheng-Sun](https://github.com/YinZheng-Sun)). +* Fix bad code in the `system.filesystem_cache`: catching exceptions [#54237](https://github.com/ClickHouse/ClickHouse/pull/54237) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Merge [#54236](https://github.com/ClickHouse/ClickHouse/issues/54236) [#54238](https://github.com/ClickHouse/ClickHouse/pull/54238) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Minor improvement, load from config [#54244](https://github.com/ClickHouse/ClickHouse/pull/54244) ([zhanglistar](https://github.com/zhanglistar)). +* Follow-up to [#54198](https://github.com/ClickHouse/ClickHouse/issues/54198) [#54246](https://github.com/ClickHouse/ClickHouse/pull/54246) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Properly re-initialize ZooKeeper fault injection [#54251](https://github.com/ClickHouse/ClickHouse/pull/54251) ([Alexander Gololobov](https://github.com/davenger)). +* Update ci-slack-bot.py [#54253](https://github.com/ClickHouse/ClickHouse/pull/54253) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix clickhouse-test --no-drop-if-fail on reference mismatch [#54256](https://github.com/ClickHouse/ClickHouse/pull/54256) ([Azat Khuzhin](https://github.com/azat)). +* Improve slack-bot-ci lambda [#54258](https://github.com/ClickHouse/ClickHouse/pull/54258) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Update version_date.tsv and changelogs after v23.3.12.11-lts [#54259](https://github.com/ClickHouse/ClickHouse/pull/54259) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Minor change [#54261](https://github.com/ClickHouse/ClickHouse/pull/54261) ([flynn](https://github.com/ucasfl)). +* Add a note of where the lambda is deployed [#54268](https://github.com/ClickHouse/ClickHouse/pull/54268) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Query cache: Log caching of entries [#54270](https://github.com/ClickHouse/ClickHouse/pull/54270) ([Robert Schulze](https://github.com/rschu1ze)). +* Update version_date.tsv and changelogs after v23.8.2.7-lts [#54273](https://github.com/ClickHouse/ClickHouse/pull/54273) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Fix test `02783_parsedatetimebesteffort_syslog` [#54279](https://github.com/ClickHouse/ClickHouse/pull/54279) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix `test_keeper_disks` [#54291](https://github.com/ClickHouse/ClickHouse/pull/54291) ([Antonio Andelic](https://github.com/antonio2368)).
+* Code improvement for reading from archives [#54293](https://github.com/ClickHouse/ClickHouse/pull/54293) ([Antonio Andelic](https://github.com/antonio2368)). +* Rollback testing part from [#42599](https://github.com/ClickHouse/ClickHouse/issues/42599) [#54301](https://github.com/ClickHouse/ClickHouse/pull/54301) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* CI: libFuzzer integration [#54310](https://github.com/ClickHouse/ClickHouse/pull/54310) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Update version_date.tsv and changelogs after v23.3.13.6-lts [#54313](https://github.com/ClickHouse/ClickHouse/pull/54313) ([robot-clickhouse](https://github.com/robot-clickhouse)). +* Add logs for parallel replica over distributed [#54315](https://github.com/ClickHouse/ClickHouse/pull/54315) ([Igor Nikonov](https://github.com/devcrafter)). +* Increase timeout for system.stack_trace in 01051_system_stack_trace [#54321](https://github.com/ClickHouse/ClickHouse/pull/54321) ([Azat Khuzhin](https://github.com/azat)). +* Fix replace_partition test [#54322](https://github.com/ClickHouse/ClickHouse/pull/54322) ([Pedro Riera](https://github.com/priera)). +* Fix segfault in system.zookeeper [#54326](https://github.com/ClickHouse/ClickHouse/pull/54326) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fixed flaky test `02841_parallel_replicas_summary` [#54331](https://github.com/ClickHouse/ClickHouse/pull/54331) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Consolidate GCD codec tests (Follow up to [#53149](https://github.com/ClickHouse/ClickHouse/issues/53149)) [#54332](https://github.com/ClickHouse/ClickHouse/pull/54332) ([Robert Schulze](https://github.com/rschu1ze)). +* Fixed wrong dereference problem in Context::setTemporaryStorageInCache [#54333](https://github.com/ClickHouse/ClickHouse/pull/54333) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Used assert_cast instead of dynamic_cast in ExternalDataSourceCache [#54336](https://github.com/ClickHouse/ClickHouse/pull/54336) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix bad punctuation in Keeper's logs [#54338](https://github.com/ClickHouse/ClickHouse/pull/54338) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Improved protection from dereferencing of nullptr [#54339](https://github.com/ClickHouse/ClickHouse/pull/54339) ([Alexey Gerasimchuck](https://github.com/Demilivor)). +* Fix filesystem cache test [#54343](https://github.com/ClickHouse/ClickHouse/pull/54343) ([Salvatore Mesoraca](https://github.com/aiven-sal)). +* Parallel replicas: remove unused code [#54354](https://github.com/ClickHouse/ClickHouse/pull/54354) ([Igor Nikonov](https://github.com/devcrafter)). +* Fix flaky test test_storage_azure_blob_storage/test.py::test_schema_iference_cache [#54367](https://github.com/ClickHouse/ClickHouse/pull/54367) ([Kruglov Pavel](https://github.com/Avogar)). +* Enable hedged requests integration tests with tsan, use max_distributed_connections=1 to fix possible flakiness [#54371](https://github.com/ClickHouse/ClickHouse/pull/54371) ([Kruglov Pavel](https://github.com/Avogar)). +* Use abiv2 when generating OpenSSL .s files for powerpc64le [#54375](https://github.com/ClickHouse/ClickHouse/pull/54375) ([Boris Kuschel](https://github.com/bkuschel)). +* Disable prefer_localhost_replica in test for parallel replicas [#54377](https://github.com/ClickHouse/ClickHouse/pull/54377) ([Igor Nikonov](https://github.com/devcrafter)). 
+* Fix incorrect formatting of CREATE query with PRIMARY KEY [#54403](https://github.com/ClickHouse/ClickHouse/pull/54403) ([Nikolay Degterinsky](https://github.com/evillique)). +* Fix failed assert in attach thread during startup retries [#54408](https://github.com/ClickHouse/ClickHouse/pull/54408) ([Antonio Andelic](https://github.com/antonio2368)). +* Hashtable order fix on big endian platform [#54409](https://github.com/ClickHouse/ClickHouse/pull/54409) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Small fine-tune for using ColumnNullable pointer [#54435](https://github.com/ClickHouse/ClickHouse/pull/54435) ([Alex Cheng](https://github.com/Alex-Cheng)). +* Update automated commit status comment [#54441](https://github.com/ClickHouse/ClickHouse/pull/54441) ([vdimir](https://github.com/vdimir)). +* Remove useless line [#54466](https://github.com/ClickHouse/ClickHouse/pull/54466) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a log message on replicated table drop [#54467](https://github.com/ClickHouse/ClickHouse/pull/54467) ([Dmitry Novik](https://github.com/novikd)). +* Cleanup: unnecessary SelectQueryInfo usage around distributed [#54468](https://github.com/ClickHouse/ClickHouse/pull/54468) ([Igor Nikonov](https://github.com/devcrafter)). +* Add `instance_type` column to CI Logs and the `checks` table [#54469](https://github.com/ClickHouse/ClickHouse/pull/54469) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Refactor IHints [#54481](https://github.com/ClickHouse/ClickHouse/pull/54481) ([flynn](https://github.com/ucasfl)). +* Fix strange message [#54489](https://github.com/ClickHouse/ClickHouse/pull/54489) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Bump re2 to latest main [#54492](https://github.com/ClickHouse/ClickHouse/pull/54492) ([Robert Schulze](https://github.com/rschu1ze)). +* S3 artifacts [#54504](https://github.com/ClickHouse/ClickHouse/pull/54504) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* Flush logs for system.backup_log test. [#54507](https://github.com/ClickHouse/ClickHouse/pull/54507) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). +* Fix use-after-free in `MergeTreePrefetchedReadPool` [#54512](https://github.com/ClickHouse/ClickHouse/pull/54512) ([Anton Popov](https://github.com/CurtizJ)). +* Disable parallel replicas on shards with not enough nodes [#54519](https://github.com/ClickHouse/ClickHouse/pull/54519) ([Igor Nikonov](https://github.com/devcrafter)). +* Parallel replicas: cleanup unused params [#54520](https://github.com/ClickHouse/ClickHouse/pull/54520) ([Igor Nikonov](https://github.com/devcrafter)). +* FunctionHelpers remove areTypesEqual function [#54546](https://github.com/ClickHouse/ClickHouse/pull/54546) ([Maksim Kita](https://github.com/kitaisreal)). +* Add stateless test for clickhouse keeper-client --no-confirmation [#54547](https://github.com/ClickHouse/ClickHouse/pull/54547) ([Azat Khuzhin](https://github.com/azat)). +* Increase default timeout in tests for keeper-client [#54551](https://github.com/ClickHouse/ClickHouse/pull/54551) ([pufit](https://github.com/pufit)). +* clang-format: Disable namespace indentation and omit {} in if/for/while [#54554](https://github.com/ClickHouse/ClickHouse/pull/54554) ([Robert Schulze](https://github.com/rschu1ze)). +* ngramDistance* queries fix for big endian platform [#54555](https://github.com/ClickHouse/ClickHouse/pull/54555) ([Suzy Wang](https://github.com/SuzyWangIBMer)). 
+* Fix AST fuzzer crash in MergeTreeIndex{FullText|Inverted} [#54563](https://github.com/ClickHouse/ClickHouse/pull/54563) ([Robert Schulze](https://github.com/rschu1ze)). +* Remove output_format_markdown_escape_special_characters from settings changes history [#54585](https://github.com/ClickHouse/ClickHouse/pull/54585) ([Kruglov Pavel](https://github.com/Avogar)). +* Add basic logic to find releasable commits [#54604](https://github.com/ClickHouse/ClickHouse/pull/54604) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix reading of virtual columns in reverse order [#54610](https://github.com/ClickHouse/ClickHouse/pull/54610) ([Anton Popov](https://github.com/CurtizJ)). +* Fix possible CANNOT_READ_ALL_DATA during ZooKeeper client finalization and add some tests [#54632](https://github.com/ClickHouse/ClickHouse/pull/54632) ([Azat Khuzhin](https://github.com/azat)). +* Fix a bug in addData and subData functions [#54636](https://github.com/ClickHouse/ClickHouse/pull/54636) ([Nikolay Degterinsky](https://github.com/evillique)). +* Follow-up to [#54550](https://github.com/ClickHouse/ClickHouse/issues/54550) [#54641](https://github.com/ClickHouse/ClickHouse/pull/54641) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Remove broken lockless variant of re2 [#54642](https://github.com/ClickHouse/ClickHouse/pull/54642) ([Robert Schulze](https://github.com/rschu1ze)). +* Bump abseil [#54646](https://github.com/ClickHouse/ClickHouse/pull/54646) ([Robert Schulze](https://github.com/rschu1ze)). +* limit the delay before next try in S3 [#54651](https://github.com/ClickHouse/ClickHouse/pull/54651) ([Sema Checherinda](https://github.com/CheSema)). +* Fix parser unit tests [#54670](https://github.com/ClickHouse/ClickHouse/pull/54670) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)). +* Fix: Log engine Mark file to read and write in little Endian for s390x [#54677](https://github.com/ClickHouse/ClickHouse/pull/54677) ([bhavnajindal](https://github.com/bhavnajindal)). +* Update WebObjectStorage.cpp [#54695](https://github.com/ClickHouse/ClickHouse/pull/54695) ([Kseniia Sumarokova](https://github.com/kssenii)). +* add cancelation point to s3 retries [#54697](https://github.com/ClickHouse/ClickHouse/pull/54697) ([Sema Checherinda](https://github.com/CheSema)). +* Revert default batch size for Keeper [#54745](https://github.com/ClickHouse/ClickHouse/pull/54745) ([Antonio Andelic](https://github.com/antonio2368)). +* Enable `allow_experimental_undrop_table_query` [#54754](https://github.com/ClickHouse/ClickHouse/pull/54754) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Fix 02882_clickhouse_keeper_client_no_confirmation test [#54761](https://github.com/ClickHouse/ClickHouse/pull/54761) ([Azat Khuzhin](https://github.com/azat)). +* Better exception message in checkDataPart [#54768](https://github.com/ClickHouse/ClickHouse/pull/54768) ([alesapin](https://github.com/alesapin)). +* Don't use default move assignment in TimerDescriptor [#54769](https://github.com/ClickHouse/ClickHouse/pull/54769) ([Kruglov Pavel](https://github.com/Avogar)). +* Add retries to rests test_async_query_sending/test_async_connect [#54772](https://github.com/ClickHouse/ClickHouse/pull/54772) ([Kruglov Pavel](https://github.com/Avogar)). +* update comment [#54780](https://github.com/ClickHouse/ClickHouse/pull/54780) ([flynn](https://github.com/ucasfl)). +* Fix broken tests for clickhouse-diagnostics [#54790](https://github.com/ClickHouse/ClickHouse/pull/54790) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). +* refine error code of duplicated index in create query [#54791](https://github.com/ClickHouse/ClickHouse/pull/54791) ([Han Fei](https://github.com/hanfei1991)). +* Do not set PR status label [#54799](https://github.com/ClickHouse/ClickHouse/pull/54799) ([vdimir](https://github.com/vdimir)). +* Prevent parquet schema inference reading the first 1 MB of the file unnecessarily [#54808](https://github.com/ClickHouse/ClickHouse/pull/54808) ([Michael Kolupaev](https://github.com/al13n321)). +* Prevent ParquetMetadata reading 40 MB from each file unnecessarily [#54809](https://github.com/ClickHouse/ClickHouse/pull/54809) ([Michael Kolupaev](https://github.com/al13n321)). +* Use appropriate error code instead of LOGICAL_ERROR [#54810](https://github.com/ClickHouse/ClickHouse/pull/54810) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). +* Adjusting `num_streams` by expected work in StorageS3 [#54815](https://github.com/ClickHouse/ClickHouse/pull/54815) ([pufit](https://github.com/pufit)). +* Fix test_backup_restore_on_cluster/test.py::test_stop_other_host_during_backup flakiness [#54816](https://github.com/ClickHouse/ClickHouse/pull/54816) ([Azat Khuzhin](https://github.com/azat)). +* Remove config files sizes check [#54824](https://github.com/ClickHouse/ClickHouse/pull/54824) ([Igor Nikonov](https://github.com/devcrafter)). +* Set correct size for signal pipe buffer [#54836](https://github.com/ClickHouse/ClickHouse/pull/54836) ([Antonio Andelic](https://github.com/antonio2368)). +* Refactor and split up vector search tests [#54839](https://github.com/ClickHouse/ClickHouse/pull/54839) ([Robert Schulze](https://github.com/rschu1ze)). +* Add some logging to StorageRabbitMQ [#54842](https://github.com/ClickHouse/ClickHouse/pull/54842) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Update CHANGELOG.md [#54843](https://github.com/ClickHouse/ClickHouse/pull/54843) ([Ilya Yatsishin](https://github.com/qoega)). +* Refactor and simplify multi-directory globs [#54863](https://github.com/ClickHouse/ClickHouse/pull/54863) ([Andrey Zvonov](https://github.com/zvonand)). +* KeeperTCPHandler.cpp: Fix clang-17 build [#54874](https://github.com/ClickHouse/ClickHouse/pull/54874) ([Robert Schulze](https://github.com/rschu1ze)). +* Decrease timeout for fast tests with a commit [#54878](https://github.com/ClickHouse/ClickHouse/pull/54878) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). +* More stable `02703_keeper_map_concurrent_create_drop` [#54879](https://github.com/ClickHouse/ClickHouse/pull/54879) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix division by zero in StorageS3 [#54904](https://github.com/ClickHouse/ClickHouse/pull/54904) ([pufit](https://github.com/pufit)). +* Set exception for promise in `CreatingSetsTransform` [#54920](https://github.com/ClickHouse/ClickHouse/pull/54920) ([Antonio Andelic](https://github.com/antonio2368)). +* Fix an exception message in Pipe::addTransform [#54926](https://github.com/ClickHouse/ClickHouse/pull/54926) ([Alex Cheng](https://github.com/Alex-Cheng)). +* Fix data race during BackupsWorker::backup_log initialization [#54928](https://github.com/ClickHouse/ClickHouse/pull/54928) ([Victor Krasnov](https://github.com/sirvickr)). +* Provide support for BSON on BE [#54933](https://github.com/ClickHouse/ClickHouse/pull/54933) ([Austin Kothig](https://github.com/kothiga)). 
+* Set a minimum limit of `num_streams` in StorageS3 [#54936](https://github.com/ClickHouse/ClickHouse/pull/54936) ([pufit](https://github.com/pufit)). +* Ipv4 read big endian [#54938](https://github.com/ClickHouse/ClickHouse/pull/54938) ([Suzy Wang](https://github.com/SuzyWangIBMer)). +* Fix data race in SYSTEM STOP LISTEN [#54939](https://github.com/ClickHouse/ClickHouse/pull/54939) ([Nikolay Degterinsky](https://github.com/evillique)). +* Add desperate instrumentation for debugging deadlock in MultiplexedConnections [#54940](https://github.com/ClickHouse/ClickHouse/pull/54940) ([Michael Kolupaev](https://github.com/al13n321)). +* Respect max_block_size while generating rows for system.stack_trace (will fix flakiness of the test) [#54946](https://github.com/ClickHouse/ClickHouse/pull/54946) ([Azat Khuzhin](https://github.com/azat)). +* Remove test `01051_system_stack_trace` [#54951](https://github.com/ClickHouse/ClickHouse/pull/54951) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Add a test for compatibility [#54960](https://github.com/ClickHouse/ClickHouse/pull/54960) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove test `02151_hash_table_sizes_stats` [#54961](https://github.com/ClickHouse/ClickHouse/pull/54961) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Remove 02151_hash_table_sizes_stats_distributed (fixes broken CI) [#54969](https://github.com/ClickHouse/ClickHouse/pull/54969) ([Azat Khuzhin](https://github.com/azat)). +* Use pregenerated gRPC protocol pb2 files to fix test flakyness. [#54976](https://github.com/ClickHouse/ClickHouse/pull/54976) ([Vitaly Baranov](https://github.com/vitlibar)). +* Delete a test [#54984](https://github.com/ClickHouse/ClickHouse/pull/54984) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Add assertion [#54985](https://github.com/ClickHouse/ClickHouse/pull/54985) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix test parallel replicas over distributed [#54987](https://github.com/ClickHouse/ClickHouse/pull/54987) ([Igor Nikonov](https://github.com/devcrafter)). +* Update README.md [#54990](https://github.com/ClickHouse/ClickHouse/pull/54990) ([Tyler Hannan](https://github.com/tylerhannan)). +* Re-enable clang-tidy checks disabled in the Clang 17 update [#54999](https://github.com/ClickHouse/ClickHouse/pull/54999) ([Robert Schulze](https://github.com/rschu1ze)). +* Print more information about one logical error in MergeTreeDataWriter [#55001](https://github.com/ClickHouse/ClickHouse/pull/55001) ([Michael Kolupaev](https://github.com/al13n321)). +* Add a test [#55003](https://github.com/ClickHouse/ClickHouse/pull/55003) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Lower log levels for `SSOCredentialsProvider` [#55012](https://github.com/ClickHouse/ClickHouse/pull/55012) ([Antonio Andelic](https://github.com/antonio2368)). +* Set exception for promise in `CreatingSetsTransform` in more cases [#55013](https://github.com/ClickHouse/ClickHouse/pull/55013) ([Antonio Andelic](https://github.com/antonio2368)). +* Setting compile_aggregate_expressions comment fix [#55020](https://github.com/ClickHouse/ClickHouse/pull/55020) ([Maksim Kita](https://github.com/kitaisreal)). +* Revert "Added field "is_deterministic" to system.functions" [#55022](https://github.com/ClickHouse/ClickHouse/pull/55022) ([Alexander Tokmakov](https://github.com/tavplubix)). +* Get rid of the most of `os.path` stuff [#55028](https://github.com/ClickHouse/ClickHouse/pull/55028) ([Mikhail f. 
Shiryaev](https://github.com/Felixoid)). +* Fix pre-build scripts for old branches [#55032](https://github.com/ClickHouse/ClickHouse/pull/55032) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Review fix for [#54935](https://github.com/ClickHouse/ClickHouse/issues/54935) [#55042](https://github.com/ClickHouse/ClickHouse/pull/55042) ([flynn](https://github.com/ucasfl)). +* Update gtest_lru_file_cache.cpp [#55053](https://github.com/ClickHouse/ClickHouse/pull/55053) ([Kseniia Sumarokova](https://github.com/kssenii)). +* Fix prebuild scripts one more time [#55059](https://github.com/ClickHouse/ClickHouse/pull/55059) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Use different names for variables inside build.sh [#55067](https://github.com/ClickHouse/ClickHouse/pull/55067) ([Nikita Mikhaylov](https://github.com/nikitamikhaylov)). +* Remove String Jaccard Index [#55080](https://github.com/ClickHouse/ClickHouse/pull/55080) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* I don't understand why backup log is not enabled by default [#55081](https://github.com/ClickHouse/ClickHouse/pull/55081) ([Alexey Milovidov](https://github.com/alexey-milovidov)). +* Fix typo in packager when ccache is used [#55104](https://github.com/ClickHouse/ClickHouse/pull/55104) ([Ilya Yatsishin](https://github.com/qoega)). +* Reduce flakiness of 01455_opentelemetry_distributed [#55111](https://github.com/ClickHouse/ClickHouse/pull/55111) ([Michael Kolupaev](https://github.com/al13n321)). +* Fix build [#55113](https://github.com/ClickHouse/ClickHouse/pull/55113) ([Alexey Milovidov](https://github.com/alexey-milovidov)). + diff --git a/docs/en/development/build-cross-osx.md b/docs/en/development/build-cross-osx.md index ce8d1b77526d..b70fc36e28e6 100644 --- a/docs/en/development/build-cross-osx.md +++ b/docs/en/development/build-cross-osx.md @@ -11,14 +11,14 @@ This is intended for continuous integration checks that run on Linux servers. If The cross-build for macOS is based on the [Build instructions](../development/build.md), follow them first. -## Install Clang-16 +## Install Clang-17 Follow the instructions from https://apt.llvm.org/ for your Ubuntu or Debian setup. For example the commands for Bionic are like: ``` bash -sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-16 main" >> /etc/apt/sources.list -sudo apt-get install clang-16 +sudo echo "deb [trusted=yes] http://apt.llvm.org/bionic/ llvm-toolchain-bionic-17 main" >> /etc/apt/sources.list +sudo apt-get install clang-17 ``` ## Install Cross-Compilation Toolset {#install-cross-compilation-toolset} @@ -55,7 +55,7 @@ curl -L 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX1 cd ClickHouse mkdir build-darwin cd build-darwin -CC=clang-16 CXX=clang++-16 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake .. +CC=clang-17 CXX=clang++-17 cmake -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/x86_64-apple-darwin-install_name_tool -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib -DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld -DCMAKE_TOOLCHAIN_FILE=cmake/darwin/toolchain-x86_64.cmake .. 
ninja
```

diff --git a/docs/en/development/build-cross-riscv.md b/docs/en/development/build-cross-riscv.md
index e3550a046c75..c21353f7f73c 100644
--- a/docs/en/development/build-cross-riscv.md
+++ b/docs/en/development/build-cross-riscv.md
@@ -23,7 +23,7 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
``` bash
cd ClickHouse
mkdir build-riscv64
-CC=clang-16 CXX=clang++-16 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF
+CC=clang-17 CXX=clang++-17 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF
ninja -C build-riscv64
```

diff --git a/docs/en/development/build-cross-s390x.md b/docs/en/development/build-cross-s390x.md
index be2c37f5f412..088dd6f26794 100644
--- a/docs/en/development/build-cross-s390x.md
+++ b/docs/en/development/build-cross-s390x.md
@@ -90,34 +90,117 @@ Process 1 stopped
## Visual Studio Code integration

-- [CodeLLDB extension](https://github.com/vadimcn/vscode-lldb) is required for visual debugging, the [Command Variable](https://github.com/rioj7/command-variable) extension can help dynamic launches if using [cmake variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md).
-- Make sure to set the backend to your llvm installation eg. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"`
-- Launcher:
+- The [CodeLLDB](https://github.com/vadimcn/vscode-lldb) extension is required for visual debugging.
+- The [Command Variable](https://github.com/rioj7/command-variable) extension can help with dynamic launches when using [CMake Variants](https://github.com/microsoft/vscode-cmake-tools/blob/main/docs/variants.md).
+- Make sure to set the backend to your LLVM installation, e.g. `"lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"`
+- Make sure to run the clickhouse executable in debug mode prior to launch. (It is also possible to create a `preLaunchTask` that automates this.)
+
+### Example configurations
+#### cmake-variants.yaml
+```yaml
+buildType:
+  default: relwithdebinfo
+  choices:
+    debug:
+      short: Debug
+      long: Emit debug information
+      buildType: Debug
+    release:
+      short: Release
+      long: Optimize generated code
+      buildType: Release
+    relwithdebinfo:
+      short: RelWithDebInfo
+      long: Release with Debug Info
+      buildType: RelWithDebInfo
+    minsizerel:
+      short: MinSizeRel
+      long: Minimum Size Release
+      buildType: MinSizeRel
+
+toolchain:
+  default: default
+  description: Select toolchain
+  choices:
+    default:
+      short: x86_64
+      long: x86_64
+    s390x:
+      short: s390x
+      long: s390x
+      settings:
+        CMAKE_TOOLCHAIN_FILE: cmake/linux/toolchain-s390x.cmake
+```
+
+#### launch.json
```json
{
    "version": "0.2.0",
    "configurations": [
        {
-            "name": "Debug",
            "type": "lldb",
            "request": "custom",
-            "targetCreateCommands": ["target create ${command:cmake.launchTargetDirectory}/clickhouse"],
-            "processCreateCommands": ["settings set target.source-map ${input:targetdir} ${workspaceFolder}", "gdb-remote 31338"],
-            "sourceMap": { "${input:targetdir}": "${workspaceFolder}" },
+            "name": "(lldb) Launch s390x with qemu",
+            "targetCreateCommands": ["target create ${command:cmake.launchTargetPath}"],
+            "processCreateCommands": ["gdb-remote 2159"],
+            "preLaunchTask": "Run ClickHouse"
        }
-    ],
-    "inputs": [
+    ]
+}
+```
+
+#### settings.json
+This would also put different builds under different subfolders of the `build` folder.
+```json
+{
+    "cmake.buildDirectory": "${workspaceFolder}/build/${buildKitVendor}-${buildKitVersion}-${variant:toolchain}-${variant:buildType}",
+    "lldb.library": "/usr/lib/x86_64-linux-gnu/liblldb-15.so"
+}
+```
+
+#### run-debug.sh
+```sh
+#! /bin/sh
+echo 'Starting debugger session'
+cd "$1"
+qemu-s390x-static -g 2159 -L /usr/s390x-linux-gnu "$2" "$3" "$4"
+```
+
+#### tasks.json
+Defines a task to run the compiled executable in `server` mode under a `tmp` folder next to the binaries, with the configuration from `programs/server/config.xml`.
+```json
+{
+    "version": "2.0.0",
+    "tasks": [
        {
-            "id": "targetdir",
-            "type": "command",
-            "command": "extension.commandvariable.transform",
-            "args": {
-                "text": "${command:cmake.launchTargetDirectory}",
-                "find": ".*/([^/]+)/[^/]+$",
-                "replace": "$1"
-            }
+            "label": "Run ClickHouse",
+            "type": "shell",
+            "isBackground": true,
+            "command": "${workspaceFolder}/.vscode/run-debug.sh",
+            "args": [
+                "${command:cmake.launchTargetDirectory}/tmp",
+                "${command:cmake.launchTargetPath}",
+                "server",
+                "--config-file=${workspaceFolder}/programs/server/config.xml"
+            ],
+            "problemMatcher": [
+                {
+                    "pattern": [
+                        {
+                            "regexp": ".",
+                            "file": 1,
+                            "location": 2,
+                            "message": 3
+                        }
+                    ],
+                    "background": {
+                        "activeOnStart": true,
+                        "beginsPattern": "^Starting debugger session",
+                        "endsPattern": ".*"
+                    }
+                }
+            ]
        }
    ]
}
```
-- Make sure to run the clickhouse executable in debug mode prior to launch. (It is also possible to create a `preLaunchTask` that automates this)
\ No newline at end of file

diff --git a/docs/en/development/build.md b/docs/en/development/build.md
index 83a4550df884..b474c4456047 100644
--- a/docs/en/development/build.md
+++ b/docs/en/development/build.md
@@ -11,7 +11,22 @@ Supported platforms:

- x86_64
- AArch64
-- Power9 (experimental)
+- PowerPC 64 LE (experimental)
+- RISC-V 64 (experimental)
+
+## Building in Docker
+We use the Docker image `clickhouse/binary-builder` for our CI builds. It contains everything necessary to build the binary and packages. There is a script `docker/packager/packager` to ease the use of the image:
+
+```bash
+# define a directory for the output artifacts
+output_dir="build_results"
+# the simplest build
+./docker/packager/packager --package-type=binary --output-dir "$output_dir"
+# build debian packages
+./docker/packager/packager --package-type=deb --output-dir "$output_dir"
+# by default, debian packages use thin LTO, so we can override it to speed up the build
+CMAKE_FLAGS='-DENABLE_THINLTO=' ./docker/packager/packager --package-type=deb --output-dir "./$(git rev-parse --show-cdup)/build_results"
+```

## Building on Ubuntu

@@ -27,22 +42,22 @@ sudo apt-get install git cmake ccache python3 ninja-build nasm yasm gawk lsb-rel

### Install and Use the Clang compiler

-On Ubuntu/Debian you can use LLVM's automatic installation script, see [here](https://apt.llvm.org/).
+On Ubuntu/Debian, you can use LLVM's automatic installation script; see [here](https://apt.llvm.org/).

``` bash
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
```

-Note: in case of troubles, you can also use this:
+Note: in case of trouble, you can also use this:

```bash
sudo apt-get install software-properties-common
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
```

-For other Linux distribution - check the availability of LLVM's [prebuild packages](https://releases.llvm.org/download.html).
+For other Linux distributions, check the availability of LLVM's [prebuilt packages](https://releases.llvm.org/download.html).

-As of April 2023, any version of Clang >= 15 will work.
+As of August 2023, clang-16 or higher will work.

GCC as a compiler is not supported.
To build with a specific Clang version:

@@ -52,8 +67,8 @@ to see what version you have installed before setting this environment variable.
:::

``` bash
-export CC=clang-16
-export CXX=clang++-16
+export CC=clang-17
+export CXX=clang++-17
```

### Checkout ClickHouse Sources {#checkout-clickhouse-sources}

@@ -77,8 +92,12 @@ cmake -S . -B build
cmake --build build  # or: `cd build; ninja`
```

+:::tip
+In case `cmake` isn't able to detect the number of available logical cores, the build will run in a single thread. To overcome this, you can tweak `cmake` to use a specific number of threads with the `-j` flag, for example, `cmake --build build -j 16`. Alternatively, you can generate build files with a specific number of jobs in advance to avoid always setting the flag: `cmake -DPARALLEL_COMPILE_JOBS=16 -S . -B build`, where `16` is the desired number of threads.
+:::
+
To create an executable, run `cmake --build build --target clickhouse` (or: `cd build; ninja clickhouse`).
-This will create executable `build/programs/clickhouse` which can be used with `client` or `server` arguments.
+This will create an executable `build/programs/clickhouse`, which can be used with `client` or `server` arguments.

## Building on Any Linux {#how-to-build-clickhouse-on-any-linux}

The build requires the following components:

- Git (used to checkout the sources, not needed for the build)
- CMake 3.20 or newer
-- Compiler: Clang 15 or newer
-- Linker: lld 15 or newer
+- Compiler: clang-17 or newer
+- Linker: lld-17 or newer
- Ninja
- Yasm
- Gawk

-If all the components are installed, you may build in the same way as the steps above.
+If all the components are installed, you may build it in the same way as the steps above.
Example for OpenSUSE Tumbleweed:

@@ -108,7 +127,7 @@ Example for Fedora Rawhide:

``` bash
sudo yum update
-sudo yum --nogpg install git cmake make clang python3 ccache nasm yasm gawk
+sudo yum --nogpg install git cmake make clang python3 ccache lld nasm yasm gawk
git clone --recursive https://github.com/ClickHouse/ClickHouse.git
mkdir build
cmake -S . -B build

diff --git a/docs/en/development/building_and_benchmarking_deflate_qpl.md b/docs/en/development/building_and_benchmarking_deflate_qpl.md
index 0501c1cbdcb1..4e01b41ab3c1 100644
--- a/docs/en/development/building_and_benchmarking_deflate_qpl.md
+++ b/docs/en/development/building_and_benchmarking_deflate_qpl.md
@@ -7,12 +7,8 @@ description: How to build Clickhouse and run benchmark with DEFLATE_QPL Codec

# Build ClickHouse with DEFLATE_QPL

-- Make sure your target machine meet the QPL required [prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites)
-- Pass the following flag to CMake when building ClickHouse:
-
-``` bash
-cmake -DENABLE_QPL=1 ..
-```
+- Make sure your host machine meets the QPL required [prerequisites](https://intel.github.io/qpl/documentation/get_started_docs/installation.html#prerequisites)
+- DEFLATE_QPL is enabled by default during the cmake build. If you accidentally changed it, double-check the build flag: ENABLE_QPL=1

- For generic requirements, please refer to the ClickHouse generic [build instructions](/docs/en/development/build.md)

diff --git a/docs/en/development/continuous-integration.md b/docs/en/development/continuous-integration.md
index 738c5458cc39..eec5ccbb9dc7 100644
--- a/docs/en/development/continuous-integration.md
+++ b/docs/en/development/continuous-integration.md
@@ -102,7 +102,7 @@ Builds ClickHouse in various configurations for use in further steps. You have t

### Report Details

-- **Compiler**: `clang-16`, optionally with the name of a target platform
+- **Compiler**: `clang-17`, optionally with the name of a target platform
- **Build type**: `Debug` or `RelWithDebInfo` (cmake).
- **Sanitizer**: `none` (without sanitizers), `address` (ASan), `memory` (MSan), `undefined` (UBSan), or `thread` (TSan).
- **Status**: `success` or `fail`

@@ -141,6 +141,10 @@ Runs [stateful functional tests](tests.md#functional-tests). Treat them in the s

Runs [integration tests](tests.md#integration-tests).

+## Bugfix validate check
+Checks that there is either a new test (functional or integration) or changed tests that fail with the binary built on the master branch. This check is triggered when a pull request has the "pr-bugfix" label.
+
+
## Stress Test

Runs stateless functional tests concurrently from several clients to detect concurrency-related errors. If it fails:

diff --git a/docs/en/development/developer-instruction.md b/docs/en/development/developer-instruction.md
index 1f3ab1aae2c8..8ec049dd0046 100644
--- a/docs/en/development/developer-instruction.md
+++ b/docs/en/development/developer-instruction.md
@@ -152,7 +152,7 @@ While inside the `build` directory, configure your build by running CMake. Befor

export CC=clang CXX=clang++
cmake ..

-If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-16 CXX=clang++-16`. The clang version will be in the script output.
+If you installed clang using the automatic installation script above, also specify the version of clang installed in the first command, e.g. `export CC=clang-17 CXX=clang++-17`. The clang version will be in the script output.
The `CC` variable specifies the compiler for C (short for C Compiler), and the `CXX` variable specifies which C++ compiler is to be used for building.

@@ -276,8 +276,6 @@ Most probably some of the builds will fail at first times. This is due to the fa

## Browse ClickHouse Source Code {#browse-clickhouse-source-code}

-You can use the **Woboq** online code browser available [here](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). It provides code navigation, semantic highlighting, search and indexing. The code snapshot is updated daily.
-
You can use the GitHub integrated code browser [here](https://github.dev/ClickHouse/ClickHouse). Also, you can browse sources on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual.

diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md
index f7cc52e622e5..b7e567c7b6cd 100644
--- a/docs/en/engines/database-engines/materialized-mysql.md
+++ b/docs/en/engines/database-engines/materialized-mysql.md
@@ -190,7 +190,7 @@ These are the schema conversion manipulations you can do with table overrides fo

 * Modify [column TTL](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#mergetree-column-ttl).
 * Modify [column compression codec](/docs/en/sql-reference/statements/create/table.md/#codecs).
 * Add [ALIAS columns](/docs/en/sql-reference/statements/create/table.md/#alias).
-    * Add [skipping indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-data_skipping-indexes)
+    * Add [skipping indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-data_skipping-indexes). Note that you need to enable the `use_skip_indexes_if_final` setting to make them work (MaterializedMySQL uses `SELECT ... FINAL` by default)
 * Add [projections](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#projections). Note that projection optimizations are disabled when using `SELECT ... FINAL` (which MaterializedMySQL does by default), so their utility is limited here. `INDEX ... TYPE hypothesis` as [described in the v21.12 blog post](https://clickhouse.com/blog/en/2021/clickhouse-v21.12-released/)

diff --git a/docs/en/engines/database-engines/replicated.md b/docs/en/engines/database-engines/replicated.md
index 5672633c4a23..27bdc20ec57d 100644
--- a/docs/en/engines/database-engines/replicated.md
+++ b/docs/en/engines/database-engines/replicated.md
@@ -35,7 +35,9 @@ The [system.clusters](../../operations/system-tables/clusters.md) system table c

When creating a new replica of the database, this replica creates tables by itself. If the replica has been unavailable for a long time and has lagged behind the replication log, it compares its local metadata with the current metadata in ZooKeeper, moves the extra tables with data to a separate non-replicated database (so as not to accidentally delete anything superfluous), creates the missing tables, and updates the table names if they have been renamed. The data is replicated at the `ReplicatedMergeTree` level, i.e. if the table is not replicated, the data will not be replicated (the database is responsible only for metadata).

-[`ALTER TABLE ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md) queries are allowed but not replicated. The database engine will only add/fetch/remove the partition/part to the current replica. However, if the table itself uses a Replicated table engine, then the data will be replicated after using `ATTACH`.
+[`ALTER TABLE FREEZE|ATTACH|FETCH|DROP|DROP DETACHED|DETACH PARTITION|PART`](../../sql-reference/statements/alter/partition.md) queries are allowed but not replicated. The database engine will only add/fetch/remove the partition/part to the current replica. However, if the table itself uses a Replicated table engine, then the data will be replicated after using `ATTACH`.
+
+If you only need to configure a cluster without maintaining table replication, refer to the [Cluster Discovery](../../operations/cluster-discovery.md) feature.

## Usage Example {#usage-example}

diff --git a/docs/en/engines/table-engines/index.md b/docs/en/engines/table-engines/index.md
index bd704d0e87e4..b024820024ab 100644
--- a/docs/en/engines/table-engines/index.md
+++ b/docs/en/engines/table-engines/index.md
@@ -60,6 +60,7 @@ Engines in the family:

- [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md)
- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md)
- [PostgreSQL](../../engines/table-engines/integrations/postgresql.md)
+- [S3Queue](../../engines/table-engines/integrations/s3queue.md)

### Special Engines {#special-engines}

diff --git a/docs/en/engines/table-engines/integrations/azureBlobStorage.md b/docs/en/engines/table-engines/integrations/azureBlobStorage.md
index 60e448377d0c..3df08ee2ffba 100644
--- a/docs/en/engines/table-engines/integrations/azureBlobStorage.md
+++ b/docs/en/engines/table-engines/integrations/azureBlobStorage.md
@@ -21,7 +21,7 @@ CREATE TABLE azure_blob_storage_table (name String, value UInt32)

- `connection_string|storage_account_url` — connection_string includes account name & key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)) or you could also provide the storage account url here and account name & account key as separate parameters (see parameters account_name & account_key)
- `container_name` - Container name
-- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings.
+- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings.
- `account_name` - if storage_account_url is used, then account name can be specified here
- `account_key` - if storage_account_url is used, then account key can be specified here
- `format` — The [format](/docs/en/interfaces/formats.md) of the file.

diff --git a/docs/en/engines/table-engines/integrations/deltalake.md b/docs/en/engines/table-engines/integrations/deltalake.md
index b562e9d7fe6f..964c952f31af 100644
--- a/docs/en/engines/table-engines/integrations/deltalake.md
+++ b/docs/en/engines/table-engines/integrations/deltalake.md
@@ -22,7 +22,7 @@ CREATE TABLE deltalake

- `url` — Bucket url with path to the existing Delta Lake table.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file.
-Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md)
+Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md).

**Example**

diff --git a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
index 2de981d33b7b..23ab89e19837 100644
--- a/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
+++ b/docs/en/engines/table-engines/integrations/embedded-rocksdb.md
@@ -8,7 +8,7 @@ sidebar_label: EmbeddedRocksDB

This engine allows integrating ClickHouse with [rocksdb](http://rocksdb.org/).

-## Creating a Table {#table_engine-EmbeddedRocksDB-creating-a-table}
+## Creating a Table {#creating-a-table}

``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

@@ -85,7 +85,7 @@ You can also change any [rocksdb options](https://github.com/facebook/rocksdb/wi

```

-## Supported operations {#table_engine-EmbeddedRocksDB-supported-operations}
+## Supported operations {#supported-operations}

### Inserts

diff --git a/docs/en/engines/table-engines/integrations/hive.md b/docs/en/engines/table-engines/integrations/hive.md
index 48867394418f..ea289b27ad72 100644
--- a/docs/en/engines/table-engines/integrations/hive.md
+++ b/docs/en/engines/table-engines/integrations/hive.md
@@ -60,7 +60,7 @@ Before using cache, add it to `config.xml`

- limit_size: Required. The maximum size (in bytes) of local cache files.
- bytes_read_before_flush: Controls the number of bytes buffered before flushing to the local filesystem when downloading a file from the remote filesystem. The default value is 1MB.

-When ClickHouse is started up with local cache for remote filesystem enabled, users can still choose not to use cache with `settings use_local_cache_for_remote_fs = 0` in their query. `use_local_cache_for_remote_fs` is `false` in default.
+When ClickHouse is started with the local cache for the remote filesystem enabled, users can still choose not to use the cache with `settings use_local_cache_for_remote_storage = 0` in their query. `use_local_cache_for_remote_storage` is `1` by default.

### Query Hive Table with ORC Input Format

diff --git a/docs/en/engines/table-engines/integrations/hudi.md b/docs/en/engines/table-engines/integrations/hudi.md
index c60618af2893..b2f599e5c92b 100644
--- a/docs/en/engines/table-engines/integrations/hudi.md
+++ b/docs/en/engines/table-engines/integrations/hudi.md
@@ -22,7 +22,7 @@ CREATE TABLE hudi_table

- `url` — Bucket url with the path to an existing Hudi table.
- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file.

-Engine parameters can be specified using [Named Collections](../../../operations/named-collections.md)
+Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md).

**Example**

diff --git a/docs/en/engines/table-engines/integrations/kafka.md b/docs/en/engines/table-engines/integrations/kafka.md
index b81d5624c1af..5d04dce4c515 100644
--- a/docs/en/engines/table-engines/integrations/kafka.md
+++ b/docs/en/engines/table-engines/integrations/kafka.md
@@ -14,7 +14,7 @@ Kafka lets you:

- Organize fault-tolerant storage.
- Process streams as they become available.
-## Creating a Table {#table_engine-kafka-creating-a-table}
+## Creating a Table {#creating-a-table}

``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

@@ -173,6 +173,7 @@ Similar to GraphiteMergeTree, the Kafka engine supports extended configuration u
  cgrp
  smallest
+  600

@@ -260,3 +261,4 @@ The number of rows in one Kafka message depends on whether the format is row-bas

- [Virtual columns](../../../engines/table-engines/index.md#table_engines-virtual_columns)
- [background_message_broker_schedule_pool_size](../../../operations/server-configuration-parameters/settings.md#background_message_broker_schedule_pool_size)
+- [system.kafka_consumers](../../../operations/system-tables/kafka_consumers.md)

diff --git a/docs/en/engines/table-engines/integrations/materialized-postgresql.md b/docs/en/engines/table-engines/integrations/materialized-postgresql.md
index bccafd67c2cf..47dae2ed494f 100644
--- a/docs/en/engines/table-engines/integrations/materialized-postgresql.md
+++ b/docs/en/engines/table-engines/integrations/materialized-postgresql.md
@@ -13,7 +13,7 @@ If more than one table is required, it is highly recommended to use the [Materia

``` sql
CREATE TABLE postgresql_db.postgresql_replica (key UInt64, value UInt64)
-ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_replica', 'postgres_user', 'postgres_password')
+ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgresql_table', 'postgres_user', 'postgres_password')
PRIMARY KEY key;
```

diff --git a/docs/en/engines/table-engines/integrations/mongodb.md b/docs/en/engines/table-engines/integrations/mongodb.md
index 912f81573dbb..f87e8da8b5b0 100644
--- a/docs/en/engines/table-engines/integrations/mongodb.md
+++ b/docs/en/engines/table-engines/integrations/mongodb.md
@@ -33,6 +33,15 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name

- `options` — MongoDB connection string options (optional parameter).

+:::tip
+If you are using the MongoDB Atlas cloud offering, please add these options:
+
+```
+'connectTimeoutMS=10000&ssl=true&authSource=admin'
+```
+
+:::
+
## Usage Example {#usage-example}

Create a table in ClickHouse that allows reading data from a MongoDB collection:

diff --git a/docs/en/engines/table-engines/integrations/nats.md b/docs/en/engines/table-engines/integrations/nats.md
index 25f442e5ce7d..570b219e5fa5 100644
--- a/docs/en/engines/table-engines/integrations/nats.md
+++ b/docs/en/engines/table-engines/integrations/nats.md
@@ -13,7 +13,7 @@ This engine allows integrating ClickHouse with [NATS](https://nats.io/).

- Publish or subscribe to message subjects.
- Process new messages as they become available.

-## Creating a Table {#table_engine-redisstreams-creating-a-table}
+## Creating a Table {#creating-a-table}

``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]

diff --git a/docs/en/engines/table-engines/integrations/odbc.md b/docs/en/engines/table-engines/integrations/odbc.md
index e29e56c10b24..71085feb6260 100644
--- a/docs/en/engines/table-engines/integrations/odbc.md
+++ b/docs/en/engines/table-engines/integrations/odbc.md
@@ -54,7 +54,7 @@ $ sudo mysql

``` sql
mysql> CREATE USER 'clickhouse'@'localhost' IDENTIFIED BY 'clickhouse';
-mysql> GRANT ALL PRIVILEGES ON *.* TO 'clickhouse'@'clickhouse' WITH GRANT OPTION;
+mysql> GRANT ALL PRIVILEGES ON *.* TO 'clickhouse'@'localhost' WITH GRANT OPTION;
```

Then configure the connection in `/etc/odbc.ini`.
@@ -66,7 +66,7 @@ DRIVER = /usr/local/lib/libmyodbc5w.so SERVER = 127.0.0.1 PORT = 3306 DATABASE = test -USERNAME = clickhouse +USER = clickhouse PASSWORD = clickhouse ``` @@ -83,6 +83,9 @@ $ isql -v mysqlconn Table in MySQL: ``` text +mysql> CREATE DATABASE test; +Query OK, 1 row affected (0,01 sec) + mysql> CREATE TABLE `test`.`test` ( -> `int_id` INT NOT NULL AUTO_INCREMENT, -> `int_nullable` INT NULL DEFAULT NULL, @@ -91,10 +94,10 @@ mysql> CREATE TABLE `test`.`test` ( -> PRIMARY KEY (`int_id`)); Query OK, 0 rows affected (0,09 sec) -mysql> insert into test (`int_id`, `float`) VALUES (1,2); +mysql> insert into test.test (`int_id`, `float`) VALUES (1,2); Query OK, 1 row affected (0,00 sec) -mysql> select * from test; +mysql> select * from test.test; +------+----------+-----+----------+ | int_id | int_nullable | float | float_nullable | +------+----------+-----+----------+ diff --git a/docs/en/engines/table-engines/integrations/rabbitmq.md b/docs/en/engines/table-engines/integrations/rabbitmq.md index 3fd5a130173d..4f6107764ecd 100644 --- a/docs/en/engines/table-engines/integrations/rabbitmq.md +++ b/docs/en/engines/table-engines/integrations/rabbitmq.md @@ -13,7 +13,7 @@ This engine allows integrating ClickHouse with [RabbitMQ](https://www.rabbitmq.c - Publish or subscribe to data flows. - Process streams as they become available. -## Creating a Table {#table_engine-rabbitmq-creating-a-table} +## Creating a Table {#creating-a-table} ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/en/engines/table-engines/integrations/s3.md b/docs/en/engines/table-engines/integrations/s3.md index 0e2b48ef6a69..2967a15494ce 100644 --- a/docs/en/engines/table-engines/integrations/s3.md +++ b/docs/en/engines/table-engines/integrations/s3.md @@ -37,7 +37,7 @@ CREATE TABLE s3_engine_table (name String, value UInt32) ### Engine parameters -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path). +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path). - `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed. - `format` — The [format](../../../interfaces/formats.md#formats) of the file. - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). @@ -57,7 +57,8 @@ Notice that the S3 endpoint in the `ENGINE` configuration uses the parameter tok :::note As shown in the example, querying from S3 tables that are partitioned is -not directly supported at this time, but can be accomplished by querying the bucket contents with a wildcard. +not directly supported at this time, but can be accomplished by querying the individual partitions +using the S3 table function. 
The primary use-case for writing partitioned data in S3 is to enable transferring that data into another

@@ -127,23 +128,7 @@ FROM s3('http://minio:10000/clickhouse//test_45.csv', 'minioadmin', 'minioadminp
└────┴────┴────┘
```

-#### Select from all partitions
-
-```sql
-SELECT *
-FROM s3('http://minio:10000/clickhouse//**', 'minioadmin', 'minioadminpassword', 'CSV')
-```
-```response
-┌─c1─┬─c2─┬─c3─┐
-│ 3 │ 2 │ 1 │
-└────┴────┴────┘
-┌─c1─┬─c2─┬─c3─┐
-│ 1 │ 2 │ 3 │
-└────┴────┴────┘
-┌─c1─┬─c2─┬─c3─┐
-│ 78 │ 43 │ 45 │
-└────┴────┴────┘
-```
+#### Limitation

You may naturally try to `Select * from p`, but as noted above, this query will fail; use the preceding query.

@@ -179,6 +164,7 @@ For more information about virtual columns see [here](../../../engines/table-eng

The `path` argument can specify multiple files using bash-like wildcards. To be processed, a file must exist and match the whole path pattern. The listing of files is determined during `SELECT` (not at `CREATE` moment).

- `*` — Substitutes any number of any characters except `/` including empty string.
+- `**` — Substitutes any number of any characters including `/` including empty string.
- `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`.

@@ -252,7 +238,7 @@ The following settings can be set before query execution or placed into configur

- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
- `s3_upload_part_size_multiply_factor` - Multiply `s3_min_upload_part_size` by this factor each time `s3_upload_part_size_multiply_parts_count_threshold` parts were uploaded from a single write to S3. Default value is `2`.
-- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3 `s3_min_upload_part_size multiplied` by `s3_upload_part_size_multiply_factor`. Default value us `500`.
+- `s3_upload_part_size_multiply_parts_count_threshold` - Each time this number of parts was uploaded to S3, `s3_min_upload_part_size` is multiplied by `s3_upload_part_size_multiply_factor`. Default value is `500`.
- `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object. Their number should be limited. The value `0` means unlimited. Default value is `20`. Each in-flight part has a buffer with size `s3_min_upload_part_size` for the first `s3_upload_part_size_multiply_factor` parts, and more when the file is big enough; see `s3_upload_part_size_multiply_factor`. With default settings, one uploaded file consumes no more than `320Mb` for a file smaller than `8G`. The consumption is greater for a larger file.

Security consideration: if a malicious user can specify arbitrary S3 URLs, `s3_max_redirects` must be set to zero to avoid [SSRF](https://en.wikipedia.org/wiki/Server-side_request_forgery) attacks; or alternatively, `remote_host_filter` must be specified in the server configuration.
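As a rough sketch of how the multipart settings interact (the bucket URL, column structure, and the numbers below are invented for the example, not taken from the text above), they can be passed per query:

```sql
-- Illustrative only: upload a large result set to S3, starting with 16 MiB parts;
-- after every 500 uploaded parts, the part size is multiplied by 2.
INSERT INTO FUNCTION s3('https://mybucket.s3.amazonaws.com/big_upload.csv', 'CSV', 'number UInt64')
SELECT number FROM numbers(100000000)
SETTINGS
    s3_min_upload_part_size = 16777216,
    s3_upload_part_size_multiply_factor = 2,
    s3_upload_part_size_multiply_parts_count_threshold = 500;
```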
diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md
new file mode 100644
index 000000000000..b8f8e51b3711
--- /dev/null
+++ b/docs/en/engines/table-engines/integrations/s3queue.md
@@ -0,0 +1,232 @@
+---
+slug: /en/engines/table-engines/integrations/s3queue
+sidebar_position: 181
+sidebar_label: S3Queue
+---
+
+# [experimental] S3Queue Table Engine
+This engine provides integration with the [Amazon S3](https://aws.amazon.com/s3/) ecosystem and allows streaming imports. The engine is similar to the [Kafka](../../../engines/table-engines/integrations/kafka.md) and [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) engines, but provides S3-specific features.
+
+:::note
+This table engine is experimental. To use it, set `allow_experimental_s3queue` to 1 by using the `SET` command:
+```sql
+SET allow_experimental_s3queue=1
+```
+:::
+
+## Create Table {#creating-a-table}
+
+``` sql
+CREATE TABLE s3_queue_engine_table (name String, value UInt32)
+    ENGINE = S3Queue(path [, NOSIGN | aws_access_key_id, aws_secret_access_key,] format, [compression])
+    [SETTINGS]
+    [mode = 'unordered',]
+    [after_processing = 'keep',]
+    [keeper_path = '',]
+    [s3queue_loading_retries = 0,]
+    [s3queue_polling_min_timeout_ms = 1000,]
+    [s3queue_polling_max_timeout_ms = 10000,]
+    [s3queue_polling_backoff_ms = 0,]
+    [s3queue_tracked_files_limit = 1000,]
+    [s3queue_tracked_file_ttl_sec = 0,]
+    [s3queue_polling_size = 50]
+```
+
+**Engine parameters**
+
+- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [below](#wildcards-in-path).
+- `NOSIGN` - If this keyword is provided in place of credentials, all the requests will not be signed.
+- `format` — The [format](../../../interfaces/formats.md#formats) of the file.
+- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3).
+- `compression` — Compression type. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. Parameter is optional. By default, it will autodetect compression by file extension.
+
+**Example**
+
+```sql
+CREATE TABLE s3queue_engine_table (name String, value UInt32)
+ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip')
+SETTINGS
+    mode = 'ordered';
+```
+
+Using named collections:
+
+``` xml
+<named_collections>
+    <s3queue_conf>
+        <url>https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*</url>
+        <access_key_id>test</access_key_id>
+        <secret_access_key>test</secret_access_key>
+    </s3queue_conf>
+</named_collections>
+```
+
+```sql
+CREATE TABLE s3queue_engine_table (name String, value UInt32)
+ENGINE=S3Queue(s3queue_conf, format = 'CSV', compression_method = 'gzip')
+SETTINGS
+    mode = 'ordered';
+```
+
+## Settings {#settings}
+
+### mode {#mode}
+
+Possible values:
+
+- unordered — With unordered mode, the set of all already-processed files is tracked with persistent nodes in ZooKeeper.
+- ordered — With ordered mode, only the max name of the successfully consumed file, and the names of files that will be retried after an unsuccessful loading attempt, are stored in ZooKeeper.
+
+Default value: `unordered`.
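+
+For illustration, a sketch contrasting the two modes (the bucket path and table names below are hypothetical, not from the original examples):
+
+```sql
+-- unordered: every processed file is remembered individually in ZooKeeper
+CREATE TABLE s3queue_unordered (name String, value UInt32)
+ENGINE = S3Queue('https://mybucket.s3.amazonaws.com/data/*.csv', 'CSV')
+SETTINGS mode = 'unordered';
+
+-- ordered: only the max name of the successfully consumed file
+-- (plus names of files to retry) is stored in ZooKeeper
+CREATE TABLE s3queue_ordered (name String, value UInt32)
+ENGINE = S3Queue('https://mybucket.s3.amazonaws.com/data/*.csv', 'CSV')
+SETTINGS mode = 'ordered';
+```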
+
+### after_processing {#after_processing}
+
+Delete or keep the file after successful processing.
+Possible values:
+
+- keep.
+- delete.
+
+Default value: `keep`.
+
+### keeper_path {#keeper_path}
+
+The path in ZooKeeper can be specified as a table engine setting, or a default path can be formed from the global configuration-provided path and the table UUID.
+Possible values:
+
+- String.
+
+Default value: `/`.
+
+### s3queue_loading_retries {#loading_retries}
+
+Retry file loading up to the specified number of times. By default, there are no retries.
+Possible values:
+
+- Positive integer.
+
+Default value: `0`.
+
+### s3queue_polling_min_timeout_ms {#polling_min_timeout_ms}
+
+Minimal timeout before the next polling (in milliseconds).
+
+Possible values:
+
+- Positive integer.
+
+Default value: `1000`.
+
+### s3queue_polling_max_timeout_ms {#polling_max_timeout_ms}
+
+Maximum timeout before the next polling (in milliseconds).
+
+Possible values:
+
+- Positive integer.
+
+Default value: `10000`.
+
+### s3queue_polling_backoff_ms {#polling_backoff_ms}
+
+Polling backoff (in milliseconds).
+
+Possible values:
+
+- Positive integer.
+
+Default value: `0`.
+
+### s3queue_tracked_files_limit {#tracked_files_limit}
+
+Allows limiting the number of ZooKeeper nodes if the 'unordered' mode is used; does nothing for the 'ordered' mode.
+If the limit is reached, the oldest processed files will be deleted from the ZooKeeper node and processed again.
+
+Possible values:
+
+- Positive integer.
+
+Default value: `1000`.
+
+### s3queue_tracked_file_ttl_sec {#tracked_file_ttl_sec}
+
+Maximum number of seconds to store processed files in the ZooKeeper node (stored forever by default) for the 'unordered' mode; does nothing for the 'ordered' mode.
+After the specified number of seconds, the file will be re-imported.
+
+Possible values:
+
+- Positive integer.
+
+Default value: `0`.
+
+### s3queue_polling_size {#polling_size}
+
+Maximum number of files to fetch from S3 with a SELECT or in a background task.
+The engine takes files from S3 for processing in batches.
+We limit the batch size to increase concurrency if multiple table engines with the same `keeper_path` consume files from the same path.
+
+Possible values:
+
+- Positive integer.
+
+Default value: `50`.
+
+
+## S3-related Settings {#s3-settings}
+
+The engine supports all S3-related settings. For more information about S3 settings see [here](../../../engines/table-engines/integrations/s3.md).
+
+
+## Description {#description}
+
+`SELECT` is not particularly useful for streaming import (except for debugging), because each file can be imported only once. It is more practical to create real-time threads using [materialized views](../../../sql-reference/statements/create/view.md). To do this:
+
+1. Use the engine to create a table for consuming from the specified path in S3 and consider it a data stream.
+2. Create a table with the desired structure.
+3. Create a materialized view that converts data from the engine and puts it into a previously created table.
+
+When the `MATERIALIZED VIEW` joins the engine, it starts collecting data in the background.
+
+Example:
+
+``` sql
+  CREATE TABLE s3queue_engine_table (name String, value UInt32)
+    ENGINE=S3Queue('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/*', 'CSV', 'gzip')
+    SETTINGS
+        mode = 'unordered',
+        keeper_path = '/clickhouse/s3queue/';
+
+  CREATE TABLE stats (name String, value UInt32)
+    ENGINE = MergeTree() ORDER BY name;
+
+  CREATE MATERIALIZED VIEW consumer TO stats
+    AS SELECT name, value FROM s3queue_engine_table;
+
+  SELECT * FROM stats ORDER BY name;
+```
+
+## Virtual columns {#virtual-columns}
+
+- `_path` — Path to the file.
+- `_file` — Name of the file.
+
+For more information about virtual columns see [here](../../../engines/table-engines/index.md#table_engines-virtual_columns).
+
+
+## Wildcards In Path {#wildcards-in-path}
+
+The `path` argument can specify multiple files using bash-like wildcards. To be processed, a file must exist and match the whole path pattern. The listing of files is determined during `SELECT` (not at `CREATE` moment).
+
+- `*` — Substitutes any number of any characters except `/` including empty string.
+- `**` — Substitutes any number of any characters including `/` including empty string.
+- `?` — Substitutes any single character.
+- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
+- `{N..M}` — Substitutes any number in range from N to M including both borders. N and M can have leading zeroes e.g. `000..078`.
+
+Constructions with `{}` are similar to the [remote](../../../sql-reference/table-functions/remote.md) table function.
+
+:::note
+If the listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`.
+:::

diff --git a/docs/en/engines/table-engines/mergetree-family/annindexes.md b/docs/en/engines/table-engines/mergetree-family/annindexes.md
index 4a4ebb47bdc4..be588f1764dd 100644
--- a/docs/en/engines/table-engines/mergetree-family/annindexes.md
+++ b/docs/en/engines/table-engines/mergetree-family/annindexes.md
@@ -1,4 +1,4 @@
-# Approximate Nearest Neighbor Search Indexes [experimental] {#table_engines-ANNIndex}
+# Approximate Nearest Neighbor Search Indexes [experimental]

Nearest neighborhood search is the problem of finding the M closest points for a given point in an N-dimensional vector space. The most straightforward approach to solve this problem is a brute force search where the distance between all points in the vector space and the

@@ -17,7 +17,7 @@ In terms of SQL, the nearest neighborhood problem can be expressed as follows:

``` sql
SELECT *
-FROM table
+FROM table_with_ann_index
ORDER BY Distance(vectors, Point)
LIMIT N
```

@@ -32,7 +32,7 @@ An alternative formulation of the nearest neighborhood search problem looks as f

``` sql
SELECT *
-FROM table
+FROM table_with_ann_index
WHERE Distance(vectors, Point) < MaxDistance
LIMIT N
```

@@ -45,12 +45,12 @@ With brute force search, both queries are expensive (linear in the number of poi
`Point` must be computed. To speed this process up, Approximate Nearest Neighbor Search Indexes (ANN indexes) store a compact representation of the search space (using clustering, search trees, etc.) which allows computing an approximate answer much more quickly (in sub-linear time).
-# Creating and Using ANN Indexes +# Creating and Using ANN Indexes {#creating_using_ann_indexes} Syntax to create an ANN index over an [Array](../../../sql-reference/data-types/array.md) column: ```sql -CREATE TABLE table +CREATE TABLE table_with_ann_index ( `id` Int64, `vectors` Array(Float32), @@ -63,7 +63,7 @@ ORDER BY id; Syntax to create an ANN index over a [Tuple](../../../sql-reference/data-types/tuple.md) column: ```sql -CREATE TABLE table +CREATE TABLE table_with_ann_index ( `id` Int64, `vectors` Tuple(Float32[, Float32[, ...]]), @@ -83,7 +83,7 @@ ANN indexes support two types of queries: ``` sql SELECT * - FROM table + FROM table_with_ann_index [WHERE ...] ORDER BY Distance(vectors, Point) LIMIT N @@ -93,7 +93,7 @@ ANN indexes support two types of queries: ``` sql SELECT * - FROM table + FROM table_with_ann_index WHERE Distance(vectors, Point) < MaxDistance LIMIT N ``` @@ -103,7 +103,7 @@ To avoid writing out large vectors, you can use [query parameters](/docs/en/interfaces/cli.md#queries-with-parameters-cli-queries-with-parameters), e.g. ```bash -clickhouse-client --param_vec='hello' --query="SELECT * FROM table WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0" +clickhouse-client --param_vec='hello' --query="SELECT * FROM table_with_ann_index WHERE L2Distance(vectors, {vec: Array(Float32)}) < 1.0" ``` ::: @@ -138,17 +138,19 @@ back to a smaller `GRANULARITY` values only in case of problems like excessive m was specified for ANN indexes, the default value is 100 million. -# Available ANN Indexes +# Available ANN Indexes {#available_ann_indexes} - [Annoy](/docs/en/engines/table-engines/mergetree-family/annindexes.md#annoy-annoy) +- [USearch](/docs/en/engines/table-engines/mergetree-family/annindexes.md#usearch-usearch) + ## Annoy {#annoy} Annoy indexes are currently experimental, to use them you first need to `SET allow_experimental_annoy_index = 1`. They are also currently disabled on ARM due to memory safety problems with the algorithm. -This type of ANN index implements [the Annoy algorithm](https://github.com/spotify/annoy) which is based on a recursive division of the -space in random linear surfaces (lines in 2D, planes in 3D etc.). +This type of ANN index is based on the [Annoy library](https://github.com/spotify/annoy) which recursively divides the space into random +linear surfaces (lines in 2D, planes in 3D etc.).
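+
+For illustration, a minimal Annoy index over an `Array(Float32)` column might be declared as follows (a sketch with hypothetical table and column names; `TYPE annoy()` with default parameters, as also used in the Laion example elsewhere in this patch):
+
+```sql
+SET allow_experimental_annoy_index = 1;
+
+CREATE TABLE table_with_annoy_index
+(
+    id Int64,
+    vectors Array(Float32),
+    INDEX annoy_idx vectors TYPE annoy()
+)
+ENGINE = MergeTree
+ORDER BY id;
+```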
+
+## USearch {#usearch}
+
+Syntax to create a USearch index over an [Array](../../../sql-reference/data-types/array.md) column:
+
+```sql
+CREATE TABLE table_with_usearch_index
+(
+    id Int64,
+    vectors Array(Float32),
+    INDEX [ann_index_name] vectors TYPE usearch([Distance[, ScalarKind]]) [GRANULARITY N]
+)
+ENGINE = MergeTree
+ORDER BY id;
+```
+
+Syntax to create a USearch index over a [Tuple](../../../sql-reference/data-types/tuple.md) column:
+
+```sql
+CREATE TABLE table_with_usearch_index
+(
+    id Int64,
+    vectors Tuple(Float32[, Float32[, ...]]),
+    INDEX [ann_index_name] vectors TYPE usearch([Distance[, ScalarKind]]) [GRANULARITY N]
+)
+ENGINE = MergeTree
+ORDER BY id;
+```
+
+USearch currently supports two distance functions:
+- `L2Distance`, also called Euclidean distance, is the length of a line segment between two points in Euclidean space
+  ([Wikipedia](https://en.wikipedia.org/wiki/Euclidean_distance)).
+- `cosineDistance`, also called cosine similarity, is the cosine of the angle between two (non-zero) vectors
+  ([Wikipedia](https://en.wikipedia.org/wiki/Cosine_similarity)).
+
+USearch allows storing the vectors in reduced precision formats. Supported scalar kinds are `f64`, `f32`, `f16` or `i8`. If no scalar kind
+was specified during index creation, `f16` is used as the default.
+
+For normalized data, `L2Distance` is usually the better choice; otherwise `cosineDistance` is recommended to compensate for scale. If no
+distance function was specified during index creation, `L2Distance` is used as the default.
diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md
index 42454af6febe..e37fd3b72884 100644
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@@ -37,8 +37,8 @@ The [Merge](/docs/en/engines/table-engines/special/merge.md/#merge) engine does
 ``` sql
 CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 (
-    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
-    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
+    name1 [type1] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [COMMENT ...] [CODEC(codec1)] [TTL expr1] [PRIMARY KEY],
+    name2 [type2] [[NOT] NULL] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [COMMENT ...] [CODEC(codec2)] [TTL expr2] [PRIMARY KEY],
     ...
     INDEX index_name1 expr1 TYPE type1(...) [GRANULARITY value1],
     INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2],
@@ -439,41 +439,41 @@ Syntax: `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions,
 - `number_of_hash_functions` — The number of hash functions used in the Bloom filter.
 - `random_seed` — The seed for Bloom filter hash functions.
 
-Users can create [UDF](/docs/en/sql-reference/statements/create/function.md) to estimate the parameters set of `ngrambf_v1`. Query statements are as follows:
+Users can create a [UDF](/docs/en/sql-reference/statements/create/function.md) to estimate the parameter set of `ngrambf_v1`. Query statements are as follows:
 
 ```sql
-CREATE FUNCTION bfEstimateFunctions [ON CLUSTER cluster]
-AS
-(total_nubmer_of_all_grams, size_of_bloom_filter_in_bits) -> round((size_of_bloom_filter_in_bits / total_nubmer_of_all_grams) * log(2));
-
-CREATE FUNCTION bfEstimateBmSize [ON CLUSTER cluster]
-AS
-(total_nubmer_of_all_grams, probability_of_false_positives) -> ceil((total_nubmer_of_all_grams * log(probability_of_false_positives)) / log(1 / pow(2, log(2))));
-
-CREATE FUNCTION bfEstimateFalsePositive [ON CLUSTER cluster]
-AS
-(total_nubmer_of_all_grams, number_of_hash_functions, size_of_bloom_filter_in_bytes) -> pow(1 - exp(-number_of_hash_functions/ (size_of_bloom_filter_in_bytes / total_nubmer_of_all_grams)), number_of_hash_functions);
-
-CREATE FUNCTION bfEstimateGramNumber [ON CLUSTER cluster]
-AS
+CREATE FUNCTION bfEstimateFunctions [ON CLUSTER cluster]
+AS
+(total_number_of_all_grams, size_of_bloom_filter_in_bits) -> round((size_of_bloom_filter_in_bits / total_number_of_all_grams) * log(2));
+
+CREATE FUNCTION bfEstimateBmSize [ON CLUSTER cluster]
+AS
+(total_number_of_all_grams, probability_of_false_positives) -> ceil((total_number_of_all_grams * log(probability_of_false_positives)) / log(1 / pow(2, log(2))));
+
+CREATE FUNCTION bfEstimateFalsePositive [ON CLUSTER cluster]
+AS
+(total_number_of_all_grams, number_of_hash_functions, size_of_bloom_filter_in_bytes) -> pow(1 - exp(-number_of_hash_functions / (size_of_bloom_filter_in_bytes / total_number_of_all_grams)), number_of_hash_functions);
+
+CREATE FUNCTION bfEstimateGramNumber [ON CLUSTER cluster]
+AS
 (number_of_hash_functions, probability_of_false_positives, size_of_bloom_filter_in_bytes) -> ceil(size_of_bloom_filter_in_bytes / (-number_of_hash_functions / log(1 - exp(log(probability_of_false_positives) / number_of_hash_functions))))
-```
+```
 
 To use those functions, we need to specify at least two parameters.
-For example, if there 4300 ngrams in the granule and we expect false positives to be less than 0.0001. The other parameters can be estimated by executing following queries:
+For example, if there are 4300 ngrams in the granule and we expect false positives to be less than 0.0001, the other parameters can be estimated by executing the following queries:
 
 ```sql
 --- estimate number of bits in the filter
-SELECT bfEstimateBmSize(4300, 0.0001) / 8 as size_of_bloom_filter_in_bytes;
+SELECT bfEstimateBmSize(4300, 0.0001) / 8 as size_of_bloom_filter_in_bytes;
 
 ┌─size_of_bloom_filter_in_bytes─┐
 │                         10304 │
 └───────────────────────────────┘
 
 --- estimate number of hash functions
 SELECT bfEstimateFunctions(4300, bfEstimateBmSize(4300, 0.0001)) as number_of_hash_functions
 
 ┌─number_of_hash_functions─┐
 │                       13 │
 └──────────────────────────┘
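+
+-- The estimated values can then be plugged into an index definition, e.g.
+-- (a sketch: hypothetical table `tab` and string column `str`; the n-gram
+-- size 4 and the random seed 0 are chosen by the user):
+ALTER TABLE tab ADD INDEX idx_str str TYPE ngrambf_v1(4, 10304, 13, 0) GRANULARITY 1;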
To a - [ttl_only_drop_parts](/docs/en/operations/settings/settings.md/#ttl_only_drop_parts) setting +## Disk types + +In addition to local block devices, ClickHouse supports these storage types: +- [`s3` for S3 and MinIO](#table_engine-mergetree-s3) +- [`gcs` for GCS](/docs/en/integrations/data-ingestion/gcs/index.md/#creating-a-disk) +- [`blob_storage_disk` for Azure Blob Storage](#table_engine-mergetree-azure-blob-storage) +- [`hdfs` for HDFS](#hdfs-storage) +- [`web` for read-only from web](#web-storage) +- [`cache` for local caching](/docs/en/operations/storing-data.md/#using-local-cache) +- [`s3_plain` for backups to S3](/docs/en/operations/backup#backuprestore-using-an-s3-disk) + ## Using Multiple Block Devices for Data Storage {#table_engine-mergetree-multiple-volumes} ### Introduction {#introduction} @@ -936,7 +947,16 @@ configuration files; all the settings are in the CREATE/ATTACH query. The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk. ::: +#### Example dynamic web storage + +:::tip +A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver) +::: + +In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content. + ```sql +# highlight-next-line ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7' ( price UInt32, @@ -971,7 +991,7 @@ use a local disk to cache data from a table stored at a URL. Neither the cache d nor the web storage is configured in the ClickHouse configuration files; both are configured in the CREATE/ATTACH query settings. -In the settings highlighted below notice that the disk of `type=web` is nested within +In the settings highlighted below notice that the disk of `type=web` is nested within the disk of `type=cache`. ```sql @@ -1119,6 +1139,8 @@ Optional parameters: - `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`. - `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited). - `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`. +- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). +- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk). ### Configuring the cache @@ -1200,7 +1222,6 @@ Configuration markup: account pass123 /var/lib/clickhouse/disks/blob_storage_disk/ - true /var/lib/clickhouse/disks/blob_storage_disk/cache/ false @@ -1228,15 +1249,102 @@ Limit parameters (mainly for internal usage): Other parameters: * `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks//`. 
-* `cache_enabled` - Allows to cache mark and index files on local FS. Default value is `true`.
-* `cache_path` - Path on local FS where to store cached mark and index files. Default value is `/var/lib/clickhouse/disks/<disk_name>/cache/`.
 * `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
+* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
+* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
 
 Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).
 
-  :::note Zero-copy replication is not ready for production
-  Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
-  :::
+:::note Zero-copy replication is not ready for production
+Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
+:::
+
+## HDFS storage {#hdfs-storage}
+
+In this sample configuration:
+- the disk is of type `hdfs`
+- the data is hosted at `hdfs://hdfs1:9000/clickhouse/`
+
+```xml
+<clickhouse>
+    <storage_configuration>
+        <disks>
+            <hdfs>
+                <type>hdfs</type>
+                <endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
+                <skip_access_check>true</skip_access_check>
+            </hdfs>
+            <hdd>
+                <type>local</type>
+                <path>/</path>
+            </hdd>
+        </disks>
+        <policies>
+            <hdfs>
+                <volumes>
+                    <main>
+                        <disk>hdfs</disk>
+                    </main>
+                    <external>
+                        <disk>hdd</disk>
+                    </external>
+                </volumes>
+            </hdfs>
+        </policies>
+    </storage_configuration>
+</clickhouse>
+```
+
+## Web storage (read-only) {#web-storage}
+
+Web storage can be used for read-only purposes. An example use is for hosting sample
+data, or for migrating data.
+
+:::tip
+If a web dataset is not expected to be used routinely, storage can instead be configured temporarily within a query; see [dynamic storage](#dynamic-storage) and skip editing the configuration file.
+:::
+
+In this sample configuration:
+- the disk is of type `web`
+- the data is hosted at `http://nginx:80/test1/`
+- a cache on local storage is used
+
+```xml
+<clickhouse>
+    <storage_configuration>
+        <disks>
+            <web>
+                <type>web</type>
+                <endpoint>http://nginx:80/test1/</endpoint>
+            </web>
+            <cached_web>
+                <type>cache</type>
+                <disk>web</disk>
+                <path>cached_web_cache/</path>
+                <max_size>100000000</max_size>
+            </cached_web>
+        </disks>
+        <policies>
+            <web>
+                <volumes>
+                    <main>
+                        <disk>web</disk>
+                    </main>
+                </volumes>
+            </web>
+            <cached_web>
+                <volumes>
+                    <main>
+                        <disk>cached_web</disk>
+                    </main>
+                </volumes>
+            </cached_web>
+        </policies>
+    </storage_configuration>
+</clickhouse>
+```
 
 ## Virtual Columns {#virtual-columns}
 
@@ -1246,3 +1354,4 @@ Examples of working configurations can be found in integration tests directory (
 - `_part_uuid` — Unique part identifier (if enabled MergeTree setting `assign_part_uuids`).
 - `_partition_value` — Values (a tuple) of a `partition by` expression.
 - `_sample_factor` — Sample factor (from the query).
+- `_block_number` — Block number of the row; it is persisted on merges when `allow_experimental_block_number_column` is set to true.
diff --git a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md
index 7db2f3b465a2..874a43a419ff 100644
--- a/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/replacingmergetree.md
@@ -100,8 +100,43 @@ SELECT * FROM mySecondReplacingMT FINAL;
 The row is deleted when `OPTIMIZE ... FINAL CLEANUP` or `OPTIMIZE ... FINAL` is used, or if the engine setting `clean_deleted_rows` has been set to `Always`.
 
 No matter the operation on the data, the version must be increased. If two inserted rows have the same version number, the last inserted row is the one kept.
+
+Always execute `OPTIMIZE ... FINAL CLEANUP` or `OPTIMIZE ... FINAL` to delete rows that have `is_deleted=1` set, especially before inserting a new row with an older version. Otherwise, the new row with the older version will be replaced and not persisted.
+
 :::
 
+Example:
+```sql
+-- with ver and is_deleted
+CREATE OR REPLACE TABLE myThirdReplacingMT
+(
+    `key` Int64,
+    `someCol` String,
+    `eventTime` DateTime,
+    `is_deleted` UInt8
+)
+ENGINE = ReplacingMergeTree(eventTime, is_deleted)
+ORDER BY key;
+
+INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 0);
+INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 01:01:01', 1);
+
+SELECT * FROM myThirdReplacingMT FINAL;
+
+0 rows in set. Elapsed: 0.003 sec.
+
+-- delete rows with is_deleted
+OPTIMIZE TABLE myThirdReplacingMT FINAL CLEANUP;
+
+INSERT INTO myThirdReplacingMT Values (1, 'first', '2020-01-01 00:00:00', 0);
+
+SELECT * FROM myThirdReplacingMT FINAL;
+
+┌─key─┬─someCol─┬───────────eventTime─┬─is_deleted─┐
+│   1 │ first   │ 2020-01-01 00:00:00 │          0 │
+└─────┴─────────┴─────────────────────┴────────────┘
+```
+
 ## Query clauses
 
 When creating a `ReplacingMergeTree` table the same [clauses](../../../engines/table-engines/mergetree-family/mergetree.md) are required, as when creating a `MergeTree` table.
diff --git a/docs/en/engines/table-engines/special/buffer.md b/docs/en/engines/table-engines/special/buffer.md
index f7d84b9b4525..a3bb11117cd8 100644
--- a/docs/en/engines/table-engines/special/buffer.md
+++ b/docs/en/engines/table-engines/special/buffer.md
@@ -13,7 +13,7 @@ A recommended alternative to the Buffer Table Engine is enabling [asynchronous i
 :::
 
 ``` sql
-Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes)
+Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]])
 ```
 
 ### Engine parameters:
diff --git a/docs/en/engines/table-engines/special/generate.md b/docs/en/engines/table-engines/special/generate.md
index 9fcdb47e5558..13efd3e444bc 100644
--- a/docs/en/engines/table-engines/special/generate.md
+++ b/docs/en/engines/table-engines/special/generate.md
@@ -23,7 +23,7 @@ array or map columns and strings correspondingly in generated data.
 Generate table engine supports only `SELECT` queries.
 
-It supports all [DataTypes](../../../sql-reference/data-types/index.md) that can be stored in a table except `LowCardinality` and `AggregateFunction`.
+It supports all [DataTypes](../../../sql-reference/data-types/index.md) that can be stored in a table except `AggregateFunction`.
 
 ## Example {#example}
 
diff --git a/docs/en/engines/table-engines/special/keepermap.md b/docs/en/engines/table-engines/special/keepermap.md
index 6ce56adbae9a..5559cc2c6485 100644
--- a/docs/en/engines/table-engines/special/keepermap.md
+++ b/docs/en/engines/table-engines/special/keepermap.md
@@ -20,7 +20,7 @@ For example:
 
 where path can be any other valid ZooKeeper path.
 
-## Creating a Table {#table_engine-KeeperMap-creating-a-table}
+## Creating a Table {#creating-a-table}
 
 ``` sql
 CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
@@ -74,7 +74,7 @@ If multiple tables are created on the same ZooKeeper path, the values are persis
 
 As a result, it is possible to use `ON CLUSTER` clause when creating the table and sharing the data from multiple ClickHouse instances. Of course, it's possible to manually run `CREATE TABLE` with same path on unrelated ClickHouse instances to have same data sharing effect.
 
-## Supported operations {#table_engine-KeeperMap-supported-operations}
+## Supported operations {#supported-operations}
 
 ### Inserts
 
diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md
index 26d4975954fa..5a5e15641804 100644
--- a/docs/en/engines/table-engines/special/url.md
+++ b/docs/en/engines/table-engines/special/url.md
@@ -106,3 +106,4 @@ For partitioning by month, use the `toYYYYMM(date_column)` expression, where `da
 ## Storage Settings {#storage-settings}
 
 - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default.
+- [enable_url_encoding](/docs/en/operations/settings/settings.md#enable_url_encoding) - allows enabling/disabling decoding and encoding of the path in the URI. Enabled by default.
diff --git a/docs/en/getting-started/example-datasets/laion.md b/docs/en/getting-started/example-datasets/laion.md
index 077adf016a32..0dbaceffc139 100644
--- a/docs/en/getting-started/example-datasets/laion.md
+++ b/docs/en/getting-started/example-datasets/laion.md
@@ -1,23 +1,21 @@
 # Laion-400M dataset
 
-The dataset contains 400 million images with English text. For more information follow this [link](https://laion.ai/blog/laion-400-open-dataset/). Laion provides even larger datasets (e.g. [5 billion](https://laion.ai/blog/laion-5b/)). Working with them will be similar.
+The [Laion-400M dataset](https://laion.ai/blog/laion-400-open-dataset/) contains 400 million images with English image captions. Laion nowadays provides [an even larger dataset](https://laion.ai/blog/laion-5b/), but working with it is similar.
 
-The dataset has prepared embeddings for texts and images. This will be used to demonstrate [Approximate nearest neighbor search indexes](../../engines/table-engines/mergetree-family/annindexes.md).
+The dataset contains the image URL, embeddings for both the image and the image caption, a similarity score between the image and the image caption, as well as metadata, e.g. the image width/height, the licence and an NSFW flag. We can use the dataset to demonstrate [approximate nearest neighbor search](../../engines/table-engines/mergetree-family/annindexes.md) in ClickHouse.
-## Prepare data
+## Data preparation
 
-Embeddings are stored in `.npy` files, so we have to read them with python and merge with other data.
-
-Download data and process it with simple `download.sh` script:
+The embeddings and the metadata are stored in separate files in the raw data. A data preparation step downloads the data, merges the files,
+converts them to CSV and imports them into ClickHouse. You can use the following `download.sh` script for that:
 
 ```bash
-wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy
-wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet
-wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy
-python3 process.py ${1}
+wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy # download image embedding
+wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy # download text embedding
+wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet # download metadata
+python3 process.py ${1} # merge files and convert to CSV
 ```
-
-Where `process.py`:
+Script `process.py` is defined as follows:
 
 ```python
 import pandas as pd
@@ -35,11 +33,11 @@ im_emb = np.load(npy_file)
 text_emb = np.load(text_npy)
 data = pd.read_parquet(metadata_file)
 
-# combine them
+# combine files
 data = pd.concat([data, pd.DataFrame({"image_embedding" : [*im_emb]}), pd.DataFrame({"text_embedding" : [*text_emb]})], axis=1, copy=False)
 
-# you can save more columns
-data = data[['url', 'caption', 'similarity', "image_embedding", "text_embedding"]]
+# columns to be imported into ClickHouse
+data = data[['url', 'caption', 'NSFW', 'similarity', "image_embedding", "text_embedding"]]
 
 # transform np.arrays to lists
 data['image_embedding'] = data['image_embedding'].apply(lambda x: list(x))
@@ -48,30 +46,34 @@ data['text_embedding'] = data['text_embedding'].apply(lambda x: list(x))
 # this small hack is needed because the caption sometimes contains all kinds of quotes
 data['caption'] = data['caption'].apply(lambda x: x.replace("'", " ").replace('"', " "))
 
-# save data to file
+# export data as a CSV file
 data.to_csv(str_i + '.csv', header=False)
 
-# previous files can be removed
+# remove the raw data files
 os.system(f"rm {npy_file} {metadata_file} {text_npy}")
 ```
 
-You can download data with
+To start the data preparation pipeline, run:
+
 ```bash
-seq 0 409 | xargs -P100 -I{} bash -c './download.sh {}'
+seq 0 409 | xargs -P1 -I{} bash -c './download.sh {}'
 ```
 
-The dataset is divided into 409 files. If you want to work only with a certain part of the dataset, just change the limits.
+The dataset is split into 410 files; each file contains ca. 1 million rows. If you want to work with a smaller subset of the data, simply adjust the limits, e.g. `seq 0 9 | ...`.
+
+(The Python script above is very slow (~2-10 minutes per file), takes a lot of memory (41 GB per file), and the resulting CSV files are big (10 GB each), so be careful. If you have enough RAM, increase the `-P1` number for more parallelism. If this is still too slow, consider coming up with a better ingestion procedure - maybe converting the .npy files to Parquet, then doing all the other processing with ClickHouse.)
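+
+As a sketch of the Parquet idea from the note above (assuming a downloaded `metadata_0.parquet` file has been placed into ClickHouse's `user_files_path`), the metadata could be inspected directly, without any CSV conversion:
+
+```sql
+SELECT url, caption, similarity
+FROM file('metadata_0.parquet', 'Parquet')
+LIMIT 5;
+```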
 
-## Create table for laion
+## Create table
 
-Without indexes table can be created by
+To create a table without indexes, run:
 
 ```sql
-CREATE TABLE laion_dataset
+CREATE TABLE laion
 (
     `id` Int64,
     `url` String,
     `caption` String,
+    `NSFW` String,
     `similarity` Float32,
     `image_embedding` Array(Float32),
     `text_embedding` Array(Float32)
@@ -81,23 +83,23 @@ ORDER BY id
 SETTINGS index_granularity = 8192
 ```
 
-Fill table with data:
+To import the CSV files into ClickHouse:
 
 ```sql
-INSERT INTO laion_dataset FROM INFILE '{path_to_csv_files}/*.csv'
+INSERT INTO laion FROM INFILE '{path_to_csv_files}/*.csv'
 ```
 
-## Check data in table without indexes
+## Run a brute-force ANN search (without ANN index)
 
-Let's check the work of the following query on the part of the dataset (8 million records):
+To run a brute-force approximate nearest neighbor search, run:
 
 ```sql
-select url, caption from test_laion where similarity > 0.2 order by L2Distance(image_embedding, {target:Array(Float32)}) limit 30
+SELECT url, caption FROM laion ORDER BY L2Distance(image_embedding, {target:Array(Float32)}) LIMIT 30
 ```
 
-Since the embeddings for images and texts may not match, let's also require a certain threshold of matching accuracy to get images that are more likely to satisfy our queries. The client parameter `target`, which is an array of 512 elements. See later in this article for a convenient way of obtaining such vectors. I used a random picture of a cat from the Internet as a target vector.
+`target` is an array of 512 elements and a client parameter. A convenient way to obtain such arrays will be presented at the end of the article. For now, we can use the embedding of a random cat picture as `target`.
 
-**The result**
+**Result**
 
 ```
 ┌─url───────────────────────────────────────────────────────────────────────────────────────────────────────────┬─caption────────────────────────────────────────────────────────────────┐
@@ -114,32 +116,34 @@ Since the embeddings for images and texts may not match, let's also require a ce
 
 8 rows in set. Elapsed: 6.432 sec. Processed 19.65 million rows, 43.96 GB (3.06 million rows/s., 6.84 GB/s.)
 ```
 
-## Add indexes
+## Run an ANN search with an ANN index
 
-Create a new table or follow instructions from [alter documentation](../../sql-reference/statements/alter/skipping-index.md).
+Create a new table with an ANN index and insert the data from the existing table:
 
 ```sql
-CREATE TABLE laion_dataset
+CREATE TABLE laion_annoy
 (
     `id` Int64,
     `url` String,
     `caption` String,
+    `NSFW` String,
     `similarity` Float32,
     `image_embedding` Array(Float32),
     `text_embedding` Array(Float32),
-    INDEX annoy_image image_embedding TYPE annoy(1000) GRANULARITY 1000,
-    INDEX annoy_text text_embedding TYPE annoy(1000) GRANULARITY 1000
+    INDEX annoy_image image_embedding TYPE annoy(),
+    INDEX annoy_text text_embedding TYPE annoy()
 )
 ENGINE = MergeTree
 ORDER BY id
-SETTINGS index_granularity = 8192
+SETTINGS index_granularity = 8192;
+
+INSERT INTO laion_annoy SELECT * FROM laion;
 ```
 
-When created, the index will be built by L2Distance. You can read more about the parameters in the [annoy documentation](../../engines/table-engines/mergetree-family/annindexes.md#annoy-annoy). It makes sense to build indexes for a large number of granules. If you need good speed, then GRANULARITY should be several times larger than the expected number of results in the search.
-Now let's check again with the same query:
+By default, Annoy indexes use the L2 distance as metric.
+Further tuning knobs for index creation and search are described in the Annoy index [documentation](../../engines/table-engines/mergetree-family/annindexes.md). Let's now check again with the same query:
 
 ```sql
-select url, caption from test_indexes_laion where similarity > 0.2 order by L2Distance(image_embedding, {target:Array(Float32)}) limit 8
+SELECT url, caption FROM laion_annoy ORDER BY l2Distance(image_embedding, {target:Array(Float32)}) LIMIT 8
 ```
 
 **Result**
 
@@ -159,15 +163,18 @@ select url, caption from test_indexes_laion where similarity > 0.2 order by L2Di
 
 8 rows in set. Elapsed: 0.641 sec. Processed 22.06 thousand rows, 49.36 MB (91.53 thousand rows/s., 204.81 MB/s.)
 ```
 
-The speed has increased significantly. But now, the results sometimes differ from what you are looking for. This is due to the approximation of the search and the quality of the constructed embedding. Note that the example was given for picture embeddings, but there are also text embeddings in the dataset, which can also be used for searching.
+The speed increased significantly at the cost of less accurate results. This is because the ANN index only provides approximate search results. Note that the example searched for similar image embeddings, yet it is also possible to search for similar image caption embeddings.
 
-## Scripts for embeddings
+## Creating embeddings with UDFs
 
-Usually, we do not want to get embeddings from existing data, but to get them for new data and look for similar ones in old data. We can use [UDF](../../sql-reference/functions/index.md#sql-user-defined-functions) for this purpose. They will allow you to set the `target` vector without leaving the client. All of the following scripts will be written for the `ViT-B/32` model, as it was used for this dataset. You can use any model, but it is necessary to build embeddings in the dataset and for new objects using the same model.
+One usually wants to create embeddings for new images or new image captions and search for similar image / image caption pairs in the data. We can use a [UDF](../../sql-reference/functions/index.md#sql-user-defined-functions) to create the `target` vector without leaving the client. It is important to use the same model for the dataset embeddings and for the new search embeddings. The following scripts utilize the `ViT-B/32` model which also underlies the dataset.
 
 ### Text embeddings
 
+First, store the following Python script in the `user_scripts/` directory of your ClickHouse data path and make it executable (`chmod +x encode_text.py`).
+
 `encode_text.py`:
+
 ```python
 #!/usr/bin/python3
 import clip
@@ -182,10 +189,12 @@ if __name__ == '__main__':
         inputs = clip.tokenize(text)
         with torch.no_grad():
             text_features = model.encode_text(inputs)[0].tolist()
+            print(text_features)
             sys.stdout.flush()
 ```
 
-`encode_text_function.xml`:
+Then create `encode_text_function.xml` in a location referenced by `<user_defined_executable_functions_config>/path/to/*_function.xml</user_defined_executable_functions_config>` in your ClickHouse server configuration file.
+
 ```xml
 <functions>
     <function>
         <type>executable_pool</type>
         <name>encode_text</name>
         <return_type>Array(Float32)</return_type>
         <argument>
             <type>String</type>
             <name>text</name>
         </argument>
         <format>TabSeparated</format>
         <command>encode_text.py</command>
         <command_read_timeout>1000000</command_read_timeout>
     </function>
 </functions>
 ```
 
@@ -203,19 +212,19 @@ if __name__ == '__main__':
 
-Now we can simply use:
+You can now simply use:
 
 ```sql
 SELECT encode_text('cat');
 ```
-
-The first use will be slow because the model needs to be loaded. But repeated queries will be fast. Then we copy the results to ``set param_target=...`` and can easily write queries
+The first run will be slow because it loads the model, but repeated runs will be fast. We can then copy the output to `SET param_target=...` and easily write queries.
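+
+As a sketch (assuming the `encode_text` UDF above is registered and the `laion_annoy` table from the previous section exists), the UDF call can also be inlined directly into a search query:
+
+```sql
+SELECT url, caption
+FROM laion_annoy
+ORDER BY L2Distance(text_embedding, encode_text('a sophisticated cat'))
+LIMIT 8;
+```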
 
 ### Image embeddings
 
-For pictures, the process is similar, but you send the path instead of the picture (if necessary, you can implement a download picture with processing, but it will take longer)
+Image embeddings can be created similarly, but we will provide the Python script the path to a local image instead of the image caption text.
+
+`encode_image.py`
 
-`encode_picture.py`
 ```python
 #!/usr/bin/python3
 import clip
@@ -231,29 +240,31 @@ if __name__ == '__main__':
         image = preprocess(Image.open(text.strip())).unsqueeze(0).to(device)
         with torch.no_grad():
             image_features = model.encode_image(image)[0].tolist()
-            print(image_features)
+            print(image_features)
             sys.stdout.flush()
 ```
 
-`encode_picture_function.xml`
+`encode_image_function.xml`
+
 ```xml
 <functions>
     <function>
         <type>executable_pool</type>
-        <name>encode_picture</name>
+        <name>encode_image</name>
         <return_type>Array(Float32)</return_type>
         <argument>
             <type>String</type>
             <name>path</name>
         </argument>
         <format>TabSeparated</format>
-        <command>encode_picture.py</command>
+        <command>encode_image.py</command>
         <command_read_timeout>1000000</command_read_timeout>
     </function>
 </functions>
 ```
 
-The query:
+Then run this query:
+
 ```sql
-SELECT encode_picture('some/path/to/your/picture');
+SELECT encode_image('/path/to/your/image');
 ```
diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md
index d44dc861888f..d2e7ab304782 100644
--- a/docs/en/getting-started/install.md
+++ b/docs/en/getting-started/install.md
@@ -378,6 +378,10 @@ request](https://github.com/ClickHouse/ClickHouse/commits/master) and find CI check
 https://s3.amazonaws.com/clickhouse/builds/PRs/.../.../binary_aarch64_v80compat/clickhouse". You can then click the link to download the build.
 
+### macOS-only: Install with Homebrew
+
+To install ClickHouse using the popular `brew` package manager, follow the instructions listed in the [ClickHouse Homebrew tap](https://github.com/ClickHouse/homebrew-clickhouse).
+
 ## Launch {#launch}
 
 To start the server as a daemon, run:
diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md
index 8779dd1a5443..f018f3a248e2 100644
--- a/docs/en/interfaces/cli.md
+++ b/docs/en/interfaces/cli.md
@@ -30,7 +30,7 @@ It may lack support for new features.
 
 ## Usage {#cli_usage}
 
-The client can be used in interactive and non-interactive (batch) mode. 
+The client can be used in interactive and non-interactive (batch) mode.
 
 ### Gather your connection details
 
@@ -177,8 +177,8 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
 - `--user, -u` – The username. Default value: default.
 - `--password` – The password. Default value: empty string.
 - `--ask-password` - Prompt the user to enter a password.
-- `--query, -q` – The query to process when using non-interactive mode. Cannot be used simultaneously with `--queries-file`.
-- `--queries-file` – file path with queries to execute. Cannot be used simultaneously with `--query`.
+- `--query, -q` – The query to process when using non-interactive mode. `--query` can be specified multiple times, e.g. `--query "SELECT 1" --query "SELECT 2"`. Cannot be used simultaneously with `--queries-file`.
+- `--queries-file` – file path with queries to execute. `--queries-file` can be specified multiple times, e.g. `--queries-file queries1.sql --queries-file queries2.sql`. Cannot be used simultaneously with `--query`.
 - `--multiquery, -n` – If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`.
 - `--multiline, -m` – If specified, allow multiline queries (do not send the query on Enter).
 - `--database, -d` – Select the current default database.
Default value: the current database from the server settings (‘default’ by default). @@ -323,9 +323,9 @@ clickhouse-client clickhouse://192.168.1.15,192.168.1.25 `clickhouse-client` uses the first existing file of the following: - Defined in the `--config-file` parameter. -- `./clickhouse-client.xml` -- `~/.clickhouse-client/config.xml` -- `/etc/clickhouse-client/config.xml` +- `./clickhouse-client.xml`, `.yaml`, `.yml` +- `~/.clickhouse-client/config.xml`, `.yaml`, `.yml` +- `/etc/clickhouse-client/config.xml`, `.yaml`, `.yml` Example of a config file: @@ -342,6 +342,17 @@ Example of a config file:
``` +Or the same config in a YAML format: + +```yaml +user: username +password: 'password' +secure: true +openSSL: + client: + caConfig: '/etc/ssl/cert.pem' +``` + ### Query ID Format {#query-id-format} In interactive mode `clickhouse-client` shows query ID for every query. By default, the ID is formatted like this: diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index 378a1c46d93c..e98f19b2a65d 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -11,81 +11,83 @@ results of a `SELECT`, and to perform `INSERT`s into a file-backed table. The supported formats are: | Format | Input | Output | -|-------------------------------------------------------------------------------------------|------|--------| -| [TabSeparated](#tabseparated) | ✔ | ✔ | -| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | -| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | -| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | -| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ | -| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ | -| [Template](#format-template) | ✔ | ✔ | -| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ | -| [CSV](#csv) | ✔ | ✔ | -| [CSVWithNames](#csvwithnames) | ✔ | ✔ | -| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ | -| [CustomSeparated](#format-customseparated) | ✔ | ✔ | -| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ | -| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ | -| [SQLInsert](#sqlinsert) | ✗ | ✔ | -| [Values](#data-format-values) | ✔ | ✔ | -| [Vertical](#vertical) | ✗ | ✔ | -| [JSON](#json) | ✔ | ✔ | -| [JSONAsString](#jsonasstring) | ✔ | ✗ | -| [JSONStrings](#jsonstrings) | ✔ | ✔ | -| [JSONColumns](#jsoncolumns) | ✔ | ✔ | -| [JSONColumnsWithMetadata](#jsoncolumnsmonoblock)) | ✔ | ✔ | -| [JSONCompact](#jsoncompact) | ✔ | ✔ | -| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ | -| [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ | -| [JSONEachRow](#jsoneachrow) | ✔ | ✔ | -| [PrettyJSONEachRow](#prettyjsoneachrow) | ✗ | ✔ | -| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | -| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | -| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ | -| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ | -| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ | -| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ | -| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ | -| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ | -| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ | -| [JSONObjectEachRow](#jsonobjecteachrow) | ✔ | ✔ | -| [BSONEachRow](#bsoneachrow) | ✔ | ✔ | -| [TSKV](#tskv) | ✔ | ✔ | -| [Pretty](#pretty) | ✗ | ✔ | -| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ | -| [PrettyMonoBlock](#prettymonoblock) | ✗ | ✔ | -| [PrettyNoEscapesMonoBlock](#prettynoescapesmonoblock) | ✗ | ✔ | -| [PrettyCompact](#prettycompact) | ✗ | ✔ | -| [PrettyCompactNoEscapes](#prettycompactnoescapes) | ✗ | ✔ | -| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ | -| [PrettyCompactNoEscapesMonoBlock](#prettycompactnoescapesmonoblock) | ✗ | ✔ | -| [PrettySpace](#prettyspace) | ✗ | ✔ | -| [PrettySpaceNoEscapes](#prettyspacenoescapes) | ✗ | ✔ | -| [PrettySpaceMonoBlock](#prettyspacemonoblock) | ✗ | ✔ 
| -| [PrettySpaceNoEscapesMonoBlock](#prettyspacenoescapesmonoblock) | ✗ | ✔ | -| [Prometheus](#prometheus) | ✗ | ✔ | -| [Protobuf](#protobuf) | ✔ | ✔ | -| [ProtobufSingle](#protobufsingle) | ✔ | ✔ | -| [Avro](#data-format-avro) | ✔ | ✔ | -| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | -| [Parquet](#data-format-parquet) | ✔ | ✔ | -| [ParquetMetadata](#data-format-parquet-metadata) | ✔ | ✗ | -| [Arrow](#data-format-arrow) | ✔ | ✔ | -| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | -| [ORC](#data-format-orc) | ✔ | ✔ | -| [RowBinary](#rowbinary) | ✔ | ✔ | -| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ | -| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | -| [Native](#native) | ✔ | ✔ | -| [Null](#null) | ✗ | ✔ | -| [XML](#xml) | ✗ | ✔ | -| [CapnProto](#capnproto) | ✔ | ✔ | -| [LineAsString](#lineasstring) | ✔ | ✔ | -| [Regexp](#data-format-regexp) | ✔ | ✗ | -| [RawBLOB](#rawblob) | ✔ | ✔ | -| [MsgPack](#msgpack) | ✔ | ✔ | -| [MySQLDump](#mysqldump) | ✔ | ✗ | -| [Markdown](#markdown) | ✗ | ✔ | +|-------------------------------------------------------------------------------------------|------|-------| +| [TabSeparated](#tabseparated) | ✔ | ✔ | +| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ | +| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ | +| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ | +| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ | +| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ | +| [Template](#format-template) | ✔ | ✔ | +| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ | +| [CSV](#csv) | ✔ | ✔ | +| [CSVWithNames](#csvwithnames) | ✔ | ✔ | +| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ | +| [CustomSeparated](#format-customseparated) | ✔ | ✔ | +| [CustomSeparatedWithNames](#customseparatedwithnames) | ✔ | ✔ | +| [CustomSeparatedWithNamesAndTypes](#customseparatedwithnamesandtypes) | ✔ | ✔ | +| [SQLInsert](#sqlinsert) | ✗ | ✔ | +| [Values](#data-format-values) | ✔ | ✔ | +| [Vertical](#vertical) | ✗ | ✔ | +| [JSON](#json) | ✔ | ✔ | +| [JSONAsString](#jsonasstring) | ✔ | ✗ | +| [JSONStrings](#jsonstrings) | ✔ | ✔ | +| [JSONColumns](#jsoncolumns) | ✔ | ✔ | +| [JSONColumnsWithMetadata](#jsoncolumnsmonoblock)) | ✔ | ✔ | +| [JSONCompact](#jsoncompact) | ✔ | ✔ | +| [JSONCompactStrings](#jsoncompactstrings) | ✗ | ✔ | +| [JSONCompactColumns](#jsoncompactcolumns) | ✔ | ✔ | +| [JSONEachRow](#jsoneachrow) | ✔ | ✔ | +| [PrettyJSONEachRow](#prettyjsoneachrow) | ✗ | ✔ | +| [JSONEachRowWithProgress](#jsoneachrowwithprogress) | ✗ | ✔ | +| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ | +| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ | +| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ | +| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ | +| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ | +| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ | +| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ | +| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ | +| [JSONObjectEachRow](#jsonobjecteachrow) | ✔ | ✔ | +| [BSONEachRow](#bsoneachrow) | ✔ | ✔ | +| [TSKV](#tskv) | ✔ | ✔ | +| [Pretty](#pretty) | ✗ | ✔ | +| [PrettyNoEscapes](#prettynoescapes) | ✗ | ✔ | +| [PrettyMonoBlock](#prettymonoblock) | ✗ | ✔ | +| [PrettyNoEscapesMonoBlock](#prettynoescapesmonoblock) | ✗ | ✔ | +| 
[PrettyCompact](#prettycompact) | ✗ | ✔ | +| [PrettyCompactNoEscapes](#prettycompactnoescapes) | ✗ | ✔ | +| [PrettyCompactMonoBlock](#prettycompactmonoblock) | ✗ | ✔ | +| [PrettyCompactNoEscapesMonoBlock](#prettycompactnoescapesmonoblock) | ✗ | ✔ | +| [PrettySpace](#prettyspace) | ✗ | ✔ | +| [PrettySpaceNoEscapes](#prettyspacenoescapes) | ✗ | ✔ | +| [PrettySpaceMonoBlock](#prettyspacemonoblock) | ✗ | ✔ | +| [PrettySpaceNoEscapesMonoBlock](#prettyspacenoescapesmonoblock) | ✗ | ✔ | +| [Prometheus](#prometheus) | ✗ | ✔ | +| [Protobuf](#protobuf) | ✔ | ✔ | +| [ProtobufSingle](#protobufsingle) | ✔ | ✔ | +| [Avro](#data-format-avro) | ✔ | ✔ | +| [AvroConfluent](#data-format-avro-confluent) | ✔ | ✗ | +| [Parquet](#data-format-parquet) | ✔ | ✔ | +| [ParquetMetadata](#data-format-parquet-metadata) | ✔ | ✗ | +| [Arrow](#data-format-arrow) | ✔ | ✔ | +| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ | +| [ORC](#data-format-orc) | ✔ | ✔ | +| [One](#data-format-one) | ✔ | ✗ | +| [RowBinary](#rowbinary) | ✔ | ✔ | +| [RowBinaryWithNames](#rowbinarywithnamesandtypes) | ✔ | ✔ | +| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ | +| [RowBinaryWithDefaults](#rowbinarywithdefaults) | ✔ | ✔ | +| [Native](#native) | ✔ | ✔ | +| [Null](#null) | ✗ | ✔ | +| [XML](#xml) | ✗ | ✔ | +| [CapnProto](#capnproto) | ✔ | ✔ | +| [LineAsString](#lineasstring) | ✔ | ✔ | +| [Regexp](#data-format-regexp) | ✔ | ✗ | +| [RawBLOB](#rawblob) | ✔ | ✔ | +| [MsgPack](#msgpack) | ✔ | ✔ | +| [MySQLDump](#mysqldump) | ✔ | ✗ | +| [Markdown](#markdown) | ✗ | ✔ | You can control some format processing parameters with the ClickHouse settings. For more information read the [Settings](/docs/en/operations/settings/settings-formats.md) section. @@ -194,6 +196,7 @@ SELECT * FROM nestedt FORMAT TSV - [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`. - [input_format_tsv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_detect_header) - automatically detect header with names and types in TSV format. Default value - `true`. - [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`. +- [input_format_tsv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_allow_variable_number_of_columns) - allow variable number of columns in TSV format, ignore extra columns and use default values on missing columns. Default value - `false`. ## TabSeparatedRaw {#tabseparatedraw} @@ -471,6 +474,8 @@ The CSV format supports the output of totals and extremes the same way as `TabSe - [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`. - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`. - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`. 
+- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - allow variable number of columns in CSV format, ignore extra columns and use default values on missing columns. Default value - `false`.
+- [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - allow setting a default value for a column when CSV field deserialization fails on a bad value. Default value - `false`.
 
 ## CSVWithNames {#csvwithnames}
 
@@ -498,9 +503,10 @@ the types from input data will be compared with the types of the corresponding c
 
 Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](/docs/en/operations/settings/settings-formats.md/#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_field_delimiter), [format_custom_row_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_after_delimiter) settings, not from format strings.
 
-If setting [input_format_custom_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) is enabled, ClickHouse will automatically detect header with names and types if any.
-
-If setting [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) is enabled, trailing empty lines at the end of file will be skipped.
+Additional settings:
+- [input_format_custom_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) - enables automatic detection of header with names and types if any. Default value - `true`.
+- [input_format_custom_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_custom_skip_trailing_empty_lines) - skip trailing empty lines at the end of file. Default value - `false`.
+- [input_format_custom_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_custom_allow_variable_number_of_columns) - allow variable number of columns in CustomSeparated format, ignore extra columns and use default values on missing columns. Default value - `false`.
 
 There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [TemplateIgnoreSpaces](#templateignorespaces).
 
@@ -1253,11 +1259,16 @@ SELECT * FROM json_each_row_nested
 
 - [input_format_import_nested_json](/docs/en/operations/settings/settings-formats.md/#input_format_import_nested_json) - map nested JSON data to nested tables (it works for JSONEachRow format). Default value - `false`.
 - [input_format_json_read_bools_as_numbers](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_bools_as_numbers) - allow to parse bools as numbers in JSON input formats. Default value - `true`.
-- [input_format_json_read_numbers_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `false`.
-- [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `false`.
+- [input_format_json_read_numbers_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_numbers_as_strings) - allow to parse numbers as strings in JSON input formats. Default value - `true`.
+- [input_format_json_read_arrays_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_arrays_as_strings) - allow to parse JSON arrays as strings in JSON input formats. Default value - `true`.
+- [input_format_json_read_objects_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_read_objects_as_strings) - allow to parse JSON objects as strings in JSON input formats. Default value - `true`.
 - [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings-formats.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`.
+- [input_format_json_try_infer_numbers_from_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_try_infer_numbers_from_strings) - try to infer numbers from string fields during schema inference. Default value - `false`.
+- [input_format_json_try_infer_named_tuples_from_objects](/docs/en/operations/settings/settings-formats.md/#input_format_json_try_infer_named_tuples_from_objects) - try to infer named tuples from JSON objects during schema inference. Default value - `true`.
+- [input_format_json_infer_incomplete_types_as_strings](/docs/en/operations/settings/settings-formats.md/#input_format_json_infer_incomplete_types_as_strings) - use type String for keys that contain only Nulls or empty objects/arrays during schema inference in JSON input formats. Default value - `true`.
 - [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`.
 - [input_format_json_ignore_unknown_keys_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_ignore_unknown_keys_in_named_tuple) - ignore unknown keys in JSON object for named tuples. Default value - `false`.
+- [input_format_json_compact_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_json_compact_allow_variable_number_of_columns) - allow variable number of columns in JSONCompact/JSONCompactEachRow format, ignore extra columns and use default values on missing columns. Default value - `false`.
 - [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`.
 - [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`.
 - [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`.
@@ -1514,6 +1525,23 @@ If setting [input_format_with_types_use_header](/docs/en/operations/settings/set
 the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
 :::
 
+## RowBinaryWithDefaults {#rowbinarywithdefaults}
+
+Similar to [RowBinary](#rowbinary), but with an extra byte before each column that indicates if the default value should be used.
+
+Examples:
+
+```sql
+:) select * from format('RowBinaryWithDefaults', 'x UInt32 default 42, y UInt32', x'010001000000')
+
+┌──x─┬─y─┐
+│ 42 │ 1 │
+└────┴───┘
+```
+
+For column `x` there is only a single byte `01`, which indicates that the default value should be used; no other data follows this byte.
+For column `y` the data starts with byte `00`, which indicates that the column has an actual value that should be read from the subsequent data `01000000`.
+
 ## RowBinary format settings {#row-binary-format-settings}
 
 - [format_binary_max_string_size](/docs/en/operations/settings/settings-formats.md/#format_binary_max_string_size) - The maximum allowed size for String in RowBinary format. Default value - `1GiB`.
@@ -1703,6 +1731,34 @@ You can select data from a ClickHouse table and save them into some file in the
 ``` bash
 $ clickhouse-client --query = "SELECT * FROM test.hits FORMAT CapnProto SETTINGS format_schema = 'schema:Message'"
 ```
+
+### Using autogenerated schema {#using-autogenerated-capn-proto-schema}
+
+If you don't have an external CapnProto schema for your data, you can still output/input data in CapnProto format using an autogenerated schema.
+For example:
+
+```sql
+SELECT * FROM test.hits format CapnProto SETTINGS format_capn_proto_use_autogenerated_schema=1
+```
+
+In this case ClickHouse will autogenerate a CapnProto schema according to the table structure using the function [structureToCapnProtoSchema](../sql-reference/functions/other-functions.md#structure_to_capn_proto_schema) and will use this schema to serialize data in CapnProto format.
+
+You can also read a CapnProto file with an autogenerated schema (in this case the file must be created using the same schema):
+
+```bash
+$ cat hits.bin | clickhouse-client --query "INSERT INTO test.hits SETTINGS format_capn_proto_use_autogenerated_schema=1 FORMAT CapnProto"
+```
+
+The setting [format_capn_proto_use_autogenerated_schema](../operations/settings/settings-formats.md#format_capn_proto_use_autogenerated_schema) is enabled by default and applies if [format_schema](../operations/settings/settings-formats.md#formatschema-format-schema) is not set.
+
+You can also save the autogenerated schema in a file during input/output using the setting [output_format_schema](../operations/settings/settings-formats.md#outputformatschema-output-format-schema). For example:
+
+```sql
+SELECT * FROM test.hits format CapnProto SETTINGS format_capn_proto_use_autogenerated_schema=1, output_format_schema='path/to/schema/schema.capnp'
+```
+
+In this case the autogenerated CapnProto schema will be saved in the file `path/to/schema/schema.capnp`.
+
 ## Prometheus {#prometheus}
 
 Expose metrics in [Prometheus text-based exposition format](https://prometheus.io/docs/instrumenting/exposition_formats/#text-based-format).
@@ -1841,6 +1897,33 @@ ClickHouse inputs and outputs protobuf messages in the `length-delimited` format
It means before every message should be written its length as a [varint](https://developers.google.com/protocol-buffers/docs/encoding#varints).
See also [how to read/write length-delimited protobuf messages in popular languages](https://cwiki.apache.org/confluence/display/GEODE/Delimiting+Protobuf+Messages).

+### Using autogenerated schema {#using-autogenerated-protobuf-schema}
+
+If you don't have an external Protobuf schema for your data, you can still output/input data in Protobuf format using an autogenerated schema.
+For example:
+
+```sql
+SELECT * FROM test.hits format Protobuf SETTINGS format_protobuf_use_autogenerated_schema=1
+```
+
+In this case ClickHouse will autogenerate a Protobuf schema according to the table structure using the function [structureToProtobufSchema](../sql-reference/functions/other-functions.md#structure_to_protobuf_schema) and will use this schema to serialize data in Protobuf format.
+
+You can also read a Protobuf file with an autogenerated schema (in this case the file must have been created using the same schema):
+
+```bash
+$ cat hits.bin | clickhouse-client --query "INSERT INTO test.hits SETTINGS format_protobuf_use_autogenerated_schema=1 FORMAT Protobuf"
+```
+
+The setting [format_protobuf_use_autogenerated_schema](../operations/settings/settings-formats.md#format_protobuf_use_autogenerated_schema) is enabled by default and applies if [format_schema](../operations/settings/settings-formats.md#formatschema-format-schema) is not set.
+
+You can also save the autogenerated schema to a file during input/output using the setting [output_format_schema](../operations/settings/settings-formats.md#outputformatschema-output-format-schema). For example:
+
+```sql
+SELECT * FROM test.hits format Protobuf SETTINGS format_protobuf_use_autogenerated_schema=1, output_format_schema='path/to/schema/schema.proto'
+```
+
+In this case the autogenerated Protobuf schema will be saved in the file `path/to/schema/schema.proto`.
+
## ProtobufSingle {#protobufsingle}

Same as [Protobuf](#protobuf) but for storing/parsing single Protobuf message without length delimiters.
@@ -2060,6 +2143,7 @@ To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/t
- [input_format_parquet_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_case_insensitive_column_matching) - ignore case when matching Parquet columns with ClickHouse columns. Default value - `false`.
- [input_format_parquet_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_allow_missing_columns) - allow missing columns while reading Parquet data. Default value - `false`.
- [input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Parquet format. Default value - `false`.
+- [input_format_parquet_local_file_min_bytes_for_seek](/docs/en/operations/settings/settings-formats.md/#input_format_parquet_local_file_min_bytes_for_seek) - the minimum number of bytes required for a local file read to perform a seek instead of reading and discarding data in the Parquet input format. Default value - `8192`.
- [output_format_parquet_fixed_string_as_fixed_byte_array](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_fixed_string_as_fixed_byte_array) - use Parquet FIXED_LENGTH_BYTE_ARRAY type instead of Binary/String for FixedString columns. Default value - `true`. - [output_format_parquet_version](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_version) - The version of Parquet format used in output format. Default value - `2.latest`. - [output_format_parquet_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_parquet_compression_method) - compression method used in output Parquet format. Default value - `snappy`. @@ -2261,7 +2345,6 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam - [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`. - [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. -- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. - [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. - [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. - [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`. @@ -2327,7 +2410,6 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename. - [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`. - [output_format_orc_compression_method](/docs/en/operations/settings/settings-formats.md/#output_format_orc_compression_method) - compression method used in output ORC format. Default value - `none`. -- [input_format_arrow_import_nested](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_import_nested) - allow inserting array of structs into Nested table in Arrow input format. Default value - `false`. - [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`. - [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`. 
- [input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_skip_columns_with_unsupported_types_in_schema_inference) - allow skipping columns with unsupported types while schema inference for Arrow format. Default value - `false`.
@@ -2335,6 +2417,34 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT ORC" > {filename.

To exchange data with Hadoop, you can use [HDFS table engine](/docs/en/engines/table-engines/integrations/hdfs.md).

+## One {#data-format-one}
+
+A special input format that doesn't read any data from the file and returns only one row with a column of type `UInt8`, named `dummy`, with value `0` (like the `system.one` table).
+It can be used with the virtual columns `_file`/`_path` to list all files without reading the actual data.
+
+Example:
+
+Query:
+```sql
+SELECT _file FROM file('path/to/files/data*', One);
+```
+
+Result:
+```text
+┌─_file────┐
+│ data.csv │
+└──────────┘
+┌─_file──────┐
+│ data.jsonl │
+└────────────┘
+┌─_file────┐
+│ data.tsv │
+└──────────┘
+┌─_file────────┐
+│ data.parquet │
+└──────────────┘
+```
+
## LineAsString {#lineasstring}

In this format, every line of input data is interpreted as a single string value. This format can only be parsed for table with a single field of type [String](/docs/en/sql-reference/data-types/string.md). The remaining columns must be set to [DEFAULT](/docs/en/sql-reference/statements/create/table.md/#default) or [MATERIALIZED](/docs/en/sql-reference/statements/create/table.md/#materialized), or omitted.
diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md
index 3a7f6d4d8547..0e2c0c00e4c1 100644
--- a/docs/en/interfaces/http.md
+++ b/docs/en/interfaces/http.md
@@ -18,6 +18,8 @@ $ curl 'http://localhost:8123/'
Ok.
```

+Also see: [HTTP response codes caveats](#http_response_codes_caveats).
+
Sometimes, `curl` command is not available on user operating systems. On Ubuntu or Debian, run `sudo apt install curl`. Please refer this [documentation](https://curl.se/download.html) to install it before running the examples.

Web UI can be accessed here: `http://localhost:8123/play`.
@@ -56,7 +58,7 @@ Connection: Close
Content-Type: text/tab-separated-values; charset=UTF-8
X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal
X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f
-X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}

1
```

@@ -286,9 +288,9 @@ Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you

You can receive information about the progress of a query in `X-ClickHouse-Progress` response headers. To do this, enable [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers).
Example of the header sequence:

``` text
-X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"}
+X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334"}
+X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","elapsed_ns":"992334"}
+X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","elapsed_ns":"1232334"}
```

Possible header fields:
@@ -323,6 +325,27 @@ $ curl -sS 'http://localhost:8123/?max_result_bytes=4000000&buffer_size=3000000&

Use buffering to avoid situations where a query processing error occurred after the response code and HTTP headers were sent to the client. In this situation, an error message is written at the end of the response body, and on the client-side, the error can only be detected at the parsing stage.

+## HTTP response codes caveats {#http_response_codes_caveats}
+
+Because of a limitation of the HTTP protocol, an HTTP 200 response code does not guarantee that the query was successful.
+
+Here is an example:
+
+```
+curl -v -Ss "http://localhost:8123/?max_block_size=1&query=select+sleepEachRow(0.001),throwIf(number=2)+from+numbers(5)"
+*   Trying 127.0.0.1:8123...
+...
+< HTTP/1.1 200 OK
+...
+Code: 395. DB::Exception: Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(equals(number, 2) :: 1) -> throwIf(equals(number, 2))
+```
+
+The reason for this behavior is the nature of the HTTP protocol. The HTTP header is sent first with an HTTP code of 200, followed by the HTTP body, and then the error is injected into the body as plain text.
+This behavior is independent of the format used, whether it's `Native`, `TSV`, or `JSON`; the error message will always be in the middle of the response stream.
+You can mitigate this problem by enabling `wait_end_of_query=1` ([Response Buffering](#response-buffering)). In this case, the sending of the HTTP header is delayed until the entire query is resolved.
+However, this does not completely solve the problem because the result must still fit within the `http_response_buffer_size`, and other settings like `send_progress_in_http_headers` can interfere with the delay of the header.
+The only way to catch all errors is to analyze the HTTP body before parsing it using the required format.
+
### Queries with Parameters {#cli-queries-with-parameters}

You can create a query with parameters and pass values for them from the corresponding HTTP request parameters. For more information, see [Queries with Parameters for CLI](../interfaces/cli.md#cli-queries-with-parameters).
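For example, here is a minimal sketch (it assumes a server listening on `localhost:8123` with the default user; the value of the query parameter `id` is supplied via the `param_id` URL parameter):

``` bash
$ curl -sS "http://localhost:8123/?param_id=2" -d "SELECT {id:UInt8}"
2
```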
@@ -416,7 +439,7 @@ $ curl -v 'http://localhost:8123/predefined_query' < X-ClickHouse-Format: Template < X-ClickHouse-Timezone: Asia/Shanghai < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"} < # HELP "Query" "Number of executing queries" # TYPE "Query" counter @@ -581,7 +604,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"} < * Connection #0 to host localhost left intact Say Hi!% @@ -621,7 +644,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler' < Content-Type: text/plain; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"} < * Connection #0 to host localhost left intact
%
@@ -673,7 +696,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
Absolute Path File
* Connection #0 to host localhost left intact
@@ -692,8 +715,88 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
Relative Path File
* Connection #0 to host localhost left intact
```
+
+## Valid JSON/XML response on exception during HTTP streaming {#valid-output-on-exception-http-streaming}
+
+During query execution over HTTP, an exception can happen when part of the data has already been sent. Usually an exception is sent to the client in plain text,
+even if some specific data format was used to output data, so the output may become invalid in terms of the specified data format.
+To prevent this, you can use the setting `http_write_exception_in_output_format` (enabled by default), which tells ClickHouse to write the exception in the specified format (currently supported for XML and JSON* formats).
+
+Examples:
+
+```bash
+$ curl 'http://localhost:8123/?query=SELECT+number,+throwIf(number>2)+from+system.numbers+format+JSON+settings+max_block_size=1&http_write_exception_in_output_format=1'
+{
+    "meta":
+    [
+        {
+            "name": "number",
+            "type": "UInt64"
+        },
+        {
+            "name": "throwIf(greater(number, 2))",
+            "type": "UInt8"
+        }
+    ],
+
+    "data":
+    [
+        {
+            "number": "0",
+            "throwIf(greater(number, 2))": 0
+        },
+        {
+            "number": "1",
+            "throwIf(greater(number, 2))": 0
+        },
+        {
+            "number": "2",
+            "throwIf(greater(number, 2))": 0
+        }
+    ],
+
+    "rows": 3,
+
+    "exception": "Code: 395. DB::Exception: Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 2) :: 2) -> throwIf(greater(number, 2)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) (version 23.8.1.1)"
+}
+```
+
+```bash
+$ curl 'http://localhost:8123/?query=SELECT+number,+throwIf(number>2)+from+system.numbers+format+XML+settings+max_block_size=1&http_write_exception_in_output_format=1'
+<?xml version='1.0' encoding='UTF-8' ?>
+<result>
+    <meta>
+        <columns>
+            <column>
+                <name>number</name>
+                <type>UInt64</type>
+            </column>
+            <column>
+                <name>throwIf(greater(number, 2))</name>
+                <type>UInt8</type>
+            </column>
+        </columns>
+    </meta>
+    <data>
+        <row>
+            <number>0</number>
+            <field>0</field>
+        </row>
+        <row>
+            <number>1</number>
+            <field>0</field>
+        </row>
+        <row>
+            <number>2</number>
+            <field>0</field>
+        </row>
+    </data>
+    <rows>3</rows>
+    <exception>Code: 395. DB::Exception: Value passed to 'throwIf' function is non-zero: while executing 'FUNCTION throwIf(greater(number, 2) :: 2) -> throwIf(greater(number, 2)) UInt8 : 1'. (FUNCTION_THROW_IF_VALUE_IS_NON_ZERO) (version 23.8.1.1)</exception>
+</result>
+```
diff --git a/docs/en/interfaces/images/mysql1.png b/docs/en/interfaces/images/mysql1.png
new file mode 100644
index 000000000000..f5ac85b6e2ca
Binary files /dev/null and b/docs/en/interfaces/images/mysql1.png differ
diff --git a/docs/en/interfaces/images/mysql2.png b/docs/en/interfaces/images/mysql2.png
new file mode 100644
index 000000000000..7b999e416657
Binary files /dev/null and b/docs/en/interfaces/images/mysql2.png differ
diff --git a/docs/en/interfaces/images/mysql3.png b/docs/en/interfaces/images/mysql3.png
new file mode 100644
index 000000000000..be6cb9630034
Binary files /dev/null and b/docs/en/interfaces/images/mysql3.png differ
diff --git a/docs/en/interfaces/images/mysql4.png b/docs/en/interfaces/images/mysql4.png
new file mode 100644
index 000000000000..3b5ce1e844d6
Binary files /dev/null and b/docs/en/interfaces/images/mysql4.png differ
diff --git a/docs/en/interfaces/images/mysql5.png b/docs/en/interfaces/images/mysql5.png
new file mode 100644
index 000000000000..fc026a8b7537
Binary files /dev/null and b/docs/en/interfaces/images/mysql5.png differ
diff --git a/docs/en/interfaces/mysql.md b/docs/en/interfaces/mysql.md
index fab3ba427580..32c612dfa5f7 100644
--- a/docs/en/interfaces/mysql.md
+++ b/docs/en/interfaces/mysql.md
@@ -6,7 +6,38 @@ sidebar_label: MySQL Interface

# MySQL Interface

-ClickHouse supports MySQL wire protocol. To enable the MySQL wire protocol, add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d` folder:
+ClickHouse supports the MySQL wire protocol. This allows MySQL-compatible tools to interact with ClickHouse seamlessly (e.g. [Looker Studio](../integrations/data-visualization/looker-studio-and-clickhouse.md)).
+
+## Enabling the MySQL Interface On ClickHouse Cloud
+
+:::note
+The MySQL interface for ClickHouse Cloud is currently in private preview. Please contact support@clickhouse.com to enable this feature for your ClickHouse Cloud service.
+:::
+
+1. After creating your ClickHouse Cloud Service, on the credentials screen, select the MySQL tab.
+
+![Credentials screen - Prompt](./images/mysql1.png)
+
+2. Toggle the switch to enable the MySQL interface for this specific service. This will expose port `3306` for this service and prompt you with your MySQL connection screen that includes your unique MySQL username. The password will be the same as the service's default user password.
+
+![Credentials screen - Enabled MySQL](./images/mysql2.png)
+
+Alternatively, in order to enable the MySQL interface for an existing service:
+
+1. Ensure your service is in the `Running` state, then click on the "View connection string" button for the service you want to enable the MySQL interface for.
+
+![Connection screen - Prompt MySQL](./images/mysql3.png)
+
+2. Toggle the switch to enable the MySQL interface for this specific service. This will prompt you to enter the default password.
+
+![Connection screen - Prompt MySQL](./images/mysql4.png)
+
+3. After entering the password, you will be shown the MySQL connection string for this service.
+![Connection screen - MySQL Enabled](./images/mysql5.png)
+
+## Enabling the MySQL Interface On Self-managed ClickHouse
+
+Add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d/` [folder](../operations/configuration-files):

``` xml
@@ -20,7 +51,7 @@ Startup your ClickHouse server and look for a log message similar to the following that mentions Listening for MySQL compatibility protocol
```
{} <Information> Application: Listening for MySQL compatibility protocol: 127.0.0.1:9004
```

-## Connect mysql to ClickHouse
+## Connect MySQL to ClickHouse

The following command demonstrates how to connect the MySQL client `mysql` to ClickHouse:
diff --git a/docs/en/interfaces/overview.md b/docs/en/interfaces/overview.md
index 8f16dcf5f835..e60aff927c4c 100644
--- a/docs/en/interfaces/overview.md
+++ b/docs/en/interfaces/overview.md
@@ -21,6 +21,11 @@ In most cases it is recommended to use an appropriate tool or library instead of
- [ODBC driver](../interfaces/odbc.md)
- [C++ client library](../interfaces/cpp.md)

+ClickHouse server provides embedded visual interfaces for power users:
+
+- Play UI: open `/play` in the browser.
+- Advanced Dashboard: open `/dashboard` in the browser.
+
There are also a wide range of third-party libraries for working with ClickHouse:

- [Client libraries](../interfaces/third-party/client-libraries.md)
diff --git a/docs/en/interfaces/schema-inference.md b/docs/en/interfaces/schema-inference.md
index a4c7eb61e50a..0aadb09730a6 100644
--- a/docs/en/interfaces/schema-inference.md
+++ b/docs/en/interfaces/schema-inference.md
@@ -228,8 +228,8 @@ For most input formats schema inference reads some data to determine its structure
To prevent inferring the same schema every time ClickHouse read the data from the same file, the inferred schema is cached and when accessing the same file again, ClickHouse will use the schema from the cache.

There are special settings that control this cache:
-- `schema_inference_cache_max_elements_for_{file/s3/hdfs/url}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config.
-- `schema_inference_use_cache_for_{file,s3,hdfs,url}` - allows turning on/off using cache for schema inference. These settings can be used in queries.
+- `schema_inference_cache_max_elements_for_{file/s3/hdfs/url/azure}` - the maximum number of cached schemas for the corresponding table function. The default value is `4096`. These settings should be set in the server config.
+- `schema_inference_use_cache_for_{file,s3,hdfs,url,azure}` - allows turning on/off using cache for schema inference. These settings can be used in queries.

The schema of the file can be changed by modifying the data or by changing format settings.
For this reason, the schema inference cache identifies the schema by file source, format name, used format settings, and the last modification time of the file.
@@ -389,9 +425,25 @@ DESC format(JSONEachRow, '{"arr" : [null, 42, null]}')
└──────┴────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```

-Tuples:
+Named tuples:
+
+When the setting `input_format_json_try_infer_named_tuples_from_objects` is enabled, ClickHouse will try to infer named Tuples from JSON objects during schema inference.
+The resulting named Tuple will contain all elements from all corresponding JSON objects from sample data.

-In JSON formats we treat Arrays with elements of different types as Tuples.
+```sql
+SET input_format_json_try_infer_named_tuples_from_objects = 1;
+DESC format(JSONEachRow, '{"obj" : {"a" : 42, "b" : "Hello"}}, {"obj" : {"a" : 43, "c" : [1, 2, 3]}}, {"obj" : {"d" : {"e" : 42}}}')
+```
+
+```response
+┌─name─┬─type───────────────────────────────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ obj  │ Tuple(a Nullable(Int64), b Nullable(String), c Array(Nullable(Int64)), d Tuple(e Nullable(Int64))) │              │                    │         │                  │                │
+└──────┴────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
+
+Unnamed Tuples:
+
+In JSON formats we treat Arrays with elements of different types as Unnamed Tuples.
```sql
DESC format(JSONEachRow, '{"tuple" : [1, "Hello, World!", [1, 2, 3]]}')
```
@@ -418,7 +434,10 @@ DESC format(JSONEachRow, $$
Maps:

In JSON we can read objects with values of the same type as Map type.
+Note: this will work only when the settings `input_format_json_read_objects_as_strings` and `input_format_json_try_infer_named_tuples_from_objects` are disabled.
+
```sql
+SET input_format_json_read_objects_as_strings = 0, input_format_json_try_infer_named_tuples_from_objects = 0;
DESC format(JSONEachRow, '{"map" : {"key1" : 42, "key2" : 24, "key3" : 4}}')
```
```response
@@ -448,14 +467,22 @@ Nested complex types:
DESC format(JSONEachRow, '{"value" : [[[42, 24], []], {"key1" : 42, "key2" : 24}]}')
```
```response
-┌─name──┬─type───────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
-│ value │ Tuple(Array(Array(Nullable(Int64))), Map(String, Nullable(Int64))) │              │                    │         │                  │                │
-└───────┴────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+┌─name──┬─type─────────────────────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ value │ Tuple(Array(Array(Nullable(String))), Tuple(key1 Nullable(Int64), key2 Nullable(Int64))) │              │                    │         │                  │                │
+└───────┴──────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```

-If ClickHouse cannot determine the type, because the data contains only nulls, an exception will be thrown:
+If ClickHouse cannot determine the type for some key because the data contains only nulls/empty objects/empty arrays, type `String` will be used if the setting `input_format_json_infer_incomplete_types_as_strings` is enabled; otherwise an exception will be thrown:
+```sql
+DESC format(JSONEachRow, '{"arr" : [null, null]}') SETTINGS input_format_json_infer_incomplete_types_as_strings = 1;
+```
+```response
+┌─name─┬─type────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ arr  │ Array(Nullable(String)) │              │                    │         │                  │                │
+└──────┴─────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
```sql
-DESC format(JSONEachRow, '{"arr" : [null, null]}')
+DESC format(JSONEachRow, '{"arr" : [null, null]}') SETTINGS input_format_json_infer_incomplete_types_as_strings = 0;
```
```response
Code: 652. DB::Exception: Received from localhost:9000. DB::Exception:
@@ -466,31 +493,11 @@ most likely this column contains only Nulls or empty Arrays/Maps.

#### JSON settings {#json-settings}

-##### input_format_json_read_objects_as_strings
-
-Enabling this setting allows reading nested JSON objects as strings.
-This setting can be used to read nested JSON objects without using JSON object type.
-
-This setting is enabled by default.
-
-```sql
-SET input_format_json_read_objects_as_strings = 1;
-DESC format(JSONEachRow, $$
-    {"obj" : {"key1" : 42, "key2" : [1,2,3,4]}}
-    {"obj" : {"key3" : {"nested_key" : 1}}}
-    $$)
-```
-```response
-┌─name─┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
-│ obj  │ Nullable(String) │              │                    │         │                  │                │
-└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
-```
-
##### input_format_json_try_infer_numbers_from_strings

Enabling this setting allows inferring numbers from string values.

-This setting is enabled by default.
+This setting is disabled by default.

**Example:**

@@ -507,11 +514,69 @@ DESC format(JSONEachRow, $$
└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```

+##### input_format_json_try_infer_named_tuples_from_objects
+
+Enabling this setting allows inferring named Tuples from JSON objects. The resulting named Tuple will contain all elements from all corresponding JSON objects from sample data.
+It can be useful when the JSON data is not sparse, so the sample of data will contain all possible object keys.
+
+This setting is enabled by default.
+
+**Example**
+
+```sql
+SET input_format_json_try_infer_named_tuples_from_objects = 1;
+DESC format(JSONEachRow, '{"obj" : {"a" : 42, "b" : "Hello"}}, {"obj" : {"a" : 43, "c" : [1, 2, 3]}}, {"obj" : {"d" : {"e" : 42}}}')
+```
+
+Result:
+
+```
+┌─name─┬─type───────────────────────────────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ obj  │ Tuple(a Nullable(Int64), b Nullable(String), c Array(Nullable(Int64)), d Tuple(e Nullable(Int64))) │              │                    │         │                  │                │
+└──────┴────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
+
+```sql
+SET input_format_json_try_infer_named_tuples_from_objects = 1;
+DESC format(JSONEachRow, '{"array" : [{"a" : 42, "b" : "Hello"}, {}, {"c" : [1,2,3]}, {"d" : "2020-01-01"}]}')
+```
+
+Result:
+
+```
+┌─name──┬─type────────────────────────────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ array │ Array(Tuple(a Nullable(Int64), b Nullable(String), c Array(Nullable(Int64)), d Nullable(Date))) │              │                    │         │                  │                │
+└───────┴─────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
+
+##### input_format_json_read_objects_as_strings
+
+Enabling this setting allows reading nested JSON objects as strings.
+This setting can be used to read nested JSON objects without using JSON object type.
+
+This setting is enabled by default.
+
+Note: enabling this setting takes effect only if the setting `input_format_json_try_infer_named_tuples_from_objects` is disabled.
+
+```sql
+SET input_format_json_read_objects_as_strings = 1, input_format_json_try_infer_named_tuples_from_objects = 0;
+DESC format(JSONEachRow, $$
+    {"obj" : {"key1" : 42, "key2" : [1,2,3,4]}}
+    {"obj" : {"key3" : {"nested_key" : 1}}}
+    $$)
+```
+```response
+┌─name─┬─type─────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ obj  │ Nullable(String) │              │                    │         │                  │                │
+└──────┴──────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+```
+
+
##### input_format_json_read_numbers_as_strings

Enabling this setting allows reading numeric values as strings.

-This setting is disabled by default.
+This setting is enabled by default.

**Example**

@@ -549,6 +614,49 @@ DESC format(JSONEachRow, $$
└───────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```

+##### input_format_json_read_arrays_as_strings
+
+Enabling this setting allows reading JSON array values as strings.
+
+This setting is enabled by default.
+
+**Example**
+
+```sql
+SET input_format_json_read_arrays_as_strings = 1;
+SELECT arr, toTypeName(arr), JSONExtractArrayRaw(arr)[3] from format(JSONEachRow, 'arr String', '{"arr" : [1, "Hello", [1,2,3]]}');
+```
+```response
+┌─arr───────────────────┬─toTypeName(arr)─┬─arrayElement(JSONExtractArrayRaw(arr), 3)─┐
+│ [1, "Hello", [1,2,3]] │ String          │ [1,2,3]                                   │
+└───────────────────────┴─────────────────┴───────────────────────────────────────────┘
+```
+
+##### input_format_json_infer_incomplete_types_as_strings
+
+Enabling this setting allows using the String type for JSON keys that contain only `Null`/`{}`/`[]` in the data sample during schema inference.
+In JSON formats any value can be read as a String if all the corresponding settings are enabled (they are all enabled by default). By using the String type for keys with unknown types, we can avoid errors like `Cannot determine type for column 'column_name' by first 25000 rows of data, most likely this column contains only Nulls or empty Arrays/Maps` during schema inference.
+
+Example:
+
+```sql
+SET input_format_json_infer_incomplete_types_as_strings = 1, input_format_json_try_infer_named_tuples_from_objects = 1;
+DESCRIBE format(JSONEachRow, '{"obj" : {"a" : [1,2,3], "b" : "hello", "c" : null, "d" : {}, "e" : []}}');
+SELECT * FROM format(JSONEachRow, '{"obj" : {"a" : [1,2,3], "b" : "hello", "c" : null, "d" : {}, "e" : []}}');
+```
+
+Result:
+```
+┌─name─┬─type───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
+│ obj  │ Tuple(a Array(Nullable(Int64)), b Nullable(String), c Nullable(String), d Nullable(String), e Array(Nullable(String))) │              │                    │         │                  │                │
+└──────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
+
+┌─obj────────────────────────────┐
+│ ([1,2,3],'hello',NULL,'{}',[]) │
+└────────────────────────────────┘
+```
+
### CSV {#csv}

In CSV format ClickHouse extracts column values from the row according to delimiters. ClickHouse expects all types except numbers and strings to be enclosed in double quotes.
If the value is in double quotes, ClickHouse tries to parse diff --git a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md index f7603994163f..e229198bdfda 100644 --- a/docs/en/interfaces/third-party/client-libraries.md +++ b/docs/en/interfaces/third-party/client-libraries.md @@ -2,6 +2,7 @@ slug: /en/interfaces/third-party/client-libraries sidebar_position: 26 sidebar_label: Client Libraries +description: Third-party client libraries --- # Client Libraries from Third-party Developers @@ -10,68 +11,68 @@ sidebar_label: Client Libraries ClickHouse Inc does **not** maintain the libraries listed below and hasn’t done any extensive testing to ensure their quality. ::: -- Python - - [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm) - - [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver) - - [clickhouse-client](https://github.com/yurial/clickhouse-client) - - [aiochclient](https://github.com/maximdanilchenko/aiochclient) - - [asynch](https://github.com/long2ice/asynch) -- PHP - - [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse) - - [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client) - - [bozerkins/clickhouse-client](https://packagist.org/packages/bozerkins/clickhouse-client) - - [simpod/clickhouse-client](https://packagist.org/packages/simpod/clickhouse-client) - - [seva-code/php-click-house-client](https://packagist.org/packages/seva-code/php-click-house-client) - - [SeasClick C++ client](https://github.com/SeasX/SeasClick) - - [one-ck](https://github.com/lizhichao/one-ck) - - [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel) - - [kolya7k ClickHouse PHP extension](https://github.com//kolya7k/clickhouse-php) - - [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php) -- Go - - [clickhouse](https://github.com/kshvakov/clickhouse/) - - [go-clickhouse](https://github.com/roistat/go-clickhouse) - - [chconn](https://github.com/vahid-sohrabloo/chconn) - - [mailrugo-clickhouse](https://github.com/mailru/go-clickhouse) - - [golang-clickhouse](https://github.com/leprosus/golang-clickhouse) - - [uptrace/go-clickhouse](https://clickhouse.uptrace.dev/) -- Swift - - [ClickHouseNIO](https://github.com/patrick-zippenfenig/ClickHouseNIO) - - [ClickHouseVapor ORM](https://github.com/patrick-zippenfenig/ClickHouseVapor) -- NodeJs - - [clickhouse (NodeJs)](https://github.com/TimonKK/clickhouse) - - [node-clickhouse](https://github.com/apla/node-clickhouse) - - [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse) - - [clickhouse-client](https://github.com/depyronick/clickhouse-client) - - [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm) -- Perl - - [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse) - - [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse) - - [AnyEvent-ClickHouse](https://metacpan.org/release/AnyEvent-ClickHouse) -- Ruby - - [ClickHouse (Ruby)](https://github.com/shlima/click_house) - - [clickhouse-activerecord](https://github.com/PNixx/clickhouse-activerecord) -- Rust - - [clickhouse.rs](https://github.com/loyd/clickhouse.rs) - - [clickhouse-rs](https://github.com/suharev7/clickhouse-rs) - - [Klickhouse](https://github.com/Protryon/klickhouse) -- R - - [RClickHouse](https://github.com/IMSMWU/RClickHouse) -- Java - - [clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java) - - 
[clickhouse-client](https://github.com/Ecwid/clickhouse-client) -- Scala - - [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client) -- Kotlin - - [AORM](https://github.com/TanVD/AORM) -- C# - - [Octonica.ClickHouseClient](https://github.com/Octonica/ClickHouseClient) - - [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net) - - [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client) - - [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net) -- Elixir - - [clickhousex](https://github.com/appodeal/clickhousex/) - - [pillar](https://github.com/sofakingworld/pillar) -- Nim - - [nim-clickhouse](https://github.com/leonardoce/nim-clickhouse) -- Haskell - - [hdbc-clickhouse](https://github.com/zaneli/hdbc-clickhouse) +### Python + - [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm) + - [clickhouse-driver](https://github.com/mymarilyn/clickhouse-driver) + - [clickhouse-client](https://github.com/yurial/clickhouse-client) + - [aiochclient](https://github.com/maximdanilchenko/aiochclient) + - [asynch](https://github.com/long2ice/asynch) +### PHP + - [smi2/phpclickhouse](https://packagist.org/packages/smi2/phpClickHouse) + - [8bitov/clickhouse-php-client](https://packagist.org/packages/8bitov/clickhouse-php-client) + - [bozerkins/clickhouse-client](https://packagist.org/packages/bozerkins/clickhouse-client) + - [simpod/clickhouse-client](https://packagist.org/packages/simpod/clickhouse-client) + - [seva-code/php-click-house-client](https://packagist.org/packages/seva-code/php-click-house-client) + - [SeasClick C++ client](https://github.com/SeasX/SeasClick) + - [one-ck](https://github.com/lizhichao/one-ck) + - [glushkovds/phpclickhouse-laravel](https://packagist.org/packages/glushkovds/phpclickhouse-laravel) + - [kolya7k ClickHouse PHP extension](https://github.com//kolya7k/clickhouse-php) + - [hyvor/clickhouse-php](https://github.com/hyvor/clickhouse-php) +### Go + - [clickhouse](https://github.com/kshvakov/clickhouse/) + - [go-clickhouse](https://github.com/roistat/go-clickhouse) + - [chconn](https://github.com/vahid-sohrabloo/chconn) + - [mailrugo-clickhouse](https://github.com/mailru/go-clickhouse) + - [golang-clickhouse](https://github.com/leprosus/golang-clickhouse) + - [uptrace/go-clickhouse](https://clickhouse.uptrace.dev/) +### Swift + - [ClickHouseNIO](https://github.com/patrick-zippenfenig/ClickHouseNIO) + - [ClickHouseVapor ORM](https://github.com/patrick-zippenfenig/ClickHouseVapor) +### NodeJs + - [clickhouse (NodeJs)](https://github.com/TimonKK/clickhouse) + - [node-clickhouse](https://github.com/apla/node-clickhouse) + - [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse) + - [clickhouse-client](https://github.com/depyronick/clickhouse-client) + - [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm) +### Perl + - [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse) + - [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse) + - [AnyEvent-ClickHouse](https://metacpan.org/release/AnyEvent-ClickHouse) +### Ruby + - [ClickHouse (Ruby)](https://github.com/shlima/click_house) + - [clickhouse-activerecord](https://github.com/PNixx/clickhouse-activerecord) +### Rust + - [clickhouse.rs](https://github.com/loyd/clickhouse.rs) + - [clickhouse-rs](https://github.com/suharev7/clickhouse-rs) + - [Klickhouse](https://github.com/Protryon/klickhouse) +### R + - [RClickHouse](https://github.com/IMSMWU/RClickHouse) +### Java + - 
[clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java) + - [clickhouse-client](https://github.com/Ecwid/clickhouse-client) +### Scala + - [clickhouse-scala-client](https://github.com/crobox/clickhouse-scala-client) +### Kotlin + - [AORM](https://github.com/TanVD/AORM) +### C# + - [Octonica.ClickHouseClient](https://github.com/Octonica/ClickHouseClient) + - [ClickHouse.Ado](https://github.com/killwort/ClickHouse-Net) + - [ClickHouse.Client](https://github.com/DarkWanderer/ClickHouse.Client) + - [ClickHouse.Net](https://github.com/ilyabreev/ClickHouse.Net) +### Elixir + - [clickhousex](https://github.com/appodeal/clickhousex/) + - [pillar](https://github.com/sofakingworld/pillar) +### Nim + - [nim-clickhouse](https://github.com/leonardoce/nim-clickhouse) +### Haskell + - [hdbc-clickhouse](https://github.com/zaneli/hdbc-clickhouse) diff --git a/docs/en/interfaces/third-party/integrations.md b/docs/en/interfaces/third-party/integrations.md index 3e1b1e84f5d4..a9f1af93495b 100644 --- a/docs/en/interfaces/third-party/integrations.md +++ b/docs/en/interfaces/third-party/integrations.md @@ -83,8 +83,8 @@ ClickHouse, Inc. does **not** maintain the tools and libraries listed below and - Python - [SQLAlchemy](https://www.sqlalchemy.org) - [sqlalchemy-clickhouse](https://github.com/cloudflare/sqlalchemy-clickhouse) (uses [infi.clickhouse_orm](https://github.com/Infinidat/infi.clickhouse_orm)) - - [pandas](https://pandas.pydata.org) - - [pandahouse](https://github.com/kszucs/pandahouse) + - [PyArrow/Pandas](https://pandas.pydata.org) + - [Ibis](https://github.com/ibis-project/ibis) - PHP - [Doctrine](https://www.doctrine-project.org/) - [dbal-clickhouse](https://packagist.org/packages/friendsofdoctrine/dbal-clickhouse) diff --git a/docs/en/operations/backup.md b/docs/en/operations/backup.md index c3ddee07d0b3..6068b185ede2 100644 --- a/docs/en/operations/backup.md +++ b/docs/en/operations/backup.md @@ -30,7 +30,7 @@ description: In order to effectively mitigate possible human errors, you should ``` :::note ALL -`ALL` is only applicable to the `RESTORE` command prior to version 23.4 of Clickhouse. +Prior to version 23.4 of ClickHouse, `ALL` was only applicable to the `RESTORE` command. ::: ## Background @@ -84,6 +84,8 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des - `password` for the file on disk - `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')` - `structure_only`: if enabled, allows to only backup or restore the CREATE statements without the data of tables + - `storage_policy`: storage policy for the tables being restored. See [Using Multiple Block Devices for Data Storage](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes). This setting is only applicable to the `RESTORE` command. The specified storage policy applies only to tables with an engine from the `MergeTree` family. + - `s3_storage_class`: the storage class used for S3 backup. For example, `STANDARD` ### Usage examples @@ -204,6 +206,55 @@ end_time: 2022-08-30 09:21:46 1 row in set. Elapsed: 0.002 sec. 
```

+Along with the `system.backups` table, all backup and restore operations are also tracked in the system log table [backup_log](../operations/system-tables/backup_log.md):
+```
+SELECT *
+FROM system.backup_log
+WHERE id = '7678b0b3-f519-4e6e-811f-5a0781a4eb52'
+ORDER BY event_time_microseconds ASC
+FORMAT Vertical
+```
+```response
+Row 1:
+──────
+event_date: 2023-08-18
+event_time_microseconds: 2023-08-18 11:13:43.097414
+id: 7678b0b3-f519-4e6e-811f-5a0781a4eb52
+name: Disk('backups', '1.zip')
+status: CREATING_BACKUP
+error:
+start_time: 2023-08-18 11:13:43
+end_time: 1970-01-01 03:00:00
+num_files: 0
+total_size: 0
+num_entries: 0
+uncompressed_size: 0
+compressed_size: 0
+files_read: 0
+bytes_read: 0
+
+Row 2:
+──────
+event_date: 2023-08-18
+event_time_microseconds: 2023-08-18 11:13:43.174782
+id: 7678b0b3-f519-4e6e-811f-5a0781a4eb52
+name: Disk('backups', '1.zip')
+status: BACKUP_FAILED
+#highlight-next-line
+error: Code: 598. DB::Exception: Backup Disk('backups', '1.zip') already exists. (BACKUP_ALREADY_EXISTS) (version 23.8.1.1)
+start_time: 2023-08-18 11:13:43
+end_time: 2023-08-18 11:13:43
+num_files: 0
+total_size: 0
+num_entries: 0
+uncompressed_size: 0
+compressed_size: 0
+files_read: 0
+bytes_read: 0
+
+2 rows in set. Elapsed: 0.075 sec.
+```
+
## Configuring BACKUP/RESTORE to use an S3 Endpoint

To write backups to an S3 bucket you need three pieces of information:
@@ -355,7 +406,7 @@ RESTORE TABLE data AS data_restored FROM Disk('s3_plain', 'cloud_backup');

:::note But keep in mind that:
- This disk should not be used for `MergeTree` itself, only for `BACKUP`/`RESTORE`
-- It has excessive API calls
+- If your tables are backed by S3 storage, it doesn't use `CopyObject` calls to copy parts to the destination bucket; instead, it downloads and uploads them, which is very inefficient. Prefer the `BACKUP ... TO S3()` syntax for this use case.
:::

## Alternatives
diff --git a/docs/en/operations/cluster-discovery.md b/docs/en/operations/cluster-discovery.md
new file mode 100644
index 000000000000..a925afac916b
--- /dev/null
+++ b/docs/en/operations/cluster-discovery.md
@@ -0,0 +1,171 @@
+---
+slug: /en/operations/cluster-discovery
+sidebar_label: Cluster Discovery
+---
+# Cluster Discovery
+
+## Overview
+
+ClickHouse's Cluster Discovery feature simplifies cluster configuration by allowing nodes to automatically discover and register themselves without the need for explicit definition in the configuration files. This is especially beneficial in cases where the manual definition of each node becomes cumbersome.
+
+:::note
+
+Cluster Discovery is an experimental feature and can be changed or removed in future versions.
+To enable it, include the `allow_experimental_cluster_discovery` setting in your configuration file:
+
+```xml
+<clickhouse>
+    <!-- ... -->
+    <allow_experimental_cluster_discovery>1</allow_experimental_cluster_discovery>
+    <!-- ... -->
+</clickhouse>
+```
+:::
+
+## Remote Servers Configuration
+
+### Traditional Manual Configuration
+
+Traditionally, in ClickHouse, each shard and replica in the cluster needed to be manually specified in the configuration:
+
+```xml
+<remote_servers>
+    <cluster_name>
+        <shard>
+            <replica>
+                <host>node1</host>
+                <port>9000</port>
+            </replica>
+            <replica>
+                <host>node2</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+        <shard>
+            <replica>
+                <host>node3</host>
+                <port>9000</port>
+            </replica>
+            <replica>
+                <host>node4</host>
+                <port>9000</port>
+            </replica>
+        </shard>
+    </cluster_name>
+</remote_servers>
+```
+
+### Using Cluster Discovery
+
+With Cluster Discovery, rather than defining each node explicitly, you simply specify a path in ZooKeeper. All nodes that register under this path in ZooKeeper will be automatically discovered and added to the cluster.
+
+```xml
+<remote_servers>
+    <cluster_name>
+        <discovery>
+            <path>/clickhouse/discovery/cluster_name</path>
+        </discovery>
+    </cluster_name>
+</remote_servers>
+```
+
+If you want to specify a shard number for a particular node, you can include the `<shard>` tag within the `<discovery>` section:
+
+for `node1` and `node2`:
+
+```xml
+<discovery>
+    <path>/clickhouse/discovery/cluster_name</path>
+    <shard>1</shard>
+</discovery>
+```
+
+for `node3` and `node4`:
+
+```xml
+<discovery>
+    <path>/clickhouse/discovery/cluster_name</path>
+    <shard>2</shard>
+</discovery>
+```
+
+### Observer mode
+
+
+Nodes configured in observer mode will not register themselves as replicas.
+They will solely observe and discover other active replicas in the cluster without actively participating.
+To enable observer mode, include the `<observer/>` tag within the `<discovery>` section:
+
+```xml
+<discovery>
+    <path>/clickhouse/discovery/cluster_name</path>
+    <observer/>
+</discovery>
+```
+
+
+## Use-Cases and Limitations
+
+As nodes are added or removed from the specified ZooKeeper path, they are automatically discovered or removed from the cluster without the need for configuration changes or server restarts.
+
+However, changes affect only cluster configuration, not the data or existing databases and tables.
+
+Consider the following example with a cluster of 3 nodes:
+
+
+```xml
+<remote_servers>
+    <default>
+        <discovery>
+            <path>/clickhouse/discovery/default_cluster</path>
+        </discovery>
+    </default>
+</remote_servers>
+```
+
+```
+SELECT * EXCEPT (default_database, errors_count, slowdowns_count, estimated_recovery_time, database_shard_name, database_replica_name)
+FROM system.clusters WHERE cluster = 'default';
+
+┌─cluster─┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name────┬─host_address─┬─port─┬─is_local─┬─user─┬─is_active─┐
+│ default │         1 │            1 │           1 │ 92d3c04025e8 │ 172.26.0.5   │ 9000 │        0 │      │ ᴺᵁᴸᴸ      │
+│ default │         1 │            1 │           2 │ a6a68731c21b │ 172.26.0.4   │ 9000 │        1 │      │ ᴺᵁᴸᴸ      │
+│ default │         1 │            1 │           3 │ 8e62b9cb17a1 │ 172.26.0.2   │ 9000 │        0 │      │ ᴺᵁᴸᴸ      │
+└─────────┴───────────┴──────────────┴─────────────┴──────────────┴──────────────┴──────┴──────────┴──────┴───────────┘
+```
+
+```sql
+CREATE TABLE event_table ON CLUSTER default (event_time DateTime, value String)
+ENGINE = ReplicatedMergeTree('/clickhouse/tables/event_table', '{replica}')
+ORDER BY event_time PARTITION BY toYYYYMM(event_time);
+
+INSERT INTO event_table ...
+```
+
+Then we add a new node to the cluster by starting a new node with the same entry in the `remote_servers` section of its configuration file:
+
+```
+┌─cluster─┬─shard_num─┬─shard_weight─┬─replica_num─┬─host_name────┬─host_address─┬─port─┬─is_local─┬─user─┬─is_active─┐
+│ default │         1 │            1 │           1 │ 92d3c04025e8 │ 172.26.0.5   │ 9000 │        0 │      │ ᴺᵁᴸᴸ      │
+│ default │         1 │            1 │           2 │ a6a68731c21b │ 172.26.0.4   │ 9000 │        1 │      │ ᴺᵁᴸᴸ      │
+│ default │         1 │            1 │           3 │ 8e62b9cb17a1 │ 172.26.0.2   │ 9000 │        0 │      │ ᴺᵁᴸᴸ      │
+│ default │         1 │            1 │           4 │ b0df3669b81f │ 172.26.0.6   │ 9000 │        0 │      │ ᴺᵁᴸᴸ      │
+└─────────┴───────────┴──────────────┴─────────────┴──────────────┴──────────────┴──────┴──────────┴──────┴───────────┘
+```
+
+The fourth node is participating in the cluster, but the table `event_table` still exists only on the first three nodes:
+
+
+```sql
+SELECT hostname(), database, table FROM clusterAllReplicas(default, system.tables) WHERE table = 'event_table' FORMAT PrettyCompactMonoBlock
+
+┌─hostname()───┬─database─┬─table───────┐
+│ a6a68731c21b │ default  │ event_table │
+│ 92d3c04025e8 │ default  │ event_table │
+│ 8e62b9cb17a1 │ default  │ event_table │
+└──────────────┴──────────┴─────────────┘
+```
+
+If you need to have tables replicated on all the nodes, you may use the [Replicated](../engines/database-engines/replicated.md) database engine as an alternative to cluster discovery.
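+
+For illustration, here is a minimal sketch of that alternative (the ZooKeeper path and the `{shard}`/`{replica}` macros are placeholders to adapt to your setup):
+
+```sql
+CREATE DATABASE db ENGINE = Replicated('/clickhouse/databases/db', '{shard}', '{replica}');
+```
+
+DDL queries in such a database are replicated to all of its replicas, so tables created in it also appear on nodes that are added later.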
+
diff --git a/docs/en/operations/configuration-files.md b/docs/en/operations/configuration-files.md
index d3e21cb23645..dfe62d591e32 100644
--- a/docs/en/operations/configuration-files.md
+++ b/docs/en/operations/configuration-files.md
@@ -65,6 +65,58 @@ XML substitution example:

Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element.

+## Encrypting and Hiding Configuration {#encryption}
+
+You can use symmetric encryption to encrypt a configuration element, for example, a plaintext password or private key. To do so, first configure the [encryption codec](../sql-reference/statements/create/table.md#encryption-codecs), then add attribute `encrypted_by` with the name of the encryption codec as value to the element to encrypt.
+
+Unlike attributes `from_zk`, `from_env` and `incl` (or element `include`), no substitution, i.e. decryption of the encrypted value, is performed in the preprocessed file. Decryption happens only at runtime in the server process.
+
+Example:
+
+```xml
+<clickhouse>
+    <encryption_codecs>
+        <aes_128_gcm_siv>
+            <key_hex>00112233445566778899aabbccddeeff</key_hex>
+        </aes_128_gcm_siv>
+    </encryption_codecs>
+
+    <interserver_http_credentials>
+        <user>admin</user>
+        <password encrypted_by="AES_128_GCM_SIV">961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85</password>
+    </interserver_http_credentials>
+</clickhouse>
+```
+
+To encrypt a value, you can use the (example) program `encrypt_decrypt`:
+
+Example:
+
+``` bash
+./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd
+```
+
+``` text
+961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
+```
+
+Even with encrypted configuration elements, encrypted elements still appear in the preprocessed configuration file. If this is a problem for your ClickHouse deployment, we suggest two alternatives: either set file permissions of the preprocessed file to 600 or use the `hide_in_preprocessed` attribute.
+
+Example:
+
+```xml
+<clickhouse>
+    <interserver_http_credentials hide_in_preprocessed="true">
+        <user>admin</user>
+        <password>secret</password>
+    </interserver_http_credentials>
+</clickhouse>
+```
+
## User Settings {#user-settings}

The `config.xml` file can specify a separate config with user settings, profiles, and quotas. The relative path to this config is set in the `users_config` element. By default, it is `users.xml`. If `users_config` is omitted, the user settings, profiles, and quotas are specified directly in `config.xml`.
@@ -104,12 +156,17 @@ Here you can see default config written in YAML: [config.yaml.example](https://g

There are some differences between YAML and XML formats in terms of ClickHouse configurations. Here are some tips for writing a configuration in YAML format.

-You should use a Scalar node to write a key-value pair:
+An XML tag with a text value is represented by a YAML key-value pair
``` yaml
key: value
```

-To create a node, containing other nodes you should use a Map:
+Corresponding XML:
+``` xml
+<key>value</key>
+```
+
+A nested XML node is represented by a YAML map:
``` yaml
map_key:
  key1: val1
@@ -117,7 +174,16 @@ map_key:
  key3: val3
```

-To create a list of values or nodes assigned to one tag you should use a Sequence:
+Corresponding XML:
+``` xml
+<map_key>
+    <key1>val1</key1>
+    <key2>val2</key2>
+    <key3>val3</key3>
+</map_key>
+```
+
+To create the same XML tag multiple times, use a YAML sequence:
``` yaml
seq_key:
  - val1
  - val2
  - key1: val3
  - map:
      key2: val4
      key3: val5
```
Note, that @ is reserved by YAML standard, so you should also to wrap it into double quotes: +Corresponding XML: +```xml +val1 +val2 + + val3 + + + + val4 + val5 + + +``` +To provide an XML attribute, you can use an attribute key with a `@` prefix. Note that `@` is reserved by YAML standard, so must be wrapped in double quotes: ``` yaml map: "@attr1": value1 @@ -137,16 +217,14 @@ map: key: 123 ``` -From that Map we will get these XML nodes: - +Corresponding XML: ``` xml 123 ``` -You can also set attributes for Sequence: - +It is also possible to use attributes in YAML sequence: ``` yaml seq: - "@attr1": value1 @@ -155,13 +233,25 @@ seq: - abc ``` -So, we can get YAML config equal to this XML one: - +Corresponding XML: ``` xml 123 abc ``` +The aforementioned syntax does not allow to express XML text nodes with XML attributes as YAML. This special case can be achieved using an +`#text` attribute key: +```yaml +map_key: + "@attr1": value1 + "#text": value2 +``` + +Corresponding XML: +```xml +value2 +``` + ## Implementation Details {#implementation-details} For each config file, the server also generates `file-preprocessed.xml` files when starting. These files contain all the completed substitutions and overrides, and they are intended for informational use. If ZooKeeper substitutions were used in the config files but ZooKeeper is not available on the server start, the server loads the configuration from the preprocessed file. diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md index 02f52b6f8bf4..6ed72152c1e7 100644 --- a/docs/en/operations/named-collections.md +++ b/docs/en/operations/named-collections.md @@ -169,7 +169,6 @@ host = '127.0.0.1', port = 3306, database = 'test', connection_pool_size = 8, -on_duplicate_clause = 1, replace_query = 1 ``` @@ -185,7 +184,6 @@ replace_query = 1 3306 test 8 - 1 1 diff --git a/docs/en/operations/optimizing-performance/profile-guided-optimization.md b/docs/en/operations/optimizing-performance/profile-guided-optimization.md new file mode 100644 index 000000000000..cda21e3c6044 --- /dev/null +++ b/docs/en/operations/optimizing-performance/profile-guided-optimization.md @@ -0,0 +1,26 @@ +--- +slug: /en/operations/optimizing-performance/profile-guided-optimization +sidebar_position: 54 +sidebar_label: Profile Guided Optimization (PGO) +--- +import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md'; + +# Profile Guided Optimization + +Profile-Guided Optimization (PGO) is a compiler optimization technique where a program is optimized based on the runtime profile. + +According to the tests, PGO helps with achieving better performance for ClickHouse. According to the tests, we see improvements up to 15% in QPS on the ClickBench test suite. The more detailed results are available [here](https://pastebin.com/xbue3HMU). The performance benefits depend on your typical workload - you can get better or worse results. + +More information about PGO in ClickHouse you can read in the corresponding GitHub [issue](https://github.com/ClickHouse/ClickHouse/issues/44567). + +## How to build ClickHouse with PGO? + +There are two major kinds of PGO: [Instrumentation](https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers) and [Sampling](https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers) (also known as AutoFDO). In this guide is described the Instrumentation PGO with ClickHouse. + +1. Build ClickHouse in Instrumented mode. 
+In Clang, instrumentation is enabled by passing the `-fprofile-instr-generate` option in `CXXFLAGS`, as shown above.
+2. Run the instrumented ClickHouse on a sample workload. Ideally this is your usual production workload; one approach is to use [ClickBench](https://github.com/ClickHouse/ClickBench) as a substitute. ClickHouse in instrumented mode runs noticeably slower, so be prepared for that and do not run the instrumented binary in performance-critical environments.
+3. Recompile ClickHouse with the `-fprofile-instr-use` compiler flag, passing it the profiles collected in the previous step.
+
+A more detailed guide on how to apply PGO is in the Clang [documentation](https://clang.llvm.org/docs/UsersManual.html#profile-guided-optimization).
+
+If you are going to collect a sample workload directly from a production environment, we recommend trying Sampling PGO.

diff --git a/docs/en/operations/optimizing-performance/sampling-query-profiler.md b/docs/en/operations/optimizing-performance/sampling-query-profiler.md index f5d0e5d6aed9..9988bfc44bcf 100644 --- a/docs/en/operations/optimizing-performance/sampling-query-profiler.md +++ b/docs/en/operations/optimizing-performance/sampling-query-profiler.md @@ -11,6 +11,8 @@ ClickHouse runs sampling profiler that allows analyzing query execution. Using p

Query profiler is automatically enabled in ClickHouse Cloud and you can run a sample query as follows

+:::note If you are running the following query in ClickHouse Cloud, make sure to change `FROM system.trace_log` to `FROM clusterAllReplicas(default, system.trace_log)` to select from all nodes of the cluster. :::
+

``` sql
SELECT
    count(),

diff --git a/docs/en/operations/query-cache.md b/docs/en/operations/query-cache.md index bfa51650cd89..8405cd2bd36a 100644 --- a/docs/en/operations/query-cache.md +++ b/docs/en/operations/query-cache.md @@ -43,6 +43,12 @@ SETTINGS use_query_cache = true;

will store the query result in the query cache. Subsequent executions of the same query (also with parameter `use_query_cache = true`) will read the computed result from the cache and return it immediately.

+:::note
+Setting `use_query_cache` and all other query-cache-related settings only take effect on stand-alone `SELECT` statements. In particular,
+the results of `SELECT`s against views created by `CREATE VIEW AS SELECT [...] SETTINGS use_query_cache = true` are not cached unless the `SELECT`
+statement runs with `SETTINGS use_query_cache = true`.
+:::
+

The way the cache is utilized can be configured in more detail using settings [enable_writes_to_query_cache](settings/settings.md#enable-writes-to-query-cache) and [enable_reads_from_query_cache](settings/settings.md#enable-reads-from-query-cache) (both `true` by default). The former setting controls whether query results are stored in the cache, whereas the latter setting determines if the database should try to retrieve query

@@ -61,9 +67,12 @@ use_query_cache = true`) but one should keep in mind that all `SELECT` queries i may return cached results then.

The query cache can be cleared using statement `SYSTEM DROP QUERY CACHE`. The content of the query cache is displayed in system table
-`system.query_cache`.
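+
+For example (a sketch; the column names in `system.query_cache` may differ between versions), you can inspect the cached entries and then clear the cache:
+
+``` sql
+-- list cached query results and their sizes in bytes
+SELECT query, result_size FROM system.query_cache;
+
+-- remove all entries from the query cache
+SYSTEM DROP QUERY CACHE;
+```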
The number of query cache hits and misses since database start are shown as events "QueryCacheHits" and +"QueryCacheMisses" in system table [system.events](system-tables/events.md). Both counters are only updated for `SELECT` queries which run +with setting `use_query_cache = true`, other queries do not affect "QueryCacheMisses". Field `query_cache_usage` in system table +[system.query_log](system-tables/query_log.md) shows for each executed query whether the query result was written into or read from the +query cache. Asynchronous metrics "QueryCacheEntries" and "QueryCacheBytes" in system table +[system.asynchronous_metrics](system-tables/asynchronous_metrics.md) show how many entries / bytes the query cache currently contains. The query cache exists once per ClickHouse server process. However, cache results are by default not shared between users. This can be changed (see below) but doing so is not recommended for security reasons. @@ -81,7 +90,7 @@ It is also possible to limit the cache usage of individual users using [settings constraints](settings/constraints-on-settings.md). More specifically, you can restrict the maximum amount of memory (in bytes) a user may allocate in the query cache and the the maximum number of stored query results. For that, first provide configurations [query_cache_max_size_in_bytes](settings/settings.md#query-cache-max-size-in-bytes) and -[query_cache_max_entries](settings/settings.md#query-cache-size-max-items) in a user profile in `users.xml`, then make both settings +[query_cache_max_entries](settings/settings.md#query-cache-size-max-entries) in a user profile in `users.xml`, then make both settings readonly: ``` xml @@ -131,10 +140,26 @@ block granularity when query results are later served from the query cache. As a result, the query cache stores for each query multiple (partial) result blocks. While this behavior is a good default, it can be suppressed using setting -[query_cache_squash_partial_query_results](settings/settings.md#query-cache-squash-partial-query-results). - -Also, results of queries with non-deterministic functions such as `rand()` and `now()` are not cached. This can be overruled using -setting [query_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-cache-store-results-of-queries-with-nondeterministic-functions). +[query_cache_squash_partial_results](settings/settings.md#query-cache-squash-partial-results). + +Also, results of queries with non-deterministic functions are not cached by default. Such functions include +- functions for accessing dictionaries: [`dictGet()`](../sql-reference/functions/ext-dict-functions.md#dictGet) etc. 
+- [user-defined functions](../sql-reference/statements/create/function.md), +- functions which return the current date or time: [`now()`](../sql-reference/functions/date-time-functions.md#now), + [`today()`](../sql-reference/functions/date-time-functions.md#today), + [`yesterday()`](../sql-reference/functions/date-time-functions.md#yesterday) etc., +- functions which return random values: [`randomString()`](../sql-reference/functions/random-functions.md#randomString), + [`fuzzBits()`](../sql-reference/functions/random-functions.md#fuzzBits) etc., +- functions whose result depends on the size and order or the internal chunks used for query processing: + [`nowInBlock()`](../sql-reference/functions/date-time-functions.md#nowInBlock) etc., + [`rowNumberInBlock()`](../sql-reference/functions/other-functions.md#rowNumberInBlock), + [`runningDifference()`](../sql-reference/functions/other-functions.md#runningDifference), + [`blockSize()`](../sql-reference/functions/other-functions.md#blockSize) etc., +- functions which depend on the environment: [`currentUser()`](../sql-reference/functions/other-functions.md#currentUser), + [`queryID()`](../sql-reference/functions/other-functions.md#queryID), + [`getMacro()`](../sql-reference/functions/other-functions.md#getMacro) etc. +To force caching of results of queries with non-deterministic functions regardless, use setting +[query_cache_store_results_of_queries_with_nondeterministic_functions](settings/settings.md#query-cache-store-results-of-queries-with-nondeterministic-functions). Finally, entries in the query cache are not shared between users due to security reasons. For example, user A must not be able to bypass a row policy on a table by running the same query as another user B for whom no such policy exists. However, if necessary, cache entries can diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 40c1b8d64a16..5586dbafd88d 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -1,11 +1,11 @@ --- slug: /en/operations/server-configuration-parameters/settings sidebar_position: 57 -sidebar_label: Server Settings +sidebar_label: Global Server Settings description: This section contains descriptions of server settings that cannot be changed at the session or query level. --- -# Server Settings +# Global Server Settings This section contains descriptions of server settings that cannot be changed at the session or query level. @@ -88,7 +88,7 @@ Default: 2 ## background_merges_mutations_scheduling_policy -The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. +The policy on how to perform a scheduling for background merges and mutations. Possible values are: `round_robin` and `shortest_task_first`. ## background_merges_mutations_scheduling_policy @@ -217,23 +217,61 @@ Type: UInt32 Default: 1024 +## index_mark_cache_policy + +Index mark cache policy name. + +Type: String + +Default: SLRU + ## index_mark_cache_size Size of cache for index marks. Zero means disabled. +:::note +This setting can be modified at runtime and will take effect immediately. +::: + Type: UInt64 Default: 0 +## index_mark_cache_size_ratio + +The size of the protected queue in the index mark cache relative to the cache's total size. 
+ +Type: Double + +Default: 0.5 + +## index_uncompressed_cache_policy + +Index uncompressed cache policy name. + +Type: String + +Default: SLRU ## index_uncompressed_cache_size Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled. +:::note +This setting can be modified at runtime and will take effect immediately. +::: + Type: UInt64 Default: 0 +## index_uncompressed_cache_size_ratio + +The size of the protected queue in the index uncompressed cache relative to the cache's total size. + +Type: Double + +Default: 0.5 ## io_thread_pool_queue_size @@ -255,10 +293,22 @@ Default: SLRU Size of cache for marks (index of MergeTree family of tables). +:::note +This setting can be modified at runtime and will take effect immediately. +::: + Type: UInt64 Default: 5368709120 +## mark_cache_size_ratio + +The size of the protected queue in the mark cache relative to the cache's total size. + +Type: Double + +Default: 0.5 + ## max_backup_bandwidth_for_server The maximum read speed in bytes per second for all backups on server. Zero means unlimited. @@ -288,7 +338,7 @@ Default: 1000 Limit on total number of concurrently executed queries. Zero means Unlimited. Note that limits on insert and select queries, and on the maximum number of queries for users must also be considered. See also max_concurrent_insert_queries, max_concurrent_select_queries, max_concurrent_queries_for_all_users. Zero means unlimited. :::note -These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. +This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. ::: Type: UInt64 @@ -300,7 +350,7 @@ Default: 0 Limit on total number of concurrent insert queries. Zero means Unlimited. :::note -These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. +This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. ::: Type: UInt64 @@ -312,7 +362,7 @@ Default: 0 Limit on total number of concurrently select queries. Zero means Unlimited. :::note -These settings can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. +This setting can be modified at runtime and will take effect immediately. Queries that are already running will remain unchanged. ::: Type: UInt64 @@ -456,6 +506,10 @@ Sets the cache size (in bytes) for mapped files. This setting allows avoiding fr Note that the amount of data in mapped files does not consume memory directly and is not accounted for in query or server memory usage — because this memory can be discarded similar to the OS page cache. The cache is dropped (the files are closed) automatically on the removal of old parts in tables of the MergeTree family, also it can be dropped manually by the `SYSTEM DROP MMAP CACHE` query. +:::note +This setting can be modified at runtime and will take effect immediately. +::: + Type: UInt64 Default: 1000 @@ -512,7 +566,7 @@ Both the cache for `local_disk`, and temporary data will be stored in `/tiny_loc cache local_disk /tiny_local_cache/ - 10M + 10M 1M 1 0 @@ -529,7 +583,7 @@ Both the cache for `local_disk`, and temporary data will be stored in `/tiny_loc Type: String -Default: +Default: ## thread_pool_queue_size @@ -586,7 +640,7 @@ When `/disk1` is full, temporary data will be stored on `/disk2`. 
``` Type: String -Default: +Default: ## uncompressed_cache_policy @@ -605,10 +659,22 @@ There is one shared cache for the server. Memory is allocated on demand. The cac The uncompressed cache is advantageous for very short queries in individual cases. +:::note +This setting can be modified at runtime and will take effect immediately. +::: + Type: UInt64 Default: 0 +## uncompressed_cache_size_ratio + +The size of the protected queue in the uncompressed cache relative to the cache's total size. + +Type: Double + +Default: 0.5 + ## builtin_dictionaries_reload_interval {#builtin-dictionaries-reload-interval} The interval in seconds before reloading built-in dictionaries. @@ -769,7 +835,7 @@ List of prefixes for [custom settings](../../operations/settings/index.md#custom - [Custom settings](../../operations/settings/index.md#custom_settings) -## core_dump {#server_configuration_parameters-core_dump} +## core_dump {#core_dump} Configures soft limit for core dump file size. @@ -858,7 +924,7 @@ The path to the table in ZooKeeper. {replica} ``` -## dictionaries_config {#server_configuration_parameters-dictionaries_config} +## dictionaries_config {#dictionaries_config} The path to the config file for dictionaries. @@ -875,7 +941,7 @@ See also “[Dictionaries](../../sql-reference/dictionaries/index.md)”. *_dictionary.xml ``` -## user_defined_executable_functions_config {#server_configuration_parameters-user_defined_executable_functions_config} +## user_defined_executable_functions_config {#user_defined_executable_functions_config} The path to the config file for executable user defined functions. @@ -892,7 +958,7 @@ See also “[Executable User Defined Functions](../../sql-reference/functions/in *_function.xml ``` -## dictionaries_lazy_load {#server_configuration_parameters-dictionaries_lazy_load} +## dictionaries_lazy_load {#dictionaries_lazy_load} Lazy loading of dictionaries. @@ -908,7 +974,7 @@ The default is `true`. true ``` -## format_schema_path {#server_configuration_parameters-format_schema_path} +## format_schema_path {#format_schema_path} The path to the directory with the schemes for the input data, such as schemas for the [CapnProto](../../interfaces/formats.md#capnproto) format. @@ -919,7 +985,7 @@ The path to the directory with the schemes for the input data, such as schemas f format_schemas/ ``` -## graphite {#server_configuration_parameters-graphite} +## graphite {#graphite} Sending data to [Graphite](https://github.com/graphite-project). @@ -953,7 +1019,7 @@ You can configure multiple `` clauses. For instance, you can use this ``` -## graphite_rollup {#server_configuration_parameters-graphite-rollup} +## graphite_rollup {#graphite-rollup} Settings for thinning data for Graphite. @@ -985,7 +1051,7 @@ For more details, see [GraphiteMergeTree](../../engines/table-engines/mergetree- The port for connecting to the server over HTTP(s). -If `https_port` is specified, [openSSL](#server_configuration_parameters-openssl) must be configured. +If `https_port` is specified, [openSSL](#openssl) must be configured. If `http_port` is specified, the OpenSSL configuration is ignored even if it is set. @@ -995,7 +1061,7 @@ If `http_port` is specified, the OpenSSL configuration is ignored even if it is 9999 ``` -## http_server_default_response {#server_configuration_parameters-http_server_default_response} +## http_server_default_response {#http_server_default_response} The page that is shown by default when you access the ClickHouse HTTP(s) server. 
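+
+For example, a hypothetical custom page could be configured like this (the HTML content is a placeholder):
+
+``` xml
+<http_server_default_response>
+    <![CDATA[<html><body>ClickHouse is up.</body></html>]]>
+</http_server_default_response>
+```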
The default value is “Ok.” (with a line feed at the end) @@ -1020,7 +1086,7 @@ Expired time for HSTS in seconds. The default value is 0 means clickhouse disabl 600000 ``` -## include_from {#server_configuration_parameters-include_from} +## include_from {#include_from} The path to the file with substitutions. @@ -1156,7 +1222,7 @@ The number of seconds that ClickHouse waits for incoming requests before closing 10 ``` -## listen_host {#server_configuration_parameters-listen_host} +## listen_host {#listen_host} Restriction on hosts that requests can come from. If you want the server to answer all of them, specify `::`. @@ -1167,7 +1233,7 @@ Examples: 127.0.0.1 ``` -## listen_backlog {#server_configuration_parameters-listen_backlog} +## listen_backlog {#listen_backlog} Backlog (queue size of pending connections) of the listen socket. @@ -1187,7 +1253,7 @@ Examples: 4096 ``` -## logger {#server_configuration_parameters-logger} +## logger {#logger} Logging settings. @@ -1201,13 +1267,58 @@ Keys: - `console` – Send `log` and `errorlog` to the console instead of file. To enable, set to `1` or `true`. - `stream_compress` – Compress `log` and `errorlog` with `lz4` stream compression. To enable, set to `1` or `true`. +Both log and error log file names (only file names, not directories) support date and time format specifiers. + +**Format specifiers** +Using the following format specifiers, you can define a pattern for the resulting file name. “Example” column shows possible results for `2023-07-06 18:32:07`. + +| Specifier | Description | Example | +|-------------|---------------------------------------------------------------------------------------------------------------------|--------------------------| +| %% | Literal % | % | +| %n | New-line character | | +| %t | Horizontal tab character | | +| %Y | Year as a decimal number, e.g. 2017 | 2023 | +| %y | Last 2 digits of year as a decimal number (range [00,99]) | 23 | +| %C | First 2 digits of year as a decimal number (range [00,99]) | 20 | +| %G | Four-digit [ISO 8601 week-based year](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), i.e. the year that contains the specified week. Normally useful only with %V | 2023 | +| %g | Last 2 digits of [ISO 8601 week-based year](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), i.e. the year that contains the specified week. | 23 | +| %b | Abbreviated month name, e.g. Oct (locale dependent) | Jul | +| %h | Synonym of %b | Jul | +| %B | Full month name, e.g. October (locale dependent) | July | +| %m | Month as a decimal number (range [01,12]) | 07 | +| %U | Week of the year as a decimal number (Sunday is the first day of the week) (range [00,53]) | 27 | +| %W | Week of the year as a decimal number (Monday is the first day of the week) (range [00,53]) | 27 | +| %V | ISO 8601 week number (range [01,53]) | 27 | +| %j | Day of the year as a decimal number (range [001,366]) | 187 | +| %d | Day of the month as a zero-padded decimal number (range [01,31]). Single digit is preceded by zero. | 06 | +| %e | Day of the month as a space-padded decimal number (range [1,31]). Single digit is preceded by a space. |   6 | +| %a | Abbreviated weekday name, e.g. Fri (locale dependent) | Thu | +| %A | Full weekday name, e.g. 
Friday (locale dependent) | Thursday | +| %w | Weekday as a integer number with Sunday as 0 (range [0-6]) | 4 | +| %u | Weekday as a decimal number, where Monday is 1 (ISO 8601 format) (range [1-7]) | 4 | +| %H | Hour as a decimal number, 24 hour clock (range [00-23]) | 18 | +| %I | Hour as a decimal number, 12 hour clock (range [01,12]) | 06 | +| %M | Minute as a decimal number (range [00,59]) | 32 | +| %S | Second as a decimal number (range [00,60]) | 07 | +| %c | Standard date and time string, e.g. Sun Oct 17 04:41:13 2010 (locale dependent) | Thu Jul 6 18:32:07 2023 | +| %x | Localized date representation (locale dependent) | 07/06/23 | +| %X | Localized time representation, e.g. 18:40:20 or 6:40:20 PM (locale dependent) | 18:32:07 | +| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 07/06/23 | +| %F | Short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2023-07-06 | +| %r | Localized 12-hour clock time (locale dependent) | 06:32:07 PM | +| %R | Equivalent to "%H:%M" | 18:32 | +| %T | Equivalent to "%H:%M:%S" (the ISO 8601 time format) | 18:32:07 | +| %p | Localized a.m. or p.m. designation (locale dependent) | PM | +| %z | Offset from UTC in the ISO 8601 format (e.g. -0430), or no characters if the time zone information is not available | +0800 | +| %Z | Locale-dependent time zone name or abbreviation, or no characters if the time zone information is not available | Z AWST | + **Example** ``` xml trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log + /var/log/clickhouse-server/clickhouse-server-%F-%T.log + /var/log/clickhouse-server/clickhouse-server-%F-%T.err.log 1000M 10 true @@ -1246,7 +1357,7 @@ Keys for syslog: Default value: `LOG_USER` if `address` is specified, `LOG_DAEMON` otherwise. - format – Message format. Possible values: `bsd` and `syslog.` -## send_crash_reports {#server_configuration_parameters-send_crash_reports} +## send_crash_reports {#send_crash_reports} Settings for opt-in sending crash reports to the ClickHouse core developers team via [Sentry](https://sentry.io). Enabling it, especially in pre-production environments, is highly appreciated. @@ -1518,7 +1629,7 @@ Default value: `0.5`. -## merge_tree {#server_configuration_parameters-merge_tree} +## merge_tree {#merge_tree} Fine tuning for tables in the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). @@ -1547,6 +1658,10 @@ To manually turn on metrics history collection [`system.metric_log`](../../opera metric_log
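        <!-- flush cadence of the in-memory buffer and metric collection interval, both in milliseconds -->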
        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
        <collect_interval_milliseconds>1000</collect_interval_milliseconds>
+        <max_size_rows>1048576</max_size_rows>
+        <reserved_size_rows>8192</reserved_size_rows>
+        <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+        <flush_on_crash>false</flush_on_crash>
    </metric_log>
</clickhouse>
``` @@ -1561,7 +1676,7 @@ To disable `metric_log` setting, you should create the following file `/etc/clic ``` -## replicated_merge_tree {#server_configuration_parameters-replicated_merge_tree} +## replicated_merge_tree {#replicated_merge_tree} Fine tuning for tables in the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/mergetree.md). @@ -1577,7 +1692,7 @@ For more information, see the MergeTreeSettings.h header file. ``` -## openSSL {#server_configuration_parameters-openssl} +## openSSL {#openssl} SSL client/server configuration. @@ -1591,7 +1706,7 @@ Keys for server/client settings: - verificationMode (default: relaxed) – The method for checking the node’s certificates. Details are in the description of the [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) class. Possible values: `none`, `relaxed`, `strict`, `once`. - verificationDepth (default: 9) – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. - loadDefaultCAFile (default: true) – Wether built-in CA certificates for OpenSSL will be used. ClickHouse assumes that builtin CA certificates are in the file `/etc/ssl/cert.pem` (resp. the directory `/etc/ssl/certs`) or in file (resp. directory) specified by the environment variable `SSL_CERT_FILE` (resp. `SSL_CERT_DIR`). -- cipherList (default: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`) - Supported OpenSSL encryptions. +- cipherList (default: `ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH`) - Supported OpenSSL encryptions. - cacheSessions (default: false) – Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. Acceptable values: `true`, `false`. - sessionIdContext (default: `${application.name}`) – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. This parameter is always recommended since it helps avoid problems both if the server caches the session and if the client requested caching. Default value: `${application.name}`. - sessionCacheSize (default: [1024\*20](https://github.com/ClickHouse/boringssl/blob/master/include/openssl/ssl.h#L1978)) – The maximum number of sessions that the server caches. A value of 0 means unlimited sessions. @@ -1636,7 +1751,7 @@ Keys for server/client settings: ``` -## part_log {#server_configuration_parameters-part-log} +## part_log {#part-log} Logging events that are associated with [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). For instance, adding or merging data. You can use the log to simulate merge algorithms and compare their characteristics. You can visualize the merge process. @@ -1650,6 +1765,14 @@ Use the following parameters to configure logging: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. 
+Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1661,10 +1784,14 @@ Use the following parameters to configure logging: part_log
toMonday(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` -## path {#server_configuration_parameters-path} +## path {#path} The path to the directory containing data. @@ -1678,7 +1805,7 @@ The trailing slash is mandatory. /var/lib/clickhouse/ ``` -## Prometheus {#server_configuration_parameters-prometheus} +## Prometheus {#prometheus} Exposing metrics data for scraping from [Prometheus](https://prometheus.io). @@ -1714,7 +1841,7 @@ Check (replace `127.0.0.1` with the IP addr or hostname of your ClickHouse serve curl 127.0.0.1:9363/metrics ``` -## query_log {#server_configuration_parameters-query-log} +## query_log {#query-log} Setting for logging queries received with the [log_queries=1](../../operations/settings/settings.md) setting. @@ -1728,6 +1855,14 @@ Use the following parameters to configure logging: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1741,6 +1876,10 @@ If the table does not exist, ClickHouse will create it. If the structure of the query_log
Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1772,7 +1911,7 @@ Data for the query cache is allocated in DRAM. If memory is scarce, make sure to ``` -## query_thread_log {#server_configuration_parameters-query_thread_log} +## query_thread_log {#query_thread_log} Setting for logging threads of queries received with the [log_query_threads=1](../../operations/settings/settings.md#settings-log-query-threads) setting. @@ -1786,6 +1925,14 @@ Use the following parameters to configure logging: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size_rows, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1799,10 +1946,14 @@ If the table does not exist, ClickHouse will create it. If the structure of the query_thread_log
toMonday(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` -## query_views_log {#server_configuration_parameters-query_views_log} +## query_views_log {#query_views_log} Setting for logging views (live, materialized etc) dependant of queries received with the [log_query_views=1](../../operations/settings/settings.md#settings-log-query-views) setting. @@ -1816,6 +1967,14 @@ Use the following parameters to configure logging: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` – Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1829,10 +1988,14 @@ If the table does not exist, ClickHouse will create it. If the structure of the query_views_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` -## text_log {#server_configuration_parameters-text_log} +## text_log {#text_log} Settings for the [text_log](../../operations/system-tables/text_log.md#system_tables-text_log) system table for logging text messages. @@ -1845,6 +2008,14 @@ Parameters: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1856,14 +2027,17 @@ Parameters: system text_log
7500 + 1048576 + 8192 + 524288 + false Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day ``` - -## trace_log {#server_configuration_parameters-trace_log} +## trace_log {#trace_log} Settings for the [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation. @@ -1875,6 +2049,12 @@ Parameters: - `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` or `order_by` defined. - `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. - `storage_policy` – Name of storage policy to use for the table (optional) - `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). @@ -1886,10 +2066,14 @@ The default server configuration file `config.xml` contains the following settin trace_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` -## asynchronous_insert_log {#server_configuration_parameters-asynchronous_insert_log} +## asynchronous_insert_log {#asynchronous_insert_log} Settings for the [asynchronous_insert_log](../../operations/system-tables/asynchronous_insert_log.md#system_tables-asynchronous_insert_log) system table for logging async inserts. @@ -1900,9 +2084,18 @@ Parameters: - `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. - `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` defined. - `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. - `storage_policy` – Name of storage policy to use for the table (optional) **Example** + ```xml @@ -1910,11 +2103,94 @@ Parameters: asynchronous_insert_log
7500 toYYYYMM(event_date) + 1048576 + 8192 + 524288 + false
``` +## crash_log {#crash_log} + +Settings for the [crash_log](../../operations/system-tables/crash-log.md) system table operation. + +Parameters: + +- `database` — Database for storing a table. +- `table` — Table name. +- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` defined. +- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` defined. +- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/index.md) for a system table. Can't be used if `partition_by` or `order_by` defined. +- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. +- `storage_policy` – Name of storage policy to use for the table (optional) +- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md/#settings) that control the behavior of the MergeTree (optional). + +The default server configuration file `config.xml` contains the following settings section: + +``` xml + + system + crash_log
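+        <!-- the row-based buffer limits below are deliberately small for crash_log -->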
+        <partition_by>toYYYYMM(event_date)</partition_by>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <max_size_rows>1024</max_size_rows>
+        <reserved_size_rows>1024</reserved_size_rows>
+        <buffer_size_rows_flush_threshold>512</buffer_size_rows_flush_threshold>
+        <flush_on_crash>false</flush_on_crash>
+    </crash_log>
+</clickhouse>
+``` + +## backup_log {#backup_log} + +Settings for the [backup_log](../../operations/system-tables/backup_log.md) system table for logging `BACKUP` and `RESTORE` operations. + +Parameters: + +- `database` — Database name. +- `table` — Table name. +- `partition_by` — [Custom partitioning key](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) for a system table. Can't be used if `engine` is defined. +- `order_by` - [Custom sorting key](../../engines/table-engines/mergetree-family/mergetree.md#order_by) for a system table. Can't be used if `engine` is defined. +- `engine` - [MergeTree Engine Definition](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) for a system table. Can't be used if `partition_by` or `order_by` is defined. +- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `max_size_rows` – Maximal size in lines for the logs. When non-flushed logs amount reaches max_size, logs dumped to the disk. +Default: 1048576. +- `reserved_size_rows` – Pre-allocated memory size in lines for the logs. +Default: 8192. +- `buffer_size_rows_flush_threshold` – Lines amount threshold, reaching it launches flushing logs to the disk in background. +Default: `max_size_rows / 2`. +- `flush_on_crash` - Indication whether logs should be dumped to the disk in case of a crash. +Default: false. +- `storage_policy` – Name of storage policy to use for the table (optional). +- `settings` - [Additional parameters](../../engines/table-engines/mergetree-family/mergetree.md#settings) that control the behavior of the MergeTree (optional). + +**Example** + +```xml + + + system + backup_log
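+        <!-- buffer limits are in rows; flush_interval_milliseconds is in milliseconds -->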
+        <flush_interval_milliseconds>1000</flush_interval_milliseconds>
+        <partition_by>toYYYYMM(event_date)</partition_by>
+        <max_size_rows>1048576</max_size_rows>
+        <reserved_size_rows>8192</reserved_size_rows>
+        <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+        <flush_on_crash>false</flush_on_crash>
+    </backup_log>
+</clickhouse>
+``` + ## query_masking_rules {#query-masking-rules} Regexp-based rules, which will be applied to queries as well as all log messages before storing them in server logs, @@ -1960,8 +2236,10 @@ For the value of the `incl` attribute, see the section “[Configuration files]( **See Also** - [skip_unavailable_shards](../../operations/settings/settings.md#settings-skip_unavailable_shards) +- [Cluster Discovery](../../operations/cluster-discovery.md) +- [Replicated database engine](../../engines/database-engines/replicated.md) -## timezone {#server_configuration_parameters-timezone} +## timezone {#timezone} The server’s time zone. @@ -1979,7 +2257,7 @@ The time zone is necessary for conversions between String and DateTime formats w - [session_timezone](../settings/settings.md#session_timezone) -## tcp_port {#server_configuration_parameters-tcp_port} +## tcp_port {#tcp_port} Port for communicating with clients over the TCP protocol. @@ -1989,9 +2267,9 @@ Port for communicating with clients over the TCP protocol. 9000 ``` -## tcp_port_secure {#server_configuration_parameters-tcp_port_secure} +## tcp_port_secure {#tcp_port_secure} -TCP port for secure communication with clients. Use it with [OpenSSL](#server_configuration_parameters-openssl) settings. +TCP port for secure communication with clients. Use it with [OpenSSL](#openssl) settings. **Possible values** @@ -2003,7 +2281,7 @@ Positive integer. 9440 ``` -## mysql_port {#server_configuration_parameters-mysql_port} +## mysql_port {#mysql_port} Port for communicating with clients over MySQL protocol. @@ -2017,7 +2295,7 @@ Example 9004 ``` -## postgresql_port {#server_configuration_parameters-postgresql_port} +## postgresql_port {#postgresql_port} Port for communicating with clients over PostgreSQL protocol. @@ -2048,7 +2326,7 @@ Path on the local filesystem to store temporary data for processing large querie ``` -## user_files_path {#server_configuration_parameters-user_files_path} +## user_files_path {#user_files_path} The directory with user files. Used in the table function [file()](../../sql-reference/table-functions/file.md). @@ -2058,7 +2336,7 @@ The directory with user files. Used in the table function [file()](../../sql-ref /var/lib/clickhouse/user_files/ ``` -## user_scripts_path {#server_configuration_parameters-user_scripts_path} +## user_scripts_path {#user_scripts_path} The directory with user scripts files. Used for Executable user defined functions [Executable User Defined Functions](../../sql-reference/functions/index.md#executable-user-defined-functions). @@ -2068,7 +2346,7 @@ The directory with user scripts files. Used for Executable user defined function /var/lib/clickhouse/user_scripts/ ``` -## user_defined_path {#server_configuration_parameters-user_defined_path} +## user_defined_path {#user_defined_path} The directory with user defined files. Used for SQL user defined functions [SQL User Defined Functions](../../sql-reference/functions/index.md#user-defined-functions). @@ -2119,7 +2397,15 @@ This section contains the following parameters: - `session_timeout_ms` — Maximum timeout for the client session in milliseconds. - `operation_timeout_ms` — Maximum timeout for one operation in milliseconds. - `root` — The [znode](http://zookeeper.apache.org/doc/r3.5.5/zookeeperOver.html#Nodes+and+ephemeral+nodes) that is used as the root for znodes used by the ClickHouse server. Optional. 
+- `fallback_session_lifetime.min` - If the first zookeeper host resolved by zookeeper_load_balancing strategy is unavailable, limit the lifetime of a zookeeper session to the fallback node. This is done for load-balancing purposes to avoid excessive load on one of zookeeper hosts. This setting sets the minimal duration of the fallback session. Set in seconds. Optional. Default is 3 hours. +- `fallback_session_lifetime.max` - If the first zookeeper host resolved by zookeeper_load_balancing strategy is unavailable, limit the lifetime of a zookeeper session to the fallback node. This is done for load-balancing purposes to avoid excessive load on one of zookeeper hosts. This setting sets the maximum duration of the fallback session. Set in seconds. Optional. Default is 6 hours. - `identity` — User and password, that can be required by ZooKeeper to give access to requested znodes. Optional. +- zookeeper_load_balancing - Specifies the algorithm of ZooKeeper node selection. + * random - randomly selects one of ZooKeeper nodes. + * in_order - selects the first ZooKeeper node, if it's not available then the second, and so on. + * nearest_hostname - selects a ZooKeeper node with a hostname that is most similar to the server’s hostname. + * first_or_random - selects the first ZooKeeper node, if it's not available then randomly selects one of remaining ZooKeeper nodes. + * round_robin - selects the first ZooKeeper node, if reconnection happens selects the next. **Example configuration** @@ -2139,6 +2425,8 @@ This section contains the following parameters: /path/to/zookeeper/node user:password + + random ``` @@ -2154,7 +2442,7 @@ Storage method for data part headers in ZooKeeper. This setting only applies to the `MergeTree` family. It can be specified: -- Globally in the [merge_tree](#server_configuration_parameters-merge_tree) section of the `config.xml` file. +- Globally in the [merge_tree](#merge_tree) section of the `config.xml` file. ClickHouse uses the setting for all the tables on the server. You can change the setting at any time. Existing tables change their behaviour when the setting changes. diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index eb1d5db5676f..86d24c3a942d 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -7,90 +7,20 @@ pagination_next: en/operations/settings/settings # Settings Overview -There are multiple ways to define ClickHouse settings. Settings are configured in layers, and each subsequent layer redefines the previous values of a setting. +:::note +XML-based Settings Profiles and [configuration files](https://clickhouse.com/docs/en/operations/configuration-files) are currently not supported for ClickHouse Cloud. To specify settings for your ClickHouse Cloud service, you must use [SQL-driven Settings Profiles](https://clickhouse.com/docs/en/operations/access-rights#settings-profiles-management). +::: -The order of priority for defining a setting is: +There are two main groups of ClickHouse settings: -1. Settings in the `users.xml` server configuration file +- Global server settings +- Query-level settings - - Set in the element ``. +The main distinction between global server settings and query-level settings is that +global server settings must be set in configuration files while query-level settings +can be set in configuration files or with SQL queries. -2. 
Session settings +Read about [global server settings](/docs/en/operations/server-configuration-parameters/settings.md) to learn more about configuring your ClickHouse server at the global server level. - - Send `SET setting=value` from the ClickHouse console client in interactive mode. - Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you need to specify the `session_id` HTTP parameter. +Read about [query-level settings](/docs/en/operations/settings/settings-query-level.md) to learn more about configuring your ClickHouse server at the query-level. -3. Query settings - - - When starting the ClickHouse console client in non-interactive mode, set the startup parameter `--setting=value`. - - When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`). - - Define settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) clause of the SELECT query. The setting value is applied only to that query and is reset to the default or previous value after the query is executed. - -View the [Settings](./settings.md) page for a description of the ClickHouse settings. - -## Converting a Setting to its Default Value - -If you change a setting and would like to revert it back to its default value, set the value to `DEFAULT`. The syntax looks like: - -```sql -SET setting_name = DEFAULT -``` - -For example, the default value of `max_insert_block_size` is 1048449. Suppose you change its value to 100000: - -```sql -SET max_insert_block_size=100000; - -SELECT value FROM system.settings where name='max_insert_block_size'; -``` - -The response is: - -```response -┌─value──┐ -│ 100000 │ -└────────┘ -``` - -The following command sets its value back to 1048449: - -```sql -SET max_insert_block_size=DEFAULT; - -SELECT value FROM system.settings where name='max_insert_block_size'; -``` - -The setting is now back to its default: - -```response -┌─value───┐ -│ 1048449 │ -└─────────┘ -``` - - -## Custom Settings {#custom_settings} - -In addition to the common [settings](../../operations/settings/settings.md), users can define custom settings. - -A custom setting name must begin with one of predefined prefixes. The list of these prefixes must be declared in the [custom_settings_prefixes](../../operations/server-configuration-parameters/settings.md#custom_settings_prefixes) parameter in the server configuration file. - -```xml -custom_ -``` - -To define a custom setting use `SET` command: - -```sql -SET custom_a = 123; -``` - -To get the current value of a custom setting use `getSetting()` function: - -```sql -SELECT getSetting('custom_a'); -``` - -**See Also** - -- [Server Configuration Settings](../../operations/server-configuration-parameters/settings.md) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index 4122b4af40f7..a0428bd09777 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -56,11 +56,11 @@ Possible values: - Any positive integer. -Default value: 300. +Default value: 3000. To achieve maximum performance of `SELECT` queries, it is necessary to minimize the number of parts processed, see [Merge Tree](../../development/architecture.md#merge-tree). -You can set a larger value to 600 (1200), this will reduce the probability of the `Too many parts` error, but at the same time `SELECT` performance might degrade. 
Also in case of a merge issue (for example, due to insufficient disk space) you will notice it later than it could be with the original 300.
+Prior to 23.6 this setting was set to 300. You can set a different, higher value; it will reduce the probability of the `Too many parts` error, but at the same time `SELECT` performance might degrade. Also, in case of a merge issue (for example, due to insufficient disk space) you will notice it later than you would with the original 300.

## parts_to_delay_insert {#parts-to-delay-insert}

@@ -71,7 +71,7 @@ Possible values:

- Any positive integer.

-Default value: 150.
+Default value: 1000.

ClickHouse artificially executes `INSERT` longer (adds ‘sleep’) so that the background merge process can merge parts faster than they are added.

@@ -502,7 +502,7 @@ Possible values:

Default value: 480.

After merging several parts into a new part, ClickHouse marks the original parts as inactive and deletes them only after `old_parts_lifetime` seconds.
-Inactive parts are removed if they are not used by current queries, i.e. if the `refcount` of the part is zero.
+Inactive parts are removed if they are not used by current queries, i.e. if the `refcount` of the part is 1.

`fsync` is not called for new parts, so for some time new parts exist only in the server's RAM (OS cache). If the server is rebooted spontaneously, new parts can be lost or damaged.
To protect data inactive parts are not deleted immediately.

@@ -555,7 +555,7 @@ Merge reads rows from parts in blocks of `merge_max_block_size` rows, then merge

## number_of_free_entries_in_pool_to_lower_max_size_of_merge {#number-of-free-entries-in-pool-to-lower-max-size-of-merge}

-When there is less than specified number of free entries in pool (or replicated queue), start to lower maximum size of merge to process (or to put in queue).
+When there is less than specified number of free entries in pool (or replicated queue), start to lower maximum size of merge to process (or to put in queue). This is to allow small merges to proceed rather than filling the pool with long-running merges.

Possible values:

@@ -566,7 +566,7 @@ Default value: 8

## number_of_free_entries_in_pool_to_execute_mutation {#number-of-free-entries-in-pool-to-execute-mutation}

-When there is less than specified number of free entries in pool, do not execute part mutations.
+When there is less than specified number of free entries in pool, do not execute part mutations. This is to leave free threads for regular merges and avoid "Too many parts".

Possible values:

@@ -623,6 +623,19 @@ Possible values:

Default value: false

+## number_of_free_entries_in_pool_to_execute_optimize_entire_partition {#number_of_free_entries_in_pool_to_execute_optimize_entire_partition}
+
+When there is less than the specified number of free entries in the pool, do not execute optimizing an entire partition in the background (this task is generated when `min_age_to_force_merge_seconds` is set and `min_age_to_force_merge_on_partition_only` is enabled). This is to leave free threads for regular merges and avoid "Too many parts".
+
+Possible values:
+
+- Positive integer.
+
+Default value: 25
+
+The value of the `number_of_free_entries_in_pool_to_execute_optimize_entire_partition` setting should be less than the value of the [background_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_pool_size) * [background_merges_mutations_concurrency_ratio](/docs/en/operations/server-configuration-parameters/settings.md/#background_merges_mutations_concurrency_ratio).
Otherwise, ClickHouse throws an exception. + + ## allow_floating_point_partition_key {#allow_floating_point_partition_key} Enables to allow floating-point number as a partition key. @@ -733,14 +746,14 @@ Default value: `0` (limit never applied). Minimal ratio of the number of _default_ values to the number of _all_ values in a column. Setting this value causes the column to be stored using sparse serializations. -If a column is sparse (contains mostly zeros), ClickHouse can encode it in a sparse format and automatically optimize calculations - the data does not require full decompression during queries. To enable this sparse serialization, define the `ratio_of_defaults_for_sparse_serialization` setting to be less than 1.0. If the value is greater than or equal to 1.0 (the default), then the columns will be always written using the normal full serialization. +If a column is sparse (contains mostly zeros), ClickHouse can encode it in a sparse format and automatically optimize calculations - the data does not require full decompression during queries. To enable this sparse serialization, define the `ratio_of_defaults_for_sparse_serialization` setting to be less than 1.0. If the value is greater than or equal to 1.0, then the columns will be always written using the normal full serialization. Possible values: - Float between 0 and 1 to enable sparse serialization - 1.0 (or greater) if you do not want to use sparse serialization -Default value: `1.0` (sparse serialization is disabled) +Default value: `0.9375` **Example** @@ -832,6 +845,13 @@ You can see which parts of `s` were stored using the sparse serialization: └────────┴────────────────────┘ ``` +## replace_long_file_name_to_hash {#replace_long_file_name_to_hash} +If the file name for column is too long (more than `max_file_name_length` bytes) replace it to SipHash128. Default value: `false`. + +## max_file_name_length {#max_file_name_length} + +The maximal length of the file name to keep it as is without hashing. Takes effect only if setting `replace_long_file_name_to_hash` is enabled. The value of this setting does not include the length of file extension. So, it is recommended to set it below the maximum filename length (usually 255 bytes) with some gap to avoid filesystem errors. Default value: 127. + ## clean_deleted_rows Enable/disable automatic deletion of rows flagged as `is_deleted` when perform `OPTIMIZE ... FINAL` on a table using the ReplacingMergeTree engine. When disabled, the `CLEANUP` keyword has to be added to the `OPTIMIZE ... FINAL` to have the same behaviour. @@ -841,3 +861,9 @@ Possible values: - `Always` or `Never`. Default value: `Never` + +## allow_experimental_block_number_column + +Persists virtual column `_block_number` on merges. + +Default value: false. diff --git a/docs/en/operations/settings/permissions-for-queries.md b/docs/en/operations/settings/permissions-for-queries.md index 9e9c564d4261..bdcd1139418d 100644 --- a/docs/en/operations/settings/permissions-for-queries.md +++ b/docs/en/operations/settings/permissions-for-queries.md @@ -48,7 +48,7 @@ Setting `readonly = 1` prohibits the user from changing settings. There is a way ::: -## allow_ddl {#settings_allow_ddl} +## allow_ddl {#allow_ddl} Allows or denies [DDL](https://en.wikipedia.org/wiki/Data_definition_language) queries. 
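+
+For example (a sketch; the profile name is made up), a profile in `users.xml` can combine `readonly = 2` with `allow_ddl = 0` to allow reads and setting changes while rejecting DDL:
+
+``` xml
+<profiles>
+    <reader_no_ddl>
+        <readonly>2</readonly>
+        <allow_ddl>0</allow_ddl>
+    </reader_no_ddl>
+</profiles>
+```
+
+Under this profile, `SELECT` and `SET` succeed, while `CREATE`, `ALTER` and other DDL statements are rejected.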
diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md index 163ed5d58260..15f39b53e076 100644 --- a/docs/en/operations/settings/query-complexity.md +++ b/docs/en/operations/settings/query-complexity.md @@ -154,6 +154,13 @@ Result: Maximum query execution time in seconds. At this time, it is not checked for one of the sorting stages, or when merging and finalizing aggregate functions. +The `max_execution_time` parameter can be a bit tricky to understand. +It operates based on interpolation relative to the current query execution speed (this behaviour is controlled by [timeout_before_checking_execution_speed](#timeout-before-checking-execution-speed)). +ClickHouse will interrupt a query if the projected execution time exceeds the specified `max_execution_time`. +By default, `timeout_before_checking_execution_speed` is set to 10 seconds. This means that after 10 seconds of query execution, ClickHouse will begin estimating the total execution time. +If, for example, `max_execution_time` is set to 3600 seconds (1 hour), ClickHouse will terminate the query if the estimated time exceeds this 3600-second limit. +If you set `timeout_before_checking_execution_speed` to 0, ClickHouse will use clock time as the basis for `max_execution_time`. + ## timeout_overflow_mode {#timeout-overflow-mode} What to do if the query is run longer than ‘max_execution_time’: ‘throw’ or ‘break’. By default, throw. @@ -298,7 +305,7 @@ Default value: `THROW`. - [JOIN clause](../../sql-reference/statements/select/join.md#select-join) - [Join table engine](../../engines/table-engines/special/join.md) -## max_partitions_per_insert_block {#max-partitions-per-insert-block} +## max_partitions_per_insert_block {#settings-max_partitions_per_insert_block} Limits the maximum number of partitions in a single inserted block. @@ -309,9 +316,18 @@ Default value: 100. **Details** -When inserting data, ClickHouse calculates the number of partitions in the inserted block. If the number of partitions is more than `max_partitions_per_insert_block`, ClickHouse throws an exception with the following text: +When inserting data, ClickHouse calculates the number of partitions in the inserted block. If the number of partitions is more than `max_partitions_per_insert_block`, ClickHouse either logs a warning or throws an exception based on `throw_on_max_partitions_per_insert_block`. Exceptions have the following text: + +> “Too many partitions for a single INSERT block (`partitions_count` partitions, limit is ” + toString(max_partitions) + “). The limit is controlled by the ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).” + +## throw_on_max_partitions_per_insert_block {#settings-throw_on_max_partition_per_insert_block} -> “Too many partitions for single INSERT block (more than” + toString(max_parts) + “). The limit is controlled by ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception.
It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).” +Allows you to control behaviour when `max_partitions_per_insert_block` is reached. + +- `true` - When an insert block reaches `max_partitions_per_insert_block`, an exception is raised. +- `false` - Logs a warning when `max_partitions_per_insert_block` is reached. + +Default value: `true` ## max_temporary_data_on_disk_size_for_user {#settings_max_temporary_data_on_disk_size_for_user} @@ -327,3 +343,39 @@ The maximum amount of data consumed by temporary files on disk in bytes for all Zero means unlimited. Default value: 0. + +## max_sessions_for_user {#max-sessions-per-user} + +Maximum number of simultaneous sessions per authenticated user to the ClickHouse server. + +Example: + +``` xml
<profiles>
    <single_session_user>
        <max_sessions_for_user>1</max_sessions_for_user>
    </single_session_user>

    <two_sessions_profile>
        <max_sessions_for_user>2</max_sessions_for_user>
    </two_sessions_profile>

    <unlimited_sessions_profile>
        <max_sessions_for_user>0</max_sessions_for_user>
    </unlimited_sessions_profile>
</profiles>

<users>
    <!-- The user names below are illustrative; each user is assigned the corresponding profile. -->
    <user_with_one_session>
        <profile>single_session_user</profile>
    </user_with_one_session>

    <user_with_two_sessions>
        <profile>two_sessions_profile</profile>
    </user_with_two_sessions>

    <user_with_unlimited_sessions>
        <profile>unlimited_sessions_profile</profile>
    </user_with_unlimited_sessions>
</users>
``` + +Default value: 0 (unlimited number of simultaneous sessions). diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md index 1b22a6d1223b..bb59402079e5 100644 --- a/docs/en/operations/settings/settings-formats.md +++ b/docs/en/operations/settings/settings-formats.md @@ -242,6 +242,26 @@ See also: - [DateTime data type.](../../sql-reference/data-types/datetime.md) - [Functions for working with dates and times.](../../sql-reference/functions/date-time-functions.md) +## interval_output_format {#interval_output_format} + +Allows choosing different output formats of the text representation of interval types. + +Possible values: + +- `kusto` - KQL-style output format. + + ClickHouse outputs intervals in [KQL format](https://learn.microsoft.com/en-us/dotnet/standard/base-types/standard-timespan-format-strings#the-constant-c-format-specifier). For example, `toIntervalDay(2)` would be formatted as `2.00:00:00`. Please note that for interval types of varying length (e.g. `IntervalMonth` and `IntervalYear`) the average number of seconds per interval is taken into account. + +- `numeric` - Numeric output format. + + ClickHouse outputs intervals as their underlying numeric representation. For example, `toIntervalDay(2)` would be formatted as `2`. + +Default value: `numeric`. + +See also: + +- [Interval](../../sql-reference/data-types/special-data-types/interval.md) + ## input_format_ipv4_default_on_conversion_error {#input_format_ipv4_default_on_conversion_error} Deserialization of IPv4 will use default values instead of throwing an exception on conversion error. @@ -301,6 +321,10 @@ If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` ar This parameter is useful when you are using formats that require a schema definition, such as [Cap’n Proto](https://capnproto.org/) or [Protobuf](https://developers.google.com/protocol-buffers/). The value depends on the format. +## output_format_schema {#output-format-schema} + +The path to the file where the automatically generated schema will be saved in [Cap’n Proto](../../interfaces/formats.md#capnproto-capnproto) or [Protobuf](../../interfaces/formats.md#protobuf-protobuf) formats.
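A sketch of how this can be combined with autogenerated schemas (the file paths are illustrative, and it assumes the companion setting `format_protobuf_use_autogenerated_schema`, described further below, is enabled):

```sql
SET format_protobuf_use_autogenerated_schema = 1;
SET output_format_schema = '/tmp/numbers.proto';

-- Exports the data in Protobuf format and saves the autogenerated
-- .proto schema to /tmp/numbers.proto for later reuse:
SELECT number AS x
FROM numbers(10)
INTO OUTFILE '/tmp/numbers.bin'
FORMAT Protobuf;
```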
+ ## output_format_enable_streaming {#output_format_enable_streaming} Enable streaming in output formats that support it. @@ -357,6 +381,13 @@ Enabled by default. Allow parsing numbers as strings in JSON input formats. +Enabled by default. + +## input_format_json_try_infer_numbers_from_strings {#input_format_json_try_infer_numbers_from_strings} + +If enabled, during schema inference ClickHouse will try to infer numbers from string fields. +It can be useful if JSON data contains quoted UInt64 numbers. + Disabled by default. ## input_format_json_read_objects_as_strings {#input_format_json_read_objects_as_strings} @@ -380,7 +411,76 @@ Result: └────┴──────────────────────────┴────────────┘ ``` -Disabled by default. +Enabled by default. + +## input_format_json_try_infer_named_tuples_from_objects {#input_format_json_try_infer_named_tuples_from_objects} + +If enabled, during schema inference ClickHouse will try to infer named Tuple from JSON objects. +The resulting named Tuple will contain all elements from all corresponding JSON objects from sample data. + +Example: + +```sql +SET input_format_json_try_infer_named_tuples_from_objects = 1; +DESC format(JSONEachRow, '{"obj" : {"a" : 42, "b" : "Hello"}}, {"obj" : {"a" : 43, "c" : [1, 2, 3]}}, {"obj" : {"d" : {"e" : 42}}}') +``` + +Result: + +``` +┌─name─┬─type───────────────────────────────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ obj │ Tuple(a Nullable(Int64), b Nullable(String), c Array(Nullable(Int64)), d Tuple(e Nullable(Int64))) │ │ │ │ │ │ +└──────┴────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` + +Enabled by default. + +## input_format_json_read_arrays_as_strings {#input_format_json_read_arrays_as_strings} + +Allow parsing JSON arrays as strings in JSON input formats. + +Example: + +```sql +SET input_format_json_read_arrays_as_strings = 1; +SELECT arr, toTypeName(arr), JSONExtractArrayRaw(arr)[3] from format(JSONEachRow, 'arr String', '{"arr" : [1, "Hello", [1,2,3]]}'); +``` + +Result: +``` +┌─arr───────────────────┬─toTypeName(arr)─┬─arrayElement(JSONExtractArrayRaw(arr), 3)─┐ +│ [1, "Hello", [1,2,3]] │ String │ [1,2,3] │ +└───────────────────────┴─────────────────┴───────────────────────────────────────────┘ +``` + +Enabled by default. + +## input_format_json_infer_incomplete_types_as_strings {#input_format_json_infer_incomplete_types_as_strings} + +Allow to use String type for JSON keys that contain only `Null`/`{}`/`[]` in data sample during schema inference. +In JSON formats any value can be read as String, and we can avoid errors like `Cannot determine type for column 'column_name' by first 25000 rows of data, most likely this column contains only Nulls or empty Arrays/Maps` during schema inference +by using String type for keys with unknown types. 
+ +Example: + +```sql +SET input_format_json_infer_incomplete_types_as_strings = 1, input_format_json_try_infer_named_tuples_from_objects = 1; +DESCRIBE format(JSONEachRow, '{"obj" : {"a" : [1,2,3], "b" : "hello", "c" : null, "d" : {}, "e" : []}}'); +SELECT * FROM format(JSONEachRow, '{"obj" : {"a" : [1,2,3], "b" : "hello", "c" : null, "d" : {}, "e" : []}}'); +``` + +Result: +``` +┌─name─┬─type───────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ +│ obj │ Tuple(a Array(Nullable(Int64)), b Nullable(String), c Nullable(String), d Nullable(String), e Array(Nullable(String))) │ │ │ │ │ │ +└──────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ + +┌─obj────────────────────────────┐ +│ ([1,2,3],'hello',NULL,'{}',[]) │ +└────────────────────────────────┘ +``` + +Enabled by default. ## input_format_json_validate_types_from_metadata {#input_format_json_validate_types_from_metadata} @@ -603,6 +703,30 @@ Column type should be String. If value is empty, default names `row_{i}` will be used. Default value: ''. +### input_format_json_compact_allow_variable_number_of_columns {#input_format_json_compact_allow_variable_number_of_columns} + +Allow variable number of columns in rows in JSONCompact/JSONCompactEachRow input formats. +Ignore extra columns in rows with more columns than expected and treat missing columns as default values. + +Disabled by default. + +### output_format_markdown_escape_special_characters {#output_format_markdown_escape_special_characters} + +When enabled, escape special characters in Markdown. + +[CommonMark](https://spec.commonmark.org/0.30/#example-12) defines the following special characters that can be escaped by `\`: + +``` +! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~ +``` + +Possible values: + +- 0 — Disable. +- 1 — Enable. + +Default value: 0. + ## TSV format settings {#tsv-format-settings} ### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default} @@ -740,6 +864,13 @@ When enabled, trailing empty lines at the end of a TSV file will be skipped. Disabled by default. +### input_format_tsv_allow_variable_number_of_columns {#input_format_tsv_allow_variable_number_of_columns} + +Allow variable number of columns in rows in TSV input format. +Ignore extra columns in rows with more columns than expected and treat missing columns as default values. + +Disabled by default. + ## CSV format settings {#csv-format-settings} ### format_csv_delimiter {#format_csv_delimiter} @@ -932,6 +1063,13 @@ Result " string " ``` +### input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns} + +Allow variable number of columns in rows in CSV input format. +Ignore extra columns in rows with more columns than expected and treat missing columns as default values. + +Disabled by default. + ### input_format_csv_allow_whitespace_or_tab_as_delimiter {#input_format_csv_allow_whitespace_or_tab_as_delimiter} Allow to use whitespace or tab as field delimiter in CSV strings. @@ -964,6 +1102,28 @@ Result a b ``` +### input_format_csv_use_default_on_bad_values {#input_format_csv_use_default_on_bad_values} + +Allow to set a default value for a column when CSV field deserialization fails on a bad value. + +Default value: `false`.
+ +**Examples** + +Query + +```bash +# Use a persistent --path so the table survives across separate clickhouse-local invocations (the path is illustrative): +./clickhouse local --path /tmp/csv_default_demo -q "create table test_tbl (x String, y UInt32, z Date) engine=MergeTree order by x" +echo 'a,b,c' | ./clickhouse local --path /tmp/csv_default_demo -q "INSERT INTO test_tbl SETTINGS input_format_csv_use_default_on_bad_values=true FORMAT CSV" +./clickhouse local --path /tmp/csv_default_demo -q "select * from test_tbl" +``` + +Result + +```text +a 0 1971-01-01 +``` + ## Values format settings {#values-format-settings} ### input_format_values_interpret_expressions {#input_format_values_interpret_expressions} @@ -1061,17 +1221,6 @@ Default value: 1. ## Arrow format settings {#arrow-format-settings} -### input_format_arrow_import_nested {#input_format_arrow_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - ### input_format_arrow_case_insensitive_column_matching {#input_format_arrow_case_insensitive_column_matching} Ignore case when matching Arrow column names with ClickHouse column names. @@ -1117,21 +1266,10 @@ Enabled by default. Compression method used in output Arrow format. Supported codecs: `lz4_frame`, `zstd`, `none` (uncompressed) -Default value: `none`. +Default value: `lz4_frame`. ## ORC format settings {#orc-format-settings} -### input_format_orc_import_nested {#input_format_orc_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - ### input_format_orc_row_batch_size {#input_format_orc_row_batch_size} Batch size when reading ORC stripes. @@ -1170,17 +1308,6 @@ Default value: `none`. ## Parquet format settings {#parquet-format-settings} -### input_format_parquet_import_nested {#input_format_parquet_import_nested} - -Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/index.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format. - -Possible values: - -- 0 — Data can not be inserted into `Nested` columns as an array of structs. -- 1 — Data can be inserted into `Nested` columns as an array of structs. - -Default value: `0`. - ### input_format_parquet_case_insensitive_column_matching {#input_format_parquet_case_insensitive_column_matching} Ignore case when matching Parquet column names with ClickHouse column names. @@ -1205,6 +1332,12 @@ Allow skipping columns with unsupported types while schema inference for format Disabled by default. +### input_format_parquet_local_file_min_bytes_for_seek {#input_format_parquet_local_file_min_bytes_for_seek} + +Minimum number of bytes required for a local file read to perform a seek, instead of reading and discarding data, in Parquet input format. + +Default value: `8192`. + ### output_format_parquet_string_as_string {#output_format_parquet_string_as_string} Use Parquet String type instead of Binary for String columns.
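For example (a sketch; the output path is illustrative):

```sql
-- Write String columns using the Parquet String (UTF8) logical type
-- instead of plain Binary:
SET output_format_parquet_string_as_string = 1;

SELECT 'hello' AS greeting
INTO OUTFILE '/tmp/greeting.parquet'
FORMAT Parquet;
```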
@@ -1283,6 +1416,11 @@ When serializing Nullable columns with Google wrappers, serialize default values Disabled by default. +### format_protobuf_use_autogenerated_schema {#format_protobuf_use_autogenerated_schema} + +Use autogenerated Protobuf schema when [format_schema](#formatschema-format-schema) is not set. +The schema is generated from ClickHouse table structure using the function [structureToProtobufSchema](../../sql-reference/functions/other-functions.md#structure_to_protobuf_schema). + ## Avro format settings {#avro-format-settings} ### input_format_avro_allow_missing_fields {#input_format_avro_allow_missing_fields} @@ -1300,6 +1438,17 @@ Default value: 0. Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format. +Format: +``` text +http://[user:password@]machine[:port] +``` + +Examples: +``` text +http://registry.example.com:8081 +http://admin:secret@registry.example.com:8081 +``` + Default value: `Empty`. ### output_format_avro_codec {#output_format_avro_codec} @@ -1531,6 +1680,13 @@ When enabled, trailing empty lines at the end of file in CustomSeparated format Disabled by default. +### input_format_custom_allow_variable_number_of_columns {#input_format_custom_allow_variable_number_of_columns} + +Allow variable number of columns in rows in CustomSeparated input format. +Ignore extra columns in rows with more columns than expected and treat missing columns as default values. + +Disabled by default. + ## Regexp format settings {#regexp-format-settings} ### format_regexp_escaping_rule {#format_regexp_escaping_rule} @@ -1568,6 +1724,11 @@ Possible values: Default value: `'by_values'`. +### format_capn_proto_use_autogenerated_schema {#format_capn_proto_use_autogenerated_schema} + +Use autogenerated CapnProto schema when [format_schema](#formatschema-format-schema) is not set. +The schema is generated from ClickHouse table structure using the function [structureToCapnProtoSchema](../../sql-reference/functions/other-functions.md#structure_to_capnproto_schema). + ## MySQLDump format settings {#musqldump-format-settings} ### input_format_mysql_dump_table_name {#input_format_mysql_dump_table_name} diff --git a/docs/en/operations/settings/settings-profiles.md b/docs/en/operations/settings/settings-profiles.md index 2f39a75453c4..d08266b8ef8a 100644 --- a/docs/en/operations/settings/settings-profiles.md +++ b/docs/en/operations/settings/settings-profiles.md @@ -39,7 +39,7 @@ Example: 8
- + 1000000000 100000000000 @@ -67,6 +67,8 @@ Example: 50 100 + 4 + 1 diff --git a/docs/en/operations/settings/settings-query-level.md b/docs/en/operations/settings/settings-query-level.md new file mode 100644 index 000000000000..81cc2294a4cb --- /dev/null +++ b/docs/en/operations/settings/settings-query-level.md @@ -0,0 +1,217 @@ +--- +sidebar_label: Query-level Settings +title: Query-level Settings +slug: /en/operations/settings/query-level +--- + +There are multiple ways to set ClickHouse query-level settings. Settings are configured in layers, and each subsequent layer redefines the previous values of a setting. + +The order of priority for defining a setting is: + +1. Applying a setting to a user directly, or within a settings profile + + - SQL (recommended) + - adding one or more XML or YAML files to `/etc/clickhouse-server/users.d` + +2. Session settings + + - Send `SET setting=value` from the ClickHouse Cloud SQL console or + `clickhouse client` in interactive mode. Similarly, you can use ClickHouse + sessions in the HTTP protocol. To do this, you need to specify the + `session_id` HTTP parameter. + +3. Query settings + + - When starting `clickhouse client` in non-interactive mode, set the startup + parameter `--setting=value`. + - When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`). + - Define settings in the + [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) + clause of the SELECT query. The setting value is applied only to that query + and is reset to the default or previous value after the query is executed. + +## Examples + +These examples all set the value of the `async_insert` setting to `1`, and +show how to examine the settings in a running system. + +### Using SQL to apply a setting to a user directly + +This creates the user `ingester` with the setting `async_insert = 1`: + +```sql +CREATE USER ingester +IDENTIFIED WITH sha256_hash BY '7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3' +# highlight-next-line +SETTINGS async_insert = 1 +``` + +#### Examine the settings profile and assignment + +```sql +SHOW ACCESS +``` + +```response +┌─ACCESS─────────────────────────────────────────────────────────────────────────────┐ +│ ... │ +# highlight-next-line +│ CREATE USER ingester IDENTIFIED WITH sha256_password SETTINGS async_insert = true │ +│ ...
│ +└────────────────────────────────────────────────────────────────────────────────────┘ +``` +### Using SQL to create a settings profile and assign it to a user + +This creates the profile `log_ingest` with the setting `async_insert = 1`: + +```sql +CREATE +SETTINGS PROFILE log_ingest SETTINGS async_insert = 1 +``` + +This creates the user `ingester` and assigns the user the settings profile `log_ingest`: + +```sql +CREATE USER ingester +IDENTIFIED WITH sha256_hash BY '7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3' +# highlight-next-line +SETTINGS PROFILE log_ingest +``` + + +### Using XML to create a settings profile and user + +```xml title=/etc/clickhouse-server/users.d/users.xml
<clickhouse>
# highlight-start
    <profiles>
        <log_ingest>
            <async_insert>1</async_insert>
        </log_ingest>
    </profiles>
# highlight-end

    <users>
        <ingester>
            <password_sha256_hex>7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3</password_sha256_hex>
# highlight-start
            <profile>log_ingest</profile>
# highlight-end
        </ingester>
        <default>
            <password_sha256_hex>7e099f39b84ea79559b3e85ea046804e63725fd1f46b37f281276aae20f86dc3</password_sha256_hex>
            <access_management>1</access_management>
            <named_collection_control>1</named_collection_control>
        </default>
    </users>
</clickhouse>
``` + +#### Examine the settings profile and assignment + +```sql +SHOW ACCESS +``` + +```response +┌─ACCESS─────────────────────────────────────────────────────────────────────────────┐ +│ CREATE USER default IDENTIFIED WITH sha256_password │ +# highlight-next-line +│ CREATE USER ingester IDENTIFIED WITH sha256_password SETTINGS PROFILE log_ingest │ +│ CREATE SETTINGS PROFILE default │ +# highlight-next-line +│ CREATE SETTINGS PROFILE log_ingest SETTINGS async_insert = true │ +│ CREATE SETTINGS PROFILE readonly SETTINGS readonly = 1 │ +│ ... │ +└────────────────────────────────────────────────────────────────────────────────────┘ +``` + +### Assign a setting to a session + +```sql +SET async_insert = 1; +SELECT value FROM system.settings WHERE name = 'async_insert'; +``` + +```response +┌─value──┐ +│ 1 │ +└────────┘ +``` + +### Assign a setting during a query + +```sql +INSERT INTO YourTable +# highlight-next-line +SETTINGS async_insert=1 +VALUES (...) +``` + + +## Converting a Setting to its Default Value + +If you change a setting and would like to revert it back to its default value, set the value to `DEFAULT`. The syntax looks like: + +```sql +SET setting_name = DEFAULT +``` + +For example, the default value of `async_insert` is `0`. Suppose you change its value to `1`: + +```sql +SET async_insert = 1; + +SELECT value FROM system.settings WHERE name = 'async_insert'; +``` + +The response is: + +```response +┌─value──┐ +│ 1 │ +└────────┘ +``` + +The following command sets its value back to 0: + +```sql +SET async_insert = DEFAULT; + +SELECT value FROM system.settings WHERE name = 'async_insert'; +``` + +The setting is now back to its default: + +```response +┌─value───┐ +│ 0 │ +└─────────┘ +``` + +## Custom Settings {#custom_settings} + +In addition to the common [settings](../../operations/settings/settings.md), users can define custom settings. + +A custom setting name must begin with one of the predefined prefixes. The list of these prefixes must be declared in the [custom_settings_prefixes](../../operations/server-configuration-parameters/settings.md#custom_settings_prefixes) parameter in the server configuration file. + +```xml
<custom_settings_prefixes>custom_</custom_settings_prefixes>
``` + +To define a custom setting, use the `SET` command: + +```sql +SET custom_a = 123; +``` + +To get the current value of a custom setting, use the `getSetting()` function: + +```sql +SELECT getSetting('custom_a'); +``` + +**See Also** + +- View the [Settings](./settings.md) page for a description of the ClickHouse settings.
+- [Global server settings](../../operations/server-configuration-parameters/settings.md) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index cff65e049f32..f703429cb703 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -17,7 +17,8 @@ Default value: 0. **Example** ``` sql -insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); +INSERT INTO table_1 VALUES (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); +SELECT * FROM table_1; ``` ```response ┌─x─┬─y────┐ @@ -30,7 +31,7 @@ ```sql SELECT * FROM table_1 -SETTINGS additional_table_filters = (('table_1', 'x != 2')) +SETTINGS additional_table_filters = {'table_1': 'x != 2'} ``` ```response ┌─x─┬─y────┐ @@ -50,7 +51,8 @@ Default value: `''`. **Example** ``` sql -insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); +INSERT INTO table_1 VALUES (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd'); +SELECT * FROM table_1; ``` ```response ┌─x─┬─y────┐ @@ -96,6 +98,18 @@ Default value: 0. ``` +## mutations_execute_nondeterministic_on_initiator {#mutations_execute_nondeterministic_on_initiator} + +If true, constant nondeterministic functions (e.g. the function `now()`) are executed on the initiator and replaced with literals in `UPDATE` and `DELETE` queries. It helps to keep data in sync on replicas while executing mutations with constant nondeterministic functions. Default value: `false`. + +## mutations_execute_subqueries_on_initiator {#mutations_execute_subqueries_on_initiator} + +If true, scalar subqueries are executed on the initiator and replaced with literals in `UPDATE` and `DELETE` queries. Default value: `false`. + +## mutations_max_literal_size_to_replace {#mutations_max_literal_size_to_replace} + +The maximum size of a serialized literal in bytes to replace in `UPDATE` and `DELETE` queries. Takes effect only if at least one of the two settings above is enabled. Default value: 16384 (16 KiB). + ## distributed_product_mode {#distributed-product-mode} Changes the behaviour of [distributed subqueries](../../sql-reference/operators/in.md). @@ -163,7 +177,7 @@ If `enable_optimize_predicate_expression = 1`, then the execution time of these If `enable_optimize_predicate_expression = 0`, then the execution time of the second query is much longer because the `WHERE` clause applies to all the data after the subquery finishes. -## fallback_to_stale_replicas_for_distributed_queries {#settings-fallback_to_stale_replicas_for_distributed_queries} +## fallback_to_stale_replicas_for_distributed_queries {#fallback_to_stale_replicas_for_distributed_queries} Forces a query to an out-of-date replica if updated data is not available. See [Replication](../../engines/table-engines/mergetree-family/replication.md). @@ -173,7 +187,7 @@ Used when performing `SELECT` from a distributed table that points to replicated By default, 1 (enabled). -## force_index_by_date {#settings-force_index_by_date} +## force_index_by_date {#force_index_by_date} Disables query execution if the index can’t be used by date. @@ -189,7 +203,7 @@ Works with tables in the MergeTree family. If `force_primary_key=1`, ClickHouse checks to see if the query has a primary key condition that can be used for restricting data ranges. If there is no suitable condition, it throws an exception. However, it does not check whether the condition reduces the amount of data to read.
For more information about data ranges in MergeTree tables, see [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md). -## use_skip_indexes {#settings-use_skip_indexes} +## use_skip_indexes {#use_skip_indexes} Use data skipping indexes during query execution. @@ -200,7 +214,7 @@ Possible values: Default value: 1. -## force_data_skipping_indices {#settings-force_data_skipping_indices} +## force_data_skipping_indices {#force_data_skipping_indices} Disables query execution if the passed data skipping indices weren't used. @@ -227,7 +241,7 @@ SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='`d1_ SELECT * FROM data_01515 WHERE d1 = 0 AND assumeNotNull(d1_null) = 0 SETTINGS force_data_skipping_indices='`d1_idx`, d1_null_idx'; -- Ok. ``` -## ignore_data_skipping_indices {#settings-ignore_data_skipping_indices} +## ignore_data_skipping_indices {#ignore_data_skipping_indices} Ignores the skipping indexes specified if used by the query. @@ -387,7 +401,7 @@ Enables or disables [fsync](http://pubs.opengroup.org/onlinepubs/9699919799/func It makes sense to disable it if the server has millions of tiny tables that are constantly being created and destroyed. -## function_range_max_elements_in_block {#settings-function_range_max_elements_in_block} +## function_range_max_elements_in_block {#function_range_max_elements_in_block} Sets the safety threshold for data volume generated by function [range](../../sql-reference/functions/array-functions.md/#range). Defines the maximum number of values generated by function per block of data (sum of array sizes for every row in a block). @@ -402,7 +416,7 @@ Default value: `500,000,000`. - [max_block_size](#setting-max_block_size) - [min_insert_block_size_rows](#min-insert-block-size-rows) -## enable_http_compression {#settings-enable_http_compression} +## enable_http_compression {#enable_http_compression} Enables or disables data compression in the response to an HTTP request. @@ -415,15 +429,15 @@ Possible values: Default value: 0. -## http_zlib_compression_level {#settings-http_zlib_compression_level} +## http_zlib_compression_level {#http_zlib_compression_level} -Sets the level of data compression in the response to an HTTP request if [enable_http_compression = 1](#settings-enable_http_compression). +Sets the level of data compression in the response to an HTTP request if [enable_http_compression = 1](#enable_http_compression). Possible values: Numbers from 1 to 9. Default value: 3. -## http_native_compression_disable_checksumming_on_decompress {#settings-http_native_compression_disable_checksumming_on_decompress} +## http_native_compression_disable_checksumming_on_decompress {#http_native_compression_disable_checksumming_on_decompress} Enables or disables checksum verification when decompressing the HTTP POST data from the client. Used only for ClickHouse native compression format (not used with `gzip` or `deflate`). @@ -466,7 +480,7 @@ Possible values: Default value: `1000`. -## send_progress_in_http_headers {#settings-send_progress_in_http_headers} +## send_progress_in_http_headers {#send_progress_in_http_headers} Enables or disables `X-ClickHouse-Progress` HTTP response headers in `clickhouse-server` responses. @@ -504,7 +518,7 @@ Possible values: Default value: `1`. -## join_default_strictness {#settings-join_default_strictness} +## join_default_strictness {#join_default_strictness} Sets default strictness for [JOIN clauses](../../sql-reference/statements/select/join.md/#select-join).
@@ -517,7 +531,7 @@ Possible values: Default value: `ALL`. -## join_algorithm {#settings-join_algorithm} +## join_algorithm {#join_algorithm} Specifies which [JOIN](../../sql-reference/statements/select/join.md) algorithm is used. @@ -533,7 +547,9 @@ Possible values: [Grace hash join](https://en.wikipedia.org/wiki/Hash_join#Grace_hash_join) is used. Grace hash provides an algorithm option that delivers performant complex joins while limiting memory use. - The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#settings-max_bytes_in_join)), the number of buckets is increased and the assigned bucket for each row. Any rows which don’t belong to the current bucket are flushed and reassigned. + The first phase of a grace join reads the right table and splits it into N buckets depending on the hash value of key columns (initially, N is `grace_hash_join_initial_buckets`). This is done in a way to ensure that each bucket can be processed independently. Rows from the first bucket are added to an in-memory hash table while the others are saved to disk. If the hash table grows beyond the memory limit (e.g., as set by [`max_bytes_in_join`](/docs/en/operations/settings/query-complexity.md/#max_bytes_in_join)), the number of buckets is increased and the assigned bucket is recalculated for each row. Any rows which don’t belong to the current bucket are flushed and reassigned. + + Supports `INNER/LEFT/RIGHT/FULL ALL/ANY JOIN`. - hash @@ -572,7 +588,7 @@ Possible values: ClickHouse always tries to use `partial_merge` join if possible, otherwise, it uses `hash`. *Deprecated*, same as `partial_merge,hash`. -## join_any_take_last_row {#settings-join_any_take_last_row} +## join_any_take_last_row {#join_any_take_last_row} Changes the behaviour of join operations with `ANY` strictness. @@ -591,7 +607,7 @@ See also: - [JOIN clause](../../sql-reference/statements/select/join.md/#select-join) - [Join table engine](../../engines/table-engines/special/join.md) -- [join_default_strictness](#settings-join_default_strictness) +- [join_default_strictness](#join_default_strictness) ## join_use_nulls {#join_use_nulls} @@ -863,7 +879,7 @@ Possible values: Default value: 2013265920. -## min_bytes_to_use_direct_io {#settings-min-bytes-to-use-direct-io} +## min_bytes_to_use_direct_io {#min-bytes-to-use-direct-io} The minimum data volume required for using direct I/O access to the storage disk. @@ -901,7 +917,7 @@ Possible values: Default value: `1`. -## log_queries {#settings-log-queries} +## log_queries {#log-queries} Setting up query logging.
@@ -913,7 +929,7 @@ Example: log_queries=1 ``` -## log_queries_min_query_duration_ms {#settings-log-queries-min-query-duration-ms} +## log_queries_min_query_duration_ms {#log-queries-min-query-duration-ms} If enabled (non-zero), queries faster than the value of this setting will not be logged (you can think about this as a `long_query_time` for [MySQL Slow Query Log](https://dev.mysql.com/doc/refman/5.7/en/slow-query-log.html)), and this basically means that you will not find them in the following tables: @@ -928,7 +944,7 @@ Only the queries with the following type will get to the log: - Type: milliseconds - Default value: 0 (any query) -## log_queries_min_type {#settings-log-queries-min-type} +## log_queries_min_type {#log-queries-min-type} `query_log` minimal type to log. @@ -946,11 +962,11 @@ Can be used to limit which entities will go to `query_log`, say you are interest log_queries_min_type='EXCEPTION_WHILE_PROCESSING' ``` -## log_query_threads {#settings-log-query-threads} +## log_query_threads {#log-query-threads} Setting up query threads logging. -Query threads log into the [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting has effect only when [log_queries](#settings-log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_thread_log) server configuration parameter. +Query threads log into the [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting has effect only when [log_queries](#log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_thread_log) server configuration parameter. Possible values: @@ -965,7 +981,7 @@ Default value: `1`. log_query_threads=1 ``` -## log_query_views {#settings-log-query-views} +## log_query_views {#log-query-views} Setting up query views logging. @@ -977,7 +993,7 @@ Example: log_query_views=1 ``` -## log_formatted_queries {#settings-log-formatted-queries} +## log_formatted_queries {#log-formatted-queries} Allows to log formatted queries to the [system.query_log](../../operations/system-tables/query_log.md) system table (populates `formatted_query` column in the [system.query_log](../../operations/system-tables/query_log.md)). @@ -988,7 +1004,7 @@ Possible values: Default value: `0`. -## log_comment {#settings-log-comment} +## log_comment {#log-comment} Specifies the value for the `log_comment` field of the [system.query_log](../system-tables/query_log.md) table and comment text for the server log. @@ -996,7 +1012,7 @@ It can be used to improve the readability of server logs. Additionally, it helps Possible values: -- Any string no longer than [max_query_size](#settings-max_query_size). If the max_query_size is exceeded, the server throws an exception. +- Any string no longer than [max_query_size](#max_query_size). If the max_query_size is exceeded, the server throws an exception. Default value: empty string. @@ -1020,7 +1036,7 @@ Result: └─────────────┴───────────┘ ``` -## log_processors_profiles {#settings-log_processors_profiles} +## log_processors_profiles {#log_processors_profiles} Write the time that each processor spent during execution/waiting for data to the `system.processors_profile_log` table.
@@ -1029,7 +1045,7 @@ See also: - [`system.processors_profile_log`](../../operations/system-tables/processors_profile_log.md) - [`EXPLAIN PIPELINE`](../../sql-reference/statements/explain.md#explain-pipeline) -## max_insert_block_size {#settings-max_insert_block_size} +## max_insert_block_size {#max_insert_block_size} The size of blocks (in a count of rows) to form for insertion into a table. This setting only applies in cases when the server forms the blocks. @@ -1063,7 +1079,7 @@ Possible values: Default value: 268435456. -## max_replica_delay_for_distributed_queries {#settings-max_replica_delay_for_distributed_queries} +## max_replica_delay_for_distributed_queries {#max_replica_delay_for_distributed_queries} Disables lagging replicas for distributed queries. See [Replication](../../engines/table-engines/mergetree-family/replication.md). @@ -1080,7 +1096,7 @@ Default value: 300. Used when performing `SELECT` from a distributed table that points to replicated tables. -## max_threads {#settings-max_threads} +## max_threads {#max_threads} The maximum number of query processing threads, excluding threads for retrieving data from remote servers (see the ‘max_distributed_connections’ parameter). @@ -1093,7 +1109,7 @@ For queries that are completed quickly because of a LIMIT, you can set a lower The smaller the `max_threads` value, the less memory is consumed. -## max_insert_threads {#settings-max-insert-threads} +## max_insert_threads {#max-insert-threads} The maximum number of threads to execute the `INSERT SELECT` query. @@ -1104,7 +1120,7 @@ Possible values: Default value: 0. -Parallel `INSERT SELECT` has effect only if the `SELECT` part is executed in parallel, see [max_threads](#settings-max_threads) setting. +Parallel `INSERT SELECT` has effect only if the `SELECT` part is executed in parallel, see [max_threads](#max_threads) setting. Higher values will lead to higher memory usage. ## max_compress_block_size {#max-compress-block-size} @@ -1133,7 +1149,7 @@ We are writing a URL column with the String type (average size of 60 bytes per v This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse. ::: -## max_query_size {#settings-max_query_size} +## max_query_size {#max_query_size} The maximum number of bytes of a query string parsed by the SQL parser. Data in the VALUES clause of INSERT queries is processed by a separate stream parser (that consumes O(1) RAM) and is not affected by this restriction. @@ -1322,7 +1338,7 @@ Connection pool size for PostgreSQL table engine and database engine. Default value: 16 -## postgresql_connection_pool_size {#postgresql-connection-pool-size} +## postgresql_connection_pool_wait_timeout {#postgresql-connection-pool-wait-timeout} Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on an empty pool. @@ -1377,7 +1393,7 @@ Default value: 5000. ## stream_flush_interval_ms {#stream-flush-interval-ms} -Works for tables with streaming in the case of a timeout, or when a thread generates [max_insert_block_size](#settings-max_insert_block_size) rows. +Works for tables with streaming in the case of a timeout, or when a thread generates [max_insert_block_size](#max_insert_block_size) rows. The default value is 7500. @@ -1389,7 +1405,7 @@ Timeout for polling data from/to streaming storages. Default value: 500.
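Both streaming-related settings can be adjusted at the session or profile level; a sketch with illustrative values:

```sql
-- Flush blocks from streaming engines (e.g. Kafka) roughly every 2 seconds
-- instead of the default 7.5 seconds:
SET stream_flush_interval_ms = 2000;

-- Use a shorter poll timeout when reading from the streaming storage:
SET stream_poll_timeout_ms = 250;
```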
-## load_balancing {#settings-load_balancing} +## load_balancing {#load_balancing} Specifies the algorithm of replicas selection that is used for distributed query processing. @@ -1403,7 +1419,7 @@ ClickHouse supports the following algorithms of choosing replicas: See also: -- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors) +- [distributed_replica_max_ignored_errors](#distributed_replica_max_ignored_errors) ### Random (by Default) {#load_balancing-random} @@ -1457,20 +1473,20 @@ load_balancing = round_robin This algorithm uses a round-robin policy across replicas with the same number of errors (only the queries with `round_robin` policy are accounted). -## prefer_localhost_replica {#settings-prefer-localhost-replica} +## prefer_localhost_replica {#prefer-localhost-replica} Enables/disables the preferential use of the localhost replica when processing distributed queries. Possible values: - 1 — ClickHouse always sends a query to the localhost replica if it exists. -- 0 — ClickHouse uses the balancing strategy specified by the [load_balancing](#settings-load_balancing) setting. +- 0 — ClickHouse uses the balancing strategy specified by the [load_balancing](#load_balancing) setting. Default value: 1. :::note -Disable this setting if you use [max_parallel_replicas](#settings-max_parallel_replicas) without [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key). -If [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) is set, disable this setting only if it's used on a cluster with multiple shards containing multiple replicas. +Disable this setting if you use [max_parallel_replicas](#max_parallel_replicas) without [parallel_replicas_custom_key](#parallel_replicas_custom_key). +If [parallel_replicas_custom_key](#parallel_replicas_custom_key) is set, disable this setting only if it's used on a cluster with multiple shards containing multiple replicas. If it's used on a cluster with a single shard and multiple replicas, disabling this setting will have negative effects. ::: @@ -1484,7 +1500,7 @@ See the section “WITH TOTALS modifier”. The threshold for `totals_mode = 'auto'`. See the section “WITH TOTALS modifier”. -## max_parallel_replicas {#settings-max_parallel_replicas} +## max_parallel_replicas {#max_parallel_replicas} The maximum number of replicas for each shard when executing a query. @@ -1511,23 +1527,23 @@ A query may be processed faster if it is executed on several servers in parallel - The sampling key is an expression that is expensive to calculate. - The cluster latency distribution has a long tail, so that querying more servers increases the query overall latency. -### Parallel processing using [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) +### Parallel processing using [parallel_replicas_custom_key](#parallel_replicas_custom_key) This setting is useful for any replicated table. -## parallel_replicas_custom_key {#settings-parallel_replicas_custom_key} +## parallel_replicas_custom_key {#parallel_replicas_custom_key} An arbitrary integer expression that can be used to split work between replicas for a specific table. The value can be any integer expression. -A query may be processed faster if it is executed on several servers in parallel but it depends on the used [parallel_replicas_custom_key](#settings-parallel_replicas_custom_key) -and [parallel_replicas_custom_key_filter_type](#settings-parallel_replicas_custom_key_filter_type).
+A query may be processed faster if it is executed on several servers in parallel but it depends on the used [parallel_replicas_custom_key](#parallel_replicas_custom_key) +and [parallel_replicas_custom_key_filter_type](#parallel_replicas_custom_key_filter_type). Simple expressions using primary keys are preferred. If the setting is used on a cluster that consists of a single shard with multiple replicas, those replicas will be converted into virtual shards. Otherwise, it will behave the same as for the `SAMPLE` key: it will use multiple replicas of each shard. -## parallel_replicas_custom_key_filter_type {#settings-parallel_replicas_custom_key_filter_type} +## parallel_replicas_custom_key_filter_type {#parallel_replicas_custom_key_filter_type} How to use `parallel_replicas_custom_key` expression for splitting work between replicas. @@ -1621,7 +1637,7 @@ Possible values: Default value: `1`. -## query_cache_store_results_of_queries_with_nondeterministic_functions {#query--store-results-of-queries-with-nondeterministic-functions} +## query_cache_store_results_of_queries_with_nondeterministic_functions {#query-cache-store-results-of-queries-with-nondeterministic-functions} If turned on, then results of `SELECT` queries with non-deterministic functions (e.g. `rand()`, `now()`) can be cached in the [query cache](../query-cache.md). @@ -1716,7 +1732,7 @@ Possible values: Default value: 0 (no restriction). -## insert_quorum {#settings-insert_quorum} +## insert_quorum {#insert_quorum} Enables the quorum writes. @@ -1730,7 +1746,7 @@ Quorum writes `INSERT` succeeds only when ClickHouse manages to correctly write data to the `insert_quorum` of replicas during the `insert_quorum_timeout`. If for any reason the number of replicas with successful writes does not reach the `insert_quorum`, the write is considered failed and ClickHouse will delete the inserted block from all the replicas where data has already been written. -When `insert_quorum_parallel` is disabled, all replicas in the quorum are consistent, i.e. they contain data from all previous `INSERT` queries (the `INSERT` sequence is linearized). When reading data written using `insert_quorum` and `insert_quorum_parallel` is disabled, you can turn on sequential consistency for `SELECT` queries using [select_sequential_consistency](#settings-select_sequential_consistency). +When `insert_quorum_parallel` is disabled, all replicas in the quorum are consistent, i.e. they contain data from all previous `INSERT` queries (the `INSERT` sequence is linearized). When reading data written using `insert_quorum` and `insert_quorum_parallel` is disabled, you can turn on sequential consistency for `SELECT` queries using [select_sequential_consistency](#select_sequential_consistency). ClickHouse generates an exception: @@ -1739,11 +1755,11 @@ See also: -- [insert_quorum_timeout](#settings-insert_quorum_timeout) -- [insert_quorum_parallel](#settings-insert_quorum_parallel) -- [select_sequential_consistency](#settings-select_sequential_consistency) +- [insert_quorum_timeout](#insert_quorum_timeout) +- [insert_quorum_parallel](#insert_quorum_parallel) +- [select_sequential_consistency](#select_sequential_consistency) -## insert_quorum_timeout {#settings-insert_quorum_timeout} +## insert_quorum_timeout {#insert_quorum_timeout}
If the timeout has passed and no write has taken place yet, ClickHouse will generate an exception and the client must repeat the query to write the same block to the same or any other replica. @@ -1751,11 +1767,11 @@ Default value: 600 000 milliseconds (ten minutes). See also: -- [insert_quorum](#settings-insert_quorum) -- [insert_quorum_parallel](#settings-insert_quorum_parallel) -- [select_sequential_consistency](#settings-select_sequential_consistency) +- [insert_quorum](#insert_quorum) +- [insert_quorum_parallel](#insert_quorum_parallel) +- [select_sequential_consistency](#select_sequential_consistency) -## insert_quorum_parallel {#settings-insert_quorum_parallel} +## insert_quorum_parallel {#insert_quorum_parallel} Enables or disables parallelism for quorum `INSERT` queries. If enabled, additional `INSERT` queries can be sent while previous queries have not yet finished. If disabled, additional writes to the same table will be rejected. @@ -1768,11 +1784,11 @@ Default value: 1. See also: -- [insert_quorum](#settings-insert_quorum) -- [insert_quorum_timeout](#settings-insert_quorum_timeout) -- [select_sequential_consistency](#settings-select_sequential_consistency) +- [insert_quorum](#insert_quorum) +- [insert_quorum_timeout](#insert_quorum_timeout) +- [select_sequential_consistency](#select_sequential_consistency) -## select_sequential_consistency {#settings-select_sequential_consistency} +## select_sequential_consistency {#select_sequential_consistency} Enables or disables sequential consistency for `SELECT` queries. Requires `insert_quorum_parallel` to be disabled (enabled by default). @@ -1791,11 +1807,11 @@ When `insert_quorum_parallel` is enabled (the default), then `select_sequential_ See also: -- [insert_quorum](#settings-insert_quorum) -- [insert_quorum_timeout](#settings-insert_quorum_timeout) -- [insert_quorum_parallel](#settings-insert_quorum_parallel) +- [insert_quorum](#insert_quorum) +- [insert_quorum_timeout](#insert_quorum_timeout) +- [insert_quorum_parallel](#insert_quorum_parallel) -## insert_deduplicate {#settings-insert-deduplicate} +## insert_deduplicate {#insert-deduplicate} Enables or disables block deduplication of `INSERT` (for Replicated\* tables). @@ -1922,7 +1938,7 @@ For the replicated tables, by default, only 10000 of the most recent inserts for We recommend enabling the [async_block_ids_cache](merge-tree-settings.md/#use-async-block-ids-cache) to increase the efficiency of deduplication. This function does not work for non-replicated tables. -## deduplicate_blocks_in_dependent_materialized_views {#settings-deduplicate-blocks-in-dependent-materialized-views} +## deduplicate_blocks_in_dependent_materialized_views {#deduplicate-blocks-in-dependent-materialized-views} Enables or disables the deduplication check for materialized views that receive data from Replicated\* tables. @@ -2032,7 +2048,7 @@ Possible values: Default value: 10000 -## max_network_bytes {#settings-max-network-bytes} +## max_network_bytes {#max-network-bytes} Limits the data volume (in bytes) that is received or transmitted over the network when executing a query. This setting applies to every individual query. @@ -2043,7 +2059,7 @@ Possible values: Default value: 0. -## max_network_bandwidth {#settings-max-network-bandwidth} +## max_network_bandwidth {#max-network-bandwidth} Limits the speed of the data exchange over the network in bytes per second. This setting applies to every query. @@ -2054,7 +2070,7 @@ Possible values: Default value: 0. 
-## max_network_bandwidth_for_user {#settings-max-network-bandwidth-for-user} +## max_network_bandwidth_for_user {#max-network-bandwidth-for-user} Limits the speed of the data exchange over the network in bytes per second. This setting applies to all concurrently running queries performed by a single user. @@ -2065,7 +2081,7 @@ Possible values: Default value: 0. -## max_network_bandwidth_for_all_users {#settings-max-network-bandwidth-for-all-users} +## max_network_bandwidth_for_all_users {#max-network-bandwidth-for-all-users} Limits the speed that data is exchanged at over the network in bytes per second. This setting applies to all concurrently running queries on the server. @@ -2076,7 +2092,7 @@ Possible values: Default value: 0. -## count_distinct_implementation {#settings-count_distinct_implementation} +## count_distinct_implementation {#count_distinct_implementation} Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md/#agg_function-count) construction. @@ -2090,7 +2106,7 @@ Possible values: Default value: `uniqExact`. -## skip_unavailable_shards {#settings-skip_unavailable_shards} +## skip_unavailable_shards {#skip_unavailable_shards} Enables or disables silent skipping of unavailable shards. @@ -2254,7 +2270,7 @@ Possible values: Default value: 0 -## force_optimize_skip_unused_shards_nesting {#settings-force_optimize_skip_unused_shards_nesting} +## force_optimize_skip_unused_shards_nesting {#force_optimize_skip_unused_shards_nesting} Controls [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shards) (hence still requires [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shards)) depending on the nesting level of the distributed query (the case when you have a `Distributed` table that looks into another `Distributed` table). @@ -2367,7 +2383,24 @@ See also: - [optimize_functions_to_subcolumns](#optimize-functions-to-subcolumns) -## distributed_replica_error_half_life {#settings-distributed_replica_error_half_life} +## optimize_count_from_files {#optimize_count_from_files} + +Enables or disables the optimization of counting the number of rows from files in different input formats. It applies to table functions/engines `file`/`s3`/`url`/`hdfs`/`azureBlobStorage`. + +Possible values: + +- 0 — Optimization disabled. +- 1 — Optimization enabled. + +Default value: `1`. + +## use_cache_for_count_from_files {#use_cache_for_count_from_files} + +Enables caching of the row count when counting rows from files in table functions `file`/`s3`/`url`/`hdfs`/`azureBlobStorage`. + +Enabled by default.
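A sketch of a query that benefits from both settings (the glob path is illustrative):

```sql
-- count() over files can be answered from format metadata, and the result
-- can be cached for repeated queries over the same files:
SELECT count()
FROM file('data/*.parquet')
SETTINGS optimize_count_from_files = 1, use_cache_for_count_from_files = 1;
```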
+ +## distributed_replica_error_half_life {#distributed_replica_error_half_life} - Type: seconds - Default value: 60 seconds @@ -2378,10 +2411,10 @@ See also: - [load_balancing](#load_balancing-round_robin) - [Table engine Distributed](../../engines/table-engines/special/distributed.md) -- [distributed_replica_error_cap](#settings-distributed_replica_error_cap) -- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors) +- [distributed_replica_error_cap](#distributed_replica_error_cap) +- [distributed_replica_max_ignored_errors](#distributed_replica_max_ignored_errors) -## distributed_replica_error_cap {#settings-distributed_replica_error_cap} +## distributed_replica_error_cap {#distributed_replica_error_cap} - Type: unsigned int - Default value: 1000 @@ -2392,10 +2425,10 @@ See also: - [load_balancing](#load_balancing-round_robin) - [Table engine Distributed](../../engines/table-engines/special/distributed.md) -- [distributed_replica_error_half_life](#settings-distributed_replica_error_half_life) -- [distributed_replica_max_ignored_errors](#settings-distributed_replica_max_ignored_errors) +- [distributed_replica_error_half_life](#distributed_replica_error_half_life) +- [distributed_replica_max_ignored_errors](#distributed_replica_max_ignored_errors) -## distributed_replica_max_ignored_errors {#settings-distributed_replica_max_ignored_errors} +## distributed_replica_max_ignored_errors {#distributed_replica_max_ignored_errors} - Type: unsigned int - Default value: 0 @@ -2406,7 +2439,7 @@ See also: - [load_balancing](#load_balancing-round_robin) - [Table engine Distributed](../../engines/table-engines/special/distributed.md) -- [distributed_replica_error_cap](#settings-distributed_replica_error_cap) +- [distributed_replica_error_cap](#distributed_replica_error_cap) - [distributed_replica_error_half_life](#distributed_replica_error_half_life) ## distributed_directory_monitor_sleep_time_ms {#distributed_directory_monitor_sleep_time_ms} @@ -2562,7 +2595,7 @@ Possible values: Default value: 0. -## allow_introspection_functions {#settings-allow_introspection_functions} +## allow_introspection_functions {#allow_introspection_functions} Enables or disables [introspection functions](../../sql-reference/functions/introspection.md) for query profiling. @@ -3103,7 +3136,7 @@ Do not enable this feature in version `<= 21.8`. It's not properly implemented a ## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty} Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md/#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility. -It is implemented via query rewrite (similar to [count_distinct_implementation](#settings-count_distinct_implementation) setting) to get consistent results for distributed queries. +It is implemented via query rewrite (similar to [count_distinct_implementation](#count_distinct_implementation) setting) to get consistent results for distributed queries. Possible values: @@ -3201,6 +3234,40 @@ ENGINE = Log └──────────────────────────────────────────────────────────────────────────┘ ``` +## default_temporary_table_engine {#default_temporary_table_engine} + +Same as [default_table_engine](#default_table_engine) but for temporary tables. + +Default value: `Memory`.
+
+In this example, any new temporary table that does not specify an `Engine` will use the `Log` table engine:
+
+Query:
+
+```sql
+SET default_temporary_table_engine = 'Log';
+
+CREATE TEMPORARY TABLE my_table (
+    x UInt32,
+    y UInt32
+);
+
+SHOW CREATE TEMPORARY TABLE my_table;
+```
+
+Result:
+
+```response
+┌─statement────────────────────────────────────────────────────────────────┐
+│ CREATE TEMPORARY TABLE default.my_table
+(
+    `x` UInt32,
+    `y` UInt32
+)
+ENGINE = Log
+└──────────────────────────────────────────────────────────────────────────┘
+```
+
## data_type_default_nullable {#data_type_default_nullable}

Allows data types without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md/#null-modifiers) in column definition will be [Nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable).

@@ -3212,6 +3279,17 @@ Possible values:

Default value: `0`.

+## use_mysql_types_in_show_columns {#use_mysql_types_in_show_columns}
+
+Show the names of MySQL data types corresponding to ClickHouse data types in [SHOW COLUMNS](../../sql-reference/statements/show.md#show_columns) and SELECTs on [system.columns](../system-tables/columns.md).
+
+Possible values:
+
+- 0 - Show names of native ClickHouse data types.
+- 1 - Show names of MySQL data types corresponding to ClickHouse data types.
+
+Default value: `0`.
+
## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold}

Enables special logic to perform merges on replicas.

@@ -3430,6 +3508,12 @@ Possible values:

Default value: `0`.

+## enable_url_encoding {#enable_url_encoding}
+
+Allows enabling or disabling decoding and encoding of the path in the URI in [URL](../../engines/table-engines/special/url.md) engine tables.
+
+Enabled by default.
+
## database_atomic_wait_for_drop_and_detach_synchronously {#database_atomic_wait_for_drop_and_detach_synchronously}

Adds a modifier `SYNC` to all `DROP` and `DETACH` queries.

@@ -3501,7 +3585,7 @@ Possible values:

- Any positive integer.
- 0 - Disabled (infinite timeout).

-Default value: 180.
+Default value: 30.

## http_receive_timeout {#http_receive_timeout}

@@ -3512,7 +3596,7 @@ Possible values:

- Any positive integer.
- 0 - Disabled (infinite timeout).

-Default value: 180.
+Default value: 30.

## check_query_single_value_result {#check_query_single_value_result}

@@ -3994,6 +4078,17 @@ Result:

└─────┴─────┴───────┘
```

+## splitby_max_substrings_includes_remaining_string {#splitby_max_substrings_includes_remaining_string}
+
+Controls whether function [splitBy*()](../../sql-reference/functions/splitting-merging-functions.md) with argument `max_substrings` > 0 will include the remaining string in the last element of the result array.
+
+Possible values:
+
+- `0` - The remaining string will not be included in the last element of the result array.
+- `1` - The remaining string will be included in the last element of the result array (see the example below). This is the behavior of Spark's [`split()`](https://spark.apache.org/docs/3.1.2/api/python/reference/api/pyspark.sql.functions.split.html) function and Python's [`string.split()`](https://docs.python.org/3/library/stdtypes.html#str.split) method.
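+
+For example (illustrative):
+
+```sql
+SELECT splitByChar('=', 'a=b=c=d', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 0; -- returns ['a','b']
+SELECT splitByChar('=', 'a=b=c=d', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1; -- returns ['a','b=c=d']
+```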
+
+Default value: `0`.
+
## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions}

Enables or disables returning results of type:

@@ -4254,7 +4349,7 @@ Use this setting only for backward compatibility if your use cases depend on old

## session_timezone {#session_timezone}

Sets the implicit time zone of the current session or query.
-The implicit time zone is the time zone applied to values of type DateTime/DateTime64 which have no explicitly specified time zone.
+The implicit time zone is the time zone applied to values of type DateTime/DateTime64 which have no explicitly specified time zone. The setting takes precedence over the globally configured (server-level) implicit time zone.
A value of '' (empty string) means that the implicit time zone of the current session or query is equal to the [server time zone](../server-configuration-parameters/settings.md#server_configuration_parameters-timezone).

@@ -4289,7 +4384,7 @@ SELECT toDateTime64(toDateTime64('1999-12-12 23:23:23.123', 3), 3, 'Europe/Zuric

:::warning
-Not all functions that parse DateTime/DateTime64 respect `session_timezone`. This can lead to subtle errors.
+Not all functions that parse DateTime/DateTime64 respect `session_timezone`. This can lead to subtle errors. See the following example and explanation.
:::

@@ -4488,6 +4583,7 @@ This setting allows to specify renaming pattern for files processed by `file` ta

### Placeholders

+- `%a` — Full original filename (e.g., "sample.csv").
- `%f` — Original filename without extension (e.g., "sample").
- `%e` — Original file extension with dot (e.g., ".csv").
- `%t` — Timestamp (in microseconds).

@@ -4533,3 +4629,100 @@ Type: Int64

Default: 0

+## rewrite_count_distinct_if_with_count_distinct_implementation {#rewrite_count_distinct_if_with_count_distinct_implementation}
+
+Allows you to rewrite `countDistinctIf` with the [count_distinct_implementation](#count_distinct_implementation) setting.
+
+Possible values:
+
+- true — Allow.
+- false — Disallow.
+
+Default value: `false`.
+
+## precise_float_parsing {#precise_float_parsing}
+
+Switches [Float32/Float64](../../sql-reference/data-types/float.md) parsing algorithms:
+* If the value is `1`, then the precise method is used. It is slower than the fast method, but it always returns a number that is the closest machine representable number to the input.
+* Otherwise, the fast method is used (default). It usually returns the same value as the precise method, but in rare cases the result may differ by one or two least significant digits.
+
+Possible values: `0`, `1`.
+
+Default value: `0`.
+
+Example:
+
+```sql
+SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 0;
+
+┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐
+│ 1.7090999999999998 │ 15008753.000000002 │
+└─────────────────────┴──────────────────────────┘
+
+SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 1;
+
+┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐
+│ 1.7091 │ 15008753 │
+└─────────────────────┴──────────────────────────┘
+```
+
+## partial_result_update_duration_ms {#partial_result_update_duration_ms}
+
+Interval (in milliseconds) for sending updates with partial data about the result table to the client (in interactive mode) during query execution. Setting to 0 disables partial results. Only supported for single-threaded GROUP BY without key, ORDER BY, LIMIT and OFFSET.
+
+:::note
+It's an experimental feature. Enable the `allow_experimental_partial_result` setting first to use it.
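+
+For example (a hypothetical interactive session; the query and values are illustrative):
+
+```sql
+SET allow_experimental_partial_result = 1;
+SELECT max(number) FROM numbers(1000000000)
+SETTINGS partial_result_update_duration_ms = 100, max_rows_in_partial_result = 10;
+```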
+:::
+
+## max_rows_in_partial_result {#max_rows_in_partial_result}
+
+The maximum number of rows to show in the partial result after every real-time update while the query runs (use the partial result limit + OFFSET as the value if the query contains OFFSET).
+
+## validate_tcp_client_information {#validate-tcp-client-information}
+
+Determines whether validation of client information is enabled when a query packet is received from a client using a TCP connection.
+
+If `true`, an exception will be thrown on invalid client information from the TCP client.
+
+If `false`, the data will not be validated. The server will work with clients of all versions.
+
+The default value is `false`.
+
+**Example**
+
+``` xml
+<validate_tcp_client_information>true</validate_tcp_client_information>
+```
+
+## print_pretty_type_names {#print_pretty_type_names}
+
+Allows printing deep-nested type names in a pretty way, with indents, in `DESCRIBE` queries and in the `toTypeName()` function.
+
+Example:
+
+```sql
+CREATE TABLE test (a Tuple(b String, c Tuple(d Nullable(UInt64), e Array(UInt32), f Array(Tuple(g String, h Map(String, Array(Tuple(i String, j UInt64))))), k Date), l Nullable(String))) ENGINE=Memory;
+DESCRIBE TABLE test FORMAT TSVRaw SETTINGS print_pretty_type_names=1;
+```
+
+```
+a	Tuple(
+    b String,
+    c Tuple(
+        d Nullable(UInt64),
+        e Array(UInt32),
+        f Array(Tuple(
+            g String,
+            h Map(
+                String,
+                Array(Tuple(
+                    i String,
+                    j UInt64
+                ))
+            )
+        )),
+        k Date
+    ),
+    l Nullable(String)
+)
+```
diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md
index 5804ad8545b9..fc2933aa2cf4 100644
--- a/docs/en/operations/storing-data.md
+++ b/docs/en/operations/storing-data.md
@@ -114,7 +114,11 @@ Example of disk configuration:

## Using local cache {#using-local-cache}

-It is possible to configure local cache over disks in storage configuration starting from version 22.3. For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc. Cache uses `LRU` cache policy.
+It is possible to configure local cache over disks in storage configuration starting from version 22.3.
+For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc.
+For versions >= 23.5 cache is supported only for remote disk types: S3, Azure, HDFS.
+Cache uses `LRU` cache policy.
+

Example of configuration for versions later or equal to 22.8:

@@ -184,13 +188,15 @@ These settings should be defined in the disk configuration section.

- `enable_filesystem_query_cache_limit` - allow to limit the size of cache which is downloaded within each query (depends on user setting `max_query_cache_size`). Default: `false`.

-- `enable_cache_hits_threshold` - number which defines how many times some data needs to be read before it will be cached. Default: `0`, e.g. the data is cached at the first attempt to read it.
+- `enable_cache_hits_threshold` - when enabled, data is cached only after it has been read a certain number of times. Default: `false`. The number of reads is defined by `cache_hits_threshold`. Default: `0`, i.e. the data is cached at the first attempt to read it.
+
+- `enable_bypass_cache_with_threshold` - allows skipping the cache completely if the requested read range exceeds the threshold. Default: `false`. The threshold is defined by `bypass_cache_threashold`. Default: `268435456` (`256Mi`).

- `do_not_evict_index_and_mark_files` - do not evict small frequently used files according to cache policy. Default: `false`.
This setting was added in version 22.8. If you used filesystem cache before this version, then it will not work on versions starting from 22.8 if this setting is set to `true`. If you want to use this setting, clear old cache created before version 22.8 before upgrading. -- `max_file_segment_size` - a maximum size of a single cache file in bytes or in readable format (`ki, Mi, Gi, etc`, example `10Gi`). Default: `104857600` (`100Mi`). +- `max_file_segment_size` - a maximum size of a single cache file in bytes or in readable format (`ki, Mi, Gi, etc`, example `10Gi`). Default: `8388608` (`8Mi`). -- `max_elements` - a limit for a number of cache files. Default: `1048576`. +- `max_elements` - a limit for a number of cache files. Default: `10000000`. File Cache **query/profile settings**: diff --git a/docs/en/operations/system-tables/asynchronous_metric_log.md b/docs/en/operations/system-tables/asynchronous_metric_log.md index 4290799b6bcf..efe57a202d85 100644 --- a/docs/en/operations/system-tables/asynchronous_metric_log.md +++ b/docs/en/operations/system-tables/asynchronous_metric_log.md @@ -9,7 +9,6 @@ Columns: - `event_date` ([Date](../../sql-reference/data-types/date.md)) — Event date. - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time. -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Event time with microseconds resolution. - `name` ([String](../../sql-reference/data-types/string.md)) — Metric name. - `value` ([Float64](../../sql-reference/data-types/float.md)) — Metric value. @@ -20,18 +19,18 @@ SELECT * FROM system.asynchronous_metric_log LIMIT 10 ``` ``` text -┌─event_date─┬──────────event_time─┬────event_time_microseconds─┬─name─────────────────────────────────────┬─────value─┐ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ CPUFrequencyMHz_0 │ 2120.9 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pmuzzy │ 743 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pdirty │ 26288 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.run_intervals │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.num_runs │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.retained │ 60694528 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.mapped │ 303161344 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.resident │ 260931584 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.metadata │ 12079488 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.allocated │ 133756128 │ -└────────────┴─────────────────────┴────────────────────────────┴──────────────────────────────────────────┴───────────┘ +┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬─────value─┐ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ CPUFrequencyMHz_0 │ 2120.9 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pmuzzy │ 743 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pdirty │ 26288 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.run_intervals │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.num_runs │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.retained │ 60694528 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.mapped │ 303161344 │ +│ 2020-09-05 │ 
2020-09-05 15:56:30 │ jemalloc.resident │ 260931584 │
+│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.metadata │ 12079488 │
+│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.allocated │ 133756128 │
+└────────────┴─────────────────────┴──────────────────────────────────────────┴───────────┘
```

**See Also**

diff --git a/docs/en/operations/system-tables/asynchronous_metrics.md b/docs/en/operations/system-tables/asynchronous_metrics.md
index f357341da679..e46b495239c4 100644
--- a/docs/en/operations/system-tables/asynchronous_metrics.md
+++ b/docs/en/operations/system-tables/asynchronous_metrics.md
@@ -32,6 +32,10 @@ SELECT * FROM system.asynchronous_metrics LIMIT 10
└─────────────────────────────────────────┴────────────┴────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```

+
+
## Metric descriptions

@@ -483,6 +487,14 @@ The value is similar to `OSUserTime` but divided to the number of CPU cores to b

Number of threads in the server of the PostgreSQL compatibility protocol.

+### QueryCacheBytes
+
+Total size of the query cache in bytes.
+
+### QueryCacheEntries
+
+Total number of entries in the query cache.
+
### ReplicasMaxAbsoluteDelay

Maximum difference in seconds between the most fresh replicated part and the most fresh data part still to be replicated, across Replicated tables. A very high value indicates a replica with no data.

diff --git a/docs/en/operations/system-tables/backup_log.md b/docs/en/operations/system-tables/backup_log.md
new file mode 100644
index 000000000000..7e088fcad947
--- /dev/null
+++ b/docs/en/operations/system-tables/backup_log.md
@@ -0,0 +1,145 @@
+---
+slug: /en/operations/system-tables/backup_log
+---
+# backup_log
+
+Contains logging entries with information about `BACKUP` and `RESTORE` operations.
+
+Columns:
+
+- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Date of the entry.
+- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Time of the entry with microseconds precision.
+- `id` ([String](../../sql-reference/data-types/string.md)) — Identifier of the backup or restore operation.
+- `name` ([String](../../sql-reference/data-types/string.md)) — Name of the backup storage (the contents of the `FROM` or `TO` clause).
+- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Operation status. Possible values:
+    - `'CREATING_BACKUP'`
+    - `'BACKUP_CREATED'`
+    - `'BACKUP_FAILED'`
+    - `'RESTORING'`
+    - `'RESTORED'`
+    - `'RESTORE_FAILED'`
+- `error` ([String](../../sql-reference/data-types/string.md)) — Error message of the failed operation (empty string for successful operations).
+- `start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time of the operation.
+- `end_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — End time of the operation.
+- `num_files` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of files stored in the backup.
+- `total_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total size of files stored in the backup.
+- `num_entries` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of entries in the backup, i.e.
the number of files inside the folder if the backup is stored as a folder, or the number of files inside the archive if the backup is stored as an archive. It is not the same as `num_files` if it's an incremental backup or if it contains empty files or duplicates. The following is always true: `num_entries <= num_files`.
+- `uncompressed_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Uncompressed size of the backup.
+- `compressed_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Compressed size of the backup. If the backup is not stored as an archive, it is equal to `uncompressed_size`.
+- `files_read` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of files read during the restore operation.
+- `bytes_read` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total size of files read during the restore operation.
+
+**Example**
+
+```sql
+BACKUP TABLE test_db.my_table TO Disk('backups_disk', '1.zip')
+```
+```response
+┌─id───────────────────────────────────┬─status─────────┐
+│ e5b74ecb-f6f1-426a-80be-872f90043885 │ BACKUP_CREATED │
+└──────────────────────────────────────┴────────────────┘
+```
+```sql
+SELECT * FROM system.backup_log WHERE id = 'e5b74ecb-f6f1-426a-80be-872f90043885' ORDER BY event_date, event_time_microseconds \G
+```
+```response
+Row 1:
+──────
+event_date: 2023-08-19
+event_time_microseconds: 2023-08-19 11:05:21.998566
+id: e5b74ecb-f6f1-426a-80be-872f90043885
+name: Disk('backups_disk', '1.zip')
+status: CREATING_BACKUP
+error:
+start_time: 2023-08-19 11:05:21
+end_time: 1970-01-01 03:00:00
+num_files: 0
+total_size: 0
+num_entries: 0
+uncompressed_size: 0
+compressed_size: 0
+files_read: 0
+bytes_read: 0
+
+Row 2:
+──────
+event_date: 2023-08-19
+event_time_microseconds: 2023-08-19 11:08:56.916192
+id: e5b74ecb-f6f1-426a-80be-872f90043885
+name: Disk('backups_disk', '1.zip')
+status: BACKUP_CREATED
+error:
+start_time: 2023-08-19 11:05:21
+end_time: 2023-08-19 11:08:56
+num_files: 57
+total_size: 4290364870
+num_entries: 46
+uncompressed_size: 4290362365
+compressed_size: 3525068304
+files_read: 0
+bytes_read: 0
+```
+```sql
+RESTORE TABLE test_db.my_table FROM Disk('backups_disk', '1.zip')
+```
+```response
+┌─id───────────────────────────────────┬─status───┐
+│ cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 │ RESTORED │
+└──────────────────────────────────────┴──────────┘
+```
+```sql
+SELECT * FROM system.backup_log WHERE id = 'cdf1f731-52ef-42da-bc65-2e1bfcd4ce90' ORDER BY event_date, event_time_microseconds \G
+```
+```response
+Row 1:
+──────
+event_date: 2023-08-19
+event_time_microseconds: 2023-08-19 11:09:19.718077
+id: cdf1f731-52ef-42da-bc65-2e1bfcd4ce90
+name: Disk('backups_disk', '1.zip')
+status: RESTORING
+error:
+start_time: 2023-08-19 11:09:19
+end_time: 1970-01-01 03:00:00
+num_files: 0
+total_size: 0
+num_entries: 0
+uncompressed_size: 0
+compressed_size: 0
+files_read: 0
+bytes_read: 0
+
+Row 2:
+──────
+event_date: 2023-08-19
+event_time_microseconds: 2023-08-19 11:09:29.334234
+id: cdf1f731-52ef-42da-bc65-2e1bfcd4ce90
+name: Disk('backups_disk', '1.zip')
+status: RESTORED
+error:
+start_time: 2023-08-19 11:09:19
+end_time: 2023-08-19 11:09:29
+num_files: 57
+total_size: 4290364870
+num_entries: 46
+uncompressed_size: 4290362365
+compressed_size: 4290362365
+files_read: 57
+bytes_read: 4290364870
+```
+
+This is essentially the same information that is written in the system table `system.backups`:
+
+```sql
+SELECT * FROM system.backups ORDER BY start_time
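+-- Note (comparison, based on the examples above): system.backup_log records one
+-- entry per status change (e.g. CREATING_BACKUP, then BACKUP_CREATED), while
+-- system.backups holds a single row per operation with its latest state.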
+```
+```response
+┌─id───────────────────────────────────┬─name──────────────────────────┬─status─────────┬─error─┬──────────start_time─┬────────────end_time─┬─num_files─┬─total_size─┬─num_entries─┬─uncompressed_size─┬─compressed_size─┬─files_read─┬─bytes_read─┐
+│ e5b74ecb-f6f1-426a-80be-872f90043885 │ Disk('backups_disk', '1.zip') │ BACKUP_CREATED │ │ 2023-08-19 11:05:21 │ 2023-08-19 11:08:56 │ 57 │ 4290364870 │ 46 │ 4290362365 │ 3525068304 │ 0 │ 0 │
+│ cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 │ Disk('backups_disk', '1.zip') │ RESTORED │ │ 2023-08-19 11:09:19 │ 2023-08-19 11:09:29 │ 57 │ 4290364870 │ 46 │ 4290362365 │ 4290362365 │ 57 │ 4290364870 │
+└──────────────────────────────────────┴───────────────────────────────┴────────────────┴───────┴─────────────────────┴─────────────────────┴───────────┴────────────┴─────────────┴───────────────────┴─────────────────┴────────────┴────────────┘
+```
+
+**See Also**
+
+- [Backup and Restore](../../operations/backup.md)
diff --git a/docs/en/operations/system-tables/clusters.md b/docs/en/operations/system-tables/clusters.md
index deb9a0aaeb37..2659f80e3381 100644
--- a/docs/en/operations/system-tables/clusters.md
+++ b/docs/en/operations/system-tables/clusters.md
@@ -23,6 +23,7 @@ Columns:

- `database_shard_name` ([String](../../sql-reference/data-types/string.md)) — The name of the `Replicated` database shard (for clusters that belong to a `Replicated` database).
- `database_replica_name` ([String](../../sql-reference/data-types/string.md)) — The name of the `Replicated` database replica (for clusters that belong to a `Replicated` database).
- `is_active` ([Nullable(UInt8)](../../sql-reference/data-types/int-uint.md)) — The status of the `Replicated` database replica (for clusters that belong to a `Replicated` database): 1 means "replica is online", 0 means "replica is offline", `NULL` means "unknown".
+- `name` ([String](../../sql-reference/data-types/string.md)) - An alias for `cluster`.

**Example**

diff --git a/docs/en/operations/system-tables/columns.md b/docs/en/operations/system-tables/columns.md
index 2915b053458f..3a82f56a58d0 100644
--- a/docs/en/operations/system-tables/columns.md
+++ b/docs/en/operations/system-tables/columns.md
@@ -14,7 +14,7 @@ The `system.columns` table contains the following columns (the column type is sh

- `database` ([String](../../sql-reference/data-types/string.md)) — Database name.
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `name` ([String](../../sql-reference/data-types/string.md)) — Column name.
-- `type` ([String](../../sql-reference/data-types/string.md)) — Column type.
+- `type` ([String](../../sql-reference/data-types/string.md)) — Column type. If the setting [use_mysql_types_in_show_columns](../../operations/settings/settings.md#use_mysql_types_in_show_columns) = 1 (default: 0), then the equivalent type name in MySQL is shown.
- `position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table starting with 1.
- `default_kind` ([String](../../sql-reference/data-types/string.md)) — Expression type (`DEFAULT`, `MATERIALIZED`, `ALIAS`) for the default value, or an empty string if it is not defined.
- `default_expression` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined.
diff --git a/docs/en/operations/system-tables/distributed_ddl_queue.md b/docs/en/operations/system-tables/distributed_ddl_queue.md
index 8cccf9466216..a552fd548a83 100644
--- a/docs/en/operations/system-tables/distributed_ddl_queue.md
+++ b/docs/en/operations/system-tables/distributed_ddl_queue.md
@@ -8,17 +8,21 @@ Contains information about [distributed ddl queries (ON CLUSTER clause)](../../s

Columns:

- `entry` ([String](../../sql-reference/data-types/string.md)) — Query id.
-- `host_name` ([String](../../sql-reference/data-types/string.md)) — Hostname.
-- `host_address` ([String](../../sql-reference/data-types/string.md)) — IP address that the Hostname resolves to.
-- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Host Port.
-- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Status of the query.
+- `entry_version` ([Nullable(UInt8)](../../sql-reference/data-types/int-uint.md)) - Version of the entry.
+- `initiator_host` ([Nullable(String)](../../sql-reference/data-types/string.md)) - Host that initiated the DDL operation.
+- `initiator_port` ([Nullable(UInt16)](../../sql-reference/data-types/int-uint.md)) - Port used by the initiator.
- `cluster` ([String](../../sql-reference/data-types/string.md)) — Cluster name.
- `query` ([String](../../sql-reference/data-types/string.md)) — Query executed.
-- `initiator` ([String](../../sql-reference/data-types/string.md)) — Node that executed the query.
-- `query_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query start time.
+- `settings` ([Map(String, String)](../../sql-reference/data-types/map.md)) - Settings used in the DDL operation.
+- `query_create_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query creation time.
+- `host` ([String](../../sql-reference/data-types/string.md)) — Hostname.
+- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) — Host Port.
+- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Status of the query.
+- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code.
+- `exception_text` ([Nullable(String)](../../sql-reference/data-types/string.md)) - Exception message.
- `query_finish_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Query finish time.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of query execution (in milliseconds).
-- `exception_code` ([Enum8](../../sql-reference/data-types/enum.md)) — Exception code from [ClickHouse Keeper](../../operations/tips.md#zookeeper).
+
+
**Example**

@@ -34,32 +38,38 @@ Query id: f544e72a-6641-43f1-836b-24baa1c9632a

Row 1:
──────
entry: query-0000000000
-host_name: clickhouse01
-host_address: 172.23.0.11
-port: 9000
-status: Finished
+entry_version: 5
+initiator_host: clickhouse01
+initiator_port: 9000
cluster: test_cluster
query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster
-initiator: clickhouse01:9000
-query_start_time: 2020-12-30 13:07:51
-query_finish_time: 2020-12-30 13:07:51
-query_duration_ms: 6
-exception_code: ZOK
+settings: {'max_threads':'16','use_uncompressed_cache':'0'}
+query_create_time: 2023-09-01 16:15:14
+host: clickhouse-01
+port: 9000
+status: Finished
+exception_code: 0
+exception_text:
+query_finish_time: 2023-09-01 16:15:14
+query_duration_ms: 154

Row 2:
──────
-entry: query-0000000000
-host_name: clickhouse02
-host_address: 172.23.0.12
-port: 9000
-status: Finished
+entry: query-0000000001
+entry_version: 5
+initiator_host: clickhouse01
+initiator_port: 9000
cluster: test_cluster
query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster
-initiator: clickhouse01:9000
-query_start_time: 2020-12-30 13:07:51
-query_finish_time: 2020-12-30 13:07:51
-query_duration_ms: 6
-exception_code: ZOK
+settings: {'max_threads':'16','use_uncompressed_cache':'0'}
+query_create_time: 2023-09-01 16:15:14
+host: clickhouse-01
+port: 9000
+status: Finished
+exception_code: 630
+exception_text: Code: 630. DB::Exception: Cannot drop or rename test_db, because some tables depend on it:
+query_finish_time: 2023-09-01 16:15:14
+query_duration_ms: 154

2 rows in set. Elapsed: 0.025 sec.
```
diff --git a/docs/en/operations/system-tables/events.md b/docs/en/operations/system-tables/events.md
index ba5602ee292f..d2b90a49b0d1 100644
--- a/docs/en/operations/system-tables/events.md
+++ b/docs/en/operations/system-tables/events.md
@@ -10,6 +10,9 @@ Columns:

- `event` ([String](../../sql-reference/data-types/string.md)) — Event name.
- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Number of events occurred.
- `description` ([String](../../sql-reference/data-types/string.md)) — Event description.
+- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for `event`.
+
+You can find all supported events in the source file [src/Common/ProfileEvents.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/ProfileEvents.cpp).

**Example**

diff --git a/docs/en/operations/system-tables/functions.md b/docs/en/operations/system-tables/functions.md
index 60bfa08975b7..d52bf24f2891 100644
--- a/docs/en/operations/system-tables/functions.md
+++ b/docs/en/operations/system-tables/functions.md
@@ -7,28 +7,34 @@ Contains information about normal and aggregate functions.

Columns:

-- `name`(`String`) – The name of the function.
-- `is_aggregate`(`UInt8`) — Whether the function is aggregate.
+- `name` ([String](../../sql-reference/data-types/string.md)) – The name of the function.
+- `is_aggregate` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Whether the function is an aggregate function.
+- `is_deterministic` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt8](../../sql-reference/data-types/int-uint.md))) - Whether the function is deterministic.
+- `case_insensitive` ([UInt8](../../sql-reference/data-types/int-uint.md)) - Whether the function name can be used case-insensitively.
+- `alias_to` ([String](../../sql-reference/data-types/string.md)) - The original function name, if the function name is an alias.
+- `create_query` ([String](../../sql-reference/data-types/string.md)) - Unused.
+- `origin` ([Enum8](../../sql-reference/data-types/enum.md)) - Unused.
+- `description` ([String](../../sql-reference/data-types/string.md)) - A high-level description of what the function does.
+- `syntax` ([String](../../sql-reference/data-types/string.md)) - Signature of the function.
+- `arguments` ([String](../../sql-reference/data-types/string.md)) - The arguments the function takes.
+- `returned_value` ([String](../../sql-reference/data-types/string.md)) - What the function returns.
+- `examples` ([String](../../sql-reference/data-types/string.md)) - Example usage of the function.
+- `categories` ([String](../../sql-reference/data-types/string.md)) - The category of the function.

**Example**

```sql
- SELECT * FROM system.functions LIMIT 10;
+ SELECT name, is_aggregate, is_deterministic, case_insensitive, alias_to FROM system.functions LIMIT 5;
```

```text
-┌─name──────────────────┬─is_aggregate─┬─case_insensitive─┬─alias_to─┬─create_query─┬─origin─┐
-│ logTrace │ 0 │ 0 │ │ │ System │
-│ aes_decrypt_mysql │ 0 │ 0 │ │ │ System │
-│ aes_encrypt_mysql │ 0 │ 0 │ │ │ System │
-│ decrypt │ 0 │ 0 │ │ │ System │
-│ encrypt │ 0 │ 0 │ │ │ System │
-│ toBool │ 0 │ 0 │ │ │ System │
-│ windowID │ 0 │ 0 │ │ │ System │
-│ hopStart │ 0 │ 0 │ │ │ System │
-│ hop │ 0 │ 0 │ │ │ System │
-│ snowflakeToDateTime64 │ 0 │ 0 │ │ │ System │
-└───────────────────────┴──────────────┴──────────────────┴──────────┴──────────────┴────────┘
+┌─name─────────────────────┬─is_aggregate─┬─is_deterministic─┬─case_insensitive─┬─alias_to─┐
+│ BLAKE3 │ 0 │ 1 │ 0 │ │
+│ sipHash128Reference │ 0 │ 1 │ 0 │ │
+│ mapExtractKeyLike │ 0 │ 1 │ 0 │ │
+│ sipHash128ReferenceKeyed │ 0 │ 1 │ 0 │ │
+│ mapPartialSort │ 0 │ 1 │ 0 │ │
+└──────────────────────────┴──────────────┴──────────────────┴──────────────────┴──────────┘

-10 rows in set. Elapsed: 0.002 sec.
+5 rows in set. Elapsed: 0.002 sec.
```
diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md
index 508419783ef9..df42f80275e0 100644
--- a/docs/en/operations/system-tables/index.md
+++ b/docs/en/operations/system-tables/index.md
@@ -13,6 +13,7 @@ System tables provide information about:

- Server states, processes, and environment.
- Server’s internal processes.
+- Options used when the ClickHouse binary was built.

System tables:

@@ -22,7 +23,7 @@ System tables:

Most of system tables store their data in RAM. A ClickHouse server creates such system tables at the start.

-Unlike other system tables, the system log tables [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md) and [text_log](../../operations/system-tables/text_log.md) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a filesystem by default. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one.
+Unlike other system tables, the system log tables [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md), [text_log](../../operations/system-tables/text_log.md) and [backup_log](../../operations/system-tables/backup_log.md) are served by [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine and store their data in a filesystem by default. If you remove a table from a filesystem, the ClickHouse server creates the empty one again at the time of the next data writing. If system table schema changed in a new release, then ClickHouse renames the current table and creates a new one.

System log tables can be customized by creating a config file with the same name as the table under `/etc/clickhouse-server/config.d/`, or setting corresponding elements in `/etc/clickhouse-server/config.xml`. Elements that can be customized are:

@@ -31,7 +32,7 @@ System log tables can be customized by creating a config file with the same name
- `partition_by`: specify [PARTITION BY](../../engines/table-engines/mergetree-family/custom-partitioning-key.md) expression.
- `ttl`: specify table [TTL](../../sql-reference/statements/alter/ttl.md) expression.
- `flush_interval_milliseconds`: interval of flushing data to disk.
-- `engine`: provide full engine expression (starting with `ENGINE =` ) with parameters. This option is contradict with `partition_by` and `ttl`. If set together, the server would raise an exception and exit.
+- `engine`: provide full engine expression (starting with `ENGINE =` ) with parameters. This option conflicts with `partition_by` and `ttl`. If set together, the server will raise an exception and exit.

An example:

@@ -46,6 +47,10 @@ An example:
        ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024
        -->
+        <max_size_rows>1048576</max_size_rows>
+        <reserved_size_rows>8192</reserved_size_rows>
+        <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+        <flush_on_crash>false</flush_on_crash>
```
diff --git a/docs/en/operations/system-tables/information_schema.md b/docs/en/operations/system-tables/information_schema.md
index 35fd3a753b5f..8470ac838a49 100644
--- a/docs/en/operations/system-tables/information_schema.md
+++ b/docs/en/operations/system-tables/information_schema.md
@@ -3,21 +3,30 @@ slug: /en/operations/system-tables/information_schema
---
# INFORMATION_SCHEMA

-`INFORMATION_SCHEMA` (`information_schema`) is a system database that contains views. Using these views, you can get information about the metadata of database objects. These views read data from the columns of the [system.columns](../../operations/system-tables/columns.md), [system.databases](../../operations/system-tables/databases.md) and [system.tables](../../operations/system-tables/tables.md) system tables.
-
-The structure and composition of system tables may change in different versions of the product, but the support of the `information_schema` makes it possible to change the structure of system tables without changing the method of access to metadata. Metadata requests do not depend on the DBMS used.
The views in `INFORMATION_SCHEMA` are generally inferior to normal system tables but tools can use them to obtain basic information in a cross-DBMS manner. The structure and content of views in `INFORMATION_SCHEMA` is supposed to evolves in a backwards-compatible way, i.e. only new functionality is added but existing functionality is not changed or removed. In terms of internal implementation, views in `INFORMATION_SCHEMA` usually map to to normal system tables like [system.columns](../../operations/system-tables/columns.md), [system.databases](../../operations/system-tables/databases.md) and [system.tables](../../operations/system-tables/tables.md). ``` sql SHOW TABLES FROM INFORMATION_SCHEMA; + +-- or: +SHOW TABLES FROM information_schema; ``` ``` text -┌─name─────┐ -│ COLUMNS │ -│ SCHEMATA │ -│ TABLES │ -│ VIEWS │ -└──────────┘ +┌─name────────────────────┐ +│ COLUMNS │ +│ KEY_COLUMN_USAGE │ +│ REFERENTIAL_CONSTRAINTS │ +│ SCHEMATA │ +│ TABLES │ +│ VIEWS │ +│ columns │ +│ key_column_usage │ +│ referential_constraints │ +│ schemata │ +│ tables │ +│ views │ +└─────────────────────────┘ ``` `INFORMATION_SCHEMA` contains the following views: @@ -26,6 +35,10 @@ SHOW TABLES FROM INFORMATION_SCHEMA; - [SCHEMATA](#schemata) - [TABLES](#tables) - [VIEWS](#views) +- [KEY_COLUMN_USAGE](#key_column_usage) +- [REFERENTIAL_CONSTRAINTS](#referential_constraints) + +Case-insensitive equivalent views, e.g. `INFORMATION_SCHEMA.columns` are provided for reasons of compatibility with other databases. The same applies to all the columns in these views - both lowercase (for example, `table_name`) and uppercase (`TABLE_NAME`) variants are provided. ## COLUMNS {#columns} @@ -56,13 +69,43 @@ Columns: - `domain_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. - `domain_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. - `domain_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported. +- `extra` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `STORED GENERATED` for `MATERIALIZED`-type columns, `VIRTUAL GENERATED` for `ALIAS`-type columns, `DEFAULT_GENERATED` for `DEFAULT`-type columns, or `NULL`. 
**Example** Query: ``` sql -SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE (table_schema=currentDatabase() OR table_schema='') AND table_name NOT LIKE '%inner%' LIMIT 1 FORMAT Vertical; +SELECT table_catalog, + table_schema, + table_name, + column_name, + ordinal_position, + column_default, + is_nullable, + data_type, + character_maximum_length, + character_octet_length, + numeric_precision, + numeric_precision_radix, + numeric_scale, + datetime_precision, + character_set_catalog, + character_set_schema, + character_set_name, + collation_catalog, + collation_schema, + collation_name, + domain_catalog, + domain_schema, + domain_name, + column_comment, + column_type +FROM INFORMATION_SCHEMA.COLUMNS +WHERE (table_schema = currentDatabase() OR table_schema = '') + AND table_name NOT LIKE '%inner%' +LIMIT 1 +FORMAT Vertical; ``` Result: @@ -114,7 +157,17 @@ Columns: Query: ``` sql -SELECT * FROM information_schema.schemata WHERE schema_name ILIKE 'information_schema' LIMIT 1 FORMAT Vertical; +SELECT catalog_name, + schema_name, + schema_owner, + default_character_set_catalog, + default_character_set_schema, + default_character_set_name, + sql_path +FROM information_schema.schemata +WHERE schema_name ilike 'information_schema' +LIMIT 1 +FORMAT Vertical; ``` Result: @@ -140,19 +193,35 @@ Columns: - `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. - `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located. - `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name. -- `table_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Table type. Possible values: +- `table_type` ([String](../../sql-reference/data-types/string.md)) — Table type. Possible values: - `BASE TABLE` - `VIEW` - `FOREIGN TABLE` - `LOCAL TEMPORARY` - `SYSTEM VIEW` +- `table_rows` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The total + number of rows. NULL if it could not be determined. +- `data_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The size of + the data on-disk. NULL if it could not be determined. +- `table_collation` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The table default collation. Always `utf8mb4_0900_ai_ci`. +- `table_comment` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The comment used when creating the table. 
**Example** Query: ``` sql -SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE (table_schema = currentDatabase() OR table_schema = '') AND table_name NOT LIKE '%inner%' LIMIT 1 FORMAT Vertical; +SELECT table_catalog, + table_schema, + table_name, + table_type, + table_collation, + table_comment +FROM INFORMATION_SCHEMA.TABLES +WHERE (table_schema = currentDatabase() OR table_schema = '') + AND table_name NOT LIKE '%inner%' +LIMIT 1 +FORMAT Vertical; ``` Result: @@ -160,10 +229,12 @@ Result: ``` text Row 1: ────── -table_catalog: default -table_schema: default -table_name: describe_example -table_type: BASE TABLE +table_catalog: default +table_schema: default +table_name: describe_example +table_type: BASE TABLE +table_collation: utf8mb4_0900_ai_ci +table_comment: ``` ## VIEWS {#views} @@ -192,7 +263,20 @@ Query: ``` sql CREATE VIEW v (n Nullable(Int32), f Float64) AS SELECT n, f FROM t; CREATE MATERIALIZED VIEW mv ENGINE = Null AS SELECT * FROM system.one; -SELECT * FROM information_schema.views WHERE table_schema = currentDatabase() LIMIT 1 FORMAT Vertical; +SELECT table_catalog, + table_schema, + table_name, + view_definition, + check_option, + is_updatable, + is_insertable_into, + is_trigger_updatable, + is_trigger_deletable, + is_trigger_insertable_into +FROM information_schema.views +WHERE table_schema = currentDatabase() +LIMIT 1 +FORMAT Vertical; ``` Result: @@ -211,3 +295,80 @@ is_trigger_updatable: NO is_trigger_deletable: NO is_trigger_insertable_into: NO ``` + +## KEY_COLUMN_USAGE {#key_column_usage} + +Contains columns from the [system.tables](../../operations/system-tables/tables.md) system table which are restricted by constraints. + +Columns: + +- `constraint_catalog` ([String](../../sql-reference/data-types/string.md)) — Currently unused. Always `def`. +- `constraint_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the schema (database) to which the constraint belongs. +- `constraint_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the constraint. +- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — Currently unused. Always `def`. +- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the schema (database) to which the table belongs. +- `table_name` ([String](../../sql-reference/data-types/string.md)) — The name of the table that has the constraint. +- `column_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — The name of the column that has the constraint. +- `ordinal_position` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Currently unused. Always `1`. +- `position_in_unique_constraint` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt32](../../sql-reference/data-types/int-uint.md))) — Currently unused. Always `NULL`. +- `referenced_table_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Currently unused. Always NULL. +- `referenced_table_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Currently unused. Always NULL. +- `referenced_column_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Currently unused. Always NULL. 
+
+
+**Example**
+
+```sql
+CREATE TABLE test (i UInt32, s String) ENGINE MergeTree ORDER BY i;
+SELECT constraint_catalog,
+       constraint_schema,
+       constraint_name,
+       table_catalog,
+       table_schema,
+       table_name,
+       column_name,
+       ordinal_position,
+       position_in_unique_constraint,
+       referenced_table_schema,
+       referenced_table_name,
+       referenced_column_name
+FROM information_schema.key_column_usage
+WHERE table_name = 'test'
+FORMAT Vertical;
+```
+
+Result:
+
+```
+Row 1:
+──────
+constraint_catalog: def
+constraint_schema: default
+constraint_name: PRIMARY
+table_catalog: def
+table_schema: default
+table_name: test
+column_name: i
+ordinal_position: 1
+position_in_unique_constraint: ᴺᵁᴸᴸ
+referenced_table_schema: ᴺᵁᴸᴸ
+referenced_table_name: ᴺᵁᴸᴸ
+referenced_column_name: ᴺᵁᴸᴸ
+```
+
+## REFERENTIAL_CONSTRAINTS {#referential_constraints}
+
+Contains information about foreign keys. Currently returns an empty result (no rows), which is just enough to provide compatibility with third-party tools like Tableau Online.
+
+Columns:
+
+- `constraint_catalog` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
+- `constraint_schema` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
+- `constraint_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Currently unused.
+- `unique_constraint_catalog` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
+- `unique_constraint_schema` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
+- `unique_constraint_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Currently unused.
+- `match_option` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
+- `update_rule` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
+- `delete_rule` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
+- `table_name` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
+- `referenced_table_name` ([String](../../sql-reference/data-types/string.md)) — Currently unused.
diff --git a/docs/en/operations/system-tables/jemalloc_bins.md b/docs/en/operations/system-tables/jemalloc_bins.md
new file mode 100644
index 000000000000..06d9ba57dfc3
--- /dev/null
+++ b/docs/en/operations/system-tables/jemalloc_bins.md
@@ -0,0 +1,45 @@
+---
+slug: /en/operations/system-tables/jemalloc_bins
+---
+# jemalloc_bins
+
+Contains information about memory allocations done via the jemalloc allocator in different size classes (bins), aggregated from all arenas.
+These statistics might not be absolutely accurate because of thread-local caching in jemalloc.
+
+Columns:
+
+- `index` (UInt64) — Index of the bin, ordered by size.
+- `large` (Bool) — True for large allocations and False for small ones.
+- `size` (UInt64) — Size of allocations in this bin.
+- `allocations` (UInt64) — Number of allocations.
+- `deallocations` (UInt64) — Number of deallocations.
+
+**Example**
+
+Find the sizes of allocations that contributed the most to the current overall memory usage.
+
+``` sql
+SELECT
+    *,
+    allocations - deallocations AS active_allocations,
+    size * active_allocations AS allocated_bytes
+FROM system.jemalloc_bins
+WHERE allocated_bytes > 0
+ORDER BY allocated_bytes DESC
+LIMIT 10
+```
+
+``` text
+┌─index─┬─large─┬─────size─┬─allocations──┬─deallocations─┬─active_allocations─┬─allocated_bytes─┐
+│ 82 │ 1 │ 50331648 │ 1 │ 0 │ 1 │ 50331648 │
+│ 10 │ 0 │ 192 │ 512336 │ 370710 │ 141626 │ 27192192 │
+│ 69 │ 1 │ 5242880 │ 6 │ 2 │ 4 │ 20971520 │
+│ 3 │ 0 │ 48 │ 16938224 │ 16559484 │ 378740 │ 18179520 │
+│ 28 │ 0 │ 4096 │ 122924 │ 119142 │ 3782 │ 15491072 │
+│ 61 │ 1 │ 1310720 │ 44569 │ 44558 │ 11 │ 14417920 │
+│ 39 │ 1 │ 28672 │ 1285 │ 913 │ 372 │ 10665984 │
+│ 4 │ 0 │ 64 │ 2837225 │ 2680568 │ 156657 │ 10026048 │
+│ 6 │ 0 │ 96 │ 2617803 │ 2531435 │ 86368 │ 8291328 │
+│ 36 │ 1 │ 16384 │ 22431 │ 21970 │ 461 │ 7553024 │
+└───────┴───────┴──────────┴──────────────┴───────────────┴────────────────────┴─────────────────┘
+```
diff --git a/docs/en/operations/system-tables/kafka_consumers.md b/docs/en/operations/system-tables/kafka_consumers.md
new file mode 100644
index 000000000000..7e28a251e26c
--- /dev/null
+++ b/docs/en/operations/system-tables/kafka_consumers.md
@@ -0,0 +1,58 @@
+---
+slug: /en/operations/system-tables/kafka_consumers
+---
+# kafka_consumers
+
+Contains information about Kafka consumers.
+Applicable for the [Kafka table engine](../../engines/table-engines/integrations/kafka) (native ClickHouse integration).
+
+Columns:
+
+- `database` (String) - database of the table with Kafka Engine.
+- `table` (String) - name of the table with Kafka Engine.
+- `consumer_id` (String) - Kafka consumer identifier. Note that a table can have many consumers, as specified by the `kafka_num_consumers` parameter.
+- `assignments.topic` (Array(String)) - Kafka topic.
+- `assignments.partition_id` (Array(Int32)) - Kafka partition id. Note that only one consumer can be assigned to a partition.
+- `assignments.current_offset` (Array(Int64)) - current offset.
+- `exceptions.time` (Array(DateTime)) - timestamps when the 10 most recent exceptions were generated.
+- `exceptions.text` (Array(String)) - text of the 10 most recent exceptions.
+- `last_poll_time` (DateTime) - timestamp of the most recent poll.
+- `num_messages_read` (UInt64) - number of messages read by the consumer.
+- `last_commit_time` (DateTime) - timestamp of the most recent commit.
+- `num_commits` (UInt64) - total number of commits for the consumer.
+- `last_rebalance_time` (DateTime) - timestamp of the most recent Kafka rebalance.
+- `num_rebalance_revocations` (UInt64) - number of times the consumer's partitions were revoked.
+- `num_rebalance_assignments` (UInt64) - number of times the consumer was assigned to the Kafka cluster.
+- `is_currently_used` (UInt8) - whether the consumer is in use.
+- `rdkafka_stat` (String) - library internal statistics. See https://github.com/ClickHouse/librdkafka/blob/master/STATISTICS.md. Set `statistics_interval_ms` to 0 to disable it, the default is 3000 (once in three seconds).
+ +Example: + +``` sql +SELECT * +FROM system.kafka_consumers +FORMAT Vertical +``` + +``` text +Row 1: +────── +database: test +table: kafka +consumer_id: ClickHouse-instance-test-kafka-1caddc7f-f917-4bb1-ac55-e28bd103a4a0 +assignments.topic: ['system_kafka_cons'] +assignments.partition_id: [0] +assignments.current_offset: [18446744073709550615] +exceptions.time: [] +exceptions.text: [] +last_poll_time: 2006-11-09 18:47:47 +num_messages_read: 4 +last_commit_time: 2006-11-10 04:39:40 +num_commits: 1 +last_rebalance_time: 1970-01-01 00:00:00 +num_rebalance_revocations: 0 +num_rebalance_assignments: 1 +is_currently_used: 1 +rdkafka_stat: {...} + +``` diff --git a/docs/en/operations/system-tables/licenses.md b/docs/en/operations/system-tables/licenses.md index 0f09d559d8be..c436c0ade3b9 100644 --- a/docs/en/operations/system-tables/licenses.md +++ b/docs/en/operations/system-tables/licenses.md @@ -1,7 +1,7 @@ --- slug: /en/operations/system-tables/licenses --- -# licenses +# licenses Contains licenses of third-party libraries that are located in the [contrib](https://github.com/ClickHouse/ClickHouse/tree/master/contrib) directory of ClickHouse sources. @@ -20,21 +20,10 @@ SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15 ``` text ┌─library_name───────┬─license_type─┬─license_path────────────────────────┐ -│ FastMemcpy │ MIT │ /contrib/FastMemcpy/LICENSE │ -│ arrow │ Apache │ /contrib/arrow/LICENSE.txt │ -│ avro │ Apache │ /contrib/avro/LICENSE.txt │ │ aws-c-common │ Apache │ /contrib/aws-c-common/LICENSE │ -│ aws-c-event-stream │ Apache │ /contrib/aws-c-event-stream/LICENSE │ -│ aws-checksums │ Apache │ /contrib/aws-checksums/LICENSE │ -│ aws │ Apache │ /contrib/aws/LICENSE.txt │ -│ base64 │ BSD 2-clause │ /contrib/base64/LICENSE │ -│ boost │ Boost │ /contrib/boost/LICENSE_1_0.txt │ +│ base64 │ BSD 2-clause │ /contrib/aklomp-base64/LICENSE │ │ brotli │ MIT │ /contrib/brotli/LICENSE │ -│ capnproto │ MIT │ /contrib/capnproto/LICENSE │ -│ cassandra │ Apache │ /contrib/cassandra/LICENSE.txt │ -│ cctz │ Apache │ /contrib/cctz/LICENSE.txt │ -│ cityhash102 │ MIT │ /contrib/cityhash102/COPYING │ -│ cppkafka │ BSD 2-clause │ /contrib/cppkafka/LICENSE │ +│ [...] │ [...] │ [...] │ └────────────────────┴──────────────┴─────────────────────────────────────┘ ``` diff --git a/docs/en/operations/system-tables/merge_tree_settings.md b/docs/en/operations/system-tables/merge_tree_settings.md index d8539908bf74..48217d63f9db 100644 --- a/docs/en/operations/system-tables/merge_tree_settings.md +++ b/docs/en/operations/system-tables/merge_tree_settings.md @@ -7,11 +7,17 @@ Contains information about settings for `MergeTree` tables. Columns: -- `name` (String) — Setting name. -- `value` (String) — Setting value. -- `description` (String) — Setting description. -- `type` (String) — Setting type (implementation specific string value). -- `changed` (UInt8) — Whether the setting was explicitly defined in the config or explicitly changed. +- `name` ([String](../../sql-reference/data-types/string.md)) — Setting name. +- `value` ([String](../../sql-reference/data-types/string.md)) — Setting value. +- `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Whether the setting was explicitly defined in the config or explicitly changed. +- `description` ([String](../../sql-reference/data-types/string.md)) — Setting description. 
+- `min` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Minimum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no minimum value, contains [NULL](../../sql-reference/syntax.md#null-literal).
+- `max` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — Maximum value of the setting, if any is set via [constraints](../../operations/settings/constraints-on-settings.md#constraints-on-settings). If the setting has no maximum value, contains [NULL](../../sql-reference/syntax.md#null-literal).
+- `readonly` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether the current user can change the setting:
+    - `0` — Current user can change the setting.
+    - `1` — Current user can’t change the setting.
+- `type` ([String](../../sql-reference/data-types/string.md)) — Setting type (implementation specific string value).
+- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) - Shows whether a setting is obsolete.

**Example**
```sql
SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical;
```

```response
Row 1:
──────
-name: index_granularity
-value: 8192
+name: min_compress_block_size
+value: 0
changed: 0
-description: How many rows correspond to one primary key value.
-type: SettingUInt64
+description: When granule is written, compress the data in buffer if the size of pending uncompressed data is larger or equal than the specified threshold. If this setting is not set, the corresponding global setting is used.
+min: ᴺᵁᴸᴸ
+max: ᴺᵁᴸᴸ
+readonly: 0
+type: UInt64
+is_obsolete: 0

Row 2:
──────
-name: min_bytes_for_wide_part
+name: max_compress_block_size
value: 0
changed: 0
-description: Minimal uncompressed size in bytes to create part in wide format instead of compact
-type: SettingUInt64
+description: Compress the pending uncompressed data in buffer if its size is larger or equal than the specified threshold. Block of data will be compressed even if the current granule is not finished. If this setting is not set, the corresponding global setting is used.
+min: ᴺᵁᴸᴸ
+max: ᴺᵁᴸᴸ
+readonly: 0
+type: UInt64
+is_obsolete: 0

Row 3:
──────
-name: min_rows_for_wide_part
-value: 0
+name: index_granularity
+value: 8192
changed: 0
-description: Minimal number of rows to create part in wide format instead of compact
-type: SettingUInt64
+description: How many rows correspond to one primary key value.
+min: ᴺᵁᴸᴸ
+max: ᴺᵁᴸᴸ
+readonly: 0
+type: UInt64
+is_obsolete: 0

Row 4:
──────
-name: merge_max_block_size
-value: 8192
+name: max_digestion_size_per_segment
+value: 268435456
changed: 0
-description: How many rows in blocks should be formed for merge operations.
-type: SettingUInt64
+description: Max number of bytes to digest per segment to build GIN index.
+min: ᴺᵁᴸᴸ
+max: ᴺᵁᴸᴸ
+readonly: 0
+type: UInt64
+is_obsolete: 0

-4 rows in set. Elapsed: 0.001 sec.
+4 rows in set. Elapsed: 0.009 sec.
```
diff --git a/docs/en/operations/system-tables/metrics.md b/docs/en/operations/system-tables/metrics.md
index 5a7dfd03eb49..ae0e7620d350 100644
--- a/docs/en/operations/system-tables/metrics.md
+++ b/docs/en/operations/system-tables/metrics.md
@@ -10,8 +10,9 @@ Columns:

- `metric` ([String](../../sql-reference/data-types/string.md)) — Metric name.
- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — Metric value.
 - `description` ([String](../../sql-reference/data-types/string.md)) — Metric description.
+- `name` ([String](../../sql-reference/data-types/string.md)) — Alias for `metric`.
 
-The list of supported metrics you can find in the [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) source file of ClickHouse.
+You can find all supported metrics in the source file [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp).
 
 **Example**
diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md
index 9159d1e92843..8113b850a382 100644
--- a/docs/en/operations/system-tables/parts.md
+++ b/docs/en/operations/system-tables/parts.md
@@ -27,7 +27,7 @@ Columns:
 
     Data storing format is controlled by the `min_bytes_for_wide_part` and `min_rows_for_wide_part` settings of the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table.
 
- - `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) – Flag that indicates whether the data part is active. If a data part is active, it’s used in a table. Otherwise, it’s deleted. Inactive data parts remain after merging.
+- `active` ([UInt8](../../sql-reference/data-types/int-uint.md)) – Flag that indicates whether the data part is active. If a data part is active, it’s used in a table. Otherwise, it’s deleted. Inactive data parts remain after merging.
 
 - `marks` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The number of marks. To get the approximate number of rows in a data part, multiply `marks` by the index granularity (usually 8192) (this hint does not work for adaptive granularity).
 
@@ -39,6 +39,8 @@ Columns:
 
 - `data_uncompressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of uncompressed data in the data part. All the auxiliary files (for example, files with marks) are not included.
 
+- `primary_key_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The amount of memory (in bytes) used by primary key values in the primary.idx/cidx file on disk.
+
 - `marks_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The size of the file with marks.
 
 - `secondary_indices_compressed_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Total size of compressed data for secondary indices in the data part. All the auxiliary files (for example, files with marks) are not included.
diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md
index b9fdd19c643b..ced971667021 100644
--- a/docs/en/operations/system-tables/query_log.md
+++ b/docs/en/operations/system-tables/query_log.md
@@ -48,7 +48,7 @@ Columns:
 - `read_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of rows read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`. For distributed queries `read_rows` includes the total number of rows read at all replicas. Each replica sends its `read_rows` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value.
 - `read_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Total number of bytes read from all tables and table functions participated in query. It includes usual subqueries, subqueries for `IN` and `JOIN`.
For distributed queries `read_bytes` includes the total number of bytes read at all replicas. Each replica sends its `read_bytes` value, and the server-initiator of the query summarizes all received and local values. The cache volumes do not affect this value.
 - `written_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written rows. For other queries, the column value is 0.
-- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes. For other queries, the column value is 0.
+- `written_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — For `INSERT` queries, the number of written bytes (uncompressed). For other queries, the column value is 0.
 - `result_rows` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Number of rows in a result of the `SELECT` query, or a number of rows in the `INSERT` query.
 - `result_bytes` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — RAM volume in bytes used to store a query result.
 - `memory_usage` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Memory consumption by the query.
@@ -101,7 +101,8 @@ Columns:
 - `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/map.md)) — ProfileEvents that measure different metrics. The description of them could be found in the table [system.events](../../operations/system-tables/events.md#system_tables-events)
 - `Settings` ([Map(String, String)](../../sql-reference/data-types/map.md)) — Settings that were changed when the client ran the query. To enable logging changes to settings, set the `log_query_settings` parameter to 1.
 - `log_comment` ([String](../../sql-reference/data-types/string.md)) — Log comment. It can be set to arbitrary string no longer than [max_query_size](../../operations/settings/settings.md#settings-max_query_size). An empty string if it is not defined.
-- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution.
+- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — Thread ids that are participating in query execution. These threads may not have run simultaneously.
+- `peak_threads_usage` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Maximum count of simultaneous threads executing the query.
 - `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions`, which were used during query execution.
 - `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `aggregate functions combinators`, which were used during query execution.
 - `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `database engines`, which were used during query execution.
@@ -111,6 +112,11 @@ Columns:
 - `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions`, which were used during query execution.
 - `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution.
 - `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions`, which were used during query execution.
+- `query_cache_usage` ([Enum8](../../sql-reference/data-types/enum.md)) — Usage of the [query cache](../query-cache.md) during query execution. Values:
+    - `'Unknown'` = Status unknown.
+    - `'None'` = The query result was neither written into nor read from the query cache.
+    - `'Write'` = The query result was written into the query cache.
+    - `'Read'` = The query result was read from the query cache.
 
 **Example**
 
@@ -186,6 +192,7 @@ used_formats: []
 used_functions: []
 used_storages: []
 used_table_functions: []
+query_cache_usage: None
 ```
 
 **See Also**
diff --git a/docs/en/operations/system-tables/scheduler.md b/docs/en/operations/system-tables/scheduler.md
new file mode 100644
index 000000000000..953db4c28f2f
--- /dev/null
+++ b/docs/en/operations/system-tables/scheduler.md
@@ -0,0 +1,72 @@
+---
+slug: /en/operations/system-tables/scheduler
+---
+# scheduler
+
+Contains information and status for [scheduling nodes](/docs/en/operations/workload-scheduling.md/#hierarchy) residing on the local server.
+This table can be used for monitoring. The table contains a row for every scheduling node.
+
+Example:
+
+``` sql
+SELECT *
+FROM system.scheduler
+WHERE resource = 'network_read' AND path = '/prio/fair/prod'
+FORMAT Vertical
+```
+
+``` text
+Row 1:
+──────
+resource: network_read
+path: /prio/fair/prod
+type: fifo
+weight: 5
+priority: 0
+is_active: 0
+active_children: 0
+dequeued_requests: 67
+dequeued_cost: 4692272
+busy_periods: 63
+vruntime: 938454.1999999989
+system_vruntime: ᴺᵁᴸᴸ
+queue_length: 0
+queue_cost: 0
+budget: -60524
+is_satisfied: ᴺᵁᴸᴸ
+inflight_requests: ᴺᵁᴸᴸ
+inflight_cost: ᴺᵁᴸᴸ
+max_requests: ᴺᵁᴸᴸ
+max_cost: ᴺᵁᴸᴸ
+max_speed: ᴺᵁᴸᴸ
+max_burst: ᴺᵁᴸᴸ
+throttling_us: ᴺᵁᴸᴸ
+tokens: ᴺᵁᴸᴸ
+```
+
+Columns:
+
+- `resource` (`String`) - Resource name
+- `path` (`String`) - Path to a scheduling node within this resource scheduling hierarchy
+- `type` (`String`) - Type of a scheduling node.
+- `weight` (`Float64`) - Weight of a node, used by a parent node of `fair` type.
+- `priority` (`Int64`) - Priority of a node, used by a parent node of `priority` type (lower value means higher priority).
+- `is_active` (`UInt8`) - Whether this node is currently active - has resource requests to be dequeued and constraints satisfied.
+- `active_children` (`UInt64`) - The number of children in active state.
+- `dequeued_requests` (`UInt64`) - The total number of resource requests dequeued from this node.
+- `dequeued_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests dequeued from this node.
+- `busy_periods` (`UInt64`) - The total number of deactivations of this node.
+- `vruntime` (`Nullable(Float64)`) - For children of `fair` nodes only. Virtual runtime of a node used by the SFQ algorithm to select the next child to process in a max-min fair manner.
+- `system_vruntime` (`Nullable(Float64)`) - For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. Used during child activation as the new value of `vruntime`.
+- `queue_length` (`Nullable(UInt64)`) - For `fifo` nodes only. Current number of resource requests residing in the queue.
+- `queue_cost` (`Nullable(UInt64)`) - For `fifo` nodes only. Sum of costs (e.g. size in bytes) of all requests residing in the queue.
+- `budget` (`Nullable(Int64)`) - For `fifo` nodes only. The number of available "cost units" for new resource requests. Can appear in case of discrepancy of estimated and real costs of resource requests (e.g.
after read/write failure)
+- `is_satisfied` (`Nullable(UInt8)`) - For constraint nodes only (e.g. `inflight_limit`). Equals `1` if all the constraints of this node are satisfied.
+- `inflight_requests` (`Nullable(Int64)`) - For `inflight_limit` nodes only. The number of resource requests dequeued from this node, that are currently in consumption state.
+- `inflight_cost` (`Nullable(Int64)`) - For `inflight_limit` nodes only. The sum of costs (e.g. bytes) of all resource requests dequeued from this node, that are currently in consumption state.
+- `max_requests` (`Nullable(Int64)`) - For `inflight_limit` nodes only. Upper limit for `inflight_requests` leading to constraint violation.
+- `max_cost` (`Nullable(Int64)`) - For `inflight_limit` nodes only. Upper limit for `inflight_cost` leading to constraint violation.
+- `max_speed` (`Nullable(Float64)`) - For `bandwidth_limit` nodes only. Upper limit for bandwidth in tokens per second.
+- `max_burst` (`Nullable(Float64)`) - For `bandwidth_limit` nodes only. Upper limit for `tokens` available in token-bucket throttler.
+- `throttling_us` (`Nullable(Int64)`) - For `bandwidth_limit` nodes only. Total number of microseconds this node was in throttling state.
+- `tokens` (`Nullable(Float64)`) - For `bandwidth_limit` nodes only. Number of tokens currently available in token-bucket throttler.
diff --git a/docs/en/operations/system-tables/server_settings.md b/docs/en/operations/system-tables/server_settings.md
index 3085b1acaf40..7efe605ccef6 100644
--- a/docs/en/operations/system-tables/server_settings.md
+++ b/docs/en/operations/system-tables/server_settings.md
@@ -14,6 +14,7 @@ Columns:
 - `changed` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Shows whether a setting was specified in `config.xml`
 - `description` ([String](../../sql-reference/data-types/string.md)) — Short server setting description.
 - `type` ([String](../../sql-reference/data-types/string.md)) — Server setting value type.
+- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) - Shows whether a setting is obsolete.
 
 **Example**
 
@@ -26,14 +27,22 @@ WHERE name LIKE '%thread_pool%'
 ```
 
 ``` text
-┌─name─────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┐
-│ max_thread_pool_size │ 5000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │
-│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │
-│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │
-│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │
-│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │
-│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool.
│ UInt64 │
-└──────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┘
+┌─name────────────────────────────────────────┬─value─┬─default─┬─changed─┬─description─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─type───┬─is_obsolete─┐
+│ max_thread_pool_size │ 10000 │ 10000 │ 1 │ The maximum number of threads that could be allocated from the OS and used for query execution and background operations. │ UInt64 │ 0 │
+│ max_thread_pool_free_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that will always stay in a global thread pool once allocated and remain idle in case of insufficient number of tasks. │ UInt64 │ 0 │
+│ thread_pool_queue_size │ 10000 │ 10000 │ 0 │ The maximum number of tasks that will be placed in a queue and wait for execution. │ UInt64 │ 0 │
+│ max_io_thread_pool_size │ 100 │ 100 │ 0 │ The maximum number of threads that would be used for IO operations │ UInt64 │ 0 │
+│ max_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for IO thread pool. │ UInt64 │ 0 │
+│ io_thread_pool_queue_size │ 10000 │ 10000 │ 0 │ Queue size for IO thread pool. │ UInt64 │ 0 │
+│ max_active_parts_loading_thread_pool_size │ 64 │ 64 │ 0 │ The number of threads to load active set of data parts (Active ones) at startup. │ UInt64 │ 0 │
+│ max_outdated_parts_loading_thread_pool_size │ 32 │ 32 │ 0 │ The number of threads to load inactive set of data parts (Outdated ones) at startup. │ UInt64 │ 0 │
+│ max_parts_cleaning_thread_pool_size │ 128 │ 128 │ 0 │ The number of threads for concurrent removal of inactive data parts. │ UInt64 │ 0 │
+│ max_backups_io_thread_pool_size │ 1000 │ 1000 │ 0 │ The maximum number of threads that would be used for IO operations for BACKUP queries │ UInt64 │ 0 │
+│ max_backups_io_thread_pool_free_size │ 0 │ 0 │ 0 │ Max free size for backups IO thread pool. │ UInt64 │ 0 │
+│ backups_io_thread_pool_queue_size │ 0 │ 0 │ 0 │ Queue size for backups IO thread pool. │ UInt64 │ 0 │
+└─────────────────────────────────────────────┴───────┴─────────┴─────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴────────┴─────────────┘
 ```
 
 Using of `WHERE changed` can be useful, for example, when you want to check
diff --git a/docs/en/operations/system-tables/settings.md b/docs/en/operations/system-tables/settings.md
index afae45077cc1..a04e095e990a 100644
--- a/docs/en/operations/system-tables/settings.md
+++ b/docs/en/operations/system-tables/settings.md
@@ -17,6 +17,7 @@ Columns:
     - `0` — Current user can change the setting.
     - `1` — Current user can’t change the setting.
 - `default` ([String](../../sql-reference/data-types/string.md)) — Setting default value.
+- `is_obsolete` ([UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges)) - Shows whether a setting is obsolete.
 **Example**
 
@@ -29,11 +30,14 @@ WHERE name LIKE '%min_i%'
 ```
 
 ``` text
-┌─name────────────────────────────────────────┬─value─────┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─min──┬─max──┬─readonly─┐
-│ min_insert_block_size_rows │ 1048576 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
-│ min_insert_block_size_bytes │ 268435456 │ 0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
-│ read_backoff_min_interval_between_events_ms │ 1000 │ 0 │ Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │
-└─────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┘
+┌─name───────────────────────────────────────────────┬─value─────┬─changed─┬─description───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬─min──┬─max──┬─readonly─┬─type─────────┬─default───┬─alias_for─┬─is_obsolete─┐
+│ min_insert_block_size_rows │ 1048449 │ 0 │ Squash blocks passed to INSERT query to specified size in rows, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │ 1048449 │ │ 0 │
+│ min_insert_block_size_bytes │ 268402944 │ 0 │ Squash blocks passed to INSERT query to specified size in bytes, if blocks are not big enough. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │ 268402944 │ │ 0 │
+│ min_insert_block_size_rows_for_materialized_views │ 0 │ 0 │ Like min_insert_block_size_rows, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_rows) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │ 0 │ │ 0 │
+│ min_insert_block_size_bytes_for_materialized_views │ 0 │ 0 │ Like min_insert_block_size_bytes, but applied only during pushing to MATERIALIZED VIEW (default: min_insert_block_size_bytes) │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ UInt64 │ 0 │ │ 0 │
+│ read_backoff_min_interval_between_events_ms │ 1000 │ 0 │ Settings to reduce the number of threads in case of slow reads. Do not pay attention to the event, if the previous one has passed less than a certain amount of time. │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 0 │ Milliseconds │ 1000 │ │ 0 │
+└────────────────────────────────────────────────────┴───────────┴─────────┴───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┴──────┴──────┴──────────┴──────────────┴───────────┴───────────┴─────────────┘
 ```
 
 Using of `WHERE changed` can be useful, for example, when you want to check:
diff --git a/docs/en/operations/system-tables/trace_log.md b/docs/en/operations/system-tables/trace_log.md
index 89d54adc30d9..1396244a12a0 100644
--- a/docs/en/operations/system-tables/trace_log.md
+++ b/docs/en/operations/system-tables/trace_log.md
@@ -33,7 +33,7 @@ Columns:
     - `MemoryPeak` represents collecting updates of peak memory usage.
     - `ProfileEvent` represents collecting of increments of profile events.
 
-- `thread_number` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Thread identifier.
+- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier.
 
 - `query_id` ([String](../../sql-reference/data-types/string.md)) — Query identifier that can be used to get details about a query that was running from the [query_log](#system_tables-query_log) system table.
diff --git a/docs/en/operations/utilities/clickhouse-copier.md b/docs/en/operations/utilities/clickhouse-copier.md
index a9b82404b90b..473c1c628d1e 100644
--- a/docs/en/operations/utilities/clickhouse-copier.md
+++ b/docs/en/operations/utilities/clickhouse-copier.md
@@ -43,7 +43,7 @@ Parameters:
 - `config` — The path to the `keeper.xml` file with the parameters for the connection to ClickHouse Keeper.
 - `task-path` — The path to the ClickHouse Keeper node. This node is used for syncing `clickhouse-copier` processes and storing tasks. Tasks are stored in `$task-path/description`.
 - `task-file` — Optional path to file with task configuration for initial upload to ClickHouse Keeper.
-- `task-upload-force` — Force upload `task-file` even if node already exists.
+- `task-upload-force` — Force upload `task-file` even if node already exists. Default is false.
 - `base-dir` — The path to logs and auxiliary files. When it starts, `clickhouse-copier` creates `clickhouse-copier_YYYYMMHHSS_<PID>` subdirectories in `$base-dir`. If this parameter is omitted, the directories are created in the directory where `clickhouse-copier` was launched.
 
 ## Format of keeper.xml {#format-of-zookeeper-xml}
diff --git a/docs/en/operations/utilities/clickhouse-disks.md b/docs/en/operations/utilities/clickhouse-disks.md
new file mode 100644
index 000000000000..76db9e41836c
--- /dev/null
+++ b/docs/en/operations/utilities/clickhouse-disks.md
@@ -0,0 +1,38 @@
+---
+slug: /en/operations/utilities/clickhouse-disks
+sidebar_position: 59
+sidebar_label: clickhouse-disks
+---
+
+# clickhouse-disks
+
+A utility providing filesystem-like operations for ClickHouse disks.
+
+Program-wide options:
+
+* `--config-file, -C` -- path to ClickHouse config, defaults to `/etc/clickhouse-server/config.xml`.
+* `--save-logs` -- Log progress of invoked commands to `/var/log/clickhouse-server/clickhouse-disks.log`.
+* `--log-level` -- What [type](../server-configuration-parameters/settings#server_configuration_parameters-logger) of events to log, defaults to `none`.
+* `--disk` -- which disk to use for `mkdir, move, read, write, remove` commands. Defaults to `default`.
+
+## Commands
+
+* `copy [--disk-from d1] [--disk-to d2] <FROM_PATH> <TO_PATH>`.
+  Recursively copy data from `FROM_PATH` at disk `d1` (defaults to `disk` value if not provided)
+  to `TO_PATH` at disk `d2` (defaults to `disk` value if not provided).
+* `move <FROM_PATH> <TO_PATH>`.
+  Move file or directory from `FROM_PATH` to `TO_PATH`.
+* `remove <PATH>`.
+  Remove `PATH` recursively.
+* `link <FROM_PATH> <TO_PATH>`.
+  Create a hardlink from `FROM_PATH` to `TO_PATH`.
+* `list [--recursive] <PATH> ...`
+  List files at `PATH`s. Non-recursive by default.
+* `list-disks`.
+  List disk names.
+* `mkdir [--recursive] <PATH>`.
+  Create a directory. Non-recursive by default.
+* `read <FROM_PATH> [<TO_PATH>]`
+  Read a file from `FROM_PATH` to `TO_PATH` (`stdout` if not supplied).
+* `write [FROM_PATH] <TO_PATH>`.
+  Write a file from `FROM_PATH` (`stdin` if not supplied) to `TO_PATH`.
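+
+For example, a minimal usage sketch (the `s3` disk name and the paths are assumptions for illustration; use `list-disks` to see what your server actually defines, and note the exact invocation may differ between versions):
+
+```bash
+# Show the configured disks, then copy a directory from the default disk to a disk named "s3".
+clickhouse-disks --config-file /etc/clickhouse-server/config.xml list-disks
+clickhouse-disks copy --disk-from default --disk-to s3 data/backup data/backup
+```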
diff --git a/docs/en/operations/utilities/clickhouse-keeper-client.md b/docs/en/operations/utilities/clickhouse-keeper-client.md
index 77f816fe4282..2e84cd260715 100644
--- a/docs/en/operations/utilities/clickhouse-keeper-client.md
+++ b/docs/en/operations/utilities/clickhouse-keeper-client.md
@@ -11,18 +11,20 @@ A client application to interact with clickhouse-keeper by its native protocol.
 
 - `-q QUERY`, `--query=QUERY` — Query to execute. If this parameter is not passed, `clickhouse-keeper-client` will start in interactive mode.
 - `-h HOST`, `--host=HOST` — Server host. Default value: `localhost`.
-- `-p N`, `--port=N` — Server port. Default value: 2181
+- `-p N`, `--port=N` — Server port. Default value: 9181
 - `--connection-timeout=TIMEOUT` — Set connection timeout in seconds. Default value: 10s.
 - `--session-timeout=TIMEOUT` — Set session timeout in seconds. Default value: 10s.
 - `--operation-timeout=TIMEOUT` — Set operation timeout in seconds. Default value: 10s.
 - `--history-file=FILE_PATH` — Set path of history file. Default value: `~/.keeper-client-history`.
+- `--log-level=LEVEL` — Set log level. Default value: `information`.
+- `--no-confirmation` — If set, will not require a confirmation on several commands. Default value: `false` for interactive and `true` for query.
 - `--help` — Shows the help message.
 
 ## Example {#clickhouse-keeper-client-example}
 
 ```bash
-./clickhouse-keeper-client -h localhost:2181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30
-Connected to ZooKeeper at [::1]:2181 with session_id 137
+./clickhouse-keeper-client -h localhost:9181 --connection-timeout 30 --session-timeout 30 --operation-timeout 30
+Connected to ZooKeeper at [::1]:9181 with session_id 137
 / :) ls
 keeper foo bar
 / :) cd keeper
@@ -43,11 +45,19 @@ keeper foo bar
 ## Commands {#clickhouse-keeper-client-commands}
 
 - `ls [path]` -- Lists the nodes for the given path (default: cwd)
-- `cd [path]` -- Change the working path (default `.`)
-- `set <path> <value> [version]` -- Updates the node's value. Only update if version matches (default: -1)
-- `create <path> <value>` -- Creates new node
+- `cd [path]` -- Changes the working path (default `.`)
+- `exists <path>` -- Returns `1` if node exists, `0` otherwise
+- `set <path> <value> [version]` -- Updates the node's value. Only updates if version matches (default: -1)
+- `create <path> <value> [mode]` -- Creates new node with the set value
+- `touch <path>` -- Creates new node with an empty string as value. Doesn't throw an exception if the node already exists
 - `get <path>` -- Returns the node's value
-- `remove <path>` -- Remove the node
+- `rm <path> [version]` -- Removes the node only if version matches (default: -1)
 - `rmr <path>` -- Recursively deletes path. Confirmation required
 - `flwc <command>` -- Executes four-letter-word command
 - `help` -- Prints this message
+- `get_stat [path]` -- Returns the node's stat (default `.`)
+- `find_super_nodes <threshold> [path]` -- Finds nodes with number of children larger than some threshold for the given path (default `.`)
+- `delete_stale_backups` -- Deletes ClickHouse nodes used for backups that are now inactive
+- `find_big_family [path] [n]` -- Returns the top n nodes with the biggest family in the subtree (default path = `.` and n = 10)
+- `sync <path>` -- Synchronizes node between processes and leader
+- `reconfig <add|remove|set> "<arg>" [version]` -- Reconfigure Keeper cluster.
See https://clickhouse.com/docs/en/guides/sre/keeper/clickhouse-keeper#reconfiguration
diff --git a/docs/en/operations/utilities/clickhouse-local.md b/docs/en/operations/utilities/clickhouse-local.md
index 0443a80cf177..c863282efc1f 100644
--- a/docs/en/operations/utilities/clickhouse-local.md
+++ b/docs/en/operations/utilities/clickhouse-local.md
@@ -30,11 +30,17 @@ curl https://clickhouse.com/ | sh
 
 The binary you just downloaded can run all sorts of ClickHouse tools and utilities. If you want to run ClickHouse as a database server, check out the [Quick Start](../../quick-start.mdx).
 :::
 
-## Query data in a CSV file using SQL
+## Query data in a file using SQL {#query_data_in_file}
 
 A common use of `clickhouse-local` is to run ad-hoc queries on files: where you don't have to insert the data into a table. `clickhouse-local` can stream the data from a file into a temporary table and execute your SQL.
 
-If the file is sitting on the same machine as `clickhouse-local`, use the `file` table engine. The following `reviews.tsv` file contains a sampling of Amazon product reviews:
+If the file is sitting on the same machine as `clickhouse-local`, you can simply specify the file to load. The following `reviews.tsv` file contains a sampling of Amazon product reviews:
+
+```bash
+./clickhouse local -q "SELECT * FROM 'reviews.tsv'"
+```
+
+This command is a shortcut for:
 
 ```bash
 ./clickhouse local -q "SELECT * FROM file('reviews.tsv')"
@@ -51,6 +57,19 @@ The `file` table function creates a table, and you can use `DESCRIBE` to see the
 ./clickhouse local -q "DESCRIBE file('reviews.tsv')"
 ```
 
+:::tip
+You can use globs in file names (see [glob substitutions](/docs/en/sql-reference/table-functions/file.md/#globs-in-path)).
+
+Examples:
+
+```bash
+./clickhouse local -q "SELECT * FROM 'reviews*.jsonl'"
+./clickhouse local -q "SELECT * FROM 'review_?.csv'"
+./clickhouse local -q "SELECT * FROM 'review_{1..3}.csv'"
+```
+
+:::
+
 ```response
 marketplace Nullable(String)
 customer_id Nullable(Int64)
@@ -183,8 +202,8 @@ Arguments:
 
 - `-S`, `--structure` — table structure for input data.
 - `--input-format` — input format, `TSV` by default.
 - `-f`, `--file` — path to data, `stdin` by default.
-- `-q`, `--query` — queries to execute with `;` as delimiter. Cannot be used simultaneously with `--queries-file`.
-- `--queries-file` - file path with queries to execute. Cannot be used simultaneously with `--query`.
+- `-q`, `--query` — queries to execute with `;` as delimiter. `--query` can be specified multiple times, e.g. `--query "SELECT 1" --query "SELECT 2"` (see the sketch after this list). Cannot be used simultaneously with `--queries-file`.
+- `--queries-file` - file path with queries to execute. `--queries-file` can be specified multiple times, e.g. `--queries-file queries1.sql --queries-file queries2.sql`. Cannot be used simultaneously with `--query`.
 - `--multiquery, -n` – If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`.
 - `-N`, `--table` — table name where to put output data, `table` by default.
 - `--format`, `--output-format` — output format, `TSV` by default.
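+
+For instance, the repeated `--query` behavior described above can be exercised like this (a minimal sketch; each query produces its own result set):
+
+```bash
+./clickhouse local --query "SELECT 1" --query "SELECT 2"
+```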
diff --git a/docs/en/operations/utilities/index.md b/docs/en/operations/utilities/index.md
index 112a51cfa97c..5667f99b6fa6 100644
--- a/docs/en/operations/utilities/index.md
+++ b/docs/en/operations/utilities/index.md
@@ -13,4 +13,6 @@ pagination_next: 'en/operations/utilities/clickhouse-copier'
 - [clickhouse-format](../../operations/utilities/clickhouse-format.md) — Enables formatting input queries.
 - [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) — Obfuscates data.
 - [ClickHouse compressor](../../operations/utilities/clickhouse-compressor.md) — Compresses and decompresses data.
+- [clickhouse-disks](../../operations/utilities/clickhouse-disks.md) -- Provides filesystem-like operations
+  on files among different ClickHouse disks.
 - [clickhouse-odbc-bridge](../../operations/utilities/odbc-bridge.md) — A proxy server for ODBC driver.
diff --git a/docs/en/operations/workload-scheduling.md b/docs/en/operations/workload-scheduling.md
new file mode 100644
index 000000000000..241490998921
--- /dev/null
+++ b/docs/en/operations/workload-scheduling.md
@@ -0,0 +1,156 @@
+---
+slug: /en/operations/workload-scheduling
+sidebar_position: 69
+sidebar_label: "Workload scheduling"
+title: "Workload scheduling"
+---
+
+When ClickHouse executes multiple queries simultaneously, they may be using shared resources (e.g. disks). Scheduling constraints and policies can be applied to regulate how resources are utilized and shared between different workloads. For every resource a scheduling hierarchy can be configured. The hierarchy root represents a resource, while leaves are queues, holding requests that exceed resource capacity.
+
+:::note
+Currently only remote disk IO can be scheduled using the described method. For CPU scheduling see settings about thread pools and [`concurrent_threads_soft_limit_num`](server-configuration-parameters/settings.md#concurrent_threads_soft_limit_num). For flexible memory limits see [Memory overcommit](settings/memory-overcommit.md).
+:::
+
+## Disk configuration {#disk-config}
+
+To enable IO scheduling for a specific disk, you have to specify `read_resource` and/or `write_resource` in the storage configuration. This tells ClickHouse which resource should be used for every read and write request on the given disk. Read and write resources can refer to the same resource name, which is useful for local SSDs or HDDs. Multiple different disks can also refer to the same resource, which is useful for remote disks if you want to allow fair division of network bandwidth between e.g. "production" and "development" workloads.
+
+Example:
+```xml
+<clickhouse>
+    <storage_configuration>
+        ...
+        <disks>
+            <s3>
+                <type>s3</type>
+                <endpoint>https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/</endpoint>
+                <access_key_id>your_access_key_id</access_key_id>
+                <secret_access_key>your_secret_access_key</secret_access_key>
+                <read_resource>network_read</read_resource>
+                <write_resource>network_write</write_resource>
+            </s3>
+        </disks>
+        <policies>
+            <s3_main>
+                <volumes>
+                    <main>
+                        <disk>s3</disk>
+                    </main>
+                </volumes>
+            </s3_main>
+        </policies>
+    </storage_configuration>
+</clickhouse>
+```
+
+## Workload markup {#workload_markup}
+
+Queries can be marked with the setting `workload` to distinguish different workloads. If `workload` is not set, then the value "default" is used. Note that you can specify another value using settings profiles. Setting constraints can be used to make `workload` constant if you want all queries from a user to be marked with a fixed value of the `workload` setting.
+
+Let's consider an example of a system with two different workloads: "production" and "development".
+
+```sql
+SELECT count() FROM my_table WHERE value = 42 SETTINGS workload = 'production'
+SELECT count() FROM my_table WHERE value = 13 SETTINGS workload = 'development'
+```
+
+## Resource scheduling hierarchy {#hierarchy}
+
+From the standpoint of the scheduling subsystem a resource represents a hierarchy of scheduling nodes.
+
+```mermaid
+graph TD
+    subgraph network_read
+      nr_root(("/"))
+      -->|100 concurrent requests| nr_fair("fair")
+      -->|75% bandwidth| nr_prod["prod"]
+      nr_fair
+      -->|25% bandwidth| nr_dev["dev"]
+    end
+
+    subgraph network_write
+      nw_root(("/"))
+      -->|100 concurrent requests| nw_fair("fair")
+      -->|75% bandwidth| nw_prod["prod"]
+      nw_fair
+      -->|25% bandwidth| nw_dev["dev"]
+    end
+```
+
+**Possible node types:**
+* `inflight_limit` (constraint) - blocks if either the number of concurrent in-flight requests exceeds `max_requests`, or their total cost exceeds `max_cost`; must have a single child.
+* `bandwidth_limit` (constraint) - blocks if current bandwidth exceeds `max_speed` (0 means unlimited) or burst exceeds `max_burst` (by default equals `max_speed`); must have a single child.
+* `fair` (policy) - selects the next request to serve from one of its children nodes according to max-min fairness; children nodes can specify `weight` (default is 1).
+* `priority` (policy) - selects the next request to serve from one of its children nodes according to static priorities (lower value means higher priority); children nodes can specify `priority` (default is 0).
+* `fifo` (queue) - leaf of the hierarchy capable of holding requests that exceed resource capacity.
+
+To be able to use the full capacity of the underlying resource, you should use `inflight_limit`. Note that a low number of `max_requests` or `max_cost` could lead to not full resource utilization, while too high numbers could lead to empty queues inside the scheduler, which in turn will result in policies being ignored (unfairness or ignoring of priorities) in the subtree. On the other hand, if you want to protect resources from too high utilization, you should use `bandwidth_limit`. It throttles when the amount of resource consumed in `duration` seconds exceeds `max_burst + max_speed * duration` bytes. Two `bandwidth_limit` nodes on the same resource could be used to limit peak bandwidth during short intervals and average bandwidth for longer ones.
+
+The following example shows how to define the IO scheduling hierarchies shown in the picture:
+
+```xml
+<clickhouse>
+    <resources>
+        <network_read>
+            <node path="/">
+                <type>inflight_limit</type>
+                <max_requests>100</max_requests>
+            </node>
+            <node path="/fair">
+                <type>fair</type>
+            </node>
+            <node path="/fair/prod">
+                <type>fifo</type>
+                <weight>3</weight>
+            </node>
+            <node path="/fair/dev">
+                <type>fifo</type>
+            </node>
+        </network_read>
+        <network_write>
+            <node path="/">
+                <type>inflight_limit</type>
+                <max_requests>100</max_requests>
+            </node>
+            <node path="/fair">
+                <type>fair</type>
+            </node>
+            <node path="/fair/prod">
+                <type>fifo</type>
+                <weight>3</weight>
+            </node>
+            <node path="/fair/dev">
+                <type>fifo</type>
+            </node>
+        </network_write>
+    </resources>
+</clickhouse>
+```
+
+## Workload classifiers {#workload_classifiers}
+
+Workload classifiers are used to define the mapping from `workload` specified by a query into leaf-queues that should be used for specific resources. At the moment, workload classification is simple: only static mapping is available.
+
+Example:
+```xml
+<clickhouse>
+    <workload_classifiers>
+        <production>
+            <network_read>/fair/prod</network_read>
+            <network_write>/fair/prod</network_write>
+        </production>
+        <development>
+            <network_read>/fair/dev</network_read>
+            <network_write>/fair/dev</network_write>
+        </development>
+        <default>
+            <network_read>/fair/dev</network_read>
+            <network_write>/fair/dev</network_write>
+        </default>
+    </workload_classifiers>
+</clickhouse>
+```
+
+
+## See also
+ - [system.scheduler](/docs/en/operations/system-tables/scheduler.md)
diff --git a/docs/en/sql-reference/aggregate-functions/combinators.md b/docs/en/sql-reference/aggregate-functions/combinators.md
index a395b350a55f..18ff5073e3f9 100644
--- a/docs/en/sql-reference/aggregate-functions/combinators.md
+++ b/docs/en/sql-reference/aggregate-functions/combinators.md
@@ -300,7 +300,7 @@ SELECT groupArrayResample(30, 75, 30)(name, age) FROM people
 
 Consider the results.
 
-`Jonh` is out of the sample because he’s too young. Other people are distributed according to the specified age intervals.
+`John` is out of the sample because he’s too young. Other people are distributed according to the specified age intervals.
 
 Now let’s count the total number of people and their average wage in the specified age intervals.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/any.md b/docs/en/sql-reference/aggregate-functions/reference/any.md
index db19f524b312..f79fe66c05d7 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/any.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/any.md
@@ -12,3 +12,5 @@ To get a determinate result, you can use the ‘min’ or ‘max’ function instead of `any`.
 
 In some cases, you can rely on the order of execution. This applies to cases when SELECT comes from a subquery that uses ORDER BY.
 
 When a `SELECT` query has the `GROUP BY` clause or at least one aggregate function, ClickHouse (in contrast to MySQL) requires that all expressions in the `SELECT`, `HAVING`, and `ORDER BY` clauses be calculated from keys or from aggregate functions. In other words, each column selected from the table must be used either in keys or inside aggregate functions. To get behavior like in MySQL, you can put the other columns in the `any` aggregate function.
+
+- Alias: `any_value`
diff --git a/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md b/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md
new file mode 100644
index 000000000000..3c71129bdb56
--- /dev/null
+++ b/docs/en/sql-reference/aggregate-functions/reference/arrayconcatagg.md
@@ -0,0 +1,32 @@
+---
+slug: /en/sql-reference/aggregate-functions/reference/array_concat_agg
+sidebar_position: 110
+---
+
+# array_concat_agg
+- Alias of `groupArrayArray`. The function is case insensitive.
+
+**Example**
+
+```text
+SELECT *
+FROM t
+
+┌─a───────┐
+│ [1,2,3] │
+│ [4,5]   │
+│ [6]     │
+└─────────┘
+
+```
+
+Query:
+
+```sql
+SELECT array_concat_agg(a) AS a
+FROM t
+
+┌─a─────────────┐
+│ [1,2,3,4,5,6] │
+└───────────────┘
+```
diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparray.md b/docs/en/sql-reference/aggregate-functions/reference/grouparray.md
index 18048fa4f710..a38e35a72ad3 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/grouparray.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/grouparray.md
@@ -12,7 +12,7 @@ Values can be added to the array in any (indeterminate) order.
 
 The second version (with the `max_size` parameter) limits the size of the resulting array to `max_size` elements. For example, `groupArray(1)(x)` is equivalent to `[any (x)]`.
 
-In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`.
+In some cases, you can still rely on the order of execution.
This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY` if the subquery result is small enough.
 
 **Example**
 
@@ -44,3 +44,5 @@ Result:
 ```
 
 The groupArray function will remove ᴺᵁᴸᴸ value based on the above results.
+
+- Alias: `array_agg`.
diff --git a/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md b/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md
index 1b9681dc8524..9b48ee54ecd3 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/grouparraylast.md
@@ -10,7 +10,7 @@ Syntax: `groupArrayLast(max_size)(x)`
 
 Creates an array of last argument values. For example, `groupArrayLast(1)(x)` is equivalent to `[anyLast (x)]`.
 
-In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY`.
+In some cases, you can still rely on the order of execution. This applies to cases when `SELECT` comes from a subquery that uses `ORDER BY` if the subquery result is small enough.
 
 **Example**
diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md b/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md
index 7ad7e37e5c29..0ddbf700bd31 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/stddevpop.md
@@ -7,6 +7,10 @@ sidebar_position: 30
 
 The result is equal to the square root of [varPop](../../../sql-reference/aggregate-functions/reference/varpop.md).
 
-:::note
+Aliases:
+- `STD`
+- `STDDEV_POP`
+
+:::note
 This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevPopStable` function. It works slower but provides a lower computational error.
 :::
\ No newline at end of file
diff --git a/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md b/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md
index 068725c4991c..01484c2b02eb 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/stddevsamp.md
@@ -7,6 +7,8 @@ sidebar_position: 31
 
 The result is equal to the square root of [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md).
 
-:::note
+Alias: `STDDEV_SAMP`.
+
+:::note
 This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `stddevSampStable` function. It works slower but provides a lower computational error.
 :::
\ No newline at end of file
diff --git a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md
index 29b43851f446..fa320b4e3367 100644
--- a/docs/en/sql-reference/aggregate-functions/reference/studentttest.md
+++ b/docs/en/sql-reference/aggregate-functions/reference/studentttest.md
@@ -33,8 +33,8 @@ The null hypothesis is that means of populations are equal. Normal distribution
 
 - calculated t-statistic. [Float64](../../../sql-reference/data-types/float.md).
 - calculated p-value. [Float64](../../../sql-reference/data-types/float.md).
-- [calculated confidence-interval-low.] [Float64](../../../sql-reference/data-types/float.md).
-- [calculated confidence-interval-high.] [Float64](../../../sql-reference/data-types/float.md).
+- [calculated confidence-interval-low. [Float64](../../../sql-reference/data-types/float.md).] +- [calculated confidence-interval-high. [Float64](../../../sql-reference/data-types/float.md).] **Example** diff --git a/docs/en/sql-reference/aggregate-functions/reference/varpop.md b/docs/en/sql-reference/aggregate-functions/reference/varpop.md index 0a665c83e742..751688b0830e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varpop.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varpop.md @@ -9,6 +9,8 @@ Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x In other words, dispersion for a set of values. Returns `Float64`. -:::note +Alias: `VAR_POP`. + +:::note This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error. ::: \ No newline at end of file diff --git a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md index 76639d2d7a01..9b2b94936ec6 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/varsamp.md +++ b/docs/en/sql-reference/aggregate-functions/reference/varsamp.md @@ -11,6 +11,8 @@ It represents an unbiased estimate of the variance of a random variable if passe Returns `Float64`. When `n <= 1`, returns `+∞`. -:::note +Alias: `VAR_SAMP`. + +:::note This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error. ::: diff --git a/docs/en/sql-reference/data-types/array.md b/docs/en/sql-reference/data-types/array.md index 20ce7d2ed520..0ee7c8de93ce 100644 --- a/docs/en/sql-reference/data-types/array.md +++ b/docs/en/sql-reference/data-types/array.md @@ -4,7 +4,7 @@ sidebar_position: 52 sidebar_label: Array(T) --- -# Array(t) +# Array(T) An array of `T`-type items, with the starting array index as 1. `T` can be any data type, including an array. diff --git a/docs/en/sql-reference/data-types/date.md b/docs/en/sql-reference/data-types/date.md index 048466f7ae44..26e4610aec76 100644 --- a/docs/en/sql-reference/data-types/date.md +++ b/docs/en/sql-reference/data-types/date.md @@ -26,7 +26,12 @@ ENGINE = TinyLog; ``` ``` sql -INSERT INTO dt VALUES (1546300800, 1), ('2019-01-01', 2); +-- Parse Date +-- - from string, +-- - from 'small' integer interpreted as number of days since 1970-01-01, and +-- - from 'big' integer interpreted as number of seconds since 1970-01-01. +INSERT INTO dt VALUES ('2019-01-01', 1), (17897, 2), (1546300800, 3); + SELECT * FROM dt; ``` @@ -34,6 +39,7 @@ SELECT * FROM dt; ┌──timestamp─┬─event_id─┐ │ 2019-01-01 │ 1 │ │ 2019-01-01 │ 2 │ +│ 2019-01-01 │ 3 │ └────────────┴──────────┘ ``` diff --git a/docs/en/sql-reference/data-types/date32.md b/docs/en/sql-reference/data-types/date32.md index 7cf8b1b95fe5..38a07cd817da 100644 --- a/docs/en/sql-reference/data-types/date32.md +++ b/docs/en/sql-reference/data-types/date32.md @@ -13,7 +13,7 @@ A date. 
Supports the date range same with [DateTime64](../../sql-reference/data-
 
 Creating a table with a `Date32`-type column and inserting data into it:
 
 ``` sql
-CREATE TABLE new
+CREATE TABLE dt32
 (
     `timestamp` Date32,
     `event_id` UInt8
@@ -22,8 +22,13 @@ ENGINE = TinyLog;
 ```
 
 ``` sql
-INSERT INTO new VALUES (4102444800, 1), ('2100-01-01', 2);
-SELECT * FROM new;
+-- Parse Date
+-- - from string,
+-- - from 'small' integer interpreted as number of days since 1970-01-01, and
+-- - from 'big' integer interpreted as number of seconds since 1970-01-01.
+INSERT INTO dt32 VALUES ('2100-01-01', 1), (47482, 2), (4102444800, 3);
+
+SELECT * FROM dt32;
 ```
 
 ``` text
diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md
index 0da273e01ad1..42a1ca5aaaca 100644
--- a/docs/en/sql-reference/data-types/datetime.md
+++ b/docs/en/sql-reference/data-types/datetime.md
@@ -48,17 +48,18 @@ ENGINE = TinyLog;
 ```
 
 ``` sql
-INSERT INTO dt Values (1546300800, 1), ('2019-01-01 00:00:00', 2);
-```
+-- Parse DateTime
+-- - from string,
+-- - from integer interpreted as number of seconds since 1970-01-01.
+INSERT INTO dt VALUES ('2019-01-01 00:00:00', 1), (1546300800, 2);
 
-``` sql
 SELECT * FROM dt;
 ```
 
 ``` text
 ┌───────────timestamp─┬─event_id─┐
-│ 2019-01-01 03:00:00 │ 1 │
-│ 2019-01-01 00:00:00 │ 2 │
+│ 2019-01-01 00:00:00 │ 1 │
+│ 2019-01-01 03:00:00 │ 2 │
 └─────────────────────┴──────────┘
 ```
 
@@ -73,7 +74,7 @@ SELECT * FROM dt WHERE timestamp = toDateTime('2019-01-01 00:00:00', 'Asia/Istan
 
 ``` text
 ┌───────────timestamp─┬─event_id─┐
-│ 2019-01-01 00:00:00 │ 2 │
+│ 2019-01-01 00:00:00 │ 1 │
 └─────────────────────┴──────────┘
 ```
 
@@ -85,7 +86,7 @@ SELECT * FROM dt WHERE timestamp = '2019-01-01 00:00:00'
 
 ``` text
 ┌───────────timestamp─┬─event_id─┐
-│ 2019-01-01 03:00:00 │ 1 │
+│ 2019-01-01 00:00:00 │ 1 │
 └─────────────────────┴──────────┘
 ```
 
@@ -140,8 +141,9 @@ Time shifts for multiple days. Some pacific islands changed their timezone offse
 
 - [Type conversion functions](../../sql-reference/functions/type-conversion-functions.md)
 - [Functions for working with dates and times](../../sql-reference/functions/date-time-functions.md)
 - [Functions for working with arrays](../../sql-reference/functions/array-functions.md)
-- [The `date_time_input_format` setting](../../operations/settings/settings.md#settings-date_time_input_format)
-- [The `date_time_output_format` setting](../../operations/settings/settings.md#settings-date_time_output_format)
+- [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#settings-date_time_input_format)
+- [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#settings-date_time_output_format)
 - [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
+- [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone)
 - [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-datetime)
 - [The `Date` data type](../../sql-reference/data-types/date.md)
diff --git a/docs/en/sql-reference/data-types/datetime64.md b/docs/en/sql-reference/data-types/datetime64.md
index 793691850b18..8c7fa17ae92b 100644
--- a/docs/en/sql-reference/data-types/datetime64.md
+++ b/docs/en/sql-reference/data-types/datetime64.md
@@ -28,7 +28,7 @@ Note: The precision of the maximum value is 8. If the maximum precision of 9 dig
 
 1. 
Creating a table with `DateTime64`-type column and inserting data into it:
 
 ``` sql
-CREATE TABLE dt
+CREATE TABLE dt64
 (
     `timestamp` DateTime64(3, 'Asia/Istanbul'),
     `event_id` UInt8
@@ -37,11 +37,12 @@ ENGINE = TinyLog;
 ```
 
 ``` sql
-INSERT INTO dt Values (1546300800123, 1), (1546300800.123, 2), ('2019-01-01 00:00:00', 3);
-```
+-- Parse DateTime64
+-- - from integer interpreted as the number of ticks (1/10^precision seconds, here milliseconds) since 1970-01-01,
+-- - from decimal interpreted as the number of seconds since 1970-01-01, and
+-- - from string.
+INSERT INTO dt64 VALUES (1546300800123, 1), (1546300800.123, 2), ('2019-01-01 00:00:00', 3);
 
-``` sql
-SELECT * FROM dt;
+SELECT * FROM dt64;
 ```
 
 ``` text
@@ -58,7 +59,7 @@ SELECT * FROM dt;
 2. Filtering on `DateTime64` values
 
 ``` sql
-SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul');
+SELECT * FROM dt64 WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Asia/Istanbul');
 ```
 
 ``` text
@@ -70,7 +71,7 @@ SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Asia/
 Unlike `DateTime`, `DateTime64` values are not converted from `String` automatically.
 
 ``` sql
-SELECT * FROM dt WHERE timestamp = toDateTime64(1546300800.123, 3);
+SELECT * FROM dt64 WHERE timestamp = toDateTime64(1546300800.123, 3);
 ```
 
 ``` text
@@ -101,7 +102,7 @@ SELECT toDateTime64(now(), 3, 'Asia/Istanbul') AS column, toTypeName(column) AS
 SELECT
     toDateTime64(timestamp, 3, 'Europe/London') as lon_time,
     toDateTime64(timestamp, 3, 'Asia/Istanbul') as istanbul_time
-FROM dt;
+FROM dt64;
 ```
 
 ``` text
@@ -119,6 +120,7 @@ FROM dt;
 - [The `date_time_input_format` setting](../../operations/settings/settings-formats.md#date_time_input_format)
 - [The `date_time_output_format` setting](../../operations/settings/settings-formats.md#date_time_output_format)
 - [The `timezone` server configuration parameter](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
+- [The `session_timezone` setting](../../operations/settings/settings.md#session_timezone)
 - [Operators for working with dates and times](../../sql-reference/operators/index.md#operators-for-working-with-dates-and-times)
 - [`Date` data type](../../sql-reference/data-types/date.md)
 - [`DateTime` data type](../../sql-reference/data-types/datetime.md)
diff --git a/docs/en/sql-reference/data-types/decimal.md b/docs/en/sql-reference/data-types/decimal.md
index bba5ea74ebe2..e082eb29fbd9 100644
--- a/docs/en/sql-reference/data-types/decimal.md
+++ b/docs/en/sql-reference/data-types/decimal.md
@@ -4,15 +4,17 @@ sidebar_position: 42
 sidebar_label: Decimal
 ---
 
-# Decimal(P, S), Decimal32(S), Decimal64(S), Decimal128(S), Decimal256(S)
+# Decimal, Decimal(P), Decimal(P, S), Decimal32(S), Decimal64(S), Decimal128(S), Decimal256(S)
 
 Signed fixed-point numbers that keep precision during add, subtract and multiply operations. For division least significant digits are discarded (not rounded).
 
 ## Parameters
 
-- P - precision. Valid range: \[ 1 : 76 \]. Determines how many decimal digits number can have (including fraction).
+- P - precision. Valid range: \[ 1 : 76 \]. Determines how many decimal digits number can have (including fraction). By default the precision is 10.
 - S - scale. Valid range: \[ 0 : P \]. Determines how many decimal digits fraction can have.
 
+Decimal(P) is equivalent to Decimal(P, 0). Similarly, the syntax Decimal is equivalent to Decimal(10, 0).
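+
+A quick way to see these defaults (a sketch; it assumes a server version that already accepts the short `Decimal`/`Decimal(P)` syntax):
+
+``` sql
+SELECT toTypeName(CAST(42, 'Decimal')) AS t_default, toTypeName(CAST(42, 'Decimal(5)')) AS t_p;
+-- t_default: Decimal(10, 0), t_p: Decimal(5, 0)
+```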
+
 Depending on P parameter value Decimal(P, S) is a synonym for:
 - P from \[ 1 : 9 \] - for Decimal32(S)
 - P from \[ 10 : 18 \] - for Decimal64(S)
diff --git a/docs/en/sql-reference/data-types/geo.md b/docs/en/sql-reference/data-types/geo.md
index 3b2787008d2c..1d37b829dd56 100644
--- a/docs/en/sql-reference/data-types/geo.md
+++ b/docs/en/sql-reference/data-types/geo.md
@@ -26,9 +26,9 @@ SELECT p, toTypeName(p) FROM geo_point;
 Result:
 
 ``` text
-┌─p─────┬─toTypeName(p)─┐
+┌─p───────┬─toTypeName(p)─┐
 │ (10,10) │ Point         │
-└───────┴───────────────┘
+└─────────┴───────────────┘
 ```
 
 ## Ring
diff --git a/docs/en/sql-reference/data-types/tuple.md b/docs/en/sql-reference/data-types/tuple.md
index dfe0eda2e216..8f87eeca075f 100644
--- a/docs/en/sql-reference/data-types/tuple.md
+++ b/docs/en/sql-reference/data-types/tuple.md
@@ -4,7 +4,7 @@ sidebar_position: 54
 sidebar_label: Tuple(T1, T2, ...)
 ---
 
-# Tuple(t1, T2, …)
+# Tuple(T1, T2, …)
 
 A tuple of elements, each having an individual [type](../../sql-reference/data-types/index.md#data_types). Tuple must contain at least one element.
 
@@ -12,7 +12,7 @@ Tuples are used for temporary column grouping. Columns can be grouped when an IN
 
 Tuples can be the result of a query. In this case, for text formats other than JSON, values are comma-separated in brackets. In JSON formats, tuples are output as arrays (in square brackets).
 
-## Creating a Tuple
+## Creating Tuples
 
 You can use a function to create a tuple:
 
@@ -23,7 +23,7 @@ tuple(T1, T2, ...)
 
 Example of creating a tuple:
 
 ``` sql
-SELECT tuple(1,'a') AS x, toTypeName(x)
+SELECT tuple(1, 'a') AS x, toTypeName(x)
 ```
 
 ``` text
@@ -32,7 +32,7 @@ SELECT tuple(1,'a') AS x, toTypeName(x)
 └─────────┴───────────────────────────┘
 ```
 
-Tuple can contain a single element
+A Tuple can contain a single element
 
 Example:
 
@@ -46,12 +46,12 @@ SELECT tuple('a') AS x;
 └───────┘
 ```
 
-There is a syntax sugar using parentheses `( tuple_element1, tuple_element2 )` to create a tuple of several elements without tuple function.
+Syntax `(tuple_element1, tuple_element2)` may be used to create a tuple of several elements without calling the `tuple()` function.
 
 Example:
 
 ``` sql
-SELECT (1, 'a') AS x, (today(), rand(), 'someString') y, ('a') not_a_tuple;
+SELECT (1, 'a') AS x, (today(), rand(), 'someString') AS y, ('a') AS not_a_tuple;
 ```
 
 ``` text
@@ -60,9 +60,9 @@ SELECT (1, 'a') AS x, (today(), rand(), 'someString') y, ('a') not_a_tuple;
 └─────────┴────────────────────────────────────────┴─────────────┘
 ```
 
-## Working with Data Types
+## Data Type Detection
 
-When creating a tuple on the fly, ClickHouse automatically detects the type of each argument as the minimum of the types which can store the argument value. If the argument is [NULL](../../sql-reference/syntax.md#null-literal), the type of the tuple element is [Nullable](../../sql-reference/data-types/nullable.md).
+When creating tuples on the fly, ClickHouse infers the type of each tuple argument as the smallest type which can hold the provided argument value. If the value is [NULL](../../sql-reference/syntax.md#null-literal), the inferred type is [Nullable](../../sql-reference/data-types/nullable.md).
Example of automatic data type detection:
 
 ``` sql
 SELECT tuple(1, NULL) AS x, toTypeName(x)
 ```
 
 ``` text
-┌─x────────┬─toTypeName(tuple(1, NULL))──────┐
-│ (1,NULL) │ Tuple(UInt8, Nullable(Nothing)) │
-└──────────┴─────────────────────────────────┘
+┌─x─────────┬─toTypeName(tuple(1, NULL))──────┐
+│ (1, NULL) │ Tuple(UInt8, Nullable(Nothing)) │
+└───────────┴─────────────────────────────────┘
 ```
 
-## Addressing Tuple Elements
+## Referring to Tuple Elements
 
-It is possible to read elements of named tuples using indexes and names:
+Tuple elements can be referred to by name or by index:
 
 ``` sql
 CREATE TABLE named_tuples (`a` Tuple(s String, i Int64)) ENGINE = Memory;
-
 INSERT INTO named_tuples VALUES (('y', 10)), (('x',-10));
 
-SELECT a.s FROM named_tuples;
-
-SELECT a.2 FROM named_tuples;
+SELECT a.s FROM named_tuples; -- by name
+SELECT a.2 FROM named_tuples; -- by index
 ```
 
 Result:
 
@@ -106,7 +104,7 @@ Result:
 
 ## Comparison operations with Tuple
 
-The operation of comparing two tuples is performed sequentially element by element from left to right. If the element of the first tuple is greater than the corresponding element of the second tuple, then the first tuple is greater than the second, if the elements are equal, the next element is compared.
+Two tuples are compared by sequentially comparing their elements from left to right. If the first tuple's element is greater (smaller) than the second tuple's corresponding element, then the first tuple is greater (smaller) than the second; otherwise (both elements are equal), the next element is compared.
 
 Example:
diff --git a/docs/en/sql-reference/data-types/uuid.md b/docs/en/sql-reference/data-types/uuid.md
index b0f19f0d8bec..40f756b95888 100644
--- a/docs/en/sql-reference/data-types/uuid.md
+++ b/docs/en/sql-reference/data-types/uuid.md
@@ -6,42 +6,42 @@ sidebar_label: UUID
 
 # UUID
 
-A universally unique identifier (UUID) is a 16-byte number used to identify records. For detailed information about the UUID, see [Wikipedia](https://en.wikipedia.org/wiki/Universally_unique_identifier).
+A Universally Unique Identifier (UUID) is a 16-byte value used to identify records. For detailed information about UUIDs, see [Wikipedia](https://en.wikipedia.org/wiki/Universally_unique_identifier).
 
-The example of UUID type value is represented below:
+While different UUID variants exist (see [here](https://datatracker.ietf.org/doc/html/draft-ietf-uuidrev-rfc4122bis)), ClickHouse does not validate that inserted UUIDs conform to a particular variant. UUIDs are internally treated as a sequence of 16 random bytes with [8-4-4-4-12 representation](https://en.wikipedia.org/wiki/Universally_unique_identifier#Textual_representation) at SQL level.
+
+Example UUID value:
 
 ``` text
 61f0c404-5cb3-11e7-907b-a6006ad3dba0
 ```
 
-If you do not specify the UUID column value when inserting a new record, the UUID value is filled with zero:
+The default UUID is all-zero. It is used, for example, when a new record is inserted but no value for a UUID column is specified:
 
 ``` text
 00000000-0000-0000-0000-000000000000
 ```
 
-## How to Generate
+## Generating UUIDs
 
-To generate the UUID value, ClickHouse provides the [generateUUIDv4](../../sql-reference/functions/uuid-functions.md) function.
+ClickHouse provides the [generateUUIDv4](../../sql-reference/functions/uuid-functions.md) function to generate random UUID version 4 values.
 
 ## Usage Example
 
 **Example 1**
 
-This example demonstrates creating a table with the UUID type column and inserting a value into the table.
+This example demonstrates the creation of a table with a UUID column and the insertion of a value into the table. ``` sql CREATE TABLE t_uuid (x UUID, y String) ENGINE=TinyLog -``` -``` sql INSERT INTO t_uuid SELECT generateUUIDv4(), 'Example 1' -``` -``` sql SELECT * FROM t_uuid ``` +Result: + ``` text ┌────────────────────────────────────x─┬─y─────────┐ │ 417ddc5d-e556-4d27-95dd-a34d84e46a50 │ Example 1 │ @@ -50,13 +50,11 @@ SELECT * FROM t_uuid **Example 2** -In this example, the UUID column value is not specified when inserting a new record. +In this example, no UUID column value is specified when the record is inserted, i.e. the default UUID value is inserted: ``` sql INSERT INTO t_uuid (y) VALUES ('Example 2') -``` -``` sql SELECT * FROM t_uuid ``` diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md index 6c3d80683dbf..f55b6114e07c 100644 --- a/docs/en/sql-reference/dictionaries/index.md +++ b/docs/en/sql-reference/dictionaries/index.md @@ -123,7 +123,7 @@ LAYOUT(...) -- Memory layout configuration LIFETIME(...) -- Lifetime of dictionary in memory ``` -## Storing Dictionaries in Memory {#storig-dictionaries-in-memory} +## Storing Dictionaries in Memory {#storing-dictionaries-in-memory} There are a variety of ways to store dictionaries in memory. @@ -1092,7 +1092,7 @@ Types of sources (`source_type`): - [Local file](#local_file) - [Executable File](#executable) - [Executable Pool](#executable_pool) -- [HTTP(s)](#http) +- [HTTP(S)](#https) - DBMS - [ODBC](#odbc) - [MySQL](#mysql) @@ -1102,7 +1102,7 @@ Types of sources (`source_type`): - [Cassandra](#cassandra) - [PostgreSQL](#postgresql) -## Local File {#local_file} +### Local File {#local_file} Example of settings: @@ -1132,7 +1132,7 @@ When a dictionary with source `FILE` is created via DDL command (`CREATE DICTION - [Dictionary function](../../sql-reference/table-functions/dictionary.md#dictionary-function) -## Executable File {#executable} +### Executable File {#executable} Working with executable files depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request to the executable file’s STDIN. Otherwise, ClickHouse starts the executable file and treats its output as dictionary data. @@ -1161,7 +1161,7 @@ Setting fields: That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled; otherwise, the DB user would be able to execute arbitrary binaries on the ClickHouse node. -## Executable Pool {#executable_pool} +### Executable Pool {#executable_pool} Executable pool allows loading data from pool of processes. This source does not work with dictionary layouts that need to load all data from source. Executable pool works if the dictionary [is stored](#ways-to-store-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, or `complex_key_direct` layouts. @@ -1196,9 +1196,9 @@ Setting fields: That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node. -## Http(s) {#https} +### HTTP(S) {#https} -Working with an HTTP(s) server depends on [how the dictionary is stored in memory](#storig-dictionaries-in-memory).
If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method. +Working with an HTTP(S) server depends on [how the dictionary is stored in memory](#storing-dictionaries-in-memory). If the dictionary is stored using `cache` and `complex_key_cache`, ClickHouse requests the necessary keys by sending a request via the `POST` method. Example of settings: @@ -1248,7 +1248,55 @@ Setting fields: When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`) remote hosts for HTTP dictionaries are checked against the contents of `remote_url_allow_hosts` section from config to prevent database users to access arbitrary HTTP server. -### Known Vulnerability of the ODBC Dictionary Functionality +### DBMS + +#### ODBC + +You can use this method to connect to any database that has an ODBC driver. + +Example of settings: + +``` xml +<source> + <odbc> + <db>DatabaseName</db> + <table>SchemaName.TableName</table>
+ <connection_string>DSN=some_parameters</connection_string> + <invalidate_query>SQL_QUERY</invalidate_query> + <query>SELECT id, value_1, value_2 FROM SchemaName.TableName</query> + </odbc>
+</source> +``` + +or + +``` sql +SOURCE(ODBC( + db 'DatabaseName' + table 'SchemaName.TableName' + connection_string 'DSN=some_parameters' + invalidate_query 'SQL_QUERY' + query 'SELECT id, value_1, value_2 FROM db_name.table_name' +)) +``` + +Setting fields: + +- `db` – Name of the database. Omit it if the database name is set in the `<connection_string>` parameters. +- `table` – Name of the table and, if it exists, of the schema. +- `connection_string` – Connection string. +- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). +- `query` – The custom query. Optional parameter. + +:::note +The `table` and `query` fields cannot be used together. Either the `table` or the `query` field must be declared. +::: + +ClickHouse receives quoting symbols from the ODBC driver and quotes all settings in queries to the driver, so it’s necessary to set the table name according to the case of the table name in the database. + +If you have problems with encodings when using Oracle, see the corresponding [FAQ](/knowledgebase/oracle-odbc) item. + +##### Known Vulnerability of the ODBC Dictionary Functionality :::note When connecting to the database through the ODBC driver connection parameter `Servername` can be substituted. In this case values of `USERNAME` and `PASSWORD` from `odbc.ini` are sent to the remote server and can be compromised. @@ -1277,7 +1325,7 @@ SELECT * FROM odbc('DSN=gregtest;Servername=some-server.com', 'test_db'); ODBC driver will send values of `USERNAME` and `PASSWORD` from `odbc.ini` to `some-server.com`. -### Example of Connecting Postgresql +##### Example of Connecting PostgreSQL Ubuntu OS. @@ -1358,7 +1406,7 @@ LIFETIME(MIN 300 MAX 360) You may need to edit `odbc.ini` to specify the full path to the library with the driver `DRIVER=/usr/local/lib/psqlodbcw.so`. -### Example of Connecting MS SQL Server +##### Example of Connecting MS SQL Server Ubuntu OS. @@ -1462,55 +1510,7 @@ LAYOUT(FLAT()) LIFETIME(MIN 300 MAX 360) ``` -## DBMS - -### ODBC - -You can use this method to connect any database that has an ODBC driver. - -Example of settings: - -``` xml -<source> - <odbc> - <db>DatabaseName</db> - <table>ShemaName.TableName</table>
- <connection_string>DSN=some_parameters</connection_string> - <invalidate_query>SQL_QUERY</invalidate_query> - <query>SELECT id, value_1, value_2 FROM ShemaName.TableName</query> - </odbc>
-</source> -``` - -or - -``` sql -SOURCE(ODBC( - db 'DatabaseName' - table 'SchemaName.TableName' - connection_string 'DSN=some_parameters' - invalidate_query 'SQL_QUERY' - query 'SELECT id, value_1, value_2 FROM db_name.table_name' -)) -``` - -Setting fields: - -- `db` – Name of the database. Omit it if the database name is set in the `<connection_string>` parameters. -- `table` – Name of the table and schema if exists. -- `connection_string` – Connection string. -- `invalidate_query` – Query for checking the dictionary status. Optional parameter. Read more in the section [Updating dictionaries](#dictionary-updates). -- `query` – The custom query. Optional parameter. - -:::note -The `table` and `query` fields cannot be used together. And either one of the `table` or `query` fields must be declared. -::: - -ClickHouse receives quoting symbols from ODBC-driver and quote all settings in queries to driver, so it’s necessary to set table name accordingly to table name case in database. - -If you have a problems with encodings when using Oracle, see the corresponding [FAQ](/knowledgebase/oracle-odbc) item. - -### Mysql +#### MySQL Example of settings: @@ -1627,7 +1627,7 @@ SOURCE(MYSQL( )) ``` -### ClickHouse +#### ClickHouse Example of settings: @@ -1680,7 +1680,7 @@ Setting fields: The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. ::: -### Mongodb +#### MongoDB Example of settings: @@ -1723,7 +1723,7 @@ Setting fields: - `options` - MongoDB connection string options (optional parameter). -### Redis +#### Redis Example of settings: @@ -1756,7 +1756,7 @@ Setting fields: - `storage_type` – The structure of internal Redis storage using for work with keys. `simple` is for simple sources and for hashed single key sources, `hash_map` is for hashed sources with two keys. Ranged sources and cache sources with complex key are unsupported. May be omitted, default value is `simple`. - `db_index` – The specific numeric index of Redis logical database. May be omitted, default value is 0. -### Cassandra +#### Cassandra Example of settings: @@ -1798,7 +1798,7 @@ Setting fields: The `column_family` or `where` fields cannot be used together with the `query` field. And either one of the `column_family` or `query` fields must be declared. ::: -### PostgreSQL +#### PostgreSQL Example of settings: @@ -1855,7 +1855,7 @@ Setting fields: The `table` or `where` fields cannot be used together with the `query` field. And either one of the `table` or `query` fields must be declared. ::: -## Null +### Null A special source that can be used to create dummy (empty) dictionaries. Such dictionaries can be useful for tests or in setups with separated data and query nodes when using Distributed tables. @@ -2361,6 +2361,12 @@ Result: └────────────────────────────────────────┴───────────────────────────────────────────────────────────────────────────────────────┘ ``` +#### Matching Modes + +Pattern matching behavior can be modified with certain dictionary settings: +- `regexp_dict_flag_case_insensitive`: Use case-insensitive matching (defaults to `false`). Can be overridden in individual expressions with `(?i)` and `(?-i)`. +- `regexp_dict_flag_dotall`: Allow '.' to match newline characters (defaults to `false`). + ### Use Regular Expression Tree Dictionary in ClickHouse Cloud The `YAMLRegExpTree` source used above works in ClickHouse Open Source but not in ClickHouse Cloud.
To use regexp tree dictionaries in ClickHouse Cloud, first create a regexp tree dictionary from a YAML file locally in ClickHouse Open Source, then dump this dictionary into a CSV file using the `dictionary` table function and the [INTO OUTFILE](../statements/select/into-outfile.md) clause, as sketched in the example below. @@ -2476,52 +2482,3 @@ Dictionary updates (other than loading at first use) do not block queries. Durin We recommend periodically updating the dictionaries with the geobase. During an update, generate new files and write them to a separate location. When everything is ready, rename them to the files used by the server. There are also functions for working with OS identifiers and search engines, but they shouldn’t be used. - -## Embedded Dictionaries - - - -ClickHouse contains a built-in feature for working with a geobase. - -This allows you to: - -- Use a region’s ID to get its name in the desired language. -- Use a region’s ID to get the ID of a city, area, federal district, country, or continent. -- Check whether a region is part of another region. -- Get a chain of parent regions. - -All the functions support “translocality,” the ability to simultaneously use different perspectives on region ownership. For more information, see the section “Functions for working with web analytics dictionaries”. - -The internal dictionaries are disabled in the default package. -To enable them, uncomment the parameters `path_to_regions_hierarchy_file` and `path_to_regions_names_files` in the server configuration file. - -The geobase is loaded from text files. - -Place the `regions_hierarchy*.txt` files into the `path_to_regions_hierarchy_file` directory. This configuration parameter must contain the path to the `regions_hierarchy.txt` file (the default regional hierarchy), and the other files (`regions_hierarchy_ua.txt`) must be located in the same directory. - -Put the `regions_names_*.txt` files in the `path_to_regions_names_files` directory. - -You can also create these files yourself. The file format is as follows: - -`regions_hierarchy*.txt`: TabSeparated (no header), columns: - -- region ID (`UInt32`) -- parent region ID (`UInt32`) -- region type (`UInt8`): 1 - continent, 3 - country, 4 - federal district, 5 - region, 6 - city; other types do not have values -- population (`UInt32`) — optional column - -`regions_names_*.txt`: TabSeparated (no header), columns: - -- region ID (`UInt32`) -- region name (`String`) — Can’t contain tabs or line feeds, even escaped ones. - -A flat array is used for storing in RAM. For this reason, IDs shouldn’t be more than a million. - -Dictionaries can be updated without restarting the server. However, the set of available dictionaries is not updated. -For updates, the file modification times are checked. If a file has changed, the dictionary is updated. -The interval to check for changes is configured in the `builtin_dictionaries_reload_interval` parameter. -Dictionary updates (other than loading at first use) do not block queries. During updates, queries use the old versions of dictionaries. If an error occurs during an update, the error is written to the server log, and queries continue using the old version of dictionaries. - -We recommend periodically updating the dictionaries with the geobase. During an update, generate new files and write them to a separate location. When everything is ready, rename them to the files used by the server. - -There are also functions for working with OS identifiers and search engines, but they shouldn’t be used.
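For illustration, here is a minimal sketch of the dump step mentioned in the Cloud workflow above. The dictionary name `regexp_dict` is a hypothetical placeholder; the `dictionary` table function and the `INTO OUTFILE` clause are used as referenced in that section:

``` sql
-- Sketch: export a regexp tree dictionary created locally in ClickHouse
-- Open Source to a CSV file, so the data can be reloaded in ClickHouse Cloud.
-- `regexp_dict` is a hypothetical dictionary name; substitute your own.
SELECT *
FROM dictionary('regexp_dict')
INTO OUTFILE 'regexp_dict.csv'
FORMAT CSV;
```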
diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md index 64fae0e82f0f..69f1816b7dfa 100644 --- a/docs/en/sql-reference/functions/arithmetic-functions.md +++ b/docs/en/sql-reference/functions/arithmetic-functions.md @@ -6,9 +6,20 @@ sidebar_label: Arithmetic # Arithmetic Functions -The result type of all arithmetic functions is the smallest type which can represent all possible results. Size promotion happens for integers up to 32 bit, e.g. `UInt8 + UInt16 = UInt32`. If one of the inters has 64 or more bits, the result is of the same type as the bigger of the input integers, e.g. `UInt16 + UInt128 = UInt128`. While this introduces a risk of overflows around the value range boundary, it ensures that calculations are performed quickly using the maximum native integer width of 64 bit. +Arithmetic functions work for any two operands of type `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, `Float32`, or `Float64`. -The result of addition or multiplication of two integers is unsigned unless one of the integers is signed. +Before performing the operation, both operands are cast to the result type. The result type is determined as follows (unless specified +differently in the function documentation below): +- If both operands are up to 32 bits wide, the size of the result type will be the size of the next bigger type following the bigger of the + two operands (integer size promotion). For example, `UInt8 + UInt16 = UInt32` or `Float32 * Float32 = Float64`. +- If one of the operands has 64 or more bits, the size of the result type will be the same size as the bigger of the two operands. For + example, `UInt32 + UInt128 = UInt128` or `Float32 * Float64 = Float64`. +- If one of the operands is signed, the result type will also be signed, otherwise it will be unsigned. For example, `UInt32 * Int32 = Int64`. + +These rules make sure that the result type will be the smallest type which can represent all possible results. While this introduces a risk +of overflows around the value range boundary, it ensures that calculations are performed quickly using the maximum native integer width of +64 bit. This behavior also guarantees compatibility with many other databases which provide 64 bit integers (BIGINT) as the biggest integer +type. Example: @@ -22,8 +33,6 @@ SELECT toTypeName(0), toTypeName(0 + 0), toTypeName(0 + 0 + 0), toTypeName(0 + 0 └───────────────┴────────────────────────┴─────────────────────────────────┴──────────────────────────────────────────┘ ``` -Arithmetic functions work for any pair of `UInt8`, `UInt16`, `UInt32`, `UInt64`, `Int8`, `Int16`, `Int32`, `Int64`, `Float32`, or `Float64` values. - Overflows are produced the same way as in C++. ## plus @@ -68,7 +77,7 @@ Alias: `a \* b` (operator) ## divide -Calculates the quotient of two values `a` and `b`. The result is always a floating-point value. If you need integer division, you can use the `intDiv` function. +Calculates the quotient of two values `a` and `b`. The result type is always [Float64](../../sql-reference/data-types/float.md). Integer division is provided by the `intDiv` function. Division by 0 returns `inf`, `-inf`, or `nan`. @@ -84,7 +93,7 @@ Alias: `a / b` (operator) Performs an integer division of two values `a` by `b`, i.e. computes the quotient rounded down to the next smallest integer. -The result has the same type as the dividend (the first parameter). +The result has the same width as the dividend (the first parameter).
An exception is thrown when dividing by zero, when the quotient does not fit in the range of the dividend, or when dividing a minimal negative number by minus one. @@ -135,7 +144,7 @@ intDivOrZero(a, b) Calculates the remainder of the division of two values `a` by `b`. -The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result is a floating-point number. +The result type is an integer if both inputs are integers. If one of the inputs is a floating-point number, the result type is [Float64](../../sql-reference/data-types/float.md). The remainder is computed like in C++. Truncated division is used for negative numbers. diff --git a/docs/en/sql-reference/functions/array-functions.md b/docs/en/sql-reference/functions/array-functions.md index bdd1445c9907..73c51a41dfba 100644 --- a/docs/en/sql-reference/functions/array-functions.md +++ b/docs/en/sql-reference/functions/array-functions.md @@ -102,6 +102,8 @@ The function also works for strings. Can be optimized by enabling the [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns) setting. With `optimize_functions_to_subcolumns = 1` the function reads only [size0](../../sql-reference/data-types/array.md#array-size) subcolumn instead of reading and processing the whole array column. The query `SELECT length(arr) FROM table` transforms to `SELECT arr.size0 FROM TABLE`. +Alias: `OCTET_LENGTH` + ## emptyArrayUInt8, emptyArrayUInt16, emptyArrayUInt32, emptyArrayUInt64 ## emptyArrayInt8, emptyArrayInt16, emptyArrayInt32, emptyArrayInt64 @@ -142,6 +144,7 @@ range([start, ] end [, step]) - All arguments `start`, `end`, `step` must be below data types: `UInt8`, `UInt16`, `UInt32`, `UInt64`,`Int8`, `Int16`, `Int32`, `Int64`, as well as elements of the returned array, which's type is a super type of all arguments. - An exception is thrown if query results in arrays with a total length of more than number of elements specified by the [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block) setting. +- Returns Null if any argument has Nullable(Nothing) type. An exception is thrown if any argument has a Null value (Nullable(T) type). **Examples** @@ -180,9 +183,8 @@ arrayConcat(arrays) **Arguments** - `arrays` – Arbitrary number of arguments of [Array](../../sql-reference/data-types/array.md) type. - **Example** - +**Example** ``` sql SELECT arrayConcat([1, 2], [3, 4], [5, 6]) AS res @@ -230,13 +232,15 @@ hasAll(set, subset) **Arguments** - `set` – Array of any type with a set of elements. -- `subset` – Array of any type with elements that should be tested to be a subset of `set`. +- `subset` – Array of any type that shares a common supertype with `set`, containing elements that should be tested to be a subset of `set`. **Return values** - `1`, if `set` contains all of the elements from `subset`. - `0`, otherwise. +Raises an exception `NO_COMMON_TYPE` if the set and subset elements do not share a common supertype. + **Peculiar properties** - An empty array is a subset of any array. @@ -253,7 +257,7 @@ hasAll(set, subset) `SELECT hasAll(['a', 'b'], ['a'])` returns 1. -`SELECT hasAll([1], ['a'])` returns 0. +`SELECT hasAll([1], ['a'])` raises a `NO_COMMON_TYPE` exception. `SELECT hasAll([[1, 2], [3, 4]], [[1, 2], [3, 5]])` returns 0. @@ -268,13 +272,15 @@ hasAny(array1, array2) **Arguments** - `array1` – Array of any type with a set of elements.
-- `array2` – Array of any type with a set of elements. +- `array2` – Array of any type that shares a common supertype with `array1`. **Return values** - `1`, if `array1` and `array2` have one similar element at least. - `0`, otherwise. +Raises an exception `NO_COMMON_TYPE` if the array1 and array2 elements do not share a common supertype. + **Peculiar properties** - `Null` processed as a value. @@ -288,7 +294,7 @@ hasAny(array1, array2) `SELECT hasAny([-128, 1., 512], [1])` returns `1`. -`SELECT hasAny([[1, 2], [3, 4]], ['a', 'c'])` returns `0`. +`SELECT hasAny([[1, 2], [3, 4]], ['a', 'c'])` raises a `NO_COMMON_TYPE` exception. `SELECT hasAll([[1, 2], [3, 4]], [[1, 2], [1, 2]])` returns `1`. @@ -318,6 +324,8 @@ For Example: - `1`, if `array1` contains `array2`. - `0`, otherwise. +Raises an exception `NO_COMMON_TYPE` if the array1 and array2 elements do not share a common supertype. + **Peculiar properties** - The function will return `1` if `array2` is empty. @@ -339,6 +347,9 @@ For Example: `SELECT hasSubstr(['a', 'b' , 'c'], ['a', 'c'])` returns 0. `SELECT hasSubstr([[1, 2], [3, 4], [5, 6]], [[1, 2], [3, 4]])` returns 1. + +`SELECT hasSubstr([1, 2, NULL, 3, 4], ['a'])` raises a `NO_COMMON_TYPE` exception. + ## indexOf(arr, x) @@ -646,7 +657,7 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res; Array elements set to `NULL` are handled as normal values. -## arraySort(\[func,\] arr, …) {#array_functions-sort} +## arraySort(\[func,\] arr, …) {#sort} Sorts the elements of the `arr` array in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the elements of the array. If `func` accepts multiple arguments, the `arraySort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arraySort` description. @@ -705,7 +716,7 @@ SELECT arraySort((x) -> -x, [1, 2, 3]) as res; └─────────┘ ``` -For each element of the source array, the lambda function returns the sorting key, that is, \[1 –\> -1, 2 –\> -2, 3 –\> -3\]. Since the `arraySort` function sorts the keys in ascending order, the result is \[3, 2, 1\]. Thus, the `(x) –> -x` lambda function sets the [descending order](#array_functions-reverse-sort) in a sorting. +For each element of the source array, the lambda function returns the sorting key, that is, \[1 –\> -1, 2 –\> -2, 3 –\> -3\]. Since the `arraySort` function sorts the keys in ascending order, the result is \[3, 2, 1\]. Thus, the `(x) –> -x` lambda function sets the [descending order](#reverse-sort) in a sorting. The lambda function can accept multiple arguments. In this case, you need to pass the `arraySort` function several arrays of identical length that the arguments of lambda function will correspond to. The resulting array will consist of elements from the first input array; elements from the next input array(s) specify the sorting keys. For example: @@ -751,7 +762,7 @@ To improve sorting efficiency, the [Schwartzian transform](https://en.wikipedia. Same as `arraySort` with additional `limit` argument allowing partial sorting. Returns an array of the same size as the original array where elements in range `[1..limit]` are sorted in ascending order. Remaining elements `(limit..N]` shall contain elements in unspecified order. -## arrayReverseSort(\[func,\] arr, …) {#array_functions-reverse-sort} +## arrayReverseSort(\[func,\] arr, …) {#reverse-sort} Sorts the elements of the `arr` array in descending order.
If the `func` function is specified, `arr` is sorted according to the result of the `func` function applied to the elements of the array, and then the sorted array is reversed. If `func` accepts multiple arguments, the `arrayReverseSort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arrayReverseSort` description. @@ -869,7 +880,7 @@ A special function. See the section [“ArrayJoin function”](../../sql-referen ## arrayDifference -Calculates an array of differences between adjacent array elements. The first element of the result array will be 0, the second `a[1] - a[0]`, the third `a[2] - a[1]`, etc. The type of elements in the result array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`). +Calculates an array of differences between adjacent array elements. The first element of the result array will be 0, the second `a[1] - a[0]`, the third `a[2] - a[1]`, etc. The type of elements in the result array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`). **Syntax** @@ -987,6 +998,24 @@ SELECT └──────────────┴───────────┘ ``` +## arrayJaccardIndex + +Returns the [Jaccard index](https://en.wikipedia.org/wiki/Jaccard_index) of two arrays. + +**Example** + +Query: +``` sql +SELECT arrayJaccardIndex([1, 2], [2, 3]) AS res +``` + +Result: +``` text +┌─res────────────────┐ +│ 0.3333333333333333 │ +└────────────────────┘ +``` + ## arrayReduce Applies an aggregate function to array elements and returns its result. The name of the aggregation function is passed as a string in single quotes `'max'`, `'sum'`. When using parametric aggregate functions, the parameter is indicated after the function name in parentheses `'uniqUpTo(6)'`. @@ -1094,6 +1123,32 @@ Result: └─────────────────────────────┘ ``` +## arrayFold + +Applies a lambda function to one or more equally-sized arrays and collects the result in an accumulator. + +**Syntax** + +``` sql +arrayFold(lambda_function, arr1, arr2, ..., accumulator) +``` + +**Example** + +Query: + +``` sql +SELECT arrayFold((x, acc) -> acc + x * 2, [1, 2, 3, 4], toInt64(3)) AS res; +``` + +Result: + +``` text +┌─arrayFold(lambda(tuple(x, acc), plus(acc, multiply(x, 2))), [1, 2, 3, 4], toInt64(3))─┐ +│ 23 │ +└───────────────────────────────────────────────────────────────────────────────────────┘ +``` + ## arrayReverse(arr) Returns an array of the same size as the original array containing the elements in reverse order. @@ -1765,6 +1820,404 @@ Return value type is always [Float64](../../sql-reference/data-types/float.md). └─────┴──────────────────────────────────────────────────────────────────────────────────────────┘ ``` +## arrayRotateLeft + +Rotates an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements. +If the number of elements is negative, the array is rotated to the right. + +**Syntax** + +``` sql +arrayRotateLeft(arr, n) +``` + +**Arguments** + +- `arr` — [Array](../../sql-reference/data-types/array.md). +- `n` — Number of elements to rotate. + +**Returned value** + +- An array rotated to the left by the specified number of elements. + +Type: [Array](../../sql-reference/data-types/array.md).
+ +**Examples** + +Query: + +``` sql +SELECT arrayRotateLeft([1,2,3,4,5,6], 2) as res; +``` + +Result: + +``` text +┌─res───────────┐ +│ [3,4,5,6,1,2] │ +└───────────────┘ +``` + +Query: + +``` sql +SELECT arrayRotateLeft([1,2,3,4,5,6], -2) as res; +``` + +Result: + +``` text +┌─res───────────┐ +│ [5,6,1,2,3,4] │ +└───────────────┘ +``` + +Query: + +``` sql +SELECT arrayRotateLeft(['a','b','c','d','e'], 3) as res; +``` + +Result: + +``` text +┌─res───────────────────┐ +│ ['d','e','a','b','c'] │ +└───────────────────────┘ +``` + +## arrayRotateRight + +Rotates an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements. +If the number of elements is negative, the array is rotated to the left. + +**Syntax** + +``` sql +arrayRotateRight(arr, n) +``` + +**Arguments** + +- `arr` — [Array](../../sql-reference/data-types/array.md). +- `n` — Number of elements to rotate. + +**Returned value** + +- An array rotated to the right by the specified number of elements. + +Type: [Array](../../sql-reference/data-types/array.md). + +**Examples** + +Query: + +``` sql +SELECT arrayRotateRight([1,2,3,4,5,6], 2) as res; +``` + +Result: + +``` text +┌─res───────────┐ +│ [5,6,1,2,3,4] │ +└───────────────┘ +``` + +Query: + +``` sql +SELECT arrayRotateRight([1,2,3,4,5,6], -2) as res; +``` + +Result: + +``` text +┌─res───────────┐ +│ [3,4,5,6,1,2] │ +└───────────────┘ +``` + +Query: + +``` sql +SELECT arrayRotateRight(['a','b','c','d','e'], 3) as res; +``` + +Result: + +``` text +┌─res───────────────────┐ +│ ['c','d','e','a','b'] │ +└───────────────────────┘ +``` + +## arrayShiftLeft + +Shifts an [array](../../sql-reference/data-types/array.md) to the left by the specified number of elements. +New elements are filled with the provided argument or the default value of the array element type. +If the number of elements is negative, the array is shifted to the right. + +**Syntax** + +``` sql +arrayShiftLeft(arr, n[, default]) +``` + +**Arguments** + +- `arr` — [Array](../../sql-reference/data-types/array.md). +- `n` — Number of elements to shift. +- `default` — Optional. Default value for new elements. + +**Returned value** + +- An array shifted to the left by the specified number of elements. + +Type: [Array](../../sql-reference/data-types/array.md). + +**Examples** + +Query: + +``` sql +SELECT arrayShiftLeft([1,2,3,4,5,6], 2) as res; +``` + +Result: + +``` text +┌─res───────────┐ +│ [3,4,5,6,0,0] │ +└───────────────┘ +``` + +Query: + +``` sql +SELECT arrayShiftLeft([1,2,3,4,5,6], -2) as res; +``` + +Result: + +``` text +┌─res───────────┐ +│ [0,0,1,2,3,4] │ +└───────────────┘ +``` + +Query: + +``` sql +SELECT arrayShiftLeft([1,2,3,4,5,6], 2, 42) as res; +``` + +Result: + +``` text +┌─res─────────────┐ +│ [3,4,5,6,42,42] │ +└─────────────────┘ +``` + +Query: + +``` sql +SELECT arrayShiftLeft(['a','b','c','d','e','f'], 3, 'foo') as res; +``` + +Result: + +``` text +┌─res─────────────────────────────┐ +│ ['d','e','f','foo','foo','foo'] │ +└─────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT arrayShiftLeft([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res; +``` + +Result: + +``` text +┌─res─────────────────┐ +│ [3,4,5,6,4242,4242] │ +└─────────────────────┘ +``` + +## arrayShiftRight + +Shifts an [array](../../sql-reference/data-types/array.md) to the right by the specified number of elements. +New elements are filled with the provided argument or the default value of the array element type. +If the number of elements is negative, the array is shifted to the left. 
+ +**Syntax** + +``` sql +arrayShiftRight(arr, n[, default]) +``` + +**Arguments** + +- `arr` — [Array](../../sql-reference/data-types/array.md). +- `n` — Number of elements to shift. +- `default` — Optional. Default value for new elements. + +**Returned value** + +- An array shifted to the right by the specified number of elements. + +Type: [Array](../../sql-reference/data-types/array.md). + +**Examples** + +Query: + +``` sql +SELECT arrayShiftRight([1,2,3,4,5,6], 2) as res; +``` + +Result: + +``` text +┌─res───────────┐ +│ [0,0,1,2,3,4] │ +└───────────────┘ +``` + +Query: + +``` sql +SELECT arrayShiftRight([1,2,3,4,5,6], -2) as res; +``` + +Result: + +``` text +┌─res───────────┐ +│ [3,4,5,6,0,0] │ +└───────────────┘ +``` + +Query: + +``` sql +SELECT arrayShiftRight([1,2,3,4,5,6], 2, 42) as res; +``` + +Result: + +``` text +┌─res─────────────┐ +│ [42,42,1,2,3,4] │ +└─────────────────┘ +``` + +Query: + +``` sql +SELECT arrayShiftRight(['a','b','c','d','e','f'], 3, 'foo') as res; +``` + +Result: + +``` text +┌─res─────────────────────────────┐ +│ ['foo','foo','foo','a','b','c'] │ +└─────────────────────────────────┘ +``` + +Query: + +``` sql +SELECT arrayShiftRight([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res; +``` + +Result: + +``` text +┌─res─────────────────┐ +│ [4242,4242,1,2,3,4] │ +└─────────────────────┘ +``` + + +## arrayRandomSample + +Function `arrayRandomSample` returns a subset with `samples`-many random elements of an input array. If `samples` exceeds the size of the input array, the sample size is limited to the size of the array. In this case, all elements of the input array are returned, but the order is not guaranteed. The function can handle both flat arrays and nested arrays. + +**Syntax** + +```sql +arrayRandomSample(arr, samples) +``` + +**Arguments** + +- `arr` — The input array from which to sample elements. It may be a flat or a nested array. +- `samples` — An unsigned integer specifying the number of elements to include in the random sample. + +**Returned Value** + +- An array containing a random sample of elements from the input array. + +**Examples** + +Query: + +```sql +SELECT arrayRandomSample(['apple', 'banana', 'cherry', 'date'], 2) as res; +``` + +Result: +``` +┌─res────────────────┐ +│ ['banana','apple'] │ +└────────────────────┘ +``` + +Query: + +```sql +SELECT arrayRandomSample([[1, 2], [3, 4], [5, 6]], 2) as res; +``` + +Result: +``` +┌─res───────────┐ +│ [[3,4],[5,6]] │ +└───────────────┘ +``` + +Query: + +```sql +SELECT arrayRandomSample([1, 2, 3, 4, 5], 0) as res; +``` + +Result: +``` +┌─res─┐ +│ [] │ +└─────┘ +``` + +Query: + +```sql +SELECT arrayRandomSample([1, 2, 3], 5) as res; +``` + +Result: +``` +┌─res─────┐ +│ [3,1,2] │ +└─────────┘ +``` + ## Distance functions All supported functions are described in [distance functions documentation](../../sql-reference/functions/distance-functions.md).
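One detail of the `arrayFold` section above that may deserve an extra illustration: the function accepts several equally-sized arrays, in which case the lambda receives one element from each array followed by the accumulator. A minimal, untested sketch under that assumption:

``` sql
-- Sketch: fold over two arrays at once to compute a dot product.
-- The lambda takes one element per input array, with the accumulator last,
-- mirroring the single-array example in the arrayFold section above.
SELECT arrayFold((x, y, acc) -> acc + x * y, [1, 2, 3], [10, 20, 30], toInt64(0)) AS dot_product;
-- expected result: 1*10 + 2*20 + 3*30 = 140
```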
diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 3f61e7a214da..aecab38f3c8a 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -134,9 +134,57 @@ Like [makeDateTime](#makedatetime) but produces a [DateTime64](../../sql-referen **Syntax** ``` sql -makeDateTime32(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]]) +makeDateTime64(year, month, day, hour, minute, second[, fraction[, precision[, timezone]]]) ``` +## timestamp + +Converts the first argument 'expr' to type [DateTime64(6)](../../sql-reference/data-types/datetime64.md). +If a second argument 'expr_time' is provided, it adds the specified time to the converted value. + +**Syntax** + +``` sql +timestamp(expr[, expr_time]) +``` + +Alias: `TIMESTAMP` + +**Arguments** + +- `expr` - Date or date with time. Type: [String](../../sql-reference/data-types/string.md). +- `expr_time` - Optional parameter. Time to add. [String](../../sql-reference/data-types/string.md). + +**Examples** + +``` sql +SELECT timestamp('2023-12-31') as ts; +``` + +Result: + +``` text +┌─────────────────────────ts─┐ +│ 2023-12-31 00:00:00.000000 │ +└────────────────────────────┘ +``` + +``` sql +SELECT timestamp('2023-12-31 12:00:00', '12:00:00.11') as ts; +``` + +Result: + +``` text +┌─────────────────────────ts─┐ +│ 2024-01-01 00:00:00.110000 │ +└────────────────────────────┘ +``` + +**Returned value** + +- [DateTime64](../../sql-reference/data-types/datetime64.md)(6) + ## timeZone Returns the timezone of the current session, i.e. the value of setting [session_timezone](../../operations/settings/settings.md#session_timezone). @@ -237,6 +285,11 @@ type_samoa: DateTime('US/Samoa') int32samoa: 1546300800 ``` +**See Also** + +- [formatDateTime](#formatDateTime) - supports non-constant timezone. +- [toString](type-conversion-functions.md#tostring) - supports non-constant timezone. + ## timeZoneOf Returns the timezone name of [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md) data types. @@ -587,11 +640,34 @@ Rounds down the date with time to the start of the fifteen-minute interval. ## toStartOfInterval(time_or_data, INTERVAL x unit \[, time_zone\]) -This is a generalization of other functions named `toStartOf*`. For example, -`toStartOfInterval(t, INTERVAL 1 year)` returns the same as `toStartOfYear(t)`, -`toStartOfInterval(t, INTERVAL 1 month)` returns the same as `toStartOfMonth(t)`, -`toStartOfInterval(t, INTERVAL 1 day)` returns the same as `toStartOfDay(t)`, -`toStartOfInterval(t, INTERVAL 15 minute)` returns the same as `toStartOfFifteenMinutes(t)` etc. +This function generalizes other `toStartOf*()` functions. For example, +- `toStartOfInterval(t, INTERVAL 1 year)` returns the same as `toStartOfYear(t)`, +- `toStartOfInterval(t, INTERVAL 1 month)` returns the same as `toStartOfMonth(t)`, +- `toStartOfInterval(t, INTERVAL 1 day)` returns the same as `toStartOfDay(t)`, +- `toStartOfInterval(t, INTERVAL 15 minute)` returns the same as `toStartOfFifteenMinutes(t)`. 
+ +The calculation is performed relative to specific points in time: + +| Interval | Start | +|-------------|------------------------| +| year | year 0 | +| quarter | 1900 Q1 | +| month | 1900 January | +| week | 1970, 1st week (01-05) | +| day | 1970-01-01 | +| hour | (*) | +| minute | 1970-01-01 00:00:00 | +| second | 1970-01-01 00:00:00 | +| millisecond | 1970-01-01 00:00:00 | +| microsecond | 1970-01-01 00:00:00 | +| nanosecond | 1970-01-01 00:00:00 | + +(*) hour intervals are special: the calculation is always performed relative to 00:00:00 (midnight) of the current day. As a result, only + hour values between 1 and 23 are useful. + +**See Also** + +- [date_trunc](#date_trunc) ## toTime @@ -694,10 +770,14 @@ SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS we Returns year and week for a date. The year in the result may be different from the year in the date argument for the first and the last week of the year. -The mode argument works exactly like the mode argument to `toWeek()`. For the single-argument syntax, a mode value of 0 is used. +The mode argument works like the mode argument to `toWeek()`. For the single-argument syntax, a mode value of 0 is used. `toISOYear()` is a compatibility function that is equivalent to `intDiv(toYearWeek(date,3),100)`. +:::warning +The week number returned by `toYearWeek()` can be different from what `toWeek()` returns. `toWeek()` always returns the week number in the context of the given year, and if `toWeek()` returns `0`, `toYearWeek()` returns the value corresponding to the last week of the previous year. See `prev_yearWeek` in the example below. +::: + **Syntax** ``` sql toYearWeek(t[, mode[, timezone]]) ``` **Example** ``` sql -SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9; +SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9, toYearWeek(toDate('2022-01-01')) AS prev_yearWeek; ``` ``` text -┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┐ -│ 2016-12-27 │ 201652 │ 201652 │ 201701 │ -└────────────┴───────────┴───────────┴───────────┘ +┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┬─prev_yearWeek─┐ +│ 2016-12-27 │ 201652 │ 201652 │ 201701 │ 202152 │ +└────────────┴───────────┴───────────┴───────────┴───────────────┘ +``` + +## toDaysSinceYearZero + +Returns, for a given date, the number of days passed since [1 January 0000](https://en.wikipedia.org/wiki/Year_zero) in the [proleptic Gregorian calendar defined by ISO 8601](https://en.wikipedia.org/wiki/Gregorian_calendar#Proleptic_Gregorian_calendar). The calculation is the same as in MySQL's [`TO_DAYS()`](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_to-days) function. + +**Syntax** + +``` sql +toDaysSinceYearZero(date[, time_zone]) +``` + +Alias: `TO_DAYS` + + +**Arguments** +- `date` — The date for which to calculate the number of days passed since year zero. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `time_zone` — A String type const value or an expression representing the time zone. [String types](../../sql-reference/data-types/string.md) + +**Returned value** + +The number of days passed since date 0000-01-01. + +Type: [UInt32](../../sql-reference/data-types/int-uint.md).
+ +**Example** + +``` sql +SELECT toDaysSinceYearZero(toDate('2023-09-08')); +``` + +Result: + +``` text +┌─toDaysSinceYearZero(toDate('2023-09-08'))─┐ +│ 713569 │ +└───────────────────────────────────────────┘ ``` ## age -Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 second. +Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond. E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit. For an alternative to `age`, see function `date\_diff`. @@ -734,14 +851,16 @@ age('unit', startdate, enddate, [timezone]) - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). Possible values: - - `second` (possible abbreviations: `ss`, `s`) - - `minute` (possible abbreviations: `mi`, `n`) - - `hour` (possible abbreviations: `hh`, `h`) - - `day` (possible abbreviations: `dd`, `d`) - - `week` (possible abbreviations: `wk`, `ww`) - - `month` (possible abbreviations: `mm`, `m`) - - `quarter` (possible abbreviations: `qq`, `q`) - - `year` (possible abbreviations: `yyyy`, `yy`) + - `microsecond` `microseconds` `us` `u` + - `millisecond` `milliseconds` `ms` + - `second` `seconds` `ss` `s` + - `minute` `minutes` `mi` `n` + - `hour` `hours` `hh` `h` + - `day` `days` `dd` `d` + - `week` `weeks` `wk` `ww` + - `month` `months` `mm` `m` + - `quarter` `quarters` `qq` `q` + - `year` `years` `yyyy` `yy` - `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). @@ -809,14 +928,16 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_ - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md). Possible values: - - `second` (possible abbreviations: `ss`, `s`) - - `minute` (possible abbreviations: `mi`, `n`) - - `hour` (possible abbreviations: `hh`, `h`) - - `day` (possible abbreviations: `dd`, `d`) - - `week` (possible abbreviations: `wk`, `ww`) - - `month` (possible abbreviations: `mm`, `m`) - - `quarter` (possible abbreviations: `qq`, `q`) - - `year` (possible abbreviations: `yyyy`, `yy`) + - `microsecond` `microseconds` `us` `u` + - `millisecond` `milliseconds` `ms` + - `second` `seconds` `ss` `s` + - `minute` `minutes` `mi` `n` + - `hour` `hours` `hh` `h` + - `day` `days` `dd` `d` + - `week` `weeks` `wk` `ww` + - `month` `months` `mm` `m` + - `quarter` `quarters` `qq` `q` + - `year` `years` `yyyy` `yy` - `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). @@ -934,6 +1055,8 @@ Result: Adds the time interval or date interval to the provided date or date with time. +If the addition results in a value outside the bounds of the data type, the result is undefined. + **Syntax** ``` sql date_add(unit, value, date) ``` Aliases: `dateAdd`, `DATE_ADD`. **Arguments** - `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). Possible values: - `second` - `minute` - `hour` - `day` - `week` - `month` - `quarter` - `year` - `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — The date or date with time to which `value` is added.
[Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `date` — The date or date with time to which `value` is added. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Returned value** Date or date with time obtained by adding `value`, expressed in `unit`, to `date`. -Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -979,10 +1102,16 @@ Result: └───────────────────────────────────────────────┘ ``` +**See Also** + +- [addDate](#addDate) + ## date\_sub Subtracts the time interval or date interval from the provided date or date with time. +If the subtraction results in a value outside the bounds of the data type, the result is undefined. + **Syntax** ``` sql @@ -1007,13 +1136,13 @@ Aliases: `dateSub`, `DATE_SUB`. - `year` - `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `date` — The date or date with time from which `value` is subtracted. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Returned value** Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. -Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -1029,10 +1158,15 @@ Result: └────────────────────────────────────────────────┘ ``` +**See Also** +- [subDate](#subDate) + ## timestamp\_add Adds the specified time value with the provided date or date time value. +If the addition results in a value outside the bounds of the data type, the result is undefined. + **Syntax** ``` sql @@ -1043,7 +1177,7 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. **Arguments** -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). - `value` — Value of interval to add. [Int](../../sql-reference/data-types/int-uint.md). - `unit` — The type of interval to add. [String](../../sql-reference/data-types/string.md). Possible values: @@ -1061,7 +1195,7 @@ Aliases: `timeStampAdd`, `TIMESTAMP_ADD`. Date or date with time with the specified `value` expressed in `unit` added to `date`. -Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). 
+Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -1081,6 +1215,8 @@ Result: Subtracts the time interval from the provided date or date with time. +If the subtraction results in a value outside the bounds of the data type, the result is undefined. + **Syntax** ``` sql @@ -1104,13 +1240,13 @@ Aliases: `timeStampSub`, `TIMESTAMP_SUB`. - `year` - `value` — Value of interval to subtract. [Int](../../sql-reference/data-types/int-uint.md). -- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Returned value** Date or date with time obtained by subtracting `value`, expressed in `unit`, from `date`. -Type: [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md). +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). **Example** @@ -1126,10 +1262,96 @@ Result: └──────────────────────────────────────────────────────────────┘ ``` -## now +## addDate + +Adds the time interval or date interval to the provided date or date with time. + +If the addition results in a value outside the bounds of the data type, the result is undefined. + +**Syntax** + +``` sql +addDate(date, interval) +``` + +**Arguments** + +- `date` — The date or date with time to which `interval` is added. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `interval` — Interval to add. [Interval](../../sql-reference/data-types/special-data-types/interval.md). + +**Returned value** + +Date or date with time obtained by adding `interval` to `date`. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + +```sql +SELECT addDate(toDate('2018-01-01'), INTERVAL 3 YEAR); +``` + +Result: + +```text +┌─addDate(toDate('2018-01-01'), toIntervalYear(3))─┐ +│ 2021-01-01 │ +└──────────────────────────────────────────────────┘ +``` + +Alias: `ADDDATE` + +**See Also** +- [date_add](#date_add) + +## subDate + +Subtracts the time interval or date interval from the provided date or date with time. + +If the subtraction results in a value outside the bounds of the data type, the result is undefined. + +**Syntax** + +``` sql +subDate(date, interval) +``` + +**Arguments** + +- `date` — The date or date with time from which `interval` is subtracted. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). +- `interval` — Interval to subtract. 
[Interval](../../sql-reference/data-types/special-data-types/interval.md). + +**Returned value** + +Date or date with time obtained by subtracting `interval` from `date`. + +Type: [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md). + +**Example** + +```sql +SELECT subDate(toDate('2018-01-01'), INTERVAL 3 YEAR); +``` + +Result: + +```text +┌─subDate(toDate('2018-01-01'), toIntervalYear(3))─┐ +│ 2015-01-01 │ +└──────────────────────────────────────────────────┘ +``` + +Alias: `SUBDATE` + +**See Also** +- [date_sub](#date_sub) + +## now {#now} Returns the current date and time at the moment of query analysis. The function is a constant expression. +Alias: `current_timestamp`. + **Syntax** ``` sql @@ -1211,7 +1433,7 @@ Result: └─────────────────────────┴───────────────────────────────┘ ``` -## nowInBlock +## nowInBlock {#nowInBlock} Returns the current date and time at the moment of processing of each block of data. In contrast to the function [now](#now), it is not a constant expression, and the returned value will be different in different blocks for long-running queries. @@ -1255,12 +1477,14 @@ Result: └─────────────────────┴─────────────────────┴──────────┘ ``` -## today +## today {#today} Accepts zero arguments and returns the current date at one of the moments of query analysis. The same as ‘toDate(now())’. -## yesterday +Aliases: `curdate`, `current_date`. + +## yesterday {#yesterday} Accepts zero arguments and returns yesterday’s date at one of the moments of query analysis. The same as ‘today() - 1’. @@ -1273,6 +1497,8 @@ Rounds the time to the half hour. Converts a date or date with time to a UInt32 number containing the year and month number (YYYY \* 100 + MM). Accepts a second optional timezone argument. If provided, the timezone must be a string constant. +This function is the opposite of function `YYYYMMDDToDate()`. + **Example** ``` sql @@ -1295,8 +1521,7 @@ Converts a date or date with time to a UInt32 number containing the year and mon **Example** ```sql -SELECT - toYYYYMMDD(now(), 'US/Eastern') +SELECT toYYYYMMDD(now(), 'US/Eastern') ``` Result: @@ -1314,8 +1539,7 @@ Converts a date or date with time to a UInt64 number containing the year and mon **Example** ```sql -SELECT - toYYYYMMDDhhmmss(now(), 'US/Eastern') +SELECT toYYYYMMDDhhmmss(now(), 'US/Eastern') ``` Result: @@ -1326,6 +1550,93 @@ Result: └───────────────────────────────────────┘ ``` +## YYYYMMDDToDate + +Converts a number containing the year, month and day number to a [Date](../../sql-reference/data-types/date.md). + +This function is the opposite of function `toYYYYMMDD()`. + +The output is undefined if the input does not encode a valid Date value. + +**Syntax** + +```sql +YYYYMMDDToDate(yyyymmdd); +``` + +**Arguments** + +- `yyyymmdd` - A number representing the year, month and day. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). + +**Returned value** + +- a date created from the arguments. + +Type: [Date](../../sql-reference/data-types/date.md). + +**Example** + +```sql +SELECT YYYYMMDDToDate(20230911); +``` + +Result: + +```response +┌─YYYYMMDDToDate(20230911)─┐ +│ 2023-09-11 │ +└──────────────────────────┘ +``` + +## YYYYMMDDToDate32 + +Like function `YYYYMMDDToDate()` but produces a [Date32](../../sql-reference/data-types/date32.md).
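A short usage sketch for the `Date32` variant; the input value below is chosen because it lies outside the `Date` range but inside the `Date32` range, and the expected output is an assumption based on the description above:

``` sql
-- Sketch: YYYYMMDDToDate32 can represent pre-1970 dates
-- that the plain Date type cannot hold.
SELECT YYYYMMDDToDate32(19000102) AS d, toTypeName(d) AS type;
-- expected: d = 1900-01-02, type = Date32
```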
+ +## YYYYMMDDhhmmssToDateTime + +Converts a number containing the year, month, day, hour, minute and second number to a [DateTime](../../sql-reference/data-types/datetime.md). + +The output is undefined if the input does not encode a valid DateTime value. + +This function is the opposite of function `toYYYYMMDDhhmmss()`. + +**Syntax** + +```sql +YYYYMMDDhhmmssToDateTime(yyyymmddhhmmss[, timezone]); +``` + +**Arguments** + +- `yyyymmddhhmmss` - A number representing the year, month, day, hour, minute and second. [Integer](../../sql-reference/data-types/int-uint.md), [Float](../../sql-reference/data-types/float.md) or [Decimal](../../sql-reference/data-types/decimal.md). +- `timezone` - [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) for the returned value (optional). + +**Returned value** + +- a date with time created from the arguments. + +Type: [DateTime](../../sql-reference/data-types/datetime.md). + +**Example** + +```sql +SELECT YYYYMMDDhhmmssToDateTime(20230911131415); +``` + +Result: + +```response +┌──────YYYYMMDDhhmmssToDateTime(20230911131415)─┐ +│ 2023-09-11 13:14:15 │ +└───────────────────────────────────────────────┘ +``` + +## YYYYMMDDhhmmssToDateTime64 + +Like function `YYYYMMDDhhmmssToDateTime()` but produces a [DateTime64](../../sql-reference/data-types/datetime64.md). + +Accepts an additional, optional `precision` parameter after the `timezone` parameter. + ## addYears, addMonths, addWeeks, addDays, addHours, addMinutes, addSeconds, addQuarters Function adds a Date/DateTime interval to a Date/DateTime and then return the Date/DateTime. For example: @@ -1389,7 +1700,7 @@ SELECT timeSlots(toDateTime64('1980-12-12 21:01:02.1234', 4, 'UTC'), toDecimal64 └───────────────────────────────────────────────────────────────────────────────────────────────────────────┘ ``` -## formatDateTime {#date_time_functions-formatDateTime} +## formatDateTime {#formatDateTime} Formats a Time according to the given Format string. Format is a constant expression, so you cannot have multiple formats for a single result column. @@ -1437,7 +1748,7 @@ Using replacement fields, you can define a pattern for the resulting string. “ | %n | new-line character (‘’) | | | %p | AM or PM designation | PM | | %Q | Quarter (1-4) | 1 | -| %r | 12-hour HH:MM AM/PM time, equivalent to %H:%i %p | 10:30 PM | +| %r | 12-hour HH:MM AM/PM time, equivalent to %h:%i %p | 10:30 PM | | %R | 24-hour HH:MM time, equivalent to %H:%i | 22:33 | | %s | second (00-59) | 44 | | %S | second (00-59) | 44 | @@ -1482,12 +1793,39 @@ Result: └─────────────────────────────────────────────────────────────────────┘ ``` +Additionally, the `formatDateTime` function can take a third String argument containing the name of the time zone. Example: `Asia/Istanbul`. In this case, the time is formatted according to the specified time zone.
+
+**Example**
+
+```sql
+SELECT
+    now() AS ts,
+    time_zone,
+    formatDateTime(ts, '%T', time_zone) AS str_tz_time
+FROM system.time_zones
+WHERE time_zone LIKE 'Europe%'
+LIMIT 10
+
+┌──────────────────ts─┬─time_zone─────────┬─str_tz_time─┐
+│ 2023-09-08 19:13:40 │ Europe/Amsterdam  │ 21:13:40    │
+│ 2023-09-08 19:13:40 │ Europe/Andorra    │ 21:13:40    │
+│ 2023-09-08 19:13:40 │ Europe/Astrakhan  │ 23:13:40    │
+│ 2023-09-08 19:13:40 │ Europe/Athens     │ 22:13:40    │
+│ 2023-09-08 19:13:40 │ Europe/Belfast    │ 20:13:40    │
+│ 2023-09-08 19:13:40 │ Europe/Belgrade   │ 21:13:40    │
+│ 2023-09-08 19:13:40 │ Europe/Berlin     │ 21:13:40    │
+│ 2023-09-08 19:13:40 │ Europe/Bratislava │ 21:13:40    │
+│ 2023-09-08 19:13:40 │ Europe/Brussels   │ 21:13:40    │
+│ 2023-09-08 19:13:40 │ Europe/Bucharest  │ 22:13:40    │
+└─────────────────────┴───────────────────┴─────────────┘
+```
+
**See Also**

- [formatDateTimeInJodaSyntax](##formatDateTimeInJodaSyntax)

-## formatDateTimeInJodaSyntax {#date_time_functions-formatDateTimeInJodaSyntax}
+## formatDateTimeInJodaSyntax {#formatDateTimeInJodaSyntax}

Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.

@@ -1591,7 +1929,7 @@ monthName(date)

**Arguments**

-- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+- `date` — Date or date with time. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).

**Returned value**

@@ -1807,6 +2145,72 @@ Result:
└────────────────────────────────────┘
```

+## toUTCTimestamp
+
+Converts a DateTime/DateTime64 type value from another time zone to a timestamp in the UTC time zone.
+
+**Syntax**
+
+``` sql
+toUTCTimestamp(time_val, time_zone)
+```
+
+**Arguments**
+
+- `time_val` — A DateTime/DateTime64 type const value or an expression. [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md)
+- `time_zone` — A String type const value or an expression representing the time zone. [String types](../../sql-reference/data-types/string.md)
+
+**Returned value**
+
+- DateTime/DateTime64 in text form
+
+**Example**
+
+``` sql
+SELECT toUTCTimestamp(toDateTime('2023-03-16'), 'Asia/Shanghai');
+```
+
+Result:
+
+``` text
+┌─toUTCTimestamp(toDateTime('2023-03-16'),'Asia/Shanghai')─┐
+│                                      2023-03-15 16:00:00 │
+└──────────────────────────────────────────────────────────┘
+```
+
+## fromUTCTimestamp
+
+Converts a DateTime/DateTime64 type value from the UTC time zone to a timestamp in another time zone.
+
+**Syntax**
+
+``` sql
+fromUTCTimestamp(time_val, time_zone)
+```
+
+**Arguments**
+
+- `time_val` — A DateTime/DateTime64 type const value or an expression. [DateTime/DateTime64 types](../../sql-reference/data-types/datetime.md)
+- `time_zone` — A String type const value or an expression representing the time zone.
[String types](../../sql-reference/data-types/string.md)
+
+**Returned value**
+
+- DateTime/DateTime64 in text form
+
+**Example**
+
+``` sql
+SELECT fromUTCTimestamp(toDateTime64('2023-03-16 10:00:00', 3), 'Asia/Shanghai');
+```
+
+Result:
+
+``` text
+┌─fromUTCTimestamp(toDateTime64('2023-03-16 10:00:00',3),'Asia/Shanghai')─┐
+│                                                 2023-03-16 18:00:00.000 │
+└─────────────────────────────────────────────────────────────────────────┘
+```
+
## Related content

- Blog: [Working with time series data in ClickHouse](https://clickhouse.com/blog/working-with-time-series-data-and-functions-ClickHouse)
diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md
index 284d6d804054..4149afce044b 100644
--- a/docs/en/sql-reference/functions/ext-dict-functions.md
+++ b/docs/en/sql-reference/functions/ext-dict-functions.md
@@ -12,7 +12,7 @@ For dictionaries created with [DDL queries](../../sql-reference/statements/creat

For information on connecting and configuring dictionaries, see [Dictionaries](../../sql-reference/dictionaries/index.md).

-## dictGet, dictGetOrDefault, dictGetOrNull
+## dictGet, dictGetOrDefault, dictGetOrNull {#dictGet}

Retrieves values from a dictionary.

diff --git a/docs/en/sql-reference/functions/files.md b/docs/en/sql-reference/functions/files.md
index 73d72aa50e53..d62cd1db88d0 100644
--- a/docs/en/sql-reference/functions/files.md
+++ b/docs/en/sql-reference/functions/files.md
@@ -18,7 +18,7 @@ file(path[, default])

**Arguments**

-- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings.
+- `path` — The path of the file relative to [user_files_path](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Supports wildcards `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` are numbers and `'abc', 'def'` are strings.
- `default` — The value returned if the file does not exist or cannot be accessed. Supported data types: [String](../../sql-reference/data-types/string.md) and [NULL](../../sql-reference/syntax.md#null-literal).

**Example**
diff --git a/docs/en/sql-reference/functions/functions-for-nulls.md b/docs/en/sql-reference/functions/functions-for-nulls.md
index d57b799e94cc..bde2a8a9505f 100644
--- a/docs/en/sql-reference/functions/functions-for-nulls.md
+++ b/docs/en/sql-reference/functions/functions-for-nulls.md
@@ -92,6 +92,50 @@ Result:
└───┘
```

+## isZeroOrNull
+
+Returns whether the argument is 0 (zero) or [NULL](../../sql-reference/syntax.md#null-literal).
+
+``` sql
+isZeroOrNull(x)
+```
+
+**Arguments:**
+
+- `x` — A value of non-compound data type.
+
+**Returned value**
+
+- `1` if `x` is 0 (zero) or `NULL`.
+- `0` otherwise.
+
+**Example**
+
+Table:
+
+``` text
+┌─x─┬────y─┐
+│ 1 │ ᴺᵁᴸᴸ │
+│ 2 │    0 │
+│ 3 │    3 │
+└───┴──────┘
+```
+
+Query:
+
+``` sql
+SELECT x FROM t_null WHERE isZeroOrNull(y);
+```
+
+Result:
+
+``` text
+┌─x─┐
+│ 1 │
+│ 2 │
+└───┘
+```
+
## coalesce

Returns the leftmost non-`NULL` argument.
diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md
index 06097d924807..7276437ec822 100644
--- a/docs/en/sql-reference/functions/hash-functions.md
+++ b/docs/en/sql-reference/functions/hash-functions.md
@@ -19,7 +19,7 @@ halfMD5(par1, ...)
``` The function is relatively slow (5 million short strings per second per processor core). -Consider using the [sipHash64](#hash_functions-siphash64) function instead. +Consider using the [sipHash64](#siphash64) function instead. **Arguments** @@ -45,13 +45,13 @@ SELECT halfMD5(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00') Calculates the MD4 from a string and returns the resulting set of bytes as FixedString(16). -## MD5 {#hash_functions-md5} +## MD5 {#md5} Calculates the MD5 from a string and returns the resulting set of bytes as FixedString(16). If you do not need MD5 in particular, but you need a decent cryptographic 128-bit hash, use the ‘sipHash128’ function instead. If you want to get the same result as output by the md5sum utility, use lower(hex(MD5(s))). -## sipHash64 (#hash_functions-siphash64) +## sipHash64 {#siphash64} Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. @@ -59,13 +59,13 @@ Produces a 64-bit [SipHash](https://en.wikipedia.org/wiki/SipHash) hash value. sipHash64(par1,...) ``` -This is a cryptographic hash function. It works at least three times faster than the [MD5](#hash_functions-md5) hash function. +This is a cryptographic hash function. It works at least three times faster than the [MD5](#md5) hash function. The function [interprets](/docs/en/sql-reference/functions/type-conversion-functions.md/#type_conversion_functions-reinterpretAsString) all the input parameters as strings and calculates the hash value for each of them. It then combines the hashes by the following algorithm: -1. The first and the second hash value are concatenated to an array which is hashed. -2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way. -3. This calculation is repeated for all remaining hash values of the original input. +1. The first and the second hash value are concatenated to an array which is hashed. +2. The previously calculated hash value and the hash of the third input parameter are hashed in a similar way. +3. This calculation is repeated for all remaining hash values of the original input. **Arguments** @@ -91,7 +91,7 @@ SELECT sipHash64(array('e','x','a'), 'mple', 10, toDateTime('2019-06-15 23:00:00 ## sipHash64Keyed -Same as [sipHash64](#hash_functions-siphash64) but additionally takes an explicit key argument instead of using a fixed key. +Same as [sipHash64](#siphash64) but additionally takes an explicit key argument instead of using a fixed key. **Syntax** @@ -101,7 +101,7 @@ sipHash64Keyed((k0, k1), par1,...) **Arguments** -Same as [sipHash64](#hash_functions-siphash64), but the first argument is a tuple of two UInt64 values representing the key. +Same as [sipHash64](#siphash64), but the first argument is a tuple of two UInt64 values representing the key. **Returned value** @@ -123,12 +123,12 @@ SELECT sipHash64Keyed((506097522914230528, 1084818905618843912), array('e','x',' ## sipHash128 -Like [sipHash64](#hash_functions-siphash64) but produces a 128-bit hash value, i.e. the final xor-folding state is done up to 128 bits. +Like [sipHash64](#siphash64) but produces a 128-bit hash value, i.e. the final xor-folding state is done up to 128 bits. :::note This 128-bit variant differs from the reference implementation and it's weaker. This version exists because, when it was written, there was no official 128-bit extension for SipHash. -New projects should probably use [sipHash128Reference](#hash_functions-siphash128reference). 
+New projects should probably use [sipHash128Reference](#siphash128reference).
:::

**Syntax**

@@ -139,7 +139,7 @@ sipHash128(par1,...)

**Arguments**

-Same as for [sipHash64](#hash_functions-siphash64).
+Same as for [sipHash64](#siphash64).

**Returned value**

@@ -163,12 +163,12 @@ Result:

## sipHash128Keyed

-Same as [sipHash128](#hash_functions-siphash128) but additionally takes an explicit key argument instead of using a fixed key.
+Same as [sipHash128](#siphash128) but additionally takes an explicit key argument instead of using a fixed key.

:::note
This 128-bit variant differs from the reference implementation and it's weaker.
This version exists because, when it was written, there was no official 128-bit extension for SipHash.
-New projects should probably use [sipHash128ReferenceKeyed](#hash_functions-siphash128referencekeyed).
+New projects should probably use [sipHash128ReferenceKeyed](#siphash128referencekeyed).
:::

**Syntax**

@@ -179,7 +179,7 @@ sipHash128Keyed((k0, k1), par1,...)

**Arguments**

-Same as [sipHash128](#hash_functions-siphash128), but the first argument is a tuple of two UInt64 values representing the key.
+Same as [sipHash128](#siphash128), but the first argument is a tuple of two UInt64 values representing the key.

**Returned value**

@@ -203,7 +203,7 @@ Result:

## sipHash128Reference

-Like [sipHash128](#hash_functions-siphash128) but implements the 128-bit algorithm from the original authors of SipHash.
+Like [sipHash128](#siphash128) but implements the 128-bit algorithm from the original authors of SipHash.

**Syntax**

@@ -213,7 +213,7 @@ sipHash128Reference(par1,...)

**Arguments**

-Same as for [sipHash128](#hash_functions-siphash128).
+Same as for [sipHash128](#siphash128).

**Returned value**

@@ -237,7 +237,7 @@ Result:

## sipHash128ReferenceKeyed

-Same as [sipHash128Reference](#hash_functions-siphash128reference) but additionally takes an explicit key argument instead of using a fixed key.
+Same as [sipHash128Reference](#siphash128reference) but additionally takes an explicit key argument instead of using a fixed key.

**Syntax**

@@ -247,7 +247,7 @@ sipHash128ReferenceKeyed((k0, k1), par1,...)

**Arguments**

-Same as [sipHash128Reference](#hash_functions-siphash128reference), but the first argument is a tuple of two UInt64 values representing the key.
+Same as [sipHash128Reference](#siphash128reference), but the first argument is a tuple of two UInt64 values representing the key.

**Returned value**

@@ -536,7 +536,7 @@ Calculates `HiveHash` from a string.
SELECT hiveHash('')
```

-This is just [JavaHash](#hash_functions-javahash) with zeroed out sign bit. This function is used in [Apache Hive](https://en.wikipedia.org/wiki/Apache_Hive) for versions before 3.0. This hash function is neither fast nor having a good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result.
+This is just [JavaHash](#javahash) with zeroed out sign bit. This function is used in [Apache Hive](https://en.wikipedia.org/wiki/Apache_Hive) for versions before 3.0. This hash function is neither fast nor of good quality. The only reason to use it is when this algorithm is already used in another system and you have to calculate exactly the same result.
**Returned value**

diff --git a/docs/en/sql-reference/functions/nlp-functions.md b/docs/en/sql-reference/functions/nlp-functions.md
index bb127a939f36..3e0458d226d0 100644
--- a/docs/en/sql-reference/functions/nlp-functions.md
+++ b/docs/en/sql-reference/functions/nlp-functions.md
@@ -204,7 +204,7 @@ Other possible results:

Query:

```sql
-SELECT detectLanguageMixed('Je pense que je ne parviendrai jamais à parler français comme un natif. Where there’s a will, there’s a way.');
+SELECT detectLanguage('Je pense que je ne parviendrai jamais à parler français comme un natif. Where there’s a will, there’s a way.');
```

Result:
diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index 26dcccfd42e8..378fc821df9a 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -11,7 +11,7 @@ sidebar_label: Other

Returns the name of the host on which this function was executed. If the function executes on a remote server (distributed processing), the remote server name is returned. If the function executes in the context of a distributed table, it generates a normal column with values relevant to each shard. Otherwise it produces a constant value.

-## getMacro
+## getMacro {#getMacro}

Returns a named value from the [macros](../../operations/server-configuration-parameters/settings.md#macros) section of the server configuration.

@@ -186,7 +186,7 @@ Returns the type name of the passed argument.

If `NULL` is passed, then the function returns type `Nullable(Nothing)`, which corresponds to ClickHouse's internal `NULL` representation.

-## blockSize()
+## blockSize() {#blockSize}

In ClickHouse, queries are processed in blocks (chunks). This function returns the size (row count) of the block the function is called on.

@@ -311,7 +311,7 @@ Sleeps ‘seconds’ seconds for each row. The sleep time can be specified as in

Returns the name of the current database.
Useful in table engine parameters of `CREATE TABLE` queries where you need to specify the database.

-## currentUser()
+## currentUser() {#currentUser}

Returns the name of the current user. In case of a distributed query, the name of the user who initiated the query is returned.

@@ -660,21 +660,26 @@ SELECT

## formatReadableTimeDelta

-Given a time interval (delta) in seconds, this function returns a time delta with year/month/day/hour/minute/second as string.
+Given a time interval (delta) in seconds, this function returns a time delta with year/month/day/hour/minute/second/millisecond/microsecond/nanosecond as string.

**Syntax**

``` sql
-formatReadableTimeDelta(column[, maximum_unit])
+formatReadableTimeDelta(column[, maximum_unit, minimum_unit])
```

**Arguments**

- `column` — A column with a numeric time delta.
-- `maximum_unit` — Optional. Maximum unit to show. Acceptable values `seconds`, `minutes`, `hours`, `days`, `months`, `years`.
-
-Example:
+- `maximum_unit` — Optional. Maximum unit to show.
+  * Acceptable values: `nanoseconds`, `microseconds`, `milliseconds`, `seconds`, `minutes`, `hours`, `days`, `months`, `years`.
+  * Default value: `years`.
+- `minimum_unit` — Optional. Minimum unit to show. All smaller units are truncated.
+  * Acceptable values: `nanoseconds`, `microseconds`, `milliseconds`, `seconds`, `minutes`, `hours`, `days`, `months`, `years`.
+  * If the explicitly specified value is bigger than `maximum_unit`, an exception will be thrown.
+  * Default value: `seconds` if `maximum_unit` is `seconds` or bigger, `nanoseconds` otherwise.
+
+**Example**

``` sql
SELECT
    arrayJoin([100, 12345, 432546534]) AS elapsed,
@@ -703,6 +708,20 @@ SELECT
└────────────┴─────────────────────────────────────────────────────────────────┘
```

+```sql
+SELECT
+    arrayJoin([100, 12345, 432546534.00000006]) AS elapsed,
+    formatReadableTimeDelta(elapsed, 'minutes', 'nanoseconds') AS time_delta
+```
+
+```text
+┌────────────elapsed─┬─time_delta─────────────────────────────────────┐
+│                100 │ 1 minute and 40 seconds                        │
+│              12345 │ 205 minutes and 45 seconds                     │
+│ 432546534.00000006 │ 7209108 minutes, 54 seconds and 60 nanoseconds │
+└────────────────────┴────────────────────────────────────────────────┘
+```
+
## parseTimeDelta

Parse a sequence of numbers followed by something resembling a time unit.

@@ -771,7 +790,7 @@ If executed in the context of a distributed table, this function generates a nor

Returns the sequence number of the data block where the row is located.

-## rowNumberInBlock()
+## rowNumberInBlock() {#rowNumberInBlock}

Returns the ordinal number of the row in the data block. Different data blocks are always recalculated.

@@ -896,7 +915,7 @@ Result:
└────────────┴───────┴───────────┴────────────────┘
```

-## runningDifference(x)
+## runningDifference(x) {#runningDifference}

Calculates the difference between two consecutive row values in the data block.
Returns 0 for the first row, and for subsequent rows the difference to the previous row.

@@ -2274,7 +2293,7 @@ Result:
└───────────────────────────┘
```

-## queryID
+## queryID {#queryID}

Returns the ID of the current query. Other parameters of a query can be extracted from the [system.query_log](../../operations/system-tables/query_log.md) table via `query_id`.

@@ -2552,3 +2571,187 @@ Result:

This function can be used together with [generateRandom](../../sql-reference/table-functions/generate.md) to generate completely random tables.
+
+## structureToCapnProtoSchema {#structure_to_capn_proto_schema}
+
+Converts ClickHouse table structure to CapnProto schema.
+
+**Syntax**
+
+``` sql
+structureToCapnProtoSchema(structure[, root_struct_name])
+```
+
+**Arguments**
+
+- `structure` — Table structure in a format `column1_name column1_type, column2_name column2_type, ...`.
+- `root_struct_name` — Name for root struct in CapnProto schema. Default value: `Message`.
+
+**Returned value**
+
+- CapnProto schema
+
+Type: [String](../../sql-reference/data-types/string.md).
+
+**Examples**
+
+Query:
+
+``` sql
+SELECT structureToCapnProtoSchema('column1 String, column2 UInt32, column3 Array(String)') FORMAT RawBLOB
+```
+
+Result:
+
+``` text
+@0xf96402dd754d0eb7;
+
+struct Message
+{
+    column1 @0 : Data;
+    column2 @1 : UInt32;
+    column3 @2 : List(Data);
+}
+```
+
+Query:
+
+``` sql
+SELECT structureToCapnProtoSchema('column1 Nullable(String), column2 Tuple(element1 UInt32, element2 Array(String)), column3 Map(String, String)') FORMAT RawBLOB
+```
+
+Result:
+
+``` text
+@0xd1c8320fecad2b7f;
+
+struct Message
+{
+    struct Column1
+    {
+        union
+        {
+            value @0 : Data;
+            null @1 : Void;
+        }
+    }
+    column1 @0 : Column1;
+    struct Column2
+    {
+        element1 @0 : UInt32;
+        element2 @1 : List(Data);
+    }
+    column2 @1 : Column2;
+    struct Column3
+    {
+        struct Entry
+        {
+            key @0 : Data;
+            value @1 : Data;
+        }
+        entries @0 : List(Entry);
+    }
+    column3 @2 : Column3;
+}
+```
+
+Query:
+
+``` sql
+SELECT structureToCapnProtoSchema('column1 String, column2 UInt32', 'Root') FORMAT RawBLOB
+```
+
+Result:
+
+``` text
+@0x96ab2d4ab133c6e1;
+
+struct Root
+{
+    column1 @0 : Data;
+    column2 @1 : UInt32;
+}
+```
+
+## structureToProtobufSchema {#structure_to_protobuf_schema}
+
+Converts ClickHouse table structure to Protobuf schema.
+
+**Syntax**
+
+``` sql
+structureToProtobufSchema(structure[, root_message_name])
+```
+
+**Arguments**
+
+- `structure` — Table structure in a format `column1_name column1_type, column2_name column2_type, ...`.
+- `root_message_name` — Name for root message in Protobuf schema. Default value: `Message`.
+
+**Returned value**
+
+- Protobuf schema
+
+Type: [String](../../sql-reference/data-types/string.md).
+
+**Examples**
+
+Query:
+
+``` sql
+SELECT structureToProtobufSchema('column1 String, column2 UInt32, column3 Array(String)') FORMAT RawBLOB
+```
+
+Result:
+
+``` text
+syntax = "proto3";
+
+message Message
+{
+    bytes column1 = 1;
+    uint32 column2 = 2;
+    repeated bytes column3 = 3;
+}
+```
+
+Query:
+
+``` sql
+SELECT structureToProtobufSchema('column1 Nullable(String), column2 Tuple(element1 UInt32, element2 Array(String)), column3 Map(String, String)') FORMAT RawBLOB
+```
+
+Result:
+
+``` text
+syntax = "proto3";
+
+message Message
+{
+    bytes column1 = 1;
+    message Column2
+    {
+        uint32 element1 = 1;
+        repeated bytes element2 = 2;
+    }
+    Column2 column2 = 2;
+    map<string, bytes> column3 = 3;
+}
+```
+
+Query:
+
+``` sql
+SELECT structureToProtobufSchema('column1 String, column2 UInt32', 'Root') FORMAT RawBLOB
+```
+
+Result:
+
+``` text
+syntax = "proto3";
+
+message Root
+{
+    bytes column1 = 1;
+    uint32 column2 = 2;
+}
+```
diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md
index e593d9458f09..13c29329f41d 100644
--- a/docs/en/sql-reference/functions/random-functions.md
+++ b/docs/en/sql-reference/functions/random-functions.md
@@ -478,7 +478,7 @@ Result:
└─────────────────────┘
```

-## randomString
+## randomString {#randomString}

Generates a string of the specified length filled with random bytes (including zero bytes). Not all characters may be printable.

@@ -627,7 +627,7 @@ Result:
└──────────────────────┘
```

-## fuzzBits
+## fuzzBits {#fuzzBits}

**Syntax**

diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md
index 7336e53fc24a..8e50637cf300 100644
--- a/docs/en/sql-reference/functions/splitting-merging-functions.md
+++ b/docs/en/sql-reference/functions/splitting-merging-functions.md
@@ -21,7 +21,7 @@ splitByChar(separator, s[, max_substrings]))

- `separator` — The separator which should contain exactly one character. [String](../../sql-reference/data-types/string.md).
- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
-- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible.
+- `max_substrings` — An optional `Int64` defaulting to 0. If `max_substrings` > 0, the returned array will contain at most `max_substrings` substrings, otherwise the function will return as many substrings as possible.

**Returned value(s)**

Returns an array of selected substrings. Empty substrings may be selected when:

@@ -33,6 +33,17 @@ Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).

+:::note
+The behavior of parameter `max_substrings` changed starting with ClickHouse v22.11. In versions older than that, `max_substrings > 0` meant that `max_substrings`-many splits were performed and that the remainder of the string was returned as the final element of the list.
+For example,
+- in v22.10: `SELECT splitByChar('=', 'a=b=c=d', 2);` returned `['a','b','c=d']`
+- in v22.11: `SELECT splitByChar('=', 'a=b=c=d', 2);` returned `['a','b']`
+
+A behavior similar to ClickHouse pre-v22.11 can be achieved by setting
+[splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string):
+`SELECT splitByChar('=', 'a=b=c=d', 2) SETTINGS splitby_max_substrings_includes_remaining_string = 1 -- ['a', 'b=c=d']`
+:::
+
**Example**

``` sql
@@ -63,7 +74,6 @@ splitByString(separator, s[, max_substrings]))

- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned substrings will be no more than `max_substrings`, otherwise the function will return as many substrings as possible.
-
**Returned value(s)**

Returns an array of selected substrings. Empty substrings may be selected when:
@@ -74,6 +84,8 @@ Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).

- There are multiple consecutive non-empty separators;
- The original string `s` is empty while the separator is not empty.

+Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0.
+
**Example**

``` sql
@@ -127,6 +139,8 @@ Returns an array of selected substrings. Empty substrings may be selected when:

Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
+Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0.
+
**Example**

``` sql
@@ -176,6 +190,8 @@ Returns an array of selected substrings.

Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).

+Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0.
+
**Example**

``` sql
@@ -213,6 +229,8 @@ Returns an array of selected substrings.

Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).

+Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0.
+
**Example**

``` sql
@@ -273,6 +291,8 @@ Returns an array of selected substrings.

Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).

+Setting [splitby_max_substrings_includes_remaining_string](../../operations/settings/settings.md#splitby_max_substrings_includes_remaining_string) (default: 0) controls if the remaining string is included in the last element of the result array when argument `max_substrings` > 0.
+
**Example**

``` sql
diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index 5175bbf0615f..47e16b67643c 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -90,6 +90,8 @@ Returns the length of a string in bytes (not: in characters or Unicode code poin

The function also works for arrays.

+Alias: `OCTET_LENGTH`
+
## lengthUTF8

Returns the length of a string in Unicode code points (not: in bytes or characters). It assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.

@@ -573,6 +575,42 @@ Alias:

Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.

+
+## substringIndex(s, delim, count)
+
+Returns the substring of `s` before `count` occurrences of the delimiter `delim`, as in Spark or MySQL.
+
+**Syntax**
+
+```sql
+substringIndex(s, delim, count)
+```
+
+Alias: `SUBSTRING_INDEX`
+
+**Arguments**
+
+- `s` — The string to extract the substring from. [String](../../sql-reference/data-types/string.md).
+- `delim` — The delimiter character. [String](../../sql-reference/data-types/string.md).
+- `count` — The number of occurrences of the delimiter to count before extracting the substring. If `count` is positive, everything to the left of the final delimiter (counting from the left) is returned. If `count` is negative, everything to the right of the final delimiter (counting from the right) is returned.
[UInt or Int](../data-types/int-uint.md)
+
+**Example**
+
+``` sql
+SELECT substringIndex('www.clickhouse.com', '.', 2)
+```
+
+Result:
+```
+┌─substringIndex('www.clickhouse.com', '.', 2)─┐
+│ www.clickhouse                               │
+└──────────────────────────────────────────────┘
+```
+
+## substringIndexUTF8(s, delim, count)
+
+Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
+
## appendTrailingCharIfAbsent

Appends character `c` to string `s` if `s` is non-empty and does not end with character `c`.

@@ -691,6 +729,30 @@ Returns whether string `str` ends with `suffix`.

endsWith(str, suffix)
```

+## endsWithUTF8
+
+Returns whether string `str` ends with `suffix`. The difference between `endsWithUTF8` and `endsWith` is that `endsWithUTF8` matches `str` and `suffix` by UTF-8 characters.
+
+**Syntax**
+
+```sql
+endsWithUTF8(str, suffix)
+```
+
+**Example**
+
+``` sql
+SELECT endsWithUTF8('中国', '\xbd'), endsWith('中国', '\xbd')
+```
+
+Result:
+
+```result
+┌─endsWithUTF8('中国', '½')─┬─endsWith('中国', '½')─┐
+│                        0 │                    1 │
+└──────────────────────────┴──────────────────────┘
+```
+
## startsWith

Returns whether string `str` starts with `prefix`.

@@ -707,6 +769,25 @@ startsWith(str, prefix)

SELECT startsWith('Spider-Man', 'Spi');
```

+## startsWithUTF8
+
+Returns whether string `str` starts with `prefix`. The difference between `startsWithUTF8` and `startsWith` is that `startsWithUTF8` matches `str` and `prefix` by UTF-8 characters.
+
+**Example**
+
+``` sql
+SELECT startsWithUTF8('中国', '\xe4'), startsWith('中国', '\xe4')
+```
+
+Result:
+
+```result
+┌─startsWithUTF8('中国', '⥩─┬─startsWith('中国', '⥩─┐
+│                          0 │                      1 │
+└────────────────────────────┴────────────────────────┘
+```
+
## trim

Removes the specified characters from the start or end of a string. If not specified otherwise, the function removes whitespace (ASCII-character 32).

@@ -1149,6 +1230,42 @@ Result:
< Σ >
```

+## decodeHTMLComponent
+
+Un-escapes substrings with special meaning in HTML. For example: `&hbar;` `&gt;` `&diams;` `&heartsuit;` `&lt;` etc.
+
+This function also replaces numeric character references with Unicode characters. Both decimal (like `&#10003;`) and hexadecimal (`&#x2713;`) forms are supported.
+
+**Syntax**
+
+``` sql
+decodeHTMLComponent(x)
+```
+
+**Arguments**
+
+- `x` — An input string. [String](../../sql-reference/data-types/string.md).
+
+**Returned value**
+
+- The un-escaped string.
+
+Type: [String](../../sql-reference/data-types/string.md).
+
+**Example**
+
+``` sql
+SELECT decodeHTMLComponent('&apos;CH&apos;');
+SELECT decodeHTMLComponent('I&heartsuit;ClickHouse');
+```
+
+Result:
+
+```result
+'CH'
+I♥ClickHouse
+```
+
## extractTextFromHTML

This function extracts plain text from HTML or XHTML.

@@ -1253,3 +1370,48 @@ Result:
│ A240 │
└──────────────────┘
```
+
+## initcap
+
+Converts the first letter of each word to upper case and the rest to lower case. Words are sequences of alphanumeric characters separated by non-alphanumeric characters.
+
+## initcapUTF8
+
+Like [initcap](#initcap), assuming that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
+
+Does not detect the language, e.g. for Turkish the result might not be exactly correct (i/İ vs. i/I).
+
+If the length of the UTF-8 byte sequence is different for upper and lower case of a code point, the result may be incorrect for this code point.
+
+## firstLine
+
+Returns the first line from a multi-line string.
+
+**Syntax**
+
+```sql
+firstLine(val)
+```
+
+**Arguments**
+
+- `val` - Input value. [String](../data-types/string.md)
+
+**Returned value**
+
+- The first line of the input value or the whole value if there are no line
+  separators. [String](../data-types/string.md)
+
+**Example**
+
+```sql
+select firstLine('foo\nbar\nbaz');
+```
+
+Result:
+
+```result
+┌─firstLine('foo\nbar\nbaz')─┐
+│ foo                        │
+└────────────────────────────┘
+```
diff --git a/docs/en/sql-reference/functions/string-search-functions.md b/docs/en/sql-reference/functions/string-search-functions.md
index 3d8f89f72954..264708513fae 100644
--- a/docs/en/sql-reference/functions/string-search-functions.md
+++ b/docs/en/sql-reference/functions/string-search-functions.md
@@ -631,3 +631,129 @@ Result:
│  100 │ 200 │ 100-200 │ 100 │
└──────────────────────────────────────────────┴──────────────────────────────────────────────┴──────────────────────────────────────────────┴───────────────────────────────────────────┘
```
+
+## hasSubsequence
+
+Returns 1 if needle is a subsequence of haystack, or 0 otherwise.
+A subsequence of a string is a sequence that can be derived from the given string by deleting zero or more elements without changing the order of the remaining elements.
+
+**Syntax**
+
+``` sql
+hasSubsequence(haystack, needle)
+```
+
+**Arguments**
+
+- `haystack` — String in which the search is performed. [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `needle` — Subsequence to be searched. [String](../../sql-reference/syntax.md#syntax-string-literal).
+
+**Returned values**
+
+- 1, if needle is a subsequence of haystack.
+- 0, otherwise.
+
+Type: `UInt8`.
+
+**Examples**
+
+``` sql
+SELECT hasSubsequence('garbage', 'arg');
+```
+
+Result:
+
+``` text
+┌─hasSubsequence('garbage', 'arg')─┐
+│                                1 │
+└──────────────────────────────────┘
+```
+
+## hasSubsequenceCaseInsensitive
+
+Like [hasSubsequence](#hasSubsequence) but searches case-insensitively.
+
+## hasSubsequenceUTF8
+
+Like [hasSubsequence](#hasSubsequence) but assumes `haystack` and `needle` are UTF-8 encoded strings.
+
+## hasSubsequenceCaseInsensitiveUTF8
+
+Like [hasSubsequenceUTF8](#hasSubsequenceUTF8) but searches case-insensitively.
+
+## byteHammingDistance
+
+Calculates the [hamming distance](https://en.wikipedia.org/wiki/Hamming_distance) between two byte strings.
+
+**Syntax**
+
+```sql
+byteHammingDistance(string1, string2)
+```
+
+**Examples**
+
+``` sql
+SELECT byteHammingDistance('abc', 'ab');
+```
+
+Result:
+
+``` text
+┌─byteHammingDistance('abc', 'ab')─┐
+│                                1 │
+└──────────────────────────────────┘
+```
+
+- Alias: `mismatches`
+
+## jaccardIndex
+
+Calculates the [Jaccard similarity index](https://en.wikipedia.org/wiki/Jaccard_index) between two byte strings.
+
+**Syntax**
+
+```sql
+jaccardIndex(string1, string2)
+```
+
+**Examples**
+
+``` sql
+SELECT jaccardIndex('clickhouse', 'mouse');
+```
+
+Result:
+
+``` text
+┌─jaccardIndex('clickhouse', 'mouse')─┐
+│                                 0.4 │
+└─────────────────────────────────────┘
+```
+
+## editDistance
+
+Calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two byte strings.
+
+**Syntax**
+
+```sql
+editDistance(string1, string2)
+```
+
+**Examples**
+
+``` sql
+SELECT editDistance('clickhouse', 'mouse');
+```
+
+Result:
+
+``` text
+┌─editDistance('clickhouse', 'mouse')─┐
+│                                   6 │
+└─────────────────────────────────────┘
+```
+
+- Alias: `levenshteinDistance`
diff --git a/docs/en/sql-reference/functions/tuple-functions.md b/docs/en/sql-reference/functions/tuple-functions.md
index 1739920c9f05..2f7c6377ee14 100644
--- a/docs/en/sql-reference/functions/tuple-functions.md
+++ b/docs/en/sql-reference/functions/tuple-functions.md
@@ -22,20 +22,23 @@ tuple(x, y, …)

A function that allows getting a column from a tuple.

-If the second argument is a number `n`, it is the column index, starting from 1. If the second argument is a string `s`, it represents the name of the element. Besides, we can provide the third optional argument, such that when index out of bounds or element for such name does not exist, the default value returned instead of throw exception. The second and third arguments if provided are always must be constant. There is no cost to execute the function.
+If the second argument is a number `index`, it is the column index, starting from 1. If the second argument is a string `name`, it represents the name of the element. In addition, a third optional argument can be provided such that when the index is out of bounds or no element exists for the name, the default value is returned instead of throwing an exception. The second and third arguments, if provided, must be constants. There is no cost to execute the function.

-The function implements the operator `x.n` and `x.s`.
+The function implements operators `x.index` and `x.name`.

**Syntax**

``` sql
-tupleElement(tuple, n/s [, default_value])
+tupleElement(tuple, index [, default_value])
+tupleElement(tuple, name [, default_value])
```

## untuple

Performs syntactic substitution of [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) elements in the call location.

+The names of the result columns are implementation-specific and subject to change. Do not assume specific column names after `untuple`.
+
**Syntax**

``` sql
@@ -86,8 +89,6 @@ Result:
└───────┴───────┘
```

-Note: the names are implementation specific and are subject to change. You should not assume specific names of the columns after application of the `untuple`.
-
Example of using an `EXCEPT` expression:

Query:

@@ -558,6 +559,29 @@ Result:
└────────────────────────────┘
```

+## tupleConcat
+
+Combines tuples passed as arguments.
+
+``` sql
+tupleConcat(tuples)
+```
+
+**Arguments**
+
+- `tuples` – Arbitrary number of arguments of [Tuple](../../sql-reference/data-types/tuple.md) type.
+
+**Example**
+
+``` sql
+SELECT tupleConcat((1, 2), (3, 4), (true, false)) AS res
+```
+
+``` text
+┌─res──────────────────┐
+│ (1,2,3,4,true,false) │
+└──────────────────────┘
+```
+
## Distance functions
diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md
index 214c885bc0e3..0df72b5818c8 100644
--- a/docs/en/sql-reference/functions/type-conversion-functions.md
+++ b/docs/en/sql-reference/functions/type-conversion-functions.md
@@ -399,7 +399,11 @@ toDateTime(expr[, time_zone ])

- `expr` — The value. [String](/docs/en/sql-reference/data-types/string.md), [Int](/docs/en/sql-reference/data-types/int-uint.md), [Date](/docs/en/sql-reference/data-types/date.md) or [DateTime](/docs/en/sql-reference/data-types/datetime.md).
- `time_zone` — Time zone.
[String](/docs/en/sql-reference/data-types/string.md).

-If `expr` is a number, it is interpreted as the number of seconds since the beginning of the Unix Epoch (as Unix timestamp).
+:::note
+If `expr` is a number, it is interpreted as the number of seconds since the beginning of the Unix Epoch (as Unix timestamp).
+If `expr` is a [String](/docs/en/sql-reference/data-types/string.md), it may be interpreted as a Unix timestamp or as a string representation of date / date with time.
+Thus, parsing of short numbers' string representations (up to 4 digits) is explicitly disabled due to ambiguity, e.g. a string `'1999'` may be either a year (an incomplete string representation of Date / DateTime) or a Unix timestamp. Longer numeric strings are allowed.
+:::

**Returned value**

@@ -888,16 +892,29 @@ Query:

``` sql
SELECT
-    now() AS now_local,
-    toString(now(), 'Asia/Yekaterinburg') AS now_yekat;
+    now() AS ts,
+    time_zone,
+    toString(ts, time_zone) AS str_tz_datetime
+FROM system.time_zones
+WHERE time_zone LIKE 'Europe%'
+LIMIT 10
```

Result:

```response
-┌───────────now_local─┬─now_yekat───────────┐
-│ 2016-06-15 00:11:21 │ 2016-06-15 02:11:21 │
-└─────────────────────┴─────────────────────┘
+┌──────────────────ts─┬─time_zone─────────┬─str_tz_datetime─────┐
+│ 2023-09-08 19:14:59 │ Europe/Amsterdam  │ 2023-09-08 21:14:59 │
+│ 2023-09-08 19:14:59 │ Europe/Andorra    │ 2023-09-08 21:14:59 │
+│ 2023-09-08 19:14:59 │ Europe/Astrakhan  │ 2023-09-08 23:14:59 │
+│ 2023-09-08 19:14:59 │ Europe/Athens     │ 2023-09-08 22:14:59 │
+│ 2023-09-08 19:14:59 │ Europe/Belfast    │ 2023-09-08 20:14:59 │
+│ 2023-09-08 19:14:59 │ Europe/Belgrade   │ 2023-09-08 21:14:59 │
+│ 2023-09-08 19:14:59 │ Europe/Berlin     │ 2023-09-08 21:14:59 │
+│ 2023-09-08 19:14:59 │ Europe/Bratislava │ 2023-09-08 21:14:59 │
+│ 2023-09-08 19:14:59 │ Europe/Brussels   │ 2023-09-08 21:14:59 │
+│ 2023-09-08 19:14:59 │ Europe/Bucharest  │ 2023-09-08 22:14:59 │
+└─────────────────────┴───────────────────┴─────────────────────┘
```

Also see the `toUnixTimestamp` function.
diff --git a/docs/en/sql-reference/functions/udf.md b/docs/en/sql-reference/functions/udf.md
index 9c6b1b0c66b8..51734beed038 100644
--- a/docs/en/sql-reference/functions/udf.md
+++ b/docs/en/sql-reference/functions/udf.md
@@ -171,12 +171,13 @@ Result:
└──────────────────────────────┘
```

-Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type).
+Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type). It also requires the `execute_direct` option (to ensure no shell argument expansion vulnerability).

File `test_function_parameter_python.xml` (`/etc/clickhouse-server/test_function_parameter_python.xml` with default path settings).
```xml
<functions>
    <function>
        <type>executable</type>
+        <execute_direct>true</execute_direct>
        <name>test_function_parameter_python</name>
        <return_type>String</return_type>
diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md
index dae2c7dd1d3b..6ceb9b5849e7 100644
--- a/docs/en/sql-reference/statements/alter/column.md
+++ b/docs/en/sql-reference/statements/alter/column.md
@@ -213,7 +213,7 @@ Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC

Syntax:

```sql
-ALTER TABLE table_name MODIFY column_name REMOVE property;
+ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```

**Example**
diff --git a/docs/en/sql-reference/statements/alter/comment.md b/docs/en/sql-reference/statements/alter/comment.md
index cc49c6abf80b..f6fb179d969e 100644
--- a/docs/en/sql-reference/statements/alter/comment.md
+++ b/docs/en/sql-reference/statements/alter/comment.md
@@ -57,3 +57,9 @@ Output of a removed comment:
│         │
└─────────┘
```
+
+**Caveats**
+
+For Replicated tables, the comment can be different on different replicas. Modifying the comment applies to a single replica.
+
+The feature is available since version 23.9. It does not work in previous ClickHouse versions.
diff --git a/docs/en/sql-reference/statements/alter/index.md b/docs/en/sql-reference/statements/alter/index.md
index 7dadc2be5b2c..dca34d16f25f 100644
--- a/docs/en/sql-reference/statements/alter/index.md
+++ b/docs/en/sql-reference/statements/alter/index.md
@@ -36,6 +36,8 @@ These `ALTER` statements modify entities related to role-based access control:

[ALTER TABLE ... MODIFY COMMENT](/docs/en/sql-reference/statements/alter/comment.md) statement adds, modifies, or removes comments to the table, regardless if it was set before or not.

+[ALTER NAMED COLLECTION](/docs/en/sql-reference/statements/alter/named-collection.md) statement modifies [Named Collections](/docs/en/operations/named-collections.md).
+
## Mutations

`ALTER` queries that are intended to manipulate table data are implemented with a mechanism called “mutations”, most notably [ALTER TABLE … DELETE](/docs/en/sql-reference/statements/alter/delete.md) and [ALTER TABLE … UPDATE](/docs/en/sql-reference/statements/alter/update.md). They are asynchronous background processes similar to merges in [MergeTree](/docs/en/engines/table-engines/mergetree-family/index.md) tables that to produce new “mutated” versions of parts.
diff --git a/docs/en/sql-reference/statements/alter/named-collection.md b/docs/en/sql-reference/statements/alter/named-collection.md
new file mode 100644
index 000000000000..ac6752127c19
--- /dev/null
+++ b/docs/en/sql-reference/statements/alter/named-collection.md
@@ -0,0 +1,30 @@
+---
+slug: /en/sql-reference/statements/alter/named-collection
+sidebar_label: NAMED COLLECTION
+---
+
+# ALTER NAMED COLLECTION
+
+This query modifies an already existing named collection.
+
+**Syntax**
+
+```sql
+ALTER NAMED COLLECTION [IF EXISTS] name [ON CLUSTER cluster]
+[ SET
+key_name1 = 'some value',
+key_name2 = 'some value',
+key_name3 = 'some value',
+... ] |
+[ DELETE key_name4, key_name5, ...
] +``` + +**Example** + +```sql +CREATE NAMED COLLECTION foobar AS a = '1', b = '2'; + +ALTER NAMED COLLECTION foobar SET a = '2', c = '3'; + +ALTER NAMED COLLECTION foobar DELETE b; +``` diff --git a/docs/en/sql-reference/statements/alter/partition.md b/docs/en/sql-reference/statements/alter/partition.md index a0aa74e6d252..114b8d5ffe32 100644 --- a/docs/en/sql-reference/statements/alter/partition.md +++ b/docs/en/sql-reference/statements/alter/partition.md @@ -113,6 +113,7 @@ For the query to run successfully, the following conditions must be met: - Both tables must have the same structure. - Both tables must have the same partition key, the same order by key and the same primary key. +- Both tables must have the same indices and projections. - Both tables must have the same storage policy. ## REPLACE PARTITION @@ -132,6 +133,7 @@ For the query to run successfully, the following conditions must be met: - Both tables must have the same structure. - Both tables must have the same partition key, the same order by key and the same primary key. +- Both tables must have the same indices and projections. - Both tables must have the same storage policy. ## MOVE PARTITION TO TABLE @@ -146,6 +148,7 @@ For the query to run successfully, the following conditions must be met: - Both tables must have the same structure. - Both tables must have the same partition key, the same order by key and the same primary key. +- Both tables must have the same indices and projections. - Both tables must have the same storage policy. - Both tables must be the same engine family (replicated or non-replicated). diff --git a/docs/en/sql-reference/statements/alter/projection.md b/docs/en/sql-reference/statements/alter/projection.md index fb4389270897..428f4ea9aeca 100644 --- a/docs/en/sql-reference/statements/alter/projection.md +++ b/docs/en/sql-reference/statements/alter/projection.md @@ -22,7 +22,7 @@ You can see more technical details about how projections work internally on this ## Example filtering without using primary keys Creating the table: -``` +```sql CREATE TABLE visits_order ( `user_id` UInt64, @@ -34,7 +34,7 @@ ENGINE = MergeTree() PRIMARY KEY user_agent ``` Using `ALTER TABLE`, we could add the Projection to an existing table: -``` +```sql ALTER TABLE visits_order ADD PROJECTION user_name_projection ( SELECT * @@ -44,7 +44,7 @@ ORDER BY user_name ALTER TABLE visits_order MATERIALIZE PROJECTION user_name_projection ``` Inserting the data: -``` +```sql INSERT INTO visits_order SELECT number, 'test', @@ -55,7 +55,7 @@ FROM numbers(1, 100); The Projection will allow us to filter by `user_name` fast even if in the original Table `user_name` was not defined as a `PRIMARY_KEY`. At query time ClickHouse determined that less data will be processed if the projection is used, as the data is ordered by `user_name`. -``` +```sql SELECT * FROM visits_order @@ -64,14 +64,14 @@ LIMIT 2 ``` To verify that a query is using the projection, we could review the `system.query_log` table. 
On the `projections` field we have the name of the projection used or empty if none has been used: -``` +```sql SELECT query, projections FROM system.query_log WHERE query_id='' ``` ## Example pre-aggregation query Creating the table with the Projection: -``` +```sql CREATE TABLE visits ( `user_id` UInt64, @@ -90,7 +90,7 @@ ENGINE = MergeTree() ORDER BY user_agent ``` Inserting the data: -``` +```sql INSERT INTO visits SELECT number, 'test', @@ -98,7 +98,7 @@ INSERT INTO visits SELECT 'Android' FROM numbers(1, 100); ``` -``` +```sql INSERT INTO visits SELECT number, 'test', @@ -107,7 +107,7 @@ INSERT INTO visits SELECT FROM numbers(100, 500); ``` We will execute a first query using `GROUP BY` using the field `user_agent`, this query will not use the projection defined as the pre-aggregation does not match. -``` +```sql SELECT user_agent, count(DISTINCT user_id) @@ -116,7 +116,7 @@ GROUP BY user_agent ``` To use the projection we could execute queries that select part of, or all of the pre-aggregation and `GROUP BY` fields. -``` +```sql SELECT user_agent FROM visits @@ -132,7 +132,7 @@ GROUP BY user_agent ``` As mentioned before, we could review the `system.query_log` table. On the `projections` field we have the name of the projection used or empty if none has been used: -``` +```sql SELECT query, projections FROM system.query_log WHERE query_id='' ``` diff --git a/docs/en/sql-reference/statements/alter/sample-by.md b/docs/en/sql-reference/statements/alter/sample-by.md index b20f3c7b5d33..ccad792f8535 100644 --- a/docs/en/sql-reference/statements/alter/sample-by.md +++ b/docs/en/sql-reference/statements/alter/sample-by.md @@ -5,15 +5,28 @@ sidebar_label: SAMPLE BY title: "Manipulating Sampling-Key Expressions" --- -Syntax: +# Manipulating SAMPLE BY expression + +The following operations are available: + +## MODIFY ``` sql ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY SAMPLE BY new_expression ``` -The command changes the [sampling key](../../../engines/table-engines/mergetree-family/mergetree.md) of the table to `new_expression` (an expression or a tuple of expressions). +The command changes the [sampling key](../../../engines/table-engines/mergetree-family/mergetree.md) of the table to `new_expression` (an expression or a tuple of expressions). The primary key must contain the new sample key. + +## REMOVE + +``` sql +ALTER TABLE [db].name [ON CLUSTER cluster] REMOVE SAMPLE BY +``` + +The command removes the [sampling key](../../../engines/table-engines/mergetree-family/mergetree.md) of the table. + -The command is lightweight in the sense that it only changes metadata. The primary key must contain the new sample key. +The commands `MODIFY` and `REMOVE` are lightweight in the sense that they only change metadata or remove files. :::note It only works for tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables). diff --git a/docs/en/sql-reference/statements/alter/user.md b/docs/en/sql-reference/statements/alter/user.md index 8785610f58a3..fd7da05167c4 100644 --- a/docs/en/sql-reference/statements/alter/user.md +++ b/docs/en/sql-reference/statements/alter/user.md @@ -14,6 +14,7 @@ ALTER USER [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1] [, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...] 
[NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}] [[ADD | DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] + [VALID UNTIL datetime] [DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ] [GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]] [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [READONLY | WRITABLE] | PROFILE 'profile_name'] [,...] diff --git a/docs/en/sql-reference/statements/create/index.md b/docs/en/sql-reference/statements/create/index.md index 14e29d051d77..fa39526a53e7 100644 --- a/docs/en/sql-reference/statements/create/index.md +++ b/docs/en/sql-reference/statements/create/index.md @@ -8,13 +8,14 @@ sidebar_label: CREATE Create queries make a new entity of one of the following kinds: -- [DATABASE](../../../sql-reference/statements/create/database.md) -- [TABLE](../../../sql-reference/statements/create/table.md) -- [VIEW](../../../sql-reference/statements/create/view.md) -- [DICTIONARY](../../../sql-reference/statements/create/dictionary.md) -- [FUNCTION](../../../sql-reference/statements/create/function.md) -- [USER](../../../sql-reference/statements/create/user.md) -- [ROLE](../../../sql-reference/statements/create/role.md) -- [ROW POLICY](../../../sql-reference/statements/create/row-policy.md) -- [QUOTA](../../../sql-reference/statements/create/quota.md) -- [SETTINGS PROFILE](../../../sql-reference/statements/create/settings-profile.md) +- [DATABASE](/docs/en/sql-reference/statements/create/database.md) +- [TABLE](/docs/en/sql-reference/statements/create/table.md) +- [VIEW](/docs/en/sql-reference/statements/create/view.md) +- [DICTIONARY](/docs/en/sql-reference/statements/create/dictionary.md) +- [FUNCTION](/docs/en/sql-reference/statements/create/function.md) +- [USER](/docs/en/sql-reference/statements/create/user.md) +- [ROLE](/docs/en/sql-reference/statements/create/role.md) +- [ROW POLICY](/docs/en/sql-reference/statements/create/row-policy.md) +- [QUOTA](/docs/en/sql-reference/statements/create/quota.md) +- [SETTINGS PROFILE](/docs/en/sql-reference/statements/create/settings-profile.md) +- [NAMED COLLECTION](/docs/en/sql-reference/statements/create/named-collection.md) diff --git a/docs/en/sql-reference/statements/create/named-collection.md b/docs/en/sql-reference/statements/create/named-collection.md new file mode 100644 index 000000000000..1fc7b11c5543 --- /dev/null +++ b/docs/en/sql-reference/statements/create/named-collection.md @@ -0,0 +1,34 @@ +--- +slug: /en/sql-reference/statements/create/named-collection +sidebar_label: NAMED COLLECTION +--- + +# CREATE NAMED COLLECTION + +Creates a new named collection. + +**Syntax** + +```sql +CREATE NAMED COLLECTION [IF NOT EXISTS] name [ON CLUSTER cluster] AS +key_name1 = 'some value', +key_name2 = 'some value', +key_name3 = 'some value', +... 
+
+```
+
+**Example**
+
+```sql
+CREATE NAMED COLLECTION foobar AS a = '1', b = '2';
+```
+
+**Related statements**
+
+- [ALTER NAMED COLLECTION](https://clickhouse.com/docs/en/sql-reference/statements/alter/named-collection)
+- [DROP NAMED COLLECTION](https://clickhouse.com/docs/en/sql-reference/statements/drop#drop-named-collection)
+
+**See Also**
+
+- [Named collections guide](/docs/en/operations/named-collections.md)
diff --git a/docs/en/sql-reference/statements/create/quota.md b/docs/en/sql-reference/statements/create/quota.md
index c69285171ab5..a6ced870c18f 100644
--- a/docs/en/sql-reference/statements/create/quota.md
+++ b/docs/en/sql-reference/statements/create/quota.md
@@ -11,6 +11,7 @@ Syntax:

``` sql
CREATE QUOTA [IF NOT EXISTS | OR REPLACE] name [ON CLUSTER cluster_name]
+    [IN access_storage_type]
    [KEYED BY {user_name | ip_address | client_key | client_key,user_name | client_key,ip_address} | NOT KEYED]
    [FOR [RANDOMIZED] INTERVAL number {second | minute | hour | day | week | month | quarter | year}
        {MAX { {queries | query_selects | query_inserts | errors | result_rows | result_bytes | read_rows | read_bytes | execution_time} = number } [,...] |
diff --git a/docs/en/sql-reference/statements/create/role.md b/docs/en/sql-reference/statements/create/role.md
index 9b14e220e1ff..4b6fffe4f60a 100644
--- a/docs/en/sql-reference/statements/create/role.md
+++ b/docs/en/sql-reference/statements/create/role.md
@@ -11,6 +11,7 @@ Syntax:

``` sql
CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...]
+    [IN access_storage_type]
    [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...]
```
diff --git a/docs/en/sql-reference/statements/create/row-policy.md b/docs/en/sql-reference/statements/create/row-policy.md
index 83bb2e6bb9a8..cd7718793bd5 100644
--- a/docs/en/sql-reference/statements/create/row-policy.md
+++ b/docs/en/sql-reference/statements/create/row-policy.md
@@ -16,6 +16,7 @@ Syntax:

``` sql
CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluster_name1] ON [db1.]table1|db1.* [, policy_name2 [ON CLUSTER cluster_name2] ON [db2.]table2|db2.* ...]
+    [IN access_storage_type]
    [FOR SELECT] USING condition
    [AS {PERMISSIVE | RESTRICTIVE}]
    [TO {role1 [, role2 ...] | ALL | ALL EXCEPT role1 [, role2 ...]}]
diff --git a/docs/en/sql-reference/statements/create/settings-profile.md b/docs/en/sql-reference/statements/create/settings-profile.md
index 8e221a4d82fb..d8afce9d6de2 100644
--- a/docs/en/sql-reference/statements/create/settings-profile.md
+++ b/docs/en/sql-reference/statements/create/settings-profile.md
@@ -12,6 +12,7 @@ Syntax:

``` sql
CREATE SETTINGS PROFILE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...]
+    [IN access_storage_type]
    [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | INHERIT 'profile_name'] [,...]
```
diff --git a/docs/en/sql-reference/statements/create/table.md b/docs/en/sql-reference/statements/create/table.md
index 1a72f89fb1f3..5e94a5fdc6f0 100644
--- a/docs/en/sql-reference/statements/create/table.md
+++ b/docs/en/sql-reference/statements/create/table.md
@@ -391,15 +391,19 @@ DEFLATE_QPL is not available in ClickHouse Cloud.

### Specialized Codecs

-These codecs are designed to make compression more effective by using specific features of data.
Some of these codecs do not compress data themself. Instead, they prepare the data for a common purpose codec, which compresses it better than without this preparation. +These codecs are designed to make compression more effective by exploiting specific features of the data. Some of these codecs do not compress data themselves; instead, they preprocess the data such that a second compression stage using a general-purpose codec can achieve a higher data compression rate. #### Delta -`Delta(delta_bytes)` — Compression approach in which raw values are replaced by the difference of two neighboring values, except for the first value that stays unchanged. Up to `delta_bytes` are used for storing delta values, so `delta_bytes` is the maximum size of raw values. Possible `delta_bytes` values: 1, 2, 4, 8. The default value for `delta_bytes` is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it’s 1. +`Delta(delta_bytes)` — Compression approach in which raw values are replaced by the difference of two neighboring values, except for the first value that stays unchanged. Up to `delta_bytes` are used for storing delta values, so `delta_bytes` is the maximum size of raw values. Possible `delta_bytes` values: 1, 2, 4, 8. The default value for `delta_bytes` is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it’s 1. Delta is a data preparation codec, i.e. it cannot be used stand-alone. #### DoubleDelta -`DoubleDelta(bytes_size)` — Calculates delta of deltas and writes it in compact binary form. Possible `bytes_size` values: 1, 2, 4, 8, the default value is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it’s 1. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-bit deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). +`DoubleDelta(bytes_size)` — Calculates delta of deltas and writes it in compact binary form. Possible `bytes_size` values: 1, 2, 4, 8, the default value is `sizeof(type)` if equal to 1, 2, 4, or 8. In all other cases, it’s 1. Optimal compression rates are achieved for monotonic sequences with a constant stride, such as time series data. Can be used with any fixed-width type. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. Uses 1 extra bit for 32-bit deltas: 5-bit prefixes instead of 4-bit prefixes. For additional information, see Compressing Time Stamps in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). DoubleDelta is a data preparation codec, i.e. it cannot be used stand-alone. + +#### GCD + +`GCD()` — Calculates the greatest common divisor (GCD) of the values in the column, then divides each value by the GCD. Can be used with integer, decimal and date/time columns. The codec is well suited for columns with values that change (increase or decrease) in multiples of the GCD, e.g. 24, 28, 16, 24, 8, 24 (GCD = 4). GCD is a data preparation codec, i.e. it cannot be used stand-alone.
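+
+As a minimal sketch of how these preparation codecs are chained with a general-purpose codec (the table and column names below are illustrative only):
+
+```sql
+CREATE TABLE codec_example
+(
+    ts DateTime CODEC(DoubleDelta, ZSTD), -- monotonic timestamps: delta-of-deltas, then ZSTD
+    step UInt64 CODEC(GCD, LZ4)           -- values sharing a common divisor: GCD, then LZ4
+)
+ENGINE = MergeTree
+ORDER BY ts;
+```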
#### Gorilla diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md index d168be63c36c..20cd9a47e982 100644 --- a/docs/en/sql-reference/statements/create/user.md +++ b/docs/en/sql-reference/statements/create/user.md @@ -12,8 +12,10 @@ Syntax: ``` sql CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] - [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}] + [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] + [VALID UNTIL datetime] + [IN access_storage_type] [DEFAULT ROLE role [,...]] [DEFAULT DATABASE database | NONE] [GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]] @@ -37,8 +39,35 @@ There are multiple ways of user identification: - `IDENTIFIED WITH ldap SERVER 'server_name'` - `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'` - `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'` +- `IDENTIFIED WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa', KEY 'another_public_key' TYPE 'ssh-ed25519'` - `IDENTIFIED BY 'qwerty'` +Password complexity requirements can be edited in [config.xml](/docs/en/operations/configuration-files). Below is an example configuration that requires passwords to be at least 12 characters long and contain 1 number. Each password complexity rule requires a regex to match against passwords and a description of the rule. + +```xml +<clickhouse> + <password_complexity> + <rule> + <pattern>.{12}</pattern> + <message>be at least 12 characters long</message> + </rule> + <rule> + <pattern>\p{N}</pattern> + <message>contain at least 1 numeric character</message> + </rule> + </password_complexity> +</clickhouse> +``` + +:::note +In ClickHouse Cloud, by default, passwords must meet the following complexity requirements: +- Be at least 12 characters long +- Contain at least 1 numeric character +- Contain at least 1 uppercase character +- Contain at least 1 lowercase character +- Contain at least 1 special character +::: + ## Examples 1. The following username is `name1` and does not require a password - which obviously doesn't provide much security: @@ -63,7 +92,7 @@ There are multiple ways of user identification: CREATE USER name3 IDENTIFIED WITH sha256_password BY 'my_password' ``` - The `name3` user can now login using `my_password`, but the password is stored as the hashed value above. THe following SQL file was created in `/var/lib/clickhouse/access` and gets executed at server startup: + The `name3` user can now login using `my_password`, but the password is stored as the hashed value above. The following SQL file was created in `/var/lib/clickhouse/access` and gets executed at server startup: ```bash /var/lib/clickhouse/access $ cat 3843f510-6ebd-a52d-72ac-e021686d8a93.sql @@ -135,6 +164,16 @@ Another way of specifying host is to use `@` syntax following the username. Exam ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technically you can create multiple users with the same `user_name` and different constructions after `@`.
However, we do not recommend to do so. ::: +## VALID UNTIL Clause + +Allows you to specify the expiration date and, optionally, the time for user credentials. It accepts a string as a parameter. It is recommended to use the `YYYY-MM-DD [hh:mm:ss] [timezone]` format for datetime. By default, this parameter equals `'infinity'`. + +Examples: + +- `CREATE USER name1 VALID UNTIL '2025-01-01'` +- `CREATE USER name1 VALID UNTIL '2025-01-01 12:00:00 UTC'` +- `CREATE USER name1 VALID UNTIL 'infinity'` + ## GRANTEES Clause Specifies users or roles which are allowed to receive [privileges](../../../sql-reference/statements/grant.md#grant-privileges) from this user on the condition this user has also all required access granted with [GRANT OPTION](../../../sql-reference/statements/grant.md#grant-privigele-syntax). Options of the `GRANTEES` clause: diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 10b156381525..11026340a0fc 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -97,7 +97,7 @@ This is an experimental feature that may change in backwards-incompatible ways i ::: ```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... +CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ... ``` Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query. diff --git a/docs/en/sql-reference/statements/drop.md b/docs/en/sql-reference/statements/drop.md index b6208c2fd52a..8ed00f625d61 100644 --- a/docs/en/sql-reference/statements/drop.md +++ b/docs/en/sql-reference/statements/drop.md @@ -21,6 +21,7 @@ DROP DATABASE [IF EXISTS] db [ON CLUSTER cluster] [SYNC] ## DROP TABLE Deletes the table. +If the `IF EMPTY` clause is specified, the server checks whether the table is empty only on the replica that received the initial query. :::tip Also see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md) @@ -29,7 +30,7 @@ Also see [UNDROP TABLE](/docs/en/sql-reference/statements/undrop.md) Syntax: ``` sql -DROP [TEMPORARY] TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] [SYNC] +DROP [TEMPORARY] TABLE [IF EXISTS] [IF EMPTY] [db.]name [ON CLUSTER cluster] [SYNC] ``` ## DROP DICTIONARY @@ -49,7 +50,7 @@ Deletes a user. Syntax: ``` sql -DROP USER [IF EXISTS] name [,...] [ON CLUSTER cluster_name] +DROP USER [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP ROLE @@ -59,7 +60,7 @@ Deletes a role. The deleted role is revoked from all the entities where it was a Syntax: ``` sql -DROP ROLE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] +DROP ROLE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP ROW POLICY @@ -69,7 +70,7 @@ Deletes a row policy. Deleted row policy is revoked from all the entities where Syntax: ``` sql -DROP [ROW] POLICY [IF EXISTS] name [,...] ON [database.]table [,...] [ON CLUSTER cluster_name] +DROP [ROW] POLICY [IF EXISTS] name [,...] ON [database.]table [,...]
[ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP QUOTA @@ -79,7 +80,7 @@ Deletes a quota. The deleted quota is revoked from all the entities where it was Syntax: ``` sql -DROP QUOTA [IF EXISTS] name [,...] [ON CLUSTER cluster_name] +DROP QUOTA [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP SETTINGS PROFILE @@ -89,7 +90,7 @@ Deletes a settings profile. The deleted settings profile is revoked from all the Syntax: ``` sql -DROP [SETTINGS] PROFILE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] +DROP [SETTINGS] PROFILE [IF EXISTS] name [,...] [ON CLUSTER cluster_name] [FROM access_storage_type] ``` ## DROP VIEW @@ -119,3 +120,20 @@ DROP FUNCTION [IF EXISTS] function_name [on CLUSTER cluster] CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b; DROP FUNCTION linear_equation; ``` + +## DROP NAMED COLLECTION + +Deletes a named collection. + +**Syntax** + +``` sql +DROP NAMED COLLECTION [IF EXISTS] name [on CLUSTER cluster] +``` + +**Example** + +``` sql +CREATE NAMED COLLECTION foobar AS a = '1', b = '2'; +DROP NAMED COLLECTION foobar; +``` diff --git a/docs/en/sql-reference/statements/insert-into.md b/docs/en/sql-reference/statements/insert-into.md index d6e30827f9bb..e0cc98c2351f 100644 --- a/docs/en/sql-reference/statements/insert-into.md +++ b/docs/en/sql-reference/statements/insert-into.md @@ -11,7 +11,7 @@ Inserts data into a table. **Syntax** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` You can specify a list of columns to insert using the `(c1, c2, c3)`. You can also use an expression with column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). @@ -107,7 +107,7 @@ If table has [constraints](../../sql-reference/statements/create/table.md#constr **Syntax** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] SELECT ... ``` Columns are mapped according to their position in the SELECT clause. However, their names in the SELECT expression and the table for INSERT may differ. If necessary, type casting is performed. @@ -126,7 +126,7 @@ To insert a default value instead of `NULL` into a column with not nullable data **Syntax** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name ``` Use the syntax above to insert data from a file, or files, stored on the **client** side. `file_name` and `type` are string literals. Input file [format](../../interfaces/formats.md) must be set in the `FORMAT` clause. diff --git a/docs/en/sql-reference/statements/move.md b/docs/en/sql-reference/statements/move.md new file mode 100644 index 000000000000..fac738ff7117 --- /dev/null +++ b/docs/en/sql-reference/statements/move.md @@ -0,0 +1,32 @@ +--- +slug: /en/sql-reference/statements/move +sidebar_position: 54 +sidebar_label: MOVE +--- + +# MOVE access entity statement + +This statement allows moving an access entity from one access storage to another.
+ +Syntax: + +```sql +MOVE {USER, ROLE, QUOTA, SETTINGS PROFILE, ROW POLICY} name1 [, name2, ...] TO access_storage_type +``` + +Currently, there are five access storages in ClickHouse: + - `local_directory` + - `memory` + - `replicated` + - `users_xml` (ro) + - `ldap` (ro) + +Examples: + +```sql +MOVE USER test TO local_directory +``` + +```sql +MOVE ROLE test TO memory +``` diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md index 45d336c42f2c..49843eaff9a5 100644 --- a/docs/en/sql-reference/statements/optimize.md +++ b/docs/en/sql-reference/statements/optimize.md @@ -94,8 +94,10 @@ Result: │ 1 │ 1 │ 3 │ 3 │ └─────────────┴───────────────┴───────┴───────────────┘ ``` +All following examples are executed against this state with 5 rows. -When columns for deduplication are not specified, all of them are taken into account. Row is removed only if all values in all columns are equal to corresponding values in previous row: +#### `DEDUPLICATE` +When columns for deduplication are not specified, all of them are taken into account. The row is removed only if all values in all columns are equal to corresponding values in the previous row: ``` sql OPTIMIZE TABLE example FINAL DEDUPLICATE; @@ -116,7 +118,7 @@ Result: │ 1 │ 1 │ 3 │ 3 │ └─────────────┴───────────────┴───────┴───────────────┘ ``` - +#### `DEDUPLICATE BY *` When columns are specified implicitly, the table is deduplicated by all columns that are not `ALIAS` or `MATERIALIZED`. Considering the table above, these are `primary_key`, `secondary_key`, `value`, and `partition_key` columns: ```sql OPTIMIZE TABLE example FINAL DEDUPLICATE BY *; @@ -137,7 +139,7 @@ Result: │ 1 │ 1 │ 3 │ 3 │ └─────────────┴───────────────┴───────┴───────────────┘ ``` - +#### `DEDUPLICATE BY * EXCEPT` Deduplicate by all columns that are not `ALIAS` or `MATERIALIZED` and explicitly not `value`: `primary_key`, `secondary_key`, and `partition_key` columns. ``` sql @@ -158,7 +160,7 @@ Result: │ 1 │ 1 │ 2 │ 3 │ └─────────────┴───────────────┴───────┴───────────────┘ ``` - +#### `DEDUPLICATE BY <list of columns>` Deduplicate explicitly by `primary_key`, `secondary_key`, and `partition_key` columns: ```sql OPTIMIZE TABLE example FINAL DEDUPLICATE BY primary_key, secondary_key, partition_key; ``` @@ -178,8 +180,8 @@ Result: │ 1 │ 1 │ 2 │ 3 │ └─────────────┴───────────────┴───────┴───────────────┘ ``` - -Deduplicate by any column matching a regex: `primary_key`, `secondary_key`, and `partition_key` columns: +#### `DEDUPLICATE BY COLUMNS(<regex>)` +Deduplicate by all columns matching a regex: `primary_key`, `secondary_key`, and `partition_key` columns: ```sql OPTIMIZE TABLE example FINAL DEDUPLICATE BY COLUMNS('.*_key'); ``` diff --git a/docs/en/sql-reference/statements/select/index.md b/docs/en/sql-reference/statements/select/index.md index 2863c5c0116d..86a4e9639f5d 100644 --- a/docs/en/sql-reference/statements/select/index.md +++ b/docs/en/sql-reference/statements/select/index.md @@ -60,9 +60,9 @@ Specifics of each optional clause are covered in separate sections, which are li If you want to include all columns in the result, use the asterisk (`*`) symbol. For example, `SELECT * FROM ...`. -### COLUMNS expression +### Dynamic column selection -To match some columns in the result with a [re2](https://en.wikipedia.org/wiki/RE2_(software)) regular expression, you can use the `COLUMNS` expression.
+Dynamic column selection (also known as a COLUMNS expression) allows you to match some columns in a result with a [re2](https://en.wikipedia.org/wiki/RE2_(software)) regular expression. ``` sql COLUMNS('regexp') ``` diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md index 7971b3ba2754..38922e964b8d 100644 --- a/docs/en/sql-reference/statements/select/join.md +++ b/docs/en/sql-reference/statements/select/join.md @@ -163,6 +163,61 @@ Result: │ 4 │ -4 │ 4 │ └───┴────┴─────┘ ``` + +## NULL values in JOIN keys + +NULL is not equal to any value, including itself. This means that if a JOIN key has a NULL value in one table, it won't match a NULL value in the other table. + +**Example** + +Table `A`: + +``` +┌───id─┬─name────┐ +│ 1 │ Alice │ +│ 2 │ Bob │ +│ ᴺᵁᴸᴸ │ Charlie │ +└──────┴─────────┘ +``` + +Table `B`: + +``` +┌───id─┬─score─┐ +│ 1 │ 90 │ +│ 3 │ 85 │ +│ ᴺᵁᴸᴸ │ 88 │ +└──────┴───────┘ +``` + +```sql +SELECT A.name, B.score FROM A LEFT JOIN B ON A.id = B.id +``` + +``` +┌─name────┬─score─┐ +│ Alice │ 90 │ +│ Bob │ 0 │ +│ Charlie │ 0 │ +└─────────┴───────┘ +``` + +Notice that the row with `Charlie` from table `A` and the row with score 88 from table `B` are not in the result because of the NULL value in the JOIN key. + +If you want to match NULL values, use the `isNotDistinctFrom` function to compare the JOIN keys. + +```sql +SELECT A.name, B.score FROM A LEFT JOIN B ON isNotDistinctFrom(A.id, B.id) +``` + +``` +┌─name────┬─score─┐ +│ Alice │ 90 │ +│ Bob │ 0 │ +│ Charlie │ 88 │ +└─────────┴───────┘ +``` + ## ASOF JOIN Usage `ASOF JOIN` is useful when you need to join records that have no exact match. diff --git a/docs/en/sql-reference/statements/select/with.md b/docs/en/sql-reference/statements/select/with.md index 4654f2495482..a59ef463419b 100644 --- a/docs/en/sql-reference/statements/select/with.md +++ b/docs/en/sql-reference/statements/select/with.md @@ -5,7 +5,27 @@ sidebar_label: WITH # WITH Clause -ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)), that is provides to use results of `WITH` clause in the rest of `SELECT` query. Named subqueries can be included to the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression. +ClickHouse supports Common Table Expressions ([CTE](https://en.wikipedia.org/wiki/Hierarchical_and_recursive_queries_in_SQL)) and substitutes the code defined in the `WITH` clause in all places of use in the rest of the `SELECT` query. Named subqueries can be included in the current and child query context in places where table objects are allowed. Recursion is prevented by hiding the current level CTEs from the WITH expression. + +Please note that CTEs do not guarantee the same results in all places they are called, because the query will be re-executed for each use. + +An example of this behavior is shown below: +``` sql +with cte_numbers as +( + select + num + from generateRandom('num UInt64', NULL) + limit 1000000 +) +select + count() +from cte_numbers +where num in (select num from cte_numbers) +``` +If CTEs were to pass exactly the results and not just a piece of code, you would always see `1000000`. + +However, because we refer to `cte_numbers` twice, random numbers are generated on each use and, accordingly, we see different random results, such as `280501, 392454, 261636, 196227` and so on...
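+
+As a sketch of one possible workaround (the temporary table name is illustrative), materialize the intermediate result so that it is evaluated only once:
+
+``` sql
+create temporary table cte_materialized as
+select num from generateRandom('num UInt64', NULL) limit 1000000;
+
+select count() from cte_materialized where num in (select num from cte_materialized);
+```
+
+With this approach the count is always `1000000`, because the random numbers are generated once.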
## Syntax diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md index f96eb55aa453..21bd674bd248 100644 --- a/docs/en/sql-reference/statements/show.md +++ b/docs/en/sql-reference/statements/show.md @@ -189,7 +189,7 @@ Result: - [Create Tables](https://clickhouse.com/docs/en/getting-started/tutorial/#create-tables) - [SHOW CREATE TABLE](https://clickhouse.com/docs/en/sql-reference/statements/show/#show-create-table) -## SHOW COLUMNS +## SHOW COLUMNS {#show_columns} Displays a list of columns @@ -205,16 +205,16 @@ The optional keyword `EXTENDED` currently has no effect, it only exists for MySQ The optional keyword `FULL` causes the output to include the collation, comment and privilege columns. -`SHOW COLUMNS` produces a result table with the following structure: -- field - The name of the column (String) -- type - The column data type (String) -- null - If the column data type is Nullable (UInt8) -- key - `PRI` if the column is part of the primary key, `SOR` if the column is part of the sorting key, empty otherwise (String) -- default - Default expression of the column if it is of type `ALIAS`, `DEFAULT`, or `MATERIALIZED`, otherwise `NULL`. (Nullable(String)) -- extra - Additional information, currently unused (String) -- collation - (only if `FULL` keyword was specified) Collation of the column, always `NULL` because ClickHouse has no per-column collations (Nullable(String)) -- comment - (only if `FULL` keyword was specified) Comment on the column (String) -- privilege - (only if `FULL` keyword was specified) The privilege you have on this column, currently not available (String) +The statement produces a result table with the following structure: +- `field` - The name of the column (String) +- `type` - The column data type. If setting [use_mysql_types_in_show_columns](../../operations/settings/settings.md#use_mysql_types_in_show_columns) = 1 (default: 0), then the equivalent type name in MySQL is shown. (String) +- `null` - `YES` if the column data type is Nullable, `NO` otherwise (String) +- `key` - `PRI` if the column is part of the primary key, `SOR` if the column is part of the sorting key, empty otherwise (String) +- `default` - Default expression of the column if it is of type `ALIAS`, `DEFAULT`, or `MATERIALIZED`, otherwise `NULL`. (Nullable(String)) +- `extra` - Additional information, currently unused (String) +- `collation` - (only if `FULL` keyword was specified) Collation of the column, always `NULL` because ClickHouse has no per-column collations (Nullable(String)) +- `comment` - (only if `FULL` keyword was specified) Comment on the column (String) +- `privilege` - (only if `FULL` keyword was specified) The privilege you have on this column, currently not available (String) **Examples** @@ -272,6 +272,10 @@ SHOW DICTIONARIES FROM db LIKE '%reg%' LIMIT 2 Displays a list of primary and data skipping indexes of a table. +This statement mostly exists for compatibility with MySQL. System tables [system.tables](../../operations/system-tables/tables.md) (for +primary keys) and [system.data_skipping_indices](../../operations/system-tables/data_skipping_indices.md) (for data skipping indices) +provide equivalent information but in a fashion more native to ClickHouse. + ```sql SHOW [EXTENDED] {INDEX | INDEXES | INDICES | KEYS } {FROM | IN} <table> [{FROM | IN} <db>] [WHERE <expr>] [INTO OUTFILE <filename>] [FORMAT <format>] ``` @@ -281,22 +285,22 @@ equivalent.
If no database is specified, the query assumes the current database The optional keyword `EXTENDED` currently has no effect, it only exists for MySQL compatibility. -`SHOW INDEX` produces a result table with the following structure: -- table - The name of the table (String) -- non_unique - 0 if the index can contain duplicates, 1 otherwise (UInt8) -- key_name - The name of the index, `PRIMARY` if the index is a primary key index (String) -- seq_in_index - Currently unused -- column_name - Currently unused -- collation - The sorting of the column in the index, `A` if ascending, `D` if descending, `NULL` if unsorted (Nullable(String)) -- cardinality - Currently unused -- sub_part - Currently unused -- packed - Currently unused -- null - Currently unused -- index_type - The index type, e.g. `primary`, `minmax`, `bloom_filter` etc. (String) -- comment - Currently unused -- index_comment - Currently unused -- visible - If the index is visible to the optimizer, always `YES` (String) -- expression - The index expression (String) +The statement produces a result table with the following structure: +- `table` - The name of the table. (String) +- `non_unique` - Always `1` as ClickHouse does not support uniqueness constraints. (UInt8) +- `key_name` - The name of the index, `PRIMARY` if the index is a primary key index. (String) +- `seq_in_index` - For a primary key index, the position of the column starting from `1`. For a data skipping index: always `1`. (UInt8) +- `column_name` - For a primary key index, the name of the column. For a data skipping index: `''` (empty string), see field "expression". (String) +- `collation` - The sorting of the column in the index: `A` if ascending, `D` if descending, `NULL` if unsorted. (Nullable(String)) +- `cardinality` - An estimation of the index cardinality (number of unique values in the index). Currently always 0. (UInt64) +- `sub_part` - Always `NULL` because ClickHouse does not support index prefixes like MySQL. (Nullable(String)) +- `packed` - Always `NULL` because ClickHouse does not support packed indexes (like MySQL). (Nullable(String)) +- `null` - Currently unused +- `index_type` - The index type, e.g. `PRIMARY`, `MINMAX`, `BLOOM_FILTER` etc. (String) +- `comment` - Additional information about the index, currently always `''` (empty string). (String) +- `index_comment` - `''` (empty string) because indexes in ClickHouse cannot have a `COMMENT` field (like in MySQL). (String) +- `visible` - If the index is visible to the optimizer, always `YES`. (String) +- `expression` - For a data skipping index, the index expression. For a primary key index: `''` (empty string). 
(String) **Examples** @@ -310,11 +314,12 @@ Result: ``` text ┌─table─┬─non_unique─┬─key_name─┬─seq_in_index─┬─column_name─┬─collation─┬─cardinality─┬─sub_part─┬─packed─┬─null─┬─index_type───┬─comment─┬─index_comment─┬─visible─┬─expression─┐ -│ tbl │ 0 │ blf_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ bloom_filter │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ d, b │ -│ tbl │ 0 │ mm1_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ a, c, d │ -│ tbl │ 0 │ mm2_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ minmax │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ c, d, e │ -│ tbl │ 0 │ PRIMARY │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ A │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ primary │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ c, a │ -│ tbl │ 0 │ set_idx │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ set │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ YES │ e │ +│ tbl │ 1 │ blf_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ BLOOM_FILTER │ │ │ YES │ d, b │ +│ tbl │ 1 │ mm1_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ a, c, d │ +│ tbl │ 1 │ mm2_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ MINMAX │ │ │ YES │ c, d, e │ +│ tbl │ 1 │ PRIMARY │ 1 │ c │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ │ +│ tbl │ 1 │ PRIMARY │ 2 │ a │ A │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ PRIMARY │ │ │ YES │ │ +│ tbl │ 1 │ set_idx │ 1 │ 1 │ ᴺᵁᴸᴸ │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ SET │ │ │ YES │ e │ └───────┴────────────┴──────────┴──────────────┴─────────────┴───────────┴─────────────┴──────────┴────────┴──────┴──────────────┴─────────┴───────────────┴─────────┴────────────┘ ``` @@ -633,3 +638,16 @@ Outputs the content of the [system.table_engines](../../operations/system-tables **See Also** - [system.table_engines](../../operations/system-tables/table_engines.md) table + +## SHOW FUNCTIONS + +``` sql +SHOW FUNCTIONS [LIKE | ILIKE '<pattern>'] +``` + +Outputs the content of the [system.functions](../../operations/system-tables/functions.md) table. + +If either the `LIKE` or `ILIKE` clause is specified, the query returns a list of system functions whose names match the provided `<pattern>`. + +**See Also** +- [system.functions](../../operations/system-tables/functions.md) table diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index 65a35f03fbe4..4e1e7aa6a491 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -66,13 +66,13 @@ RELOAD FUNCTION [ON CLUSTER cluster_name] function_name ## DROP DNS CACHE -Resets ClickHouse’s internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries). +Clears ClickHouse’s internal DNS cache. Sometimes (for old ClickHouse versions) it is necessary to use this command when changing the infrastructure (changing the IP address of another ClickHouse server or the server used by dictionaries). For more convenient (automatic) cache management, see disable_internal_dns_cache, dns_cache_update_period parameters. ## DROP MARK CACHE -Resets the mark cache. +Clears the mark cache. ## DROP REPLICA @@ -106,22 +106,18 @@ Similar to `SYSTEM DROP REPLICA`, but removes the `Replicated` database replica ## DROP UNCOMPRESSED CACHE -Reset the uncompressed data cache. +Clears the uncompressed data cache. The uncompressed data cache is enabled/disabled with the query/user/profile-level setting [use_uncompressed_cache](../../operations/settings/settings.md#setting-use_uncompressed_cache).
Its size can be configured using the server-level setting [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size). ## DROP COMPILED EXPRESSION CACHE -Reset the compiled expression cache. +Clears the compiled expression cache. The compiled expression cache is enabled/disabled with the query/user/profile-level setting [compile_expressions](../../operations/settings/settings.md#compile-expressions). ## DROP QUERY CACHE -Resets the [query cache](../../operations/query-cache.md). - -```sql -SYSTEM DROP QUERY CACHE [ON CLUSTER cluster_name] -``` +Clears the [query cache](../../operations/query-cache.md). ## FLUSH LOGS @@ -314,6 +310,22 @@ Provides possibility to start background fetch tasks from replication queues whi SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name] ``` +### STOP PULLING REPLICATION LOG + +Stops loading new entries from the replication log into the replication queue in a `ReplicatedMergeTree` table. + +``` sql +SYSTEM STOP PULLING REPLICATION LOG [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name] +``` + +### START PULLING REPLICATION LOG + +Cancels `SYSTEM STOP PULLING REPLICATION LOG`. + +``` sql +SYSTEM START PULLING REPLICATION LOG [ON CLUSTER cluster_name] [[db.]replicated_merge_tree_family_table_name] +``` + ### SYNC REPLICA Wait until a `ReplicatedMergeTree` table will be synced with other replicas in a cluster, but no more than `receive_timeout` seconds. @@ -328,6 +340,15 @@ After running this statement the `[db.]replicated_merge_tree_family_table_name` - If a `LIGHTWEIGHT` modifier was specified then the query waits only for `GET_PART`, `ATTACH_PART`, `DROP_RANGE`, `REPLACE_RANGE` and `DROP_PART` entries to be processed. - If a `PULL` modifier was specified then the query pulls new replication queue entries from ZooKeeper, but does not wait for anything to be processed. +### SYNC DATABASE REPLICA + +Waits until the specified [replicated database](https://clickhouse.com/docs/en/engines/database-engines/replicated) applies all schema changes from the DDL queue of that database. + +**Syntax** +```sql +SYSTEM SYNC DATABASE REPLICA replicated_database_name; +``` + ### RESTART REPLICA Provides possibility to reinitialize Zookeeper session's state for `ReplicatedMergeTree` table, will compare current state with Zookeeper as source of truth and add tasks to Zookeeper queue if needed. @@ -414,3 +435,29 @@ Will do sync syscall. ```sql SYSTEM SYNC FILE CACHE [ON CLUSTER cluster_name] ``` + + +### SYSTEM STOP LISTEN + +Closes the socket and gracefully terminates the existing connections to the server on the specified port with the specified protocol. + +However, if the corresponding protocol settings were not specified in the clickhouse-server configuration, this command will have no effect. + +```sql +SYSTEM STOP LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +``` + +- If `CUSTOM 'protocol'` modifier is specified, the custom protocol with the specified name defined in the protocols section of the server configuration will be stopped. +- If `QUERIES ALL [EXCEPT .. [,..]]` modifier is specified, all protocols are stopped, unless specified with `EXCEPT` clause. +- If `QUERIES DEFAULT [EXCEPT .. [,..]]` modifier is specified, all default protocols are stopped, unless specified with `EXCEPT` clause.
+- If `QUERIES CUSTOM [EXCEPT .. [,..]]` modifier is specified, all custom protocols are stopped, unless specified with `EXCEPT` clause. + +### SYSTEM START LISTEN + +Allows new connections to be established on the specified protocols. + +However, if the server on the specified port and protocol was not stopped using the SYSTEM STOP LISTEN command, this command will have no effect. + +```sql +SYSTEM START LISTEN [ON CLUSTER cluster_name] [QUERIES ALL | QUERIES DEFAULT | QUERIES CUSTOM | TCP | TCP_WITH_PROXY | TCP_SECURE | HTTP | HTTPS | MYSQL | GRPC | POSTGRESQL | PROMETHEUS | CUSTOM 'protocol'] +``` diff --git a/docs/en/sql-reference/statements/truncate.md b/docs/en/sql-reference/statements/truncate.md index 457031a2157b..029815a43922 100644 --- a/docs/en/sql-reference/statements/truncate.md +++ b/docs/en/sql-reference/statements/truncate.md @@ -4,8 +4,9 @@ sidebar_position: 52 sidebar_label: TRUNCATE --- -# TRUNCATE Statement +# TRUNCATE Statements +## TRUNCATE TABLE ``` sql TRUNCATE TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] ``` @@ -21,3 +22,10 @@ You can specify how long (in seconds) to wait for inactive replicas to execute ` :::note If the `alter_sync` is set to `2` and some replicas are not active for more than the time, specified by the `replication_wait_for_inactive_replica_timeout` setting, then an exception `UNFINISHED` is thrown. ::: + +## TRUNCATE DATABASE +``` sql +TRUNCATE DATABASE [IF EXISTS] [db.]name [ON CLUSTER cluster] +``` + +Removes all tables from a database but keeps the database itself. When the clause `IF EXISTS` is omitted, the query returns an error if the database does not exist. diff --git a/docs/en/sql-reference/table-functions/azureBlobStorage.md b/docs/en/sql-reference/table-functions/azureBlobStorage.md index 7bb5d892c47d..59c92e1327e5 100644 --- a/docs/en/sql-reference/table-functions/azureBlobStorage.md +++ b/docs/en/sql-reference/table-functions/azureBlobStorage.md @@ -19,7 +19,7 @@ azureBlobStorage(- connection_string|storage_account_url, container_name, blobpa - `connection_string|storage_account_url` — connection_string includes account name & key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)) or you could also provide the storage account url here and account name & account key as separate parameters (see parameters account_name & account_key) - `container_name` - Container name -- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. +- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. - `account_name` - if storage_account_url is used, then account name can be specified here - `account_key` - if storage_account_url is used, then account key can be specified here - `format` — The [format](../../interfaces/formats.md#formats) of the file. 
diff --git a/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md b/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md new file mode 100644 index 000000000000..20dfd35d5db9 --- /dev/null +++ b/docs/en/sql-reference/table-functions/azureBlobStorageCluster.md @@ -0,0 +1,47 @@ +--- +slug: /en/sql-reference/table-functions/azureBlobStorageCluster +sidebar_position: 55 +sidebar_label: azureBlobStorageCluster +title: "azureBlobStorageCluster Table Function" +--- + +Allows processing files from [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs) in parallel from many nodes in a specified cluster. On the initiator node it creates a connection to all nodes in the cluster, expands the asterisks in the file path, and dispatches each file dynamically. On a worker node it asks the initiator for the next task to process and processes it. This is repeated until all tasks are finished. +This table function is similar to the [s3Cluster function](../../sql-reference/table-functions/s3Cluster.md). + +**Syntax** + +``` sql +azureBlobStorageCluster(cluster_name, connection_string|storage_account_url, container_name, blobpath, [account_name, account_key, format, compression, structure]) +``` + +**Arguments** + +- `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. +- `connection_string|storage_account_url` — connection_string includes account name & key ([Create connection string](https://learn.microsoft.com/en-us/azure/storage/common/storage-configure-connection-string?toc=%2Fazure%2Fstorage%2Fblobs%2Ftoc.json&bc=%2Fazure%2Fstorage%2Fblobs%2Fbreadcrumb%2Ftoc.json#configure-a-connection-string-for-an-azure-storage-account)) or you could also provide the storage account url here and account name & account key as separate parameters (see parameters account_name & account_key) +- `container_name` - Container name +- `blobpath` - file path. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. +- `account_name` - if storage_account_url is used, then account name can be specified here +- `account_key` - if storage_account_url is used, then account key can be specified here +- `format` — The [format](../../interfaces/formats.md#formats) of the file. +- `compression` — Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. (same as setting to `auto`). +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. + +**Returned value** + +A table with the specified structure for reading or writing data in the specified file.
+ +**Examples** + +Select the count for the file `test_cluster_*.csv`, using all the nodes in the `cluster_simple` cluster: + +``` sql +SELECT count(*) from azureBlobStorageCluster( + 'cluster_simple', 'http://azurite1:10000/devstoreaccount1', 'test_container', 'test_cluster_count.csv', 'devstoreaccount1', + 'Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==', 'CSV', + 'auto', 'key UInt64') +``` + +**See Also** + +- [AzureBlobStorage engine](../../engines/table-engines/integrations/azureBlobStorage.md) +- [azureBlobStorage table function](../../sql-reference/table-functions/azureBlobStorage.md) diff --git a/docs/en/sql-reference/table-functions/cluster.md b/docs/en/sql-reference/table-functions/cluster.md index 7362c433e0e9..a083c6b89a6b 100644 --- a/docs/en/sql-reference/table-functions/cluster.md +++ b/docs/en/sql-reference/table-functions/cluster.md @@ -16,14 +16,14 @@ All available clusters are listed in the [system.clusters](../../operations/syst **Syntax** ``` sql -cluster('cluster_name', db.table[, sharding_key]) -cluster('cluster_name', db, table[, sharding_key]) -clusterAllReplicas('cluster_name', db.table[, sharding_key]) -clusterAllReplicas('cluster_name', db, table[, sharding_key]) +cluster(['cluster_name', db.table, sharding_key]) +cluster(['cluster_name', db, table, sharding_key]) +clusterAllReplicas(['cluster_name', db.table, sharding_key]) +clusterAllReplicas(['cluster_name', db, table, sharding_key]) ``` **Arguments** -- `cluster_name` – Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. +- `cluster_name` – Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers; `default` is used if not specified. - `db.table` or `db`, `table` - Name of a database and a table. - `sharding_key` - A sharding key. Optional. Needs to be specified if the cluster has more than one shard. diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md index c78ffc1d61c1..7e869af82efe 100644 --- a/docs/en/sql-reference/table-functions/file.md +++ b/docs/en/sql-reference/table-functions/file.md @@ -13,16 +13,18 @@ The `file` function can be used in `SELECT` and `INSERT` queries to read from or **Syntax** ``` sql -file(path [,format] [,structure] [,compression]) +file([path_to_archive ::] path [,format] [,structure] [,compression]) ``` **Parameters** - `path` — The relative path to the file from [user_files_path](/docs/en/operations/server-configuration-parameters/settings.md#server_configuration_parameters-user_files_path). Path to file support following globs in read-only mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc', 'def'` — strings. +- `path_to_archive` - The relative path to a zip/tar/7z archive. Path to archive supports the same globs as `path`. - `format` — The [format](/docs/en/interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format: `'column1_name column1_type, column2_name column2_type, ...'`. - `compression` — The existing compression type when used in a `SELECT` query, or the desired compression type when used in an `INSERT` query. The supported compression types are `gz`, `br`, `xz`, `zst`, `lz4`, and `bz2`. + **Returned value** A table with the specified structure for reading or writing data in the specified file.
@@ -128,13 +130,18 @@ file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32'); └─────────┴─────────┴─────────┘ ``` -## Globs in Path +Getting data from the table in `table.csv`, located in `archive1.zip` and/or `archive2.zip`: +``` sql +SELECT * FROM file('user_files/archives/archive{1..2}.zip :: table.csv'); +``` + +## Globs in Path {#globs_in_path} Multiple path components can have globs. For being processed file must exist and match to the whole path pattern (not only suffix or prefix). - `*` — Substitutes any number of any characters except `/` including empty string. - `?` — Substitutes any single character. -- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. The strings can contain the `/` symbol. - `{N..M}` — Substitutes any number in range from N to M including both borders. - `**` - Fetches all files inside the folder recursively. @@ -205,7 +212,6 @@ SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt3 - [storage_file_read_method](/docs/en/operations/settings/settings.md#engine-file-emptyif-not-exists) - method of reading data from storage file, one of: read, pread, mmap (only for clickhouse-local). Default value: `pread` for clickhouse-server, `mmap` for clickhouse-local. - **See Also** - [Virtual columns](/docs/en/engines/table-engines/index.md#table_engines-virtual_columns) diff --git a/docs/en/sql-reference/table-functions/gcs.md b/docs/en/sql-reference/table-functions/gcs.md index 01b4e4f6a69a..48c2381696eb 100644 --- a/docs/en/sql-reference/table-functions/gcs.md +++ b/docs/en/sql-reference/table-functions/gcs.md @@ -22,7 +22,7 @@ The GCS Table Function integrates with Google Cloud Storage by using the GCS XML **Arguments** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. :::note GCS The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API: diff --git a/docs/en/sql-reference/table-functions/hdfs.md b/docs/en/sql-reference/table-functions/hdfs.md index 680ac54ee78c..678470e9150c 100644 --- a/docs/en/sql-reference/table-functions/hdfs.md +++ b/docs/en/sql-reference/table-functions/hdfs.md @@ -39,16 +39,16 @@ LIMIT 2 └─────────┴─────────┴─────────┘ ``` -**Globs in path** +## Globs in path {#globs_in_path} Multiple path components can have globs. For being processed file should exists and matches to the whole path pattern (not only suffix or prefix). - `*` — Substitutes any number of any characters except `/` including empty string. - `?` — Substitutes any single character. -- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. +- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`. The strings can contain the `/` symbol. - `{N..M}` — Substitutes any number in range from N to M including both borders. -Constructions with `{}` are similar to the [remote table function](../../sql-reference/table-functions/remote.md)).
+Constructions with `{}` are similar to the [remote](../../sql-reference/table-functions/remote.md) table function. **Example** @@ -102,6 +102,7 @@ FROM hdfs('hdfs://hdfs1:9000/big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name Strin - [hdfs_truncate_on_insert](/docs/en/operations/settings/settings.md#hdfs-truncate-on-insert) - allows to truncate file before insert into it. Disabled by default. - [hdfs_create_multiple_files](/docs/en/operations/settings/settings.md#hdfs_allow_create_multiple_files) - allows to create a new file on each insert if format has suffix. Disabled by default. - [hdfs_skip_empty_files](/docs/en/operations/settings/settings.md#hdfs_skip_empty_files) - allows to skip empty files while reading. Disabled by default. +- [ignore_access_denied_multidirectory_globs](/docs/en/operations/settings/settings.md#ignore_access_denied_multidirectory_globs) - allows to ignore permission denied errors for multi-directory globs. **See Also** diff --git a/docs/en/sql-reference/table-functions/hdfsCluster.md b/docs/en/sql-reference/table-functions/hdfsCluster.md index 832be46d05f7..75100eeb4f30 100644 --- a/docs/en/sql-reference/table-functions/hdfsCluster.md +++ b/docs/en/sql-reference/table-functions/hdfsCluster.md @@ -17,7 +17,7 @@ hdfsCluster(cluster_name, URI, format, structure) **Arguments** - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. -- `URI` — URI to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `URI` — URI to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. diff --git a/docs/en/sql-reference/table-functions/iceberg.md b/docs/en/sql-reference/table-functions/iceberg.md index 30db0ef00aa7..fa86b436a5e2 100644 --- a/docs/en/sql-reference/table-functions/iceberg.md +++ b/docs/en/sql-reference/table-functions/iceberg.md @@ -21,7 +21,7 @@ iceberg(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure]) - `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. By default `Parquet` is used. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. -Engine parameters can be specified using [Named Collections](../../operations/named-collections.md) +Engine parameters can be specified using [Named Collections](/docs/en/operations/named-collections.md). **Returned value** diff --git a/docs/en/sql-reference/table-functions/mongodb.md b/docs/en/sql-reference/table-functions/mongodb.md index aad60a7003c6..a483414c0d4b 100644 --- a/docs/en/sql-reference/table-functions/mongodb.md +++ b/docs/en/sql-reference/table-functions/mongodb.md @@ -30,6 +30,14 @@ mongodb(host:port, database, collection, user, password, structure [, options]) - `options` - MongoDB connection string options (optional parameter).
+:::tip +If you are using the MongoDB Atlas cloud offering, please add these options: + +``` +'connectTimeoutMS=10000&ssl=true&authSource=admin' +``` + +::: **Returned Value** diff --git a/docs/en/sql-reference/table-functions/remote.md b/docs/en/sql-reference/table-functions/remote.md index fba3ea55653b..59ed4bf19859 100644 --- a/docs/en/sql-reference/table-functions/remote.md +++ b/docs/en/sql-reference/table-functions/remote.md @@ -13,10 +13,10 @@ Both functions can be used in `SELECT` and `INSERT` queries. ## Syntax ``` sql -remote('addresses_expr', db, table[, 'user'[, 'password'], sharding_key]) -remote('addresses_expr', db.table[, 'user'[, 'password'], sharding_key]) -remoteSecure('addresses_expr', db, table[, 'user'[, 'password'], sharding_key]) -remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key]) +remote('addresses_expr', [db, table, 'user'[, 'password'], sharding_key]) +remote('addresses_expr', [db.table, 'user'[, 'password'], sharding_key]) +remoteSecure('addresses_expr', [db, table, 'user'[, 'password'], sharding_key]) +remoteSecure('addresses_expr', [db.table, 'user'[, 'password'], sharding_key]) ``` ## Parameters @@ -29,6 +29,8 @@ remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key]) The port is required for an IPv6 address. + If only this parameter is specified, `db` and `table` will default to `system.one`. + Type: [String](../../sql-reference/data-types/string.md). - `db` — Database name. Type: [String](../../sql-reference/data-types/string.md). diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md index 55c825b8b9b0..8649295e8150 100644 --- a/docs/en/sql-reference/table-functions/s3.md +++ b/docs/en/sql-reference/table-functions/s3.md @@ -23,7 +23,7 @@ For GCS, substitute your HMAC key and HMAC secret where you see `aws_access_key_ **Arguments** -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path). :::note GCS The GCS path is in this format as the endpoint for the Google XML API is different than the JSON API: @@ -162,6 +162,28 @@ The below get data from all `test-data.csv.gz` files from any folder inside `my- SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); ``` +Note: it is possible to specify custom URL mappers in the server configuration file.
Example: +``` sql +SELECT * FROM s3('s3://clickhouse-public-datasets/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); +``` +The URL `'s3://clickhouse-public-datasets/my-test-bucket-768/**/test-data.csv.gz'` would be replaced with `'http://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**/test-data.csv.gz'` + + +A custom mapper can be added to `config.xml`: +``` xml +<url_scheme_mappers> + <s3> + <to>https://{bucket}.s3.amazonaws.com</to> + </s3> + <gs> + <to>https://{bucket}.storage.googleapis.com</to> + </gs> + <oss> + <to>https://{bucket}.oss.aliyuncs.com</to> + </oss> +</url_scheme_mappers> +``` + ## Partitioned Write If you specify `PARTITION BY` expression when inserting data into `S3` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency. diff --git a/docs/en/sql-reference/table-functions/s3Cluster.md b/docs/en/sql-reference/table-functions/s3Cluster.md index d5bdc85f9f89..675aef54d346 100644 --- a/docs/en/sql-reference/table-functions/s3Cluster.md +++ b/docs/en/sql-reference/table-functions/s3Cluster.md @@ -16,7 +16,7 @@ s3Cluster(cluster_name, source, [,access_key_id, secret_access_key] [,format] [, **Arguments** - `cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers. -- `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `source` — URL to a file or a bunch of files. Supports following wildcards in readonly mode: `*`, `**`, `?`, `{'abc','def'}` and `{N..M}` where `N`, `M` — numbers, `abc`, `def` — strings. For more information see [Wildcards In Path](../../engines/table-engines/integrations/s3.md#wildcards-in-path). - `access_key_id` and `secret_access_key` — Keys that specify credentials to use with given endpoint. Optional. - `format` — The [format](../../interfaces/formats.md#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. diff --git a/docs/en/sql-reference/table-functions/url.md b/docs/en/sql-reference/table-functions/url.md index 2ab43f1b895e..859de86f0190 100644 --- a/docs/en/sql-reference/table-functions/url.md +++ b/docs/en/sql-reference/table-functions/url.md @@ -56,6 +56,7 @@ Character `|` inside patterns is used to specify failover addresses. They are it ## Storage Settings {#storage-settings} - [engine_url_skip_empty_files](/docs/en/operations/settings/settings.md#engine_url_skip_empty_files) - allows to skip empty files while reading. Disabled by default. +- [enable_url_encoding](/docs/en/operations/settings/settings.md#enable_url_encoding) - allows to enable/disable decoding/encoding path in uri. Enabled by default. **See Also** diff --git a/docs/en/sql-reference/transactions.md b/docs/en/sql-reference/transactions.md index 68fbfe0b22a8..cb89a091d684 100644 --- a/docs/en/sql-reference/transactions.md +++ b/docs/en/sql-reference/transactions.md @@ -3,23 +3,46 @@ slug: /en/guides/developer/transactional --- # Transactional (ACID) support -INSERT into one partition* in one table* of MergeTree* family up to max_insert_block_size rows* is transactional (ACID): -- Atomic: INSERT is succeeded or rejected as a whole: if confirmation is sent to the client, all rows INSERTed; if error is sent to the client, no rows INSERTed.
+## Case 1: INSERT into one partition, of one table, of the MergeTree* family + +This is transactional (ACID) if the inserted rows are packed and inserted as a single block (see Notes): +- Atomic: an INSERT succeeds or is rejected as a whole: if a confirmation is sent to the client, then all rows were inserted; if an error is sent to the client, then no rows were inserted. - Consistent: if there are no table constraints violated, then all rows in an INSERT are inserted and the INSERT succeeds; if constraints are violated, then no rows are inserted. -- Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as if before INSERT or after successful INSERT; no partial state is seen; -- Durable: successful INSERT is written to the filesystem before answering to the client, on single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting). -* If table has many partitions and INSERT covers many partitions–then insertion into every partition is transactional on its own; -* INSERT into multiple tables with one statement is possible if materialized views are involved; -* INSERT into Distributed table is not transactional as a whole, while insertion into every shard is transactional; -* another example: insert into Buffer tables is neither atomic nor isolated or consistent or durable; -* atomicity is ensured even if `async_insert` is enabled, but it can be turned off by the wait_for_async_insert setting; -* max_insert_block_size is 1 000 000 by default and can be adjusted as needed; -* if client did not receive the answer from the server, the client does not know if transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties; -* ClickHouse is using MVCC with snapshot isolation internally; -* all ACID properties are valid even in case of server kill / crash; -* either insert_quorum into different AZ or fsync should be enabled to ensure durable inserts in typical setup; -* "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency which is controlled by different settings (select_sequential_consistency) -* this explanation does not cover a new transactions feature that allow to have full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc. +- Isolated: concurrent clients observe a consistent snapshot of the table–the state of the table either as it was before the INSERT attempt, or after the successful INSERT; no partial state is seen +- Durable: a successful INSERT is written to the filesystem before answering the client, on a single replica or multiple replicas (controlled by the `insert_quorum` setting), and ClickHouse can ask the OS to sync the filesystem data on the storage media (controlled by the `fsync_after_insert` setting). +- INSERT into multiple tables with one statement is possible if materialized views are involved (the INSERT from the client is to a table which has associated materialized views).
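+
+As a minimal sketch of the per-query durability setting mentioned above (the table `events` is hypothetical, and `insert_quorum` only takes effect on replicated tables):
+
+```sql
+INSERT INTO events SETTINGS insert_quorum = 2 VALUES (1, 'a'), (2, 'b');
+```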
+
+## Case 2: INSERT into multiple partitions, of one table, of the MergeTree* family
+
+Same as Case 1 above, with this detail:
+- If a table has many partitions and an INSERT covers many partitions, then insertion into every partition is transactional on its own
+
+
+## Case 3: INSERT into one distributed table of the MergeTree* family
+
+Same as Case 1 above, with this detail:
+- An INSERT into a Distributed table is not transactional as a whole, while insertion into every shard is transactional
+
+## Case 4: Using a Buffer table
+
+- inserts into Buffer tables are neither atomic nor isolated nor consistent nor durable
+
+## Case 5: Using async_insert
+
+Same as Case 1 above, with this detail:
+- atomicity is ensured even if `async_insert` is enabled and `wait_for_async_insert` is set to 1 (the default), but if `wait_for_async_insert` is set to 0, then atomicity is not ensured.
+
+## Notes
+- rows inserted from the client in some data format are packed into a single block when:
+  - the insert format is row-based (like CSV, TSV, Values, JSONEachRow, etc) and the data contains fewer than `max_insert_block_size` rows (~1 000 000 by default) or fewer than `min_chunk_bytes_for_parallel_parsing` bytes (10 MB by default) when parallel parsing is used (it is enabled by default)
+  - the insert format is column-based (like Native, Parquet, ORC, etc) and the data contains only one block of data
+- the size of the inserted block in general may depend on many settings (for example: `max_block_size`, `max_insert_block_size`, `min_insert_block_size_rows`, `min_insert_block_size_bytes`, `preferred_block_size_bytes`, etc)
+- if the client did not receive an answer from the server, the client does not know if the transaction succeeded, and it can repeat the transaction, using exactly-once insertion properties
+- ClickHouse uses MVCC with snapshot isolation internally
+- all ACID properties are valid even in the case of a server kill/crash
+- either `insert_quorum` into different AZs or fsync should be enabled to ensure durable inserts in a typical setup
+- "consistency" in ACID terms does not cover the semantics of distributed systems, see https://jepsen.io/consistency; that is controlled by different settings (`select_sequential_consistency`)
+- this explanation does not cover a new transactions feature that allows full-featured transactions over multiple tables, materialized views, for multiple SELECTs, etc. (see the next section on Transactions, Commit, and Rollback)
 
 ## Transactions, Commit, and Rollback

diff --git a/docs/ru/development/architecture.md b/docs/ru/development/architecture.md
index a4b99987d030..1a6fbade06f3 100644
--- a/docs/ru/development/architecture.md
+++ b/docs/ru/development/architecture.md
@@ -156,9 +156,9 @@ ClickHouse имеет сильную типизацию, поэтому нет
 
 Мы поддерживаем полную обратную и прямую совместимость для TCP интерфейса: старые клиенты могут общаться с новыми серверами, а новые клиенты могут общаться со старыми серверами. Но мы не хотим поддерживать его вечно и прекращаем поддержку старых версий примерно через год.
 
- :::note "Note"
- Для всех сторонних приложений мы рекомендуем использовать HTTP интерфейс, потому что он прост и удобен в использовании. TCP интерфейс тесно связан с внутренними структурами данных: он использует внутренний формат для передачи блоков данных и использует специальное кадрирование для сжатых данных. Мы не выпустили библиотеку C для этого протокола, потому что потребовала бы линковки большей части кодовой базы ClickHouse, что непрактично.
- ::: +:::note Примечание +Для всех сторонних приложений мы рекомендуем использовать HTTP интерфейс, потому что он прост и удобен в использовании. TCP интерфейс тесно связан с внутренними структурами данных: он использует внутренний формат для передачи блоков данных и использует специальное кадрирование для сжатых данных. Мы не выпустили библиотеку C для этого протокола, потому что потребовала бы линковки большей части кодовой базы ClickHouse, что непрактично. +::: ## Выполнение распределенных запросов (Distributed Query Execution) {#distributed-query-execution} Сервера в кластере в основном независимы. Вы можете создать `Распределенную` (`Distributed`) таблицу на одном или всех серверах в кластере. Такая таблица сама по себе не хранит данные - она только предоставляет возможность "просмотра" всех локальных таблиц на нескольких узлах кластера. При выполнении `SELECT` распределенная таблица переписывает запрос, выбирает удаленные узлы в соответствии с настройками балансировки нагрузки и отправляет им запрос. Распределенная таблица просит удаленные сервера обработать запрос до той стадии, когда промежуточные результаты с разных серверов могут быть объединены. Затем он получает промежуточные результаты и объединяет их. Распределенная таблица пытается возложить как можно больше работы на удаленные серверы и сократить объем промежуточных данных, передаваемых по сети. @@ -197,7 +197,8 @@ ClickHouse имеет сильную типизацию, поэтому нет Кроме того, каждая реплика сохраняет свое состояние в `ZooKeeper` в виде набора частей и его контрольных сумм. Когда состояние в локальной файловой системе расходится с эталонным состоянием в `ZooKeeper`, реплика восстанавливает свою согласованность путем загрузки отсутствующих и поврежденных частей из других реплик. Когда в локальной файловой системе есть неожиданные или испорченные данные, ClickHouse не удаляет их, а перемещает в отдельный каталог и забывает об этом. - :::note "Note" - Кластер ClickHouse состоит из независимых шардов, а каждый шард состоит из реплик. Кластер **не является эластичным** (not elastic), поэтому после добавления нового шарда данные не будут автоматически распределены между ними. Вместо этого нужно изменить настройки, чтобы выровнять нагрузку на кластер. Эта реализация дает вам больший контроль, и вполне приемлема для относительно небольших кластеров, таких как десятки узлов. Но для кластеров с сотнями узлов, которые мы используем в эксплуатации, такой подход становится существенным недостатком. Движки таблиц, которые охватывают весь кластер с динамически реплицируемыми областями, которые могут быть автоматически разделены и сбалансированы между кластерами, еще предстоит реализовать. - ::: +:::note Примечание +Кластер ClickHouse состоит из независимых шардов, а каждый шард состоит из реплик. Кластер **не является эластичным** (not elastic), поэтому после добавления нового шарда данные не будут автоматически распределены между ними. Вместо этого нужно изменить настройки, чтобы выровнять нагрузку на кластер. Эта реализация дает вам больший контроль, и вполне приемлема для относительно небольших кластеров, таких как десятки узлов. Но для кластеров с сотнями узлов, которые мы используем в эксплуатации, такой подход становится существенным недостатком. Движки таблиц, которые охватывают весь кластер с динамически реплицируемыми областями, которые могут быть автоматически разделены и сбалансированы между кластерами, еще предстоит реализовать. 
+::: + {## [Original article](https://clickhouse.com/docs/ru/development/architecture/) ##} diff --git a/docs/ru/development/build-osx.md b/docs/ru/development/build-osx.md index 9a1f9c9347d7..033f42c1fd04 100644 --- a/docs/ru/development/build-osx.md +++ b/docs/ru/development/build-osx.md @@ -6,9 +6,9 @@ sidebar_label: Сборка на Mac OS X # Как собрать ClickHouse на Mac OS X {#how-to-build-clickhouse-on-mac-os-x} -:::info "Вам не нужно собирать ClickHouse самостоятельно" - Вы можете установить предварительно собранный ClickHouse, как описано в [Быстром старте](https://clickhouse.com/#quick-start). - Следуйте инструкциям по установке для `macOS (Intel)` или `macOS (Apple Silicon)`. +:::info Вам не нужно собирать ClickHouse самостоятельно +Вы можете установить предварительно собранный ClickHouse, как описано в [Быстром старте](https://clickhouse.com/#quick-start). +Следуйте инструкциям по установке для `macOS (Intel)` или `macOS (Apple Silicon)`. ::: Сборка должна запускаться с x86_64 (Intel) на macOS версии 10.15 (Catalina) и выше в последней версии компилятора Xcode's native AppleClang, Homebrew's vanilla Clang или в GCC-компиляторах. @@ -68,7 +68,7 @@ $ /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/ $ rm -rf build $ mkdir build $ cd build - $ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER==$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF .. + $ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF .. $ cmake -DCMAKE_C_COMPILER=$(brew --prefix llvm)/bin/clang -DCMAKE_CXX_COMPILER=$(brew --prefix llvm)/bin/clang++ -DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_JEMALLOC=OFF .. $ cmake --build . --config RelWithDebInfo $ cd .. @@ -90,8 +90,8 @@ $ /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/ Если будете запускать `clickhouse-server`, убедитесь, что увеличили системную переменную `maxfiles`. -:::info "Note" - Вам понадобится команда `sudo`. +:::info Примечание +Вам понадобится команда `sudo`. ::: 1. Создайте файл `/Library/LaunchDaemons/limit.maxfiles.plist` и поместите в него следующее: diff --git a/docs/ru/development/developer-instruction.md b/docs/ru/development/developer-instruction.md index 7294bc2ae875..c63622594e4b 100644 --- a/docs/ru/development/developer-instruction.md +++ b/docs/ru/development/developer-instruction.md @@ -289,6 +289,4 @@ Pull request можно создать, даже если работа над з ## Навигация по коду ClickHouse {#navigatsiia-po-kodu-clickhouse} -Для навигации по коду онлайн доступен **Woboq**, он расположен [здесь](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). В нём реализовано удобное перемещение между исходными файлами, семантическая подсветка, подсказки, индексация и поиск. Слепок кода обновляется ежедневно. - Также вы можете просматривать исходники на [GitHub](https://github.com/ClickHouse/ClickHouse). diff --git a/docs/ru/engines/database-engines/materialized-mysql.md b/docs/ru/engines/database-engines/materialized-mysql.md index df56b7a0bd60..f019bc7e3fca 100644 --- a/docs/ru/engines/database-engines/materialized-mysql.md +++ b/docs/ru/engines/database-engines/materialized-mysql.md @@ -49,9 +49,9 @@ CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', - `default_authentication_plugin = mysql_native_password` — `MaterializedMySQL` может авторизоваться только с помощью этого метода. 
- `gtid_mode = on` — ведение журнала на основе GTID является обязательным для обеспечения правильной репликации. - :::note "Внимание" - При включении `gtid_mode` вы также должны указать `enforce_gtid_consistency = on`. - ::: +:::note Внимание +При включении `gtid_mode` вы также должны указать `enforce_gtid_consistency = on`. +::: ## Виртуальные столбцы {#virtual-columns} При работе с движком баз данных `MaterializedMySQL` используются таблицы семейства [ReplacingMergeTree](../../engines/table-engines/mergetree-family/replacingmergetree.md) с виртуальными столбцами `_sign` и `_version`. diff --git a/docs/ru/engines/database-engines/materialized-postgresql.md b/docs/ru/engines/database-engines/materialized-postgresql.md index f2268e9ba0f4..6dcb5a3475fa 100644 --- a/docs/ru/engines/database-engines/materialized-postgresql.md +++ b/docs/ru/engines/database-engines/materialized-postgresql.md @@ -96,9 +96,10 @@ FROM pg_class WHERE oid = 'postgres_table'::regclass; ``` -:::danger "Предупреждение" - Репликация **TOAST**-значений не поддерживается. Для типа данных будет использоваться значение по умолчанию. - +:::danger Предупреждение +Репликация **TOAST**-значений не поддерживается. Для типа данных будет использоваться значение по умолчанию. +::: + ## Пример использования {#example-of-use} ``` sql diff --git a/docs/ru/engines/database-engines/mysql.md b/docs/ru/engines/database-engines/mysql.md index fb5c9d16ee4e..3e3355d83480 100644 --- a/docs/ru/engines/database-engines/mysql.md +++ b/docs/ru/engines/database-engines/mysql.md @@ -60,8 +60,9 @@ ENGINE = MySQL('host:port', ['database' | database], 'user', 'password') - `version` - `max_allowed_packet` -:::danger "Предупреждение" - В настоящее время эти переменные реализованы только как "заглушки" и не содержат актуальных данных. +:::danger Предупреждение +В настоящее время эти переменные реализованы только как "заглушки" и не содержат актуальных данных. +::: Пример: diff --git a/docs/ru/engines/database-engines/replicated.md b/docs/ru/engines/database-engines/replicated.md index 05e38e774fdf..fcf2b9f40196 100644 --- a/docs/ru/engines/database-engines/replicated.md +++ b/docs/ru/engines/database-engines/replicated.md @@ -12,7 +12,7 @@ sidebar_label: Replicated ## Создание базы данных {#creating-a-database} ``` sql - CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_name') [SETTINGS ...] +CREATE DATABASE testdb ENGINE = Replicated('zoo_path', 'shard_name', 'replica_name') [SETTINGS ...] ``` **Параметры движка** @@ -21,9 +21,9 @@ sidebar_label: Replicated - `shard_name` — Имя шарда. Реплики базы данных группируются в шарды по имени. - `replica_name` — Имя реплики. Имена реплик должны быть разными для всех реплик одного и того же шарда. - :::note "Предупреждение" - Для таблиц [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) если аргументы не заданы, то используются аргументы по умолчанию: `/clickhouse/tables/{uuid}/{shard}` и `{replica}`. Они могут быть изменены в серверных настройках: [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) и [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). Макрос `{uuid}` раскрывается в `UUID` таблицы, `{shard}` и `{replica}` — в значения из конфига сервера. В будущем появится возможность использовать значения `shard_name` и `replica_name` аргументов движка базы данных `Replicated`. 
- ::: +:::note Предупреждение +Для таблиц [ReplicatedMergeTree](../table-engines/mergetree-family/replication.md#table_engines-replication) если аргументы не заданы, то используются аргументы по умолчанию: `/clickhouse/tables/{uuid}/{shard}` и `{replica}`. Они могут быть изменены в серверных настройках: [default_replica_path](../../operations/server-configuration-parameters/settings.md#default_replica_path) и [default_replica_name](../../operations/server-configuration-parameters/settings.md#default_replica_name). Макрос `{uuid}` раскрывается в `UUID` таблицы, `{shard}` и `{replica}` — в значения из конфига сервера. В будущем появится возможность использовать значения `shard_name` и `replica_name` аргументов движка базы данных `Replicated`. +::: ## Особенности и рекомендации {#specifics-and-recommendations} DDL-запросы с базой данных `Replicated` работают похожим образом на [ON CLUSTER](../../sql-reference/distributed-ddl.md) запросы, но с небольшими отличиями. diff --git a/docs/ru/engines/table-engines/integrations/hdfs.md b/docs/ru/engines/table-engines/integrations/hdfs.md index aed90d53f2a0..72087b56652a 100644 --- a/docs/ru/engines/table-engines/integrations/hdfs.md +++ b/docs/ru/engines/table-engines/integrations/hdfs.md @@ -97,8 +97,9 @@ CREATE TABLE table_with_question_mark (name String, value UInt32) ENGINE = HDFS( CREATE TABLE table_with_asterisk (name String, value UInt32) ENGINE = HDFS('hdfs://hdfs1:9000/{some,another}_dir/*', 'TSV') ``` -:::danger "Warning" - Если список файлов содержит числовые интервалы с ведущими нулями, используйте конструкцию с фигурными скобочками для каждой цифры или используйте `?`. +:::danger Предупреждение +Если список файлов содержит числовые интервалы с ведущими нулями, используйте конструкцию с фигурными скобочками для каждой цифры или используйте `?`. +::: **Example** diff --git a/docs/ru/engines/table-engines/integrations/kafka.md b/docs/ru/engines/table-engines/integrations/kafka.md index 832486c038a0..18f6c7cd1f9f 100644 --- a/docs/ru/engines/table-engines/integrations/kafka.md +++ b/docs/ru/engines/table-engines/integrations/kafka.md @@ -102,7 +102,7 @@ SETTINGS Устаревший способ создания таблицы -:::note "Attention" +:::note Важно Не используйте этот метод в новых проектах. По возможности переключите старые проекты на метод, описанный выше. ::: diff --git a/docs/ru/engines/table-engines/integrations/materialized-postgresql.md b/docs/ru/engines/table-engines/integrations/materialized-postgresql.md index bc10066f6a67..c1f7fde1ab4e 100644 --- a/docs/ru/engines/table-engines/integrations/materialized-postgresql.md +++ b/docs/ru/engines/table-engines/integrations/materialized-postgresql.md @@ -52,5 +52,6 @@ PRIMARY KEY key; SELECT key, value, _version FROM postgresql_db.postgresql_replica; ``` -:::danger "Предупреждение" - Репликация **TOAST**-значений не поддерживается. Для типа данных будет использоваться значение по умолчанию. +:::danger Предупреждение +Репликация **TOAST**-значений не поддерживается. Для типа данных будет использоваться значение по умолчанию. +::: diff --git a/docs/ru/engines/table-engines/integrations/postgresql.md b/docs/ru/engines/table-engines/integrations/postgresql.md index d4fc9470a0e8..4f3654baaf28 100644 --- a/docs/ru/engines/table-engines/integrations/postgresql.md +++ b/docs/ru/engines/table-engines/integrations/postgresql.md @@ -48,8 +48,8 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] PostgreSQL массивы конвертируются в массивы ClickHouse. 
-:::info "Внимание"
- Будьте внимательны, в PostgreSQL массивы, созданные как `type_name[]`, являются многомерными и могут содержать в себе разное количество измерений в разных строках одной таблицы. Внутри ClickHouse допустимы только многомерные массивы с одинаковым кол-вом измерений во всех строках таблицы.
+:::info Внимание
+Будьте внимательны, в PostgreSQL массивы, созданные как `type_name[]`, являются многомерными и могут содержать в себе разное количество измерений в разных строках одной таблицы. Внутри ClickHouse допустимы только многомерные массивы с одинаковым кол-вом измерений во всех строках таблицы.
 :::
 
 Поддерживает несколько реплик, которые должны быть перечислены через `|`. Например:

diff --git a/docs/ru/engines/table-engines/integrations/s3.md b/docs/ru/engines/table-engines/integrations/s3.md
index 24735a353824..720aa589122e 100644
--- a/docs/ru/engines/table-engines/integrations/s3.md
+++ b/docs/ru/engines/table-engines/integrations/s3.md
@@ -68,8 +68,8 @@ SELECT * FROM s3_engine_table LIMIT 2;
 
 Конструкции с `{}` аналогичны функции [remote](../../../sql-reference/table-functions/remote.md).
 
-:::danger "Примечание"
- Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`.
+:::danger Примечание
+Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`.
+:::
 
 **Пример подстановки 1**

diff --git a/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md
index 596926ea47df..f60735eac9fd 100644
--- a/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md
@@ -39,7 +39,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 
 Устаревший способ создания таблицы
 
-:::note "Attention"
+:::note Важно
 Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше.
 :::

diff --git a/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md
index e3b4238a200f..cfafddf0bc2a 100644
--- a/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md
@@ -43,7 +43,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 
 Устаревший способ создания таблицы
 
-:::note "Attention"
+:::note Важно
 Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше.
 :::

diff --git a/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md b/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md
index 7c3603338373..46597c943704 100644
--- a/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md
+++ b/docs/ru/engines/table-engines/mergetree-family/custom-partitioning-key.md
@@ -39,8 +39,8 @@ ORDER BY (CounterID, StartDate, intHash32(UserID));
 
 Каждая партиция состоит из отдельных фрагментов или так называемых *кусков данных*. Каждый кусок отсортирован по первичному ключу. При вставке данных в таблицу каждая отдельная запись сохраняется в виде отдельного куска.
Через некоторое время после вставки (обычно до 10 минут), ClickHouse выполняет в фоновом режиме слияние данных — в результате куски для одной и той же партиции будут объединены в более крупный кусок.

-:::info "Info"
- Не рекомендуется делать слишком гранулированное партиционирование – то есть задавать партиции по столбцу, в котором будет слишком большой разброс значений (речь идет о порядке более тысячи партиций). Это приведет к скоплению большого числа файлов и файловых дескрипторов в системе, что может значительно снизить производительность запросов `SELECT`.
+:::info Примечание
+Не рекомендуется делать слишком гранулированное партиционирование – то есть задавать партиции по столбцу, в котором будет слишком большой разброс значений (речь идет о порядке более тысячи партиций). Это приведет к скоплению большого числа файлов и файловых дескрипторов в системе, что может значительно снизить производительность запросов `SELECT`.
 :::
 
 Чтобы получить набор кусков и партиций таблицы, можно воспользоваться системной таблицей [system.parts](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md#system_tables-parts). В качестве примера рассмотрим таблицу `visits`, в которой задано партиционирование по месяцам. Выполним `SELECT` для таблицы `system.parts`:

@@ -80,8 +80,8 @@ WHERE table = 'visits'
 - `2` – уровень куска (глубина дерева слияний, которыми этот кусок образован).
 - `11` - версия мутации (если парт мутировал)
 
-:::info "Info"
- Названия кусков для таблиц старого типа образуются следующим образом: `20190117_20190123_2_2_0` (минимальная дата _ максимальная дата _ номер минимального блока _ номер максимального блока _ уровень).
+:::info Примечание
+Названия кусков для таблиц старого типа образуются следующим образом: `20190117_20190123_2_2_0` (минимальная дата _ максимальная дата _ номер минимального блока _ номер максимального блока _ уровень).
 :::
 
 Как видно из примера выше, таблица содержит несколько отдельных кусков для одной и той же партиции (например, куски `201901_1_3_1` и `201901_1_9_2` принадлежат партиции `201901`). Это означает, что эти куски еще не были объединены – в файловой системе они хранятся отдельно. После того как будет выполнено автоматическое слияние данных (выполняется примерно спустя 10 минут после вставки данных), исходные куски будут объединены в один более крупный кусок и помечены как неактивные.

diff --git a/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md
index 324a3fd16339..ff77a29767d4 100644
--- a/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md
@@ -55,7 +55,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 
 Устаревший способ создания таблицы
 
-:::note "Attention"
+:::note Важно
 Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше.
 :::

@@ -123,12 +123,13 @@ default
 ...
 ```
 
-:::danger "Внимание"
- Правила должны быть строго упорядочены:
+:::danger Внимание
+Правила должны быть строго упорядочены:
 
- 1. Правила без `function` или `retention`.
- 1. Правила одновремено содержащие `function` и `retention`.
- 1. Правило `default`.
+1. Правила без `function` или `retention`.
+1. Правила, одновременно содержащие `function` и `retention`.
+1. Правило `default`.
+:::
 
 При обработке строки ClickHouse проверяет правила в разделе `pattern`.
Каждый `pattern` (включая `default`) может содержать параметр агрегации `function`, параметр `retention`, или оба параметра одновременно. Если имя метрики соответствует шаблону `regexp`, то применяются правила `pattern`, в противном случае правило `default`.

@@ -257,5 +258,6 @@ default
 ```
 
-:::danger "Внимание"
- Прореживание данных производится во время слияний. Обычно для старых партиций слияния не запускаются, поэтому для прореживания надо инициировать незапланированное слияние используя [optimize](../../../sql-reference/statements/optimize.md). Или использовать дополнительные инструменты, например [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer).
+:::danger Внимание
+Прореживание данных производится во время слияний. Обычно для старых партиций слияния не запускаются, поэтому для прореживания надо инициировать незапланированное слияние используя [optimize](../../../sql-reference/statements/optimize.md). Или использовать дополнительные инструменты, например [graphite-ch-optimizer](https://github.com/innogames/graphite-ch-optimizer).
+:::

diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
index 812b0c0a2d4f..00eb830c9efa 100644
--- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
@@ -20,9 +20,9 @@ sidebar_label: MergeTree
 
 - **Поддерживает сэмплирование данных.** При необходимости можно задать способ сэмплирования данных в таблице.
 
- :::info
- Движок [Merge](../special/merge.md#merge) не относится к семейству `*MergeTree`.
- :::
+:::info Примечание
+Движок [Merge](../special/merge.md#merge) не относится к семейству `*MergeTree`.
+:::

 ## Создание таблицы {#table_engine-mergetree-creating-a-table}

 ``` sql

@@ -115,7 +115,7 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa
 
 Устаревший способ создания таблицы
 
-:::note "Attention"
+:::note Важно
 Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ, описанный выше.
 :::

@@ -805,8 +805,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
             <single_read_retries>4</single_read_retries>
             <min_bytes_for_seek>1000</min_bytes_for_seek>
             <metadata_path>/var/lib/clickhouse/disks/s3/</metadata_path>
-            <cache_enabled>true</cache_enabled>
-            <cache_path>/var/lib/clickhouse/disks/s3/cache/</cache_path>
             <skip_access_check>false</skip_access_check>

@@ -832,8 +830,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
 - `single_read_retries` — число попыток выполнения запроса в случае возникновения ошибки в процессе чтения. Значение по умолчанию: `4`.
 - `min_bytes_for_seek` — минимальное количество байтов, которые используются для операций поиска вместо последовательного чтения. Значение по умолчанию: 1 МБайт.
 - `metadata_path` — путь к локальному файловому хранилищу для хранения файлов с метаданными для S3. Значение по умолчанию: `/var/lib/clickhouse/disks/<disk_name>/`.
-- `cache_enabled` — признак, разрешено ли хранение кэша засечек и файлов индекса в локальной файловой системе. Значение по умолчанию: `true`.
-- `cache_path` — путь в локальной файловой системе, где будут храниться кэш засечек и файлы индекса. Значение по умолчанию: `/var/lib/clickhouse/disks/<disk_name>/cache/`.
 - `skip_access_check` — признак, выполнять ли проверку доступов при запуске диска. Если установлено значение `true`, то проверка не выполняется. Значение по умолчанию: `false`.
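For context, a disk declared in such a configuration is consumed from SQL through a storage policy; a minimal sketch follows. The table is hypothetical, and the policy name is taken from the surrounding example (`moving_from_ssd_to_hdd`); any policy declared in your storage configuration works the same way:

``` sql
-- Hypothetical table whose parts live on the disks of the named policy.
-- The policy itself must be declared in the server's storage configuration.
CREATE TABLE s3_backed_table
(
    dt Date,
    id UInt64
)
ENGINE = MergeTree
ORDER BY id
SETTINGS storage_policy = 'moving_from_ssd_to_hdd';
```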
Диск S3 может быть сконфигурирован как `main` или `cold`: diff --git a/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md index 86626d92863d..cd8b55b0259e 100644 --- a/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/replacingmergetree.md @@ -29,7 +29,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Описание параметров запроса смотрите в [описании запроса](../../../engines/table-engines/mergetree-family/replacingmergetree.md). -:::warning "Внимание" +:::warning Внимание Уникальность строк определяется `ORDER BY` секцией таблицы, а не `PRIMARY KEY`. ::: @@ -95,7 +95,7 @@ SELECT * FROM mySecondReplacingMT FINAL; Устаревший способ создания таблицы -:::warning "Внимание" +:::warning Внимание Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ, описанный выше. ::: diff --git a/docs/ru/engines/table-engines/mergetree-family/replication.md b/docs/ru/engines/table-engines/mergetree-family/replication.md index 2b4d89dbe0ae..dd94288882ca 100644 --- a/docs/ru/engines/table-engines/mergetree-family/replication.md +++ b/docs/ru/engines/table-engines/mergetree-family/replication.md @@ -32,9 +32,9 @@ ClickHouse хранит метаинформацию о репликах в [Apa Для использовании репликации, установите параметры в секции [zookeeper](../../../operations/server-configuration-parameters/settings.md#server-settings_zookeeper) конфигурации сервера. - :::note "Внимание" - Не пренебрегайте настройками безопасности. ClickHouse поддерживает [ACL схему](https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) `digest` подсистемы безопасности ZooKeeper. - ::: +:::note Внимание +Не пренебрегайте настройками безопасности. ClickHouse поддерживает [ACL схему](https://zookeeper.apache.org/doc/current/zookeeperProgrammers.html#sc_ZooKeeperAccessControl) `digest` подсистемы безопасности ZooKeeper. +::: Пример указания адресов кластера ZooKeeper: ``` xml diff --git a/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md index abbcfcc5be27..a88de9d1e262 100644 --- a/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md @@ -42,7 +42,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] Устаревший способ создания таблицы -:::note "Attention" +:::note Важно Не используйте этот способ в новых проектах и по возможности переведите старые проекты на способ описанный выше. ::: diff --git a/docs/ru/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index 1cc5752f04b5..cc1bf09e5643 100644 --- a/docs/ru/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/ru/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -54,10 +54,10 @@ VersionedCollapsingMergeTree(sign, version) Устаревший способ создания таблицы - :::danger "Внимание" - Не используйте этот метод в новых проектах. По возможности переключите старые проекты на метод, описанный выше. - ::: - +:::danger Внимание +Не используйте этот метод в новых проектах. По возможности переключите старые проекты на метод, описанный выше. 
+::: + ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] ( diff --git a/docs/ru/engines/table-engines/special/buffer.md b/docs/ru/engines/table-engines/special/buffer.md index 574d9273088d..1fd8483e54dc 100644 --- a/docs/ru/engines/table-engines/special/buffer.md +++ b/docs/ru/engines/table-engines/special/buffer.md @@ -9,7 +9,7 @@ sidebar_label: Buffer Буферизует записываемые данные в оперативке, периодически сбрасывая их в другую таблицу. При чтении, производится чтение данных одновременно из буфера и из другой таблицы. ``` sql -Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes) +Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]]) ``` Параметры движка: @@ -49,9 +49,9 @@ CREATE TABLE merge.hits_buffer AS merge.hits ENGINE = Buffer(merge, hits, 16, 10 Если у одного из столбцов таблицы Buffer и подчинённой таблицы не совпадает тип, то в лог сервера будет записано сообщение об ошибке и буфер будет очищен. То же самое происходит, если подчинённая таблица не существует в момент сброса буфера. - :::note "Внимание" - В релизах до 26 октября 2021 года выполнение ALTER на таблице Buffer ломает структуру блоков и вызывает ошибку (см. [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) и [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), поэтому удаление буфера и его пересоздание — единственный вариант миграции для данного движка. Перед выполнением ALTER на таблице Buffer убедитесь, что в вашей версии эта ошибка устранена. - ::: +:::note Внимание +В релизах до 26 октября 2021 года выполнение ALTER на таблице Buffer ломает структуру блоков и вызывает ошибку (см. [#15117](https://github.com/ClickHouse/ClickHouse/issues/15117) и [#30565](https://github.com/ClickHouse/ClickHouse/pull/30565)), поэтому удаление буфера и его пересоздание — единственный вариант миграции для данного движка. Перед выполнением ALTER на таблице Buffer убедитесь, что в вашей версии эта ошибка устранена. +::: При нештатном перезапуске сервера, данные, находящиеся в буфере, будут потеряны. Для таблиц типа Buffer неправильно работают FINAL и SAMPLE. Эти условия пробрасываются в таблицу назначения, но не используются для обработки данных в буфере. В связи с этим, рекомендуется использовать таблицу типа Buffer только для записи, а читать из таблицы назначения. diff --git a/docs/ru/engines/table-engines/special/distributed.md b/docs/ru/engines/table-engines/special/distributed.md index 341acf92803a..0c4457917041 100644 --- a/docs/ru/engines/table-engines/special/distributed.md +++ b/docs/ru/engines/table-engines/special/distributed.md @@ -141,9 +141,10 @@ logs - имя кластера в конфигурационном файле с - `_shard_num` — содержит значение `shard_num` из таблицы `system.clusters`. Тип: [UInt32](../../../sql-reference/data-types/int-uint.md). - :::note "Примечание" - Так как табличные функции [remote](../../../sql-reference/table-functions/remote.md) и [cluster](../../../sql-reference/table-functions/cluster.md) создают временную таблицу на движке `Distributed`, то в ней также доступен столбец `_shard_num`. - ::: +:::note Примечание +Так как табличные функции [remote](../../../sql-reference/table-functions/remote.md) и [cluster](../../../sql-reference/table-functions/cluster.md) создают временную таблицу на движке `Distributed`, то в ней также доступен столбец `_shard_num`. +::: + **См. 
также** - общее описание [виртуальных столбцов](../../../engines/table-engines/index.md#table_engines-virtual_columns) diff --git a/docs/ru/engines/table-engines/special/file.md b/docs/ru/engines/table-engines/special/file.md index fb65c93c48c3..2c5d998fd0b1 100644 --- a/docs/ru/engines/table-engines/special/file.md +++ b/docs/ru/engines/table-engines/special/file.md @@ -28,8 +28,9 @@ File(Format) Можно вручную создать в хранилище каталог таблицы, поместить туда файл, затем на сервере ClickHouse добавить ([ATTACH](../../../sql-reference/statements/attach.md#attach)) информацию о таблице, соответствующей имени каталога и прочитать из файла данные. -:::danger "Warning" - Будьте аккуратны с этой функциональностью, поскольку сервер ClickHouse не отслеживает внешние изменения данных. Если в файл будет производиться запись одновременно со стороны сервера ClickHouse и с внешней стороны, то результат непредсказуем. +:::danger Предупреждение +Будьте аккуратны с этой функциональностью, поскольку сервер ClickHouse не отслеживает внешние изменения данных. Если в файл будет производиться запись одновременно со стороны сервера ClickHouse и с внешней стороны, то результат непредсказуем. +::: **Пример:** diff --git a/docs/ru/faq/general/dbms-naming.md b/docs/ru/faq/general/dbms-naming.md index 875d24dea2ae..32f681aaef93 100644 --- a/docs/ru/faq/general/dbms-naming.md +++ b/docs/ru/faq/general/dbms-naming.md @@ -13,6 +13,6 @@ sidebar_position: 10 - единственно правильный способ написания — Click**H**ouse — с заглавной буквой H; - если нужно сокращеннное название, используйте **CH**. Исторически сложилось, что в Китае также популярно сокращение CK — в основном, из-за того, что это название использовалось в одном из первых обсуждений ClickHouse на китайском языке. -:::info "Забавный факт" - Спустя годы после того, как ClickHouse получил свое название, принцип комбинирования двух слов, каждое из которых имеет подходящий смысл, был признан лучшим способом назвать базу данных в [исследовании Andy Pavlo](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html), Associate Professor of Databases в Carnegie Mellon University. ClickHouse разделил награду "за лучшее название СУБД" с Postgres. +:::info Забавный факт +Спустя годы после того, как ClickHouse получил свое название, принцип комбинирования двух слов, каждое из которых имеет подходящий смысл, был признан лучшим способом назвать базу данных в [исследовании Andy Pavlo](https://www.cs.cmu.edu/~pavlo/blog/2020/03/on-naming-a-database-management-system.html), Associate Professor of Databases в Carnegie Mellon University. ClickHouse разделил награду "за лучшее название СУБД" с Postgres. ::: \ No newline at end of file diff --git a/docs/ru/faq/general/index.md b/docs/ru/faq/general/index.md index 6aaccbe5826a..b4a2f18cb024 100644 --- a/docs/ru/faq/general/index.md +++ b/docs/ru/faq/general/index.md @@ -19,6 +19,6 @@ sidebar_label: Общие вопросы - [Что такое столбцовая база данных?](columnar-database.md) - [Почему бы не использовать системы типа MapReduce?](mapreduce.md) -:::info "Если вы не нашли то, что искали:" - Загляните в другие категории F.A.Q. или поищите в остальных разделах документации, ориентируясь по оглавлению слева. +:::info Если вы не нашли то, что искали: +Загляните в другие категории F.A.Q. или поищите в остальных разделах документации, ориентируясь по оглавлению слева. 
::: diff --git a/docs/ru/faq/general/why-clickhouse-is-so-fast.md b/docs/ru/faq/general/why-clickhouse-is-so-fast.md index 2652e93c35e5..8bb8fb78e051 100644 --- a/docs/ru/faq/general/why-clickhouse-is-so-fast.md +++ b/docs/ru/faq/general/why-clickhouse-is-so-fast.md @@ -49,16 +49,14 @@ sidebar_position: 8 Ну и последнее, но тем не менее важное условие: команда ClickHouse постоянно отслеживает в интернете сообщения пользователей о найденных ими удачных реализациях, алгоритмах или структурах данных, анализирует и пробует новые идеи. Иногда в этом потоке сообщений попадаются действительно ценные предложения. -:::info "Советы о том, как создать собственную высокопроизводительную систему" - - - - При проектировании системы обращайте внимание на мельчайшие детали реализации. - - Учитывайте возможности аппаратного обеспечения. - - Выбирайте структуры и представления данных исходя из требований конкретной задачи. - - Для особых случаев разрабатывайте специализированные решения. - - Пробуйте новые алгоритмы, о которых вы вчера прочитали в интернете. Ищите возможности для совершенствования. - - Выбирайте алгоритмы динамически, в процессе выполнения, на основе статистики. - - Ориентируйтесь на показатели, собранные при работе с реальными данными. - - Проверяйте производительность в процессе CI. - - Измеряйте и анализируйте всё, что только возможно. +:::info Советы о том, как создать собственную высокопроизводительную систему +- При проектировании системы обращайте внимание на мельчайшие детали реализации. +- Учитывайте возможности аппаратного обеспечения. +- Выбирайте структуры и представления данных исходя из требований конкретной задачи. +- Для особых случаев разрабатывайте специализированные решения. +- Пробуйте новые алгоритмы, о которых вы вчера прочитали в интернете. Ищите возможности для совершенствования. +- Выбирайте алгоритмы динамически, в процессе выполнения, на основе статистики. +- Ориентируйтесь на показатели, собранные при работе с реальными данными. +- Проверяйте производительность в процессе CI. +- Измеряйте и анализируйте всё, что только возможно. ::: diff --git a/docs/ru/faq/integration/index.md b/docs/ru/faq/integration/index.md index aa0579b9375d..168da82de535 100644 --- a/docs/ru/faq/integration/index.md +++ b/docs/ru/faq/integration/index.md @@ -14,8 +14,8 @@ sidebar_label: Интеграция - [Как импортировать JSON в ClickHouse?](json-import.md) - [Что делать, если у меня проблема с кодировками при использовании Oracle через ODBC?](oracle-odbc.md) -:::info "Если вы не нашли то, что искали" - Загляните в другие подразделы F.A.Q. или поищите в остальных разделах документации, ориентируйтесь по оглавлению слева. +:::info Если вы не нашли то, что искали +Загляните в другие подразделы F.A.Q. или поищите в остальных разделах документации, ориентируйтесь по оглавлению слева. ::: [Original article](https://clickhouse.com/docs/ru/faq/integration/) diff --git a/docs/ru/faq/integration/json-import.md b/docs/ru/faq/integration/json-import.md index a3c89aed429d..230d6ab74c07 100644 --- a/docs/ru/faq/integration/json-import.md +++ b/docs/ru/faq/integration/json-import.md @@ -29,6 +29,6 @@ $ echo '{"foo":"bar"}' | clickhouse-client --query="INSERT INTO test FORMAT JSO - `input_format_skip_unknown_fields` позволяет импортировать JSON, даже если он содержит дополнительные поля, которых нет в таблице (отбрасывая лишние поля). - `input_format_import_nested_json` позволяет импортировать вложенные JSON-объекты в столбцы типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md). 
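A minimal sketch of how the import settings above are used in practice; the table `test` mirrors the document's earlier example, and the JSON payload is hypothetical:

``` sql
-- Assumed table, matching the example above (ORDER BY added to make it runnable).
CREATE TABLE IF NOT EXISTS test (foo String) ENGINE = MergeTree ORDER BY foo;

-- With input_format_skip_unknown_fields enabled, the extra "bar" key
-- is silently dropped instead of failing the INSERT.
SET input_format_skip_unknown_fields = 1;
INSERT INTO test FORMAT JSONEachRow {"foo":"baz","bar":123}
```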
- :::note "Примечание" - В HTTP-интерфейсе настройки передаются через параметры `GET` запроса, в `CLI` interface — как дополнительные аргументы командной строки, начинающиеся с `--`. - ::: +:::note Примечание +В HTTP-интерфейсе настройки передаются через параметры `GET` запроса, в `CLI` interface — как дополнительные аргументы командной строки, начинающиеся с `--`. +::: diff --git a/docs/ru/faq/operations/delete-old-data.md b/docs/ru/faq/operations/delete-old-data.md index aad33ce0333f..031d2881ae28 100644 --- a/docs/ru/faq/operations/delete-old-data.md +++ b/docs/ru/faq/operations/delete-old-data.md @@ -14,9 +14,10 @@ ClickHouse позволяет автоматически удалять данн Ключевое преимущество такого подхода в том, что не нужно использовать внешнюю систему, чтобы запустить процесс — когда заданы условия TTL, удаление данных выполняется автоматически в фоновом режиме. - :::note - TTL можно использовать не только для перемещения в [/dev/null](https://en.wikipedia.org/wiki/Null_device), но еще и между дисками, например, с SSD на HDD. - ::: +:::note Примечание +TTL можно использовать не только для перемещения в [/dev/null](https://en.wikipedia.org/wiki/Null_device), но еще и между дисками, например, с SSD на HDD. +::: + [Подробнее о конфигурировании TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl). ## ALTER DELETE {#alter-delete} diff --git a/docs/ru/faq/operations/index.md b/docs/ru/faq/operations/index.md index c1135716f394..2dc788908fb0 100644 --- a/docs/ru/faq/operations/index.md +++ b/docs/ru/faq/operations/index.md @@ -13,8 +13,8 @@ sidebar_label: Операции - [Какую версию ClickHouse использовать?](production.md) - [Возможно ли удалить старые записи из таблицы ClickHouse?](delete-old-data.md) -:::info "Если вы не нашли то, что искали" - Загляните в другие подразделы F.A.Q. или поищите в остальных разделах документации, ориентируйтесь по оглавлению слева. +:::info Если вы не нашли то, что искали +Загляните в другие подразделы F.A.Q. или поищите в остальных разделах документации, ориентируйтесь по оглавлению слева. ::: [Original article](https://clickhouse.com/docs/en/faq/operations/) diff --git a/docs/ru/faq/operations/production.md b/docs/ru/faq/operations/production.md index 5bbc0741d8c8..70186a476b43 100644 --- a/docs/ru/faq/operations/production.md +++ b/docs/ru/faq/operations/production.md @@ -60,5 +60,6 @@ sidebar_position: 10 Часто компании, которые изначально ориентировались на релизы `lts`, позднее переходят на `stable`, поскольку хотят быстрее получать доступ к новым возможностям. -:::danger "Важно" - Мы всегда стремимся поддерживать совместимость релизов, но иногда это правило нарушается, и какие-то отдельные возможности в новых релизах становятся недоступны. Перед обновлением ClickHouse обязательно изучите [журнал изменений](../../whats-new/changelog/index.mdx), чтобы убедиться, что в нем нет объявлений о нарушении обратной совместимости. +:::danger Важно +Мы всегда стремимся поддерживать совместимость релизов, но иногда это правило нарушается, и какие-то отдельные возможности в новых релизах становятся недоступны. Перед обновлением ClickHouse обязательно изучите [журнал изменений](../../whats-new/changelog/index.mdx), чтобы убедиться, что в нем нет объявлений о нарушении обратной совместимости. 
+::: diff --git a/docs/ru/getting-started/example-datasets/nyc-taxi.md b/docs/ru/getting-started/example-datasets/nyc-taxi.md index 7fe6a998a63c..12d0c18c3a1d 100644 --- a/docs/ru/getting-started/example-datasets/nyc-taxi.md +++ b/docs/ru/getting-started/example-datasets/nyc-taxi.md @@ -291,9 +291,8 @@ $ sudo service clickhouse-server restart $ clickhouse-client --query "SELECT COUNT(*) FROM datasets.trips_mergetree" ``` -:::info "Info" - Если вы собираетесь выполнять запросы, приведенные ниже, то к имени таблицы -нужно добавить имя базы, `datasets.trips_mergetree`. +:::info Примечание +Если вы собираетесь выполнять запросы, приведенные ниже, то к имени таблицы нужно добавить имя базы, `datasets.trips_mergetree`. ::: ## Результаты на одном сервере {#rezultaty-na-odnom-servere} diff --git a/docs/ru/getting-started/example-datasets/ontime.md b/docs/ru/getting-started/example-datasets/ontime.md index a3cd116217ea..4e3c5a1e7e58 100644 --- a/docs/ru/getting-started/example-datasets/ontime.md +++ b/docs/ru/getting-started/example-datasets/ontime.md @@ -155,9 +155,8 @@ $ sudo service clickhouse-server restart $ clickhouse-client --query "SELECT COUNT(*) FROM datasets.ontime" ``` -:::info "Info" - Если вы собираетесь выполнять запросы, приведенные ниже, то к имени таблицы -нужно добавить имя базы, `datasets.ontime`. +:::info Примечание +Если вы собираетесь выполнять запросы, приведенные ниже, то к имени таблицы нужно добавить имя базы, `datasets.ontime`. ::: ## Запросы: {#zaprosy} diff --git a/docs/ru/getting-started/example-datasets/star-schema.md b/docs/ru/getting-started/example-datasets/star-schema.md index a37545317e34..a444a0d9ec29 100644 --- a/docs/ru/getting-started/example-datasets/star-schema.md +++ b/docs/ru/getting-started/example-datasets/star-schema.md @@ -16,9 +16,9 @@ $ make Генерация данных: -:::warning "Внимание" - -s 100 – dbgen генерирует 600 миллионов строк (67 ГБ) --s 1000 – dbgen генерирует 6 миллиардов строк (занимает много времени) +:::warning Внимание +`-s 100` – dbgen генерирует 600 миллионов строк (67 ГБ) +`-s 1000` – dbgen генерирует 6 миллиардов строк (занимает много времени) ::: ``` bash diff --git a/docs/ru/getting-started/example-datasets/wikistat.md b/docs/ru/getting-started/example-datasets/wikistat.md deleted file mode 100644 index 479616d667b9..000000000000 --- a/docs/ru/getting-started/example-datasets/wikistat.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -slug: /ru/getting-started/example-datasets/wikistat -sidebar_position: 17 -sidebar_label: WikiStat ---- - -# WikiStat {#wikistat} - -См: http://dumps.wikimedia.org/other/pagecounts-raw/ - -Создание таблицы: - -``` sql -CREATE TABLE wikistat -( - date Date, - time DateTime, - project String, - subproject String, - path String, - hits UInt64, - size UInt64 -) ENGINE = MergeTree(date, (path, time), 8192); -``` - -Загрузка данных: - -``` bash -$ for i in {2007..2016}; do for j in {01..12}; do echo $i-$j >&2; curl -sSL "http://dumps.wikimedia.org/other/pagecounts-raw/$i/$i-$j/" | grep -oE 'pagecounts-[0-9]+-[0-9]+\.gz'; done; done | sort | uniq | tee links.txt -$ cat links.txt | while read link; do wget http://dumps.wikimedia.org/other/pagecounts-raw/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1/')/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1-\2/')/$link; done -$ ls -1 /opt/wikistat/ | grep gz | while read i; do echo $i; gzip -cd /opt/wikistat/$i | ./wikistat-loader --time="$(echo -n $i | sed -r 
's/pagecounts-([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2})([0-9]{2})\.gz/\1-\2-\3 \4-00-00/')" | clickhouse-client --query="INSERT INTO wikistat FORMAT TabSeparated"; done -``` diff --git a/docs/ru/getting-started/example-datasets/wikistat.md b/docs/ru/getting-started/example-datasets/wikistat.md new file mode 120000 index 000000000000..2d429d00984e --- /dev/null +++ b/docs/ru/getting-started/example-datasets/wikistat.md @@ -0,0 +1 @@ +../../../en/getting-started/example-datasets/wikistat.md \ No newline at end of file diff --git a/docs/ru/getting-started/install.md b/docs/ru/getting-started/install.md index f041db43d0f8..63bd7ee8eb61 100644 --- a/docs/ru/getting-started/install.md +++ b/docs/ru/getting-started/install.md @@ -69,10 +69,10 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password. - `clickhouse-client` — Создает символические ссылки для `clickhouse-client` и других клиентских инструментов и устанавливает конфигурационные файлы `clickhouse-client`. - `clickhouse-common-static-dbg` — Устанавливает исполняемые файлы ClickHouse собранные с отладочной информацией. - :::note "Внимание" - Если вам нужно установить ClickHouse определенной версии, вы должны установить все пакеты одной версии: - `sudo apt-get install clickhouse-server=21.8.5.7 clickhouse-client=21.8.5.7 clickhouse-common-static=21.8.5.7` - ::: +:::note Внимание +Если вам нужно установить ClickHouse определенной версии, вы должны установить все пакеты одной версии: +`sudo apt-get install clickhouse-server=21.8.5.7 clickhouse-client=21.8.5.7 clickhouse-common-static=21.8.5.7` +::: ### Из RPM пакетов {#from-rpm-packages} Команда ClickHouse в Яндексе рекомендует использовать официальные предкомпилированные `rpm` пакеты для CentOS, RedHat и всех остальных дистрибутивов Linux, основанных на rpm. diff --git a/docs/ru/getting-started/tutorial.md b/docs/ru/getting-started/tutorial.md index 60a7463f70f0..6ebdcda8befd 100644 --- a/docs/ru/getting-started/tutorial.md +++ b/docs/ru/getting-started/tutorial.md @@ -91,7 +91,7 @@ clickhouse-client --query='INSERT INTO table FORMAT TabSeparated' < data.tsv ## Import Sample Dataset {#import-sample-dataset} -Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use the anonymized data of Yandex.Metrica, the first service that runs ClickHouse in production way before it became open-source (more on that in [history section](../introduction/history.md)). There are [multiple ways to import Yandex.Metrica dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one. +Now it’s time to fill our ClickHouse server with some sample data. In this tutorial, we’ll use some anonymized metric data. There are [multiple ways to import the dataset](../getting-started/example-datasets/metrica.md), and for the sake of the tutorial, we’ll go with the most realistic one. ### Download and Extract Table Data {#download-and-extract-table-data} @@ -116,7 +116,7 @@ Syntax for creating tables is way more complicated compared to databases (see [r 2. Table schema, i.e. list of columns and their [data types](../sql-reference/data-types/index.md). 3. [Table engine](../engines/table-engines/index.md) and its settings, which determines all the details on how queries to this table will be physically executed. 
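As a sketch of those three parts together (the table name, columns, and keys here are hypothetical, not the tutorial's actual schema):

``` sql
-- 1. Name of the table to create, 2. its schema, 3. the engine and its settings.
CREATE TABLE tutorial.hits_demo
(
    UserID UInt64,
    EventTime DateTime,
    URL String
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(EventTime)
ORDER BY (UserID, EventTime);
```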
-Yandex.Metrica is a web analytics service, and sample dataset doesn’t cover its full functionality, so there are only two tables to create:
+There are only two tables to create:
 
 - `hits` is a table with each action done by all users on all websites covered by the service.
 - `visits` is a table that contains pre-built sessions instead of individual actions.

@@ -523,7 +523,7 @@ SELECT
     sumIf(Sign, has(Goals.ID, 1105530)) AS goal_visits,
     (100. * goal_visits) / visits AS goal_percent
 FROM tutorial.visits_v1
-WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403) AND (domain(StartURL) = 'yandex.ru')
+WHERE (CounterID = 912887) AND (toYYYYMM(StartDate) = 201403)
 ```
 
 ## Cluster Deployment {#cluster-deployment}

@@ -544,19 +544,19 @@ Example config for a cluster with three shards, one replica each:
-                <host>example-perftest01j.yandex.ru</host>
+                <host>example-perftest01j.clickhouse.com</host>
                 <port>9000</port>
-                <host>example-perftest02j.yandex.ru</host>
+                <host>example-perftest02j.clickhouse.com</host>
                 <port>9000</port>
-                <host>example-perftest03j.yandex.ru</host>
+                <host>example-perftest03j.clickhouse.com</host>
                 <port>9000</port>

@@ -585,8 +585,9 @@ Let’s run [INSERT SELECT](../sql-reference/statements/insert-into.md) into the
 INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
 ```
 
-:::danger "Notice"
- This approach is not suitable for the sharding of large tables. There’s a separate tool [clickhouse-copier](../operations/utilities/clickhouse-copier.md) that can re-shard arbitrary large tables.
+:::danger Notice
+This approach is not suitable for the sharding of large tables. There’s a separate tool [clickhouse-copier](../operations/utilities/clickhouse-copier.md) that can re-shard arbitrarily large tables.
+:::
 
 As you could expect, computationally heavy queries run N times faster if they utilize 3 servers instead of one.

@@ -602,15 +603,15 @@ Example config for a cluster of one shard containing three replicas:
-                <host>example-perftest01j.yandex.ru</host>
+                <host>example-perftest01j.clickhouse.com</host>
                 <port>9000</port>
-                <host>example-perftest02j.yandex.ru</host>
+                <host>example-perftest02j.clickhouse.com</host>
                 <port>9000</port>
-                <host>example-perftest03j.yandex.ru</host>
+                <host>example-perftest03j.clickhouse.com</host>
                 <port>9000</port>

@@ -620,23 +621,24 @@ Example config for a cluster of one shard containing three replicas:
 
 To enable native replication [ZooKeeper](http://zookeeper.apache.org/) is required. ClickHouse takes care of data consistency on all replicas and runs restore procedure after failure automatically. It’s recommended to deploy the ZooKeeper cluster on separate servers (where no other processes including ClickHouse are running).
 
- :::note "Note"
- ZooKeeper is not a strict requirement: in some simple cases, you can duplicate the data by writing it into all the replicas from your application code. This approach is **not** recommended, in this case, ClickHouse won’t be able to guarantee data consistency on all replicas. Thus it becomes the responsibility of your application.
- :::
+:::note Note
+ZooKeeper is not a strict requirement: in some simple cases, you can duplicate the data by writing it into all the replicas from your application code. This approach is **not** recommended; in this case, ClickHouse won’t be able to guarantee data consistency on all replicas. Thus it becomes the responsibility of your application.
+:::
+
 ZooKeeper locations are specified in the configuration file:
 
 ``` xml
 <zookeeper>
     <node>
-        <host>zoo01.yandex.ru</host>
+        <host>zoo01.clickhouse.com</host>
         <port>2181</port>
     </node>
     <node>
-        <host>zoo02.yandex.ru</host>
+        <host>zoo02.clickhouse.com</host>
         <port>2181</port>
     </node>
     <node>
-        <host>zoo03.yandex.ru</host>
+        <host>zoo03.clickhouse.com</host>
         <port>2181</port>
     </node>
 </zookeeper>
 ```

diff --git a/docs/ru/interfaces/cli.md b/docs/ru/interfaces/cli.md
index aa6ae3629e8a..47ab6474fc02 100644
--- a/docs/ru/interfaces/cli.md
+++ b/docs/ru/interfaces/cli.md
@@ -128,7 +128,7 @@ $ clickhouse-client --param_tbl="numbers" --param_db="system" --param_col="numbe
 - `--port` — порт для подключения, по умолчанию — 9000. Обратите внимание: для HTTP-интерфейса и нативного интерфейса используются разные порты.
 - `--user, -u` — имя пользователя, по умолчанию — ‘default’.
 - `--password` — пароль, по умолчанию — пустая строка.
-- `--query, -q` — запрос для выполнения, при использовании в неинтерактивном режиме.
+- `--query, -q` — запрос для выполнения, при использовании в неинтерактивном режиме. Допускается указание `--query` несколько раз (`--query "SELECT 1;" --query "SELECT 2;"...`).
 - `--queries-file` - путь к файлу с запросами для выполнения. Необходимо указать только одну из опций: `query` или `queries-file`.
 - `--database, -d` — выбрать текущую БД. Без указания значение берется из настроек сервера (по умолчанию — БД ‘default’).
 - `--multiline, -m` — если указано — разрешить многострочные запросы, не отправлять запрос по нажатию Enter.

diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md
index 48a6132170a3..b4794b027431 100644
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@@ -401,8 +401,8 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
 - [output_format_csv_crlf_end_of_line](../operations/settings/settings.md#output_format_csv_crlf_end_of_line) - если установлено значение true, конец строки в формате вывода CSV будет `\r\n` вместо `\n`. Значение по умолчанию - `false`.
 - [input_format_csv_skip_first_lines](../operations/settings/settings.md#input_format_csv_skip_first_lines) - пропустить указанное количество строк в начале данных. Значение по умолчанию - `0`.
 - [input_format_csv_detect_header](../operations/settings/settings.md#input_format_csv_detect_header) - обнаружить заголовок с именами и типами в формате CSV. Значение по умолчанию - `true`.
-- [input_format_csv_trim_whitespaces](../operations/settings/settings.md#input_format_csv_trim_whitespaces) - удалить пробелы и символы табуляции из строк без кавычек.
-Значение по умолчанию - `true`.
+- [input_format_csv_trim_whitespaces](../operations/settings/settings.md#input_format_csv_trim_whitespaces) - удалить пробелы и символы табуляции из строк без кавычек. Значение по умолчанию - `true`.
+- [input_format_csv_allow_variable_number_of_columns](../operations/settings/settings.md#input_format_csv_allow_variable_number_of_columns) - игнорировать дополнительные столбцы (если файл содержит больше столбцов, чем ожидается) и рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. Значение по умолчанию - `false`.
 
 ## CSVWithNames {#csvwithnames}

@@ -772,9 +772,9 @@ CREATE TABLE IF NOT EXISTS example_table
 - Если `input_format_defaults_for_omitted_fields = 0`, то значение по умолчанию для `x` и `a` равняется `0` (поскольку это значение по умолчанию для типа данных `UInt32`.)
 - Если `input_format_defaults_for_omitted_fields = 1`, то значение по умолчанию для `x` равно `0`, а значение по умолчанию `a` равно `x * 2`.
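A sketch of the difference described by the two bullets above, reusing the document's `example_table`; the INSERT payload is hypothetical, and the ORDER BY clause is an assumption added to make the definition runnable:

``` sql
-- Assumed definition from the document's example: column `a` defaults to x * 2.
CREATE TABLE IF NOT EXISTS example_table (x UInt32, a DEFAULT x * 2) ENGINE = MergeTree ORDER BY x;

-- With the setting enabled, the omitted column `a` is computed
-- from its DEFAULT expression, so here a = 6.
SET input_format_defaults_for_omitted_fields = 1;
INSERT INTO example_table FORMAT JSONEachRow {"x":3}
-- With input_format_defaults_for_omitted_fields = 0, `a` would have been 0 instead.
```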
- :::note "Предупреждение" - При добавлении данных с помощью `input_format_defaults_for_omitted_fields = 1`, ClickHouse потребляет больше вычислительных ресурсов по сравнению с `input_format_defaults_for_omitted_fields = 0`. - ::: +:::note Предупреждение +При добавлении данных с помощью `input_format_defaults_for_omitted_fields = 1`, ClickHouse потребляет больше вычислительных ресурсов по сравнению с `input_format_defaults_for_omitted_fields = 0`. +::: ### Выборка данных {#vyborka-dannykh} Рассмотрим в качестве примера таблицу `UserActivity`: @@ -795,9 +795,10 @@ CREATE TABLE IF NOT EXISTS example_table В отличие от формата [JSON](#json), для `JSONEachRow` ClickHouse не заменяет невалидные UTF-8 последовательности. Значения экранируются так же, как и для формата `JSON`. - :::note "Примечание" - В строках может выводиться произвольный набор байт. Используйте формат `JSONEachRow`, если вы уверены, что данные в таблице могут быть представлены в формате JSON без потери информации. - ::: +:::note Примечание +В строках может выводиться произвольный набор байт. Используйте формат `JSONEachRow`, если вы уверены, что данные в таблице могут быть представлены в формате JSON без потери информации. +::: + ### Использование вложенных структур {#jsoneachrow-nested} Если у вас есть таблица со столбцами типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md), то в неё можно вставить данные из JSON-документа с такой же структурой. Функциональность включается настройкой [input_format_import_nested_json](../operations/settings/settings.md#settings-input_format_import_nested_json). @@ -1305,9 +1306,9 @@ SET format_avro_schema_registry_url = 'http://schema-registry'; SELECT * FROM topic1_stream; ``` - :::note "Внимание" - `format_avro_schema_registry_url` необходимо настроить в `users.xml`, чтобы сохранить значение после перезапуска. Также можно использовать настройку `format_avro_schema_registry_url` табличного движка `Kafka`. - ::: +:::note Внимание +`format_avro_schema_registry_url` необходимо настроить в `users.xml`, чтобы сохранить значение после перезапуска. Также можно использовать настройку `format_avro_schema_registry_url` табличного движка `Kafka`. +::: ## Parquet {#data-format-parquet} [Apache Parquet](https://parquet.apache.org/) — формат поколоночного хранения данных, который распространён в экосистеме Hadoop. Для формата `Parquet` ClickHouse поддерживает операции чтения и записи. @@ -1353,8 +1354,6 @@ ClickHouse поддерживает настраиваемую точность $ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet" ``` -Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested). - Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Parquet, используйте команду следующего вида: ``` bash @@ -1413,8 +1412,6 @@ ClickHouse поддерживает настраиваемую точность $ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow" ``` -Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested). 
- ### Вывод данных {#selecting-data-arrow} Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Arrow, используйте команду следующего вида: @@ -1471,8 +1468,6 @@ ClickHouse поддерживает настраиваемую точность $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC" ``` -Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested). - ### Вывод данных {#selecting-data-2} Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата ORC, используйте команду следующего вида: diff --git a/docs/ru/interfaces/http.md b/docs/ru/interfaces/http.md index b8c5ee77f0c4..16927408bc43 100644 --- a/docs/ru/interfaces/http.md +++ b/docs/ru/interfaces/http.md @@ -50,7 +50,7 @@ Connection: Close Content-Type: text/tab-separated-values; charset=UTF-8 X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f -X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"} 1 ``` @@ -173,9 +173,10 @@ $ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @- Для отправки сжатого запроса `POST` добавьте заголовок `Content-Encoding: compression_method`. Чтобы ClickHouse сжимал ответ, разрешите сжатие настройкой [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) и добавьте заголовок `Accept-Encoding: compression_method`. Уровень сжатия данных для всех методов сжатия можно задать с помощью настройки [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level). - :::note "Примечание" - Некоторые HTTP-клиенты могут по умолчанию распаковывать данные (`gzip` и `deflate`) с сервера в фоновом режиме и вы можете получить распакованные данные, даже если правильно используете настройки сжатия. - ::: +:::note Примечание +Некоторые HTTP-клиенты могут по умолчанию распаковывать данные (`gzip` и `deflate`) с сервера в фоновом режиме и вы можете получить распакованные данные, даже если правильно используете настройки сжатия. +::: + **Примеры** ``` bash @@ -266,9 +267,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812 Прогресс выполнения запроса можно отслеживать с помощью заголовков ответа `X-ClickHouse-Progress`. Для этого включите [send_progress_in_http_headers](../operations/settings/settings.md#settings-send_progress_in_http_headers). 
Пример последовательности заголовков:

``` text
-X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"}
-X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"}
+X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334"}
+X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","elapsed_ns":"992334"}
+X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","elapsed_ns":"1232334"}
```

Возможные поля заголовка:

@@ -425,9 +426,9 @@ $ curl -v 'http://localhost:8123/predefined_query'

В следующем примере определяются настройки [max_threads](../operations/settings/settings.md#settings-max_threads) и `max_final_threads`, а затем запрашивается системная таблица, чтобы проверить, были ли эти параметры успешно установлены.

- :::note "Предупреждение"
- Чтобы сохранить стандартные `handlers` такие как `query`, `play`, `ping`, используйте правило `<defaults/>`.
- :::
+:::note Предупреждение
+Чтобы сохранить стандартные `handlers` такие как `query`, `play`, `ping`, используйте правило `<defaults/>`.
+:::

Пример:

``` xml
@@ -455,9 +456,9 @@ $ curl -H 'XXX:TEST_HEADER_VALUE' -H 'PARAMS_XXX:max_threads' 'http://localhost:
max_final_threads	2
```

- :::note "Предупреждение"
- В одном `predefined_query_handler` поддерживается только один запрос типа `INSERT`.
- :::
+:::note Предупреждение
+В одном `predefined_query_handler` поддерживается только один запрос типа `INSERT`.
+:::

### dynamic_query_handler {#dynamic_query_handler}

В `dynamic_query_handler` запрос пишется в виде параметров HTTP-запроса. Разница в том, что в `predefined_query_handler` запрос записывается в конфигурационный файл. Вы можете настроить `query_param_name` в `dynamic_query_handler`.

@@ -529,7 +530,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi'
< Content-Type: text/html; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
* Connection #0 to host localhost left intact
Say Hi!%

@@ -569,7 +570,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler'
< Content-Type: text/plain; charset=UTF-8
< Transfer-Encoding: chunked
< Keep-Alive: timeout=3
-< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"}
+< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"}
<
* Connection #0 to host localhost left intact
% @@ -621,7 +622,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"} < Absolute Path File * Connection #0 to host localhost left intact @@ -640,7 +641,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"} < Relative Path File * Connection #0 to host localhost left intact diff --git a/docs/ru/interfaces/third-party/client-libraries.md b/docs/ru/interfaces/third-party/client-libraries.md index a4659e9ac4ec..a9210964f3ef 100644 --- a/docs/ru/interfaces/third-party/client-libraries.md +++ b/docs/ru/interfaces/third-party/client-libraries.md @@ -6,8 +6,8 @@ sidebar_label: "Клиентские библиотеки от сторонни # Клиентские библиотеки от сторонних разработчиков {#klientskie-biblioteki-ot-storonnikh-razrabotchikov} -:::danger "Disclaimer" - Яндекс не поддерживает перечисленные ниже библиотеки и не проводит тщательного тестирования для проверки их качества. +:::danger Предупреждение +ClickHouse Inc. не поддерживает перечисленные ниже библиотеки и не проводит тщательного тестирования для проверки их качества. ::: - Python: @@ -37,6 +37,8 @@ sidebar_label: "Клиентские библиотеки от сторонни - [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse) - [clickhouse-client](https://github.com/depyronick/clickhouse-client) - [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm) + - [clickhouse-ts](https://github.com/bytadaniel/clickhouse-ts) + - [clickcache](https://github.com/bytadaniel/clickcache) - Perl - [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse) - [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse) diff --git a/docs/ru/interfaces/third-party/index.md b/docs/ru/interfaces/third-party/index.md index 45f29d781b24..c0beaf143401 100644 --- a/docs/ru/interfaces/third-party/index.md +++ b/docs/ru/interfaces/third-party/index.md @@ -13,6 +13,6 @@ sidebar_position: 24 - [GUI](../../interfaces/third-party/gui.md) - [Proxies](../../interfaces/third-party/proxy.md) - :::note "Примечание" - С ClickHouse работают также универсальные инструменты, поддерживающие общий API, такие как [ODBC](../../interfaces/odbc.md) или [JDBC](../../interfaces/jdbc.md). +:::note Примечание +С ClickHouse работают также универсальные инструменты, поддерживающие общий API, такие как [ODBC](../../interfaces/odbc.md) или [JDBC](../../interfaces/jdbc.md). 
::: \ No newline at end of file diff --git a/docs/ru/interfaces/third-party/integrations.md b/docs/ru/interfaces/third-party/integrations.md index 86e0dc15aacc..3f7ded897eac 100644 --- a/docs/ru/interfaces/third-party/integrations.md +++ b/docs/ru/interfaces/third-party/integrations.md @@ -6,8 +6,9 @@ sidebar_label: "Библиотеки для интеграции от сторо # Библиотеки для интеграции от сторонних разработчиков {#biblioteki-dlia-integratsii-ot-storonnikh-razrabotchikov} -:::danger "Disclaimer" - ClickHouse, Inc. не занимается поддержкой перечисленных ниже инструментов и библиотек и не проводит тщательного тестирования для проверки их качества. +:::danger Предупреждение +ClickHouse Inc. не занимается поддержкой перечисленных ниже инструментов и библиотек и не проводит тщательного тестирования для проверки их качества. +::: ## Инфраструктурные продукты {#infrastrukturnye-produkty} diff --git a/docs/ru/operations/access-rights.md b/docs/ru/operations/access-rights.md index f268165f10e3..0793e49edac9 100644 --- a/docs/ru/operations/access-rights.md +++ b/docs/ru/operations/access-rights.md @@ -26,9 +26,9 @@ ClickHouse поддерживает управление доступом на Рекомендуется использовать SQL-воркфлоу. Оба метода конфигурации работают одновременно, поэтому, если для управления доступом вы используете конфигурационные файлы, вы можете плавно перейти на SQL-воркфлоу. - :::note "Внимание" - Нельзя одновременно использовать оба метода для управления одним и тем же объектом системы доступа. - ::: +:::note Внимание +Нельзя одновременно использовать оба метода для управления одним и тем же объектом системы доступа. +::: Чтобы посмотреть список всех пользователей, ролей, профилей и пр., а также все привилегии, используйте запрос [SHOW ACCESS](../sql-reference/statements/show.md#show-access-statement). ## Использование {#access-control-usage} diff --git a/docs/ru/operations/backup.md b/docs/ru/operations/backup.md index 7eaca62fd699..9ff13bbc8a65 100644 --- a/docs/ru/operations/backup.md +++ b/docs/ru/operations/backup.md @@ -12,9 +12,10 @@ sidebar_label: "Резервное копирование данных" Каждая компания имеет различные доступные ресурсы и бизнес-требования, поэтому нет универсального решения для резервного копирования и восстановления ClickHouse, которое будет подходить в каждой ситуации. То, что работает для одного гигабайта данных, скорее всего, не будет работать для десятков петабайт. Существует множество возможных подходов со своими плюсами и минусами, которые будут рассмотрены ниже. Рекомендуется использовать несколько подходов вместо одного, чтобы компенсировать их различные недостатки. - :::note "Примечание" - Имейте в виду, что если вы создали резервную копию чего-то и никогда не пытались восстановить её, скорее всего, восстановление не будет работать должным образом, когда вам это действительно понадобится (или, по крайней мере, это займет больше времени, чем будет приемлемо для бизнеса). Поэтому, какой бы подход к резервному копированию вы ни выбрали, обязательно автоматизируйте процесс восстановления и регулярно запускайте его на резервном кластере ClickHouse. - ::: +:::note Примечание +Имейте в виду, что если вы создали резервную копию чего-то и никогда не пытались восстановить её, скорее всего, восстановление не будет работать должным образом, когда вам это действительно понадобится (или, по крайней мере, это займет больше времени, чем будет приемлемо для бизнеса). 
Поэтому, какой бы подход к резервному копированию вы ни выбрали, обязательно автоматизируйте процесс восстановления и регулярно запускайте его на резервном кластере ClickHouse.
+:::
+
## Дублирование данных {#dublirovanie-dannykh}

Часто данные, которые поступают в ClickHouse, доставляются через некоторую отказоустойчивую очередь, например [Apache Kafka](https://kafka.apache.org). В этом случае можно настроить дополнительный набор подписчиков, которые будут считывать один и тот же поток данных во время записи в ClickHouse и хранить его в холодном хранилище. Большинство компаний уже имеют некоторые рекомендуемые по умолчанию холодные хранилища, которые могут быть хранилищем объектов или распределенной файловой системой, например [HDFS](https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html).

diff --git a/docs/ru/operations/configuration-files.md b/docs/ru/operations/configuration-files.md
index 2b824ce91bd9..3b037521692a 100644
--- a/docs/ru/operations/configuration-files.md
+++ b/docs/ru/operations/configuration-files.md
@@ -85,6 +85,58 @@ $ cat /etc/clickhouse-server/users.d/alice.xml

Сервер следит за изменениями конфигурационных файлов, а также файлов и ZooKeeper-узлов, которые были использованы при выполнении подстановок и переопределений, и перезагружает настройки пользователей и кластеров на лету. То есть, можно изменять кластера, пользователей и их настройки без перезапуска сервера.

+## Шифрование и скрытие {#encryption}
+
+Вы можете использовать симметричное шифрование для защиты элемента конфигурации, например, поля password. Чтобы это сделать, сначала настройте [кодек шифрования](../sql-reference/statements/create/table.md#encryption-codecs), затем добавьте атрибут `encrypted_by` со значением, равным имени кодека шифрования, к элементу, который надо зашифровать.
+
+В отличие от атрибутов `from_zk`, `from_env` и `incl` (или элемента `include`), подстановка, т.е. расшифровка зашифрованного значения, не выполняется в файле предобработки. Расшифровка происходит только во время исполнения в серверном процессе.
+
+Пример:
+
+```xml
+<clickhouse>
+    <encryption_codecs>
+        <aes_128_gcm_siv>
+            <key_hex>00112233445566778899aabbccddeeff</key_hex>
+        </aes_128_gcm_siv>
+    </encryption_codecs>
+    <interserver_http_credentials>
+        <user>admin</user>
+        <password encrypted_by="AES_128_GCM_SIV">961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85</password>
+    </interserver_http_credentials>
+</clickhouse>
+```
+
+Чтобы получить зашифрованное значение, можно использовать приложение-пример `encrypt_decrypt`.
+
+Пример:
+
+``` bash
+./encrypt_decrypt /etc/clickhouse-server/config.xml -e AES_128_GCM_SIV abcd
+```
+
+``` text
+961F000000040000000000EEDDEF4F453CFE6457C4234BD7C09258BD651D85
+```
+
+Даже с применённым шифрованием элементы в файле предобработки всё равно сохраняются в незашифрованном виде. Если это является проблемой, есть две альтернативы: либо установить права доступа 600 на файл предобработки, либо использовать атрибут `hide_in_preprocessed`.
+
+Пример:
+
+```xml
+<clickhouse>
+    <interserver_http_credentials hide_in_preprocessed="true">
+        <user>admin</user>
+        <password>secret</password>
+    </interserver_http_credentials>
+</clickhouse>
+```
+
## Примеры записи конфигурации на YAML {#example}

Здесь можно рассмотреть пример реальной конфигурации, записанной в YAML: [config.yaml.example](https://github.com/ClickHouse/ClickHouse/blob/master/programs/server/config.yaml.example).
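Для сравнения ниже приведён минимальный набросок фрагмента конфигурации в YAML (иллюстративный пример по мотивам `config.yaml.example`; значения условны, имена элементов соответствуют XML-эквивалентам):

``` yaml
logger:
  level: trace
  log: /var/log/clickhouse-server/clickhouse-server.log
  errorlog: /var/log/clickhouse-server/clickhouse-server.err.log
  size: 1000M
  count: 10
tcp_port: 9000
```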
diff --git a/docs/ru/operations/external-authenticators/kerberos.md b/docs/ru/operations/external-authenticators/kerberos.md
index 4641f15cb560..4c8a7cae60cc 100644
--- a/docs/ru/operations/external-authenticators/kerberos.md
+++ b/docs/ru/operations/external-authenticators/kerberos.md
@@ -8,8 +8,9 @@ ClickHouse предоставляет возможность аутентифи

В настоящее время возможно использование Kerberos только как внешнего аутентификатора, то есть для аутентификации уже существующих пользователей с помощью Kerberos. Пользователи, настроенные для Kerberos-аутентификации, могут работать с ClickHouse только через HTTP-интерфейс, причём сами клиенты должны иметь возможность аутентификации с использованием механизма GSS-SPNEGO.

- ::: Для Kerberos-аутентификации необходимо предварительно корректно настроить Kerberos на стороне клиента, на сервере и в конфигурационных файлах самого ClickHouse. Ниже описана лишь конфигурация ClickHouse.
- :::
+:::note Примечание
+Для Kerberos-аутентификации необходимо предварительно корректно настроить Kerberos на стороне клиента, на сервере и в конфигурационных файлах самого ClickHouse. Ниже описана лишь конфигурация ClickHouse.
+:::

## Настройка Kerberos в ClickHouse {#enabling-kerberos-in-clickhouse}

Для того, чтобы задействовать Kerberos-аутентификацию в ClickHouse, в первую очередь необходимо добавить одну-единственную секцию `kerberos` в `config.xml`.

@@ -56,12 +57,13 @@ ClickHouse предоставляет возможность аутентифи
```

-:::danger "Важно"
- В конфигурационном файле не могут быть указаны одновременно оба параметра. В противном случае, аутентификация с помощью Kerberos будет недоступна для всех пользователей.
-
-:::danger "Важно"
- В конфигурационном файле может быть не более одной секции `kerberos`. В противном случае, аутентификация с помощью Kerberos будет отключена для всех пользователей.
+:::danger Важно
+В конфигурационном файле не могут быть указаны одновременно оба параметра. В противном случае аутентификация с помощью Kerberos будет недоступна для всех пользователей.
+:::

+:::danger Важно
+В конфигурационном файле может быть не более одной секции `kerberos`. В противном случае аутентификация с помощью Kerberos будет отключена для всех пользователей.
+:::

## Аутентификация пользователей с помощью Kerberos {#kerberos-as-an-external-authenticator-for-existing-users}

@@ -100,11 +102,12 @@ ClickHouse предоставляет возможность аутентифи
```

-:::danger "Важно"
- Если пользователь настроен для Kerberos-аутентификации, другие виды аутентификации будут для него недоступны. Если наряду с `kerberos` в определении пользователя будет указан какой-либо другой способ аутентификации, ClickHouse завершит работу.
+:::danger Важно
+Если пользователь настроен для Kerberos-аутентификации, другие виды аутентификации будут для него недоступны. Если наряду с `kerberos` в определении пользователя будет указан какой-либо другой способ аутентификации, ClickHouse завершит работу.
+:::

-:::info ""
- Ещё раз отметим, что кроме `users.xml`, необходимо также включить Kerberos в `config.xml`.
+:::info Примечание
+Ещё раз отметим, что кроме `users.xml` необходимо также включить Kerberos в `config.xml`.
::: ### Настройка Kerberos через SQL {#enabling-kerberos-using-sql} diff --git a/docs/ru/operations/named-collections.md b/docs/ru/operations/named-collections.md index ba6b47116adf..48ee7c9f15d3 100644 --- a/docs/ru/operations/named-collections.md +++ b/docs/ru/operations/named-collections.md @@ -88,7 +88,6 @@ SELECT * FROM s3_engine_table LIMIT 3; 3306 test 8 - 1 1 diff --git a/docs/ru/operations/opentelemetry.md b/docs/ru/operations/opentelemetry.md index 4e127e9e0f00..af6d3eef2055 100644 --- a/docs/ru/operations/opentelemetry.md +++ b/docs/ru/operations/opentelemetry.md @@ -8,7 +8,7 @@ sidebar_label: Поддержка OpenTelemetry ClickHouse поддерживает [OpenTelemetry](https://opentelemetry.io/) — открытый стандарт для сбора трассировок и метрик из распределенного приложения. -:::danger "Предупреждение" +:::danger Предупреждение Поддержка стандарта экспериментальная и будет со временем меняться. ::: diff --git a/docs/ru/operations/optimizing-performance/profile-guided-optimization.md b/docs/ru/operations/optimizing-performance/profile-guided-optimization.md new file mode 120000 index 000000000000..31cb656bd991 --- /dev/null +++ b/docs/ru/operations/optimizing-performance/profile-guided-optimization.md @@ -0,0 +1 @@ +../../../en/operations/optimizing-performance/profile-guided-optimization.md \ No newline at end of file diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md index 5430469ea189..2c7f0b773e83 100644 --- a/docs/ru/operations/server-configuration-parameters/settings.md +++ b/docs/ru/operations/server-configuration-parameters/settings.md @@ -24,8 +24,8 @@ ClickHouse перезагружает встроенные словари с з Настройки компрессии данных. -:::danger "Внимание" - Лучше не использовать, если вы только начали работать с ClickHouse. +:::danger Внимание +Лучше не использовать, если вы только начали работать с ClickHouse. ::: Общий вид конфигурации: @@ -89,9 +89,10 @@ ClickHouse проверяет условия для `min_part_size` и `min_part ``` - :::note "Примечание" - Хранение ключей в конфигурационном файле не рекомендовано. Это не безопасно. Вы можете переместить ключи в отдельный файл на секретном диске и сделать symlink к этому конфигурационному файлу в папке `config.d/`. - ::: +:::note Примечание +Хранение ключей в конфигурационном файле не рекомендовано. Это не безопасно. Вы можете переместить ключи в отдельный файл на секретном диске и сделать symlink к этому конфигурационному файлу в папке `config.d/`. +::: + Загрузка из файла конфигурации, когда ключ представлен в шестнадцатеричной форме: ```xml @@ -174,8 +175,8 @@ ClickHouse проверяет условия для `min_part_size` и `min_part Значение по умолчанию: `1073741824` (1 ГБ). -:::info "Примечание" - Жесткое ограничение настраивается с помощью системных инструментов. +:::info Примечание +Жесткое ограничение настраивается с помощью системных инструментов. ::: **Пример** @@ -475,12 +476,13 @@ ClickHouse проверяет условия для `min_part_size` и `min_part По умолчанию, если секция `interserver_http_credentials` не задана в конфигурации, аутентификация при репликации не используется. - :::note "Примечание" - Настройки `interserver_http_credentials` не относятся к [конфигурации](../../interfaces/cli.md#configuration_files) учетных данных клиента ClickHouse. - ::: - :::note "Примечание" - Учетные данные в `interserver_http_credentials` являются общими для репликации по `HTTP` и `HTTPS`. 
- :::
+:::note Примечание
+Настройки `interserver_http_credentials` не относятся к [конфигурации](../../interfaces/cli.md#configuration_files) учетных данных клиента ClickHouse.
+:::
+:::note Примечание
+Учетные данные в `interserver_http_credentials` являются общими для репликации по `HTTP` и `HTTPS`.
+:::
+
Раздел содержит следующие параметры:

- `user` — имя пользователя.
@@ -575,14 +577,60 @@ ClickHouse поддерживает динамическое изменение
- `errorlog` - Файл лога ошибок.
- `size` - Размер файла. Действует для `log` и `errorlog`. Как только файл достиг размера `size`, ClickHouse архивирует и переименовывает его, а на его месте создает новый файл лога.
- `count` - Количество заархивированных файлов логов, которые сохраняет ClickHouse.
+- `stream_compress` – Сжимать `log` и `errorlog` с помощью алгоритма `lz4`. Чтобы активировать, установите значение `1` или `true`.
+
+Имена файлов `log` и `errorlog` (только имя файла, а не директория) поддерживают спецификаторы шаблонов даты и времени.
+
+**Спецификаторы форматирования**
+С помощью следующих спецификаторов можно определить шаблон для формирования имени файла. Столбец “Пример” показывает возможные значения на момент времени `2023-07-06 18:32:07`.
+
+| Спецификатор | Описание | Пример |
+|--------------|----------|--------|
+| %% | Литерал % | % |
+| %n | Символ новой строки | |
+| %t | Символ горизонтальной табуляции | |
+| %Y | Год как десятичное число, например, 2017 | 2023 |
+| %y | Последние 2 цифры года в виде десятичного числа (диапазон [00,99]) | 23 |
+| %C | Первые 2 цифры года в виде десятичного числа (диапазон [00,99]) | 20 |
+| %G | Год по неделям согласно [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), то есть год, который содержит указанную неделю. Обычно используется вместе с %V. | 2023 |
+| %g | Последние 2 цифры [года по неделям ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), т.е. года, содержащего указанную неделю (диапазон [00,99]). | 23 |
+| %b | Сокращённое название месяца, например Oct (зависит от локали) | Jul |
+| %h | Синоним %b | Jul |
+| %B | Полное название месяца, например, October (зависит от локали) | July |
+| %m | Месяц в виде десятичного числа (диапазон [01,12]) | 07 |
+| %U | Неделя года в виде десятичного числа (воскресенье - первый день недели) (диапазон [00,53]) | 27 |
+| %W | Неделя года в виде десятичного числа (понедельник - первый день недели) (диапазон [00,53]) | 27 |
+| %V | Неделя года ISO 8601 (диапазон [01,53]) | 27 |
+| %j | День года в виде десятичного числа (диапазон [001,366]) | 187 |
+| %d | День месяца в виде десятичного числа (диапазон [01,31]). Перед одиночной цифрой ставится ноль. | 06 |
+| %e | День месяца в виде десятичного числа (диапазон [1,31]). Перед одиночной цифрой ставится пробел.
|   6 | +| %a | Сокращённое название дня недели, например, Fri (зависит от локали) | Thu | +| %A | Полный день недели, например, Friday (зависит от локали) | Thursday | +| %w | День недели в виде десятичного числа, где воскресенье равно 0 (диапазон [0-6]) | 4 | +| %u | День недели в виде десятичного числа, где понедельник равен 1 (формат ISO 8601) (диапазон [1-7]) | 4 | +| %H | Час в виде десятичного числа, 24-часовой формат (диапазон [00-23]) | 18 | +| %I | Час в виде десятичного числа, 12-часовой формат (диапазон [01,12]) | 06 | +| %M | Минуты в виде десятичного числа (диапазон [00,59]) | 32 | +| %S | Секунды как десятичное число (диапазон [00,60]) | 07 | +| %c | Стандартная строка даты и времени, например, Sun Oct 17 04:41:13 2010 (зависит от локали) | Thu Jul 6 18:32:07 2023 | +| %x | Локализованное представление даты (зависит от локали) | 07/06/23 | +| %X | Локализованное представление времени, например, 18:40:20 или 6:40:20 PM (зависит от локали) | 18:32:07 | +| %D | Эквивалентно "%m/%d/%y" | 07/06/23 | +| %F | Эквивалентно "%Y-%m-%d" (формат даты ISO 8601) | 2023-07-06 | +| %r | Локализованное 12-часовое время (зависит от локали) | 06:32:07 PM | +| %R | Эквивалентно "%H:%M" | 18:32 | +| %T | Эквивалентно "%H:%M:%S" (формат времени ISO 8601) | 18:32:07 | +| %p | Локализованное обозначение a.m. или p.m. (зависит от локали) | PM | +| %z | Смещение от UTC в формате ISO 8601 (например, -0430), или без символов, если информация о часовом поясе недоступна | +0800 | +| %Z | Зависящее от локали название или аббревиатура часового пояса, если информация о часовом поясе доступна | Z AWST | **Пример** ``` xml trace - /var/log/clickhouse-server/clickhouse-server.log - /var/log/clickhouse-server/clickhouse-server.err.log + /var/log/clickhouse-server/clickhouse-server-%F-%T.log + /var/log/clickhouse-server/clickhouse-server-%F-%T.err.log 1000M 10 @@ -711,8 +759,8 @@ ClickHouse поддерживает динамическое изменение Определяет максимальное количество одновременно обрабатываемых запросов, связанных с таблицей семейства `MergeTree`. Запросы также могут быть ограничены настройками: [max_concurrent_insert_queries](#max-concurrent-insert-queries), [max_concurrent_select_queries](#max-concurrent-select-queries), [max_concurrent_queries_for_user](#max-concurrent-queries-for-user), [max_concurrent_queries_for_all_users](#max-concurrent-queries-for-all-users), [min_marks_to_honor_max_concurrent_queries](#min-marks-to-honor-max-concurrent-queries). -:::info "Примечание" - Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений. +:::info Примечание +Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений. ::: Возможные значения: @@ -732,8 +780,8 @@ ClickHouse поддерживает динамическое изменение Определяет максимальное количество одновременных `INSERT` запросов. -:::info "Примечание" - Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений. +:::info Примечание +Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений. ::: Возможные значения: @@ -753,8 +801,8 @@ ClickHouse поддерживает динамическое изменение Определяет максимальное количество одновременных `SELECT` запросов. 
-:::info "Примечание" - Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений. +:::info Примечание +Параметры этих настроек могут быть изменены во время выполнения запросов и вступят в силу немедленно. Запросы, которые уже запущены, выполнятся без изменений. ::: Возможные значения: @@ -1012,6 +1060,10 @@ ClickHouse использует потоки из глобального пул
metric_log
7500 1000 + 1048576 + 8192 + 524288 + false ``` @@ -1056,7 +1108,7 @@ ClickHouse использует потоки из глобального пул - verificationMode - Способ проверки сертификатов узла. Подробности находятся в описании класса [Context](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h). Допустимые значения: `none`, `relaxed`, `strict`, `once`. - verificationDepth - Максимальная длина верификационной цепи. Верификация завершится ошибкой, если длина цепи сертификатов превысит установленное значение. - loadDefaultCAFile - Признак того, что будут использоваться встроенные CA-сертификаты для OpenSSL. Допустимые значения: `true`, `false`. \| -- cipherList - Поддерживаемые OpenSSL-шифры. Например, `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`. +- cipherList - Поддерживаемые OpenSSL-шифры. Например, `ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH`. - cacheSessions - Включение/выключение кеширования сессии. Использовать обязательно вместе с `sessionIdContext`. Допустимые значения: `true`, `false`. - sessionIdContext - Уникальный набор произвольных символов, которые сервер добавляет к каждому сгенерированному идентификатору. Длина строки не должна превышать `SSL_MAX_SSL_SESSION_ID_LENGTH`. Рекомендуется к использованию всегда, поскольку позволяет избежать проблем как в случае, если сервер кеширует сессию, так и если клиент затребовал кеширование. По умолчанию `${application.name}`. - sessionCacheSize - Максимальное количество сессий, которые кэширует сервер. По умолчанию - 1024\*20. 0 - неограниченное количество сессий. @@ -1109,12 +1161,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` — имя базы данных; -- `table` — имя таблицы; -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. - +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. **Пример** ``` xml @@ -1123,6 +1182,10 @@ ClickHouse использует потоки из глобального пул part_log
toMonday(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1130,8 +1193,8 @@ ClickHouse использует потоки из глобального пул Путь к каталогу с данными. -:::danger "Обратите внимание" - Завершающий слеш обязателен. +:::danger Обратите внимание +Завершающий слеш обязателен. ::: **Пример** @@ -1172,11 +1235,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` — имя базы данных; -- `table` — имя таблицы, куда будет записываться лог; -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. @@ -1188,6 +1259,10 @@ ClickHouse использует потоки из глобального пул query_log
Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1199,11 +1274,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` — имя базы данных; -- `table` — имя таблицы, куда будет записываться лог; -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. @@ -1215,6 +1298,10 @@ ClickHouse использует потоки из глобального пул query_thread_log
toMonday(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1226,11 +1313,19 @@ ClickHouse использует потоки из глобального пул При настройке логирования используются следующие параметры: -- `database` – имя базы данных. -- `table` – имя системной таблицы, где будут логироваться запросы. -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать, если задан параметр `engine`. -- `engine` — устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать, если задан параметр `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. Если таблица не существует, то ClickHouse создаст её. Если структура журнала запросов изменилась при обновлении сервера ClickHouse, то таблица со старой структурой переименовывается, а новая таблица создается автоматически. @@ -1242,6 +1337,10 @@ ClickHouse использует потоки из глобального пул query_views_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 + false ``` @@ -1251,12 +1350,20 @@ ClickHouse использует потоки из глобального пул Параметры: -- `level` — Максимальный уровень сообщения (по умолчанию `Trace`) которое будет сохранено в таблице. -- `database` — имя базы данных для хранения таблицы. -- `table` — имя таблицы, куда будут записываться текстовые сообщения. -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `level` — Максимальный уровень сообщения (по умолчанию `Trace`) которое будет сохранено в таблице. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. **Пример** ```xml @@ -1266,6 +1373,10 @@ ClickHouse использует потоки из глобального пул system text_log
7500 + 1048576 + 8192 + 524288 + false Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day @@ -1277,13 +1388,21 @@ ClickHouse использует потоки из глобального пул Настройки для [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log) system table operation. -Parameters: +Параметры: -- `database` — Database for storing a table. -- `table` — Table name. -- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` -- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. -- `flush_interval_milliseconds` — Interval for flushing data from the buffer in memory to the table. +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1048576. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 8192. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. По умолчанию файл настроек сервера `config.xml` содержит следующие настройки: @@ -1293,9 +1412,125 @@ Parameters: trace_log
toYYYYMM(event_date) 7500 + 1048576 + 8192 + 524288 ```
+## asynchronous_insert_log {#server_configuration_parameters-asynchronous_insert_log}
+
+Настройки для системной таблицы asynchronous_insert_log, предназначенной для логирования асинхронных вставок.
+
+Параметры:
+
+- `database` — имя базы данных;
+- `table` — имя таблицы;
+- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine`
+- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`.
+- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу.
+- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск.
+Значение по умолчанию: 1048576.
+- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами.
+Значение по умолчанию: 8192.
+- `buffer_size_bytes_flush_threshold` – количество строк в логе, при достижении которого логи начнут скидываться на диск в неблокирующем режиме.
+Значение по умолчанию: `max_size / 2`.
+- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы.
+Значение по умолчанию: false.
+
+**Пример**
+
+```xml + + + system + asynchronous_insert_log
+ 7500 + toYYYYMM(event_date) + 1048576 + 8192 + 524288 + +
+
+``` + +## crash_log {#server_configuration_parameters-crash_log} + +Настройки для таблицы [crash_log](../../operations/system-tables/crash-log.md). + +Параметры: + +- `database` — имя базы данных; +- `table` — имя таблицы; +- `partition_by` — устанавливает [произвольный ключ партиционирования](../../operations/server-configuration-parameters/settings.md). Нельзя использовать если используется `engine` +- `engine` - устанавливает [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table) для системной таблицы. Нельзя использовать если используется `partition_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1024. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 1024. +- `buffer_size_bytes_flush_threshold` – количество линий в логе при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: true. + +**Пример** + +``` xml + + system + crash_log
+ toYYYYMM(event_date) + 7500 + 1024 + 1024 + 512 + true +
+``` + +## backup_log {#server_configuration_parameters-backup_log} + +Настройки для системной таблицы [backup_log](../../operations/system-tables/backup_log.md), предназначенной для логирования операций `BACKUP` и `RESTORE`. + +Параметры: + +- `database` — имя базы данных. +- `table` — имя таблицы. +- `partition_by` — [произвольный ключ партиционирования](../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Нельзя использовать одновременно с `engine`. +- `order_by` - [произвольный ключ сортировки](../../engines/table-engines/mergetree-family/mergetree.md#order_by). Нельзя использовать одновременно с `engine`. +- `engine` - [настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table). Нельзя использовать с `partition_by` или `order_by`. +- `flush_interval_milliseconds` — период сброса данных из буфера в памяти в таблицу. +- `max_size_rows` – максимальный размер в строках для буфера с логами. Когда буфер будет заполнен полностью, сбрасывает логи на диск. +Значение по умолчанию: 1024. +- `reserved_size_rows` – преаллоцированный размер в строках для буфера с логами. +Значение по умолчанию: 1024. +- `buffer_size_rows_flush_threshold` – количество строк в логе, при достижении которого логи начнут скидываться на диск в неблокирующем режиме. +Значение по умолчанию: `max_size_rows / 2`. +- `flush_on_crash` - должны ли логи быть сброшены на диск в случае неожиданной остановки программы. +Значение по умолчанию: false. +- `storage_policy` – название политики хранения (необязательный параметр). +- `settings` - [дополнительные настройки MergeTree Engine](../../engines/table-engines/mergetree-family/mergetree.md#settings) (необязательный параметр). + +**Пример** + +```xml + + + system + backup_log
+ 1000 + toYYYYMM(event_date) + 1048576 + 8192 + 524288 + false + +
+
+``` + ## query_masking_rules {#query-masking-rules} Правила, основанные на регулярных выражениях, которые будут применены для всех запросов, а также для всех сообщений перед сохранением их в лог на сервере, @@ -1401,8 +1636,8 @@ TCP порт для защищённого обмена данными с кли Путь ко временным данным для обработки больших запросов. -:::danger "Обратите внимание" - Завершающий слеш обязателен. +:::danger Обратите внимание +Завершающий слеш обязателен. ::: **Пример** @@ -1417,7 +1652,7 @@ TCP порт для защищённого обмена данными с кли Если политика не задана, используется [tmp_path](#tmp-path). В противном случае `tmp_path` игнорируется. -:::note "Примечание" +:::note Примечание - `move_factor` игнорируется. - `keep_free_space_bytes` игнорируется. - `max_data_part_size_bytes` игнорируется. @@ -1558,11 +1793,11 @@ ClickHouse использует ZooKeeper для хранения метадан Если `use_minimalistic_part_header_in_zookeeper = 1`, то [реплицированные](../../operations/server-configuration-parameters/settings.md) таблицы хранят заголовки кусков данных в компактном виде, используя только одну `znode`. Если таблица содержит много столбцов, этот метод хранения значительно уменьшает объём данных, хранящихся в Zookeeper. - :::note "Внимание" - После того как вы установили `use_minimalistic_part_header_in_zookeeper = 1`, невозможно откатить ClickHouse до версии, которая не поддерживает этот параметр. Будьте осторожны при обновлении ClickHouse на серверах в кластере. Не обновляйте все серверы сразу. Безопаснее проверять новые версии ClickHouse в тестовой среде или только на некоторых серверах кластера. +:::note Внимание +После того как вы установили `use_minimalistic_part_header_in_zookeeper = 1`, невозможно откатить ClickHouse до версии, которая не поддерживает этот параметр. Будьте осторожны при обновлении ClickHouse на серверах в кластере. Не обновляйте все серверы сразу. Безопаснее проверять новые версии ClickHouse в тестовой среде или только на некоторых серверах кластера. - Заголовки частей данных, ранее сохранённые с этим параметром, не могут быть восстановлены в их предыдущем (некомпактном) представлении. - ::: +Заголовки частей данных, ранее сохранённые с этим параметром, не могут быть восстановлены в их предыдущем (некомпактном) представлении. +::: **Значение по умолчанию**: 0. ## disable_internal_dns_cache {#server-settings-disable-internal-dns-cache} @@ -1731,3 +1966,19 @@ ClickHouse использует ZooKeeper для хранения метадан - Положительное целое число. Значение по умолчанию: `10000`. + +## validate_tcp_client_information {#validate-tcp-client-information} + +Включена ли валидация данных о клиенте при запросе от клиента, использующего TCP соединение. + +Если `true`, то на неверные данные от клиента будет выброшено исключение. + +Если `false`, то данные не будут валидироваться. Сервер будет работать с клиентами всех версий. + +Значение по умолчанию: `false`. + +**Пример** + +``` xml +true +``` diff --git a/docs/ru/operations/settings/query-complexity.md b/docs/ru/operations/settings/query-complexity.md index de9bb9690858..fb3b18bd46aa 100644 --- a/docs/ru/operations/settings/query-complexity.md +++ b/docs/ru/operations/settings/query-complexity.md @@ -311,6 +311,52 @@ FORMAT Null; **Подробности** -При вставке данных, ClickHouse вычисляет количество партиций во вставленном блоке. Если число партиций больше, чем `max_partitions_per_insert_block`, ClickHouse генерирует исключение со следующим текстом: +При вставке данных ClickHouse проверяет количество партиций во вставляемом блоке. 
Если количество партиций превышает число `max_partitions_per_insert_block`, ClickHouse либо логирует предупреждение, либо выбрасывает исключение в зависимости от значения `throw_on_max_partitions_per_insert_block`. Исключения имеют следующий текст:

-> «Too many partitions for single INSERT block (more than» + toString(max_parts) + «). The limit is controlled by ‘max_partitions_per_insert_block’ setting. Large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).»
+> “Too many partitions for a single INSERT block (`partitions_count` partitions, limit is ” + toString(max_partitions) + “). The limit is controlled by the ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).”
+
+## throw_on_max_partitions_per_insert_block {#settings-throw_on_max_partition_per_insert_block}
+
+Позволяет контролировать поведение при достижении `max_partitions_per_insert_block`:
+
+- `true` - Когда вставляемый блок достигает `max_partitions_per_insert_block`, возникает исключение.
+- `false` - Записывает предупреждение при достижении `max_partitions_per_insert_block`.
+
+Значение по умолчанию: `true`.
+
+## max_sessions_for_user {#max-sessions-per-user}
+
+Максимальное количество одновременных сессий на одного аутентифицированного пользователя.
+
+Пример:
+
+``` xml
+<profiles>
+    <single_session_profile>
+        <max_sessions_for_user>1</max_sessions_for_user>
+    </single_session_profile>
+    <two_sessions_profile>
+        <max_sessions_for_user>2</max_sessions_for_user>
+    </two_sessions_profile>
+    <unlimited_sessions_profile>
+        <max_sessions_for_user>0</max_sessions_for_user>
+    </unlimited_sessions_profile>
+</profiles>
+<users>
+    <!-- Пользователь может открыть не более одной сессии одновременно -->
+    <Alice>
+        <profile>single_session_profile</profile>
+    </Alice>
+    <!-- Пользователь может открыть не более двух сессий одновременно -->
+    <Bob>
+        <profile>two_sessions_profile</profile>
+    </Bob>
+    <!-- Количество сессий не ограничено -->
+    <Charles>
+        <profile>unlimited_sessions_profile</profile>
+    </Charles>
+</users>
+```
+
+Значение по умолчанию: 0 (неограниченное количество сессий).
diff --git a/docs/ru/operations/settings/settings-profiles.md b/docs/ru/operations/settings/settings-profiles.md
index ba2cb9a601ff..a8374c345c00 100644
--- a/docs/ru/operations/settings/settings-profiles.md
+++ b/docs/ru/operations/settings/settings-profiles.md
@@ -8,9 +8,9 @@ sidebar_label: "Профили настроек"

Профиль настроек — это набор настроек, сгруппированных под одним именем.

- :::note "Информация"
- Для управления профилями настроек рекомендуется использовать [SQL-ориентированный воркфлоу](../access-rights.md#access-control), который также поддерживается в ClickHouse.
- :::
+:::note Информация
+Для управления профилями настроек рекомендуется использовать [SQL-ориентированный воркфлоу](../access-rights.md#access-control), который также поддерживается в ClickHouse.
+:::

Название профиля может быть любым. Вы можете указать один и тот же профиль для разных пользователей. Самое важное, что можно прописать в профиле — `readonly=1`, это обеспечит доступ только на чтение.
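Минимальный набросок такого профиля в `users.xml` (иллюстративный пример; имя `readonly_profile` выбрано условно):

``` xml
<profiles>
    <readonly_profile>
        <readonly>1</readonly>
    </readonly_profile>
</profiles>
```

Далее профиль можно назначить пользователю (элемент `profile` в секции `users`) или активировать в сессии запросом `SET profile = 'readonly_profile'`.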
@@ -39,7 +39,7 @@ SET profile = 'web'
        <max_threads>8</max_threads>
    </default>
-
+
    <web>
        <max_rows_to_read>1000000000</max_rows_to_read>
        <max_bytes_to_read>100000000000</max_bytes_to_read>
@@ -67,6 +67,7 @@ SET profile = 'web'
        <max_ast_depth>50</max_ast_depth>
        <max_ast_elements>100</max_ast_elements>

+       <max_sessions_for_user>4</max_sessions_for_user>
        <readonly>1</readonly>
diff --git a/docs/ru/operations/settings/settings-users.md b/docs/ru/operations/settings/settings-users.md
index 50f4eb5ae6bc..ecdd4f85974b 100644
--- a/docs/ru/operations/settings/settings-users.md
+++ b/docs/ru/operations/settings/settings-users.md
@@ -8,9 +8,10 @@ sidebar_label: "Настройки пользователей"

Раздел `users` конфигурационного файла `user.xml` содержит настройки для пользователей.

- :::note "Информация"
- Для управления пользователями рекомендуется использовать [SQL-ориентированный воркфлоу](../access-rights.md#access-control), который также поддерживается в ClickHouse.
- :::
+:::note Информация
+Для управления пользователями рекомендуется использовать [SQL-ориентированный воркфлоу](../access-rights.md#access-control), который также поддерживается в ClickHouse.
+:::
+
Структура раздела `users`:

``` xml
@@ -146,8 +147,9 @@ sidebar_label: "Настройки пользователей"
    <ip>::/0</ip>
```

-:::danger "Внимание"
- Открывать доступ из любой сети небезопасно, если у вас нет правильно настроенного брандмауэра или сервер не отключен от интернета.
+:::danger Внимание
+Открывать доступ из любой сети небезопасно, если у вас нет правильно настроенного брандмауэра или сервер не отключен от интернета.
+:::

Чтобы открыть только локальный доступ, укажите:
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index f83d05ff7103..c58b3ae8107a 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -238,39 +238,6 @@ ClickHouse применяет настройку в тех случаях, ко

В случае превышения `input_format_allow_errors_ratio` ClickHouse генерирует исключение.

-## input_format_parquet_import_nested {#input_format_parquet_import_nested}
-
-Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Parquet](../../interfaces/formats.md#data-format-parquet).
-
-Возможные значения:
-
-- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур.
-- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур.
-
-Значение по умолчанию: `0`.
-
-## input_format_arrow_import_nested {#input_format_arrow_import_nested}
-
-Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Arrow](../../interfaces/formats.md#data_types-matching-arrow).
-
-Возможные значения:
-
-- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур.
-- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур.
-
-Значение по умолчанию: `0`.
-
-## input_format_orc_import_nested {#input_format_orc_import_nested}
-
-Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [ORC](../../interfaces/formats.md#data-format-orc).
-
-Возможные значения:
-
-- 0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур.
-- 0 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур.
-
-Значение по умолчанию: `0`.
-
## input_format_values_interpret_expressions {#settings-input_format_values_interpret_expressions}

Включает или отключает парсер SQL, если потоковый парсер не может проанализировать данные. Этот параметр используется только для формата [Values](../../interfaces/formats.md#data-format-values) при вставке данных. Дополнительные сведения о парсерах читайте в разделе [Синтаксис](../../sql-reference/syntax.md).
@@ -374,9 +341,10 @@ INSERT INTO test VALUES (lower('Hello')), (lower('world')), (lower('INSERT')), (

- [CSV](../../interfaces/formats.md#csv)
- [TabSeparated](../../interfaces/formats.md#tabseparated)

- :::note "Примечание"
- Когда опция включена, сервер отправляет клиенту расширенные метаданные. Это требует дополнительных вычислительных ресурсов на сервере и может снизить производительность.
- :::
+:::note Примечание
+Когда опция включена, сервер отправляет клиенту расширенные метаданные. Это требует дополнительных вычислительных ресурсов на сервере и может снизить производительность.
+:::
+
Возможные значения:

- 0 — выключена.
@@ -656,8 +624,8 @@ ClickHouse может парсить только базовый формат `Y

Изменяет поведение операций, выполняемых со строгостью `ANY`.

-:::warning "Внимание"
- Настройка применяется только для операций `JOIN`, выполняемых над таблицами с движком [Join](../../engines/table-engines/special/join.md).
+:::warning Внимание
+Настройка применяется только для операций `JOIN`, выполняемых над таблицами с движком [Join](../../engines/table-engines/special/join.md).
:::

Возможные значения:
@@ -738,9 +706,9 @@

Включает устаревшее поведение сервера ClickHouse при выполнении операций `ANY INNER|LEFT JOIN`.

- :::note "Внимание"
- Используйте этот параметр только в целях обратной совместимости, если ваши варианты использования требуют устаревшего поведения `JOIN`.
- :::
+:::note Внимание
+Используйте этот параметр только в целях обратной совместимости, если ваши варианты использования требуют устаревшего поведения `JOIN`.
+:::

Когда включено устаревшее поведение:

- Результаты операций "t1 ANY LEFT JOIN t2" и "t2 ANY RIGHT JOIN t1" не равны, поскольку ClickHouse использует логику с сопоставлением ключей таблицы "многие к одному слева направо".
@@ -1102,9 +1070,9 @@ SELECT type, query FROM system.query_log WHERE log_comment = 'log_comment test'

Максимальный размер блоков несжатых данных перед сжатием при записи в таблицу. По умолчанию — 1 048 576 (1 MiB). При уменьшении размера незначительно уменьшается коэффициент сжатия, незначительно возрастает скорость сжатия и разжатия за счёт кэш-локальности и уменьшается потребление оперативной памяти.

- :::note "Предупреждение"
- Эта настройка экспертного уровня, не используйте ее, если вы только начинаете работать с Clickhouse.
- :::
+:::note Предупреждение
+Это настройка экспертного уровня; не используйте её, если вы только начинаете работать с ClickHouse.
+:::

Не путайте блоки для сжатия (кусок памяти, состоящий из байт) и блоки для обработки запроса (пачка строк из таблицы).

## min_compress_block_size {#min-compress-block-size}
@@ -1119,9 +1087,9 @@ SELECT type, query FROM system.query_log WHERE log_comment = 'log_comment test'

Пусть мы записываем столбец URL типа String (средний размер — 60 байт на значение). При записи 8192 строк получится, в среднем, чуть меньше 500 КБ данных. Так как это больше 65 536 байт, то сжатый блок будет сформирован на каждую засечку. В этом случае при чтении с диска данных из диапазона в одну засечку не будет разжато лишних данных.
- :::note "Предупреждение"
- Эта настройка экспертного уровня, не используйте ее, если вы только начинаете работать с Clickhouse.
- :::
+:::note Предупреждение
+Это настройка экспертного уровня; не используйте её, если вы только начинаете работать с ClickHouse.
+:::

## max_query_size {#settings-max_query_size}
@@ -1209,9 +1177,9 @@ SELECT type, query FROM system.query_log WHERE log_comment = 'log_comment test'

Может быть использована для ограничения скорости сети при репликации данных для добавления или замены новых узлов.

- :::note
- 60000000 байт/с примерно соответствует 457 Мбит/с (60000000 / 1024 / 1024 * 8).
- :::
+:::note Примечание
+60000000 байт/с примерно соответствует 457 Мбит/с (60000000 / 1024 / 1024 * 8).
+:::

## max_replicated_sends_network_bandwidth_for_server {#max_replicated_sends_network_bandwidth_for_server}
@@ -1230,9 +1198,9 @@ SELECT type, query FROM system.query_log WHERE log_comment = 'log_comment test'

Может быть использована для ограничения скорости сети при репликации данных для добавления или замены новых узлов.

- :::note
- 60000000 байт/с примерно соответствует 457 Мбит/с (60000000 / 1024 / 1024 * 8).
- :::
+:::note Примечание
+60000000 байт/с примерно соответствует 457 Мбит/с (60000000 / 1024 / 1024 * 8).
+:::

## connect_timeout_with_failover_ms {#connect-timeout-with-failover-ms}
@@ -1399,8 +1367,9 @@ load_balancing = round_robin

Значение по умолчанию: 1.

-:::danger "Warning"
- Отключайте эту настройку при использовании [max_parallel_replicas](#settings-max_parallel_replicas).
+:::danger Предупреждение
+Отключайте эту настройку при использовании [max_parallel_replicas](#settings-max_parallel_replicas).
+:::

## totals_mode {#totals-mode}
@@ -1429,8 +1398,9 @@ load_balancing = round_robin

- Ключ сэмплирования является выражением, которое сложно вычисляется.
- У распределения сетевых задержек в кластере длинный «хвост», из-за чего при параллельных запросах к нескольким серверам увеличивается среднее время задержки.

-:::danger "Предупреждение"
- Параллельное выполнение запроса может привести к неверному результату, если в запросе есть объединение или подзапросы и при этом таблицы не удовлетворяют определенным требованиям. Подробности смотрите в разделе [Распределенные подзапросы и max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries).
+:::danger Предупреждение
+Параллельное выполнение запроса может привести к неверному результату, если в запросе есть объединение или подзапросы и при этом таблицы не удовлетворяют определенным требованиям. Подробности смотрите в разделе [Распределенные подзапросы и max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries).
+:::

## compile_expressions {#compile-expressions}
@@ -1686,7 +1656,7 @@ SELECT * FROM table_with_enum_column_for_csv_insert;

## input_format_csv_detect_header {#input_format_csv_detect_header}

Обнаружить заголовок с именами и типами в формате CSV.
-
+
Значение по умолчанию - `true`.

## input_format_csv_skip_first_lines {#input_format_csv_skip_first_lines}
@@ -1727,6 +1697,12 @@
echo '  string  ' | ./clickhouse local -q "select * from table FORMAT CSV" --input_format_csv_trim_whitespaces=0
"  string  "
```

+## input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns}
+
+Игнорировать дополнительные столбцы (если файл содержит больше столбцов, чем ожидается) и рассматривать отсутствующие поля в CSV как значения по умолчанию.
+
+Выключено по умолчанию.
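Гипотетический пример в духе примеров этого раздела (структура таблицы и данные условные):

```bash
# первая строка содержит лишний четвёртый столбец, во второй отсутствует третий
echo -e '1,a,x,EXTRA\n2,b' | ./clickhouse local \
    --structure='id UInt32, s String, t String' \
    --input-format=CSV \
    --input_format_csv_allow_variable_number_of_columns=1 \
    -q "SELECT * FROM table"
```

Лишний столбец игнорируется, а отсутствующее поле получает значение по умолчанию (пустую строку для `String`).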
+
## output_format_tsv_crlf_end_of_line {#settings-output-format-tsv-crlf-end-of-line}

Использовать в качестве разделителя строк для TSV-формата CRLF (DOS/Windows-стиль) вместо LF (Unix-стиль).
@@ -2197,8 +2173,8 @@ SELECT * FROM test_table

Устанавливает приоритет ([nice](https://en.wikipedia.org/wiki/Nice_(Unix))) для потоков, исполняющих запросы. Планировщик ОС учитывает эти приоритеты при выборе следующего потока для исполнения на доступном ядре CPU.

-:::warning "Предупреждение"
- Для использования этой настройки необходимо установить свойство `CAP_SYS_NICE`. Пакет `clickhouse-server` устанавливает его во время инсталляции. Некоторые виртуальные окружения не позволяют установить `CAP_SYS_NICE`. В этом случае, `clickhouse-server` выводит сообщение при запуске.
+:::warning Предупреждение
+Для использования этой настройки необходимо установить свойство `CAP_SYS_NICE`. Пакет `clickhouse-server` устанавливает его во время инсталляции. Некоторые виртуальные окружения не позволяют установить `CAP_SYS_NICE`. В этом случае `clickhouse-server` выводит сообщение при запуске.
:::

Допустимые значения:
@@ -4195,6 +4171,7 @@ SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS sessi

### Шаблон

Шаблон поддерживает следующие виды плейсхолдеров:

+- `%a` — Полное исходное имя файла (например "sample.csv").
- `%f` — Исходное имя файла без расширения (например "sample").
- `%e` — Оригинальное расширение файла с точкой (например ".csv").
- `%t` — Текущее время (в микросекундах).
@@ -4206,3 +4183,29 @@ SELECT *, timezone() FROM test_tz WHERE d = '2000-01-01 00:00:00' SETTINGS sessi

- Запрос: `SELECT * FROM file('sample.csv')`

Если чтение и обработка `sample.csv` прошли успешно, файл будет переименован в `processed_sample_1683473210851438.csv`.
+
+## precise_float_parsing {#precise_float_parsing}
+
+Позволяет выбрать алгоритм, используемый при парсинге [Float32/Float64](../../sql-reference/data-types/float.md):
+* Если установлено значение `1`, то используется точный метод. Он более медленный, но всегда возвращает число, наиболее близкое к входному значению.
+* В противном случае используется быстрый метод (поведение по умолчанию). Обычно результат его работы совпадает с результатом, полученным точным методом, однако в редких случаях он может отличаться в одной-двух наименее значимых цифрах.
+
+Возможные значения: `0`, `1`.
+
+Значение по умолчанию: `0`.
+
+Пример:
+
+```sql
+SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 0;
+
+┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐
+│  1.7090999999999998 │       15008753.000000002 │
+└─────────────────────┴──────────────────────────┘
+
+SELECT toFloat64('1.7091'), toFloat64('1.5008753E7') SETTINGS precise_float_parsing = 1;
+
+┌─toFloat64('1.7091')─┬─toFloat64('1.5008753E7')─┐
+│              1.7091 │                 15008753 │
+└─────────────────────┴──────────────────────────┘
+```
diff --git a/docs/ru/operations/system-tables/asynchronous_metric_log.md b/docs/ru/operations/system-tables/asynchronous_metric_log.md
index 886fbb6cab0a..5145889c95f6 100644
--- a/docs/ru/operations/system-tables/asynchronous_metric_log.md
+++ b/docs/ru/operations/system-tables/asynchronous_metric_log.md
@@ -8,7 +8,6 @@ slug: /ru/operations/system-tables/asynchronous_metric_log

Столбцы:
- `event_date` ([Date](../../sql-reference/data-types/date.md)) — дата события.
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — время события.
-- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — время события в микросекундах.
- `name` ([String](../../sql-reference/data-types/string.md)) — название метрики.
- `value` ([Float64](../../sql-reference/data-types/float.md)) — значение метрики.
diff --git a/docs/ru/operations/system-tables/backup_log.md b/docs/ru/operations/system-tables/backup_log.md
new file mode 100644
index 000000000000..15c1bfb20a40
--- /dev/null
+++ b/docs/ru/operations/system-tables/backup_log.md
@@ -0,0 +1,145 @@
+---
+slug: /ru/operations/system-tables/backup_log
+---
+# system.backup_log {#system_tables-backup-log}
+
+Содержит информацию обо всех операциях `BACKUP` и `RESTORE`.
+
+Колонки:
+
+- `event_date` ([Date](../../sql-reference/data-types/date.md)) — Дата события.
+- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — Время события с точностью до микросекунд.
+- `id` ([String](../../sql-reference/data-types/string.md)) — Идентификатор операции.
+- `name` ([String](../../sql-reference/data-types/string.md)) — Название хранилища (содержимое секции `FROM` или `TO` в SQL-запросе).
+- `status` ([Enum8](../../sql-reference/data-types/enum.md)) — Статус операции. Возможные значения:
+    - `'CREATING_BACKUP'`
+    - `'BACKUP_CREATED'`
+    - `'BACKUP_FAILED'`
+    - `'RESTORING'`
+    - `'RESTORED'`
+    - `'RESTORE_FAILED'`
+- `error` ([String](../../sql-reference/data-types/string.md)) — Сообщение об ошибке, при наличии (записи для успешных операций содержат пустую строку).
+- `start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Время начала операции.
+- `end_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Время завершения операции.
+- `num_files` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Количество файлов, хранимых в бэкапе.
+- `total_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Общий размер файлов, хранимых в бэкапе.
+- `num_entries` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Количество позиций в бэкапе, т. е. либо количество файлов в папке (если бэкап хранится в папке), либо количество файлов в архиве (если бэкап хранится в архиве). Это значение не равно `num_files` в случае, если это инкрементальный бэкап либо он содержит пустые файлы или дубликаты. Следующее утверждение верно всегда: `num_entries <= num_files`.
+- `uncompressed_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Размер бэкапа до сжатия.
+- `compressed_size` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Размер бэкапа после сжатия. Если бэкап не хранится в виде архива, это значение равно `uncompressed_size`.
+- `files_read` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Количество файлов, прочитанных во время операции восстановления.
+- `bytes_read` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Общий размер файлов, прочитанных во время операции восстановления.
+
+**Пример**
+
+```sql
+BACKUP TABLE test_db.my_table TO Disk('backups_disk', '1.zip')
+```
+```response
+┌─id───────────────────────────────────┬─status─────────┐
+│ e5b74ecb-f6f1-426a-80be-872f90043885 │ BACKUP_CREATED │
+└──────────────────────────────────────┴────────────────┘
+```
+```sql
+SELECT * FROM system.backup_log WHERE id = 'e5b74ecb-f6f1-426a-80be-872f90043885' ORDER BY event_date, event_time_microseconds \G
+```
+```response
+Row 1:
+──────
+event_date: 2023-08-19
+event_time_microseconds: 2023-08-19 11:05:21.998566
+id: e5b74ecb-f6f1-426a-80be-872f90043885
+name: Disk('backups_disk', '1.zip')
+status: CREATING_BACKUP
+error:
+start_time: 2023-08-19 11:05:21
+end_time: 1970-01-01 03:00:00
+num_files: 0
+total_size: 0
+num_entries: 0
+uncompressed_size: 0
+compressed_size: 0
+files_read: 0
+bytes_read: 0
+
+Row 2:
+──────
+event_date: 2023-08-19
+event_time_microseconds: 2023-08-19 11:08:56.916192
+id: e5b74ecb-f6f1-426a-80be-872f90043885
+name: Disk('backups_disk', '1.zip')
+status: BACKUP_CREATED
+error:
+start_time: 2023-08-19 11:05:21
+end_time: 2023-08-19 11:08:56
+num_files: 57
+total_size: 4290364870
+num_entries: 46
+uncompressed_size: 4290362365
+compressed_size: 3525068304
+files_read: 0
+bytes_read: 0
+```
+```sql
+RESTORE TABLE test_db.my_table FROM Disk('backups_disk', '1.zip')
+```
+```response
+┌─id───────────────────────────────────┬─status───┐
+│ cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 │ RESTORED │
+└──────────────────────────────────────┴──────────┘
+```
+```sql
+SELECT * FROM system.backup_log WHERE id = 'cdf1f731-52ef-42da-bc65-2e1bfcd4ce90' ORDER BY event_date, event_time_microseconds \G
+```
+```response
+Row 1:
+──────
+event_date: 2023-08-19
+event_time_microseconds: 2023-08-19 11:09:19.718077
+id: cdf1f731-52ef-42da-bc65-2e1bfcd4ce90
+name: Disk('backups_disk', '1.zip')
+status: RESTORING
+error:
+start_time: 2023-08-19 11:09:19
+end_time: 1970-01-01 03:00:00
+num_files: 0
+total_size: 0
+num_entries: 0
+uncompressed_size: 0
+compressed_size: 0
+files_read: 0
+bytes_read: 0
+
+Row 2:
+──────
+event_date: 2023-08-19
+event_time_microseconds: 2023-08-19 11:09:29.334234
+id: cdf1f731-52ef-42da-bc65-2e1bfcd4ce90
+name: Disk('backups_disk', '1.zip')
+status: RESTORED
+error:
+start_time: 2023-08-19 11:09:19
+end_time: 2023-08-19 11:09:29
+num_files: 57
+total_size: 4290364870
+num_entries: 46
+uncompressed_size: 4290362365
+compressed_size: 4290362365
+files_read: 57
+bytes_read: 4290364870
+```
+
+Это по сути та же информация, что заносится и в системную таблицу `system.backups`:
+
+```sql
+SELECT * FROM system.backups ORDER BY start_time
+```
+```response
+┌─id───────────────────────────────────┬─name──────────────────────────┬─status─────────┬─error─┬──────────start_time─┬────────────end_time─┬─num_files─┬─total_size─┬─num_entries─┬─uncompressed_size─┬─compressed_size─┬─files_read─┬─bytes_read─┐
+│ e5b74ecb-f6f1-426a-80be-872f90043885 │ Disk('backups_disk', '1.zip') │ BACKUP_CREATED │ │ 2023-08-19 11:05:21 │ 2023-08-19 11:08:56 │ 57 │ 4290364870 │ 46 │ 4290362365 │ 3525068304 │ 0 │ 0 │
+│ cdf1f731-52ef-42da-bc65-2e1bfcd4ce90 │ Disk('backups_disk', '1.zip') │ RESTORED │ │ 2023-08-19 11:09:19 │ 2023-08-19 11:09:29 │ 57 │ 4290364870 │ 46 │ 4290362365 │ 4290362365 │ 57 │ 4290364870 │
+└──────────────────────────────────────┴───────────────────────────────┴────────────────┴───────┴─────────────────────┴─────────────────────┴───────────┴────────────┴─────────────┴───────────────────┴─────────────────┴────────────┴────────────┘
+```
+
+**См. также**
+
+- [Backup and Restore](../../operations/backup.md)
diff --git a/docs/ru/operations/system-tables/index.md b/docs/ru/operations/system-tables/index.md
index 7ff368b19101..aedefb241931 100644
--- a/docs/ru/operations/system-tables/index.md
+++ b/docs/ru/operations/system-tables/index.md
@@ -21,7 +21,7 @@ sidebar_label: "Системные таблицы"

Большинство системных таблиц хранят свои данные в оперативной памяти. Сервер ClickHouse создает эти системные таблицы при старте.

-В отличие от других системных таблиц, таблицы с системными логами [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md) и [text_log](../../operations/system-tables/text_log.md) используют движок таблиц [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) и по умолчанию хранят свои данные в файловой системе. Если удалить таблицу из файловой системы, сервер ClickHouse снова создаст пустую таблицу во время следующей записи данных. Если схема системной таблицы изменилась в новом релизе, то ClickHouse переименует текущую таблицу и создаст новую.
+В отличие от других системных таблиц, таблицы с системными логами [metric_log](../../operations/system-tables/metric_log.md), [query_log](../../operations/system-tables/query_log.md), [query_thread_log](../../operations/system-tables/query_thread_log.md), [trace_log](../../operations/system-tables/trace_log.md), [part_log](../../operations/system-tables/part_log.md), [crash_log](../../operations/system-tables/crash-log.md), [text_log](../../operations/system-tables/text_log.md) и [backup_log](../../operations/system-tables/backup_log.md) используют движок таблиц [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) и по умолчанию хранят свои данные в файловой системе. Если удалить таблицу из файловой системы, сервер ClickHouse снова создаст пустую таблицу во время следующей записи данных. Если схема системной таблицы изменилась в новом релизе, то ClickHouse переименует текущую таблицу и создаст новую.

Таблицы с системными логами `log` можно настроить, создав конфигурационный файл с тем же именем, что и таблица в разделе `/etc/clickhouse-server/config.d/`, или указав соответствующие элементы в `/etc/clickhouse-server/config.xml`.
Настраиваться могут следующие элементы:
@@ -45,6 +45,10 @@ sidebar_label: "Системные таблицы"
      ENGINE = MergeTree PARTITION BY toYYYYMM(event_date) ORDER BY (event_date, event_time) SETTINGS index_granularity = 1024
    -->
    <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+   <max_size_rows>1048576</max_size_rows>
+   <reserved_size_rows>8192</reserved_size_rows>
+   <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+   <flush_on_crash>false</flush_on_crash>
```
diff --git a/docs/ru/operations/system-tables/licenses.md b/docs/ru/operations/system-tables/licenses.md
index 13407a27c94c..e35b5d5e3be2 100644
--- a/docs/ru/operations/system-tables/licenses.md
+++ b/docs/ru/operations/system-tables/licenses.md
@@ -20,21 +20,10 @@ SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15

``` text
┌─library_name───────┬─license_type─┬─license_path────────────────────────┐
-│ FastMemcpy         │ MIT          │ /contrib/FastMemcpy/LICENSE         │
-│ arrow              │ Apache       │ /contrib/arrow/LICENSE.txt          │
-│ avro               │ Apache       │ /contrib/avro/LICENSE.txt           │
│ aws-c-common       │ Apache       │ /contrib/aws-c-common/LICENSE       │
-│ aws-c-event-stream │ Apache       │ /contrib/aws-c-event-stream/LICENSE │
-│ aws-checksums      │ Apache       │ /contrib/aws-checksums/LICENSE      │
-│ aws                │ Apache       │ /contrib/aws/LICENSE.txt            │
-│ base64             │ BSD 2-clause │ /contrib/base64/LICENSE             │
-│ boost              │ Boost        │ /contrib/boost/LICENSE_1_0.txt      │
+│ base64             │ BSD 2-clause │ /contrib/aklomp-base64/LICENSE      │
│ brotli             │ MIT          │ /contrib/brotli/LICENSE             │
-│ capnproto          │ MIT          │ /contrib/capnproto/LICENSE          │
-│ cassandra          │ Apache       │ /contrib/cassandra/LICENSE.txt      │
-│ cctz               │ Apache       │ /contrib/cctz/LICENSE.txt           │
-│ cityhash102        │ MIT          │ /contrib/cityhash102/COPYING        │
-│ cppkafka           │ BSD 2-clause │ /contrib/cppkafka/LICENSE           │
+│ [...]              │ [...]        │ [...]                               │
└────────────────────┴──────────────┴─────────────────────────────────────┘
```
diff --git a/docs/ru/operations/system-tables/mutations.md b/docs/ru/operations/system-tables/mutations.md
index 7a9f4ecc0da4..bbf22952c2f5 100644
--- a/docs/ru/operations/system-tables/mutations.md
+++ b/docs/ru/operations/system-tables/mutations.md
@@ -31,8 +31,8 @@ slug: /ru/operations/system-tables/mutations
    - `1` — мутация завершена,
    - `0` — мутация еще продолжается.

-:::info "Замечание"
- Даже если `parts_to_do = 0`, для реплицированной таблицы возможна ситуация, когда мутация ещё не завершена из-за долго выполняющейся операции `INSERT`, которая добавляет данные, которые нужно будет мутировать.
+:::info Замечание
+Даже если `parts_to_do = 0`, для реплицированной таблицы возможна ситуация, когда мутация ещё не завершена из-за долго выполняющейся операции `INSERT`, которая добавляет данные, которые нужно будет мутировать.
:::

Если во время мутации какого-либо куска возникли проблемы, заполняются следующие столбцы:
diff --git a/docs/ru/operations/system-tables/parts.md b/docs/ru/operations/system-tables/parts.md
index 66c3fe3095e1..74984591dd2f 100644
--- a/docs/ru/operations/system-tables/parts.md
+++ b/docs/ru/operations/system-tables/parts.md
@@ -99,9 +99,10 @@ slug: /ru/operations/system-tables/parts

- `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Массив выражений. Каждое выражение задаёт правило [TTL MOVE](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).

- :::note "Предупреждение"
- Массив выражений `move_ttl_info.expression` используется, в основном, для обратной совместимости. Для работы с правилами `TTL MOVE` лучше использовать поля `move_ttl_info.min` и `move_ttl_info.max`.
- :::
+:::note Предупреждение
+Массив выражений `move_ttl_info.expression` используется, в основном, для обратной совместимости.
Для работы с правилами `TTL MOVE` лучше использовать поля `move_ttl_info.min` и `move_ttl_info.max`.
+:::
+
- `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Массив значений. Каждый элемент массива задаёт минимальное значение ключа даты и времени для правила [TTL MOVE](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
- `move_ttl_info.max` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Массив значений. Каждый элемент массива задаёт максимальное значение ключа даты и времени для правила [TTL MOVE](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
diff --git a/docs/ru/operations/system-tables/query_log.md b/docs/ru/operations/system-tables/query_log.md
index 8f858c14fb1d..8d79aa0eef70 100644
--- a/docs/ru/operations/system-tables/query_log.md
+++ b/docs/ru/operations/system-tables/query_log.md
@@ -5,9 +5,9 @@ slug: /ru/operations/system-tables/query_log

Содержит информацию о выполняемых запросах, например, время начала обработки, продолжительность обработки, сообщения об ошибках.

- :::note "Внимание"
- Таблица не содержит входных данных для запросов `INSERT`.
- :::
+:::note Внимание
+Таблица не содержит входных данных для запросов `INSERT`.
+:::

Настройки логирования можно изменить в секции серверной конфигурации [query_log](../server-configuration-parameters/settings.md#server_configuration_parameters-query-log). Можно отключить логирование настройкой [log_queries = 0](../settings/settings.md#settings-log-queries). По возможности не отключайте логирование, поскольку информация из таблицы важна при решении проблем.
@@ -99,7 +99,8 @@ ClickHouse не удаляет данные из таблица автомати

- `ProfileEvents` ([Map(String, UInt64)](../../sql-reference/data-types/array.md)) — счетчики для изменения различных метрик. Описание метрик можно получить из таблицы [system.events](#system_tables-events).
- `Settings` ([Map(String, String)](../../sql-reference/data-types/array.md)) — имена настроек, которые меняются, когда клиент выполняет запрос. Чтобы разрешить логирование изменений настроек, установите параметр `log_query_settings` равным 1.
- `log_comment` ([String](../../sql-reference/data-types/string.md)) — комментарий к записи в логе. Представляет собой произвольную строку, длина которой должна быть не больше, чем [max_query_size](../../operations/settings/settings.md#settings-max_query_size). Если нет комментария, то пустая строка.
-- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — идентификаторы потоков, участвующих в обработке запросов.
+- `thread_ids` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — идентификаторы потоков, участвующих в обработке запроса; эти потоки не обязательно работали одновременно.
+- `peak_threads_usage` ([UInt64](../../sql-reference/data-types/int-uint.md)) — максимальное количество одновременно работавших потоков, участвовавших в обработке запроса.
- `used_aggregate_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — канонические имена `агрегатных функций`, использованных при выполнении запроса.
- `used_aggregate_function_combinators` ([Array(String)](../../sql-reference/data-types/array.md)) — канонические имена `комбинаторов агрегатных функций`, использованных при выполнении запроса.
- `used_database_engines` ([Array(String)](../../sql-reference/data-types/array.md)) — канонические имена `движков баз данных`, использованных при выполнении запроса.
diff --git a/docs/ru/operations/update.md b/docs/ru/operations/update.md
index 4ad4d8d942b9..9a58a7ea5ddb 100644
--- a/docs/ru/operations/update.md
+++ b/docs/ru/operations/update.md
@@ -16,9 +16,10 @@ $ sudo service clickhouse-server restart

Если ClickHouse установлен не из рекомендуемых deb-пакетов, используйте соответствующий метод обновления.

- :::note "Примечание"
- Вы можете обновить сразу несколько серверов, кроме случая, когда все реплики одного шарда отключены.
- :::
+:::note Примечание
+Вы можете обновить сразу несколько серверов, кроме случая, когда все реплики одного шарда отключены.
+:::
+
Обновление ClickHouse до определенной версии:

**Пример**
diff --git a/docs/ru/operations/utilities/clickhouse-local.md b/docs/ru/operations/utilities/clickhouse-local.md
index 61fba2dd7cc8..6f0394a183d5 100644
--- a/docs/ru/operations/utilities/clickhouse-local.md
+++ b/docs/ru/operations/utilities/clickhouse-local.md
@@ -12,8 +12,9 @@ sidebar_label: clickhouse-local

`clickhouse-local` при настройке по умолчанию не имеет доступа к данным, которыми управляет сервер ClickHouse, установленный на этом же хосте, однако можно подключить конфигурацию сервера с помощью ключа `--config-file`.

-:::danger "Warning"
- Мы не рекомендуем подключать серверную конфигурацию к `clickhouse-local`, поскольку данные можно легко повредить неосторожными действиями.
+:::danger Предупреждение
+Мы не рекомендуем подключать серверную конфигурацию к `clickhouse-local`, поскольку данные можно легко повредить неосторожными действиями.
+:::

Для временных данных по умолчанию создается специальный каталог.
@@ -110,3 +111,42 @@ Read 186 rows, 4.15 KiB in 0.035 sec., 5302 rows/sec., 118.34 KiB/sec.
├──────────┼──────────┤
...
```
+
+## Запрос данных в файле с помощью SQL {#query_data_in_file}
+
+Часто `clickhouse-local` используется для выполнения разовых запросов к файлам, когда не нужно вставлять данные в таблицу. `clickhouse-local` может передать данные из файла во временную таблицу и выполнить ваш SQL-запрос.
+
+Если файл находится на той же машине, что и `clickhouse-local`, то можно просто указать файл для загрузки. Следующий файл `reviews.tsv` содержит выборку отзывов о товарах Amazon:
+
+```bash
+./clickhouse local -q "SELECT * FROM 'reviews.tsv'"
+```
+
+Эта команда является сокращением команды:
+
+```bash
+./clickhouse local -q "SELECT * FROM file('reviews.tsv')"
+```
+
+По расширению имени файла ClickHouse определяет, что данные имеют формат с разделением табуляцией. Если формат нужно указать явно, просто добавьте один из [множества входных форматов ClickHouse](../../interfaces/formats.md):
+
+```bash
+./clickhouse local -q "SELECT * FROM file('reviews.tsv', 'TabSeparated')"
+```
+
+Табличная функция `file` создает таблицу, и вы можете использовать `DESCRIBE` для просмотра выведенной схемы:
+
+```bash
+./clickhouse local -q "DESCRIBE file('reviews.tsv')"
+```
+
+:::tip
+В имени файла разрешается использовать [Шаблоны поиска](/docs/ru/sql-reference/table-functions/file.md/#globs-in-path).
+
+Примеры:
+
+```bash
+./clickhouse local -q "SELECT * FROM 'reviews*.jsonl'"
+./clickhouse local -q "SELECT * FROM 'review_?.csv'"
+./clickhouse local -q "SELECT * FROM 'review_{1..3}.csv'"
+```
+:::
diff --git a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md
index f7c7f98a8dd2..59a9c7f8cf19 100644
--- a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md
+++ b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md
@@ -90,8 +90,8 @@ FROM

sequenceMatch(pattern)(timestamp, cond1, cond2, ...)
```

-:::danger "Предупреждение"
- События, произошедшие в одну и ту же секунду, располагаются в последовательности в неопределенном порядке, что может повлиять на результат работы функции.
+:::danger Предупреждение
+События, произошедшие в одну и ту же секунду, располагаются в последовательности в неопределенном порядке, что может повлиять на результат работы функции.
:::

**Аргументы**
@@ -176,8 +176,8 @@ SELECT sequenceMatch('(?1)(?2)')(time, number = 1, number = 2, number = 4) FROM

Вычисляет количество цепочек событий, соответствующих шаблону. Функция обнаруживает только непересекающиеся цепочки событий. Она начинает искать следующую цепочку только после того, как полностью совпала текущая цепочка событий.

-:::danger "Предупреждение"
- События, произошедшие в одну и ту же секунду, располагаются в последовательности в неопределенном порядке, что может повлиять на результат работы функции.
+:::danger Предупреждение
+События, произошедшие в одну и ту же секунду, располагаются в последовательности в неопределенном порядке, что может повлиять на результат работы функции.
:::

``` sql
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/corr.md b/docs/ru/sql-reference/aggregate-functions/reference/corr.md
index 7871a04a4ffb..faa411db632b 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/corr.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/corr.md
@@ -9,6 +9,6 @@ sidebar_position: 107

Вычисляет коэффициент корреляции Пирсона: `Σ((x - x̅)(y - y̅)) / sqrt(Σ((x - x̅)^2) * Σ((y - y̅)^2))`.

- :::note "Примечание"
- Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `corrStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
- :::
+:::note Примечание
+Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `corrStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
+:::
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/covarpop.md b/docs/ru/sql-reference/aggregate-functions/reference/covarpop.md
index d1c96a9a8e31..22bf21d5d9a4 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/covarpop.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/covarpop.md
@@ -9,6 +9,6 @@ sidebar_position: 36

Вычисляет величину `Σ((x - x̅)(y - y̅)) / n`.

- :::note "Примечание"
- Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `covarPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
- :::
+:::note Примечание
+Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `covarPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
+:::
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/covarsamp.md b/docs/ru/sql-reference/aggregate-functions/reference/covarsamp.md
index b04b2c1c89da..ed7b6eda12c7 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/covarsamp.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/covarsamp.md
@@ -11,6 +11,6 @@ sidebar_position: 37

Возвращает Float64. В случае, когда `n <= 1`, возвращается +∞.

- :::note "Примечание"
- Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `covarSampStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
- :::
+:::note Примечание
+Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `covarSampStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
+:::
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/deltasum.md b/docs/ru/sql-reference/aggregate-functions/reference/deltasum.md
index 3816fec9dce3..64f78e85c6de 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/deltasum.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/deltasum.md
@@ -7,8 +7,8 @@ sidebar_position: 141

Суммирует арифметическую разницу между последовательными строками. Если разница отрицательна — она будет проигнорирована.

-:::info "Примечание"
- Чтобы эта функция работала должным образом, исходные данные должны быть отсортированы. В [материализованном представлении](../../../sql-reference/statements/create/view.md#materialized) вместо нее рекомендуется использовать [deltaSumTimestamp](../../../sql-reference/aggregate-functions/reference/deltasumtimestamp.md#agg_functions-deltasumtimestamp).
+:::info Примечание
+Чтобы эта функция работала должным образом, исходные данные должны быть отсортированы. В [материализованном представлении](../../../sql-reference/statements/create/view.md#materialized) вместо нее рекомендуется использовать [deltaSumTimestamp](../../../sql-reference/aggregate-functions/reference/deltasumtimestamp.md#agg_functions-deltasumtimestamp).
:::

**Синтаксис**
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/intervalLengthSum.md b/docs/ru/sql-reference/aggregate-functions/reference/intervalLengthSum.md
index dfe0ea7a4f32..f4c0b2cdd73e 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/intervalLengthSum.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/intervalLengthSum.md
@@ -19,8 +19,8 @@ intervalLengthSum(start, end)

- `start` — начальное значение интервала.
[Int32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Int64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Float32](../../../sql-reference/data-types/float.md#float32-float64), [Float64](../../../sql-reference/data-types/float.md#float32-float64), [DateTime](../../../sql-reference/data-types/datetime.md#data_type-datetime) или [Date](../../../sql-reference/data-types/date.md#data_type-date).
- `end` — конечное значение интервала. [Int32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Int64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt32](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [UInt64](../../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-int8-int16-int32-int64), [Float32](../../../sql-reference/data-types/float.md#float32-float64), [Float64](../../../sql-reference/data-types/float.md#float32-float64), [DateTime](../../../sql-reference/data-types/datetime.md#data_type-datetime) или [Date](../../../sql-reference/data-types/date.md#data_type-date).

-:::info "Примечание"
- Аргументы должны быть одного типа. В противном случае ClickHouse сгенерирует исключение.
+:::info Примечание
+Аргументы должны быть одного типа. В противном случае ClickHouse сгенерирует исключение.
:::

**Возвращаемое значение**
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md b/docs/ru/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md
index 21f7c8330906..e538a78fc3bc 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md
@@ -13,9 +13,9 @@ sidebar_position: 208

Внутренние состояния функций `quantile*` не объединяются, если они используются в одном запросе. Если вам необходимо вычислить квантили нескольких уровней, используйте функцию [quantiles](#quantiles), это повысит эффективность запроса.

- :::note "Примечание"
- Использование `quantileTDigestWeighted` [не рекомендуется для небольших наборов данных](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) и может привести к значительной ошибке. Рассмотрите возможность использования [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) в таких случаях.
- :::
+:::note Примечание
+Использование `quantileTDigestWeighted` [не рекомендуется для небольших наборов данных](https://github.com/tdunning/t-digest/issues/167#issuecomment-828650275) и может привести к значительной ошибке. Рассмотрите возможность использования [`quantileTDigest`](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md) в таких случаях.
+:::

**Синтаксис**

``` sql
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantiletiming.md b/docs/ru/sql-reference/aggregate-functions/reference/quantiletiming.md
index 59c2ced3439e..6e75dc094efd 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/quantiletiming.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/quantiletiming.md
@@ -37,18 +37,20 @@ quantileTiming(level)(expr)

В противном случае, результат вычисления округляется до ближайшего множителя числа 16.

- :::note "Примечание"
- Для указанного типа последовательностей функция производительнее и точнее, чем [quantile](#quantile).
- :::
+:::note Примечание
+Для указанного типа последовательностей функция производительнее и точнее, чем [quantile](#quantile).
+:::
+
**Возвращаемое значение**

- Квантиль заданного уровня.

Тип: `Float32`.

- :::note "Примечания"
- Если в функцию `quantileTimingIf` не передать значений, то вернётся [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf). Это необходимо для отделения подобных случаев от случаев, когда результат 0. Подробности про сортировку `NaN` cмотрите в разделе [Секция ORDER BY](../../../sql-reference/statements/select/order-by.md#select-order-by).
- :::
+:::note Примечание
+Если в функцию `quantileTimingIf` не передать значений, то вернётся [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf). Это необходимо для отделения подобных случаев от случаев, когда результат 0. Подробности про сортировку `NaN` смотрите в разделе [Секция ORDER BY](../../../sql-reference/statements/select/order-by.md#select-order-by).
+:::
+
**Пример**

Входная таблица:
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/quantiletimingweighted.md b/docs/ru/sql-reference/aggregate-functions/reference/quantiletimingweighted.md
index 4cce04e6d4e4..839afec5a12d 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/quantiletimingweighted.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/quantiletimingweighted.md
@@ -39,18 +39,19 @@ quantileTimingWeighted(level)(expr, weight)

В противном случае, результат вычисления округляется до ближайшего множителя числа 16.

- :::note "Примечание"
- Для указанного типа последовательностей функция производительнее и точнее, чем [quantile](#quantile).
- :::
+:::note Примечание
+Для указанного типа последовательностей функция производительнее и точнее, чем [quantile](#quantile).
+:::

**Возвращаемое значение**

- Квантиль заданного уровня.

Тип: `Float32`.

- :::note "Примечания"
- Если в функцию `quantileTimingIf` не передать значений, то вернётся [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf). Это необходимо для отделения подобных случаев от случаев, когда результат 0. Подробности про сортировку `NaN` cмотрите в разделе [Секция ORDER BY](../../../sql-reference/statements/select/order-by.md#select-order-by).
+:::note Примечание
+Если в функцию `quantileTimingIf` не передать значений, то вернётся [NaN](../../../sql-reference/data-types/float.md#data_type-float-nan-inf). Это необходимо для отделения подобных случаев от случаев, когда результат 0. Подробности про сортировку `NaN` смотрите в разделе [Секция ORDER BY](../../../sql-reference/statements/select/order-by.md#select-order-by).
+:::
+
**Пример**

Входная таблица:
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/stddevpop.md b/docs/ru/sql-reference/aggregate-functions/reference/stddevpop.md
index bb6b43e716c1..ddb424aa8a15 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/stddevpop.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/stddevpop.md
@@ -7,6 +7,6 @@ sidebar_position: 30

Результат равен квадратному корню от `varPop(x)`.

- :::note "Примечание"
- Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `stddevPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
- :::
+:::note Примечание
+Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `stddevPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
+:::
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/stddevsamp.md b/docs/ru/sql-reference/aggregate-functions/reference/stddevsamp.md
index c8048f2d3dca..eddd3b4b32ee 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/stddevsamp.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/stddevsamp.md
@@ -7,6 +7,6 @@ sidebar_position: 31

Результат равен квадратному корню от `varSamp(x)`.

- :::note "Примечание"
- Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `stddevSampStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
- :::
+:::note Примечание
+Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `stddevSampStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
+:::
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/uniqcombined.md b/docs/ru/sql-reference/aggregate-functions/reference/uniqcombined.md
index c0c1ca2c5123..a7dd308791c8 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/uniqcombined.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/uniqcombined.md
@@ -35,9 +35,10 @@ uniqCombined(HLL_precision)(x[, ...])

- Результат детерминирован (не зависит от порядка выполнения запроса).

- :::note "Note"
- Так как используется 32-битный хэш для не-`String` типов, результат будет иметь очень очень большую ошибку для количества разичных элементов существенно больше `UINT_MAX` (ошибка быстро растёт начиная с нескольких десятков миллиардов различных значений), таким образом в этом случае нужно использовать [uniqCombined64](#agg_function-uniqcombined64)
- :::
+:::note Примечание
+Так как для не-`String` типов используется 32-битный хэш, результат будет иметь очень большую ошибку, если количество различных элементов существенно больше `UINT_MAX` (ошибка быстро растёт начиная с нескольких десятков миллиардов различных значений). В этом случае нужно использовать [uniqCombined64](#agg_function-uniqcombined64).
+:::
+
По сравнению с функцией [uniq](#agg_function-uniq), `uniqCombined`:

- Потребляет в несколько раз меньше памяти.
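Для иллюстрации ниже условный набросок запроса, сравнивающего приближённые и точный подсчёт уникальных значений; конкретные цифры результата зависят от данных и версии сервера:

```sql
-- сравнение приближённых и точного подсчёта уникальных значений
SELECT
    uniq(number)           AS approx_uniq,
    uniqCombined(number)   AS approx_uniq_combined,
    uniqCombined64(number) AS approx_uniq_combined64,
    uniqExact(number)      AS exact_uniq
FROM numbers(10000000);
```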
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/varpop.md b/docs/ru/sql-reference/aggregate-functions/reference/varpop.md
index ba1719151f26..64bf703a1d80 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/varpop.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/varpop.md
@@ -9,6 +9,6 @@ sidebar_position: 32

То есть, дисперсию для множества значений. Возвращает `Float64`.

- :::note "Примечание"
- Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `varPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
- :::
+:::note Примечание
+Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `varPopStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
+:::
diff --git a/docs/ru/sql-reference/aggregate-functions/reference/varsamp.md b/docs/ru/sql-reference/aggregate-functions/reference/varsamp.md
index d81b94d1b130..e23f95a15f6d 100644
--- a/docs/ru/sql-reference/aggregate-functions/reference/varsamp.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/varsamp.md
@@ -11,6 +11,6 @@ sidebar_position: 33

Возвращает `Float64`. В случае, когда `n <= 1`, возвращается `+∞`.

- :::note "Примечание"
- Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `varSampStable`. Она работает медленнее, но обеспечиват меньшую вычислительную ошибку.
- :::
+:::note Примечание
+Функция использует вычислительно неустойчивый алгоритм. Если для ваших расчётов необходима [вычислительная устойчивость](https://ru.wikipedia.org/wiki/Вычислительная_устойчивость), используйте функцию `varSampStable`. Она работает медленнее, но обеспечивает меньшую вычислительную ошибку.
+:::
diff --git a/docs/ru/sql-reference/data-types/datetime.md b/docs/ru/sql-reference/data-types/datetime.md
index e8d4a3ee9fd7..80d844a17134 100644
--- a/docs/ru/sql-reference/data-types/datetime.md
+++ b/docs/ru/sql-reference/data-types/datetime.md
@@ -122,6 +122,7 @@ FROM dt

- [Настройка `date_time_input_format`](../../operations/settings/index.md#settings-date_time_input_format)
- [Настройка `date_time_output_format`](../../operations/settings/index.md)
- [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
+- [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone)
- [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime)
- [Тип данных `Date`](date.md)
- [Тип данных `DateTime64`](datetime64.md)
diff --git a/docs/ru/sql-reference/data-types/datetime64.md b/docs/ru/sql-reference/data-types/datetime64.md
index da2f81f4828c..78ad43e47648 100644
--- a/docs/ru/sql-reference/data-types/datetime64.md
+++ b/docs/ru/sql-reference/data-types/datetime64.md
@@ -102,6 +102,7 @@ FROM dt;

- [Настройка `date_time_input_format`](../../operations/settings/settings.md#settings-date_time_input_format)
- [Настройка `date_time_output_format`](../../operations/settings/settings.md)
- [Конфигурационный параметр сервера `timezone`](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone)
+- [Параметр `session_timezone`](../../operations/settings/settings.md#session_timezone)
- [Операторы для работы с датой и временем](../../sql-reference/operators/index.md#operators-datetime)
- [Тип данных `Date`](date.md)
- [Тип данных `DateTime`](datetime.md)
diff --git a/docs/ru/sql-reference/data-types/nullable.md b/docs/ru/sql-reference/data-types/nullable.md
index f6d6bb1f9c6b..331e9cc49989 100644
--- a/docs/ru/sql-reference/data-types/nullable.md
+++ b/docs/ru/sql-reference/data-types/nullable.md
@@ -25,8 +25,8 @@ sidebar_label: Nullable

В случае, когда маска указывает, что в ячейке хранится `NULL`, в файле значений хранится значение по умолчанию для типа данных. Т.е. если, например, поле имеет тип `Nullable(Int8)`, то ячейка будет хранить значение по умолчанию для `Int8`. Эта особенность увеличивает размер хранилища.

-:::info "Info"
- Почти всегда использование `Nullable` снижает производительность, учитывайте это при проектировании своих баз.
+:::info Примечание
+Почти всегда использование `Nullable` снижает производительность, учитывайте это при проектировании своих баз.
:::

## Поиск NULL {#finding-null}
diff --git a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md
index 6e3660844f64..380b075d38d6 100644
--- a/docs/ru/sql-reference/data-types/simpleaggregatefunction.md
+++ b/docs/ru/sql-reference/data-types/simpleaggregatefunction.md
@@ -25,11 +25,11 @@ slug: /ru/sql-reference/data-types/simpleaggregatefunction
- [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap)
- [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap)

- :::note "Примечание"
- Значения `SimpleAggregateFunction(func, Type)` отображаются и хранятся так же, как и `Type`, поэтому комбинаторы [-Merge](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators-merge) и [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) не требуются.
+:::note Примечание
+Значения `SimpleAggregateFunction(func, Type)` отображаются и хранятся так же, как и `Type`, поэтому комбинаторы [-Merge](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators-merge) и [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) не требуются.

- `SimpleAggregateFunction` имеет лучшую производительность, чем `AggregateFunction` с той же агрегатной функцией.
- :::
+`SimpleAggregateFunction` имеет лучшую производительность, чем `AggregateFunction` с той же агрегатной функцией.
+:::

**Параметры**

- `func` — имя агрегатной функции.
diff --git a/docs/ru/sql-reference/data-types/special-data-types/interval.md b/docs/ru/sql-reference/data-types/special-data-types/interval.md
index 109ceee78526..867a6665f4be 100644
--- a/docs/ru/sql-reference/data-types/special-data-types/interval.md
+++ b/docs/ru/sql-reference/data-types/special-data-types/interval.md
@@ -8,8 +8,8 @@ sidebar_label: Interval

Семейство типов данных, представляющих интервалы дат и времени. Оператор [INTERVAL](../../../sql-reference/data-types/special-data-types/interval.md#operator-interval) возвращает значения этих типов.

-:::danger "Внимание"
- Нельзя использовать типы данных `Interval` для хранения данных в таблице.
+:::danger Внимание
+Нельзя использовать типы данных `Interval` для хранения данных в таблице.
:::

Структура:
diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
index c8c55ed5c962..16b5f026f67e 100644
--- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
+++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
@@ -434,8 +434,8 @@ LAYOUT(CACHE(SIZE_IN_CELLS 1000000000))
3. Оценить потребление оперативной памяти с помощью таблицы `system.dictionaries`.
4. Увеличивать/уменьшать количество ячеек до получения требуемого расхода оперативной памяти.

-:::danger "Warning"
- Не используйте в качестве источника ClickHouse, поскольку он медленно обрабатывает запросы со случайным чтением.
+:::danger Предупреждение
+Не используйте в качестве источника ClickHouse, поскольку он медленно обрабатывает запросы со случайным чтением.
::: ### complex_key_cache {#complex-key-cache} diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index 2aaae6b7075b..5bd0d9ed2068 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -212,9 +212,9 @@ SOURCE(HTTP( ### Выявленная уязвимость в функционировании ODBC словарей {#vyiavlennaia-uiazvimost-v-funktsionirovanii-odbc-slovarei} - :::note "Внимание" - При соединении с базой данных через ODBC можно заменить параметр соединения `Servername`. В этом случае, значения `USERNAME` и `PASSWORD` из `odbc.ini` отправляются на удаленный сервер и могут быть скомпрометированы. - ::: +:::note Внимание +При соединении с базой данных через ODBC можно заменить параметр соединения `Servername`. В этом случае, значения `USERNAME` и `PASSWORD` из `odbc.ini` отправляются на удаленный сервер и могут быть скомпрометированы. +::: **Пример небезопасного использования** Сконфигурируем unixODBC для работы с PostgreSQL. Содержимое `/etc/odbc.ini`: @@ -463,8 +463,8 @@ SOURCE(ODBC( - `invalidate_query` — запрос для проверки статуса словаря. Необязательный параметр. Читайте подробнее в разделе [Обновление словарей](external-dicts-dict-lifetime.md). - `query` – пользовательский запрос. Необязательный параметр. -:::info "Примечание" - Поля `table` и `query` не могут быть использованы вместе. Также обязательно должен быть один из источников данных: `table` или `query`. +:::info Примечание +Поля `table` и `query` не могут быть использованы вместе. Также обязательно должен быть один из источников данных: `table` или `query`. ::: ClickHouse получает от ODBC-драйвера информацию о квотировании и квотирует настройки в запросах к драйверу, поэтому имя таблицы нужно указывать в соответствии с регистром имени таблицы в базе данных. @@ -542,9 +542,9 @@ SOURCE(MYSQL( - `query` – пользовательский запрос. Необязательный параметр. -:::info "Примечание" - Поля `table` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `table` или `query`. - Явный параметр `secure` отсутствует. Автоматически поддержана работа в обоих случаях: когда установка SSL-соединения необходима и когда нет. +:::info Примечание +Поля `table` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `table` или `query`. +Явный параметр `secure` отсутствует. Автоматически поддержана работа в обоих случаях: когда установка SSL-соединения необходима и когда нет. ::: MySQL можно подключить на локальном хосте через сокеты, для этого необходимо задать `host` и `socket`. @@ -634,8 +634,8 @@ SOURCE(CLICKHOUSE( - `secure` - флаг, разрешающий или не разрешающий защищённое SSL-соединение. - `query` – пользовательский запрос. Необязательный параметр. -:::info "Примечание" - Поля `table` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `table` или `query`. +:::info Примечание +Поля `table` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `table` или `query`. 
::: ### MongoDB {#dicts-external_dicts_dict_sources-mongodb} @@ -750,8 +750,8 @@ SOURCE(REDIS( - `max_threads` – максимальное количество тредов для загрузки данных из нескольких партиций в словарь. - `query` – пользовательский запрос. Необязательный параметр. -:::info "Примечание" - Поля `column_family` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `column_family` или `query`. +:::info Примечание +Поля `column_family` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `column_family` или `query`. ::: ### PostgreSQL {#dicts-external_dicts_dict_sources-postgresql} @@ -807,6 +807,6 @@ SOURCE(POSTGRESQL( - `invalidate_query` – запрос для проверки условия загрузки словаря. Необязательный параметр. Более подробную информацию смотрите в разделе [обновление словарей](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md). - `query` – пользовательский запрос. Необязательный параметр. -:::info "Примечание" - Поля `table` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `table` или `query`. +:::info Примечание +Поля `table` или `where` не могут быть использованы вместе с полем `query`. Также обязательно должен быть один из источников данных: `table` или `query`. ::: diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md index a711287ae8ee..c2f943d82763 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md @@ -57,8 +57,8 @@ ClickHouse поддерживает следующие виды ключей: Структура может содержать либо `` либо ``. DDL-запрос может содержать только `PRIMARY KEY`. -:::danger "Обратите внимание" - Ключ не надо дополнительно описывать в атрибутах. +:::danger Обратите внимание +Ключ не надо дополнительно описывать в атрибутах. ::: ### Числовой ключ {#ext_dict-numeric-key} @@ -94,9 +94,10 @@ PRIMARY KEY Id Ключом может быть кортеж (`tuple`) из полей произвольных типов. В этом случае [layout](external-dicts-dict-layout.md) должен быть `complex_key_hashed` или `complex_key_cache`. -:::tip "Совет" - Составной ключ может состоять из одного элемента. Это даёт возможность использовать в качестве ключа, например, строку. - ::: +:::tip Совет +Составной ключ может состоять из одного элемента. Это даёт возможность использовать в качестве ключа, например, строку. +::: + Структура ключа задаётся в элементе ``. Поля ключа задаются в том же формате, что и [атрибуты](external-dicts-dict-structure.md) словаря. Пример: ``` xml diff --git a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md index a262a3548893..af002557f6be 100644 --- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -50,9 +50,9 @@ ClickHouse: Если вы создаёте внешние словари [DDL-запросами](../../statements/create/index.md#create-dictionary-query), то не задавайте конфигурацию словаря в конфигурации сервера. - :::note "Внимание" - Можно преобразовывать значения по небольшому словарю, описав его в запросе `SELECT` (см. 
функцию [transform](../../../sql-reference/functions/other-functions.md)). Эта функциональность не связана с внешними словарями. - ::: +:::note Внимание +Можно преобразовывать значения по небольшому словарю, описав его в запросе `SELECT` (см. функцию [transform](../../../sql-reference/functions/other-functions.md)). Эта функциональность не связана с внешними словарями. +::: ## Смотрите также {#ext-dicts-see-also} - [Настройка внешнего словаря](external-dicts-dict.md) diff --git a/docs/ru/sql-reference/functions/array-functions.md b/docs/ru/sql-reference/functions/array-functions.md index c43323d68fd2..659e2d3f75eb 100644 --- a/docs/ru/sql-reference/functions/array-functions.md +++ b/docs/ru/sql-reference/functions/array-functions.md @@ -18,8 +18,8 @@ empty([x]) Массив считается пустым, если он не содержит ни одного элемента. -:::note "Примечание" - Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [size0](../../sql-reference/data-types/array.md#array-size) вместо чтения и обработки всего столбца массива. Запрос `SELECT empty(arr) FROM TABLE` преобразуется к запросу `SELECT arr.size0 = 0 FROM TABLE`. +:::note Примечание +Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [size0](../../sql-reference/data-types/array.md#array-size) вместо чтения и обработки всего столбца массива. Запрос `SELECT empty(arr) FROM TABLE` преобразуется к запросу `SELECT arr.size0 = 0 FROM TABLE`. ::: Функция также поддерживает работу с типами [String](string-functions.md#empty) и [UUID](uuid-functions.md#empty). @@ -62,8 +62,8 @@ notEmpty([x]) Массив считается непустым, если он содержит хотя бы один элемент. -:::note "Примечание" - Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [size0](../../sql-reference/data-types/array.md#array-size) вместо чтения и обработки всего столбца массива. Запрос `SELECT notEmpty(arr) FROM table` преобразуется к запросу `SELECT arr.size0 != 0 FROM TABLE`. +:::note Примечание +Функцию можно оптимизировать, если включить настройку [optimize_functions_to_subcolumns](../../operations/settings/settings.md#optimize-functions-to-subcolumns). При `optimize_functions_to_subcolumns = 1` функция читает только подстолбец [size0](../../sql-reference/data-types/array.md#array-size) вместо чтения и обработки всего столбца массива. Запрос `SELECT notEmpty(arr) FROM table` преобразуется к запросу `SELECT arr.size0 != 0 FROM TABLE`. ::: Функция также поддерживает работу с типами [String](string-functions.md#notempty) и [UUID](uuid-functions.md#notempty). @@ -145,6 +145,8 @@ range([start, ] end [, step]) - Если в результате запроса создаются массивы суммарной длиной больше, чем количество элементов, указанное настройкой [function_range_max_elements_in_block](../../operations/settings/settings.md#settings-function_range_max_elements_in_block), то генерируется исключение. +- Возвращает Null если любой аргумент Nullable(Nothing) типа. Генерируется исключение если любой аргумент Null (Nullable(T) тип). 
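Условная иллюстрация к примечанию о NULL-аргументах `range` выше (пример построен по тексту примечания; точный текст исключения зависит от версии сервера):

``` sql
-- Литерал NULL имеет тип Nullable(Nothing): функция вернёт NULL.
SELECT range(NULL);

-- NULL с конкретным типом Nullable(UInt8): будет сгенерировано исключение.
SELECT range(CAST(NULL AS Nullable(UInt8)));
```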
+ **Примеры** Запрос: @@ -692,8 +694,8 @@ SELECT arraySort((x, y) -> -y, [0, 1, 2], [1, 2, 3]) as res; └─────────┘ ``` -:::note "Примечание" - Для улучшения эффективности сортировки применяется [преобразование Шварца](https://ru.wikipedia.org/wiki/%D0%9F%D1%80%D0%B5%D0%BE%D0%B1%D1%80%D0%B0%D0%B7%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5_%D0%A8%D0%B2%D0%B0%D1%80%D1%86%D0%B0). +:::note Примечание +Для улучшения эффективности сортировки применяется [преобразование Шварца](https://ru.wikipedia.org/wiki/%D0%9F%D1%80%D0%B5%D0%BE%D0%B1%D1%80%D0%B0%D0%B7%D0%BE%D0%B2%D0%B0%D0%BD%D0%B8%D0%B5_%D0%A8%D0%B2%D0%B0%D1%80%D1%86%D0%B0). ::: ## arrayPartialSort(\[func,\] limit, arr, …) {#array_functions-sort} @@ -1701,3 +1703,327 @@ SELECT arrayProduct([toDecimal64(1,8), toDecimal64(2,8), toDecimal64(3,8)]) as r │ 6 │ Float64 │ └─────┴──────────────────────────────────────────────────────────────────────────────────────────┘ ``` + +## arrayRotateLeft + +Поворачивает [массив](../../sql-reference/data-types/array.md) влево на заданное число элементов. +Если количество элементов отрицательно, то массив поворачивается вправо. + +**Синтаксис** + +``` sql +arrayRotateLeft(arr, n) +``` + +**Аргументы** + +- `arr` — [Массив](../../sql-reference/data-types/array.md). +- `n` — Число элементов, на которое нужно повернуть массив. + +**Возвращаемое значение** + +- Массив, повернутый на заданное число элементов влево. + +Тип: [Массив](../../sql-reference/data-types/array.md). + +**Примеры** + +Запрос: + +``` sql +SELECT arrayRotateLeft([1,2,3,4,5,6], 2) as res; +``` + +Результат: + +``` text +┌─res───────────┐ +│ [3,4,5,6,1,2] │ +└───────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayRotateLeft([1,2,3,4,5,6], -2) as res; +``` + +Результат: + +``` text +┌─res───────────┐ +│ [5,6,1,2,3,4] │ +└───────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayRotateLeft(['a','b','c','d','e'], 3) as res; +``` + +Результат: + +``` text +┌─res───────────────────┐ +│ ['d','e','a','b','c'] │ +└───────────────────────┘ +``` + +## arrayRotateRight + +Поворачивает [массив](../../sql-reference/data-types/array.md) вправо на заданное число элементов. +Если количество элементов отрицательно, то массив поворачивается влево. + +**Синтаксис** + +``` sql +arrayRotateRight(arr, n) +``` + +**Аргументы** + +- `arr` — [Массив](../../sql-reference/data-types/array.md). +- `n` — Число элементов, на которое нужно повернуть массив. + +**Возвращаемое значение** + +- Массив, повернутый на заданное число элементов вправо. + +Тип: [Массив](../../sql-reference/data-types/array.md). + +**Примеры** + +Запрос: + +``` sql +SELECT arrayRotateRight([1,2,3,4,5,6], 2) as res; +``` + +Результат: + +``` text +┌─res───────────┐ +│ [5,6,1,2,3,4] │ +└───────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayRotateRight([1,2,3,4,5,6], -2) as res; +``` + +Результат: + +``` text +┌─res───────────┐ +│ [3,4,5,6,1,2] │ +└───────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayRotateRight(['a','b','c','d','e'], 3) as res; +``` + +Результат: + +``` text +┌─res───────────────────┐ +│ ['c','d','e','a','b'] │ +└───────────────────────┘ +``` + +## arrayShiftLeft + +Сдвигает [массив](../../sql-reference/data-types/array.md) влево на заданное число элементов. +Новые элементы заполняются переданным аргументом или значением по умолчанию для типа элементов массива. +Если количество элементов отрицательно, то массив сдвигается вправо. + +**Синтаксис** + +``` sql +arrayShiftLeft(arr, n[, default]) +``` + +**Аргументы** + +- `arr` — [Массив](../../sql-reference/data-types/array.md). 
+- `n` — Число элементов, на которое нужно сдвинуть массив. +- `default` — Опциональный. Значение по умолчанию для новых элементов. + +**Возвращаемое значение** + +- Массив, сдвинутый на заданное число элементов влево. + +Тип: [Массив](../../sql-reference/data-types/array.md). + +**Примеры** + +Запрос: + +``` sql +SELECT arrayShiftLeft([1,2,3,4,5,6], 2) as res; +``` + +Результат: + +``` text +┌─res───────────┐ +│ [3,4,5,6,0,0] │ +└───────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayShiftLeft([1,2,3,4,5,6], -2) as res; +``` + +Результат: + +``` text +┌─res───────────┐ +│ [0,0,1,2,3,4] │ +└───────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayShiftLeft([1,2,3,4,5,6], 2, 42) as res; +``` + +Результат: + +``` text +┌─res─────────────┐ +│ [3,4,5,6,42,42] │ +└─────────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayShiftLeft(['a','b','c','d','e','f'], 3, 'foo') as res; +``` + +Результат: + +``` text +┌─res─────────────────────────────┐ +│ ['d','e','f','foo','foo','foo'] │ +└─────────────────────────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayShiftLeft([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res; +``` + +Результат: + +``` text +┌─res─────────────────┐ +│ [3,4,5,6,4242,4242] │ +└─────────────────────┘ +``` + +## arrayShiftRight + +Сдвигает [массив](../../sql-reference/data-types/array.md) вправо на заданное число элементов. +Новые элементы заполняются переданным аргументом или значением по умолчанию для типа элементов массива. +Если количество элементов отрицательно, то массив сдвигается влево. + +**Синтаксис** + +``` sql +arrayShiftRight(arr, n[, default]) +``` + +**Аргументы** + +- `arr` — [Массив](../../sql-reference/data-types/array.md). +- `n` — Число элементов, на которое нужно сдвинуть массив. +- `default` — Опциональный. Значение по умолчанию для новых элементов. + +**Возвращаемое значение** + +- Массив, сдвинутый на заданное число элементов вправо. + +Тип: [Массив](../../sql-reference/data-types/array.md). + +**Примеры** + +Запрос: + +``` sql +SELECT arrayShiftRight([1,2,3,4,5,6], 2) as res; +``` + +Результат: + +``` text +┌─res───────────┐ +│ [0,0,1,2,3,4] │ +└───────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayShiftRight([1,2,3,4,5,6], -2) as res; +``` + +Результат: + +``` text +┌─res───────────┐ +│ [3,4,5,6,0,0] │ +└───────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayShiftRight([1,2,3,4,5,6], 2, 42) as res; +``` + +Результат: + +``` text +┌─res─────────────┐ +│ [42,42,1,2,3,4] │ +└─────────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayShiftRight(['a','b','c','d','e','f'], 3, 'foo') as res; +``` + +Результат: + +``` text +┌─res─────────────────────────────┐ +│ ['foo','foo','foo','a','b','c'] │ +└─────────────────────────────────┘ +``` + +Запрос: + +``` sql +SELECT arrayShiftRight([1,2,3,4,5,6] :: Array(UInt16), 2, 4242) as res; +``` + +Результат: + +``` text +┌─res─────────────────┐ +│ [4242,4242,1,2,3,4] │ +└─────────────────────┘ +``` diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md index 17ab04b7799e..fa5728a097d9 100644 --- a/docs/ru/sql-reference/functions/date-time-functions.md +++ b/docs/ru/sql-reference/functions/date-time-functions.md @@ -359,7 +359,7 @@ SELECT toStartOfISOYear(toDate('2017-01-01')) AS ISOYear20170101; Округляет дату или дату-с-временем до последнего числа месяца. Возвращается дата. -:::note "Attention" +:::note Важно Возвращаемое значение для некорректных дат зависит от реализации. 
ClickHouse может вернуть нулевую дату, выбросить исключение, или выполнить «естественное» перетекание дат между месяцами. ::: @@ -599,29 +599,33 @@ SELECT toDate('2016-12-27') AS date, toWeek(date) AS week0, toWeek(date,1) AS we ## toYearWeek(date[,mode]) {#toyearweek} Возвращает год и неделю для даты. Год в результате может отличаться от года в аргументе даты для первой и последней недели года. -Аргумент mode работает точно так же, как аргумент mode [toWeek()](#toweek). Если mode не задан, используется режим 0. +Аргумент mode работает так же, как аргумент mode [toWeek()](#toweek), значение mode по умолчанию -- `0`. -`toISOYear() ` эквивалентно `intDiv(toYearWeek(date,3),100)`. +`toISOYear() ` эквивалентно `intDiv(toYearWeek(date,3),100)` + +:::warning +Однако, есть отличие в работе функций `toWeek()` и `toYearWeek()`. `toWeek()` возвращает номер недели в контексте заданного года, и в случае, когда `toWeek()` вернёт `0`, `toYearWeek()` вернёт значение, соответствующее последней неделе предыдущего года (см. `prev_yearWeek` в примере). +::: **Пример** Запрос: ```sql -SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9; +SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(date,1) AS yearWeek1, toYearWeek(date,9) AS yearWeek9, toYearWeek(toDate('2022-01-01')) AS prev_yearWeek; ``` Результат: ```text -┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┐ -│ 2016-12-27 │ 201652 │ 201652 │ 201701 │ -└────────────┴───────────┴───────────┴───────────┘ +┌───────date─┬─yearWeek0─┬─yearWeek1─┬─yearWeek9─┬─prev_yearWeek─┐ +│ 2016-12-27 │ 201652 │ 201652 │ 201701 │ 202152 │ +└────────────┴───────────┴───────────┴───────────┴───────────────┘ ``` ## age -Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 секунду. +Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 микросекунду. Например, разница между `2021-12-29` и `2022-01-01` 3 дня для единицы `day`, 0 месяцев для единицы `month`, 0 лет для единицы `year`. **Синтаксис** @@ -635,6 +639,8 @@ age('unit', startdate, enddate, [timezone]) - `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). Возможные значения: + - `microsecond` (возможные сокращения: `us`, `u`) + - `millisecond` (возможные сокращения: `ms`) - `second` (возможные сокращения: `ss`, `s`) - `minute` (возможные сокращения: `mi`, `n`) - `hour` (возможные сокращения: `hh`, `h`) @@ -708,6 +714,8 @@ date_diff('unit', startdate, enddate, [timezone]) - `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md). 
Возможные значения: + - `microsecond` (возможные сокращения: `us`, `u`) + - `millisecond` (возможные сокращения: `ms`) - `second` (возможные сокращения: `ss`, `s`) - `minute` (возможные сокращения: `mi`, `n`) - `hour` (возможные сокращения: `hh`, `h`) diff --git a/docs/ru/sql-reference/functions/encoding-functions.md b/docs/ru/sql-reference/functions/encoding-functions.md index 675bf1cbf9fb..3e962ac23c22 100644 --- a/docs/ru/sql-reference/functions/encoding-functions.md +++ b/docs/ru/sql-reference/functions/encoding-functions.md @@ -154,7 +154,7 @@ SELECT hex(toFloat64(number)) AS hex_presentation FROM numbers(15, 2); Если вы хотите преобразовать результат в число, вы можете использовать функции [reverse](../../sql-reference/functions/string-functions.md#reverse) и [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#type-conversion-functions). -:::note "Примечание" +:::note Примечание Если `unhex` вызывается из `clickhouse-client`, двоичные строки отображаются с использованием UTF-8. ::: @@ -296,10 +296,10 @@ unbin(arg) Для числового аргумента `unbin()` не возвращает значение, обратное результату `bin()`. Чтобы преобразовать результат в число, используйте функции [reverse](../../sql-reference/functions/string-functions.md#reverse) и [reinterpretAs<Type>](../../sql-reference/functions/type-conversion-functions.md#reinterpretasuint8163264). -:::note "Примечание" +:::note Примечание Если `unbin` вызывается из клиента `clickhouse-client`, бинарная строка возвращается в кодировке UTF-8. ::: - + Поддерживает двоичные цифры `0` и `1`. Количество двоичных цифр не обязательно должно быть кратно восьми. Если строка аргумента содержит что-либо, кроме двоичных цифр, возвращается некоторый результат, определенный реализацией (ошибки не возникает). **Аргументы** diff --git a/docs/ru/sql-reference/functions/ext-dict-functions.md b/docs/ru/sql-reference/functions/ext-dict-functions.md index d14f0ddf027c..047862f06176 100644 --- a/docs/ru/sql-reference/functions/ext-dict-functions.md +++ b/docs/ru/sql-reference/functions/ext-dict-functions.md @@ -4,7 +4,7 @@ sidebar_position: 58 sidebar_label: "Функции для работы с внешними словарями" --- -:::note "Внимание" +:::note Внимание Для словарей, созданных с помощью [DDL-запросов](../../sql-reference/statements/create/dictionary.md), в параметре `dict_name` указывается полное имя словаря вместе с базой данных, например: `.`. Если база данных не указана, используется текущая. ::: diff --git a/docs/ru/sql-reference/functions/geo/geohash.md b/docs/ru/sql-reference/functions/geo/geohash.md index 9e56d977bc73..3b39b1318ea8 100644 --- a/docs/ru/sql-reference/functions/geo/geohash.md +++ b/docs/ru/sql-reference/functions/geo/geohash.md @@ -85,8 +85,8 @@ geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precisi - `latitude_max` — максимальная широта. Диапазон возможных значений: `[-90°, 90°]`. Тип данных: [Float](../../../sql-reference/data-types/float.md). - `precision` — точность geohash. Диапазон возможных значений: `[1, 12]`. Тип данных: [UInt8](../../../sql-reference/data-types/int-uint.md). -:::info "Замечание" - Все передаваемые координаты должны быть одного и того же типа: либо `Float32`, либо `Float64`. +:::info Замечание +Все передаваемые координаты должны быть одного и того же типа: либо `Float32`, либо `Float64`. 
::: **Возвращаемые значения** @@ -96,8 +96,8 @@ geohashesInBox(longitude_min, latitude_min, longitude_max, latitude_max, precisi Тип данных: [Array](../../../sql-reference/data-types/array.md)([String](../../../sql-reference/data-types/string.md)). -:::info "Замечание" - Если возвращаемый массив содержит свыше 10 000 000 элементов, функция сгенерирует исключение. +:::info Замечание +Если возвращаемый массив содержит свыше 10 000 000 элементов, функция сгенерирует исключение. ::: **Пример** diff --git a/docs/ru/sql-reference/functions/introspection.md b/docs/ru/sql-reference/functions/introspection.md index 26497ef21d36..2b74e9d776ce 100644 --- a/docs/ru/sql-reference/functions/introspection.md +++ b/docs/ru/sql-reference/functions/introspection.md @@ -8,8 +8,8 @@ sidebar_label: "Функции интроспекции" Функции из этого раздела могут использоваться для интроспекции [ELF](https://en.wikipedia.org/wiki/Executable_and_Linkable_Format) и [DWARF](https://en.wikipedia.org/wiki/DWARF) в целях профилирования запросов. -:::danger "Предупреждение" - Эти функции выполняются медленно и могут приводить к нежелательным последствиям в плане безопасности. +:::danger Предупреждение +Эти функции выполняются медленно и могут приводить к нежелательным последствиям в плане безопасности. ::: Для правильной работы функций интроспекции: diff --git a/docs/ru/sql-reference/functions/json-functions.md b/docs/ru/sql-reference/functions/json-functions.md index 53ab638165af..123f40ce05da 100644 --- a/docs/ru/sql-reference/functions/json-functions.md +++ b/docs/ru/sql-reference/functions/json-functions.md @@ -361,8 +361,8 @@ SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[*]'); SELECT JSON_EXISTS('{"hello":["world"]}', '$.hello[0]'); ``` -:::note "Примечание" - до версии 21.11 порядок аргументов функции был обратный, т.е. JSON_EXISTS(path, json) +:::note Примечание +До версии 21.11 порядок аргументов функции был обратный, т.е. JSON_EXISTS(path, json) ::: ## JSON_QUERY(json, path) {#json-query} @@ -388,8 +388,8 @@ SELECT toTypeName(JSON_QUERY('{"hello":2}', '$.hello')); [2] String ``` -:::note "Примечание" - до версии 21.11 порядок аргументов функции был обратный, т.е. JSON_QUERY(path, json) +:::note Примечание +До версии 21.11 порядок аргументов функции был обратный, т.е. JSON_QUERY(path, json) ::: ## JSON_VALUE(json, path) {#json-value} @@ -416,8 +416,8 @@ world String ``` -:::note "Примечание" - до версии 21.11 порядок аргументов функции был обратный, т.е. JSON_VALUE(path, json) +:::note Примечание +До версии 21.11 порядок аргументов функции был обратный, т.е. JSON_VALUE(path, json) ::: ## toJSONString {#tojsonstring} diff --git a/docs/ru/sql-reference/functions/nlp-functions.md b/docs/ru/sql-reference/functions/nlp-functions.md index 5d6540871d52..ed5fbffbb9cf 100644 --- a/docs/ru/sql-reference/functions/nlp-functions.md +++ b/docs/ru/sql-reference/functions/nlp-functions.md @@ -6,8 +6,8 @@ sidebar_label: NLP # [экспериментально] Функции для работы с естественным языком {#nlp-functions} -:::danger "Предупреждение" - Сейчас использование функций для работы с естественным языком является экспериментальной возможностью. Чтобы использовать данные функции, включите настройку `allow_experimental_nlp_functions = 1`. +:::danger Предупреждение +Сейчас использование функций для работы с естественным языком является экспериментальной возможностью. Чтобы использовать данные функции, включите настройку `allow_experimental_nlp_functions = 1`. 
::: ## stem {#stem} diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md index de54f1b36072..835aed934d5f 100644 --- a/docs/ru/sql-reference/functions/other-functions.md +++ b/docs/ru/sql-reference/functions/other-functions.md @@ -659,9 +659,11 @@ SELECT └────────────────┴───────────────────┘ ``` + ## formatReadableTimeDelta {#formatreadabletimedelta} -Принимает дельту времени в секундах. Возвращает дельту времени с (год, месяц, день, час, минута, секунда) в виде строки. +Принимает дельту времени в секундах. Возвращает дельту времени в виде строки. +Есть возможность указать наиболее крупную единицу измерения для вывода (год, месяц, день, час, минута, секунда, милли/микро/наносекунда). **Синтаксис** @@ -671,10 +673,16 @@ formatReadableTimeDelta(column[, maximum_unit]) **Аргументы** -- `column` — Столбец с числовой дельтой времени. -- `maximum_unit` — Опциональный параметр. Максимальная единица измерения для отображения. Допустимые значения: секунды, минуты, часы, дни, месяцы, годы. +- `column` — Столбец с числовой дельтой времени. +- `maximum_unit` — Опциональный параметр. Максимальная единица измерения для отображения. + * Допустимые значения: `nanoseconds`, `microseconds`, `milliseconds`, `seconds`, `minutes`, `hours`, `days`, `months`, `years`. + * Значение по умолчанию: `years`. +- `minimum_unit` — Опциональный параметр. Минимальная единица измерения для отображения. Более мелкие единицы будут отброшены. + * Допустимые значения: `nanoseconds`, `microseconds`, `milliseconds`, `seconds`, `minutes`, `hours`, `days`, `months`, `years`. + * Если минимальная единица задана явно и превышает максимальную единицу, будет выкинуто исключение. + * Значение по умолчанию: `seconds` если максимальная единица -- секунда или более крупный интервал, в противном случае -- `nanoseconds`. -Пример: +**Пример** ``` sql SELECT @@ -792,8 +800,8 @@ neighbor(column, offset[, default_value]) Результат функции зависит от затронутых блоков данных и порядка данных в блоке. -:::danger "Предупреждение" - Функция может получить доступ к значению в столбце соседней строки только внутри обрабатываемого в данный момент блока данных. +:::danger Предупреждение +Функция может получить доступ к значению в столбце соседней строки только внутри обрабатываемого в данный момент блока данных. ::: Порядок строк, используемый при вычислении функции `neighbor`, может отличаться от порядка строк, возвращаемых пользователю. @@ -902,8 +910,8 @@ FROM numbers(16) Считает разницу между последовательными значениями строк в блоке данных. Возвращает 0 для первой строки и разницу с предыдущей строкой для каждой последующей строки. -:::danger "Предупреждение" - Функция может взять значение предыдущей строки только внутри текущего обработанного блока данных. +:::danger Предупреждение +Функция может взять значение предыдущей строки только внутри текущего обработанного блока данных. ::: Результат функции зависит от затронутых блоков данных и порядка данных в блоке. @@ -983,9 +991,9 @@ WHERE diff != 1 У каждого события есть время начала и время окончания. Считается, что время начала включено в событие, а время окончания исключено из него. Столбцы со временем начала и окончания событий должны иметь одинаковый тип данных. Функция подсчитывает количество событий, происходящих одновременно на момент начала каждого из событий в выборке. -:::danger "Предупреждение" - События должны быть отсортированы по возрастанию времени начала. Если это требование нарушено, то функция вызывает исключение. 
- Каждый блок данных обрабатывается независимо. Если события из разных блоков данных накладываются по времени, они не могут быть корректно обработаны. +:::danger Предупреждение +События должны быть отсортированы по возрастанию времени начала. Если это требование нарушено, то функция вызывает исключение. +Каждый блок данных обрабатывается независимо. Если события из разных блоков данных накладываются по времени, они не могут быть корректно обработаны. ::: **Синтаксис** @@ -1675,8 +1683,8 @@ FROM numbers(10); Накапливает состояния агрегатной функции для каждой строки блока данных. -:::danger "Warning" - Функция обнуляет состояние для каждого нового блока. +:::danger Предупреждение +Функция обнуляет состояние для каждого нового блока. ::: **Синтаксис** @@ -2133,8 +2141,8 @@ countDigits(x) Тип: [UInt8](../../sql-reference/data-types/int-uint.md#uint-ranges). -:::note "Примечание" - Для `Decimal` значений учитывается их масштаб: вычисляется результат по базовому целочисленному типу, полученному как `(value * scale)`. Например: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. То есть вы можете проверить десятичное переполнение для `Decimal64` с помощью `countDecimal(x) > 18`. Это медленный вариант [isDecimalOverflow](#is-decimal-overflow). +:::note Примечание +Для `Decimal` значений учитывается их масштаб: вычисляется результат по базовому целочисленному типу, полученному как `(value * scale)`. Например: `countDigits(42) = 2`, `countDigits(42.000) = 5`, `countDigits(0.04200) = 4`. То есть вы можете проверить десятичное переполнение для `Decimal64` с помощью `countDecimal(x) > 18`. Это медленный вариант [isDecimalOverflow](#is-decimal-overflow). ::: **Пример** diff --git a/docs/ru/sql-reference/functions/string-functions.md b/docs/ru/sql-reference/functions/string-functions.md index 9638e25d4885..276dfc2ef203 100644 --- a/docs/ru/sql-reference/functions/string-functions.md +++ b/docs/ru/sql-reference/functions/string-functions.md @@ -1113,3 +1113,50 @@ A text with tags . The content within CDATA Do Nothing for 2 Minutes 2:00   ``` + +## initcap {#initcap} + +Переводит первую букву каждого слова в строке в верхний регистр, а остальные — в нижний. Словами считаются последовательности алфавитно-цифровых символов, разделённые любыми другими символами. + +## initcapUTF8 {#initcapUTF8} + +Как [initcap](#initcap), предполагая, что строка содержит набор байтов, представляющий текст в кодировке UTF-8. +Не учитывает язык. То есть для турецкого языка результат может быть не совсем верным. +Если длина UTF-8 последовательности байтов различна для верхнего и нижнего регистра кодовой точки, то для этой кодовой точки результат работы может быть некорректным. +Если строка содержит набор байтов, не являющийся UTF-8, то поведение не определено. + +## firstLine + +Возвращает первую строку в многострочном тексте. + +**Синтаксис** + +```sql +firstLine(val) +``` + +**Аргументы** + +- `val` - текст для обработки. [String](../data-types/string.md) + +**Возвращаемое значение** + +- Первая строка текста или весь текст, если переносы строк отсутствуют. 
+ +Тип: [String](../data-types/string.md) + +**Пример** + +Запрос: + +```sql +select firstLine('foo\nbar\nbaz'); +``` + +Результат: + +```result +┌─firstLine('foo\nbar\nbaz')─┐ │ foo │ └────────────────────────────┘ +``` diff --git a/docs/ru/sql-reference/functions/string-search-functions.md b/docs/ru/sql-reference/functions/string-search-functions.md index ea4f90d4f66e..4f9ae4428a47 100644 --- a/docs/ru/sql-reference/functions/string-search-functions.md +++ b/docs/ru/sql-reference/functions/string-search-functions.md @@ -28,8 +28,8 @@ position(needle IN haystack) Алиас: `locate(haystack, needle[, start_pos])`. -:::note "Примечание" - Синтаксис `position(needle IN haystack)` обеспечивает совместимость с SQL, функция работает так же, как `position(haystack, needle)`. +:::note Примечание +Синтаксис `position(needle IN haystack)` обеспечивает совместимость с SQL, функция работает так же, как `position(haystack, needle)`. ::: **Аргументы** @@ -329,8 +329,8 @@ Result: Для поиска без учета регистра и/или в кодировке UTF-8 используйте функции `multiSearchAnyCaseInsensitive, multiSearchAnyUTF8, multiSearchAnyCaseInsensitiveUTF8`. -:::note "Примечание" - Во всех функциях `multiSearch*` количество needles должно быть меньше 2^8 из-за особенностей реализации. +:::note Примечание +Во всех функциях `multiSearch*` количество needles должно быть меньше 2^8 из-за особенностей реализации. ::: ## match(haystack, pattern) {#matchhaystack-pattern} @@ -347,8 +347,8 @@ Result: То же, что и `match`, но возвращает ноль, если ни одно регулярное выражение не подошло и один, если хотя бы одно. Используется библиотека [hyperscan](https://github.com/intel/hyperscan) для соответствия регулярных выражений. Для шаблонов на поиск많их подстрок в строке, лучше используйте `multiSearchAny`, так как она работает существенно быстрее. -:::note "Примечание" - Длина любой строки из `haystack` должна быть меньше 2^32 байт, иначе бросается исключение. Это ограничение связано с ограничением hyperscan API. +:::note Примечание +Длина любой строки из `haystack` должна быть меньше 2^32 байт, иначе бросается исключение. Это ограничение связано с ограничением hyperscan API. ::: ## multiMatchAnyIndex(haystack, \[pattern<sub>1</sub>, pattern<sub>2</sub>, …, pattern<sub>n</sub>\]) {#multimatchanyindexhaystack-pattern1-pattern2-patternn} @@ -370,12 +370,12 @@ Result: То же, что и `multiFuzzyMatchAny`, только возвращает массив всех индексов всех подходящих регулярных выражений в любом порядке в пределах константного редакционного расстояния. -:::note "Примечание" - `multiFuzzyMatch*` функции не поддерживают UTF-8 закодированные регулярные выражения, и такие выражения рассматриваются как байтовые из-за ограничения hyperscan. +:::note Примечание +`multiFuzzyMatch*` функции не поддерживают UTF-8 закодированные регулярные выражения, и такие выражения рассматриваются как байтовые из-за ограничения hyperscan. ::: - -:::note "Примечание" - Чтобы выключить все функции, использующие hyperscan, используйте настройку `SET allow_hyperscan = 0;`. + +:::note Примечание +Чтобы выключить все функции, использующие hyperscan, используйте настройку `SET allow_hyperscan = 0;`. ::: ## extract(haystack, pattern) {#extracthaystack-pattern} @@ -389,8 +389,8 @@ Result: Разбирает строку `haystack` на фрагменты, соответствующие группам регулярного выражения `pattern`. Возвращает массив массивов, где первый массив содержит все фрагменты, соответствующие первой группе регулярного выражения, второй массив - соответствующие второй группе, и т.д. 
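Условный пример к описанию `extractAllGroupsHorizontal` выше (данные и регулярное выражение вымышленные):

``` sql
-- Первый массив — все фрагменты первой группы, второй — фрагменты второй группы.
SELECT extractAllGroupsHorizontal('a=1, b=2, c=3', '(\\w+)=(\\d+)') AS res;

-- Ожидаемый результат: [['a','b','c'],['1','2','3']]
```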
-:::note "Замечание" - Функция `extractAllGroupsHorizontal` работает медленнее, чем функция [extractAllGroupsVertical](#extractallgroups-vertical). +:::note Замечание +Функция `extractAllGroupsHorizontal` работает медленнее, чем функция [extractAllGroupsVertical](#extractallgroups-vertical). ::: **Синтаксис** @@ -561,8 +561,8 @@ SELECT * FROM Months WHERE ilike(name, '%j%'); Для поиска без учета регистра и/или в формате UTF-8 используйте функции `ngramSearchCaseInsensitive, ngramSearchUTF8, ngramSearchCaseInsensitiveUTF8`. -:::note "Примечание" - Для случая UTF-8 мы используем триграммное расстояние. Вычисление n-граммного расстояния не совсем честное. Мы используем 2-х байтные хэши для хэширования n-грамм, а затем вычисляем (не)симметрическую разность между хэш таблицами – могут возникнуть коллизии. В формате UTF-8 без учета регистра мы не используем честную функцию `tolower` – мы обнуляем 5-й бит (нумерация с нуля) каждого байта кодовой точки, а также первый бит нулевого байта, если байтов больше 1 – это работает для латиницы и почти для всех кириллических букв. +:::note Примечание +Для случая UTF-8 мы используем триграммное расстояние. Вычисление n-граммного расстояния не совсем честное. Мы используем 2-х байтные хэши для хэширования n-грамм, а затем вычисляем (не)симметрическую разность между хэш таблицами – могут возникнуть коллизии. В формате UTF-8 без учета регистра мы не используем честную функцию `tolower` – мы обнуляем 5-й бит (нумерация с нуля) каждого байта кодовой точки, а также первый бит нулевого байта, если байтов больше 1 – это работает для латиницы и почти для всех кириллических букв. ::: ## countMatches(haystack, pattern) {#countmatcheshaystack-pattern} @@ -801,3 +801,55 @@ SELECT countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв'); │ 3 │ └────────────────────────────────────────────────────────────┘ ``` + +## hasSubsequence(haystack, needle) {#hasSubsequence} + +Возвращает 1, если `needle` является подпоследовательностью `haystack`, иначе 0. + + +**Синтаксис** + +``` sql +hasSubsequence(haystack, needle) +``` + +**Аргументы** + +- `haystack` — строка, по которой выполняется поиск. [Строка](../syntax.md#syntax-string-literal). +- `needle` — подпоследовательность, которую необходимо найти. [Строка](../syntax.md#syntax-string-literal). + +**Возвращаемые значения** + +- 1, если `needle` является подпоследовательностью `haystack`. +- 0, если подпоследовательность не найдена. + +Тип: `UInt8`. + +**Примеры** + +Запрос: + +``` sql +SELECT hasSubsequence('garbage', 'arg'); +``` + +Результат: + +``` text +┌─hasSubsequence('garbage', 'arg')─┐ +│ 1 │ +└──────────────────────────────────┘ +``` + + +## hasSubsequenceCaseInsensitive + +Такая же, как и [hasSubsequence](#hasSubsequence), но работает без учета регистра. + +## hasSubsequenceUTF8 + +Такая же, как и [hasSubsequence](#hasSubsequence) при допущении, что `haystack` и `needle` содержат набор кодовых точек, представляющий текст в кодировке UTF-8. + +## hasSubsequenceCaseInsensitiveUTF8 + +Такая же, как и [hasSubsequenceUTF8](#hasSubsequenceUTF8), но работает без учета регистра. diff --git a/docs/ru/sql-reference/functions/type-conversion-functions.md b/docs/ru/sql-reference/functions/type-conversion-functions.md index d5e6246fe9e9..5a023d94d0f1 100644 --- a/docs/ru/sql-reference/functions/type-conversion-functions.md +++ b/docs/ru/sql-reference/functions/type-conversion-functions.md @@ -284,7 +284,13 @@ toDateTime(expr[, time_zone ]) - `expr` — Значение для преобразования. 
[String](/docs/ru/sql-reference/data-types/string.md), [Int](/docs/ru/sql-reference/data-types/int-uint.md), [Date](/docs/ru/sql-reference/data-types/date.md) или [DateTime](/docs/ru/sql-reference/data-types/datetime.md). - `time_zone` — Часовой пояс. [String](/docs/ru/sql-reference/data-types/string.md). -Если `expr` является числом, оно интерпретируется как количество секунд от начала unix эпохи. +:::note +Если `expr` является числом, то оно интерпретируется как число секунд с начала Unix-эпохи (Unix Timestamp). + +Если же `expr` -- [строка (String)](/docs/ru/sql-reference/data-types/string.md), то оно может быть интерпретировано и как Unix Timestamp, и как строковое представление даты / даты со временем. +Ввиду неоднозначности запрещён парсинг строк длиной 4 и меньше. Так, строка `'1999'` могла бы представлять собой как год (неполное строковое представление даты или даты со временем), так и Unix Timestamp. +Строки длиной 5 символов и более не несут неоднозначности, а следовательно, их парсинг разрешён. +::: **Возвращаемое значение** @@ -926,7 +932,7 @@ x::t - Преобразованное значение. -:::note "Примечание" +:::note Примечание Если входное значение выходит за границы нового типа, то результат переполняется. Например, `CAST(-1, 'UInt8')` возвращает `255`. ::: @@ -1475,8 +1481,8 @@ SELECT toLowCardinality('1'); Преобразует значение `DateTime64` в значение `Int64` с фиксированной точностью менее одной секунды. Входное значение округляется соответствующим образом вверх или вниз в зависимости от его точности. -:::info "Примечание" - Возвращаемое значение — это временная метка в UTC, а не в часовом поясе `DateTime64`. +:::info Примечание +Возвращаемое значение — это временная метка в UTC, а не в часовом поясе `DateTime64`. ::: **Синтаксис** diff --git a/docs/ru/sql-reference/operators/exists.md b/docs/ru/sql-reference/operators/exists.md index 38855abbcf3b..0e7d0ac75745 100644 --- a/docs/ru/sql-reference/operators/exists.md +++ b/docs/ru/sql-reference/operators/exists.md @@ -7,8 +7,8 @@ slug: /ru/sql-reference/operators/exists `EXISTS` может быть использован в секции [WHERE](../../sql-reference/statements/select/where.md). -:::danger "Предупреждение" - Ссылки на таблицы или столбцы основного запроса не поддерживаются в подзапросе. +:::danger Предупреждение +Ссылки на таблицы или столбцы основного запроса не поддерживаются в подзапросе. ::: **Синтаксис** diff --git a/docs/ru/sql-reference/operators/in.md b/docs/ru/sql-reference/operators/in.md index 60400fb2b31d..6b44446b3f78 100644 --- a/docs/ru/sql-reference/operators/in.md +++ b/docs/ru/sql-reference/operators/in.md @@ -122,7 +122,7 @@ FROM t_null Существует два варианта IN-ов с подзапросами (аналогично для JOIN-ов): обычный `IN` / `JOIN` и `GLOBAL IN` / `GLOBAL JOIN`. Они отличаются способом выполнения при распределённой обработке запроса. -:::note "Внимание" +:::note Внимание Помните, что алгоритмы, описанные ниже, могут работать иначе в зависимости от [настройки](../../operations/settings/settings.md) `distributed_product_mode`. ::: При использовании обычного IN-а, запрос отправляется на удалённые серверы, и на каждом из них выполняются подзапросы в секциях `IN` / `JOIN`. diff --git a/docs/ru/sql-reference/operators/index.md b/docs/ru/sql-reference/operators/index.md index b5fec3cb38c8..3238c3de4aa6 100644 --- a/docs/ru/sql-reference/operators/index.md +++ b/docs/ru/sql-reference/operators/index.md @@ -212,8 +212,9 @@ FROM test.Orders; В качестве значения оператора `INTERVAL` вы можете также использовать строковый литерал. 
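Условная иллюстрация к примечанию о правилах разбора аргумента `toDateTime` выше (запросы построены по тексту примечания; проверяйте поведение на своей версии сервера):

``` sql
SELECT toDateTime(1640995200);            -- число: всегда Unix Timestamp
SELECT toDateTime('1640995200');          -- строка из 10 символов: тоже Unix Timestamp
SELECT toDateTime('2022-01-01 00:00:00'); -- строковое представление даты со временем
-- SELECT toDateTime('1999');             -- строка из 4 символов: неоднозначно, парсинг запрещён
```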
Например, выражение `INTERVAL 1 HOUR` идентично выражению `INTERVAL '1 hour'` или `INTERVAL '1' hour`. -:::danger "Внимание" - Интервалы различных типов нельзя объединять. Нельзя использовать выражения вида `INTERVAL 4 DAY 1 HOUR`. Вместо этого интервалы можно выразить в единицах меньших или равных наименьшей единице интервала, Например, `INTERVAL 25 HOUR`. Также можно выполнять последовательные операции как показано в примере ниже. +:::danger Внимание +Интервалы различных типов нельзя объединять. Нельзя использовать выражения вида `INTERVAL 4 DAY 1 HOUR`. Вместо этого интервалы можно выразить в единицах меньших или равных наименьшей единице интервала. Например, `INTERVAL 25 HOUR`. Также можно выполнять последовательные операции, как показано в примере ниже. +::: Примеры: @@ -249,9 +250,10 @@ SELECT now() AS current_date_time, current_date_time + INTERVAL '4' day + INTERV Вы можете изменить дату, не используя синтаксис `INTERVAL`, а просто добавив или отняв секунды, минуты и часы. Например, чтобы передвинуть дату на один день вперед, можно прибавить к ней значение `60*60*24`. - :::note "Примечание" - Синтаксис `INTERVAL` или функция `addDays` предпочтительнее для работы с датами. Сложение с числом (например, синтаксис `now() + ...`) не учитывает региональные настройки времени, например, переход на летнее время. - ::: +:::note Примечание +Синтаксис `INTERVAL` или функция `addDays` предпочтительнее для работы с датами. Сложение с числом (например, синтаксис `now() + ...`) не учитывает региональные настройки времени, например, переход на летнее время. +::: + Пример: ``` sql @@ -263,7 +265,6 @@ SELECT toDateTime('2014-10-26 00:00:00', 'Europe/Moscow') AS time, time + 60 * 6 │ 2014-10-26 00:00:00 │ 2014-10-26 23:00:00 │ 2014-10-27 00:00:00 │ └─────────────────────┴─────────────────────┴─────────────────────┘ ``` -::: **Смотрите также** @@ -303,9 +304,9 @@ END В случае указания `x` - функция `transform(x, [a, ...], [b, ...], c)`. Иначе — `multiIf(a, b, ..., c)`. При отсутствии секции `ELSE c`, значением по умолчанию будет `NULL`. - :::note "Примечание" - Функция `transform` не умеет работать с `NULL`. - ::: +:::note Примечание +Функция `transform` не умеет работать с `NULL`. +::: ## Оператор склеивания строк {#operator-skleivaniia-strok} `s1 || s2` - функция `concat(s1, s2)` diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md index a8ace2130750..1a45f0f1f7f9 100644 --- a/docs/ru/sql-reference/statements/alter/column.md +++ b/docs/ru/sql-reference/statements/alter/column.md @@ -76,8 +76,8 @@ DROP COLUMN [IF EXISTS] name Запрос удаляет данные из файловой системы. Так как это представляет собой удаление целых файлов, запрос выполняется почти мгновенно. -:::warning "Предупреждение" - Вы не можете удалить столбец, используемый в [материализованном представлениии](../../../sql-reference/statements/create/view.md#materialized). В противном случае будет ошибка. +:::warning Предупреждение +Вы не можете удалить столбец, используемый в [материализованном представлении](../../../sql-reference/statements/create/view.md#materialized). В противном случае будет ошибка. 
::: Пример: @@ -182,7 +182,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String) Синтаксис: ```sql -ALTER TABLE table_name MODIFY column_name REMOVE property; +ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property; ``` **Пример** diff --git a/docs/ru/sql-reference/statements/alter/delete.md b/docs/ru/sql-reference/statements/alter/delete.md index 1d16bbdc7688..dc968a173493 100644 --- a/docs/ru/sql-reference/statements/alter/delete.md +++ b/docs/ru/sql-reference/statements/alter/delete.md @@ -12,9 +12,10 @@ ALTER TABLE [db.]table [ON CLUSTER cluster] DELETE WHERE filter_expr Удаляет данные, соответствующие указанному выражению фильтрации. Реализовано как [мутация](../../../sql-reference/statements/alter/index.md#mutations). - :::note - Префикс `ALTER TABLE` делает этот синтаксис отличным от большинства других систем, поддерживающих SQL. Он предназначен для обозначения того, что в отличие от аналогичных запросов в базах данных OLTP это тяжелая операция, не предназначенная для частого использования. - ::: +:::note Примечание +Префикс `ALTER TABLE` делает этот синтаксис отличным от большинства других систем, поддерживающих SQL. Он предназначен для обозначения того, что в отличие от аналогичных запросов в базах данных OLTP это тяжелая операция, не предназначенная для частого использования. +::: + Выражение `filter_expr` должно иметь тип `UInt8`. Запрос удаляет строки в таблице, для которых это выражение принимает ненулевое значение. Один запрос может содержать несколько команд, разделенных запятыми. diff --git a/docs/ru/sql-reference/statements/alter/index.md b/docs/ru/sql-reference/statements/alter/index.md index 33c2c1de417c..07f5ff0a2984 100644 --- a/docs/ru/sql-reference/statements/alter/index.md +++ b/docs/ru/sql-reference/statements/alter/index.md @@ -26,9 +26,9 @@ ALTER TABLE [db].name [ON CLUSTER cluster] ADD|DROP|CLEAR|COMMENT|MODIFY COLUMN - [CONSTRAINT](../../../sql-reference/statements/alter/constraint.md) - [TTL](../../../sql-reference/statements/alter/ttl.md) - :::note - Запрос `ALTER TABLE` поддерживается только для таблиц типа `*MergeTree`, а также `Merge` и `Distributed`. Запрос имеет несколько вариантов. - ::: +:::note Примечание +Запрос `ALTER TABLE` поддерживается только для таблиц типа `*MergeTree`, а также `Merge` и `Distributed`. Запрос имеет несколько вариантов. +::: Следующие запросы `ALTER` управляют представлениями: - [ALTER TABLE ... MODIFY QUERY](../../../sql-reference/statements/alter/view.md) — изменяет структуру [Materialized view](../create/view.md#materialized). @@ -76,8 +76,8 @@ ALTER TABLE [db.]table MATERIALIZE INDEX name IN PARTITION partition_name Вы можете указать время ожидания (в секундах) выполнения всех запросов `ALTER` для неактивных реплик с помощью настройки [replication_wait_for_inactive_replica_timeout](../../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout). -:::info "Примечание" - Для всех запросов `ALTER` при `alter_sync = 2` и неактивности некоторых реплик больше времени, заданного настройкой `replication_wait_for_inactive_replica_timeout`, генерируется исключение `UNFINISHED`. +:::info Примечание +Для всех запросов `ALTER` при `alter_sync = 2` и неактивности некоторых реплик больше времени, заданного настройкой `replication_wait_for_inactive_replica_timeout`, генерируется исключение `UNFINISHED`. ::: Для запросов `ALTER TABLE ... UPDATE|DELETE` синхронность выполнения определяется настройкой [mutations_sync](../../../operations/settings/settings.md#mutations_sync). 
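Условный пример к описанию настройки `mutations_sync` выше (имя таблицы вымышленное):

``` sql
-- 0 — не ждать завершения мутации; 1 — ждать выполнения на текущем сервере;
-- 2 — ждать выполнения на всех репликах.
SET mutations_sync = 2;
ALTER TABLE visits DELETE WHERE visit_date < '2020-01-01';
```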
diff --git a/docs/ru/sql-reference/statements/alter/partition.md b/docs/ru/sql-reference/statements/alter/partition.md index 90688c9ece27..4d37dc82fced 100644 --- a/docs/ru/sql-reference/statements/alter/partition.md +++ b/docs/ru/sql-reference/statements/alter/partition.md @@ -182,9 +182,10 @@ ALTER TABLE table_name [ON CLUSTER cluster] FREEZE [PARTITION partition_expr] [W Создаёт резервную копию для заданной партиции. Если выражение `PARTITION` опущено, резервные копии будут созданы для всех партиций. - :::note "Примечание" - Создание резервной копии не требует остановки сервера. - ::: +:::note Примечание +Создание резервной копии не требует остановки сервера. +::: + Для таблиц старого стиля имя партиций можно задавать в виде префикса (например, `2019`). В этом случае, резервные копии будут созданы для всех соответствующих партиций. Подробнее о том, как корректно задать имя партиции, см. в разделе [Как задавать имя партиции в запросах ALTER](#alter-how-to-specify-part-expr). Запрос формирует для текущего состояния таблицы жесткие ссылки на данные в этой таблице. Ссылки размещаются в директории `/var/lib/clickhouse/shadow/N/...`, где: @@ -193,9 +194,9 @@ ALTER TABLE table_name [ON CLUSTER cluster] FREEZE [PARTITION partition_expr] [W - `N` — инкрементальный номер резервной копии. - если задан параметр `WITH NAME`, то вместо инкрементального номера используется значение параметра `'backup_name'`. - :::note "Примечание" - При использовании [нескольких дисков для хранения данных таблицы](../../statements/alter/index.md#table_engine-mergetree-multiple-volumes) директория `shadow/N` появляется на каждом из дисков, на которых были куски, попавшие под выражение `PARTITION`. - ::: +:::note Примечание +При использовании [нескольких дисков для хранения данных таблицы](../../statements/alter/index.md#table_engine-mergetree-multiple-volumes) директория `shadow/N` появляется на каждом из дисков, на которых были куски, попавшие под выражение `PARTITION`. +::: Структура директорий внутри резервной копии такая же, как внутри `/var/lib/clickhouse/`. Запрос выполнит `chmod` для всех файлов, запрещая запись в них. Обратите внимание, запрос `ALTER TABLE t FREEZE PARTITION` не реплицируется. Он создает резервную копию только на локальном сервере. После создания резервной копии данные из `/var/lib/clickhouse/shadow/` можно скопировать на удалённый сервер, а локальную копию удалить. diff --git a/docs/ru/sql-reference/statements/alter/projection.md b/docs/ru/sql-reference/statements/alter/projection.md index 33e52b93add8..523bdf371b01 100644 --- a/docs/ru/sql-reference/statements/alter/projection.md +++ b/docs/ru/sql-reference/statements/alter/projection.md @@ -20,6 +20,6 @@ sidebar_label: PROJECTION Также команды реплицируются, синхронизируя описания проекций в метаданных с помощью ZooKeeper. - :::note - Манипуляции с проекциями поддерживаются только для таблиц с движком [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) (включая [replicated](../../../engines/table-engines/mergetree-family/replication.md) варианты). - ::: +:::note Примечание +Манипуляции с проекциями поддерживаются только для таблиц с движком [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) (включая [replicated](../../../engines/table-engines/mergetree-family/replication.md) варианты). 
+::: diff --git a/docs/ru/sql-reference/statements/alter/sample-by.md b/docs/ru/sql-reference/statements/alter/sample-by.md index ca3cb93d12b6..7b5705898e30 100644 --- a/docs/ru/sql-reference/statements/alter/sample-by.md +++ b/docs/ru/sql-reference/statements/alter/sample-by.md @@ -16,7 +16,7 @@ ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY SAMPLE BY new_expression Эта команда является упрощенной в том смысле, что она изменяет только метаданные. Первичный ключ должен содержать новый ключ сэмплирования. - :::note "Note" - Это работает только для таблиц в семействе [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) (включая +:::note Примечание +Это работает только для таблиц в семействе [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) (включая [реплицируемые](../../../engines/table-engines/mergetree-family/replication.md) таблицы). - ::: \ No newline at end of file +::: \ No newline at end of file diff --git a/docs/ru/sql-reference/statements/alter/setting.md b/docs/ru/sql-reference/statements/alter/setting.md index 5eba971fae18..078e73c48e3b 100644 --- a/docs/ru/sql-reference/statements/alter/setting.md +++ b/docs/ru/sql-reference/statements/alter/setting.md @@ -15,9 +15,9 @@ sidebar_label: SETTING ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY|RESET SETTING ... ``` - :::note "Примечание" - Эти запросы могут применяться только к таблицам на движке [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md). - ::: +:::note Примечание +Эти запросы могут применяться только к таблицам на движке [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md). +::: ## MODIFY SETTING {#alter_modify_setting} diff --git a/docs/ru/sql-reference/statements/alter/update.md b/docs/ru/sql-reference/statements/alter/update.md index c9ca9651d623..b2032ac77d1c 100644 --- a/docs/ru/sql-reference/statements/alter/update.md +++ b/docs/ru/sql-reference/statements/alter/update.md @@ -12,9 +12,10 @@ ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] WHERE Манипулирует данными, соответствующими заданному выражению фильтрации. Реализовано как [мутация](../../../sql-reference/statements/alter/index.md#mutations). - :::note - Префикс `ALTER TABLE` делает этот синтаксис отличным от большинства других систем, поддерживающих SQL. Он предназначен для обозначения того, что в отличие от аналогичных запросов в базах данных OLTP это тяжелая операция, не предназначенная для частого использования. - ::: +:::note Примечание +Префикс `ALTER TABLE` делает этот синтаксис отличным от большинства других систем, поддерживающих SQL. Он предназначен для обозначения того, что в отличие от аналогичных запросов в базах данных OLTP это тяжелая операция, не предназначенная для частого использования. +::: + Выражение `filter_expr` должно иметь тип `UInt8`. Запрос изменяет значение указанных столбцов на вычисленное значение соответствующих выражений в каждой строке, для которой `filter_expr` принимает ненулевое значение. Вычисленные значения преобразуются к типу столбца с помощью оператора `CAST`. Изменение столбцов, которые используются при вычислении первичного ключа или ключа партиционирования, не поддерживается. Один запрос может содержать несколько команд, разделенных запятыми. 
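Условный пример запроса `ALTER TABLE ... UPDATE` с несколькими командами, разделёнными запятыми, как описано выше (таблица и столбцы вымышленные; изменяемые столбцы не должны входить в первичный ключ или ключ партиционирования):

``` sql
ALTER TABLE visits
    UPDATE hits = hits + 1 WHERE visit_date = '2023-01-01',
    UPDATE is_new = 0 WHERE visit_date < '2023-01-01';
```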
diff --git a/docs/ru/sql-reference/statements/create/row-policy.md b/docs/ru/sql-reference/statements/create/row-policy.md index c19e8a8fc9bd..ae5bdc6783ac 100644 --- a/docs/ru/sql-reference/statements/create/row-policy.md +++ b/docs/ru/sql-reference/statements/create/row-policy.md @@ -28,17 +28,17 @@ CREATE [ROW] POLICY [IF NOT EXISTS | OR REPLACE] policy_name1 [ON CLUSTER cluste Ключевым словом `ALL` обозначаются все пользователи, включая текущего. Ключевые слова `ALL EXCEPT` позволяют исключить пользователей из списка всех пользователей. Например, `CREATE ROW POLICY ... TO ALL EXCEPT accountant, john@localhost` - :::note - Если для таблицы не задано ни одной политики доступа к строкам, то любой пользователь может выполнить команду SELECT и получить все строки таблицы. Если определить хотя бы одну политику для таблицы, до доступ к строкам будет управляться этими политиками, причем для всех пользователей (даже для тех, для кого политики не определялись). Например, следующая политика +:::note Примечание +Если для таблицы не задано ни одной политики доступа к строкам, то любой пользователь может выполнить команду SELECT и получить все строки таблицы. Если определить хотя бы одну политику для таблицы, то доступ к строкам будет управляться этими политиками, причем для всех пользователей (даже для тех, для кого политики не определялись). Например, следующая политика - `CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter` +`CREATE ROW POLICY pol1 ON mydb.table1 USING b=1 TO mira, peter` - запретит пользователям `mira` и `peter` видеть строки с `b != 1`, и еще запретит всем остальным пользователям (например, пользователю `paul`) видеть какие-либо строки вообще из таблицы `mydb.table1`. +запретит пользователям `mira` и `peter` видеть строки с `b != 1`, и еще запретит всем остальным пользователям (например, пользователю `paul`) видеть какие-либо строки вообще из таблицы `mydb.table1`. - Если это нежелательно, такое поведение можно исправить, определив дополнительную политику: +Если это нежелательно, такое поведение можно исправить, определив дополнительную политику: - `CREATE ROW POLICY pol2 ON mydb.table1 USING 1 TO ALL EXCEPT mira, peter` - ::: +`CREATE ROW POLICY pol2 ON mydb.table1 USING 1 TO ALL EXCEPT mira, peter` +::: ## Секция AS {#create-row-policy-as} diff --git a/docs/ru/sql-reference/statements/create/table.md b/docs/ru/sql-reference/statements/create/table.md index 64eae49be6c3..dbd6a325c40d 100644 --- a/docs/ru/sql-reference/statements/create/table.md +++ b/docs/ru/sql-reference/statements/create/table.md @@ -156,8 +156,9 @@ ENGINE = engine PRIMARY KEY(expr1[, expr2,...]); ``` -:::danger "Предупреждение" - Вы не можете сочетать оба способа в одном запросе. +:::danger Предупреждение +Вы не можете сочетать оба способа в одном запросе. +::: ## Ограничения {#constraints} @@ -209,8 +210,9 @@ ALTER TABLE codec_example MODIFY COLUMN float_value CODEC(Default); Кодеки можно последовательно комбинировать, например, `CODEC(Delta, Default)`. -:::danger "Предупреждение" - Нельзя распаковать базу данных ClickHouse с помощью сторонних утилит наподобие `lz4`. Необходимо использовать специальную утилиту [clickhouse-compressor](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor). +:::danger Предупреждение +Нельзя распаковать базу данных ClickHouse с помощью сторонних утилит наподобие `lz4`. Необходимо использовать специальную утилиту [clickhouse-compressor](https://github.com/ClickHouse/ClickHouse/tree/master/programs/compressor).
+::: Сжатие поддерживается для следующих движков таблиц: @@ -240,6 +242,7 @@ ClickHouse поддерживает кодеки общего назначени - `Delta(delta_bytes)` — Метод, в котором исходные значения заменяются разностью двух соседних значений, за исключением первого значения, которое остаётся неизменным. Для хранения разниц используется до `delta_bytes`, т.е. `delta_bytes` — это максимальный размер исходных данных. Возможные значения `delta_bytes`: 1, 2, 4, 8. Значение по умолчанию для `delta_bytes` равно `sizeof(type)`, если результат 1, 2, 4, or 8. Во всех других случаях — 1. - `DoubleDelta` — Вычисляется разницу от разниц и сохраняет её в компакном бинарном виде. Оптимальная степень сжатия достигается для монотонных последовательностей с постоянным шагом, наподобие временных рядов. Можно использовать с любым типом данных фиксированного размера. Реализует алгоритм, используемый в TSDB Gorilla, поддерживает 64-битные типы данных. Использует 1 дополнительный бит для 32-байтовых значений: 5-битные префиксы вместо 4-битных префиксов. Подробнее читайте в разделе «Compressing Time Stamps» документа [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). +- `GCD` - Вычисляет НОД всех чисел, а затем делит их на него. Этот кодек предназначен для подготовки данных и не подходит для использования без дополнительного кодека. GCD-кодек может использоваться с Integer, Decimal и DateTime. Хорошим вариантом использования было бы хранение временных меток или денежных значений с высокой точностью. - `Gorilla` — Вычисляет XOR между текущим и предыдущим значением и записывает результат в компактной бинарной форме. Еффективно сохраняет ряды медленно изменяющихся чисел с плавающей запятой, поскольку наилучший коэффициент сжатия достигается, если соседние значения одинаковые. Реализует алгоритм, используемый в TSDB Gorilla, адаптируя его для работы с 64-битными значениями. Подробнее читайте в разделе «Compressing Values» документа [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf). - `T64` — Метод сжатия который обрезает неиспользуемые старшие биты целочисленных значений (включая `Enum`, `Date` и `DateTime`). На каждом шаге алгоритма, кодек помещает блок из 64 значений в матрицу 64✕64, транспонирует её, обрезает неиспользуемые биты, а то, что осталось возвращает в виде последовательности. Неиспользуемые биты, это биты, которые не изменяются от минимального к максимальному на всём диапазоне значений куска данных. @@ -265,12 +268,12 @@ ENGINE = MergeTree() Эти кодеки используют фиксированный одноразовый ключ шифрования. Таким образом, это детерминированное шифрование. Оно совместимо с поддерживающими дедупликацию движками, в частности, [ReplicatedMergeTree](../../../engines/table-engines/mergetree-family/replication.md). Однако у шифрования имеется недостаток: если дважды зашифровать один и тот же блок данных, текст на выходе получится одинаковым, и злоумышленник, у которого есть доступ к диску, заметит эту эквивалентность (при этом доступа к содержимому он не получит). - :::note "Внимание" - Большинство движков, включая семейство `MergeTree`, создают на диске индексные файлы, не применяя кодеки. А значит, в том случае, если зашифрованный столбец индексирован, на диске отобразится незашифрованный текст. 
- ::: - :::note "Внимание" - Если вы выполняете запрос SELECT с упоминанием конкретного значения в зашифрованном столбце (например, при использовании секции WHERE), это значение может появиться в [system.query_log](../../../operations/system-tables/query_log.md). Рекомендуем отключить логирование. - ::: +:::note Внимание +Большинство движков, включая семейство `MergeTree`, создают на диске индексные файлы, не применяя кодеки. А значит, в том случае, если зашифрованный столбец индексирован, на диске отобразится незашифрованный текст. +::: +:::note Внимание +Если вы выполняете запрос SELECT с упоминанием конкретного значения в зашифрованном столбце (например, при использовании секции WHERE), это значение может появиться в [system.query_log](../../../operations/system-tables/query_log.md). Рекомендуем отключить логирование. +::: **Пример** ```sql @@ -281,9 +284,10 @@ CREATE TABLE mytable ENGINE = MergeTree ORDER BY x; ``` - :::note "Замечание" - Если необходимо применить сжатие, это нужно явно прописать в запросе. Без этого будет выполнено только шифрование данных. - ::: +:::note Примечание +Если необходимо применить сжатие, это нужно явно прописать в запросе. Без этого будет выполнено только шифрование данных. +::: + **Пример** ```sql @@ -324,9 +328,10 @@ CREATE TEMPORARY TABLE [IF NOT EXISTS] table_name Запрос `REPLACE` позволяет частично изменить таблицу (структуру или данные). - :::note "Замечание" - Такие запросы поддерживаются только движком БД [Atomic](../../../engines/database-engines/atomic.md). - ::: +:::note Примечание +Такие запросы поддерживаются только движком БД [Atomic](../../../engines/database-engines/atomic.md). +::: + Чтобы удалить часть данных из таблицы, вы можете создать новую таблицу, добавить в нее данные из старой таблицы, которые вы хотите оставить (отобрав их с помощью запроса `SELECT`), затем удалить старую таблицу и переименовать новую таблицу так как старую: ```sql @@ -399,9 +404,9 @@ SELECT * FROM base.t1; Вы можете добавить комментарий к таблице при ее создании. - :::note "Замечание" - Комментарий поддерживается для всех движков таблиц, кроме [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) и [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md). - ::: +:::note Примечание +Комментарий поддерживается для всех движков таблиц, кроме [Kafka](../../../engines/table-engines/integrations/kafka.md), [RabbitMQ](../../../engines/table-engines/integrations/rabbitmq.md) и [EmbeddedRocksDB](../../../engines/table-engines/integrations/embedded-rocksdb.md). +::: **Синтаксис** ``` sql diff --git a/docs/ru/sql-reference/statements/create/user.md b/docs/ru/sql-reference/statements/create/user.md index c0c50174d78f..76cfdb251dc6 100644 --- a/docs/ru/sql-reference/statements/create/user.md +++ b/docs/ru/sql-reference/statements/create/user.md @@ -13,7 +13,7 @@ sidebar_label: "Пользователь" ``` sql CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, name2 [ON CLUSTER cluster_name2] ...] 
- [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']}] + [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'} | {WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa|...'}] [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] [DEFAULT ROLE role [,...]] [DEFAULT DATABASE database | NONE] @@ -27,14 +27,19 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] Существует несколько способов идентификации пользователя: -- `IDENTIFIED WITH no_password` -- `IDENTIFIED WITH plaintext_password BY 'qwerty'` -- `IDENTIFIED WITH sha256_password BY 'qwerty'` or `IDENTIFIED BY 'password'` -- `IDENTIFIED WITH sha256_hash BY 'hash'` or `IDENTIFIED WITH sha256_hash BY 'hash' SALT 'salt'` -- `IDENTIFIED WITH double_sha1_password BY 'qwerty'` -- `IDENTIFIED WITH double_sha1_hash BY 'hash'` -- `IDENTIFIED WITH ldap SERVER 'server_name'` -- `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'` +- `IDENTIFIED WITH no_password` +- `IDENTIFIED WITH plaintext_password BY 'qwerty'` +- `IDENTIFIED WITH sha256_password BY 'qwerty'` or `IDENTIFIED BY 'password'` +- `IDENTIFIED WITH sha256_hash BY 'hash'` or `IDENTIFIED WITH sha256_hash BY 'hash' SALT 'salt'` +- `IDENTIFIED WITH double_sha1_password BY 'qwerty'` +- `IDENTIFIED WITH double_sha1_hash BY 'hash'` +- `IDENTIFIED WITH bcrypt_password BY 'qwerty'` +- `IDENTIFIED WITH bcrypt_hash BY 'hash'` +- `IDENTIFIED WITH ldap SERVER 'server_name'` +- `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'` +- `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'` +- `IDENTIFIED WITH ssh_key BY KEY 'public_key' TYPE 'ssh-rsa', KEY 'another_public_key' TYPE 'ssh-ed25519'` +- `IDENTIFIED BY 'qwerty'` Для идентификации с sha256_hash используя `SALT` - хэш должен быть вычислен от конкатенации 'password' и 'salt'. @@ -55,8 +60,8 @@ CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] - `CREATE USER mira@'localhost'` — Эквивалентно `HOST LOCAL`. - `CREATE USER mira@'192.168.%.%'` — Эквивалентно `HOST LIKE`. -:::info "Внимание" - ClickHouse трактует конструкцию `user_name@'address'` как имя пользователя целиком. То есть технически вы можете создать несколько пользователей с одинаковыми `user_name`, но разными частями конструкции после `@`, но лучше так не делать. +:::info Внимание +ClickHouse трактует конструкцию `user_name@'address'` как имя пользователя целиком. То есть технически вы можете создать несколько пользователей с одинаковыми `user_name`, но разными частями конструкции после `@`, но лучше так не делать. 
::: ## Секция GRANTEES {#grantees} diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md index d3846aac2896..543a4b21ad17 100644 --- a/docs/ru/sql-reference/statements/create/view.md +++ b/docs/ru/sql-reference/statements/create/view.md @@ -48,12 +48,12 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na Материализованное представление устроено следующим образом: при вставке данных в таблицу, указанную в SELECT-е, кусок вставляемых данных преобразуется этим запросом SELECT, и полученный результат вставляется в представление. - :::note "Важно" +:::note Важно +Материализованные представления в ClickHouse используют **имена столбцов** вместо порядка следования столбцов при вставке в целевую таблицу. Если в результатах запроса `SELECT` некоторые имена столбцов отсутствуют, то ClickHouse использует значение по умолчанию, даже если столбец не является [Nullable](../../data-types/nullable.md). Безопасной практикой при использовании материализованных представлений считается добавление псевдонимов для каждого столбца. - Материализованные представления в ClickHouse используют **имена столбцов** вместо порядка следования столбцов при вставке в целевую таблицу. Если в результатах запроса `SELECT` некоторые имена столбцов отсутствуют, то ClickHouse использует значение по умолчанию, даже если столбец не является [Nullable](../../data-types/nullable.md). Безопасной практикой при использовании материализованных представлений считается добавление псевдонимов для каждого столбца. +Материализованные представления в ClickHouse больше похожи на `after insert` триггеры. Если в запросе материализованного представления есть агрегирование, оно применяется только к вставляемому блоку записей. Любые изменения существующих данных исходной таблицы (например обновление, удаление, удаление раздела и т.д.) не изменяют материализованное представление. +::: - Материализованные представления в ClickHouse больше похожи на `after insert` триггеры. Если в запросе материализованного представления есть агрегирование, оно применяется только к вставляемому блоку записей. Любые изменения существующих данных исходной таблицы (например обновление, удаление, удаление раздела и т.д.) не изменяют материализованное представление. - ::: Если указано `POPULATE`, то при создании представления в него будут добавлены данные, уже содержащиеся в исходной таблице, как если бы был сделан запрос `CREATE TABLE ... AS SELECT ...` . Если `POPULATE` не указано, представление будет содержать только данные, добавленные в таблицу после создания представления. Использовать `POPULATE` не рекомендуется, так как в представление не попадут данные, добавляемые в таблицу во время создания представления. Запрос `SELECT` может содержать `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`… Следует иметь ввиду, что соответствующие преобразования будут выполняться независимо, на каждый блок вставляемых данных. Например, при наличии `GROUP BY`, данные будут агрегироваться при вставке, но только в рамках одной пачки вставляемых данных. Далее, данные не будут доагрегированы. Исключение - использование ENGINE, производящего агрегацию данных самостоятельно, например, `SummingMergeTree`. @@ -68,12 +68,12 @@ CREATE MATERIALIZED VIEW [IF NOT EXISTS] [db.]table_name [ON CLUSTER] [TO[db.]na ## LIVE-представления [экспериментальный функционал] {#live-view} - :::note "Важно" - Представления `LIVE VIEW` являются экспериментальной возможностью. 
Их использование может повлечь потерю совместимости в будущих версиях. - Чтобы использовать `LIVE VIEW` и запросы `WATCH`, включите настройку [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view). - ::: +:::note Важно +Представления `LIVE VIEW` являются экспериментальной возможностью. Их использование может повлечь потерю совместимости в будущих версиях. +Чтобы использовать `LIVE VIEW` и запросы `WATCH`, включите настройку [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view). +::: ```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... +CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ... ``` `LIVE VIEW` хранит результат запроса [SELECT](../../../sql-reference/statements/select/index.md), указанного при создании, и обновляется сразу же при изменении этого результата. Конечный результат запроса и промежуточные данные, из которых формируется результат, хранятся в оперативной памяти, и это обеспечивает высокую скорость обработки для повторяющихся запросов. LIVE-представления могут отправлять push-уведомления при изменении результата исходного запроса `SELECT`. Для этого используйте запрос [WATCH](../../../sql-reference/statements/watch.md). @@ -81,14 +81,14 @@ CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [ LIVE-представления работают по тому же принципу, что и распределенные таблицы. Но вместо объединения отдельных частей данных с разных серверов, LIVE-представления объединяют уже имеющийся результат с новыми данными. Если в исходном запросе LIVE-представления есть вложенный подзапрос, его результаты не кешируются, в кеше хранится только результат основного запроса. -:::info "Ограничения" - - [Табличные функции](../../../sql-reference/table-functions/index.md) в основном запросе не поддерживаются. - - Таблицы, не поддерживающие изменение с помощью запроса `INSERT`, такие как [словари](../../../sql-reference/dictionaries/index.md) и [системные таблицы](../../../operations/system-tables/index.md), а также [нормальные представления](#normal) или [материализованные представления](#materialized), не запускают обновление LIVE-представления. - - В LIVE-представлениях могут использоваться только такие запросы, которые объединяют результаты по старым и новым данным. LIVE-представления не работают с запросами, требующими полного пересчета данных или агрегирования с сохранением состояния. - - `LIVE VIEW` не работает для реплицируемых и распределенных таблиц, добавление данных в которые происходит на разных узлах. - - `LIVE VIEW` не обновляется, если в исходном запросе используются несколько таблиц. +:::info Ограничения +- [Табличные функции](../../../sql-reference/table-functions/index.md) в основном запросе не поддерживаются. +- Таблицы, не поддерживающие изменение с помощью запроса `INSERT`, такие как [словари](../../../sql-reference/dictionaries/index.md) и [системные таблицы](../../../operations/system-tables/index.md), а также [нормальные представления](#normal) или [материализованные представления](#materialized), не запускают обновление LIVE-представления. +- В LIVE-представлениях могут использоваться только такие запросы, которые объединяют результаты по старым и новым данным. LIVE-представления не работают с запросами, требующими полного пересчета данных или агрегирования с сохранением состояния. 
+- `LIVE VIEW` не работает для реплицируемых и распределенных таблиц, добавление данных в которые происходит на разных узлах. +- `LIVE VIEW` не обновляется, если в исходном запросе используются несколько таблиц. - В случаях, когда `LIVE VIEW` не обновляется автоматически, чтобы обновлять его принудительно с заданной периодичностью, используйте [WITH REFRESH](#live-view-with-refresh). +В случаях, когда `LIVE VIEW` не обновляется автоматически, чтобы обновлять его принудительно с заданной периодичностью, используйте [WITH REFRESH](#live-view-with-refresh). ::: ### Отслеживание изменений LIVE-представлений {#live-view-monitoring} diff --git a/docs/ru/sql-reference/statements/exchange.md b/docs/ru/sql-reference/statements/exchange.md index 2c872791afd4..fdc28b88c454 100644 --- a/docs/ru/sql-reference/statements/exchange.md +++ b/docs/ru/sql-reference/statements/exchange.md @@ -9,9 +9,9 @@ sidebar_label: EXCHANGE Атомарно обменивает имена двух таблиц или словарей. Это действие также можно выполнить с помощью запроса [RENAME](./rename.md), используя третье временное имя, но в таком случае действие неатомарно. - :::note "Примечание" - Запрос `EXCHANGE` поддерживается только движком баз данных [Atomic](../../engines/database-engines/atomic.md). - ::: +:::note Примечание +Запрос `EXCHANGE` поддерживается только движком баз данных [Atomic](../../engines/database-engines/atomic.md). +::: **Синтаксис** ```sql diff --git a/docs/ru/sql-reference/statements/explain.md b/docs/ru/sql-reference/statements/explain.md index 0179c840df6d..4e0a13f7eaec 100644 --- a/docs/ru/sql-reference/statements/explain.md +++ b/docs/ru/sql-reference/statements/explain.md @@ -134,9 +134,9 @@ Union ReadFromStorage (SystemNumbers) ``` - :::note "Примечание" - Оценка стоимости выполнения шага и запроса не поддерживается. - ::: +:::note Примечание +Оценка стоимости выполнения шага и запроса не поддерживается. +::: При `json = 1` шаги выполнения запроса выводятся в формате JSON. Каждый узел — это словарь, в котором всегда есть ключи `Node Type` и `Plans`. `Node Type` — это строка с именем шага. `Plans` — это массив с описаниями дочерних шагов. Другие дополнительные ключи могут быть добавлены в зависимости от типа узла и настроек. Пример: diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md index 4fa6ac4ce660..747e36b88098 100644 --- a/docs/ru/sql-reference/statements/insert-into.md +++ b/docs/ru/sql-reference/statements/insert-into.md @@ -11,7 +11,7 @@ sidebar_label: INSERT INTO **Синтаксис** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` Вы можете указать список столбцов для вставки, используя синтаксис `(c1, c2, c3)`. Также можно использовать выражение cо [звездочкой](../../sql-reference/statements/select/index.md#asterisk) и/или модификаторами, такими как [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier). @@ -100,7 +100,7 @@ INSERT INTO t FORMAT TabSeparated **Синтаксис** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] SELECT ... ``` Соответствие столбцов определяется их позицией в секции SELECT. При этом, их имена в выражении SELECT и в таблице для INSERT, могут отличаться. 
При необходимости выполняется приведение типов данных, эквивалентное соответствующему оператору CAST. @@ -120,7 +120,7 @@ INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... **Синтаксис** ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FROM INFILE file_name [COMPRESSION type] FORMAT format_name ``` Используйте этот синтаксис, чтобы вставить данные из файла, который хранится на стороне **клиента**. `file_name` и `type` задаются в виде строковых литералов. [Формат](../../interfaces/formats.md) входного файла должен быть задан в секции `FORMAT`. diff --git a/docs/ru/sql-reference/statements/optimize.md b/docs/ru/sql-reference/statements/optimize.md index 269931832327..abca8ab73d1d 100644 --- a/docs/ru/sql-reference/statements/optimize.md +++ b/docs/ru/sql-reference/statements/optimize.md @@ -8,8 +8,8 @@ sidebar_label: OPTIMIZE Запрос пытается запустить внеплановое слияние кусков данных для таблиц. -:::danger "Внимание" - `OPTIMIZE` не устраняет причину появления ошибки `Too many parts`. +:::danger Внимание +`OPTIMIZE` не устраняет причину появления ошибки `Too many parts`. ::: **Синтаксис** @@ -30,8 +30,8 @@ ClickHouse не оповещает клиента. Чтобы включить Вы можете указать время ожидания (в секундах) выполнения запросов `OPTIMIZE` для неактивных реплик с помощью настройки [replication_wait_for_inactive_replica_timeout](../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout). -:::info "Примечание" - Если значение настройки `alter_sync` равно `2` и некоторые реплики не активны больше времени, заданного настройкой `replication_wait_for_inactive_replica_timeout`, то генерируется исключение `UNFINISHED`. +:::info Примечание +Если значение настройки `alter_sync` равно `2` и некоторые реплики не активны больше времени, заданного настройкой `replication_wait_for_inactive_replica_timeout`, то генерируется исключение `UNFINISHED`. ::: ## Выражение BY {#by-expression} @@ -40,10 +40,10 @@ ClickHouse не оповещает клиента. Чтобы включить Список столбцов для дедупликации должен включать все столбцы, указанные в условиях сортировки (первичный ключ и ключ сортировки), а также в условиях партиционирования (ключ партиционирования). - :::note "Примечание" - Обратите внимание, что символ подстановки `*` обрабатывается так же, как и в запросах `SELECT`: столбцы [MATERIALIZED](../../sql-reference/statements/create/table.md#materialized) и [ALIAS](../../sql-reference/statements/create/table.md#alias) не включаются в результат. - Если указать пустой список или выражение, которое возвращает пустой список, то сервер вернет ошибку. Запрос вида `DEDUPLICATE BY aliased_value` также вернет ошибку. - ::: +:::note Примечание +Обратите внимание, что символ подстановки `*` обрабатывается так же, как и в запросах `SELECT`: столбцы [MATERIALIZED](../../sql-reference/statements/create/table.md#materialized) и [ALIAS](../../sql-reference/statements/create/table.md#alias) не включаются в результат. +Если указать пустой список или выражение, которое возвращает пустой список, то сервер вернет ошибку. Запрос вида `DEDUPLICATE BY aliased_value` также вернет ошибку. 
+::: **Синтаксис** ``` sql diff --git a/docs/ru/sql-reference/statements/rename.md b/docs/ru/sql-reference/statements/rename.md index 6575dae96425..797a0d47f356 100644 --- a/docs/ru/sql-reference/statements/rename.md +++ b/docs/ru/sql-reference/statements/rename.md @@ -9,9 +9,9 @@ sidebar_label: RENAME Переименовывает базы данных, таблицы или словари. Несколько сущностей могут быть переименованы в одном запросе. Обратите внимание, что запрос `RENAME` с несколькими сущностями это неатомарная операция. Чтобы обменять имена атомарно, используйте выражение [EXCHANGE](./exchange.md). - :::note "Примечание" - Запрос `RENAME` поддерживается только движком баз данных [Atomic](../../engines/database-engines/atomic.md). - ::: +:::note Примечание +Запрос `RENAME` поддерживается только движком баз данных [Atomic](../../engines/database-engines/atomic.md). +::: **Синтаксис** ```sql diff --git a/docs/ru/sql-reference/statements/select/group-by.md b/docs/ru/sql-reference/statements/select/group-by.md index ea4f357d33c8..f6b0669156c4 100644 --- a/docs/ru/sql-reference/statements/select/group-by.md +++ b/docs/ru/sql-reference/statements/select/group-by.md @@ -13,9 +13,9 @@ sidebar_label: GROUP BY Если вы хотите для группировки данных в таблице указывать номера столбцов, а не названия, включите настройку [enable_positional_arguments](../../../operations/settings/settings.md#enable-positional-arguments). - :::note "Примечание" - Есть ещё один способ запустить агрегацию по таблице. Если запрос содержит столбцы исходной таблицы только внутри агрегатных функций, то `GROUP BY` секцию можно опустить, и предполагается агрегирование по пустому набору ключей. Такие запросы всегда возвращают ровно одну строку. - ::: +:::note Примечание +Есть ещё один способ запустить агрегацию по таблице. Если запрос содержит столбцы исходной таблицы только внутри агрегатных функций, то `GROUP BY` секцию можно опустить, и предполагается агрегирование по пустому набору ключей. Такие запросы всегда возвращают ровно одну строку. +::: ## Обработка NULL {#null-processing} При агрегации ClickHouse интерпретирует [NULL](../../syntax.md#null-literal) как обычное значение, то есть `NULL==NULL`. Это отличается от обработки `NULL` в большинстве других контекстов. @@ -52,9 +52,9 @@ sidebar_label: GROUP BY Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым строки уже сгруппированы, указывается значение `0` или пустая строка. - :::note "Примечание" - Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов. - ::: +:::note Примечание +Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов. +::: **Пример** Рассмотрим таблицу t: @@ -112,9 +112,9 @@ SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH ROLLUP; Строки с подытогами добавляются в конец результирующей таблицы. В колонках, по которым выполняется группировка, указывается значение `0` или пустая строка. - :::note "Примечание" - Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов. - ::: +:::note Примечание +Если в запросе есть секция [HAVING](../../../sql-reference/statements/select/having.md), она может повлиять на результаты расчета подытогов. 
+::: **Пример** Рассмотрим таблицу t: @@ -204,9 +204,9 @@ SELECT year, month, day, count(*) FROM t GROUP BY year, month, day WITH CUBE; - В `Pretty*` форматах, строка выводится в виде отдельной таблицы после основного результата. - В других форматах она не доступна. - :::note "Примечание" - totals выводится только в результатах запросов `SELECT`, и не вывоводится в `INSERT INTO ... SELECT`. - ::: +:::note Примечание +totals выводится только в результатах запросов `SELECT`, и не выводится в `INSERT INTO ... SELECT`. +::: При использовании секции [HAVING](having.md) поведение `WITH TOTALS` контролируется настройкой `totals_mode`. ### Настройка обработки итогов {#configuring-totals-processing} diff --git a/docs/ru/sql-reference/statements/select/join.md b/docs/ru/sql-reference/statements/select/join.md index 6be438f8c43f..612cf2760090 100644 --- a/docs/ru/sql-reference/statements/select/join.md +++ b/docs/ru/sql-reference/statements/select/join.md @@ -37,9 +37,9 @@ FROM - `LEFT ANY JOIN`, `RIGHT ANY JOIN` и `INNER ANY JOIN`, Частично (для противоположных сторон `LEFT` и `RIGHT`) или полностью (для `INNER` и `FULL`) отключает декартово произведение для стандартных видов `JOIN`. - `ASOF JOIN` и `LEFT ASOF JOIN`, Для соединения последовательностей по нечеткому совпадению. Использование `ASOF JOIN` описано ниже. - :::note "Примечание" - Если настройка [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) установлена в значение `partial_merge`, то для `RIGHT JOIN` и `FULL JOIN` поддерживается только уровень строгости `ALL` (`SEMI`, `ANTI`, `ANY` и `ASOF` не поддерживаются). - ::: +:::note Примечание +Если настройка [join_algorithm](../../../operations/settings/settings.md#settings-join_algorithm) установлена в значение `partial_merge`, то для `RIGHT JOIN` и `FULL JOIN` поддерживается только уровень строгости `ALL` (`SEMI`, `ANTI`, `ANY` и `ASOF` не поддерживаются). +::: ## Настройки {#join-settings} Значение строгости по умолчанию может быть переопределено с помощью настройки [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness). @@ -64,9 +64,9 @@ FROM Оператор `OR` внутри секции `ON` работает, используя алгоритм хеш-соединения — на каждый аргумент `OR` с ключами соединений для `JOIN` создается отдельная хеш-таблица, поэтому потребление памяти и время выполнения запроса растет линейно при увеличении количества выражений `OR` секции `ON`. - :::note "Примечание" - Если в условии использованы столбцы из разных таблиц, то пока поддерживается только оператор равенства (`=`). - ::: +:::note Примечание +Если в условии использованы столбцы из разных таблиц, то пока поддерживается только оператор равенства (`=`). +::: **Пример** Рассмотрим `table_1` и `table_2`: @@ -199,9 +199,9 @@ USING (equi_column1, ... equi_columnN, asof_column) `ASOF JOIN` принимает метку времени пользовательского события из `table_1` и находит такое событие в `table_2` метка времени которого наиболее близка к метке времени события из `table_1` в соответствии с условием на ближайшее совпадение. При этом столбец `user_id` используется для объединения по равенству, а столбец `ev_time` для объединения по ближайшему совпадению. В нашем примере `event_1_1` может быть объединено с `event_2_1`, `event_1_2` может быть объединено с `event_2_3`, а `event_2_2` не объединяется. - :::note "Примечание" - `ASOF JOIN` не поддержан для движка таблиц [Join](../../../engines/table-engines/special/join.md).
- ::: +:::note Примечание +`ASOF JOIN` не поддержан для движка таблиц [Join](../../../engines/table-engines/special/join.md). +::: Чтобы задать значение строгости по умолчанию, используйте сессионный параметр [join_default_strictness](../../../operations/settings/settings.md#settings-join_default_strictness). ## Распределённый JOIN {#global-join} diff --git a/docs/ru/sql-reference/statements/select/limit.md b/docs/ru/sql-reference/statements/select/limit.md index 73daa76fafa0..6ca26075f1a0 100644 --- a/docs/ru/sql-reference/statements/select/limit.md +++ b/docs/ru/sql-reference/statements/select/limit.md @@ -13,9 +13,9 @@ sidebar_label: LIMIT При отсутствии секции [ORDER BY](order-by.md), однозначно сортирующей результат, результат может быть произвольным и может являться недетерминированным. - :::note "Примечание" - Количество возвращаемых строк может зависеть также от настройки [limit](../../../operations/settings/settings.md#limit). - ::: +:::note Примечание +Количество возвращаемых строк может зависеть также от настройки [limit](../../../operations/settings/settings.md#limit). +::: ## Модификатор LIMIT ... WITH TIES {#limit-with-ties} Когда вы установите модификатор WITH TIES для `LIMIT n[,m]` и указываете `ORDER BY expr_list`, вы получите первые `n` или `n,m` строк и дополнительно все строки с теми же самым значениями полей указанных в `ORDER BY` равными строке на позиции `n` для `LIMIT n` или `m` для `LIMIT n,m`. diff --git a/docs/ru/sql-reference/statements/select/offset.md b/docs/ru/sql-reference/statements/select/offset.md index fac995b9a8e3..908455a8cd11 100644 --- a/docs/ru/sql-reference/statements/select/offset.md +++ b/docs/ru/sql-reference/statements/select/offset.md @@ -31,12 +31,12 @@ SELECT * FROM test_fetch ORDER BY a LIMIT 3 OFFSET 1; Опция `WITH TIES` используется для возврата дополнительных строк, которые привязываются к последней в результате запроса. Например, если `fetch_row_count` имеет значение 5 и существуют еще 2 строки с такими же значениями столбцов, указанных в `ORDER BY`, что и у пятой строки результата, то финальный набор будет содержать 7 строк. - :::note "Примечание" - Секция `OFFSET` должна находиться перед секцией `FETCH`, если обе присутствуют. - ::: - :::note "Примечание" - Общее количество пропущенных строк может зависеть также от настройки [offset](../../../operations/settings/settings.md#offset). - ::: +:::note Примечание +Секция `OFFSET` должна находиться перед секцией `FETCH`, если обе присутствуют. +::: +:::note Примечание +Общее количество пропущенных строк может зависеть также от настройки [offset](../../../operations/settings/settings.md#offset). +::: ## Примеры {#examples} Входная таблица: diff --git a/docs/ru/sql-reference/statements/select/prewhere.md b/docs/ru/sql-reference/statements/select/prewhere.md index 092370d4b3a6..d2595cf22a39 100644 --- a/docs/ru/sql-reference/statements/select/prewhere.md +++ b/docs/ru/sql-reference/statements/select/prewhere.md @@ -19,9 +19,9 @@ Prewhere — это оптимизация для более эффективн Если в запросе есть модификатор [FINAL](from.md#select-from-final), оптимизация `PREWHERE` не всегда корректна. Она действует только если включены обе настройки [optimize_move_to_prewhere](../../../operations/settings/settings.md#optimize_move_to_prewhere) и [optimize_move_to_prewhere_if_final](../../../operations/settings/settings.md#optimize_move_to_prewhere_if_final). - :::note "Внимание" - Секция `PREWHERE` выполняется до `FINAL`, поэтому результаты запросов `FROM ... 
FINAL` могут исказиться при использовании `PREWHERE` с полями, не входящями в `ORDER BY` таблицы. - ::: +:::note Внимание +Секция `PREWHERE` выполняется до `FINAL`, поэтому результаты запросов `FROM ... FINAL` могут исказиться при использовании `PREWHERE` с полями, не входящими в `ORDER BY` таблицы. +::: ## Ограничения {#limitations} `PREWHERE` поддерживается только табличными движками из семейства [*MergeTree](../../../engines/table-engines/mergetree-family/index.md). diff --git a/docs/ru/sql-reference/statements/select/sample.md b/docs/ru/sql-reference/statements/select/sample.md index decef52d06f0..4edc91c34e4c 100644 --- a/docs/ru/sql-reference/statements/select/sample.md +++ b/docs/ru/sql-reference/statements/select/sample.md @@ -13,9 +13,9 @@ sidebar_label: SAMPLE 2. Возможности аппаратной части не позволяют соответствовать строгим критериям. Например, время ответа должно быть <100 мс. При этом точность расчета имеет более низкий приоритет. 3. Точность результата участвует в бизнес-модели сервиса. Например, пользователи с бесплатной подпиской на сервис могут получать отчеты с меньшей точностью, чем пользователи с премиум подпиской. - :::note "Внимание" - Не стоит использовать сэмплирование в тех задачах, где важна точность расчетов. Например, при работе с финансовыми отчетами. - ::: +:::note Внимание +Не стоит использовать сэмплирование в тех задачах, где важна точность расчетов. Например, при работе с финансовыми отчетами. +::: Свойства сэмплирования: - Сэмплирование работает детерминированно. При многократном выполнении одного и того же запроса `SELECT .. SAMPLE`, результат всегда будет одинаковым. @@ -60,9 +60,9 @@ ORDER BY PageViews DESC LIMIT 1000 Если задано выражение `SAMPLE n`, запрос будет выполнен для выборки из не менее `n` строк (но не значительно больше этого значения). Например, если задать `SAMPLE 10000000`, в выборку попадут не менее 10,000,000 строк. - :::note "Примечание" - Следует иметь в виду, что `n` должно быть достаточно большим числом. Так как минимальной единицей данных для чтения является одна гранула (её размер задаётся настройкой `index_granularity` для таблицы), имеет смысл создавать выборки, размер которых существенно превосходит размер гранулы. - ::: +:::note Примечание +Следует иметь в виду, что `n` должно быть достаточно большим числом. Так как минимальной единицей данных для чтения является одна гранула (её размер задаётся настройкой `index_granularity` для таблицы), имеет смысл создавать выборки, размер которых существенно превосходит размер гранулы. +::: При выполнении `SAMPLE n` коэффициент сэмплирования заранее неизвестен (то есть нет информации о том, относительно какого количества данных будет сформирована выборка). Чтобы узнать коэффициент сэмплирования, используйте столбец `_sample_factor`. Виртуальный столбец `_sample_factor` автоматически создается в тех таблицах, для которых задано выражение `SAMPLE BY` (подробнее см. в разделе [Создание таблицы MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-creating-a-table)). В столбце содержится коэффициент сэмплирования для таблицы – он рассчитывается динамически по мере добавления данных в таблицу. Ниже приведены примеры использования столбца `_sample_factor`.
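The sample.md hunk above describes `SAMPLE n` and the virtual `_sample_factor` column, which scales partial aggregates back to an estimate over the full table. A minimal sketch of that usage, assuming a hypothetical `visits` table created with a `SAMPLE BY` key and a `PageViews` column (names are illustrative, not from the patch):

``` sql
-- Read a sample of at least ~10,000,000 rows, then multiply by the
-- dynamically computed sampling coefficient to approximate the true total.
SELECT sum(PageViews * _sample_factor)
FROM visits
SAMPLE 10000000;
```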
diff --git a/docs/ru/sql-reference/statements/select/where.md b/docs/ru/sql-reference/statements/select/where.md index f63ea121d4a1..10057cdeb844 100644 --- a/docs/ru/sql-reference/statements/select/where.md +++ b/docs/ru/sql-reference/statements/select/where.md @@ -11,9 +11,9 @@ sidebar_label: WHERE ClickHouse использует в выражении индексы, если это позволяет [движок таблицы](../../../engines/table-engines/index.md). - :::note "Примечание" - Существует оптимизация фильтрации под названием [PREWHERE](prewhere.md). - ::: +:::note Примечание +Существует оптимизация фильтрации под названием [PREWHERE](prewhere.md). +::: Если в секции необходимо проверить [NULL](../../../sql-reference/syntax.md#null-literal), то используйте операторы [IS NULL](../../operators/index.md#operator-is-null) и [IS NOT NULL](../../operators/index.md#is-not-null), а также соответствующие функции [isNull](../../../sql-reference/functions/functions-for-nulls.md#isnull) и [isNotNull](../../../sql-reference/functions/functions-for-nulls.md#isnotnull). В противном случае выражение будет считаться всегда не выполненным. **Пример** diff --git a/docs/ru/sql-reference/statements/show.md b/docs/ru/sql-reference/statements/show.md index 59f33c691ae2..b3694002cb5f 100644 --- a/docs/ru/sql-reference/statements/show.md +++ b/docs/ru/sql-reference/statements/show.md @@ -367,8 +367,8 @@ SHOW ACCESS Возвращает список кластеров. Все доступные кластеры перечислены в таблице [system.clusters](../../operations/system-tables/clusters.md). -:::info "Note" - По запросу `SHOW CLUSTER name` вы получите содержимое таблицы system.clusters для этого кластера. +:::info Примечание +По запросу `SHOW CLUSTER name` вы получите содержимое таблицы system.clusters для этого кластера. ::: ### Синтаксис {#show-cluster-syntax} diff --git a/docs/ru/sql-reference/statements/system.md b/docs/ru/sql-reference/statements/system.md index ec30a031643d..bcaf70e08248 100644 --- a/docs/ru/sql-reference/statements/system.md +++ b/docs/ru/sql-reference/statements/system.md @@ -166,9 +166,9 @@ ClickHouse может управлять фоновыми процессами SYSTEM STOP MERGES [ON CLUSTER cluster_name] [ON VOLUME | [db.]merge_tree_family_table_name] ``` - :::note - `DETACH / ATTACH` таблицы восстанавливает фоновые мержи для этой таблицы (даже в случае отключения фоновых мержей для всех таблиц семейства MergeTree до `DETACH`). - ::: +:::note Примечание +`DETACH / ATTACH` таблицы восстанавливает фоновые мержи для этой таблицы (даже в случае отключения фоновых мержей для всех таблиц семейства MergeTree до `DETACH`). +::: ### START MERGES {#query_language-system-start-merges} Включает фоновые мержи для таблиц семейства MergeTree: @@ -313,8 +313,9 @@ SYSTEM RESTART REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_fami К реплике прикрепляются локально найденные куски, информация о них отправляется в Zookeeper. Если присутствующие в реплике до потери метаданных данные не устарели, они не скачиваются повторно с других реплик. Поэтому восстановление реплики не означает повторную загрузку всех данных по сети. -:::danger "Предупреждение" - Потерянные данные в любых состояниях перемещаются в папку `detached/`. Куски, активные до потери данных (находившиеся в состоянии Committed), прикрепляются. +:::danger Предупреждение +Потерянные данные в любых состояниях перемещаются в папку `detached/`. Куски, активные до потери данных (находившиеся в состоянии Committed), прикрепляются. 
+::: **Синтаксис** diff --git a/docs/ru/sql-reference/statements/truncate.md b/docs/ru/sql-reference/statements/truncate.md index cd918d198142..9f69dd41cfd6 100644 --- a/docs/ru/sql-reference/statements/truncate.md +++ b/docs/ru/sql-reference/statements/truncate.md @@ -18,6 +18,6 @@ TRUNCATE TABLE [IF EXISTS] [db.]name [ON CLUSTER cluster] Вы можете указать время ожидания (в секундах) выполнения запросов `TRUNCATE` для неактивных реплик с помощью настройки [replication_wait_for_inactive_replica_timeout](../../operations/settings/settings.md#replication-wait-for-inactive-replica-timeout). -:::info "Примечание" - Если значение настройки `alter_sync` равно `2` и некоторые реплики не активны больше времени, заданного настройкой `replication_wait_for_inactive_replica_timeout`, то генерируется исключение `UNFINISHED`. +:::info Примечание +Если значение настройки `alter_sync` равно `2` и некоторые реплики не активны больше времени, заданного настройкой `replication_wait_for_inactive_replica_timeout`, то генерируется исключение `UNFINISHED`. ::: diff --git a/docs/ru/sql-reference/statements/watch.md b/docs/ru/sql-reference/statements/watch.md index 3a4bfb7dd005..2d7a318631ad 100644 --- a/docs/ru/sql-reference/statements/watch.md +++ b/docs/ru/sql-reference/statements/watch.md @@ -6,10 +6,10 @@ sidebar_label: WATCH # Запрос WATCH {#watch} - :::note "Важно" - Это экспериментальная функция. Она может повлечь потерю совместимости в будущих версиях. - Чтобы использовать `LIVE VIEW` и запросы `WATCH`, включите настройку `set allow_experimental_live_view = 1`. - ::: +:::note Важно +Это экспериментальная функция. Она может повлечь потерю совместимости в будущих версиях. +Чтобы использовать `LIVE VIEW` и запросы `WATCH`, включите настройку `set allow_experimental_live_view = 1`. +::: **Синтаксис** ``` sql @@ -103,6 +103,6 @@ WATCH lv EVENTS LIMIT 1; Параметр `FORMAT` работает аналогично одноименному параметру запроса [SELECT](../../sql-reference/statements/select/format.md#format-clause). -:::info "Примечание" - При отслеживании [LIVE VIEW](./create/view.md#live-view) через интерфейс HTTP следует использовать формат [JSONEachRowWithProgress](../../interfaces/formats.md#jsoneachrowwithprogress). Постоянные сообщения об изменениях будут добавлены в поток вывода для поддержания активности долговременного HTTP-соединения до тех пор, пока результат запроса изменяется. Проомежуток времени между сообщениями об изменениях управляется настройкой[live_view_heartbeat_interval](./create/view.md#live-view-settings). +:::info Примечание +При отслеживании [LIVE VIEW](./create/view.md#live-view) через интерфейс HTTP следует использовать формат [JSONEachRowWithProgress](../../interfaces/formats.md#jsoneachrowwithprogress). Постоянные сообщения об изменениях будут добавлены в поток вывода для поддержания активности долговременного HTTP-соединения до тех пор, пока результат запроса изменяется. Промежуток времени между сообщениями об изменениях управляется настройкой [live_view_heartbeat_interval](./create/view.md#live-view-settings).
- :::note "Примечание" - Все доступные кластеры перечислены в таблице [system.clusters](../../operations/system-tables/clusters.md). - ::: +:::note Примечание +Все доступные кластеры перечислены в таблице [system.clusters](../../operations/system-tables/clusters.md). +::: **Синтаксис** ``` sql diff --git a/docs/ru/sql-reference/table-functions/file.md b/docs/ru/sql-reference/table-functions/file.md index 0983c51d954c..f698554dcf90 100644 --- a/docs/ru/sql-reference/table-functions/file.md +++ b/docs/ru/sql-reference/table-functions/file.md @@ -79,7 +79,7 @@ SELECT * FROM file('test.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 U - `*` — заменяет любое количество любых символов кроме `/`, включая отсутствие символов. - `?` — заменяет ровно один любой символ. -- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. +- `{some_string,another_string,yet_another_one}` — заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. Эти строки также могут содержать символ `/`. - `{N..M}` — заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули). Конструкция с `{}` аналогична табличной функции [remote](remote.md). @@ -107,8 +107,9 @@ SELECT count(*) FROM file('{some,another}_dir/some_file_{1..3}', 'TSV', 'name St SELECT count(*) FROM file('{some,another}_dir/*', 'TSV', 'name String, value UInt32'); ``` -:::danger "Предупреждение" - Если ваш список файлов содержит интервал с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры по отдельности или используйте `?`. +:::danger Предупреждение +Если ваш список файлов содержит интервал с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры по отдельности или используйте `?`. +::: **Пример** @@ -123,6 +124,7 @@ SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, - `_path` — путь к файлу. - `_file` — имя файла. + **Смотрите также** - [Виртуальные столбцы](index.md#table_engines-virtual_columns) diff --git a/docs/ru/sql-reference/table-functions/hdfs.md b/docs/ru/sql-reference/table-functions/hdfs.md index b0d182eef147..b70de5e3a4fe 100644 --- a/docs/ru/sql-reference/table-functions/hdfs.md +++ b/docs/ru/sql-reference/table-functions/hdfs.md @@ -39,17 +39,18 @@ LIMIT 2 └─────────┴─────────┴─────────┘ ``` -**Шаблоны в пути** +## Шаблоны поиска в компонентах пути {#globs-in-path} - `*` — Заменяет любое количество любых символов кроме `/`, включая отсутствие символов. - `?` — Заменяет ровно один любой символ. -- `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. +- `{some_string,another_string,yet_another_one}` — Заменяет любую из строк `'some_string', 'another_string', 'yet_another_one'`. Эти строки также могут содержать символ `/`. - `{N..M}` — Заменяет любое число в интервале от `N` до `M` включительно (может содержать ведущие нули). Конструкция с `{}` аналогична табличной функции [remote](remote.md). -:::danger "Warning" - Если ваш список файлов содержит интервал с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры по отдельности или используйте `?`. +:::danger Предупреждение +Если ваш список файлов содержит интервал с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры по отдельности или используйте `?`. +::: Шаблоны могут содержаться в разных частях пути. 
Обрабатываться будут ровно те файлы, которые и удовлетворяют всему шаблону пути, и существуют в файловой системе. @@ -61,3 +62,4 @@ LIMIT 2 **Смотрите также** - [Виртуальные столбцы](index.md#table_engines-virtual_columns) + diff --git a/docs/ru/sql-reference/table-functions/index.md b/docs/ru/sql-reference/table-functions/index.md index 949cd7dce98a..3de57abbca73 100644 --- a/docs/ru/sql-reference/table-functions/index.md +++ b/docs/ru/sql-reference/table-functions/index.md @@ -20,8 +20,9 @@ sidebar_position: 34 - Запросе [INSERT INTO TABLE FUNCTION](../../sql-reference/statements/insert-into.md#inserting-into-table-function). -:::danger "Предупреждение" - Если настройка [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) выключена, то использовать табличные функции невозможно. +:::danger Предупреждение +Если настройка [allow_ddl](../../operations/settings/permissions-for-queries.md#settings_allow_ddl) выключена, то использовать табличные функции невозможно. +::: | Функция | Описание | |-----------------------|---------------------------------------------------------------------------------------------------------------------------------------| diff --git a/docs/ru/sql-reference/table-functions/mysql.md b/docs/ru/sql-reference/table-functions/mysql.md index 9c50cfcb3071..5cd514def4f4 100644 --- a/docs/ru/sql-reference/table-functions/mysql.md +++ b/docs/ru/sql-reference/table-functions/mysql.md @@ -55,9 +55,9 @@ SELECT name FROM mysql(`mysql1:3306|mysql2:3306|mysql3:3306`, 'mysql_database', Объект таблицы с теми же столбцами, что и в исходной таблице MySQL. - :::note "Примечание" - Чтобы отличить табличную функцию `mysql (...)` в запросе `INSERT` от имени таблицы со списком столбцов, используйте ключевые слова `FUNCTION` или `TABLE FUNCTION`. См. примеры ниже. - ::: +:::note Примечание +Чтобы отличить табличную функцию `mysql (...)` в запросе `INSERT` от имени таблицы со списком столбцов, используйте ключевые слова `FUNCTION` или `TABLE FUNCTION`. См. примеры ниже. +::: **Примеры** Таблица в MySQL: diff --git a/docs/ru/sql-reference/table-functions/postgresql.md b/docs/ru/sql-reference/table-functions/postgresql.md index 0b1f437b98fd..4f705ea821c4 100644 --- a/docs/ru/sql-reference/table-functions/postgresql.md +++ b/docs/ru/sql-reference/table-functions/postgresql.md @@ -27,8 +27,8 @@ postgresql('host:port', 'database', 'table', 'user', 'password'[, `schema`]) Таблица с теми же столбцами, что и в исходной таблице PostgreSQL. -:::info "Примечание" - В запросах `INSERT` для того чтобы отличить табличную функцию `postgresql(...)` от таблицы со списком имен столбцов вы должны указывать ключевые слова `FUNCTION` или `TABLE FUNCTION`. См. примеры ниже. +:::info Примечание +В запросах `INSERT` для того чтобы отличить табличную функцию `postgresql(...)` от таблицы со списком имен столбцов вы должны указывать ключевые слова `FUNCTION` или `TABLE FUNCTION`. См. примеры ниже. ::: ## Особенности реализации {#implementation-details} @@ -43,8 +43,8 @@ postgresql('host:port', 'database', 'table', 'user', 'password'[, `schema`]) PostgreSQL массивы конвертируются в массивы ClickHouse. -:::info "Примечание" - Будьте внимательны, в PostgreSQL массивы, созданные как `type_name[]`, являются многомерными и могут содержать в себе разное количество измерений в разных строках одной таблицы. Внутри ClickHouse допустипы только многомерные массивы с одинаковым кол-вом измерений во всех строках таблицы. 
+:::info Примечание +Будьте внимательны, в PostgreSQL массивы, созданные как `type_name[]`, являются многомерными и могут содержать в себе разное количество измерений в разных строках одной таблицы. Внутри ClickHouse допустимы только многомерные массивы с одинаковым кол-вом измерений во всех строках таблицы. ::: Поддерживает несколько реплик, которые должны быть перечислены через `|`. Например: diff --git a/docs/ru/sql-reference/table-functions/s3.md b/docs/ru/sql-reference/table-functions/s3.md index 9e456ddd3d97..7deef68f47fc 100644 --- a/docs/ru/sql-reference/table-functions/s3.md +++ b/docs/ru/sql-reference/table-functions/s3.md @@ -104,8 +104,9 @@ FROM s3('https://storage.yandexcloud.net/my-test-bucket-768/{some,another}_prefi └─────────┘ ``` -:::danger "Warning" - Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. +:::danger Предупреждение +Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. +::: Подсчитаем общее количество строк в файлах с именами `file-000.csv`, `file-001.csv`, … , `file-999.csv`: diff --git a/docs/ru/sql-reference/table-functions/s3Cluster.md b/docs/ru/sql-reference/table-functions/s3Cluster.md index 1c12913fabe1..b8f34d805ffc 100644 --- a/docs/ru/sql-reference/table-functions/s3Cluster.md +++ b/docs/ru/sql-reference/table-functions/s3Cluster.md @@ -40,8 +40,9 @@ SELECT * FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickho SELECT count(*) FROM s3Cluster('cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))'); ``` -:::danger "Внимание" - Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. +:::danger Внимание +Если список файлов содержит диапазоны чисел с ведущими нулями, используйте конструкцию с фигурными скобками для каждой цифры отдельно или используйте `?`. +::: **Смотрите также** diff --git a/docs/zh/development/build.md b/docs/zh/development/build.md index d76f4b1577c2..bb25755a615c 100644 --- a/docs/zh/development/build.md +++ b/docs/zh/development/build.md @@ -3,13 +3,6 @@ slug: /zh/development/build --- # 如何构建 ClickHouse 发布包 {#ru-he-gou-jian-clickhouse-fa-bu-bao} -## 安装 Git 和 Pbuilder {#an-zhuang-git-he-pbuilder} - -``` bash -sudo apt-get update -sudo apt-get install git pbuilder debhelper lsb-release fakeroot sudo debian-archive-keyring debian-keyring -``` - ## 拉取 ClickHouse 源码 {#la-qu-clickhouse-yuan-ma} ``` bash diff --git a/docs/zh/development/developer-instruction.md b/docs/zh/development/developer-instruction.md index 557bf33ee0c9..1c1d4a03d64b 100644 --- a/docs/zh/development/developer-instruction.md +++ b/docs/zh/development/developer-instruction.md @@ -270,6 +270,4 @@ ClickHouse成员一旦在您的拉取请求上贴上«可以测试»标签,就 ## 浏览ClickHouse源代码 {#browse-clickhouse-source-code} -您可以使用 **Woboq** 在线代码浏览器 [点击这里](https://clickhouse.com/codebrowser/ClickHouse/src/index.html).
它提供了代码导航和语义突出显示、搜索和索引。 代码快照每天更新。 - 此外,您还可以像往常一样浏览源代码 [GitHub](https://github.com/ClickHouse/ClickHouse) diff --git a/docs/zh/engines/database-engines/materialized-mysql.md b/docs/zh/engines/database-engines/materialized-mysql.md index 5c735556c48f..4432cdcb5389 100644 --- a/docs/zh/engines/database-engines/materialized-mysql.md +++ b/docs/zh/engines/database-engines/materialized-mysql.md @@ -54,8 +54,9 @@ CREATE DATABASE mysql ENGINE = MaterializedMySQL('localhost:3306', 'db', 'user', - `default_authentication_plugin = mysql_native_password `,因为 `MaterializedMySQL` 只能授权使用该方法。 - `gtid_mode = on`,因为基于GTID的日志记录是提供正确的 `MaterializedMySQL`复制的强制要求。 - :::info "注意" - 当打开`gtid_mode`时,您还应该指定`enforce_gtid_consistency = on`。 +:::info "注意" +当打开`gtid_mode`时,您还应该指定`enforce_gtid_consistency = on`。 +::: ## 虚拟列 {#virtual-columns} diff --git a/docs/zh/engines/table-engines/integrations/hive.md b/docs/zh/engines/table-engines/integrations/hive.md index fa1bf959ed64..e7af43863e31 100644 --- a/docs/zh/engines/table-engines/integrations/hive.md +++ b/docs/zh/engines/table-engines/integrations/hive.md @@ -61,7 +61,7 @@ PARTITION BY expr - limit_size: 必需的。本地缓存文件的最大大小(单位为字节)。 - bytes_read_before_flush: 从远程文件系统下载文件时,刷新到本地文件系统前的控制字节数。缺省值为1MB。 -当ClickHouse为远程文件系统启用了本地缓存时,用户仍然可以选择不使用缓存,并在查询中设置`use_local_cache_for_remote_fs = 0 `, `use_local_cache_for_remote_fs` 默认为 `false`。 +当ClickHouse为远程文件系统启用了本地缓存时,用户仍然可以选择不使用缓存,并在查询中设置 `use_local_cache_for_remote_storage = 0`, `use_local_cache_for_remote_storage` 默认为 `1`。 ### 查询 ORC 输入格式的Hive 表 diff --git a/docs/zh/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/aggregatingmergetree.md index c6c2888801fc..ada004d65581 100644 --- a/docs/zh/engines/table-engines/mergetree-family/aggregatingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/aggregatingmergetree.md @@ -40,8 +40,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] 已弃用的建表方法 - :::info "注意" - 不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +:::info "注意" +不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +::: ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md index e31c40ec04d6..23326f5f95ab 100644 --- a/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/collapsingmergetree.md @@ -40,8 +40,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] 已弃用的建表方法 - :::info "注意" - 不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +:::info "注意" +不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +::: ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/zh/engines/table-engines/mergetree-family/mergetree.md b/docs/zh/engines/table-engines/mergetree-family/mergetree.md index 6775662d5557..cec4cb09047c 100644 --- a/docs/zh/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/mergetree.md @@ -124,15 +124,18 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa
已弃用的建表方法 - :::attention "注意" - 不要在新版项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +:::attention "注意" +不要在新版项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +::: - CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] - ( - name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], - ... - ) ENGINE [=] MergeTree(date-column [, sampling_expression], (primary, key), index_granularity) +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... +) ENGINE [=] MergeTree(date-column [, sampling_expression], (primary, key), index_granularity) +``` **MergeTree() 参数** @@ -742,8 +745,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' 4 1000 /var/lib/clickhouse/disks/s3/ - true - /var/lib/clickhouse/disks/s3/cache/ false @@ -769,8 +770,6 @@ SETTINGS storage_policy = 'moving_from_ssd_to_hdd' - `single_read_retries` - 读过程中连接丢失后重试次数,默认值为4。 - `min_bytes_for_seek` - 使用查找操作,而不是顺序读操作的最小字节数,默认值为1000。 - `metadata_path` - 本地存放S3元数据文件的路径,默认值为`/var/lib/clickhouse/disks//` -- `cache_enabled` - 是否允许缓存标记和索引文件。默认值为`true`。 -- `cache_path` - 本地缓存标记和索引文件的路径。默认值为`/var/lib/clickhouse/disks//cache/`。 - `skip_access_check` - 如果为`true`,Clickhouse启动时不检查磁盘是否可用。默认为`false`。 - `server_side_encryption_customer_key_base64` - 如果指定该项的值,请求时会加上为了访问SSE-C加密数据而必须的头信息。 @@ -820,4 +819,3 @@ S3磁盘也可以设置冷热存储: - `_part_uuid` - 唯一部分标识符(如果 MergeTree 设置`assign_part_uuids` 已启用)。 - `_partition_value` — `partition by` 表达式的值(元组)。 - `_sample_factor` - 采样因子(来自请求)。 - diff --git a/docs/zh/engines/table-engines/mergetree-family/replacingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/replacingmergetree.md index f5f59b7510f7..707016788c4a 100644 --- a/docs/zh/engines/table-engines/mergetree-family/replacingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/replacingmergetree.md @@ -43,8 +43,9 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] 已弃用的建表方法 - :::info "注意" - 不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +:::info "注意" +不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +::: ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/zh/engines/table-engines/mergetree-family/sharedmergetree.md b/docs/zh/engines/table-engines/mergetree-family/sharedmergetree.md new file mode 100644 index 000000000000..e97f98b05a48 --- /dev/null +++ b/docs/zh/engines/table-engines/mergetree-family/sharedmergetree.md @@ -0,0 +1,103 @@ +--- +slug: /zh/engines/table-engines/mergetree-family/sharedmergetree +--- +# SharedMergeTree {#sharedmergetree} + + +仅在ClickHouse Cloud(以及第一方合作伙伴云服务)中提供 + +SharedMergeTree表引擎系列是ReplicatedMergeTree引擎的云原生替代方案,经过优化,适用于共享对象存储(例如Amazon S3、Google Cloud Storage、MinIO、Azure Blob Storage)。每个特定的MergeTree引擎类型都有对应的SharedMergeTree引擎,例如ReplacingSharedMergeTree替代ReplacingReplicatedMergeTree。 + +SharedMergeTree表引擎为ClickHouse Cloud的性能带来了显著提升。对于终端用户来说,无需做任何改变即可开始使用SharedMergeTree引擎系列,而不是基于ReplicatedMergeTree的引擎。它提供的好处包括: + +- 更高的插入吞吐量 +- 后台合并的吞吐量提高 +- Mutation操作的吞吐量提高 +- 更快的扩容和缩容操作 +- 用于选择查询的更轻量级强一致性 + +SharedMergeTree带来的一个重要改进是,与ReplicatedMergeTree相比,它提供了更彻底的计算和存储分离。下图展示了ReplicatedMergeTree如何分离计算和存储: + +![ReplicatedMergeTree Diagram](../../../images/shared-merge-tree-1.png) + +正如您所见,尽管存储在ReplicatedMergeTree中的数据位于对象存储中,但元数据仍存储在每个clickhouse-server上。这意味着对于每个复制操作,元数据也需要在所有副本上进行复制。 + +![ReplicatedMergeTree Diagram](../../../images/shared-merge-tree-2.png) + 
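+上文提到,ReplicatedMergeTree 的元数据需要在每个副本上复制。作为一个示意性的对比(表名与列名均为假设,并假设已配置 `{shard}`/`{replica}` 等宏),下面的 ReplicatedMergeTree 建表语句会在 keeper 中按路径登记该表的元数据——正是这类逐副本复制的元数据,在 SharedMergeTree 中不再需要:
+
+```sql
+CREATE TABLE events_local
+(
+    `id` UInt64,
+    `ts` DateTime
+)
+-- 路径参数会把该表的元数据登记到 keeper 中,并在每个副本上各登记一份
+ENGINE = ReplicatedMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}')
+ORDER BY id;
+```
+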
+与ReplicatedMergeTree不同,SharedMergeTree不需要副本之间进行通信。相反,所有通信都通过共享存储和clickhouse-keeper进行。SharedMergeTree实现了异步无领导复制,并使用clickhouse-keeper进行协调和元数据存储。这意味着随着服务的扩展,不需要复制元数据。这可以加快复制、变更、合并和扩展操作。SharedMergeTree允许每个表有数百个副本,使得无需分片即可进行动态扩展。这也意味着在ClickHouse Cloud中,使用分布式查询执行方法可以利用更多的计算资源来执行查询。 + + +## 系统监控 + +用于系统监控的ReplicatedMergeTree的大部分系统表(system table)在SharedMergeTree中也存在,唯独没有`system.replication_queue`和`system.replicated_fetches`,因为没有数据和元数据的复制。然而,SharedMergeTree对这两个表有相应的替代表。 + +`system.virtual_parts` + +这个表作为SharedMergeTree对 `system.replication_queue` 的替代,存储关于最新的一组data parts,以及未来正在进行的合并、变更和删除parts。 + +`system.shared_merge_tree_fetches` + +这个表是SharedMergeTree对`system.replicated_fetches`的替代。它包含关于正在加载入内存的主键和校验码信息。 + +## 使用SharedMergeTree + +SharedMergeTree已经是所有开发实例(development tier)中的默认表引擎,并且可以通过提交支持工单在生产环境实例(product tier)中启用:https://clickhouse.cloud/support。 + +对于支持SharedMergeTree表引擎的实例,您不需要做任何额外变更。您可以像以前一样创建表,它会自动使用基于SharedMergeTree的表引擎,该引擎与您在CREATE TABLE查询中指定的引擎相对应。 + +通过使用 SharedMergeTree 表引擎可以创建 my_table 表。 + +```sql +CREATE TABLE my_table( + key UInt64, + value String +) +ENGINE = MergeTree +ORDER BY key +``` + +在ClickHouse Cloud中,由于 `default_table_engine=MergeTree`,用户不必再特别设置`ENGINE=MergeTree`。下面的查询语句和上面的完全一样。 + +```sql + +CREATE TABLE my_table( + key UInt64, + value String +) +ORDER BY key +``` + +如果您使用Replacing、Collapsing、Aggregating、Summing、VersionedCollapsing、Graphite MergeTree表,它们将自动转换为相应的基于SharedMergeTree的表引擎。 + +```sql +CREATE TABLE myFirstReplacingMT +( + `key` Int64, + `someCol` String, + `eventTime` DateTime +) +ENGINE = ReplacingMergeTree +ORDER BY key; +``` + +您可以使用SHOW CREATE TABLE查看用于创建表的语句。 + +``` sql +SHOW CREATE TABLE myFirstReplacingMT; +``` + +```sql +CREATE TABLE default.myFirstReplacingMT +( `key` Int64, `someCol` String, `eventTime` DateTime ) +ENGINE = SharedReplacingMergeTree('/clickhouse/tables/{uuid}/{shard}', '{replica}') +ORDER BY key +SETTINGS index_granularity = 8192 +``` + +## 配置 + +一些配置的行为发生了显著的改变: + +- `insert_quorum` -- 所有对SharedMergeTree的insert都是quorum insert(写入共享对象存储),因此在使用SharedMergeTree表引擎时不需要此设置。 +- `insert_quorum_parallel` -- 所有对SharedMergeTree的insert都是quorum insert(写入共享对象存储)。 +- `select_sequential_consistency` -- 不需要quorum inserts,会引起在SELECT查询中向clickhouse-keeper增加附加的请求。 diff --git a/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md index f59d327b4ae2..849bf7d9ce15 100644 --- a/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md @@ -37,15 +37,18 @@ slug: /zh/engines/table-engines/mergetree-family/summingmergetree 已弃用的建表方法 - :::info "注意" - 不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +:::info "注意" +不要在新项目中使用该方法,可能的话,请将旧项目切换到上述方法。 +::: - CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] - ( - name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], - name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], - ... - ) ENGINE [=] SummingMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, [columns]) +``` sql +CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] +( + name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1], + name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2], + ... 
+) ENGINE [=] SummingMergeTree(date-column [, sampling_expression], (primary, key), index_granularity, [columns]) +``` 除 `columns` 外的所有参数都与 `MergeTree` 中的含义相同。 diff --git a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md index ebe06977dec4..0b1299070620 100644 --- a/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/versionedcollapsingmergetree.md @@ -54,8 +54,9 @@ VersionedCollapsingMergeTree(sign, version) 不推荐使用的创建表的方法 - :::info "注意" - 不要在新项目中使用此方法。 如果可能,请将旧项目切换到上述方法。 +:::info "注意" +不要在新项目中使用此方法。 如果可能,请将旧项目切换到上述方法。 +::: ``` sql CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster] diff --git a/docs/zh/engines/table-engines/special/buffer.md b/docs/zh/engines/table-engines/special/buffer.md index bb95ecdc5832..f92a819f3c32 100644 --- a/docs/zh/engines/table-engines/special/buffer.md +++ b/docs/zh/engines/table-engines/special/buffer.md @@ -5,7 +5,7 @@ slug: /zh/engines/table-engines/special/buffer 缓冲数据写入 RAM 中,周期性地将数据刷新到另一个表。在读取操作时,同时从缓冲区和另一个表读取数据。 - Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes) + Buffer(database, table, num_layers, min_time, max_time, min_rows, max_rows, min_bytes, max_bytes [,flush_time [,flush_rows [,flush_bytes]]]) 引擎的参数:database,table - 要刷新数据的表。可以使用返回字符串的常量表达式而不是数据库名称。 num_layers - 并行层数。在物理上,该表将表示为 num_layers 个独立缓冲区。建议值为16。min_time,max_time,min_rows,max_rows,min_bytes,max_bytes - 从缓冲区刷新数据的条件。 diff --git a/docs/zh/faq/general/why-clickhouse-is-so-fast.md b/docs/zh/faq/general/why-clickhouse-is-so-fast.md index 1962b8b90c29..a7df6aec2071 100644 --- a/docs/zh/faq/general/why-clickhouse-is-so-fast.md +++ b/docs/zh/faq/general/why-clickhouse-is-so-fast.md @@ -60,3 +60,4 @@ Last but not least, the ClickHouse team always monitors the Internet on people c - Benchmark on real datasets. - Test for performance regressions in CI. - Measure and observe everything. 
+::: diff --git a/docs/zh/getting-started/example-datasets/wikistat.md b/docs/zh/getting-started/example-datasets/wikistat.md deleted file mode 100644 index 4ce13b0f1d3a..000000000000 --- a/docs/zh/getting-started/example-datasets/wikistat.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -slug: /zh/getting-started/example-datasets/wikistat -sidebar_position: 17 -sidebar_label: WikiStat ---- - -# WikiStat {#wikistat} - -参考: http://dumps.wikimedia.org/other/pagecounts-raw/ - -创建表结构: - -``` sql -CREATE TABLE wikistat -( - date Date, - time DateTime, - project String, - subproject String, - path String, - hits UInt64, - size UInt64 -) ENGINE = MergeTree(date, (path, time), 8192); -``` - -加载数据: - -``` bash -$ for i in {2007..2016}; do for j in {01..12}; do echo $i-$j >&2; curl -sSL "http://dumps.wikimedia.org/other/pagecounts-raw/$i/$i-$j/" | grep -oE 'pagecounts-[0-9]+-[0-9]+\.gz'; done; done | sort | uniq | tee links.txt -$ cat links.txt | while read link; do wget http://dumps.wikimedia.org/other/pagecounts-raw/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1/')/$(echo $link | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})[0-9]{2}-[0-9]+\.gz/\1-\2/')/$link; done -$ ls -1 /opt/wikistat/ | grep gz | while read i; do echo $i; gzip -cd /opt/wikistat/$i | ./wikistat-loader --time="$(echo -n $i | sed -r 's/pagecounts-([0-9]{4})([0-9]{2})([0-9]{2})-([0-9]{2})([0-9]{2})([0-9]{2})\.gz/\1-\2-\3 \4-00-00/')" | clickhouse-client --query="INSERT INTO wikistat FORMAT TabSeparated"; done -``` diff --git a/docs/zh/getting-started/example-datasets/wikistat.md b/docs/zh/getting-started/example-datasets/wikistat.md new file mode 120000 index 000000000000..2d429d00984e --- /dev/null +++ b/docs/zh/getting-started/example-datasets/wikistat.md @@ -0,0 +1 @@ +../../../en/getting-started/example-datasets/wikistat.md \ No newline at end of file diff --git a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md index eedc913cf82f..758992e40840 100644 --- a/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md +++ b/docs/zh/guides/improving-query-performance/sparse-primary-indexes.md @@ -346,9 +346,7 @@ UserID.bin,URL.bin,和EventTime.bin是UserID - 我们将主键列(UserID, URL)中的一些列值标记为橙色。 - 这些橙色标记的列值是每个颗粒中每个主键列的最小值。这里的例外是最后一个颗粒(上图中的颗粒1082),最后一个颗粒我们标记的是最大的值。 - - 正如我们将在下面看到的,这些橙色标记的列值将是表主索引中的条目。 + 这些橙色标记的列值是每个颗粒中第一行的主键列值。正如我们将在下面看到的,这些橙色标记的列值将是表主索引中的条目。 - 我们从0开始对行进行编号,以便与ClickHouse内部行编号方案对齐,该方案也用于记录消息。 ::: @@ -1071,13 +1069,6 @@ ClickHouse服务器日志文件中相应的跟踪日志确认了ClickHouse正在 ## 通过projections使用联合主键索引 -Projections目前是一个实验性的功能,因此我们需要告诉ClickHouse: - -```sql -SET optimize_use_projections = 1; -``` - - 在原表上创建projection: ```sql ALTER TABLE hits_UserID_URL @@ -1096,10 +1087,12 @@ ALTER TABLE hits_UserID_URL :::note - 该projection正在创建一个隐藏表,该表的行顺序和主索引基于该projection的给定order BY子句 -- 我们使用MATERIALIZE关键字,以便立即用源表hits_UserID_URL的所有887万行导入隐藏表 +- `SHOW TABLES` 语句查询是不会列出这个隐藏表的 +- 我们使用`MATERIALIZE`关键字,以便立即用源表hits_UserID_URL的所有887万行导入隐藏表 - 如果在源表hits_UserID_URL中插入了新行,那么这些行也会自动插入到隐藏表中 - 查询总是(从语法上)针对源表hits_UserID_URL,但是如果隐藏表的行顺序和主索引允许更有效地执行查询,那么将使用该隐藏表 -- 实际上,隐式创建的隐藏表的行顺序和主索引与我们显式创建的辅助表相同: +- 请注意,投影(projections)不会使 `ORDER BY` 查询语句的效率更高,即使 `ORDER BY` 匹配上了 projection 的 `ORDER BY` 语句(请参阅:https://github.com/ClickHouse/ClickHouse/issues/47333) +- 实际上,隐式创建的隐藏表的行顺序和主索引与我们显式创建的辅助表相同: @@ -1163,7 +1156,7 @@ ClickHouse服务器日志文件中跟踪日志确认了ClickHouse正在对索引 ``` -## 移除无效的主键列 +## 小结 带有联合主键(UserID, URL)的表的主索引对于加快UserID的查询过滤非常有用。但是,尽管URL列是联合主键的一部分,但该索引在加速URL查询过滤方面并没有提供显著的帮助。 
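+下面用一个简短的示意来说明这种不对称性(表名取自上文,过滤取值仅作演示):
+
+```sql
+-- 过滤前导主键列 UserID:可以在索引标记上做二分查找,只需读取少量颗粒
+SELECT count(*) FROM hits_UserID_URL WHERE UserID = 749927693;
+
+-- 过滤次关键字列 URL:只能退化为通用排除搜索,几乎要读取全部颗粒
+SELECT count(*) FROM hits_UserID_URL WHERE URL = 'http://public_search';
+```
+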
@@ -1176,4 +1169,269 @@ ClickHouse服务器日志文件中跟踪日志确认了ClickHouse正在对索引 但是,如果复合主键中的键列在基数上有很大的差异,那么查询按基数升序对主键列进行排序是有益的。 -主键键列之间的基数差越大,主键键列的顺序越重要。我们将在以后的文章中对此进行演示。请继续关注。 +主键键列之间的基数差得越大,主键中的列的顺序越重要。我们将在下一章节对此进行演示。 + +## 高效地为键列排序 + + + + +在复合主键中,键列的顺序会对以下两方面产生重大影响: +- 查询中过滤次关键字列的效率,以及 +- 表数据文件的压缩率。 + +为了演示这一点,我们将使用我们的[网络流量样本数据集(web traffic sample data set)](#数据集)这个版本, +其中每一行包含三列,分别表示互联网用户(`UserID` 列)对 URL(`URL`列)的访问是否被标记为僵尸流量(`IsRobot` 列)。 + +我们将使用一个包含上述所有三列的复合主键,该主键可用于加快计算以下内容的典型网络分析查询速度 +- 特定 URL 有多少(百分比)流量来自机器人,或 +- 我们对特定用户是否为僵尸用户有多大把握(来自该用户的流量中有多大比例被认为是(或不是)僵尸流量) + +我们使用该查询来计算我们要用作复合主键中三个列的基数(注意,我们使用 [URL 表函数](/docs/en/sql-reference/table-functions/url.md) 来即席查询 TSV 数据,而无需创建本地表)。在 `clickhouse client`中运行此查询: +```sql +SELECT + formatReadableQuantity(uniq(URL)) AS cardinality_URL, + formatReadableQuantity(uniq(UserID)) AS cardinality_UserID, + formatReadableQuantity(uniq(IsRobot)) AS cardinality_IsRobot +FROM +( + SELECT + c11::UInt64 AS UserID, + c15::String AS URL, + c20::UInt8 AS IsRobot + FROM url('https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz') + WHERE URL != '' +) +``` +响应如下: +```response +┌─cardinality_URL─┬─cardinality_UserID─┬─cardinality_IsRobot─┐ +│ 2.39 million │ 119.08 thousand │ 4.00 │ +└─────────────────┴────────────────────┴─────────────────────┘ + +1 row in set. Elapsed: 118.334 sec. Processed 8.87 million rows, 15.88 GB (74.99 thousand rows/s., 134.21 MB/s.) +``` + +我们可以看到,各列之间的基数,尤其是 `URL` 列和 `IsRobot` 列之间,存在着很大的差异,因此,在复合主键中,这些列的顺序对于有效加快对这些列的查询过滤速度,以及实现表中列数据文件的最佳压缩比都非常重要。 + +为了证明这一点,我们为僵尸流量分析数据创建了两个版本的表: +- 带有复合主键`(URL、UserID、IsRobot)`的表 `hits_URL_UserID_IsRobot`,其中的键列按基数降序排列 +- 使用复合主键`(IsRobot, UserID, URL)` 创建表 `hits_IsRobot_UserID_URL`,其中的键列按基数升序排列 + + +创建具有复合主键`(URL、UserID、IsRobot)`的表 `hits_URL_UserID_IsRobot`: +```sql +CREATE TABLE hits_URL_UserID_IsRobot +( + `UserID` UInt32, + `URL` String, + `IsRobot` UInt8 +) +ENGINE = MergeTree +// highlight-next-line +PRIMARY KEY (URL, UserID, IsRobot); +``` + +然后,填充887万行数据: +```sql +INSERT INTO hits_URL_UserID_IsRobot SELECT + intHash32(c11::UInt64) AS UserID, + c15 AS URL, + c20 AS IsRobot +FROM url('https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz') +WHERE URL != ''; +``` +响应如下: +```response +0 rows in set. Elapsed: 104.729 sec. Processed 8.87 million rows, 15.88 GB (84.73 thousand rows/s., 151.64 MB/s.) +``` + + +接下来,创建带有复合主键 `(IsRobot,UserID,URL)`的表 `hits_IsRobot_UserID_URL`: +```sql +CREATE TABLE hits_IsRobot_UserID_URL +( + `UserID` UInt32, + `URL` String, + `IsRobot` UInt8 +) +ENGINE = MergeTree +// highlight-next-line +PRIMARY KEY (IsRobot, UserID, URL); +``` +并在其中填入与上一个表相同的 887 万行数据: + +```sql +INSERT INTO hits_IsRobot_UserID_URL SELECT + intHash32(c11::UInt64) AS UserID, + c15 AS URL, + c20 AS IsRobot +FROM url('https://datasets.clickhouse.com/hits/tsv/hits_v1.tsv.xz') +WHERE URL != ''; +``` +响应如下: +```response +0 rows in set. Elapsed: 95.959 sec. Processed 8.87 million rows, 15.88 GB (92.48 thousand rows/s., 165.50 MB/s.) +``` + + + +### 在次关键字列上高效过滤 + +当查询对至少一列进行过滤时,该列是复合关键字的一部分,并且是第一关键字列,[那么 ClickHouse 将在关键字列的索引标记上运行二分查找算法](#主索引被用来选择颗粒)。 + +当查询(仅)过滤属于复合关键字的某一列,但不是第一关键字列时,[ClickHouse 将在关键字列的索引标记上使用通用排除搜索算法](#查询使用第二位主键的性能问题)。 + + +对于第二种情况,复合主键中关键列的排序对[通用排除搜索算法](https://github.com/ClickHouse/ClickHouse/blob/22.3/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp#L1444)的有效性很重要。 + +这是一个对表中的 `UserID` 列进行过滤的查询,我们对该表的关键字列`(URL、UserID、IsRobot)`按基数进行了降序排序: +```sql +SELECT count(*) +FROM hits_URL_UserID_IsRobot +WHERE UserID = 112304 +``` +响应如下: +```response +┌─count()─┐ +│ 73 │ +└─────────┘ + +1 row in set. 
Elapsed: 0.026 sec. +// highlight-next-line +Processed 7.92 million rows, +31.67 MB (306.90 million rows/s., 1.23 GB/s.) +``` + +对关键字列`(IsRobot, UserID, URL)`按基数升序排列的表,进行相同的查询: +```sql +SELECT count(*) +FROM hits_IsRobot_UserID_URL +WHERE UserID = 112304 +``` +响应如下: +```response +┌─count()─┐ +│ 73 │ +└─────────┘ + +1 row in set. Elapsed: 0.003 sec. +// highlight-next-line +Processed 20.32 thousand rows, +81.28 KB (6.61 million rows/s., 26.44 MB/s.) +``` + +我们可以看到,在对关键列按基数进行升序排列的表中,查询执行的效率和速度明显更高。 + +其原因是,当通过具有较低基数前键列的次关键字列选择[颗粒](#主索引被用来选择颗粒)时, [通用排除搜索算法](https://github.com/ClickHouse/ClickHouse/blob/22.3/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp#L1444)最有效。 我们在本指南的[上一节](#generic-exclusion-search-algorithm)中对此进行了详细说明。 + + +### 数据文件的最佳压缩率 + +此查询将比较上面创建的两个表中 `UserID` 列的压缩率: + +```sql +SELECT + table AS Table, + name AS Column, + formatReadableSize(data_uncompressed_bytes) AS Uncompressed, + formatReadableSize(data_compressed_bytes) AS Compressed, + round(data_uncompressed_bytes / data_compressed_bytes, 0) AS Ratio +FROM system.columns +WHERE (table = 'hits_URL_UserID_IsRobot' OR table = 'hits_IsRobot_UserID_URL') AND (name = 'UserID') +ORDER BY Ratio ASC +``` +这是响应: +```response +┌─Table───────────────────┬─Column─┬─Uncompressed─┬─Compressed─┬─Ratio─┐ +│ hits_URL_UserID_IsRobot │ UserID │ 33.83 MiB │ 11.24 MiB │ 3 │ +│ hits_IsRobot_UserID_URL │ UserID │ 33.83 MiB │ 877.47 KiB │ 39 │ +└─────────────────────────┴────────┴──────────────┴────────────┴───────┘ + +2 rows in set. Elapsed: 0.006 sec. +``` +我们可以看到,在按关键字列`(IsRobot、UserID、URL)` 按基数升序排列的表中,`UserID` 列的压缩率明显更高。 + +虽然两个表中存储的数据完全相同(我们在两个表中插入了相同的 887 万行),但复合主键中关键字列的顺序对表的 [列数据文件](#数据按照主键排序存储在磁盘上)中的 压缩数据所需的磁盘空间有很大影响: +- 在具有复合主键`(URL, UserID, IsRobot)` 的表 `hits_URL_UserID_IsRobot` 中,我们按照键列的基数降序排列,此时 `UserID.bin` 数据文件占用**11.24MB**的磁盘空间。 +- 在具有复合主键`(IsRobot, UserID, URL)` 的表 `hits_IsRobot_UserID_URL` 中,我们按照键列的基数升序排列,`UserID.bin` 数据文件仅占用**877.47 KiB**的磁盘空间。 + +对磁盘上表的列数据进行良好的压缩比不仅能节省磁盘空间,还能使需要从该列读取数据的查询(尤其是分析查询)更快,因为将列数据从磁盘移动到主内存(操作系统的文件缓存)所需的 i/o 更少。 + +下面我们将说明,为什么主键列按基数升序排列有利于提高表列的压缩率。 + +下图阐述了主键的磁盘上行顺序,其中键列是按基数升序排列的: + + +我们讨论过 [表的行数据按主键列有序存储在磁盘上](#数据按照主键排序存储在磁盘上)。 + +在上图中,表格的行(它们在磁盘上的列值)首先按其 `cl` 值排序,具有相同 `cl` 值的行按其 `ch` 值排序。由于第一键列 `cl` 的基数较低,因此很可能存在具有相同 `cl` 值的行。因此,`ch`值也很可能是有序的(局部地--对于具有相同`cl`值的行而言)。 + +如果在一列中,相似的数据被放在彼此相近的位置,例如通过排序,那么这些数据将得到更好的压缩。 +一般来说,压缩算法会受益于数据的运行长度(可见的数据越多,压缩效果越好)和局部性(数据越相似,压缩率越高)。 + +与上图不同的是,下图阐述了主键的磁盘上行顺序,其中主键列是按基数降序排列的: + + +现在,表格的行首先按其 `ch` 值排序,具有相同 `ch` 值的行按其 `cl` 值排序。 +但是,由于第一键列 `ch` 的基数很高,因此不太可能存在具有相同 `ch` 值的行。因此,`cl`值也不太可能是有序的(局部地--对于具有相同`ch`值的行而言)。 + +因此,`cl`值很可能是随机排序的,因此局部性和压缩比都很差。 + + +### 小结 + +为了在查询中有效地过滤次关键字列和提高表列数据文件的压缩率,按基数升序排列主键中的列是有益的。 + + +### 相关内容 +- 博客: [Super charging your ClickHouse queries](https://clickhouse.com/blog/clickhouse-faster-queries-with-projections-and-primary-indexes) + + +## 有效识别单行 + +尽管在一般情况下,它[不](/knowledgebase/key-value)是ClickHouse 的最佳用例, +但是有时建立在ClickHouse之上的应用程序,需要识别ClickHouse表中的单行。 + + +一个直观的解决方案可能是使用[UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier) 列,每一行的值都是唯一的,并且为了快速检索行,将该列用作主键列。 + +为了实现最快的检索,UUID 列[需要成为主键列](#主索引被用来选择颗粒)。 + +我们讨论过,由于[ClickHouse 表的行数据是按主键列顺序存储在磁盘上的](#数据按照主键排序存储在磁盘上),因此在主键或复合主键中,在基数较小的列之前设置基数非常大的列(如 UUID 列)[不利于其他表列的压缩率](#数据文件的最佳压缩率)。 + +在最快检索速度和最佳数据压缩之间的折中方法是使用某种复合主键,其中 UUID 是最后一列关键字,位于(更)小基数关键字列之后,这些关键字列用于确保表中某些列的良好压缩比。 + +### 一个具体例子 + +一个具体的例子是 Alexey Milovidov 开发的文本粘贴服务 https://pastila.nl, 相关[博客](https://clickhouse.com/blog/building-a-paste-service-with-clickhouse/)。 + +每次更改文本区域时,数据都会自动保存到 ClickHouse 表格行中(每次更改保存一行)。 + 
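+作为一个极简的草图(表结构与列名均为假设,并非 pastila.nl 的真实结构;真实设计见下文讨论),这种“每次更改保存一行”的写入模式大致如下:
+
+```sql
+CREATE TABLE pastes_sketch
+(
+    `hash` UInt64,      -- 内容哈希,用作行标识
+    `content` String    -- 文本区域的当前内容
+)
+ENGINE = MergeTree
+ORDER BY hash;
+
+-- 文本每变化一次,就写入一行
+INSERT INTO pastes_sketch SELECT cityHash64('some content'), 'some content';
+```
+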
+识别和检索(特定版本)粘贴内容的一种方法是使用内容的哈希值作为包含内容的表行的 UUID。 + +下图显示了 +- 当内容发生变化时(例如由于按键将文本键入文本框),行的插入顺序,以及 +- 当使用 `PRIMARY KEY (hash)` 时,插入行数据的磁盘顺序: + + +由于 `hash` 列被用作主键列 +- 可以[非常快速](#主索引被用来选择颗粒) 检索特定行,但 +- 表格的行(列数据)是按照(唯一和随机的)哈希值升序存储在磁盘上的。因此,内容列的值也是按随机顺序存储的,不具有数据局部性,导致**内容列数据文件的压缩率不理想**。 + + +为了大幅提高内容列的压缩率,同时仍能快速检索特定行,pastila.nl 使用两个哈希值(和一个复合主键)来识别特定行: +- 内容哈希值,如上所述,对于不同的数据是不同的,以及 +- 对[局部性敏感的哈希值(fingerprint)](https://en.wikipedia.org/wiki/Locality-sensitive_hashing), 它**不会**因数据的微小变化而变化。 + +下图显示了 +- 当内容发生变化时(例如,由于按键将文本输入文本区),行的插入顺序以及 +- 当使用复合主键`(fingerprint,hash)` 时,插入行数据的磁盘顺序: + + + +现在,磁盘上的行首先按指纹 (`fingerprint`) 排序,对于`fingerprint` 值相同的行,其哈希(`hash`)值决定最终的排序。 + +由于仅有细微差别的数据会获得相同的指纹值,因此类似的数据现在会被存储在磁盘的内容列中,并且彼此靠近。这对内容列的压缩率非常有利,因为压缩算法一般会从数据局部性中获益(数据越相似,压缩率越高)。 + +由此带来的妥协是,检索特定行时需要两个字段("指纹"和 "散列"),以便最佳地利用由复合主键 `(fingerprint, hash)` 产生的主索引。 diff --git a/docs/zh/images/shared-merge-tree-1.png b/docs/zh/images/shared-merge-tree-1.png new file mode 100644 index 000000000000..ef6791e47b98 Binary files /dev/null and b/docs/zh/images/shared-merge-tree-1.png differ diff --git a/docs/zh/images/shared-merge-tree-2.png b/docs/zh/images/shared-merge-tree-2.png new file mode 100644 index 000000000000..be6f9e6f5edb Binary files /dev/null and b/docs/zh/images/shared-merge-tree-2.png differ diff --git a/docs/zh/interfaces/cli.md b/docs/zh/interfaces/cli.md index 80d13154a762..a6b4d10dd2ff 100644 --- a/docs/zh/interfaces/cli.md +++ b/docs/zh/interfaces/cli.md @@ -116,7 +116,7 @@ $ clickhouse-client --param_tuple_in_tuple="(10, ('dt', 10))" -q "SELECT * FROM - `--port` – 连接的端口,默认值:9000。注意HTTP接口以及TCP原生接口使用的是不同端口。 - `--user, -u` – 用户名。 默认值:`default`。 - `--password` – 密码。 默认值:空字符串。 -- `--query, -q` – 使用非交互模式查询。 +- `--query, -q` – 使用非交互模式查询。 允许多次指定 `--query`(`--query "SELECT 1;" --query "SELECT 2;"...`)。 - `--database, -d` – 默认当前操作的数据库. 
默认值:服务端默认的配置(默认是`default`)。 - `--multiline, -m` – 如果指定,允许多行语句查询(Enter仅代表换行,不代表查询语句完结)。 - `--multiquery, -n` – 如果指定, 允许处理用`;`号分隔的多个查询,只在非交互模式下生效。 diff --git a/docs/zh/interfaces/http.md b/docs/zh/interfaces/http.md index c7a0f355a92d..dfdcf53bd3ff 100644 --- a/docs/zh/interfaces/http.md +++ b/docs/zh/interfaces/http.md @@ -53,7 +53,7 @@ Connection: Close Content-Type: text/tab-separated-values; charset=UTF-8 X-ClickHouse-Server-Display-Name: clickhouse.ru-central1.internal X-ClickHouse-Query-Id: 5abe861c-239c-467f-b955-8a201abb8b7f -X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"} 1 ``` @@ -262,9 +262,9 @@ $ echo 'SELECT number FROM system.numbers LIMIT 10' | curl 'http://localhost:812 您可以在`X-ClickHouse-Progress`响应头中收到查询进度的信息。为此,启用[Http Header携带进度](../operations/settings/settings.md#settings-send_progress_in_http_headers)。示例: ``` text -X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128"} -X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128"} +X-ClickHouse-Progress: {"read_rows":"2752512","read_bytes":"240570816","total_rows_to_read":"8880128","elapsed_ns":"662334"} +X-ClickHouse-Progress: {"read_rows":"5439488","read_bytes":"482285394","total_rows_to_read":"8880128","elapsed_ns":"992334"} +X-ClickHouse-Progress: {"read_rows":"8783786","read_bytes":"819092887","total_rows_to_read":"8880128","elapsed_ns":"1232334"} ``` 显示字段信息: @@ -363,7 +363,7 @@ $ curl -v 'http://localhost:8123/predefined_query' < X-ClickHouse-Format: Template < X-ClickHouse-Timezone: Asia/Shanghai < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"} < # HELP "Query" "Number of executing queries" # TYPE "Query" counter @@ -521,7 +521,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/hi' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"} < * Connection #0 to host localhost left intact Say Hi!% @@ -561,7 +561,7 @@ $ curl -v -H 'XXX:xxx' 'http://localhost:8123/get_config_static_handler' < Content-Type: text/plain; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"} < * Connection #0 to host localhost left intact
% @@ -613,7 +613,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_absolute_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"} < Absolute Path File * Connection #0 to host localhost left intact @@ -632,7 +632,7 @@ $ curl -vv -H 'XXX:xxx' 'http://localhost:8123/get_relative_path_static_handler' < Content-Type: text/html; charset=UTF-8 < Transfer-Encoding: chunked < Keep-Alive: timeout=3 -< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0"} +< X-ClickHouse-Summary: {"read_rows":"0","read_bytes":"0","written_rows":"0","written_bytes":"0","total_rows_to_read":"0","elapsed_ns":"662334"} < Relative Path File * Connection #0 to host localhost left intact diff --git a/docs/zh/operations/optimizing-performance/profile-guided-optimization.md b/docs/zh/operations/optimizing-performance/profile-guided-optimization.md new file mode 120000 index 000000000000..31cb656bd991 --- /dev/null +++ b/docs/zh/operations/optimizing-performance/profile-guided-optimization.md @@ -0,0 +1 @@ +../../../en/operations/optimizing-performance/profile-guided-optimization.md \ No newline at end of file diff --git a/docs/zh/operations/server-configuration-parameters/settings.md b/docs/zh/operations/server-configuration-parameters/settings.md index f6106d8734e5..a4f6ce732556 100644 --- a/docs/zh/operations/server-configuration-parameters/settings.md +++ b/docs/zh/operations/server-configuration-parameters/settings.md @@ -455,7 +455,7 @@ SSL客户端/服务器配置。 - verificationMode – The method for checking the node’s certificates. Details are in the description of the [A.背景](https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/Context.h) 同学们 可能的值: `none`, `relaxed`, `strict`, `once`. - verificationDepth – The maximum length of the verification chain. Verification will fail if the certificate chain length exceeds the set value. - loadDefaultCAFile – Indicates that built-in CA certificates for OpenSSL will be used. Acceptable values: `true`, `false`. \| -- cipherList – Supported OpenSSL encryptions. For example: `ALL:!ADH:!LOW:!EXP:!MD5:@STRENGTH`. +- cipherList – Supported OpenSSL encryptions. For example: `ALL:!ADH:!LOW:!EXP:!MD5:!3DES:@STRENGTH`. - cacheSessions – Enables or disables caching sessions. Must be used in combination with `sessionIdContext`. 可接受的值: `true`, `false`. - sessionIdContext – A unique set of random characters that the server appends to each generated identifier. The length of the string must not exceed `SSL_MAX_SSL_SESSION_ID_LENGTH`. 始终建议使用此参数,因为如果服务器缓存会话,以及客户端请求缓存,它有助于避免出现问题。 默认值: `${application.name}`. - sessionCacheSize – The maximum number of sessions that the server caches. Default value: 1024\*20. 0 – Unlimited sessions. @@ -850,10 +850,11 @@ ZooKeeper中数据部分头的存储方法。 如果 `use_minimalistic_part_header_in_zookeeper = 1`,然后 [复制](../../engines/table-engines/mergetree-family/replication.md) 表存储的数据部分的头紧凑使用一个单一的 `znode`. 
如果表包含许多列,则此存储方法显着减少了Zookeeper中存储的数据量。 - :::info "注意" - 申请后 `use_minimalistic_part_header_in_zookeeper = 1`,您不能将ClickHouse服务器降级到不支持此设置的版本。 在集群中的服务器上升级ClickHouse时要小心。 不要一次升级所有服务器。 在测试环境中或在集群的几台服务器上测试ClickHouse的新版本更安全。 +:::info "注意" +申请后 `use_minimalistic_part_header_in_zookeeper = 1`,您不能将ClickHouse服务器降级到不支持此设置的版本。 在集群中的服务器上升级ClickHouse时要小心。 不要一次升级所有服务器。 在测试环境中或在集群的几台服务器上测试ClickHouse的新版本更安全。 - Data part headers already stored with this setting can't be restored to their previous (non-compact) representation. +Data part headers already stored with this setting can't be restored to their previous (non-compact) representation. +::: **默认值:** 0. diff --git a/docs/zh/operations/system-tables/asynchronous_metric_log.md b/docs/zh/operations/system-tables/asynchronous_metric_log.md index 419ad2a7ed60..9fa399f1aed2 100644 --- a/docs/zh/operations/system-tables/asynchronous_metric_log.md +++ b/docs/zh/operations/system-tables/asynchronous_metric_log.md @@ -8,7 +8,6 @@ slug: /zh/operations/system-tables/asynchronous_metric_log 列: - `event_date` ([Date](../../sql-reference/data-types/date.md)) — 事件日期。 - `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 事件时间。 -- `event_time_microseconds` ([DateTime64](../../sql-reference/data-types/datetime64.md)) — 事件时间(微秒)。 - `name` ([String](../../sql-reference/data-types/string.md)) — 指标名。 - `value` ([Float64](../../sql-reference/data-types/float.md)) — 指标值。 @@ -17,18 +16,18 @@ slug: /zh/operations/system-tables/asynchronous_metric_log SELECT * FROM system.asynchronous_metric_log LIMIT 10 ``` ``` text -┌─event_date─┬──────────event_time─┬────event_time_microseconds─┬─name─────────────────────────────────────┬─────value─┐ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ CPUFrequencyMHz_0 │ 2120.9 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pmuzzy │ 743 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.arenas.all.pdirty │ 26288 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.run_intervals │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.background_thread.num_runs │ 0 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.retained │ 60694528 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.mapped │ 303161344 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.resident │ 260931584 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.metadata │ 12079488 │ -│ 2020-09-05 │ 2020-09-05 15:56:30 │ 2020-09-05 15:56:30.025227 │ jemalloc.allocated │ 133756128 │ -└────────────┴─────────────────────┴────────────────────────────┴──────────────────────────────────────────┴───────────┘ +┌─event_date─┬──────────event_time─┬─name─────────────────────────────────────┬─────value─┐ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ CPUFrequencyMHz_0 │ 2120.9 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pmuzzy │ 743 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.arenas.all.pdirty │ 26288 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.run_intervals │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.background_thread.num_runs │ 0 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.retained │ 60694528 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.mapped │ 303161344 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.resident │ 260931584 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ 
jemalloc.metadata │ 12079488 │ +│ 2020-09-05 │ 2020-09-05 15:56:30 │ jemalloc.allocated │ 133756128 │ +└────────────┴─────────────────────┴──────────────────────────────────────────┴───────────┘ ``` **另请参阅** diff --git a/docs/zh/operations/system-tables/licenses.md b/docs/zh/operations/system-tables/licenses.md index dc09e65264d2..38260491dc07 100644 --- a/docs/zh/operations/system-tables/licenses.md +++ b/docs/zh/operations/system-tables/licenses.md @@ -20,21 +20,9 @@ SELECT library_name, license_type, license_path FROM system.licenses LIMIT 15 ``` text ┌─library_name───────┬─license_type─┬─license_path────────────────────────┐ -│ FastMemcpy │ MIT │ /contrib/FastMemcpy/LICENSE │ -│ arrow │ Apache │ /contrib/arrow/LICENSE.txt │ -│ avro │ Apache │ /contrib/avro/LICENSE.txt │ │ aws-c-common │ Apache │ /contrib/aws-c-common/LICENSE │ -│ aws-c-event-stream │ Apache │ /contrib/aws-c-event-stream/LICENSE │ -│ aws-checksums │ Apache │ /contrib/aws-checksums/LICENSE │ -│ aws │ Apache │ /contrib/aws/LICENSE.txt │ -│ base64 │ BSD 2-clause │ /contrib/base64/LICENSE │ -│ boost │ Boost │ /contrib/boost/LICENSE_1_0.txt │ +│ base64 │ BSD 2-clause │ /contrib/aklomp-base64/LICENSE │ │ brotli │ MIT │ /contrib/brotli/LICENSE │ -│ capnproto │ MIT │ /contrib/capnproto/LICENSE │ -│ cassandra │ Apache │ /contrib/cassandra/LICENSE.txt │ -│ cctz │ Apache │ /contrib/cctz/LICENSE.txt │ -│ cityhash102 │ MIT │ /contrib/cityhash102/COPYING │ -│ cppkafka │ BSD 2-clause │ /contrib/cppkafka/LICENSE │ +│ [...] │ [...] │ [...] │ └────────────────────┴──────────────┴─────────────────────────────────────┘ - ``` diff --git a/docs/zh/sql-reference/aggregate-functions/reference/stddevsamp.md b/docs/zh/sql-reference/aggregate-functions/reference/stddevsamp.md index d242f4e3401d..4d0828400b21 100644 --- a/docs/zh/sql-reference/aggregate-functions/reference/stddevsamp.md +++ b/docs/zh/sql-reference/aggregate-functions/reference/stddevsamp.md @@ -5,8 +5,8 @@ sidebar_position: 31 # stddevSamp {#stddevsamp} -结果等于 [varSamp] (../../../sql-reference/aggregate-functions/reference/varsamp.md)的平方根。 +结果等于 [varSamp](../../../sql-reference/aggregate-functions/reference/varsamp.md) 的平方根。 :::note 该函数使用数值不稳定的算法。 如果你需要 [数值稳定性](https://en.wikipedia.org/wiki/Numerical_stability) 在计算中,使用 `stddevSampStable` 函数。 它的工作速度较慢,但提供较低的计算错误。 -::: \ No newline at end of file +::: diff --git a/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md index 8b07c3e67bdf..b30cc7041008 100644 --- a/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md +++ b/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md @@ -213,8 +213,9 @@ ClickHouse接收来自ODBC-driver的引用符号,并将查询中的所有设 ### ODBC字典功能的已知漏洞 {#known-vulnerability-of-the-odbc-dictionary-functionality} - :::info "注意" - 通过ODBC驱动程序连接参数连接到数据库时 `Servername` 可以取代。 在这种情况下,值 `USERNAME` 和 `PASSWORD` 从 `odbc.ini` 被发送到远程服务器,并且可能会受到损害。 +:::info "注意" +通过ODBC驱动程序连接参数连接到数据库时 `Servername` 可以取代。 在这种情况下,值 `USERNAME` 和 `PASSWORD` 从 `odbc.ini` 被发送到远程服务器,并且可能会受到损害。 +::: **不安全使用示例** diff --git a/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts.md index 1396de633917..b755d45a731d 100644 --- a/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts.md +++ b/docs/zh/sql-reference/dictionaries/external-dictionaries/external-dicts.md @@ -48,8 +48,9 
@@ ClickHouse: [字典的DDL查询](../../statements/create.md#create-dictionary-query) 在服务器配置中不需要任何其他记录。 它们允许使用字典作为一流的实体,如表或视图。 - :::info "注意" - 您可以通过在一个小字典中描述它来转换小字典的值 `SELECT` 查询(见 [变换](../../../sql-reference/functions/other-functions.md) 功能)。 此功能与外部字典无关。 +:::info "注意" +您可以通过在一个小字典中描述它来转换小字典的值 `SELECT` 查询(见 [变换](../../../sql-reference/functions/other-functions.md) 功能)。 此功能与外部字典无关。 +::: ## 另请参阅 {#ext-dicts-see-also} diff --git a/docs/zh/sql-reference/functions/arithmetic-functions.md b/docs/zh/sql-reference/functions/arithmetic-functions.md index b0a37565c16d..49d800fd0699 100644 --- a/docs/zh/sql-reference/functions/arithmetic-functions.md +++ b/docs/zh/sql-reference/functions/arithmetic-functions.md @@ -125,7 +125,7 @@ SELECT max2(-1, 2); **语法** ```sql -max2(value1, value2) +min2(value1, value2) ``` **参数** diff --git a/docs/zh/sql-reference/functions/date-time-functions.md b/docs/zh/sql-reference/functions/date-time-functions.md index 53dadc23c6d6..e4b703224775 100644 --- a/docs/zh/sql-reference/functions/date-time-functions.md +++ b/docs/zh/sql-reference/functions/date-time-functions.md @@ -643,6 +643,8 @@ date_diff('unit', startdate, enddate, [timezone]) - `unit` — `value`对应的时间单位。类型为[String](../../sql-reference/data-types/string.md)。 可能的值: + - `microsecond` + - `millisecond` - `second` - `minute` - `hour` diff --git a/docs/zh/sql-reference/functions/functions-for-nulls.md b/docs/zh/sql-reference/functions/functions-for-nulls.md index 4dd309709236..b3dca3ac549b 100644 --- a/docs/zh/sql-reference/functions/functions-for-nulls.md +++ b/docs/zh/sql-reference/functions/functions-for-nulls.md @@ -192,7 +192,7 @@ SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook **返回值** - 如果`x`不为`NULL`,返回非`Nullable`类型的原始值。 -- 如果`x`为`NULL`,返回对应非`Nullable`类型的默认值。 +- 如果`x`为`NULL`,则返回任意值。 **示例** diff --git a/docs/zh/sql-reference/operators/in.md b/docs/zh/sql-reference/operators/in.md index df4c8772e863..e030b8f18203 100644 --- a/docs/zh/sql-reference/operators/in.md +++ b/docs/zh/sql-reference/operators/in.md @@ -107,8 +107,9 @@ FROM t_null 带子查询的IN-s有两个选项(类似于连接):normal `IN` / `JOIN` 和 `GLOBAL IN` / `GLOBAL JOIN`. 它们在分布式查询处理的运行方式上有所不同。 - :::info "注意" - 请记住,下面描述的算法可能会有不同的工作方式取决于 [设置](../../operations/settings/settings.md) `distributed_product_mode` 设置。 +:::info "注意" +请记住,下面描述的算法可能会有不同的工作方式取决于 [设置](../../operations/settings/settings.md) `distributed_product_mode` 设置。 +::: 当使用常规IN时,查询被发送到远程服务器,并且它们中的每个服务器都在运行子查询 `IN` 或 `JOIN` 条款 diff --git a/docs/zh/sql-reference/statements/alter/index/index.md b/docs/zh/sql-reference/statements/alter/index/index.md index 85038171eb5e..568d5d087566 100644 --- a/docs/zh/sql-reference/statements/alter/index/index.md +++ b/docs/zh/sql-reference/statements/alter/index/index.md @@ -21,5 +21,5 @@ Also, they are replicated, syncing indices metadata via ZooKeeper. 此外,它们会被复制,会通过ZooKeeper同步索引元数据。 :::note "注意" - 索引操作仅支持具有以下特征的表 [`*MergeTree`](../../../../engines/table-engines/mergetree-family/mergetree.md)引擎 (包括[replicated](../../../../engines/table-engines/mergetree-family/replication.md)). +索引操作仅支持具有以下特征的表 [`*MergeTree`](../../../../engines/table-engines/mergetree-family/mergetree.md)引擎 (包括[replicated](../../../../engines/table-engines/mergetree-family/replication.md)). 
::: diff --git a/docs/zh/sql-reference/statements/create/view.md b/docs/zh/sql-reference/statements/create/view.md index 8ce2d20a10c1..bce0994ecd2b 100644 --- a/docs/zh/sql-reference/statements/create/view.md +++ b/docs/zh/sql-reference/statements/create/view.md @@ -72,7 +72,7 @@ ClickHouse 中的物化视图更像是插入触发器。 如果视图查询中 使用[allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view)设置启用实时视图和`WATCH`查询的使用。 输入命令`set allow_experimental_live_view = 1`。 ```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... +CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH REFRESH [value_in_sec]] AS SELECT ... ``` 实时视图存储相应[SELECT](../../../sql-reference/statements/select/index.md)查询的结果,并在查询结果更改时随时更新。 查询结果以及与新数据结合所需的部分结果存储在内存中,为重复查询提供更高的性能。当使用[WATCH](../../../sql-reference/statements/watch.md)查询更改查询结果时,实时视图可以提供推送通知。 diff --git a/docs/zh/sql-reference/statements/insert-into.md b/docs/zh/sql-reference/statements/insert-into.md index 9acc1655f9a2..f80c0a8a8eae 100644 --- a/docs/zh/sql-reference/statements/insert-into.md +++ b/docs/zh/sql-reference/statements/insert-into.md @@ -8,7 +8,7 @@ INSERT INTO 语句主要用于向系统中添加数据. 查询的基本格式: ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ... ``` 您可以在查询中指定要插入的列的列表,如:`[(c1, c2, c3)]`。您还可以使用列[匹配器](../../sql-reference/statements/select/index.md#asterisk)的表达式,例如`*`和/或[修饰符](../../sql-reference/statements/select/index.md#select-modifiers),例如 [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier)。 @@ -71,7 +71,7 @@ INSERT INTO [db.]table [(c1, c2, c3)] FORMAT format_name data_set 例如,下面的查询所使用的输入格式就与上面INSERT … VALUES的中使用的输入格式相同: ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] FORMAT Values (v11, v12, v13), (v21, v22, v23), ... ``` ClickHouse会清除数据前所有的空白字符与一个换行符(如果有换行符的话)。所以在进行查询时,我们建议您将数据放入到输入输出格式名称后的新的一行中去(如果数据是以空白字符开始的,这将非常重要)。 @@ -93,7 +93,7 @@ INSERT INTO t FORMAT TabSeparated ### 使用`SELECT`的结果写入 {#inserting-the-results-of-select} ``` sql -INSERT INTO [db.]table [(c1, c2, c3)] SELECT ... +INSERT INTO [TABLE] [db.]table [(c1, c2, c3)] SELECT ... 
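+-- 一个假设的具体示例(表名与列名仅作示意):SELECT 的列按位置与目标表的列对应
+-- INSERT INTO TABLE dst (c1, c2) SELECT a, b FROM src;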
``` 写入与SELECT的列的对应关系是使用位置来进行对应的,尽管它们在SELECT表达式与INSERT中的名称可能是不同的。如果需要,会对它们执行对应的类型转换。 diff --git a/packages/.gitignore b/packages/.gitignore index 355164c12651..69cf3a785e4f 100644 --- a/packages/.gitignore +++ b/packages/.gitignore @@ -1 +1,2 @@ */ +!pre-build/ diff --git a/packages/build b/packages/build index 6ec991aad079..cc089dd08ad4 100755 --- a/packages/build +++ b/packages/build @@ -84,8 +84,8 @@ function deb2tgz { FILE=$1 PKG_NAME=${FILE##*/}; PKG_NAME=${PKG_NAME%%_*} PKG_DIR="$PKG_NAME-$CLICKHOUSE_VERSION_STRING" - PKG_PATH="$OUTPUT_DIR/$PKG_NAME-$CLICKHOUSE_VERSION_STRING" - TARBALL="$OUTPUT_DIR/$PKG_NAME-$CLICKHOUSE_VERSION_STRING-$DEB_ARCH.tgz" + PKG_PATH="$OUTPUT_DIR/$PKG_DIR" + TARBALL="$OUTPUT_DIR/$PKG_DIR-$DEB_ARCH.tgz" rm -rf "$PKG_PATH" dpkg-deb -R "$FILE" "$PKG_PATH" mkdir -p "$PKG_PATH/install" diff --git a/packages/clickhouse-client.yaml b/packages/clickhouse-client.yaml index d4fd9300208e..4d707b28ad90 100644 --- a/packages/clickhouse-client.yaml +++ b/packages/clickhouse-client.yaml @@ -1,6 +1,13 @@ # package sources should be placed in ${PWD}/root # nfpm should run from the same directory with a config name: "clickhouse-client" +description: | + Client binary for ClickHouse + ClickHouse is a column-oriented database management system + that allows generating analytical data reports in real time. + This package provides clickhouse-client, clickhouse-local and clickhouse-benchmark. + +# Common packages config arch: "${DEB_ARCH}" # amd64, arm64 platform: "linux" version: "${CLICKHOUSE_VERSION_STRING}" @@ -9,19 +16,17 @@ homepage: "https://clickhouse.com" license: "Apache" section: "database" priority: "optional" +maintainer: "ClickHouse Dev Team " +deb: + fields: + Source: clickhouse +# Package specific content replaces: - clickhouse-compressor conflicts: - clickhouse-compressor -maintainer: "ClickHouse Dev Team " -description: | - Client binary for ClickHouse - ClickHouse is a column-oriented database management system. - that allows generating analytical data reports in real time. - This package provides clickhouse-client, clickhouse-local and clickhouse-benchmark. - overrides: deb: depends: @@ -30,10 +35,6 @@ overrides: depends: - clickhouse-common-static = ${CLICKHOUSE_VERSION_STRING} -deb: - fields: - Source: clickhouse - contents: - src: root/etc/clickhouse-client/config.xml dst: /etc/clickhouse-client/config.xml diff --git a/packages/clickhouse-common-static-dbg.yaml b/packages/clickhouse-common-static-dbg.yaml index b2d2b3aaf262..96de4c17d88f 100644 --- a/packages/clickhouse-common-static-dbg.yaml +++ b/packages/clickhouse-common-static-dbg.yaml @@ -1,6 +1,13 @@ # package sources should be placed in ${PWD}/root # nfpm should run from the same directory with a config name: "clickhouse-common-static-dbg" +description: | + debugging symbols for clickhouse-common-static + This package contains the debugging symbols for clickhouse-common. + + +# +# Common packages config arch: "${DEB_ARCH}" # amd64, arm64 platform: "linux" version: "${CLICKHOUSE_VERSION_STRING}" @@ -9,21 +16,17 @@ homepage: "https://clickhouse.com" license: "Apache" section: "database" priority: "optional" +maintainer: "ClickHouse Dev Team " +deb: + fields: + Source: clickhouse +# Package specific content replaces: - clickhouse-common-dbg conflicts: - clickhouse-common-dbg -maintainer: "ClickHouse Dev Team " -description: | - debugging symbols for clickhouse-common-static - This package contains the debugging symbols for clickhouse-common.
- -deb: - fields: - Source: clickhouse - contents: - src: root/usr/lib/debug/usr/bin/clickhouse.debug dst: /usr/lib/debug/usr/bin/clickhouse.debug diff --git a/packages/clickhouse-common-static.yaml b/packages/clickhouse-common-static.yaml index c77914d0d692..95532726d945 100644 --- a/packages/clickhouse-common-static.yaml +++ b/packages/clickhouse-common-static.yaml @@ -1,6 +1,13 @@ # package sources should be placed in ${PWD}/root # nfpm should run from the same directory with a config name: "clickhouse-common-static" +description: | + Common files for ClickHouse + ClickHouse is a column-oriented database management system + that allows generating analytical data reports in real time. + This package provides common files for both clickhouse server and client + +# Common packages config arch: "${DEB_ARCH}" # amd64, arm64 platform: "linux" version: "${CLICKHOUSE_VERSION_STRING}" @@ -9,7 +16,12 @@ homepage: "https://clickhouse.com" license: "Apache" section: "database" priority: "optional" +maintainer: "ClickHouse Dev Team " +deb: + fields: + Source: clickhouse +# Package specific content replaces: - clickhouse-common - clickhouse-server-base @@ -19,17 +31,6 @@ provides: suggests: - clickhouse-common-static-dbg -maintainer: "ClickHouse Dev Team " -description: | - Common files for ClickHouse - ClickHouse is a column-oriented database management system - that allows generating analytical data reports in real time. - This package provides common files for both clickhouse server and client - -deb: - fields: - Source: clickhouse - contents: - src: root/usr/bin/clickhouse dst: /usr/bin/clickhouse diff --git a/packages/clickhouse-keeper-dbg.yaml b/packages/clickhouse-keeper-dbg.yaml index a6be9ec9e971..28d53b39518d 100644 --- a/packages/clickhouse-keeper-dbg.yaml +++ b/packages/clickhouse-keeper-dbg.yaml @@ -1,6 +1,13 @@ # package sources should be placed in ${PWD}/root # nfpm should run from the same directory with a config name: "clickhouse-keeper-dbg" +description: | + debugging symbols for clickhouse-keeper + This package contains the debugging symbols for clickhouse-keeper. + + +# +# Common packages config arch: "${DEB_ARCH}" # amd64, arm64 platform: "linux" version: "${CLICKHOUSE_VERSION_STRING}" @@ -10,14 +17,11 @@ license: "Apache" section: "database" priority: "optional" maintainer: "ClickHouse Dev Team " -description: | - debugging symbols for clickhouse-keeper - This package contains the debugging symbols for clickhouse-keeper. - deb: fields: Source: clickhouse +# Package specific content contents: - src: root/usr/lib/debug/usr/bin/clickhouse-keeper.debug dst: /usr/lib/debug/usr/bin/clickhouse-keeper.debug diff --git a/packages/clickhouse-keeper.postinstall b/packages/clickhouse-keeper.postinstall index 3d6cd484146e..2b4f303b6849 100644 --- a/packages/clickhouse-keeper.postinstall +++ b/packages/clickhouse-keeper.postinstall @@ -3,12 +3,12 @@ set -e # set -x PROGRAM=clickhouse-keeper -KEEPER_USER=${KEEPER_USER:=clickhouse} -KEEPER_GROUP=${KEEPER_GROUP:=clickhouse} +KEEPER_USER=${KEEPER_USER:-clickhouse} +KEEPER_GROUP=${KEEPER_GROUP:-clickhouse} # Please note that we don't support paths with whitespaces. This is rather ignorant. -KEEPER_CONFDIR=${KEEPER_CONFDIR:=/etc/$PROGRAM} -KEEPER_DATADIR=${KEEPER_DATADIR:=/var/lib/clickhouse} -KEEPER_LOGDIR=${KEEPER_LOGDIR:=/var/log/$PROGRAM} +KEEPER_CONFDIR=${KEEPER_CONFDIR:-/etc/$PROGRAM} +KEEPER_DATADIR=${KEEPER_DATADIR:-/var/lib/clickhouse} +KEEPER_LOGDIR=${KEEPER_LOGDIR:-/var/log/$PROGRAM} [ -f /usr/share/debconf/confmodule ] && . 
/usr/share/debconf/confmodule [ -f /etc/default/clickhouse-keeper ] && . /etc/default/clickhouse-keeper diff --git a/packages/clickhouse-keeper.yaml b/packages/clickhouse-keeper.yaml index e9c2e9297551..9dad5382c082 100644 --- a/packages/clickhouse-keeper.yaml +++ b/packages/clickhouse-keeper.yaml @@ -1,6 +1,13 @@ # package sources should be placed in ${PWD}/root # nfpm should run from the same directory with a config name: "clickhouse-keeper" +description: | + Static clickhouse-keeper binary + A stand-alone clickhouse-keeper package + + +# +# Common packages config arch: "${DEB_ARCH}" # amd64, arm64 platform: "linux" version: "${CLICKHOUSE_VERSION_STRING}" @@ -9,29 +16,31 @@ homepage: "https://clickhouse.com" license: "Apache" section: "database" priority: "optional" +maintainer: "ClickHouse Dev Team " +deb: + fields: + Source: clickhouse +# Package specific content conflicts: - clickhouse-server suggests: - clickhouse-keeper-dbg -maintainer: "ClickHouse Dev Team " -description: | - Static clickhouse-keeper binary - A stand-alone clickhouse-keeper package - -deb: - fields: - Source: clickhouse - contents: - src: root/etc/clickhouse-keeper/keeper_config.xml dst: /etc/clickhouse-keeper/keeper_config.xml type: config|noreplace -- src: root/usr/bin/clickhouse-keeper - dst: /usr/bin/clickhouse-keeper - src: clickhouse-keeper.service dst: /lib/systemd/system/clickhouse-keeper.service +- src: root/usr/bin/clickhouse-keeper + dst: /usr/bin/clickhouse-keeper +- src: clickhouse-keeper + dst: /usr/bin/clickhouse-keeper-client + type: symlink +- src: clickhouse-keeper + dst: /usr/bin/clickhouse-keeper-converter + type: symlink # docs - src: ../AUTHORS dst: /usr/share/doc/clickhouse-keeper/AUTHORS diff --git a/packages/clickhouse-server.postinstall b/packages/clickhouse-server.postinstall index 2b9830faf3b0..d3b49db758f0 100644 --- a/packages/clickhouse-server.postinstall +++ b/packages/clickhouse-server.postinstall @@ -3,16 +3,21 @@ set -e # set -x PROGRAM=clickhouse-server -CLICKHOUSE_USER=${CLICKHOUSE_USER:=clickhouse} -CLICKHOUSE_GROUP=${CLICKHOUSE_GROUP:=${CLICKHOUSE_USER}} +CLICKHOUSE_USER=${CLICKHOUSE_USER:-clickhouse} +CLICKHOUSE_GROUP=${CLICKHOUSE_GROUP:-${CLICKHOUSE_USER}} # Please note that we don't support paths with whitespaces. This is rather ignorant. -CLICKHOUSE_CONFDIR=${CLICKHOUSE_CONFDIR:=/etc/clickhouse-server} -CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR:=/var/lib/clickhouse} -CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR:=/var/log/clickhouse-server} -CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR:=/usr/bin} -CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM:=clickhouse} +CLICKHOUSE_CONFDIR=${CLICKHOUSE_CONFDIR:-/etc/clickhouse-server} +CLICKHOUSE_DATADIR=${CLICKHOUSE_DATADIR:-/var/lib/clickhouse} +CLICKHOUSE_LOGDIR=${CLICKHOUSE_LOGDIR:-/var/log/clickhouse-server} +CLICKHOUSE_BINDIR=${CLICKHOUSE_BINDIR:-/usr/bin} +CLICKHOUSE_GENERIC_PROGRAM=${CLICKHOUSE_GENERIC_PROGRAM:-clickhouse} CLICKHOUSE_PIDDIR=/var/run/$PROGRAM +# Provide clickhouse-keeper +KEEPER_CONFDIR=${KEEPER_CONFDIR:-/etc/clickhouse-keeper} +KEEPER_DATADIR=${KEEPER_DATADIR:-/var/lib/clickhouse} +KEEPER_LOGDIR=${KEEPER_LOGDIR:-/var/log/clickhouse-keeper} + [ -f /usr/share/debconf/confmodule ] && . /usr/share/debconf/confmodule [ -f /etc/default/clickhouse ] && . /etc/default/clickhouse @@ -54,4 +59,20 @@ if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then fi done fi + + # Setup clickhouse-keeper directories + chown -R "${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}" "${KEEPER_CONFDIR}" + chmod 0755 "${KEEPER_CONFDIR}" + + if ! 
[ -d "${KEEPER_DATADIR}" ]; then + mkdir -p "${KEEPER_DATADIR}" + chown -R "${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}" "${KEEPER_DATADIR}" + chmod 0700 "${KEEPER_DATADIR}" + fi + + if ! [ -d "${KEEPER_LOGDIR}" ]; then + mkdir -p "${KEEPER_LOGDIR}" + chown -R "${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}" "${KEEPER_LOGDIR}" + chmod 0770 "${KEEPER_LOGDIR}" + fi fi diff --git a/packages/clickhouse-server.service b/packages/clickhouse-server.service index 7742d8b278ae..9a7d07e5cee0 100644 --- a/packages/clickhouse-server.service +++ b/packages/clickhouse-server.service @@ -17,6 +17,13 @@ User=clickhouse Group=clickhouse Restart=always RestartSec=30 +# The following ClickHouse directives should be used instead of forcing SIGKILL by systemd: +# - shutdown_wait_unfinished_queries +# - shutdown_wait_unfinished +TimeoutStopSec=infinity +# Disable forwarding signals by watchdog, since with default systemd's +# kill-mode control-group, systemd will send signal to all process in cgroup. +Environment=CLICKHOUSE_WATCHDOG_NO_FORWARD=1 # Since ClickHouse is systemd aware default 1m30sec may not be enough TimeoutStartSec=0 # %p is resolved to the systemd unit name @@ -29,6 +36,7 @@ EnvironmentFile=-/etc/default/clickhouse LimitCORE=infinity LimitNOFILE=500000 CapabilityBoundingSet=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE +AmbientCapabilities=CAP_NET_ADMIN CAP_IPC_LOCK CAP_SYS_NICE CAP_NET_BIND_SERVICE [Install] # ClickHouse should not start from the rescue shell (rescue.target). diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml index 018e88ef8285..5e2bc7c74125 100644 --- a/packages/clickhouse-server.yaml +++ b/packages/clickhouse-server.yaml @@ -1,6 +1,13 @@ # package sources should be placed in ${PWD}/root # nfpm should run from the same directory with a config name: "clickhouse-server" +description: | + Server binary for ClickHouse + ClickHouse is a column-oriented database management system + that allows generating analytical data reports in real time. + This package provides clickhouse common configuration files + +# Common packages config arch: "${DEB_ARCH}" # amd64, arm64 platform: "linux" version: "${CLICKHOUSE_VERSION_STRING}" @@ -9,24 +16,21 @@ homepage: "https://clickhouse.com" license: "Apache" section: "database" priority: "optional" +maintainer: "ClickHouse Dev Team " +deb: + fields: + Source: clickhouse -conflicts: -- clickhouse-keeper +# Package specific content replaces: - clickhouse-server-common - clickhouse-server-base provides: +- clickhouse-keeper - clickhouse-server-common recommends: - libcap2-bin -maintainer: "ClickHouse Dev Team " -description: | - Server binary for ClickHouse - ClickHouse is a column-oriented database management system - that allows generating analytical data reports in real time. 
diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml
index 018e88ef8285..5e2bc7c74125 100644
--- a/packages/clickhouse-server.yaml
+++ b/packages/clickhouse-server.yaml
@@ -1,6 +1,13 @@
 # package sources should be placed in ${PWD}/root
 # nfpm should run from the same directory with a config
 name: "clickhouse-server"
+description: |
+  Server binary for ClickHouse
+  ClickHouse is a column-oriented database management system
+  that allows generating analytical data reports in real time.
+  This package provides clickhouse common configuration files
+
+# Common packages config
 arch: "${DEB_ARCH}" # amd64, arm64
 platform: "linux"
 version: "${CLICKHOUSE_VERSION_STRING}"
@@ -9,24 +16,21 @@ homepage: "https://clickhouse.com"
 license: "Apache"
 section: "database"
 priority: "optional"
+maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
+deb:
+  fields:
+    Source: clickhouse
 
-conflicts:
-- clickhouse-keeper
+# Package specific content
 replaces:
 - clickhouse-server-common
 - clickhouse-server-base
 provides:
+- clickhouse-keeper
 - clickhouse-server-common
 recommends:
 - libcap2-bin
-maintainer: "ClickHouse Dev Team <packages+linux@clickhouse.com>"
-description: |
-  Server binary for ClickHouse
-  ClickHouse is a column-oriented database management system
-  that allows generating analytical data reports in real time.
-  This package provides clickhouse common configuration files
-
 overrides:
   deb:
     depends:
@@ -35,10 +39,6 @@ overrides:
     depends:
     - clickhouse-common-static = ${CLICKHOUSE_VERSION_STRING}
 
-deb:
-  fields:
-    Source: clickhouse
-
 contents:
 - src: root/etc/clickhouse-server/config.xml
   dst: /etc/clickhouse-server/config.xml
@@ -52,13 +52,25 @@ contents:
   dst: /lib/systemd/system/clickhouse-server.service
 - src: root/usr/bin/clickhouse-copier
   dst: /usr/bin/clickhouse-copier
-- src: clickhouse
-  dst: /usr/bin/clickhouse-keeper
-  type: symlink
 - src: root/usr/bin/clickhouse-report
   dst: /usr/bin/clickhouse-report
 - src: root/usr/bin/clickhouse-server
   dst: /usr/bin/clickhouse-server
+# clickhouse-keeper part
+- src: root/etc/clickhouse-keeper/keeper_config.xml
+  dst: /etc/clickhouse-keeper/keeper_config.xml
+  type: config|noreplace
+- src: clickhouse-keeper.service
+  dst: /lib/systemd/system/clickhouse-keeper.service
+- src: clickhouse
+  dst: /usr/bin/clickhouse-keeper
+  type: symlink
+- src: clickhouse
+  dst: /usr/bin/clickhouse-keeper-client
+  type: symlink
+- src: clickhouse
+  dst: /usr/bin/clickhouse-keeper-converter
+  type: symlink
 # docs
 - src: ../AUTHORS
   dst: /usr/share/doc/clickhouse-server/AUTHORS

diff --git a/packages/pre-build/example.sh b/packages/pre-build/example.sh
new file mode 100755
index 000000000000..2e361d6ca44b
--- /dev/null
+++ b/packages/pre-build/example.sh
@@ -0,0 +1,3 @@
+#!/usr/bin/env bash
+
+echo "This is an example pre-build script!"
diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp
index 466a0c194f73..a5564f47784c 100644
--- a/programs/benchmark/Benchmark.cpp
+++ b/programs/benchmark/Benchmark.cpp
@@ -1,12 +1,11 @@
 #include
 #include
-#include
 #include
-#include
 #include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -18,9 +17,7 @@
 #include
 #include
 #include
-#include
 #include
-#include
 #include
 #include
 #include
@@ -38,8 +35,6 @@
 #include
-
 #include

-namespace fs = std::filesystem;
-
 /** A tool for evaluating ClickHouse performance.
   * The tool emulates a case with a fixed amount of simultaneously executing queries.
   */
@@ -54,6 +49,7 @@ namespace DB
 {

 using Ports = std::vector<UInt16>;
+static constexpr std::string_view DEFAULT_CLIENT_NAME = "benchmark";

 namespace ErrorCodes
 {
@@ -79,7 +75,6 @@ class Benchmark : public Poco::Util::Application
             bool randomize_,
             size_t max_iterations_,
             double max_time_,
-            const String & json_path_,
             size_t confidence_,
             const String & query_id_,
             const String & query_to_execute_,
@@ -98,7 +93,6 @@
         cumulative(cumulative_),
         max_iterations(max_iterations_),
         max_time(max_time_),
-        json_path(json_path_),
         confidence(confidence_),
         query_id(query_id_),
         query_to_execute(query_to_execute_),
@@ -130,7 +124,7 @@
                 default_database_, user_, password_, quota_key_,
                 /* cluster_= */ "",
                 /* cluster_secret_= */ "",
-                /* client_name_= */ "benchmark",
+                /* client_name_= */ std::string(DEFAULT_CLIENT_NAME),
                 Protocol::Compression::Enable,
                 secure));

@@ -143,6 +137,8 @@
         global_context->makeGlobalContext();
         global_context->setSettings(settings);
+        global_context->setClientName(std::string(DEFAULT_CLIENT_NAME));
+        global_context->setQueryKindInitial();

         std::cerr << std::fixed << std::setprecision(3);
@@ -165,9 +161,6 @@
     int main(const std::vector<std::string> &) override
     {
-        if (!json_path.empty() && fs::exists(json_path)) /// Clear file with previous results
-            fs::remove(json_path);
-
         readQueries();
         runBenchmark();
         return 0;
@@ -197,7 +190,6 @@
     bool cumulative;
     size_t max_iterations;
     double max_time;
-    String json_path;
     size_t confidence;
     String query_id;
     String query_to_execute;
@@ -226,26 +218,23 @@
         size_t read_bytes = 0;
         size_t result_rows = 0;
         size_t result_bytes = 0;
-        double work_time = 0;

         using Sampler = ReservoirSampler<double>;
         Sampler sampler {1 << 16};

-        void add(double seconds, size_t read_rows_inc, size_t read_bytes_inc, size_t result_rows_inc, size_t result_bytes_inc)
+        void add(double duration, size_t read_rows_inc, size_t read_bytes_inc, size_t result_rows_inc, size_t result_bytes_inc)
         {
             ++queries;
-            work_time += seconds;
             read_rows += read_rows_inc;
             read_bytes += read_bytes_inc;
             result_rows += result_rows_inc;
             result_bytes += result_bytes_inc;
-            sampler.insert(seconds);
+            sampler.insert(duration);
         }

         void clear()
         {
             queries = 0;
-            work_time = 0;
             read_rows = 0;
             read_bytes = 0;
             result_rows = 0;
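With work_time gone, per-query latencies survive only inside the reservoir sampler, which is what the percentile report reads. A simplified sketch of the reservoir idea (illustrative only; the real ReservoirSampler in the code base is more elaborate):

#include <algorithm>
#include <cstddef>
#include <random>
#include <vector>

/// Keep at most `capacity` observations; once full, each new value replaces a
/// random slot with probability capacity/seen, so every observation ends up
/// in the sample with equal probability and quantiles stay estimable without
/// storing every data point.
class LatencyReservoir
{
public:
    explicit LatencyReservoir(size_t capacity_) : capacity(capacity_) {}

    void insert(double value)
    {
        ++seen;
        if (samples.size() < capacity)
        {
            samples.push_back(value);
            return;
        }
        std::uniform_int_distribution<size_t> dist(0, seen - 1);
        size_t pos = dist(rng);
        if (pos < capacity)
            samples[pos] = value;
    }

    double quantileNearest(double level)
    {
        if (samples.empty())
            return 0.0;
        std::sort(samples.begin(), samples.end());
        return samples[static_cast<size_t>(level * (samples.size() - 1))];
    }

private:
    size_t capacity;
    size_t seen = 0;
    std::vector<double> samples;
    std::mt19937_64 rng{123};
};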
@@ -331,10 +320,13 @@
             return false;
         }

-        if (delay > 0 && delay_watch.elapsedSeconds() > delay)
+        double seconds = delay_watch.elapsedSeconds();
+        if (delay > 0 && seconds > delay)
         {
             printNumberOfQueriesExecuted(queries_executed);
-            cumulative ? report(comparison_info_total) : report(comparison_info_per_interval);
+            cumulative
+                ? report(comparison_info_total, total_watch.elapsedSeconds())
+                : report(comparison_info_per_interval, seconds);
             delay_watch.restart();
         }
     }
@@ -350,16 +342,7 @@
         try
         {
             for (size_t i = 0; i < concurrency; ++i)
-            {
-                EntryPtrs connection_entries;
-                connection_entries.reserve(connections.size());
-
-                for (const auto & connection : connections)
-                    connection_entries.emplace_back(std::make_shared<Entry>(
-                        connection->get(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings))));
-
-                pool.scheduleOrThrowOnError([this, connection_entries]() mutable { thread(connection_entries); });
-            }
+                pool.scheduleOrThrowOnError([this]() mutable { thread(); });
         }
         catch (...)
         {
@@ -389,21 +372,18 @@
         pool.wait();
         total_watch.stop();

-        if (!json_path.empty())
-            reportJSON(comparison_info_total, json_path);
-
         printNumberOfQueriesExecuted(queries_executed);
-        report(comparison_info_total);
+        report(comparison_info_total, total_watch.elapsedSeconds());
     }

-    void thread(EntryPtrs & connection_entries)
+    void thread()
     {
         Query query;

         /// Randomly choosing connection index
         pcg64 generator(randomSeed());
-        std::uniform_int_distribution<size_t> distribution(0, connection_entries.size() - 1);
+        std::uniform_int_distribution<size_t> distribution(0, connections.size() - 1);

         /// In these threads we do not accept INT signal.
         sigset_t sig_set;
@@ -423,15 +403,13 @@
                 extracted = queue.tryPop(query, 100);

                 if (shutdown || (max_iterations && queries_executed == max_iterations))
-                {
                     return;
-                }
             }

             const auto connection_index = distribution(generator);
             try
             {
-                execute(connection_entries, query, connection_index);
+                execute(query, connection_index);
                 consecutive_errors = 0;
             }
             catch (...)
@@ -460,17 +438,18 @@
         }
     }

-    void execute(EntryPtrs & connection_entries, Query & query, size_t connection_index)
+    void execute(Query & query, size_t connection_index)
     {
         Stopwatch watch;

-        Connection & connection = **connection_entries[connection_index];
+        ConnectionPool::Entry entry = connections[connection_index]->get(
+            ConnectionTimeouts::getTCPTimeoutsWithoutFailover(settings));

         if (reconnect)
-            connection.disconnect();
+            entry->disconnect();

         RemoteQueryExecutor executor(
-            connection, query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage);
+            *entry, query, {}, global_context, nullptr, Scalars(), Tables(), query_processing_stage);

         if (!query_id.empty())
             executor.setQueryId(query_id);
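The rewritten execute() no longer receives a connection pinned to its worker thread: each query checks an entry out of the pool and releases it when the entry goes out of scope. A sketch of that acquire-per-query pattern with deliberately simplified stand-in types (not the real ConnectionPool API):

#include <memory>

struct Connection
{
    void disconnect() {}
    /// ... send query, receive result ...
};

class Pool
{
public:
    using Entry = std::shared_ptr<Connection>;
    Entry get() { return std::make_shared<Connection>(); } /// simplified
};

void runOneQuery(Pool & pool, bool reconnect)
{
    Pool::Entry entry = pool.get();   /// acquired for this query only
    if (reconnect)
        entry->disconnect();          /// force a fresh connection first
    /// ... run the query over *entry; the entry is handed back to the pool
    /// when it goes out of scope at the end of this function.
}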
@@ -485,19 +464,19 @@

         executor.finish();

-        double seconds = (display_client_side_time || progress.elapsed_ns == 0)
+        double duration = (display_client_side_time || progress.elapsed_ns == 0)
             ? watch.elapsedSeconds()
             : progress.elapsed_ns / 1e9;

         std::lock_guard lock(mutex);

         size_t info_index = round_robin ? 0 : connection_index;
-        comparison_info_per_interval[info_index]->add(seconds, progress.read_rows, progress.read_bytes, info.rows, info.bytes);
-        comparison_info_total[info_index]->add(seconds, progress.read_rows, progress.read_bytes, info.rows, info.bytes);
-        t_test.add(info_index, seconds);
+        comparison_info_per_interval[info_index]->add(duration, progress.read_rows, progress.read_bytes, info.rows, info.bytes);
+        comparison_info_total[info_index]->add(duration, progress.read_rows, progress.read_bytes, info.rows, info.bytes);
+        t_test.add(info_index, duration);
     }

-    void report(MultiStats & infos)
+    void report(MultiStats & infos, double seconds)
     {
         std::lock_guard lock(mutex);

@@ -510,8 +489,6 @@
             if (0 == info->queries)
                 return;

-            double seconds = info->work_time / concurrency;
-
             std::string connection_description = connections[i]->getDescription();
             if (round_robin)
             {
@@ -525,10 +502,10 @@
             }
             std::cerr
                 << connection_description << ", "
-                << "queries " << info->queries << ", ";
+                << "queries: " << info->queries << ", ";
             if (info->errors)
             {
-                std::cerr << "errors " << info->errors << ", ";
+                std::cerr << "errors: " << info->errors << ", ";
             }
             std::cerr
                 << "QPS: " << (info->queries / seconds) << ", "
@@ -567,62 +544,6 @@
         }
     }

-    void reportJSON(MultiStats & infos, const std::string & filename)
-    {
-        WriteBufferFromFile json_out(filename);
-
-        std::lock_guard lock(mutex);
-
-        auto print_key_value = [&](auto key, auto value, bool with_comma = true)
-        {
-            json_out << double_quote << key << ": " << value << (with_comma ? ",\n" : "\n");
-        };
-
-        auto print_percentile = [&json_out](Stats & info, auto percent, bool with_comma = true)
-        {
-            json_out << "\"" << percent << "\": " << info.sampler.quantileNearest(percent / 100.0) << (with_comma ? ",\n" : "\n");
-        };
-
-        json_out << "{\n";
-
-        for (size_t i = 0; i < infos.size(); ++i)
-        {
-            const auto & info = infos[i];
-
-            json_out << double_quote << connections[i]->getDescription() << ": {\n";
-            json_out << double_quote << "statistics" << ": {\n";
-
-            double seconds = info->work_time / concurrency;
-
-            print_key_value("QPS", info->queries.load() / seconds);
-            print_key_value("RPS", info->read_rows / seconds);
-            print_key_value("MiBPS", info->read_bytes / seconds / 1048576);
-            print_key_value("RPS_result", info->result_rows / seconds);
-            print_key_value("MiBPS_result", info->result_bytes / seconds / 1048576);
-            print_key_value("num_queries", info->queries.load());
-            print_key_value("num_errors", info->errors, false);
-
-            json_out << "},\n";
-            json_out << double_quote << "query_time_percentiles" << ": {\n";
-
-            if (info->queries != 0)
-            {
-                for (int percent = 0; percent <= 90; percent += 10)
-                    print_percentile(*info, percent);
-
-                print_percentile(*info, 95);
-                print_percentile(*info, 99);
-                print_percentile(*info, 99.9);
-                print_percentile(*info, 99.99, false);
-            }
-
-            json_out << "}\n";
-            json_out << (i == infos.size() - 1 ? "}\n" : "},\n");
-        }
-
-        json_out << "}\n";
-    }
-
 public:

     ~Benchmark() override
@@ -675,7 +596,6 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
         ("iterations,i", value<size_t>()->default_value(0), "amount of queries to be executed")
         ("timelimit,t", value<double>()->default_value(0.), "stop launch of queries after specified time limit")
         ("randomize,r", "randomize order of execution")
-        ("json", value<std::string>()->default_value(""), "write final report to specified file in JSON format")
         ("host,h", value<Strings>()->multitoken(), "list of hosts")
         ("port", value<Ports>()->multitoken(), "list of ports")
         ("roundrobin", "Instead of comparing queries for different --host/--port just pick one random --host/--port for every query and send query to it.")
@@ -739,7 +659,6 @@
         options.count("randomize"),
         options["iterations"].as<size_t>(),
         options["timelimit"].as<double>(),
-        options["json"].as<std::string>(),
         options["confidence"].as<size_t>(),
         options["query_id"].as<std::string>(),
         options["query"].as<std::string>(),

diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index 14516bfa9390..ac5c9f99e0e1 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -125,7 +125,7 @@ void Client::showWarnings()
             std::cout << std::endl;
         }
     }
-    catch (...)
+    catch (...) // NOLINT(bugprone-empty-catch)
    {
         /// Ignore exception
     }
@@ -790,7 +790,7 @@ bool Client::processWithFuzzing(const String & full_query)

             WriteBufferFromOStream cerr_buf(std::cerr, 4096);
             fuzz_base->dumpTree(cerr_buf);
-            cerr_buf.next();
+            cerr_buf.finalize();

             fmt::print(
                 stderr,
@@ -812,6 +812,11 @@
         }
         catch (...)
         {
+            if (!ast_to_process)
+                fmt::print(stderr,
+                    "Error while forming new query: {}\n",
+                    getCurrentExceptionMessage(true));
+
             // Some functions (e.g. protocol parsers) don't throw, but
             // set last_exception instead, so we'll also do it here for
             // uniformity.
@@ -928,7 +933,7 @@
             std::cout << std::endl;
             WriteBufferFromOStream ast_buf(std::cout, 4096);
             formatAST(*query, ast_buf, false /*highlight*/);
-            ast_buf.next();
+            ast_buf.finalize();
             if (const auto * insert = query->as<ASTInsertQuery>())
             {
                 /// For inserts with data it's really useful to have the data itself available in the logs, as formatAST doesn't print it
@@ -989,6 +994,8 @@ void Client::addOptions(OptionsDescription & options_description)
         ("user,u", po::value<std::string>()->default_value("default"), "user")
         ("password", po::value<std::string>(), "password")
         ("ask-password", "ask-password")
+        ("ssh-key-file", po::value<std::string>(), "File containing the ssh private key needed for authentication. If not set, password authentication is used.")
+        ("ssh-key-passphrase", po::value<std::string>(), "Passphrase for the imported ssh key.")
         ("quota_key", po::value<std::string>(), "A string to differentiate quotas when the user have keyed quotas configured on server")

         ("max_client_network_bandwidth", po::value<int>(), "the maximum speed of data exchange over the network for the client in bytes per second.")
@@ -1131,6 +1138,10 @@ void Client::processOptions(const OptionsDescription & options_description,
         config().setString("password", options["password"].as<std::string>());
     if (options.count("ask-password"))
         config().setBool("ask-password", true);
+    if (options.count("ssh-key-file"))
+        config().setString("ssh-key-file", options["ssh-key-file"].as<std::string>());
+    if (options.count("ssh-key-passphrase"))
+        config().setString("ssh-key-passphrase", options["ssh-key-passphrase"].as<std::string>());
     if (options.count("quota_key"))
         config().setString("quota_key", options["quota_key"].as<std::string>());
     if (options.count("max_client_network_bandwidth"))
@@ -1173,18 +1184,18 @@
     {
         String traceparent = options["opentelemetry-traceparent"].as<std::string>();
         String error;
-        if (!global_context->getClientInfo().client_trace_context.parseTraceparentHeader(traceparent, error))
+        if (!global_context->getClientTraceContext().parseTraceparentHeader(traceparent, error))
             throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot parse OpenTelemetry traceparent '{}': {}", traceparent, error);
     }

     if (options.count("opentelemetry-tracestate"))
-        global_context->getClientInfo().client_trace_context.tracestate = options["opentelemetry-tracestate"].as<std::string>();
+        global_context->getClientTraceContext().tracestate = options["opentelemetry-tracestate"].as<std::string>();
 }


 void Client::processConfig()
 {
-    if (config().has("query") && config().has("queries-file"))
+    if (!queries.empty() && config().has("queries-file"))
         throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time");

     /// Batch mode is enabled if one of the following is true:
@@ -1195,9 +1206,9 @@
     /// - --queries-file command line option is present.
     ///   The value of the option is used as file with query (or of multiple queries) to execute.

-    delayed_interactive = config().has("interactive") && (config().has("query") || config().has("queries-file"));
+    delayed_interactive = config().has("interactive") && (!queries.empty() || config().has("queries-file"));
     if (stdin_is_a_tty
-        && (delayed_interactive || (!config().has("query") && queries_files.empty())))
+        && (delayed_interactive || (queries.empty() && queries_files.empty())))
     {
         is_interactive = true;
     }
@@ -1238,10 +1249,10 @@
             global_context->getSettingsRef().max_insert_block_size);
     }

-    ClientInfo & client_info = global_context->getClientInfo();
-    client_info.setInitialQuery();
-    client_info.quota_key = config().getString("quota_key", "");
-    client_info.query_kind = query_kind;
+    global_context->setClientName(std::string(DEFAULT_CLIENT_NAME));
+    global_context->setQueryKindInitial();
+    global_context->setQuotaClientKey(config().getString("quota_key", ""));
+    global_context->setQueryKind(query_kind);
 }
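The processConfig() hunk above replaces direct writes to ClientInfo fields with dedicated Context setters. The shape of that refactoring, reduced to a toy example (the real DB::Context carries far more state than this):

#include <string>

struct ClientInfo
{
    std::string client_name;
    std::string quota_key;
};

class Context
{
public:
    /// Narrow, intention-revealing mutations ...
    void setClientName(const std::string & name) { info.client_name = name; }
    void setQuotaClientKey(const std::string & key) { info.quota_key = key; }

    /// ... while readers get only a const view, so callers can no longer
    /// reach in and mutate arbitrary fields as the old code did.
    const ClientInfo & getClientInfo() const { return info; }

private:
    ClientInfo info;
};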
@@ -1404,10 +1415,9 @@ void Client::readArguments(
         else if (arg == "--password" && ((arg_num + 1) >= argc || std::string_view(argv[arg_num + 1]).starts_with('-')))
         {
             common_arguments.emplace_back(arg);
-            /// No password was provided by user. Add '\n' as implicit password,
-            /// which encodes that client should ask user for the password.
-            /// '\n' is used because there is hardly a chance that a user would use '\n' as a password.
-            common_arguments.emplace_back("\n");
+            /// if the value of --password is omitted, the password will be asked before
+            /// connection start
+            common_arguments.emplace_back(ConnectionParameters::ASK_PASSWORD);
         }
         else
             common_arguments.emplace_back(arg);

diff --git a/programs/copier/ClusterCopier.cpp b/programs/copier/ClusterCopier.cpp
index efe7121cace1..29e31cf97e3b 100644
--- a/programs/copier/ClusterCopier.cpp
+++ b/programs/copier/ClusterCopier.cpp
@@ -608,6 +608,8 @@ TaskStatus ClusterCopier::tryMoveAllPiecesToDestinationTable(const TaskTable & t
             ss << "ALTER TABLE " << getQuotedTable(original_table) << ((partition_name == "'all'") ? " DROP PARTITION ID " : " DROP PARTITION ") << partition_name;

             UInt64 num_shards_drop_partition = executeQueryOnCluster(task_table.cluster_push, ss.str(), task_cluster->settings_push, ClusterExecutionMode::ON_EACH_SHARD);
+            if (num_shards_drop_partition != task_table.cluster_push->getShardCount())
+                return TaskStatus::Error;

             LOG_INFO(log, "Drop partition {} in original table {} have been executed successfully on {} shards of {}",
                 partition_name, getQuotedTable(original_table), num_shards_drop_partition, task_table.cluster_push->getShardCount());
@@ -2023,7 +2025,7 @@ UInt64 ClusterCopier::executeQueryOnCluster(
         {
             connections.emplace_back(std::make_shared<Connection>(
                 node.host_name, node.port, node.default_database,
-                node.user, node.password, node.quota_key, node.cluster, node.cluster_secret,
+                node.user, node.password, ssh::SSHKey(), node.quota_key, node.cluster, node.cluster_secret,
                 "ClusterCopier", node.compression, node.secure
             ));
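The added check in tryMoveAllPiecesToDestinationTable() turns a partially executed DROP PARTITION into a hard task failure instead of an optimistic log line. Reduced to its essence (simplified signatures, not the actual copier types):

#include <cstdint>

enum class TaskStatus { Finished, Error };

/// A DDL statement that reached only some shards leaves the cluster in a
/// mixed state; treat anything short of full coverage as an error so the
/// task is retried rather than reported as a success.
TaskStatus verifyExecutedOnAllShards(uint64_t executed_shards, uint64_t total_shards)
{
    if (executed_shards != total_shards)
        return TaskStatus::Error;
    return TaskStatus::Finished;
}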
diff --git a/programs/copier/ClusterCopierApp.cpp b/programs/copier/ClusterCopierApp.cpp
index 64071423b8e2..8f24d13d379f 100644
--- a/programs/copier/ClusterCopierApp.cpp
+++ b/programs/copier/ClusterCopierApp.cpp
@@ -89,7 +89,7 @@ void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options)
         .argument("task-path").binding("task-path"));
     options.addOption(Poco::Util::Option("task-file", "", "path to task file for uploading in ZooKeeper to task-path")
         .argument("task-file").binding("task-file"));
-    options.addOption(Poco::Util::Option("task-upload-force", "", "Force upload task-file even node already exists")
+    options.addOption(Poco::Util::Option("task-upload-force", "", "Force upload task-file even node already exists. Default is false.")
         .argument("task-upload-force").binding("task-upload-force"));
     options.addOption(Poco::Util::Option("safe-mode", "", "disables ALTER DROP PARTITION in case of errors")
         .binding("safe-mode"));

diff --git a/programs/diagnostics/internal/platform/data/file_test.go b/programs/diagnostics/internal/platform/data/file_test.go
index 938c34281f14..9e305b1a5daa 100644
--- a/programs/diagnostics/internal/platform/data/file_test.go
+++ b/programs/diagnostics/internal/platform/data/file_test.go
@@ -132,13 +132,6 @@ func TestConfigFileFrameCopy(t *testing.T) {
 		configFrame, errs := data.NewConfigFileFrame(path.Join(cwd, "../../../testdata", "configs", "xml"))
 		require.Empty(t, errs)
 		i := 0
-		sizes := map[string]int64{
-			"users.xml":            int64(2017),
-			"default-password.xml": int64(188),
-			"config.xml":           int64(61662),
-			"server-include.xml":   int64(168),
-			"user-include.xml":     int64(559),
-		}
 		var checkedFiles []string
 		for {
 			values, ok, err := configFrame.Next()
@@ -153,8 +146,6 @@ func TestConfigFileFrameCopy(t *testing.T) {
 			newPath := path.Join(tmrDir, fileName)
 			err = configFile.Copy(newPath, true)
 			require.FileExists(t, newPath)
-			destInfo, _ := os.Stat(newPath)
-			require.Equal(t, sizes[fileName], destInfo.Size())
 			require.Nil(t, err)
 			bytes, err := ioutil.ReadFile(newPath)
 			require.Nil(t, err)
@@ -186,13 +177,6 @@ func TestConfigFileFrameCopy(t *testing.T) {
 		configFrame, errs := data.NewConfigFileFrame(path.Join(cwd, "../../../testdata", "configs", "yaml"))
 		require.Empty(t, errs)
 		i := 0
-		sizes := map[string]int64{
-			"users.yaml":            int64(1023),
-			"default-password.yaml": int64(132),
-			"config.yaml":           int64(42512),
-			"server-include.yaml":   int64(21),
-			"user-include.yaml":     int64(120),
-		}
 		var checkedFiles []string
 		for {
 			values, ok, err := configFrame.Next()
@@ -207,8 +191,6 @@ func TestConfigFileFrameCopy(t *testing.T) {
 			newPath := path.Join(tmrDir, fileName)
 			err = configFile.Copy(newPath, true)
 			require.FileExists(t, newPath)
-			destInfo, _ := os.Stat(newPath)
-			require.Equal(t, sizes[fileName], destInfo.Size())
 			require.Nil(t, err)
 			bytes, err := ioutil.ReadFile(newPath)
 			require.Nil(t, err)

diff --git a/programs/diagnostics/testdata/configs/xml/config.xml b/programs/diagnostics/testdata/configs/xml/config.xml
index 21a0821f89de..ae09d2070915 100644
--- a/programs/diagnostics/testdata/configs/xml/config.xml
+++ b/programs/diagnostics/testdata/configs/xml/config.xml
@@ -649,73 +649,6 @@
-        <test_cluster_two_shards_localhost>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_cluster_two_shards_localhost>
-        <test_cluster_two_shards>
-            <shard>
-                <replica>
-                    <host>127.0.0.1</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <replica>
-                    <host>127.0.0.2</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_cluster_two_shards>
-        <test_cluster_two_shards_internal_replication>
-            <shard>
-                <internal_replication>true</internal_replication>
-                <replica>
-                    <host>127.0.0.1</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <internal_replication>true</internal_replication>
-                <replica>
-                    <host>127.0.0.2</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_cluster_two_shards_internal_replication>
-        <test_shard_localhost_secure>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9440</port>
-                    <secure>1</secure>
-                </replica>
-            </shard>
-        </test_shard_localhost_secure>
-        <test_unavailable_shard>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>localhost</host>
-                    <port>1</port>
-                </replica>
-            </shard>
-        </test_unavailable_shard>
     </remote_servers>

+    <url_scheme_mappers>
+        <s3>
+            <to>https://{bucket}.s3.amazonaws.com</to>
+        </s3>
+        <gs>
+            <to>https://{bucket}.storage.googleapis.com</to>
+        </gs>
+        <oss>
+            <to>https://{bucket}.oss.aliyuncs.com</to>
+        </oss>
+    </url_scheme_mappers>

-    <max_concurrent_queries>100</max_concurrent_queries>
+    <max_concurrent_queries>1000</max_concurrent_queries>
     <max_thread_pool_size>10000</max_thread_pool_size>
+    <async_load_databases>false</async_load_databases>
+
     <path>/var/lib/clickhouse/</path>
@@ -446,8 +460,6 @@
             <account_name>account</account_name>
             <account_key>pass123</account_key>
             <metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
-            <cache_enabled>true</cache_enabled>
-            <cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
             <skip_access_check>false</skip_access_check>
@@ -805,7 +817,7 @@
     -->
-        <test_cluster_one_shard_three_replicas_localhost>
-            <shard>
-                <internal_replication>false</internal_replication>
-                <replica>
-                    <host>127.0.0.1</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>127.0.0.2</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>127.0.0.3</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_cluster_one_shard_three_replicas_localhost>
-        <parallel_replicas>
-            <shard>
-                <internal_replication>false</internal_replication>
-                <replica>
-                    <host>127.0.0.1</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>127.0.0.2</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>127.0.0.3</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>127.0.0.4</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>127.0.0.5</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>127.0.0.6</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>127.0.0.7</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>127.0.0.8</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>127.0.0.9</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>127.0.0.10</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>127.0.0.11</host>
-                    <port>1234</port>
-                </replica>
-            </shard>
-        </parallel_replicas>
-        <test_cluster_two_shards_localhost>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_cluster_two_shards_localhost>
-        <test_cluster_two_shards>
-            <shard>
-                <replica>
-                    <host>127.0.0.1</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <replica>
-                    <host>127.0.0.2</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_cluster_two_shards>
-        <test_cluster_two_shards_internal_replication>
-            <shard>
-                <internal_replication>true</internal_replication>
-                <replica>
-                    <host>127.0.0.1</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <internal_replication>true</internal_replication>
-                <replica>
-                    <host>127.0.0.2</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_cluster_two_shards_internal_replication>
-        <test_shard_localhost_secure>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9440</port>
-                    <secure>1</secure>
-                </replica>
-            </shard>
-        </test_shard_localhost_secure>
-        <test_unavailable_shard>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>localhost</host>
-                    <port>1</port>
-                </replica>
-            </shard>
-        </test_unavailable_shard>
@@ -1165,6 +1036,14 @@
         <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+
+        <max_size_rows>1048576</max_size_rows>
+
+        <reserved_size_rows>8192</reserved_size_rows>
+
+        <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+
+        <flush_on_crash>false</flush_on_crash>
     </query_log>
@@ -1178,6 +1057,11 @@
         <partition_by>toYYYYMM(event_date)</partition_by>
         <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <max_size_rows>1048576</max_size_rows>
+        <reserved_size_rows>8192</reserved_size_rows>
+        <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+
+        <flush_on_crash>false</flush_on_crash>
@@ -1223,7 +1119,11 @@
         <database>system</database>
         <table>metric_log</table>
         <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <max_size_rows>1048576</max_size_rows>
+        <reserved_size_rows>8192</reserved_size_rows>
+        <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
         <collect_interval_milliseconds>1000</collect_interval_milliseconds>
+        <flush_on_crash>false</flush_on_crash>
@@ -1290,6 +1206,10 @@
         <partition_by>toYYYYMM(event_date)</partition_by>
         <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        <max_size_rows>1048576</max_size_rows>
+        <reserved_size_rows>8192</reserved_size_rows>
+        <buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
+        <flush_on_crash>false</flush_on_crash>

+        <backup_log>
+            <database>system</database>
+            <table>backup_log</table>
+            <partition_by>toYYYYMM(event_date)</partition_by>
+            <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+        </backup_log>

diff --git a/programs/server/config.yaml.example b/programs/server/config.yaml.example
index 88287d040888..b472b6f4a453 100644
--- a/programs/server/config.yaml.example
+++ b/programs/server/config.yaml.example
@@ -515,7 +515,7 @@ remap_executable: false
 # https://clickhouse.com/docs/en/operations/table_engines/distributed/
 remote_servers:
     # Test only shard config for testing distributed storage
-    test_shard_localhost:
+    default:
         # Inter-server per-cluster secret for Distributed queries
         # default: no secret (no authentication will be performed)
@@ -546,46 +546,8 @@
                 port: 9000
             # Optional. Priority of the replica for load_balancing. Default: 1 (less value has more priority).
             # priority: 1
-    test_cluster_two_shards_localhost:
-        shard:
-            - replica:
-                host: localhost
-                port: 9000
-            - replica:
-                host: localhost
-                port: 9000
-    test_cluster_two_shards:
-        shard:
-            - replica:
-                host: 127.0.0.1
-                port: 9000
-            - replica:
-                host: 127.0.0.2
-                port: 9000
-    test_cluster_two_shards_internal_replication:
-        shard:
-            - internal_replication: true
-              replica:
-                host: 127.0.0.1
-                port: 9000
-            - internal_replication: true
-              replica:
-                host: 127.0.0.2
-                port: 9000
-    test_shard_localhost_secure:
-        shard:
-            replica:
-                host: localhost
-                port: 9440
-                secure: 1
-    test_unavailable_shard:
-        shard:
-            - replica:
-                host: localhost
-                port: 9000
-            - replica:
-                host: localhost
-                port: 1
+            # Use SSL? Default: no
+            # secure: 0

 # The list of hosts allowed to use in URL-related storage engines and table functions.
 # If this section is not present in configuration, all hosts are allowed.
@@ -677,7 +639,6 @@ default_session_timeout: 60
 #     metrics - send data from table system.metrics
 #     events - send data from table system.events
 #     asynchronous_metrics - send data from table system.asynchronous_metrics
-#     status_info - send data from different component from CH, ex: Dictionaries status
 # prometheus:
 #     endpoint: /metrics
@@ -686,7 +647,6 @@
 #     metrics: true
 #     events: true
 #     asynchronous_metrics: true
-#     status_info: true
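The per-log-table knobs added to config.xml above (max_size_rows, reserved_size_rows, buffer_size_rows_flush_threshold, flush_on_crash) bound the in-memory buffer of each system log. A rough model of the intended policy, assuming the thresholds behave as their names suggest (simplified; not the actual SystemLog implementation):

#include <cstddef>
#include <string>
#include <vector>

/// Simplified model of a bounded system-log buffer.
struct LogBufferPolicy
{
    size_t max_size_rows = 1048576;                   /// hard cap on buffered rows
    size_t reserved_size_rows = 8192;                 /// capacity reserved up front
    size_t buffer_size_rows_flush_threshold = 524288; /// background flush trigger
    bool flush_on_crash = false;                      /// also flush during crash handling?
};

struct LogBuffer
{
    explicit LogBuffer(const LogBufferPolicy & policy_) : policy(policy_)
    {
        rows.reserve(policy.reserved_size_rows);
    }

    /// Returns true when a background flush should be scheduled.
    bool add(std::string row)
    {
        if (rows.size() >= policy.max_size_rows)
            return true; /// buffer is at its hard cap; flush before anything is lost
        rows.push_back(std::move(row));
        return rows.size() >= policy.buffer_size_rows_flush_threshold;
    }

    LogBufferPolicy policy;
    std::vector<std::string> rows;
};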
 # Query log. Used only for queries with setting log_queries = 1.
 query_log:

diff --git a/programs/server/dashboard.html b/programs/server/dashboard.html
index 951b7db3aa38..555d039cec3c 100644
--- a/programs/server/dashboard.html
+++ b/programs/server/dashboard.html
@@ -11,8 +11,10 @@
         --background: linear-gradient(to bottom, #00CCFF, #00D0D0);
         --chart-background: white;
         --shadow-color: rgba(0, 0, 0, 0.25);
+        --moving-shadow-color: rgba(0, 0, 0, 0.5);
         --input-shadow-color: rgba(0, 255, 0, 1);
-        --error-color: white;
+        --error-color: red;
+        --auth-error-color: white;
         --legend-background: rgba(255, 255, 255, 0.75);
         --title-color: #666;
         --text-color: black;
@@ -33,6 +35,7 @@
         --background: #151C2C;
         --chart-background: #1b2834;
         --shadow-color: rgba(0, 0, 0, 0);
+        --moving-shadow-color: rgba(255, 255, 255, 0.25);
         --input-shadow-color: rgba(255, 128, 0, 0.25);
         --error-color: #F66;
         --legend-background: rgba(255, 255, 255, 0.25);
@@ -90,6 +93,21 @@
         position: relative;
     }

+    .chart-maximized {
+        flex: 1 100%;
+        height: 75vh
+    }
+
+    .chart-moving {
+        z-index: 11;
+        box-shadow: 0 0 2rem var(--moving-shadow-color);
+    }
+
+    .chart-displaced {
+        opacity: 75%;
+        filter: blur(1px);
+    }
+
     .chart div { position: absolute; }

     .inputs {
@@ -229,8 +247,8 @@
         filter: contrast(125%);
     }

-    #add, #reload {
-        padding: .25rem 0.5rem;
+    #add, #reload, #edit {
+        padding: 0.25rem 0.5rem;
         text-align: center;
         font-weight: bold;
         user-select: none;
@@ -243,13 +261,10 @@
         margin-right: 1rem !important;
         margin-left: 0rem;
         margin-bottom: 1rem;
+        height: 3ex;
     }

-    /* .unconnected #reload {
-        margin-left: 3px;
-    } */
-
-    #add:hover, #reload:hover {
+    #add:hover, #reload:hover, #edit:hover {
         background: var(--button-background-color);
     }

@@ -258,7 +273,7 @@
         width: 60%;
         padding: .5rem;

-        color: var(--error-color);
+        color: var(--auth-error-color);

         display: flex;
         flex-flow: row nowrap;
@@ -305,6 +320,7 @@
     }
     .chart-buttons a {
         margin-right: 0.25rem;
+        user-select: none;
     }
     .chart-buttons a:hover {
         color: var(--chart-button-hover-color);
@@ -332,18 +348,21 @@
         padding: 2rem;
     }

-    .query-editor textarea {
-        grid-row: 1;
-        grid-column: 1 / span 2;
-        z-index: 11;
+    textarea {
         padding: 0.5rem;
         outline: none;
         border: none;
         font-size: 12pt;
-        border-bottom: 1px solid var(--edit-title-border);
         background: var(--chart-background);
         color: var(--text-color);
         resize: none;
+    }
+
+    .query-editor textarea {
+        grid-row: 1;
+        grid-column: 1 / span 2;
+        z-index: 11;
+        border-bottom: 1px solid var(--edit-title-border);
         margin: 0;
     }

@@ -366,10 +385,41 @@
         filter: contrast(125%);
     }

+    .edit-cancel {
+        cursor: pointer;
+        background: var(--new-chart-background-color);
+    }
+    .edit-cancel:hover {
+        filter: contrast(125%);
+    }
+
     .nowrap {
         white-space: pre;
     }

+    #mass-editor {
+        display: none;
+        grid-template-columns: auto fit-content(10%) fit-content(10%);
+        grid-template-rows: auto fit-content(10%);
+        row-gap: 1rem;
+        column-gap: 1rem;
+    }
+
+    #mass-editor-textarea {
+        width: 100%;
+        height: 100%;
+        grid-row: 1;
+        grid-column: 1 / span 3;
+    }
+
+    #mass-editor input {
+        padding: 0.5rem;
+    }
+
+    #mass-editor-message {
+        color: var(--auth-error-color);
+    }
+
     /* Source: https://cdn.jsdelivr.net/npm/uplot@1.6.21/dist/uPlot.min.css
      * It is copy-pasted to lower the number of requests.
      */
@@ -388,6 +438,7 @@
     <div id="toggle-dark">🌚</div><div id="toggle-light">🌞</div>
+    <input id="edit" type="button" value="✎" style="display: none;">
@@ -396,6 +447,12 @@
+    <div id="mass-editor">
+        <textarea id="mass-editor-textarea" spellcheck="false"></textarea>
+        <span id="mass-editor-message">&nbsp;&nbsp;</span>
+        <input type="submit" id="mass-editor-confirm" value="Apply">
+        <input type="button" class="edit-cancel" value="Cancel">
+    </div>