From a604c4fae2913bd5cc65b5cc246416576883e744 Mon Sep 17 00:00:00 2001 From: feiniaofeiafei <53502832+feiniaofeiafei@users.noreply.github.com> Date: Fri, 9 Aug 2024 10:39:27 +0800 Subject: [PATCH 01/94] [Fix](nereids) fix bind expression compare dbname ignore cluster (#39114) This pr is similar with #23008, ignoring cluster_name in binding when compare dbname. e.g. in this sql, the "dbname" should be viewed same db when comparing with "default_cluster:dbname" ```sql select dbname.test_db_name_ignore_cluster.a from `default_cluster:dbname`.test_db_name_ignore_cluster; ``` --- .../rules/analysis/ExpressionAnalyzer.java | 28 ++++++++++--- .../trees/plans/commands/UpdateCommand.java | 3 +- .../bind_dbname_ignore_cluster.out | 4 ++ .../bind_dbname_ignore_cluster.groovy | 41 +++++++++++++++++++ 4 files changed, 70 insertions(+), 6 deletions(-) create mode 100644 regression-test/data/nereids_rules_p0/bindExpression/bind_dbname_ignore_cluster.out create mode 100644 regression-test/suites/nereids_rules_p0/bindExpression/bind_dbname_ignore_cluster.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ExpressionAnalyzer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ExpressionAnalyzer.java index 6dd0963b2fa5f2b..afec568545bc0fc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ExpressionAnalyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ExpressionAnalyzer.java @@ -674,10 +674,10 @@ private BoundStar bindQualifiedStar(List qualifierStar, List bound case 1: // bound slot is `table`.`column` return false; case 2:// bound slot is `db`.`table`.`column` - return compareDbName(qualifierStar.get(0), boundSlotQualifier.get(0)) + return compareDbNameIgnoreClusterName(qualifierStar.get(0), boundSlotQualifier.get(0)) && qualifierStar.get(1).equalsIgnoreCase(boundSlotQualifier.get(1)); case 3:// bound slot is `catalog`.`db`.`table`.`column` - return compareDbName(qualifierStar.get(0), boundSlotQualifier.get(1)) + return compareDbNameIgnoreClusterName(qualifierStar.get(0), boundSlotQualifier.get(1)) && qualifierStar.get(1).equalsIgnoreCase(boundSlotQualifier.get(2)); default: throw new AnalysisException("Not supported qualifier: " @@ -693,7 +693,7 @@ private BoundStar bindQualifiedStar(List qualifierStar, List bound return false; case 3:// bound slot is `catalog`.`db`.`table`.`column` return qualifierStar.get(0).equalsIgnoreCase(boundSlotQualifier.get(0)) - && compareDbName(qualifierStar.get(1), boundSlotQualifier.get(1)) + && compareDbNameIgnoreClusterName(qualifierStar.get(1), boundSlotQualifier.get(1)) && qualifierStar.get(2).equalsIgnoreCase(boundSlotQualifier.get(2)); default: throw new AnalysisException("Not supported qualifier: " @@ -861,7 +861,7 @@ private List bindSingleSlotByDb(String db, String table, String name, Scop List boundSlotQualifier = boundSlot.getQualifier(); String boundSlotDb = boundSlotQualifier.get(boundSlotQualifier.size() - 2); String boundSlotTable = boundSlotQualifier.get(boundSlotQualifier.size() - 1); - if (!compareDbName(boundSlotDb, db) || !sameTableName(boundSlotTable, table)) { + if (!compareDbNameIgnoreClusterName(boundSlotDb, db) || !sameTableName(boundSlotTable, table)) { continue; } // set sql case as alias @@ -882,7 +882,7 @@ private List bindSingleSlotByCatalog(String catalog, String db, String tab String boundSlotDb = boundSlotQualifier.get(boundSlotQualifier.size() - 2); String boundSlotTable = boundSlotQualifier.get(boundSlotQualifier.size() - 1); if 
(!boundSlotCatalog.equalsIgnoreCase(catalog) - || !compareDbName(boundSlotDb, db) + || !compareDbNameIgnoreClusterName(boundSlotDb, db) || !sameTableName(boundSlotTable, table)) { continue; } @@ -891,4 +891,22 @@ private List bindSingleSlotByCatalog(String catalog, String db, String tab } return usedSlots.build(); } + + /**compareDbNameIgnoreClusterName.*/ + public static boolean compareDbNameIgnoreClusterName(String name1, String name2) { + if (name1.equalsIgnoreCase(name2)) { + return true; + } + String ignoreClusterName1 = name1; + int idx1 = name1.indexOf(":"); + if (idx1 > -1) { + ignoreClusterName1 = name1.substring(idx1 + 1); + } + String ignoreClusterName2 = name2; + int idx2 = name2.indexOf(":"); + if (idx2 > -1) { + ignoreClusterName2 = name2.substring(idx2 + 1); + } + return ignoreClusterName1.equalsIgnoreCase(ignoreClusterName2); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateCommand.java index 444bc8aa3097553..19379dc56b85af2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/UpdateCommand.java @@ -214,7 +214,8 @@ private void checkAssignmentColumn(ConnectContext ctx, List columnNamePa } List tableQualifier = RelationUtil.getQualifierName(ctx, nameParts); if (!ExpressionAnalyzer.sameTableName(tableAlias == null ? tableQualifier.get(2) : tableAlias, tableName) - || (dbName != null && ExpressionAnalyzer.compareDbName(tableQualifier.get(1), dbName))) { + || (dbName != null + && !ExpressionAnalyzer.compareDbNameIgnoreClusterName(tableQualifier.get(1), dbName))) { throw new AnalysisException("column in assignment list is invalid, " + String.join(".", columnNameParts)); } } diff --git a/regression-test/data/nereids_rules_p0/bindExpression/bind_dbname_ignore_cluster.out b/regression-test/data/nereids_rules_p0/bindExpression/bind_dbname_ignore_cluster.out new file mode 100644 index 000000000000000..b07b84f44a39672 --- /dev/null +++ b/regression-test/data/nereids_rules_p0/bindExpression/bind_dbname_ignore_cluster.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !test_update -- +2 10 + diff --git a/regression-test/suites/nereids_rules_p0/bindExpression/bind_dbname_ignore_cluster.groovy b/regression-test/suites/nereids_rules_p0/bindExpression/bind_dbname_ignore_cluster.groovy new file mode 100644 index 000000000000000..a17d1742133a08e --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/bindExpression/bind_dbname_ignore_cluster.groovy @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+suite("test_db_name_ignore_cluster") { + String db = context.config.getDbNameByFile(new File(context.file.parent)) + sql 'set enable_nereids_planner=true' + sql 'set enable_nereids_distribute_planner=false' + sql "use ${db}" + sql "drop table if exists ${db}.test_db_name_ignore_cluster" + sql """create table ${db}.test_db_name_ignore_cluster(a int, b int) unique key(a) distributed by hash(a) + properties("replication_num"="1");""" + sql "select ${db}.test_db_name_ignore_cluster.a from `default_cluster:${db}`.test_db_name_ignore_cluster;" + sql "select ${db}.test_db_name_ignore_cluster.* from `default_cluster:${db}`.test_db_name_ignore_cluster;" + + sql "select `default_cluster:${db}`.test_db_name_ignore_cluster.a from `${db}`.test_db_name_ignore_cluster;" + sql "select `default_cluster:${db}`.test_db_name_ignore_cluster.* from `${db}`.test_db_name_ignore_cluster;" + + sql "select `default_cluster:${db}`.test_db_name_ignore_cluster.a from `default_cluster:${db}`.test_db_name_ignore_cluster;" + sql "select `default_cluster:${db}`.test_db_name_ignore_cluster.* from `default_cluster:${db}`.test_db_name_ignore_cluster;" + + sql "select internal.`${db}`.test_db_name_ignore_cluster.a from internal.`default_cluster:${db}`.test_db_name_ignore_cluster;" + sql "select internal.`default_cluster:${db}`.test_db_name_ignore_cluster.* from internal.`${db}`.test_db_name_ignore_cluster;" + + sql "insert into ${db}.test_db_name_ignore_cluster values(2,4)" + sql "update `default_cluster:${db}`.test_db_name_ignore_cluster set `${db}`.test_db_name_ignore_cluster.b=10;" + sql "update `${db}`.test_db_name_ignore_cluster set `${db}`.test_db_name_ignore_cluster.b=10;" + qt_test_update "select * from ${db}.test_db_name_ignore_cluster" +} \ No newline at end of file From eea944626271c8d0991dc5c6e570a96417603e55 Mon Sep 17 00:00:00 2001 From: Gavin Chou Date: Fri, 9 Aug 2024 10:51:11 +0800 Subject: [PATCH 02/94] [fix](file cache) Fix slow IO for table stats procedure, introduced by #37141 (#39123) Session variable `disable_file_cache` is processed as "disposable file cache" in beta_rowset_reader.cpp. ``` if (_read_context->runtime_state != nullptr) { _read_options.io_ctx.query_id = &_read_context->runtime_state->query_id(); _read_options.io_ctx.read_file_cache = _read_context->runtime_state->query_options().enable_file_cache; _read_options.io_ctx.is_disposable = _read_context->runtime_state->query_options().disable_file_cache; } ``` We use disposable cache to avoid IO amp and avoid large amount of eviction from the cached data ("normal cache"). We cannot set the read option cache policy to "no cache" because it may cause IO amp: every page IO will cause a remote IO, which is a performance disaster. --- be/src/olap/parallel_scanner_builder.cpp | 7 ++----- be/src/olap/rowset/beta_rowset.cpp | 18 +++++++----------- be/src/olap/rowset/beta_rowset.h | 9 +++------ be/src/olap/rowset/beta_rowset_reader.cpp | 10 +++------- be/src/olap/segment_loader.cpp | 4 ++-- be/src/olap/segment_loader.h | 3 +-- 6 files changed, 18 insertions(+), 33 deletions(-) diff --git a/be/src/olap/parallel_scanner_builder.cpp b/be/src/olap/parallel_scanner_builder.cpp index 6a2503a70e90029..10bd61cd8d5d4b6 100644 --- a/be/src/olap/parallel_scanner_builder.cpp +++ b/be/src/olap/parallel_scanner_builder.cpp @@ -182,9 +182,6 @@ Status ParallelScannerBuilder::_load() { bool enable_segment_cache = _state->query_options().__isset.enable_segment_cache ? 
_state->query_options().enable_segment_cache : true; - bool disable_file_cache = _state->query_options().__isset.disable_file_cache - ? _state->query_options().disable_file_cache - : false; for (auto& rowset : rowsets) { RETURN_IF_ERROR(rowset->load()); const auto rowset_id = rowset->rowset_id(); @@ -192,7 +189,7 @@ Status ParallelScannerBuilder::_load() { RETURN_IF_ERROR(SegmentLoader::instance()->load_segments( std::dynamic_pointer_cast(rowset), &segment_cache_handle, - enable_segment_cache, false, disable_file_cache)); + enable_segment_cache, false)); _total_rows += rowset->num_rows(); } } @@ -211,4 +208,4 @@ std::shared_ptr ParallelScannerBuilder::_build_scanner( return NewOlapScanner::create_shared(_parent, std::move(params)); } -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/be/src/olap/rowset/beta_rowset.cpp b/be/src/olap/rowset/beta_rowset.cpp index 5114cc6595a74ad..832ca3140887da2 100644 --- a/be/src/olap/rowset/beta_rowset.cpp +++ b/be/src/olap/rowset/beta_rowset.cpp @@ -147,26 +147,23 @@ Status BetaRowset::get_segments_size(std::vector* segments_size) { return Status::OK(); } -Status BetaRowset::load_segments(std::vector* segments, - bool disable_file_cache) { - return load_segments(0, num_segments(), segments, disable_file_cache); +Status BetaRowset::load_segments(std::vector* segments) { + return load_segments(0, num_segments(), segments); } Status BetaRowset::load_segments(int64_t seg_id_begin, int64_t seg_id_end, - std::vector* segments, - bool disable_file_cache) { + std::vector* segments) { int64_t seg_id = seg_id_begin; while (seg_id < seg_id_end) { std::shared_ptr segment; - RETURN_IF_ERROR(load_segment(seg_id, &segment, disable_file_cache)); + RETURN_IF_ERROR(load_segment(seg_id, &segment)); segments->push_back(std::move(segment)); seg_id++; } return Status::OK(); } -Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment, - bool disable_file_cache) { +Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment) { auto fs = _rowset_meta->fs(); if (!fs) { return Status::Error("get fs failed"); @@ -175,9 +172,8 @@ Status BetaRowset::load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* se DCHECK(seg_id >= 0); auto seg_path = DORIS_TRY(segment_path(seg_id)); io::FileReaderOptions reader_options { - .cache_type = !disable_file_cache && config::enable_file_cache - ? io::FileCachePolicy::FILE_BLOCK_CACHE - : io::FileCachePolicy::NO_CACHE, + .cache_type = config::enable_file_cache ? 
io::FileCachePolicy::FILE_BLOCK_CACHE + : io::FileCachePolicy::NO_CACHE, .is_doris_table = true, .cache_base_path = "", .file_size = _rowset_meta->segment_file_size(seg_id), diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h index 59ed6e061feb063..52d5ac5c8a8742f 100644 --- a/be/src/olap/rowset/beta_rowset.h +++ b/be/src/olap/rowset/beta_rowset.h @@ -71,15 +71,12 @@ class BetaRowset final : public Rowset { Status check_file_exist() override; - Status load_segments(std::vector* segments, - bool disable_file_cache = false); + Status load_segments(std::vector* segments); Status load_segments(int64_t seg_id_begin, int64_t seg_id_end, - std::vector* segments, - bool disable_file_cache = false); + std::vector* segments); - Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment, - bool disable_file_cache = false); + Status load_segment(int64_t seg_id, segment_v2::SegmentSharedPtr* segment); Status get_segments_size(std::vector* segments_size); diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index 4d953d1dbe37e74..42456bb862502d6 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -249,13 +249,9 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context } // load segments - bool disable_file_cache = false; bool enable_segment_cache = true; auto* state = read_context->runtime_state; if (state != nullptr) { - disable_file_cache = state->query_options().__isset.disable_file_cache - ? state->query_options().disable_file_cache - : false; enable_segment_cache = state->query_options().__isset.enable_segment_cache ? state->query_options().enable_segment_cache : true; @@ -264,9 +260,9 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context bool should_use_cache = use_cache || (_read_context->reader_type == ReaderType::READER_QUERY && enable_segment_cache); SegmentCacheHandle segment_cache_handle; - RETURN_IF_ERROR(SegmentLoader::instance()->load_segments( - _rowset, &segment_cache_handle, should_use_cache, - /*need_load_pk_index_and_bf*/ false, disable_file_cache)); + RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(_rowset, &segment_cache_handle, + should_use_cache, + /*need_load_pk_index_and_bf*/ false)); // create iterator for each segment auto& segments = segment_cache_handle.get_segments(); diff --git a/be/src/olap/segment_loader.cpp b/be/src/olap/segment_loader.cpp index 98db03512409012..12ab89af0be283a 100644 --- a/be/src/olap/segment_loader.cpp +++ b/be/src/olap/segment_loader.cpp @@ -52,7 +52,7 @@ void SegmentCache::erase(const SegmentCache::CacheKey& key) { Status SegmentLoader::load_segments(const BetaRowsetSharedPtr& rowset, SegmentCacheHandle* cache_handle, bool use_cache, - bool need_load_pk_index_and_bf, bool disable_file_cache) { + bool need_load_pk_index_and_bf) { if (cache_handle->is_inited()) { return Status::OK(); } @@ -62,7 +62,7 @@ Status SegmentLoader::load_segments(const BetaRowsetSharedPtr& rowset, continue; } segment_v2::SegmentSharedPtr segment; - RETURN_IF_ERROR(rowset->load_segment(i, &segment, disable_file_cache)); + RETURN_IF_ERROR(rowset->load_segment(i, &segment)); if (need_load_pk_index_and_bf) { RETURN_IF_ERROR(segment->load_pk_index_and_bf()); } diff --git a/be/src/olap/segment_loader.h b/be/src/olap/segment_loader.h index fc2f0d8c03fafea..5bb8fae3c418775 100644 --- a/be/src/olap/segment_loader.h +++ b/be/src/olap/segment_loader.h @@ -118,8 +118,7 @@ class 
SegmentLoader { // Load segments of "rowset", return the "cache_handle" which contains segments. // If use_cache is true, it will be loaded from _cache. Status load_segments(const BetaRowsetSharedPtr& rowset, SegmentCacheHandle* cache_handle, - bool use_cache = false, bool need_load_pk_index_and_bf = false, - bool disable_file_cache = false); + bool use_cache = false, bool need_load_pk_index_and_bf = false); void erase_segment(const SegmentCache::CacheKey& key); From ffc7fcb23f1afbce91dc81ffea9be5359fbc0e4f Mon Sep 17 00:00:00 2001 From: Mingyu Chen Date: Fri, 9 Aug 2024 11:33:15 +0800 Subject: [PATCH 03/94] [test](catalog) add upgrade test cases for external catalog (#39063) 1. create hive/iceberg/paimon/jdbc catalog 2. do some queries or ddl, dml after upgrade - Change `regression-test/README.md` to English --- regression-test/README.md | 39 +++-- .../data/external_table_p0/upgrade/test.out | 52 ++++++ .../external_table_p0/upgrade/load.groovy | 152 ++++++++++++++++++ .../external_table_p0/upgrade/test.groovy | 83 ++++++++++ 4 files changed, 312 insertions(+), 14 deletions(-) create mode 100644 regression-test/data/external_table_p0/upgrade/test.out create mode 100644 regression-test/suites/external_table_p0/upgrade/load.groovy create mode 100644 regression-test/suites/external_table_p0/upgrade/test.groovy diff --git a/regression-test/README.md b/regression-test/README.md index f8404bbd77b9e50..1cc6aca452e932d 100644 --- a/regression-test/README.md +++ b/regression-test/README.md @@ -17,49 +17,58 @@ specific language governing permissions and limitations under the License. --> -# 新加case注意事项 +# Guide for test cases -## 常规 case -1. 变量名前要写 def,否则是全局变量,并行跑的 case 的时候可能被其他 case 影响。 +## General Case + +1. Write "def" before variable names; otherwise, they will be global variables and may be affected by other cases running in parallel. Problematic code: ``` ret = *** ``` + Correct code: ``` def ret = *** ``` -2. 尽量不要在 case 中 global 的设置 session variable,或者修改集群配置,可能会影响其他 case。 + +2. Avoid setting global session variables or modifying cluster configurations in cases, as it may affect other cases. Problematic code: ``` sql """set global enable_pipeline_x_engine=true;""" ``` + Correct code: ``` sql """set enable_pipeline_x_engine=true;""" ``` -3. 如果必须要设置 global,或者要改集群配置,可以指定 case 以 nonConcurrent 的方式运行。 - [示例](https://github.com/apache/doris/blob/master/regression-test/suites/query_p0/sql_functions/cast_function/test_cast_string_to_array.groovy#L18) -4. case 中涉及时间相关的,最好固定时间,不要用类似 now() 函数这种动态值,避免过一段时间后 case 就跑不过了。 +3. If it is necessary to set global variables or modify cluster configurations, specify the case to run in a nonConcurrent manner. + + [Example](https://github.com/apache/doris/blob/master/regression-test/suites/query_p0/sql_functions/cast_function/test_cast_string_to_array.groovy#L18) + +4. For cases involving time-related operations, it is best to use fixed time values instead of dynamic values like the `now()` function to prevent cases from failing after some time. Problematic code: ``` sql """select count(*) from table where created < now();""" ``` + Correct code: ``` sql """select count(*) from table where created < '2023-11-13';""" ``` -5. case 中 streamload 后请加上 sync 一下,避免在多 FE 环境中执行不稳定。 + +5. After streamloading in a case, add a sync to ensure stability when executing in a multi-FE environment. Problematic code: ``` streamLoad { ... } sql """select count(*) from table """ ``` + Correct code: ``` streamLoad { ... } @@ -67,13 +76,15 @@ under the License. 
sql """select count(*) from table """ ``` -6. UDF 的 case,需要把对应的 jar 包拷贝到所有 BE 机器上。 +6. For UDF cases, make sure to copy the corresponding JAR file to all BE machines. + + [Example](https://github.com/apache/doris/blob/master/regression-test/suites/javaudf_p0/test_javaudf_case.groovy#L27) + +7. Do not create the same table in different cases under the same directory to avoid conflicts. - [示例](https://github.com/apache/doris/blob/master/regression-test/suites/javaudf_p0/test_javaudf_case.groovy#L27) +## Compatibility case -7. 同一个目录下不同case间不要创建相同的表,避免互相冲突 +Refers to the resources or rules created on the initial cluster during FE testing or upgrade testing, which can still be used normally after the cluster restart or upgrade, such as permissions, UDF, etc. -## 兼容性 case -指重启 FE 测试或升级测试中,在初始集群上创建的资源或规则,在集群重启或升级后也能正常使用,比如权限、UDF等。 -这些 case 需要拆分成两个文件,load.groovy 和 xxxx.groovy,放到一个文件夹中并加上 `restart_fe` 组标签,[示例](https://github.com/apache/doris/pull/37118)。 +These cases need to be split into two files, `load.groovy` and `xxxx.groovy`, placed in a folder, and tagged with the `restart_fe` group label, [example](https://github.com/apache/doris/pull/37118). diff --git a/regression-test/data/external_table_p0/upgrade/test.out b/regression-test/data/external_table_p0/upgrade/test.out new file mode 100644 index 000000000000000..24271ce54691250 --- /dev/null +++ b/regression-test/data/external_table_p0/upgrade/test.out @@ -0,0 +1,52 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !hive1 -- +1 +2 是 +3 III类户 + +-- !hive2 -- +1 +2 是 +3 III类户 + +-- !ice_rest1 -- +-97226733 true -7154 93 -21192305 72903745700759648 9.823223 0.20321478918108538 2969-02-03 1970-01-03T08:00:01.000001 2017-12-03T10:12:55.038194 !gdi%$v SkfeaRF9prAcz AwYZHC7 0000101110010000110 2549.125025 Hefei +-97834381 false 30790 -51 329082 67450834578680294 8.32709 1.695344509839929 2969-02-03 1970-01-02T08:00:01.000001 2017-12-03T10:12:55.038194 !po(qhs OildgTxC6Yp3MW0 gHN 00111111101000010000000 3469.439442 Shanghai +-97894492 false -17752 89 -17810797 24642172641614839 8.316614 1.3521628855022216 2969-02-03 1970-01-01T08:00:01.000001 2017-12-02T10:12:55.038194 cixjlv$&#gapyzo zpowfZb32re9ScL 1uQ8VE964 0010110000001 5770.442982 Hangzhou +-98385282 false 12821 -29 -4184928 22450415624137594 5.0620017 3.754570746934458 1969-09-21 1970-01-01T08:00:01.000001 2017-12-02T10:12:55.038194 j eIkY1NtoauZfTEJd lij8utE 00011111000 9537.539893 Shanghai +-98428712 true -19994 -37 -10403458 -79747158929324986 9.39804 9.184069888398918 2969-02-03 1970-01-02T08:00:01.000001 2017-12-01T10:12:55.038194 q)drohaz 2IgsxyLcl aY9UJjuwdpK6 00001 7232.956012 Beijing +-98494260 true -31980 -1 -17192202 -46554401964424720 2.624445 3.357848957747339 1969-09-21 1970-01-01T08:00:01.000001 2017-12-01T10:12:55.038194 #grf Rv nWv5Z9E6 101010 4409.038994 Shanghai +-98623027 true -2082 59 -18839692 -65567297943909781 4.0910926 4.667513572115034 1969-09-21 1970-01-04T08:00:01.000001 2017-12-02T10:12:55.038194 h!(q hGpXWFVwsSYm38t5 FINmCZMUE3dvl5Gz7PAq 00 1726.632887 Shanghai +-98763612 true -20598 -116 -890992 -47536120268669383 7.334266 3.714530270898617 2000-12-31 1970-01-04T08:00:01.000001 2017-12-03T10:12:55.038194 wq*!md)ak^(lbz Ac ZvRJzGTmkYrVcl5UCi3S 00100100011011010101 9153.323992 Hefei +-99328658 false -18802 97 -20497622 -11291399559888626 4.1584477 6.082803585731483 2000-12-31 1970-01-03T08:00:01.000001 2017-12-02T10:12:55.038194 !bzk*x@c%(pdqgw 9yHPEgnQSr7zC5R8GVpf dv6DcPa 10011010011001000100000101 3688.355941 
Hangzhou +-99567408 true -12903 -18 -10473717 71894869370121368 1.1465179 1.9420032493661743 2000-12-31 1970-01-01T08:00:01.000001 2017-12-01T10:12:55.038194 fk&xbldo*h 0PrxtvD kTj0SH 1000101 4939.088709 Hangzhou + +-- !ice_rest2 -- +-97226733 true -7154 93 -21192305 72903745700759648 9.823223 0.20321478918108538 2969-02-03 1970-01-03T08:00:01.000001 2017-12-03T10:12:55.038194 !gdi%$v SkfeaRF9prAcz AwYZHC7 0000101110010000110 2549.125025 Hefei +-97834381 false 30790 -51 329082 67450834578680294 8.32709 1.695344509839929 2969-02-03 1970-01-02T08:00:01.000001 2017-12-03T10:12:55.038194 !po(qhs OildgTxC6Yp3MW0 gHN 00111111101000010000000 3469.439442 Shanghai +-97894492 false -17752 89 -17810797 24642172641614839 8.316614 1.3521628855022216 2969-02-03 1970-01-01T08:00:01.000001 2017-12-02T10:12:55.038194 cixjlv$&#gapyzo zpowfZb32re9ScL 1uQ8VE964 0010110000001 5770.442982 Hangzhou +-98385282 false 12821 -29 -4184928 22450415624137594 5.0620017 3.754570746934458 1969-09-21 1970-01-01T08:00:01.000001 2017-12-02T10:12:55.038194 j eIkY1NtoauZfTEJd lij8utE 00011111000 9537.539893 Shanghai +-98428712 true -19994 -37 -10403458 -79747158929324986 9.39804 9.184069888398918 2969-02-03 1970-01-02T08:00:01.000001 2017-12-01T10:12:55.038194 q)drohaz 2IgsxyLcl aY9UJjuwdpK6 00001 7232.956012 Beijing +-98494260 true -31980 -1 -17192202 -46554401964424720 2.624445 3.357848957747339 1969-09-21 1970-01-01T08:00:01.000001 2017-12-01T10:12:55.038194 #grf Rv nWv5Z9E6 101010 4409.038994 Shanghai +-98623027 true -2082 59 -18839692 -65567297943909781 4.0910926 4.667513572115034 1969-09-21 1970-01-04T08:00:01.000001 2017-12-02T10:12:55.038194 h!(q hGpXWFVwsSYm38t5 FINmCZMUE3dvl5Gz7PAq 00 1726.632887 Shanghai +-98763612 true -20598 -116 -890992 -47536120268669383 7.334266 3.714530270898617 2000-12-31 1970-01-04T08:00:01.000001 2017-12-03T10:12:55.038194 wq*!md)ak^(lbz Ac ZvRJzGTmkYrVcl5UCi3S 00100100011011010101 9153.323992 Hefei +-99328658 false -18802 97 -20497622 -11291399559888626 4.1584477 6.082803585731483 2000-12-31 1970-01-03T08:00:01.000001 2017-12-02T10:12:55.038194 !bzk*x@c%(pdqgw 9yHPEgnQSr7zC5R8GVpf dv6DcPa 10011010011001000100000101 3688.355941 Hangzhou +-99567408 true -12903 -18 -10473717 71894869370121368 1.1465179 1.9420032493661743 2000-12-31 1970-01-01T08:00:01.000001 2017-12-01T10:12:55.038194 fk&xbldo*h 0PrxtvD kTj0SH 1000101 4939.088709 Hangzhou + +-- !ice_hms1 -- +true 2 + +-- !paimon_fs1 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 +10 20 30 40 50 60 70 80 90.1 100.1 110.10 2020-03-02 130str 140varchar b false bbbb 2023-08-14T08:32:52.821 + +-- !paimon_fs2 -- +1 2 3 4 5 6 7 8 9.1 10.1 11.10 2020-02-02 13str 14varchar a true aaaa 2023-08-13T09:32:38.530 +10 20 30 40 50 60 70 80 90.1 100.1 110.10 2020-03-02 130str 140varchar b false bbbb 2023-08-14T08:32:52.821 + +-- !mysql1 -- +2023-06-17T10:00 2023-06-17T10:00:01.100 2023-06-17T10:00:02.220 2023-06-17T10:00:03.333 2023-06-17T10:00:04.444400 2023-06-17T10:00:05.555550 2023-06-17T10:00:06.666666 + +-- !mysql2 -- +2023-06-17T10:00 2023-06-17T10:00:01.100 2023-06-17T10:00:02.220 2023-06-17T10:00:03.333 2023-06-17T10:00:04.444400 2023-06-17T10:00:05.555550 2023-06-17T10:00:06.666666 + diff --git a/regression-test/suites/external_table_p0/upgrade/load.groovy b/regression-test/suites/external_table_p0/upgrade/load.groovy new file mode 100644 index 000000000000000..6dc068bb6bed92e --- /dev/null +++ b/regression-test/suites/external_table_p0/upgrade/load.groovy @@ -0,0 +1,152 @@ +// Licensed to the 
Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_catalog_upgrade_load", "p0,external,hive,external_docker,external_docker_hive,restart_fe,upgrade_case") { + + // Hive + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String hivePrefix = "hive2" + String catalog_name = "test_catalog_upgrade_hive2" + String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp") + String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort") + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}' + ); + """ + logger.info("catalog " + catalog_name + " created") + } + + // Iceberg rest catalog + enabled = context.config.otherConfigs.get("enableIcebergTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String rest_port = context.config.otherConfigs.get("iceberg_rest_uri_port") + String minio_port = context.config.otherConfigs.get("iceberg_minio_port") + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String catalog_name = "test_catalog_upgrade_iceberg_rest" + + sql """drop catalog if exists ${catalog_name}""" + sql """ + CREATE CATALOG ${catalog_name} PROPERTIES ( + 'type'='iceberg', + 'iceberg.catalog.type'='rest', + 'uri' = 'http://${externalEnvIp}:${rest_port}', + "s3.access_key" = "admin", + "s3.secret_key" = "password", + "s3.endpoint" = "http://${externalEnvIp}:${minio_port}", + "s3.region" = "us-east-1" + );""" + } + + // Iceberg hms catalog + enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String hivePrefix = "hive2" + String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String hdfs_port = context.config.otherConfigs.get(hivePrefix + "HdfsPort") + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String catalog_name = "test_catalog_upgrade_iceberg_hms" + + sql """drop catalog if exists ${catalog_name}""" + sql """create catalog if not exists ${catalog_name} properties ( + 'type'='iceberg', + 'iceberg.catalog.type'='hms', + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}', + 'warehouse' = 'hdfs://${externalEnvIp}:${hdfs_port}/user/iceberg_test/', + 'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}' + );""" + } + + // Paimon filesystem catalog + enabled = context.config.otherConfigs.get("enablePaimonTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String hdfs_port = context.config.otherConfigs.get("hive2HdfsPort") + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String 
catalog_name = "test_catalog_upgrade_paimon_fs" + sql """drop catalog if exists ${catalog_name}""" + sql """create catalog if not exists ${catalog_name} properties ( + "type" = "paimon", + "paimon.catalog.type"="filesystem", + "warehouse" = "hdfs://${externalEnvIp}:${hdfs_port}/user/doris/paimon1" + );""" + } + + // Kerberos hive catalog + enabled = context.config.otherConfigs.get("enableKerberosTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String catalog_name = "test_catalog_upgrade_kerberos_hive" + // sql """drop catalog if exists ${catalog_name};""" + // sql """ + // CREATE CATALOG IF NOT EXISTS ${catalog_name} + // PROPERTIES ( + // "type" = "hms", + // "hive.metastore.uris" = "thrift://172.31.71.25:9083", + // "fs.defaultFS" = "hdfs://172.31.71.25:8020", + // "hadoop.kerberos.min.seconds.before.relogin" = "5", + // "hadoop.security.authentication" = "kerberos", + // "hadoop.kerberos.principal"="hive/presto-master.docker.cluster@LABS.TERADATA.COM", + // "hadoop.kerberos.keytab" = "/keytabs/hive-presto-master.keytab", + // "hive.metastore.sasl.enabled " = "true", + // "hive.metastore.kerberos.principal" = "hive/_HOST@LABS.TERADATA.COM" + // ); + // """ + + // sql """drop catalog if exists other_${catalog_name};""" + // sql """ + // CREATE CATALOG IF NOT EXISTS other_${catalog_name} + // PROPERTIES ( + // "type" = "hms", + // "hive.metastore.uris" = "thrift://172.31.71.26:9083", + // "fs.defaultFS" = "hdfs://172.31.71.26:8020", + // "hadoop.kerberos.min.seconds.before.relogin" = "5", + // "hadoop.security.authentication" = "kerberos", + // "hadoop.kerberos.principal"="hive/presto-master.docker.cluster@OTHERREALM.COM", + // "hadoop.kerberos.keytab" = "/keytabs/other-hive-presto-master.keytab", + // "hive.metastore.sasl.enabled " = "true", + // "hive.metastore.kerberos.principal" = "hive/_HOST@OTHERREALM.COM", + // "hadoop.security.auth_to_local" ="RULE:[2:\$1@\$0](.*@OTHERREALM.COM)s/@.*// + // RULE:[2:\$1@\$0](.*@OTHERLABS.TERADATA.COM)s/@.*// + // DEFAULT" + // ); + // """ + } + + // Jdbc MySQL catalog + enabled = context.config.otherConfigs.get("enableJdbcTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String mysql_port = context.config.otherConfigs.get("mysql_57_port"); + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String s3_endpoint = getS3Endpoint() + String bucket = getS3BucketName() + String driver_url = "https://${bucket}.${s3_endpoint}/regression/jdbc_driver/mysql-connector-java-8.0.25.jar" + // String driver_url = "mysql-connector-java-8.0.25.jar" + String catalog_name = "test_catalog_upgrade_jdbc_mysql" + sql """drop catalog if exists ${catalog_name} """ + sql """create catalog if not exists ${catalog_name} properties( + "type"="jdbc", + "user"="root", + "password"="123456", + "jdbc_url" = "jdbc:mysql://${externalEnvIp}:${mysql_port}/doris_test?useSSL=false&zeroDateTimeBehavior=convertToNull", + "driver_url" = "${driver_url}", + "driver_class" = "com.mysql.cj.jdbc.Driver" + );""" + } +} + diff --git a/regression-test/suites/external_table_p0/upgrade/test.groovy b/regression-test/suites/external_table_p0/upgrade/test.groovy new file mode 100644 index 000000000000000..a74106ba75f65a1 --- /dev/null +++ b/regression-test/suites/external_table_p0/upgrade/test.groovy @@ -0,0 +1,83 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_catalog_upgrade_test", "p0,external,hive,external_docker,external_docker_hive,restart_fe,upgrade_case") { + + // Hive + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + sql """switch test_catalog_upgrade_hive2""" + order_qt_hive1 """select * from multi_catalog.test_chinese_orc limit 10"""; + sql """refresh catalog test_catalog_upgrade_hive2""" + order_qt_hive2 """select * from multi_catalog.test_chinese_orc limit 10"""; + } + + // Iceberg rest catalog + enabled = context.config.otherConfigs.get("enableIcebergTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String rest_port = context.config.otherConfigs.get("iceberg_rest_uri_port") + sql """switch test_catalog_upgrade_iceberg_rest""" + order_qt_ice_rest1 """select * from format_v2.sample_cow_parquet order by id limit 10;""" + sql """refresh catalog test_catalog_upgrade_iceberg_rest""" + order_qt_ice_rest2 """select * from format_v2.sample_cow_parquet order by id limit 10;""" + } + + // Iceberg hms catalog + enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + sql """switch test_catalog_upgrade_iceberg_hms""" + sql """drop database if exists ice_upgrade_db"""; + sql """create database ice_upgrade_db"""; + sql """use ice_upgrade_db""" + sql """CREATE TABLE unpartitioned_table ( + `col1` BOOLEAN COMMENT 'col1', + `col2` INT COMMENT 'col2' + ) ENGINE=iceberg + PROPERTIES ( + 'write-format'='parquet' + ); + """ + sql """insert into unpartitioned_table values(true, 2)""" + order_qt_ice_hms1 """select * from unpartitioned_table""" + } + + // Paimon filesystem catalog + enabled = context.config.otherConfigs.get("enablePaimonTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + sql """switch test_catalog_upgrade_paimon_fs""" + order_qt_paimon_fs1 """select * from db1.all_table limit 10"""; + sql """refresh catalog test_catalog_upgrade_paimon_fs""" + order_qt_paimon_fs2 """select * from db1.all_table limit 10"""; + } + + // Kerberos hive catalog + enabled = context.config.otherConfigs.get("enableKerberosTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String catalog_name = "test_catalog_upgrade_kerberos_hive" + // TODO + } + + // Jdbc MySQL catalog + enabled = context.config.otherConfigs.get("enableJdbcTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + sql """switch test_catalog_upgrade_jdbc_mysql""" + order_qt_mysql1 """select * from doris_test.dt"""; + sql """refresh catalog test_catalog_upgrade_jdbc_mysql""" + order_qt_mysql2 """select * from doris_test.dt"""; + } +} + From dac1555e1789397f180691f3db5fb57a204822e6 Mon Sep 17 00:00:00 2001 From: catpineapple <1391869588@qq.com> Date: Fri, 9 Aug 2024 12:02:50 +0800 Subject: [PATCH 04/94] [feature](docker) add disaggregated doris docker image content (#39093) 1. 
Added corresponding scripts for the doris image(fe,be) of storage-computing separation deployment 2. Added a new metaservice image module 3. Added binary package storage paths and prompt files for different architectures to facilitate clearer construction of your own image --- .../be/resource/amd64/x64_package_is_here | 23 ++ .../be/resource/arm64/arm64_package_is_here | 23 ++ .../resource/be_disaggregated_entrypoint.sh | 185 ++++++++++++++++ .../be/resource/be_disaggregated_prestop.sh | 23 ++ .../be/resource/be_disaggregated_probe.sh | 64 ++++++ .../broker/resource/amd64/x64_package_is_here | 23 ++ .../resource/arm64/arm64_package_is_here | 23 ++ .../fe/resource/amd64/x64_package_is_here | 23 ++ .../fe/resource/arm64/arm64_package_is_here | 23 ++ .../resource/fe_disaggregated_entrypoint.sh | 207 ++++++++++++++++++ .../fe/resource/fe_disaggregated_prestop.sh | 23 ++ .../fe/resource/fe_disaggregated_probe.sh | 64 ++++++ docker/runtime/ms/Dockerfile | 57 +++++ .../ms/resource/amd64/x64_package_is_here | 23 ++ .../ms/resource/arm64/arm64_package_is_here | 23 ++ .../resource/ms_disaggregated_entrypoint.sh | 44 ++++ .../ms/resource/ms_disaggregated_prestop.sh | 24 ++ .../ms/resource/ms_disaggregated_probe.sh | 62 ++++++ 18 files changed, 937 insertions(+) create mode 100644 docker/runtime/be/resource/amd64/x64_package_is_here create mode 100644 docker/runtime/be/resource/arm64/arm64_package_is_here create mode 100755 docker/runtime/be/resource/be_disaggregated_entrypoint.sh create mode 100755 docker/runtime/be/resource/be_disaggregated_prestop.sh create mode 100755 docker/runtime/be/resource/be_disaggregated_probe.sh create mode 100644 docker/runtime/broker/resource/amd64/x64_package_is_here create mode 100644 docker/runtime/broker/resource/arm64/arm64_package_is_here create mode 100644 docker/runtime/fe/resource/amd64/x64_package_is_here create mode 100644 docker/runtime/fe/resource/arm64/arm64_package_is_here create mode 100755 docker/runtime/fe/resource/fe_disaggregated_entrypoint.sh create mode 100755 docker/runtime/fe/resource/fe_disaggregated_prestop.sh create mode 100755 docker/runtime/fe/resource/fe_disaggregated_probe.sh create mode 100644 docker/runtime/ms/Dockerfile create mode 100644 docker/runtime/ms/resource/amd64/x64_package_is_here create mode 100644 docker/runtime/ms/resource/arm64/arm64_package_is_here create mode 100755 docker/runtime/ms/resource/ms_disaggregated_entrypoint.sh create mode 100755 docker/runtime/ms/resource/ms_disaggregated_prestop.sh create mode 100755 docker/runtime/ms/resource/ms_disaggregated_probe.sh diff --git a/docker/runtime/be/resource/amd64/x64_package_is_here b/docker/runtime/be/resource/amd64/x64_package_is_here new file mode 100644 index 000000000000000..c92bb295a23d638 --- /dev/null +++ b/docker/runtime/be/resource/amd64/x64_package_is_here @@ -0,0 +1,23 @@ + + + +This file is not required for building an image. +It is just a reminder for you: If you build a docker image yourself, +please place the installation package (already unzipped) corresponding to the CPU architecture at the same level as this file. diff --git a/docker/runtime/be/resource/arm64/arm64_package_is_here b/docker/runtime/be/resource/arm64/arm64_package_is_here new file mode 100644 index 000000000000000..c92bb295a23d638 --- /dev/null +++ b/docker/runtime/be/resource/arm64/arm64_package_is_here @@ -0,0 +1,23 @@ + + + +This file is not required for building an image. 
+It is just a reminder for you: If you build a docker image yourself, +please place the installation package (already unzipped) corresponding to the CPU architecture at the same level as this file. diff --git a/docker/runtime/be/resource/be_disaggregated_entrypoint.sh b/docker/runtime/be/resource/be_disaggregated_entrypoint.sh new file mode 100755 index 000000000000000..f2dc6f6e2231f84 --- /dev/null +++ b/docker/runtime/be/resource/be_disaggregated_entrypoint.sh @@ -0,0 +1,185 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +#TODO: convert to "_" +MS_ENDPOINT=${MS_ENDPOINT} +MS_TOKEN=${MS_TOKEN:="greedisgood9999"} +DORIS_HOME=${DORIS_HOME:="/opt/apache-doris"} +CONFIGMAP_PATH=${CONFIGMAP_PATH:="/etc/doris"} +INSTANCE_ID=${INSTANCE_ID} +INSTANCE_NAME=${INSTANCE_NAME} +HEARTBEAT_PORT=9050 +CLUSTER_NMAE=${CLUSTER_NAME} +#option:IP,FQDN +HOST_TYPE=${HOST_TYPE:="FQDN"} +STATEFULSET_NAME=${STATEFULSET_NAME} +POD_NAMESPACE=$POD_NAMESPACE +DEFAULT_CLUSTER_ID=${POD_NAMESPACE}"_"${STATEFULSET_NAME} +CLUSTER_ID=${CLUSTER_ID:="$DEFAULT_CLUSTER_ID"} +POD_NAME=${POD_NAME} +CLOUD_UNIQUE_ID_PRE=${CLOUD_UNIQUE_ID_PRE:="1:$INSTANCE_ID"} +CLOUD_UNIQUE_ID="$CLOUD_UNIQUE_ID_PRE:$POD_NAME" +# replace "-" with "_" in CLUSTER_ID and CLOUD_UNIQUE_ID +CLUSTER_ID=$(sed 's/-/_/g' <<<$CLUSTER_ID) + +CONFIG_FILE="$DORIS_HOME/be/conf/be.conf" +MY_SELF= + +DEFAULT_CLUSTER_NAME=$(awk -F $INSTANCE_NAME"-" '{print $NF}' <<<$STATEFULSET_NAME) +CLUSTER_NAME=${CLUSTER_NAME:="$DEFAULT_CLUSTER_NAME"} + +#TODO: check config or not, add default +echo 'file_cache_path = [{"path":"/opt/apache-doris/be/storage","total_size":107374182400,"query_limit":107374182400}]' >> $DORIS_HOME/be/conf/be.conf + +function log_stderr() +{ + echo "[`date`] $@" >& 1 +} + +function add_cluster_info_to_conf() +{ + echo "meta_service_endpoint=$MS_ENDPOINT" >> $DORIS_HOME/be/conf/be.conf + echo "cloud_unique_id=$CLOUD_UNIQUE_ID" >> $DORIS_HOME/be/conf/be.conf + echo "meta_service_use_load_balancer = false" >> $DORIS_HOME/be/conf/be.conf + echo "enable_file_cache = true" >> $DORIS_HOME/be/conf/be.conf +} + +function link_config_files() +{ + if [[ -d $CONFIGMAP_PATH ]]; then + for file in `ls $CONFIGMAP_PATH`; + do + if [[ -f $DORIS_HOME/be/conf/$file ]]; then + mv $DORIS_HOME/be/conf/$file $DORIS_HOME/be/conf/$file.bak + fi + done + fi + + for file in `ls $CONFIGMAP_PATH`; + do + if [[ "$file" == "be.conf" ]]; then + cp $CONFIGMAP_PATH/$file $DORIS_HOME/be/conf/$file + add_cluster_info_to_conf + continue + fi + + ln -sfT $CONFIGMAP_PATH/$file $DORIS_HOME/be/conf/$file + done +} + +function parse_config_file_with_key() +{ + local key=$1 + local value=`grep "^\s*$key\s*=" $CONFIG_FILE | sed "s|^\s*$key\s*=\s*\(.*\)\s*$|\1|g"` +} + +function parse_my_self_address() +{ + local my_ip=`hostname -i | awk '{print 
$1}'` + local my_fqdn=`hostname -f` + if [[ $HOST_TYPE == "IP" ]]; then + MY_SELF=$my_ip + else + MY_SELF=$my_fqdn + fi +} + +function variables_initial() +{ + parse_my_self_address + local heartbeat_port=$(parse_config_file_with_key "heartbeat_service_port") + if [[ "x$heartbeat_port" != "x" ]]; then + HEARTBEAT_PORT=$heartbeat_port + fi +} + +function check_or_register_in_ms() +{ + interval=5 + start=$(date +%s) + timeout=60 + while true; + do + local find_address="http://$MS_ENDPOINT/MetaService/http/get_cluster?token=$MS_TOKEN" + local output=$(curl -s $find_address \ + -d '{"cloud_unique_id":"'$CLOUD_UNIQUE_ID'","cluster_id":"'$CLUSTER_ID'"}') + if grep -q -w "$MY_SELF" <<< $output &>/dev/null; then + log_stderr "[INFO] $MY_SELF have register in instance id $INSTANCE_ID cluser id $CLUSTER_ID!" + return + fi + + local code=$(jq -r ".code" <<< $output) + if [[ "$code" == "NOT_FOUND" ]]; then + # if grep -q -w "$CLUSTER_ID" <<< $output &>/dev/null; then + # log_stderr "[INFO] cluster id $CLUSTER_ID have exists, only register self.!" + # add_my_self + # else + log_stderr "[INFO] register cluster id $CLUSTER_ID with myself $MY_SELF into instance id $INSTANCE_ID." + add_my_self_with_cluster + # fi + else + log_stderr "[INFO] register $MY_SELF into cluster id $CLUSTER_ID!" + add_my_self + fi + + local now=$(date +%s) + let "expire=start+timeout" + if [[ $expire -le $now ]]; then + log_stderr "[ERROR] Timeout for register myself to ms, abort!" + exit 1 + fi + sleep $interval + done +} + +function add_my_self() +{ + local register_address="http://$MS_ENDPOINT/MetaService/http/add_node?token=$MS_TOKEN" + local output=$(curl -s $register_address \ + -d '{"instance_id":"'$INSTANCE_ID'", + "cluster":{"type":"COMPUTE","cluster_id":"'$CLUSTER_ID'", + "nodes":[{"cloud_unique_id":"'$CLOUD_UNIQUE_ID'","ip":"'$MY_SELF'","host":"'$MY_SELF'","heartbeat_port":'$HEARTBEAT_PORT'}]}}') + local code=$(jq -r ".code" <<< $output) + if [[ "$code" == "OK" ]]; then + log_stderr "[INFO] my_self $MY_SELF register to ms $MS_ENDPOINT instance_id $INSTANCE_ID be cluster $CLUSTER_ID success." + else + log_stderr "[ERROR] my_self $MY_SELF register ms $MS_ENDPOINT instance_id $INSTANCE_ID be cluster $CLUSTER_ID failed,err=$output!" + fi +} + +function add_my_self_with_cluster() +{ + local register_address="http://$MS_ENDPOINT/MetaService/http/add_cluster?token=$MS_TOKEN" + local output=$(curl -s $register_address \ + -d '{"instance_id":"'$INSTANCE_ID'", + "cluster":{"type":"COMPUTE","cluster_name":"'$CLUSTER_NAME'","cluster_id":"'$CLUSTER_ID'", + "nodes":[{"cloud_unique_id":"'$CLOUD_UNIQUE_ID'","ip":"'$MY_SELF'","host":"'$MY_SELF'","heartbeat_port":'$HEARTBEAT_PORT'}]}}') + local code=$(jq -r ".code" <<< $output) + if [[ "$code" == "OK" ]]; then + log_stderr "[INFO] cluster $CLUSTER_ID contains $MY_SELF register to ms $MS_ENDPOINT instance_id $INSTANCE_ID success." + else + log_stderr "[ERROR] cluster $CLUSTER_ID contains $MY_SELF register to ms $MS_ENDPOINT instance_id $INSTANCE_ID failed,err=$output!" 
+ fi +} + +add_cluster_info_to_conf +link_config_files +variables_initial +check_or_register_in_ms + +$DORIS_HOME/be/bin/start_be.sh --console diff --git a/docker/runtime/be/resource/be_disaggregated_prestop.sh b/docker/runtime/be/resource/be_disaggregated_prestop.sh new file mode 100755 index 000000000000000..06808b72206e2ff --- /dev/null +++ b/docker/runtime/be/resource/be_disaggregated_prestop.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# + +DORIS_HOME=${DORIS_HOME:="/opt/apache-doris"} + +$DORIS_HOME/be/bin/stop_be.sh --grace diff --git a/docker/runtime/be/resource/be_disaggregated_probe.sh b/docker/runtime/be/resource/be_disaggregated_probe.sh new file mode 100755 index 000000000000000..741d78c750f533c --- /dev/null +++ b/docker/runtime/be/resource/be_disaggregated_probe.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# + +PROBE_TYPE=$1 +DORIS_HOME=${DORIS_HOME:="/opt/apache-doris"} +CONFIG_FILE="$DORIS_HOME/be/conf/be.conf" +DEFAULT_HEARTBEAT_SERVICE_PORT=9050 +DEFAULT_WEBSERVER_PORT=8040 + +function parse_config_file_with_key() +{ + local key=$1 + local value=`grep "^\s*$key\s*=" $CONFIG_FILE | sed "s|^\s*$key\s*=\s*\(.*\)\s*$|\1|g"` + echo $value +} + +function alive_probe() +{ + local heartbeat_service_port=$(parse_config_file_with_key "heartbeat_service_port") + heartbeat_service_port=${heartbeat_service_port:=$DEFAULT_HEARTBEAT_SERVICE_PORT} + if netstat -lntp | grep ":$heartbeat_service_port" > /dev/null ; then + exit 0 + else + exit 1 + fi +} + +function ready_probe() +{ + local webserver_port=$(parse_config_file_with_key "webserver_port") + webserver_port=${webserver_port:=$DEFAULT_WEBSERVER_PORT} + local ip=`hostname -i | awk '{print $1}'` + local url="http://${ip}:${webserver_port}/api/health" + local res=$(curl -s $url) + local status=$(jq -r ".status" <<< $res) + if [[ "x$status" == "xOK" ]]; then + exit 0 + else + exit 1 + fi +} + +if [[ "$PROBE_TYPE" == "ready" ]]; then + ready_probe +else + alive_probe +fi diff --git a/docker/runtime/broker/resource/amd64/x64_package_is_here b/docker/runtime/broker/resource/amd64/x64_package_is_here new file mode 100644 index 000000000000000..c92bb295a23d638 --- /dev/null +++ b/docker/runtime/broker/resource/amd64/x64_package_is_here @@ -0,0 +1,23 @@ + + + +This file is not required for building an image. +It is just a reminder for you: If you build a docker image yourself, +please place the installation package (already unzipped) corresponding to the CPU architecture at the same level as this file. diff --git a/docker/runtime/broker/resource/arm64/arm64_package_is_here b/docker/runtime/broker/resource/arm64/arm64_package_is_here new file mode 100644 index 000000000000000..c92bb295a23d638 --- /dev/null +++ b/docker/runtime/broker/resource/arm64/arm64_package_is_here @@ -0,0 +1,23 @@ + + + +This file is not required for building an image. +It is just a reminder for you: If you build a docker image yourself, +please place the installation package (already unzipped) corresponding to the CPU architecture at the same level as this file. diff --git a/docker/runtime/fe/resource/amd64/x64_package_is_here b/docker/runtime/fe/resource/amd64/x64_package_is_here new file mode 100644 index 000000000000000..c92bb295a23d638 --- /dev/null +++ b/docker/runtime/fe/resource/amd64/x64_package_is_here @@ -0,0 +1,23 @@ + + + +This file is not required for building an image. +It is just a reminder for you: If you build a docker image yourself, +please place the installation package (already unzipped) corresponding to the CPU architecture at the same level as this file. diff --git a/docker/runtime/fe/resource/arm64/arm64_package_is_here b/docker/runtime/fe/resource/arm64/arm64_package_is_here new file mode 100644 index 000000000000000..c92bb295a23d638 --- /dev/null +++ b/docker/runtime/fe/resource/arm64/arm64_package_is_here @@ -0,0 +1,23 @@ + + + +This file is not required for building an image. +It is just a reminder for you: If you build a docker image yourself, +please place the installation package (already unzipped) corresponding to the CPU architecture at the same level as this file. 
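For reference, a minimal sketch of the staging step these `*_package_is_here` reminder files describe — the archive name, version, and unpacked directory below are placeholders, not values taken from this patch:

```shell
# Hypothetical example: stage an unpacked FE package for an amd64 image build.
# Replace the archive name with the real package for your version and architecture.
cd docker/runtime/fe/resource/amd64
tar -xzf apache-doris-3.0.0-bin-x64.tar.gz   # leaves apache-doris-3.0.0-bin-x64/ next to x64_package_is_here
ls
# apache-doris-3.0.0-bin-x64  x64_package_is_here
```

The arm64 directory works the same way with the aarch64 package; the reminder files themselves are not consumed by the build.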
diff --git a/docker/runtime/fe/resource/fe_disaggregated_entrypoint.sh b/docker/runtime/fe/resource/fe_disaggregated_entrypoint.sh new file mode 100755 index 000000000000000..d9af8ab612ad319 --- /dev/null +++ b/docker/runtime/fe/resource/fe_disaggregated_entrypoint.sh @@ -0,0 +1,207 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +# ms address, fe pod's address should register in it. +MS_ENDPOINT=${MS_ENDPOINT} +MS_TOKEN=${MS_TOKEN:="greedisgood9999"} +ELECT_NUMBER=${ELECT_NUMBER:=1} +FE_EDIT_PORT=${FE_EDIT_PORT:=9010} +# cloud_id is default. +CLUSTER_ID=${CLUSTER_ID:="RESERVED_CLUSTER_ID_FOR_SQL_SERVER"} +# cloud_name is default. +CLUSTER_NAME=${CLUSTER_NAME:="RESERVED_CLUSTER_NAME_FOR_SQL_SERVER"} +#the instance id, pod's address should register in instance->cluster. +INSTANCE_ID=${INSTANCE_ID} +MY_SELF= +HOSTNAME=`hostname` +STATEFULSET_NAME=${STATEFULSET_NAME} +POD_NAME=${POD_NAME} +CLOUD_UNIQUE_ID_PRE=${CLOUD_UNIQUE_ID_PRE:="1:$INSTANCE_ID"} +CLOUD_UNIQUE_ID="$CLOUD_UNIQUE_ID_PRE:$POD_NAME" + +CONFIGMAP_PATH=${CONFIGMAP_MOUNT_PATH:="/etc/doris"} +DORIS_HOME=${DORIS_HOME:="/opt/apache-doris"} +CONFIG_FILE="$DORIS_HOME/fe/conf/fe.conf" + +SEQUENCE_NUMBER=$(hostname | awk -F '-' '{print $NF}') +NODE_TYPE="FE_MASTER" + +if [ "$SEQUENCE_NUMBER" -ge "$ELECT_NUMBER" ]; then + NODE_TYPE="FE_OBSERVER" +fi + +# 1. add default config in config file or link config files. +# 2. assign global variables. +# 3. register myself. + + +function log_stderr() +{ + echo "[`date`] $@" >& 1 +} + +function add_cluster_info_to_conf() +{ + echo "meta_service_endpoint=$MS_ENDPOINT" >> $DORIS_HOME/fe/conf/fe.conf + echo "cloud_unique_id=$CLOUD_UNIQUE_ID" >> $DORIS_HOME/fe/conf/fe.conf +} + +function link_config_files() +{ + if [[ -d $CONFIGMAP_PATH ]]; then + #backup files want to replace + for file in `ls $CONFIGMAP_PATH`; + do + if [[ -f $DORIS_HOME/fe/conf/$file ]]; then + mv $DORIS_HOME/fe/conf/$file $DORIS_HOME/fe/conf/$file.bak + fi + done + + for file in `ls $CONFIGMAP_PATH`; + do + if [[ "$file" == "fe.conf" ]]; then + cp $CONFIGMAP_PATH/$file $DORIS_HOME/fe/conf/$file + add_cluster_info_to_conf + continue + fi + + ln -sfT $CONFIGMAP_PATH/$file $DORIS_HOME/fe/conf/$file + done + fi +} + +parse_config_file_with_key() +{ + local key=$1 + local value=`grep "^\s*$key\s*=" $CONFIG_FILE | sed "s|^\s*$key\s*=\s*\(.*\)\s*$|\1|g"` + echo $value +} + +# confirm the register address, if config `enable_fqdn_mode=true` use fqdn start or use ip. 
+function parse_my_self_address() +{ + local value=`parse_config_file_with_key "enable_fqdn_mode"` + + local my_ip=`hostname -i | awk '{print $1}'` + local my_fqdn=`hostname -f` + if [[ $value == "true" ]]; then + MY_SELF=$my_fqdn + else + MY_SELF=$my_ip + fi +} + +function variables_inital() +{ + parse_my_self_address + local edit_port=$(parse_config_file_with_key "edit_log_port") + if [[ "x$edit_port" != "x" ]]; then + FE_EDIT_PORT=${edit_port:=$FE_EDIT_PORT} + fi + +} + +function check_or_register_in_ms() +{ + interval=5 + start=$(date +%s) + timeout=60 + while true; + do + local find_address="http://$MS_ENDPOINT/MetaService/http/get_cluster?token=$MS_TOKEN" + local output=$(curl -s $find_address \ + -d '{"cloud_unique_id": "'$CLOUD_UNIQUE_ID'", + "cluster_id": "RESERVED_CLUSTER_ID_FOR_SQL_SERVER"}') + if grep -q -w $MY_SELF <<< $output &>/dev/null ; then + log_stderr "[INFO] $MY_SELF have registerd in metaservice!" + return + fi + + local code=$(jq -r ".code" <<< $output) + if [[ "$code" == "NOT_FOUND" ]]; then + # if grep -q -w "RESERVED_CLUSTER_NAME_FOR_SQL_SERVER" <<< $output &>/dev/null; then + # log_stderr "[INFO] RESERVED_CLUSTER_NAME_FOR_SQL_SERVER fe cluster have exist, register node $MY_SELF." + # add_my_self + # else + log_stderr "[INFO] RESERVED_CLUSTER_NAME_FOR_SQL_SERVER fe cluster not exist, register fe clsuter." + add_my_self_with_cluster + # fi + else + log_stderr "[INFO] register myself $MY_SELF into fe cluster cloud_unique_id $CLOUD_UNIQUE_ID." + add_my_self + fi + + local now=$(date +%s) + let "expire=start+timeout" + if [[ $expire -le $now ]]; then + log_stderr "[ERROR] Timeout for register myself to ms, abort!" + exit 1 + fi + sleep $interval + done +} + +function add_my_self() +{ + local register_address="http://$MS_ENDPOINT/MetaService/http/add_node?token=$MS_TOKEN" + local curl_cmd="curl -s $register_address -d '{\"instance_id\":\"$INSTANCE_ID\",\"cluster\":{\"type\":\"SQL\",\"cluster_name\":\"RESERVED_CLUSTER_NAME_FOR_SQL_SERVER\",\"cluster_id\":\"RESERVED_CLUSTER_ID_FOR_SQL_SERVER\",\"nodes\":[{\"cloud_unique_id\":\"$CLOUD_UNIQUE_ID\",\"ip\":\"$MY_SELF\",\"host\":\"$MY_SELF\",\"edit_log_port\":9010,\"node_type\":\"$NODE_TYPE\"}]}}'" + # echo "add_my_self: $curl_cmd" + local output=$(eval "$curl_cmd") + # echo "add_my_self response:$output" + local code=$(jq -r ".code" <<< $output) + if [[ "$code" == "OK" ]]; then + log_stderr "[INFO] my_self $MY_SELF register to ms $MS_ENDPOINT instance_id $INSTANCE_ID fe cluster RESERVED_CLUSTER_NAME_FOR_SQL_SERVER success!" + else + log_stderr "[ERROR] my_self register ms $MS_ENDPOINT instance_id $INSTANCE_ID fe cluster failed, response $output!" 
+ fi +} + +function add_my_self_with_cluster() +{ + local register_address="http://$MS_ENDPOINT/MetaService/http/add_cluster?token=$MS_TOKEN" + local curl_data="{\"instance_id\":\"$INSTANCE_ID\",\"cluster\":{\"type\":\"SQL\",\"cluster_name\":\"RESERVED_CLUSTER_NAME_FOR_SQL_SERVER\",\"cluster_id\":\"RESERVED_CLUSTER_ID_FOR_SQL_SERVER\",\"nodes\":[{\"cloud_unique_id\":\"$CLOUD_UNIQUE_ID\",\"ip\":\"$MY_SELF\",\"host\":\"$MY_SELF\",\"node_type\":\"$NODE_TYPE\",\"edit_log_port\":$FE_EDIT_PORT}]}}" + local curl_cmd="curl -s $register_address -d '$curl_data'" + # echo "add_my_self_with_cluster: $curl_cmd" + local output=$(eval "$curl_cmd") + # echo "add_my_self_with_cluster response: $output" + code=$(jq -r ".code" <<< $output) + if [[ "$code" == "OK" ]]; then + log_stderr "[INFO] fe cluster contains $MY_SELF node_type $NODE_TYPE register to ms $MS_ENDPOINT instance_id $INSTANCE_ID success." + else + log_stderr "[ERROR] fe cluster contains $MY_SELF node_type $NODE_TYPE register to ms $MS_ENDPOINT instance_id $INSTANCE_ID faied, $output!" + fi +} + +function check_and_modify_fqdn_config() +{ + local enable_fqdn=`parse_config_file_with_key "enable_fqdn_mode"` + log_stderr "enable_fqdn is : $enable_fqdn" + if [[ "x$enable_fqdn" != "xtrue" ]] ; then + log_stderr "add enable_fqdn_mode = true to $CONFIG_FILE" + echo "enable_fqdn_mode = true" >> $CONFIG_FILE + fi +} + +add_cluster_info_to_conf +check_and_modify_fqdn_config +link_config_files +variables_inital +check_or_register_in_ms +/opt/apache-doris/fe/bin/start_fe.sh --console + diff --git a/docker/runtime/fe/resource/fe_disaggregated_prestop.sh b/docker/runtime/fe/resource/fe_disaggregated_prestop.sh new file mode 100755 index 000000000000000..5a101954c91ac56 --- /dev/null +++ b/docker/runtime/fe/resource/fe_disaggregated_prestop.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# + +DORIS_HOME=${DORIS_HOME:="/opt/apache-doris"} + +$DORIS_HOME/fe/bin/stop_fe.sh --grace diff --git a/docker/runtime/fe/resource/fe_disaggregated_probe.sh b/docker/runtime/fe/resource/fe_disaggregated_probe.sh new file mode 100755 index 000000000000000..31d96d32d6dba06 --- /dev/null +++ b/docker/runtime/fe/resource/fe_disaggregated_probe.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# + +DORIS_HOEM=${DORIS_HOME:="/opt/apache-doris"} +CONFIG_FILE="$DORIS_HOME/fe/conf/fe.conf" +DEFAULT_HTTP_PORT=8030 +DEFAULT_QUERY_PORT=9030 +PROBE_TYPE=$1 + +function parse_config_file_with_key() +{ + local key=$1 + local value=`grep "^\s*$key\s*=" $CONFIG_FILE | sed "s|^\s*$key\s*=\s*\(.*\)\s*$|\1|g"` + echo $value +} + +function alive_probe() +{ + local query_port=$(parse_config_file_with_key "query_port") + query_port=${query_port:=$DEFAULT_QUERY_PORT} + if netstat -lntp | grep ":$query_port" > /dev/null ; then + exit 0 + else + exit 1 + fi +} + +function ready_probe() +{ + local http_port=$(parse_config_file_with_key "http_port") + http_port=${http_port:=$DEFAULT_HTTP_PORT} + local ip=`hostname -i | awk '{print $1}'` + local url="http://${ip}:${http_port}/api/health" + local res=$(curl -s $url) + local code=$(jq -r ".code" <<< $res) + if [[ "x$code" == "x0" ]]; then + exit 0 + else + exit 1 + fi +} + +if [[ "$PROBE_TYPE" == "ready" ]]; then + ready_probe +else + alive_probe +fi diff --git a/docker/runtime/ms/Dockerfile b/docker/runtime/ms/Dockerfile new file mode 100644 index 000000000000000..d50f3ec05ffb6b8 --- /dev/null +++ b/docker/runtime/ms/Dockerfile @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# how to use Dockerfile. +# this is dockerfile for build doris ms image +# when build youself image. +# 1. pull binary from official website and decompress into resource directory that the level equals with this Dockerfile. +# 2. untar xxxx.tar.gz in resource/{arch} directory, makesure your doris package real version and target arch. +# 3. run commad docker build -t xxx.doris.ms:xx -f Dockerfile --build-arg DORIS_VERSION=3.0.0 + +# we have support buildx for amd64 and arm64 architecture image build. +# get the binary from doris github and utar into resource, update the directory as apache-`version(example:3.0.0)`-bin-`architecture(amd64/arm64)` mode. 
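+# An illustrative build, assuming the unpacked package sits under resource/amd64
+# (the image tag is a placeholder, not an official one):
+#   docker build -t mydoris/doris-ms:3.0.0 -f Dockerfile --build-arg DORIS_VERSION=3.0.0 .
+# The same Dockerfile can produce a multi-arch image with buildx:
+#   docker buildx build --platform linux/amd64,linux/arm64 -t mydoris/doris-ms:3.0.0 \
+#     --build-arg DORIS_VERSION=3.0.0 --push .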
+ +# choose a base image +FROM selectdb/base:latest + +ARG TARGETARCH + +ARG DORIS_VERSION="x.x.x" + + +RUN if echo $DORIS_VERSION | grep -E '^([3-9]|([1-9]([0-9])))|^branch\-([3-9]|([1-9]([0-9])))|master.*' >>/dev/null ; then \ + ln -s /usr/lib/jvm/jdk-17 /usr/lib/jvm/java && \ + rm -rf /usr/lib/jvm/jdk-8; \ + else \ + ln -s /usr/lib/jvm/jdk-8 /usr/lib/jvm/java && \ + rm -rf /usr/lib/jvm/jdk-17; \ + fi; + +# set environment variables +ENV JAVA_HOME=/usr/lib/jvm/java +ENV PATH=$PATH:$JAVA_HOME/bin:/opt/apache-doris/ms/bin + +COPY resource/${TARGETARCH:-amd64}/apache-doris-${DORIS_VERSION}-bin-*/ms /opt/apache-doris/ms + +COPY resource/ms_*.sh /opt/apache-doris/ + +#RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime + +WORKDIR /opt/apache-doris + +#ENTRYPOINT ["bash","entry_point.sh"] +ENTRYPOINT ["bash","ms_disaggregated_entrypoint.sh"] diff --git a/docker/runtime/ms/resource/amd64/x64_package_is_here b/docker/runtime/ms/resource/amd64/x64_package_is_here new file mode 100644 index 000000000000000..c92bb295a23d638 --- /dev/null +++ b/docker/runtime/ms/resource/amd64/x64_package_is_here @@ -0,0 +1,23 @@ + + + +This file is not required for building an image. +It is just a reminder for you: If you build a docker image yourself, +please place the installation package (already unzipped) corresponding to the CPU architecture at the same level as this file. diff --git a/docker/runtime/ms/resource/arm64/arm64_package_is_here b/docker/runtime/ms/resource/arm64/arm64_package_is_here new file mode 100644 index 000000000000000..c92bb295a23d638 --- /dev/null +++ b/docker/runtime/ms/resource/arm64/arm64_package_is_here @@ -0,0 +1,23 @@ + + + +This file is not required for building an image. +It is just a reminder for you: If you build a docker image yourself, +please place the installation package (already unzipped) corresponding to the CPU architecture at the same level as this file. diff --git a/docker/runtime/ms/resource/ms_disaggregated_entrypoint.sh b/docker/runtime/ms/resource/ms_disaggregated_entrypoint.sh new file mode 100755 index 000000000000000..e3415b1d0c4d7ad --- /dev/null +++ b/docker/runtime/ms/resource/ms_disaggregated_entrypoint.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
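+# Invocation sketch: the single positional argument is forwarded to start.sh as a long
+# option, e.g. `ms_disaggregated_entrypoint.sh meta-service` ends up running
+# `$DORIS_HOME/ms/bin/start.sh --meta-service` (the role name here is illustrative).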
+ +# + +#get from env +FDB_ENDPOINT=${FDB_ENDPOINT} +CONFIGMAP_PATH=${CONFIGMAP_PATH:="/etc/doris"} +DORIS_HOME=${DORIS_HOME:="/opt/apache-doris"} + +echo "fdb_cluster=$FDB_ENDPOINT" >> $DORIS_HOME/ms/conf/doris_cloud.conf +if [[ -d $CONFIGMAP_PATH ]]; then + for file in `ls $CONFIGMAP_PATH` + do + if [[ "$file" == "doris_cloud.conf" ]] ; then + mv -f $DORIS_HOME/ms/conf/$file $DORIS_HOME/ms/conf/$file.bak + cp $CONFIGMAP_PATH/$file $DORIS_HOME/ms/conf/$file + echo "fdb_cluster=$FDB_ENDPOINT" >> $DORIS_HOME/ms/conf/doris_cloud.conf + continue + fi + + if test -e $DORIS_HOME/ms/conf/$file ; then + mv -f $DORIS_HOME/ms/conf/$file $DORIS_HOME/ms/conf/$file.bak + fi + ln -sfT $CONFIGMAP_PATH/$file $DORIS_HOME/ms/conf/$file + done +fi + +$DORIS_HOME/ms/bin/start.sh --$1 diff --git a/docker/runtime/ms/resource/ms_disaggregated_prestop.sh b/docker/runtime/ms/resource/ms_disaggregated_prestop.sh new file mode 100755 index 000000000000000..f92acad40a83415 --- /dev/null +++ b/docker/runtime/ms/resource/ms_disaggregated_prestop.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +#get from env +DORIS_HOME=${DORIS_HOME:="/opt/apache-doris"} + +$DORIS_HOME/ms/bin/stop.sh --$1 + diff --git a/docker/runtime/ms/resource/ms_disaggregated_probe.sh b/docker/runtime/ms/resource/ms_disaggregated_probe.sh new file mode 100755 index 000000000000000..87bb316375fdf2a --- /dev/null +++ b/docker/runtime/ms/resource/ms_disaggregated_probe.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
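+# Usage sketch (the Kubernetes probe wiring is illustrative, not part of this patch):
+#   readinessProbe -> exec: ["bash", "ms_disaggregated_probe.sh", "ready"]
+#   livenessProbe  -> exec: ["bash", "ms_disaggregated_probe.sh", "alive"]
+# Any argument other than "ready" falls through to the alive probe.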
+ +DORIS_HOME=${DORIS_HOME:="/opt/apache-doris"} +CONFIG_FILE="$DORIS_HOME/ms/conf/doris_cloud.conf" +DEFAULT_BRPC_LISTEN_PORT=5000 +PROBE_TYPE=$1 + +log_stderr() +{ + echo "[`date`] $@" >&2 +} + +function parse_config_file_with_key() +{ + local key=$1 + local value=`grep "^\s*$key\s*=" $CONFIG_FILE | sed "s|^\s*$key\s*=\s*\(.*\)\s*$|\1|g"` + echo $value +} + +function alive_probe() +{ + local brpc_listen_port=$(parse_config_file_with_key "brpc_listen_port") + brpc_listen_port=${brpc_listen_port:=$DEFAULT_BRPC_LISTEN_PORT} + if netstat -lntp | grep ":$brpc_listen_port" > /dev/null ; then + exit 0 + else + exit 1 + fi +} + +function ready_probe() +{ + local brpc_listen_port=$(parse_config_file_with_key "brpc_listen_port") + brpc_listen_port=${brpc_listen_port:=$DEFAULT_BRPC_LISTEN_PORT} + if netstat -lntp | grep ":$brpc_listen_port" > /dev/null ; then + exit 0 + else + exit 1 + fi +} + +if [[ "$PROBE_TYPE" == "ready" ]]; then + ready_probe +else + alive_probe +fi From 5028bad0a692998e85de6105a6a6e00a995eeb5c Mon Sep 17 00:00:00 2001 From: zzzxl <33418555+zzzxl1993@users.noreply.github.com> Date: Fri, 9 Aug 2024 12:12:20 +0800 Subject: [PATCH 05/94] [fix](inverted index) warm up inverted index (#38986) --- be/src/cloud/cloud_tablet.cpp | 32 +++++++++++++++++++++ be/src/cloud/cloud_warm_up_manager.cpp | 39 ++++++++++++++++++++++++++ be/src/olap/storage_policy.cpp | 31 ++++++++++++++++++++ be/src/olap/storage_policy.h | 5 ++++ 4 files changed, 107 insertions(+) diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp index 17ec1fe22b0d852..2c6b841be546f84 100644 --- a/be/src/cloud/cloud_tablet.cpp +++ b/be/src/cloud/cloud_tablet.cpp @@ -246,6 +246,38 @@ void CloudTablet::add_rowsets(std::vector to_add, bool version_ }, .download_done {}, }); + + auto download_idx_file = [&](const io::Path& idx_path) { + io::DownloadFileMeta meta { + .path = idx_path, + .file_size = -1, + .file_system = storage_resource.value()->fs, + .ctx = + { + .expiration_time = expiration_time, + }, + .download_done {}, + }; + _engine.file_cache_block_downloader().submit_download_task(std::move(meta)); + }; + auto schema_ptr = rowset_meta->tablet_schema(); + auto idx_version = schema_ptr->get_inverted_index_storage_format(); + if (idx_version == InvertedIndexStorageFormatPB::V1) { + for (const auto& index : schema_ptr->indexes()) { + if (index.index_type() == IndexType::INVERTED) { + auto idx_path = storage_resource.value()->remote_idx_v1_path( + *rowset_meta, seg_id, index.index_id(), + index.get_index_suffix()); + download_idx_file(idx_path); + } + } + } else if (idx_version == InvertedIndexStorageFormatPB::V2) { + if (schema_ptr->has_inverted_index()) { + auto idx_path = storage_resource.value()->remote_idx_v2_path( + *rowset_meta, seg_id); + download_idx_file(idx_path); + } + } } #endif } diff --git a/be/src/cloud/cloud_warm_up_manager.cpp b/be/src/cloud/cloud_warm_up_manager.cpp index 6324f7b23c9ea9d..47046de36985adf 100644 --- a/be/src/cloud/cloud_warm_up_manager.cpp +++ b/be/src/cloud/cloud_warm_up_manager.cpp @@ -114,6 +114,45 @@ void CloudWarmUpManager::handle_jobs() { wait->signal(); }, }); + + auto download_idx_file = [&](const io::Path& idx_path) { + io::DownloadFileMeta meta { + .path = idx_path, + .file_size = -1, + .file_system = storage_resource.value()->fs, + .ctx = + { + .expiration_time = expiration_time, + }, + .download_done = + [wait](Status st) { + if (!st) { + LOG_WARNING("Warm up error ").error(st); + } + wait->signal(); + }, + }; + 
_engine.file_cache_block_downloader().submit_download_task(std::move(meta)); + }; + auto schema_ptr = rs->tablet_schema(); + auto idx_version = schema_ptr->get_inverted_index_storage_format(); + if (idx_version == InvertedIndexStorageFormatPB::V1) { + for (const auto& index : schema_ptr->indexes()) { + if (index.index_type() == IndexType::INVERTED) { + wait->add_count(); + auto idx_path = storage_resource.value()->remote_idx_v1_path( + *rs, seg_id, index.index_id(), index.get_index_suffix()); + download_idx_file(idx_path); + } + } + } else if (idx_version == InvertedIndexStorageFormatPB::V2) { + if (schema_ptr->has_inverted_index()) { + wait->add_count(); + auto idx_path = + storage_resource.value()->remote_idx_v2_path(*rs, seg_id); + download_idx_file(idx_path); + } + } } } timespec time; diff --git a/be/src/olap/storage_policy.cpp b/be/src/olap/storage_policy.cpp index c553d2b7441aeca..837e9bed178e3a9 100644 --- a/be/src/olap/storage_policy.cpp +++ b/be/src/olap/storage_policy.cpp @@ -191,6 +191,37 @@ std::string StorageResource::remote_segment_path(const RowsetMeta& rowset, int64 } } +std::string StorageResource::remote_idx_v1_path(const RowsetMeta& rowset, int64_t seg_id, + int64_t index_id, + std::string_view index_path_suffix) const { + std::string suffix = + index_path_suffix.empty() ? "" : std::string {"@"} + index_path_suffix.data(); + switch (path_version) { + case 0: + return fmt::format("{}/{}/{}_{}_{}{}.idx", DATA_PREFIX, rowset.tablet_id(), + rowset.rowset_id().to_string(), seg_id, index_id, suffix); + case 1: + return fmt::format("{}/{}/{}/{}/{}_{}{}.idx", DATA_PREFIX, shard_fn(rowset.tablet_id()), + rowset.tablet_id(), rowset.rowset_id().to_string(), seg_id, index_id, + suffix); + default: + exit_at_unknown_path_version(fs->id(), path_version); + } +} + +std::string StorageResource::remote_idx_v2_path(const RowsetMeta& rowset, int64_t seg_id) const { + switch (path_version) { + case 0: + return fmt::format("{}/{}/{}_{}.idx", DATA_PREFIX, rowset.tablet_id(), + rowset.rowset_id().to_string(), seg_id); + case 1: + return fmt::format("{}/{}/{}/{}/{}.idx", DATA_PREFIX, shard_fn(rowset.tablet_id()), + rowset.tablet_id(), rowset.rowset_id().to_string(), seg_id); + default: + exit_at_unknown_path_version(fs->id(), path_version); + } +} + std::string StorageResource::remote_tablet_path(int64_t tablet_id) const { switch (path_version) { case 0: diff --git a/be/src/olap/storage_policy.h b/be/src/olap/storage_policy.h index 9eb27773272704d..f79b1a052095cab 100644 --- a/be/src/olap/storage_policy.h +++ b/be/src/olap/storage_policy.h @@ -77,6 +77,11 @@ struct StorageResource { int64_t seg_id) const; std::string remote_segment_path(const RowsetMeta& rowset, int64_t seg_id) const; std::string remote_tablet_path(int64_t tablet_id) const; + + std::string remote_idx_v1_path(const RowsetMeta& rowset, int64_t seg_id, int64_t index_id, + std::string_view index_suffix) const; + std::string remote_idx_v2_path(const RowsetMeta& rowset, int64_t seg_id) const; + std::string cooldown_tablet_meta_path(int64_t tablet_id, int64_t replica_id, int64_t cooldown_term) const; }; From 572643a26e363c3dc231ab86264679ba90780e74 Mon Sep 17 00:00:00 2001 From: Xu Chen Date: Fri, 9 Aug 2024 12:25:51 +0800 Subject: [PATCH 06/94] [Fix](lakesoul) Fix CVEs and some filter pushdown issues (#39044) ## Proposed changes 1. Addressed CVEs caused by hadoop and spark. Unused packages are excluded from LakeSoul's depdencies. 2. Fix som filter pushdown issues. 
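A quick way to sanity-check the dependency exclusions after this change (the module selector and pattern below are illustrative, run from the `fe` directory):
```
# List the FE dependency tree restricted to Spark artifacts; after the exclusions
# none of them should be resolved through lakesoul-io-java any more.
mvn -pl fe-core dependency:tree -Dincludes=org.apache.spark
```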
--------- Signed-off-by: dmetasoul01 Co-authored-by: dmetasoul01 --- .../lakesoul-scanner/pom.xml | 52 +--- .../doris/lakesoul/LakeSoulJniScanner.java | 37 +-- .../doris/lakesoul/parquet/ParquetFilter.java | 288 ------------------ fe/fe-core/pom.xml | 67 ++-- .../lakesoul/LakeSoulExternalCatalog.java | 29 +- .../lakesoul/LakeSoulExternalTable.java | 6 +- .../datasource/lakesoul/LakeSoulUtils.java | 21 +- .../lakesoul/source/LakeSoulScanNode.java | 26 +- .../lakesoul/LakeSoulPredicateTest.java | 14 +- fe/pom.xml | 2 +- .../lakesoul/test_lakesoul_filter.groovy | 16 + .../test_external_table_lakesoul.groovy | 24 +- 12 files changed, 129 insertions(+), 453 deletions(-) delete mode 100644 fe/be-java-extensions/lakesoul-scanner/src/main/java/org/apache/doris/lakesoul/parquet/ParquetFilter.java diff --git a/fe/be-java-extensions/lakesoul-scanner/pom.xml b/fe/be-java-extensions/lakesoul-scanner/pom.xml index 24d7efc76144771..4a1ae6b2e8a730d 100644 --- a/fe/be-java-extensions/lakesoul-scanner/pom.xml +++ b/fe/be-java-extensions/lakesoul-scanner/pom.xml @@ -48,25 +48,6 @@ under the License. - - - org.scala-lang - scala-library - ${scala.version} - compile - - - org.scala-lang - scala-compiler - ${scala.version} - provided - - - org.scala-lang - scala-reflect - ${scala.version} - - com.dmetasoul lakesoul-io-java @@ -74,42 +55,11 @@ under the License. shaded - org.slf4j - slf4j-log4j12 - - - log4j - log4j - - - org.slf4j - slf4j-api - - - org.antlr - antlr4-runtime - - - commons-logging - commons-logging - - - com.google.code.findbugs - jsr305 - - - org.apache.spark + * * - - - org.apache.parquet - parquet-column - 1.12.2 - - diff --git a/fe/be-java-extensions/lakesoul-scanner/src/main/java/org/apache/doris/lakesoul/LakeSoulJniScanner.java b/fe/be-java-extensions/lakesoul-scanner/src/main/java/org/apache/doris/lakesoul/LakeSoulJniScanner.java index a7ac785d1fb0662..18b7bea03bbbb05 100644 --- a/fe/be-java-extensions/lakesoul-scanner/src/main/java/org/apache/doris/lakesoul/LakeSoulJniScanner.java +++ b/fe/be-java-extensions/lakesoul-scanner/src/main/java/org/apache/doris/lakesoul/LakeSoulJniScanner.java @@ -24,17 +24,17 @@ import com.dmetasoul.lakesoul.lakesoul.io.substrait.SubstraitUtil; import com.lakesoul.shaded.com.fasterxml.jackson.core.type.TypeReference; import com.lakesoul.shaded.com.fasterxml.jackson.databind.ObjectMapper; +import com.lakesoul.shaded.io.substrait.proto.Plan; import com.lakesoul.shaded.org.apache.arrow.vector.VectorSchemaRoot; import com.lakesoul.shaded.org.apache.arrow.vector.types.pojo.Field; import com.lakesoul.shaded.org.apache.arrow.vector.types.pojo.Schema; -import io.substrait.proto.Plan; +import com.lakesoul.shaded.org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; @@ -65,9 +65,8 @@ public void open() throws IOException { withAllocator(nativeIOReader.getAllocator()); nativeIOReader.setBatchSize(batchSize); - if (LOG.isDebugEnabled()) { - LOG.debug("opening LakeSoulJniScanner with params={}", params); - } + LOG.info("opening LakeSoulJniScanner with params={}", params); + // add files for (String file : params.get(LakeSoulUtils.FILE_NAMES).split(LakeSoulUtils.LIST_DELIM)) { nativeIOReader.addFile(file); @@ -105,6 +104,10 @@ public void open() throws IOException { List requiredFields = new ArrayList<>(); for (String fieldName : 
requiredFieldNames) { + String name = fieldName.strip(); + if (StringUtils.isEmpty(name)) { + continue; + } requiredFields.add(tableSchema.findField(fieldName)); } @@ -182,28 +185,4 @@ public void releaseTable() { currentBatch.close(); } } - - public static void main(String[] args) throws IOException { - HashMap params = new HashMap<>(); - params.put("required_fields", "r_regionkey;r_name;r_comment"); - params.put("primary_keys", "r_regionkey;r_name"); - params.put("query_id", "e9d075a6500a4cac-b94630fd4b30c171"); - params.put("file_paths", - "file:/Users/ceng/Documents/GitHub/LakeSoul/rust/lakesoul-datafusion/" - + "default/region/part-RzmUvDFtYV8ceb3J_0000.parquet" - ); - params.put("options", "{}"); - params.put("table_schema", - "{\"fields\":[" - + "{\"name\":\"r_regionkey\",\"type\":{\"name\":\"int\",\"isSigned\":true,\"bitWidth\":64}," - + "\"nullable\":false,\"children\":[]}," - + "{\"name\":\"r_name\",\"type\":{\"name\":\"utf8\"},\"nullable\":false,\"children\":[]}," - + "{\"name\":\"r_comment\",\"type\":{\"name\":\"utf8\"},\"nullable\":false,\"children\":[]}" - + "]," - + "\"metadata\":null}"); - params.put("partition_descs", ""); - LakeSoulJniScanner scanner = new LakeSoulJniScanner(1024, params); - scanner.open(); - System.out.println(scanner.getNext()); - } } diff --git a/fe/be-java-extensions/lakesoul-scanner/src/main/java/org/apache/doris/lakesoul/parquet/ParquetFilter.java b/fe/be-java-extensions/lakesoul-scanner/src/main/java/org/apache/doris/lakesoul/parquet/ParquetFilter.java deleted file mode 100644 index 7d2820acd793f2d..000000000000000 --- a/fe/be-java-extensions/lakesoul-scanner/src/main/java/org/apache/doris/lakesoul/parquet/ParquetFilter.java +++ /dev/null @@ -1,288 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.lakesoul.parquet; - -import org.apache.doris.common.jni.vec.ColumnType; -import org.apache.doris.common.jni.vec.ScanPredicate; - -import org.apache.parquet.filter2.predicate.FilterApi; -import org.apache.parquet.filter2.predicate.FilterPredicate; -import org.apache.parquet.io.api.Binary; - -public class ParquetFilter { - - public static FilterPredicate toParquetFilter(ScanPredicate predicate) { - ScanPredicate.FilterOp filterOp = predicate.op; - switch (filterOp) { - case FILTER_IN: - return convertIn(predicate); - case FILTER_NOT_IN: - return convertNotIn(predicate); - case FILTER_LESS: - return convertLess(predicate); - case FILTER_LARGER: - return convertLarger(predicate); - case FILTER_LESS_OR_EQUAL: - return convertLessOrEqual(predicate); - case FILTER_LARGER_OR_EQUAL: - return convertLargerOrEqual(predicate); - default: - break; - } - throw new RuntimeException("Unsupported ScanPredicate" + ScanPredicate.dump(new ScanPredicate[] {predicate})); - } - - private static FilterPredicate convertNotIn(ScanPredicate predicate) { - String colName = predicate.columName; - ColumnType.Type colType = predicate.type; - ScanPredicate.PredicateValue[] predicateValues = predicate.predicateValues(); - FilterPredicate resultPredicate = null; - for (ScanPredicate.PredicateValue predicateValue : predicateValues) { - if (resultPredicate == null) { - resultPredicate = makeNotEquals(colName, colType, predicateValue); - } else { - resultPredicate = FilterApi.and(resultPredicate, makeNotEquals(colName, colType, predicateValue)); - } - } - return resultPredicate; - } - - private static FilterPredicate convertIn(ScanPredicate predicate) { - String colName = predicate.columName; - ColumnType.Type colType = predicate.type; - ScanPredicate.PredicateValue[] predicateValues = predicate.predicateValues(); - FilterPredicate resultPredicate = null; - for (ScanPredicate.PredicateValue predicateValue : predicateValues) { - if (resultPredicate == null) { - resultPredicate = makeEquals(colName, colType, predicateValue); - } else { - resultPredicate = FilterApi.or(resultPredicate, makeEquals(colName, colType, predicateValue)); - } - } - return resultPredicate; - } - - private static FilterPredicate convertLarger(ScanPredicate predicate) { - String colName = predicate.columName; - ColumnType.Type colType = predicate.type; - ScanPredicate.PredicateValue predicateValue = predicate.predicateValues()[0]; - return makeLarger(colName, colType, predicateValue); - } - - private static FilterPredicate convertLargerOrEqual(ScanPredicate predicate) { - String colName = predicate.columName; - ColumnType.Type colType = predicate.type; - ScanPredicate.PredicateValue predicateValue = predicate.predicateValues()[0]; - return makeLargerOrEqual(colName, colType, predicateValue); - } - - private static FilterPredicate convertLess(ScanPredicate predicate) { - String colName = predicate.columName; - ColumnType.Type colType = predicate.type; - ScanPredicate.PredicateValue predicateValue = predicate.predicateValues()[0]; - return makeLess(colName, colType, predicateValue); - } - - private static FilterPredicate convertLessOrEqual(ScanPredicate predicate) { - String colName = predicate.columName; - ColumnType.Type colType = predicate.type; - ScanPredicate.PredicateValue predicateValue = predicate.predicateValues()[0]; - return makeLessOrEqual(colName, colType, predicateValue); - } - - private static FilterPredicate makeNotEquals(String colName, ColumnType.Type type, - ScanPredicate.PredicateValue value) { - switch (type) 
{ - case BOOLEAN: - return FilterApi.notEq(FilterApi.booleanColumn(colName), value.getBoolean()); - case TINYINT: - return FilterApi.notEq(FilterApi.intColumn(colName), (int) value.getByte()); - case SMALLINT: - return FilterApi.notEq(FilterApi.intColumn(colName), (int) value.getShort()); - case INT: - return FilterApi.notEq(FilterApi.intColumn(colName), value.getInt()); - case BIGINT: - return FilterApi.notEq(FilterApi.longColumn(colName), value.getLong()); - case FLOAT: - return FilterApi.notEq(FilterApi.floatColumn(colName), value.getFloat()); - case DOUBLE: - return FilterApi.notEq(FilterApi.doubleColumn(colName), value.getDouble()); - case CHAR: - case VARCHAR: - case STRING: - return FilterApi.notEq(FilterApi.binaryColumn(colName), Binary.fromString(value.getString())); - case BINARY: - return FilterApi.notEq(FilterApi.binaryColumn(colName), Binary.fromConstantByteArray(value.getBytes())); - case ARRAY: - case MAP: - case STRUCT: - default: - throw new RuntimeException("Unsupported push_down_filter type value: " + type); - } - } - - - private static FilterPredicate makeEquals(String colName, ColumnType.Type type, - ScanPredicate.PredicateValue value) { - switch (type) { - case BOOLEAN: - return FilterApi.eq(FilterApi.booleanColumn(colName), value.getBoolean()); - case TINYINT: - return FilterApi.eq(FilterApi.intColumn(colName), (int) value.getByte()); - case SMALLINT: - return FilterApi.eq(FilterApi.intColumn(colName), (int) value.getShort()); - case INT: - return FilterApi.eq(FilterApi.intColumn(colName), value.getInt()); - case BIGINT: - return FilterApi.eq(FilterApi.longColumn(colName), value.getLong()); - case FLOAT: - return FilterApi.eq(FilterApi.floatColumn(colName), value.getFloat()); - case DOUBLE: - return FilterApi.eq(FilterApi.doubleColumn(colName), value.getDouble()); - case CHAR: - case VARCHAR: - case STRING: - return FilterApi.eq(FilterApi.binaryColumn(colName), Binary.fromString(value.getString())); - case BINARY: - return FilterApi.eq(FilterApi.binaryColumn(colName), Binary.fromConstantByteArray(value.getBytes())); - case ARRAY: - case MAP: - case STRUCT: - default: - throw new RuntimeException("Unsupported push_down_filter type value: " + type); - } - } - - private static FilterPredicate makeLarger(String colName, ColumnType.Type type, - ScanPredicate.PredicateValue value) { - switch (type) { - case TINYINT: - return FilterApi.gt(FilterApi.intColumn(colName), (int) value.getByte()); - case SMALLINT: - return FilterApi.gt(FilterApi.intColumn(colName), (int) value.getShort()); - case INT: - return FilterApi.gt(FilterApi.intColumn(colName), value.getInt()); - case BIGINT: - return FilterApi.gt(FilterApi.longColumn(colName), value.getLong()); - case FLOAT: - return FilterApi.gt(FilterApi.floatColumn(colName), value.getFloat()); - case DOUBLE: - return FilterApi.gt(FilterApi.doubleColumn(colName), value.getDouble()); - case CHAR: - case VARCHAR: - case STRING: - return FilterApi.gt(FilterApi.binaryColumn(colName), Binary.fromString(value.getString())); - case BINARY: - return FilterApi.gt(FilterApi.binaryColumn(colName), Binary.fromConstantByteArray(value.getBytes())); - case ARRAY: - case MAP: - case STRUCT: - default: - throw new RuntimeException("Unsupported push_down_filter type value: " + type); - } - - } - - private static FilterPredicate makeLargerOrEqual(String colName, ColumnType.Type type, - ScanPredicate.PredicateValue value) { - switch (type) { - case TINYINT: - return FilterApi.gtEq(FilterApi.intColumn(colName), (int) value.getByte()); - case SMALLINT: - 
return FilterApi.gtEq(FilterApi.intColumn(colName), (int) value.getShort()); - case INT: - return FilterApi.gtEq(FilterApi.intColumn(colName), value.getInt()); - case BIGINT: - return FilterApi.gtEq(FilterApi.longColumn(colName), value.getLong()); - case FLOAT: - return FilterApi.gtEq(FilterApi.floatColumn(colName), value.getFloat()); - case DOUBLE: - return FilterApi.gtEq(FilterApi.doubleColumn(colName), value.getDouble()); - case CHAR: - case VARCHAR: - case STRING: - return FilterApi.gtEq(FilterApi.binaryColumn(colName), Binary.fromString(value.getString())); - case BINARY: - return FilterApi.gtEq(FilterApi.binaryColumn(colName), Binary.fromConstantByteArray(value.getBytes())); - case ARRAY: - case MAP: - case STRUCT: - default: - throw new RuntimeException("Unsupported push_down_filter type value: " + type); - } - - } - - private static FilterPredicate makeLess(String colName, ColumnType.Type type, ScanPredicate.PredicateValue value) { - switch (type) { - case TINYINT: - return FilterApi.lt(FilterApi.intColumn(colName), (int) value.getByte()); - case SMALLINT: - return FilterApi.lt(FilterApi.intColumn(colName), (int) value.getShort()); - case INT: - return FilterApi.lt(FilterApi.intColumn(colName), value.getInt()); - case BIGINT: - return FilterApi.lt(FilterApi.longColumn(colName), value.getLong()); - case FLOAT: - return FilterApi.lt(FilterApi.floatColumn(colName), value.getFloat()); - case DOUBLE: - return FilterApi.lt(FilterApi.doubleColumn(colName), value.getDouble()); - case CHAR: - case VARCHAR: - case STRING: - return FilterApi.lt(FilterApi.binaryColumn(colName), Binary.fromString(value.getString())); - case BINARY: - return FilterApi.lt(FilterApi.binaryColumn(colName), Binary.fromConstantByteArray(value.getBytes())); - case ARRAY: - case MAP: - case STRUCT: - default: - throw new RuntimeException("Unsupported push_down_filter type value: " + type); - } - - } - - private static FilterPredicate makeLessOrEqual(String colName, ColumnType.Type type, - ScanPredicate.PredicateValue value) { - switch (type) { - case TINYINT: - return FilterApi.ltEq(FilterApi.intColumn(colName), (int) value.getByte()); - case SMALLINT: - return FilterApi.ltEq(FilterApi.intColumn(colName), (int) value.getShort()); - case INT: - return FilterApi.ltEq(FilterApi.intColumn(colName), value.getInt()); - case BIGINT: - return FilterApi.ltEq(FilterApi.longColumn(colName), value.getLong()); - case FLOAT: - return FilterApi.ltEq(FilterApi.floatColumn(colName), value.getFloat()); - case DOUBLE: - return FilterApi.ltEq(FilterApi.doubleColumn(colName), value.getDouble()); - case CHAR: - case VARCHAR: - case STRING: - return FilterApi.ltEq(FilterApi.binaryColumn(colName), Binary.fromString(value.getString())); - case BINARY: - return FilterApi.ltEq(FilterApi.binaryColumn(colName), Binary.fromConstantByteArray(value.getBytes())); - case ARRAY: - case MAP: - case STRUCT: - default: - throw new RuntimeException("Unsupported push_down_filter type value: " + type); - } - } -} diff --git a/fe/fe-core/pom.xml b/fe/fe-core/pom.xml index 8021f2a3b18b072..92e576fb2e1779c 100644 --- a/fe/fe-core/pom.xml +++ b/fe/fe-core/pom.xml @@ -565,58 +565,77 @@ under the License. 
hadoop-auth + com.dmetasoul - lakesoul-common + lakesoul-io-java ${lakesoul.version} - shaded - * + io.netty * - - - - com.dmetasoul - lakesoul-io-java - ${lakesoul.version} - shaded - - org.slf4j - slf4j-log4j12 + org.apache.arrow + arrow-vector - log4j - log4j + org.apache.arrow + arrow-memory-core - org.slf4j - slf4j-api + org.apache.arrow + arrow-memory-netty org.apache.arrow + arrow-format + + + org.apache.spark * - org.antlr - antlr4-runtime + com.fasterxml.jackson.core + * - commons-logging - commons-logging + com.fasterxml.jackson.datatype + * - com.google.code.findbugs - jsr305 + org.apache.commons + * - org.apache.spark + org.scala-lang + * + + + org.json4s * + + org.ow2.asm + * + + + org.postgresql + postgresql + + + org.postgresql + postgresql + 42.7.3 + + + org.scala-lang + scala-library + ${scala.version} + compile + org.apache.iceberg diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/LakeSoulExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/LakeSoulExternalCatalog.java index e813ac2fc97a32e..15df1d3b5676a5c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/LakeSoulExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/LakeSoulExternalCatalog.java @@ -17,7 +17,6 @@ package org.apache.doris.datasource.lakesoul; - import org.apache.doris.datasource.CatalogProperty; import org.apache.doris.datasource.ExternalCatalog; import org.apache.doris.datasource.InitCatalogLog; @@ -29,12 +28,16 @@ import com.dmetasoul.lakesoul.meta.entity.PartitionInfo; import com.dmetasoul.lakesoul.meta.entity.TableInfo; import com.google.common.collect.Lists; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import java.util.List; import java.util.Map; public class LakeSoulExternalCatalog extends ExternalCatalog { + private static final Logger LOG = LogManager.getLogger(LakeSoulExternalCatalog.class); + private DBManager lakesoulMetadataManager; private final Map props; @@ -67,26 +70,24 @@ public List listTableNames(SessionContext ctx, String dbName) { @Override public boolean tableExist(SessionContext ctx, String dbName, String tblName) { makeSureInitialized(); - TableInfo tableInfo = lakesoulMetadataManager.getTableInfoByNameAndNamespace(dbName, tblName); + TableInfo tableInfo = lakesoulMetadataManager.getTableInfoByNameAndNamespace(tblName, dbName); return null != tableInfo; } @Override protected void initLocalObjectsImpl() { - if (lakesoulMetadataManager == null) { - if (props != null) { - if (props.containsKey(DBUtil.urlKey)) { - System.setProperty(DBUtil.urlKey, props.get(DBUtil.urlKey)); - } - if (props.containsKey(DBUtil.usernameKey)) { - System.setProperty(DBUtil.usernameKey, props.get(DBUtil.usernameKey)); - } - if (props.containsKey(DBUtil.passwordKey)) { - System.setProperty(DBUtil.passwordKey, props.get(DBUtil.passwordKey)); - } + if (props != null) { + if (props.containsKey(DBUtil.urlKey)) { + System.setProperty(DBUtil.urlKey, props.get(DBUtil.urlKey)); + } + if (props.containsKey(DBUtil.usernameKey)) { + System.setProperty(DBUtil.usernameKey, props.get(DBUtil.usernameKey)); + } + if (props.containsKey(DBUtil.passwordKey)) { + System.setProperty(DBUtil.passwordKey, props.get(DBUtil.passwordKey)); } - lakesoulMetadataManager = new DBManager(); } + lakesoulMetadataManager = new DBManager(); } public TableInfo getLakeSoulTable(String dbName, String tblName) { diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/LakeSoulExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/LakeSoulExternalTable.java index a5cf3478ae840a9..9dd2f4811e98f07 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/LakeSoulExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/LakeSoulExternalTable.java @@ -58,7 +58,11 @@ public class LakeSoulExternalTable extends ExternalTable { public LakeSoulExternalTable(long id, String name, String dbName, LakeSoulExternalCatalog catalog) { super(id, name, catalog, dbName, TableType.LAKESOUl_EXTERNAL_TABLE); - tableId = getLakeSoulTableInfo().getTableId(); + TableInfo tableInfo = getLakeSoulTableInfo(); + if (tableInfo == null) { + throw new RuntimeException(String.format("LakeSoul table %s.%s does not exist", dbName, name)); + } + tableId = tableInfo.getTableId(); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/LakeSoulUtils.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/LakeSoulUtils.java index 8f7cf83dbfcb9ab..fba74d4f978a5e8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/LakeSoulUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/LakeSoulUtils.java @@ -24,7 +24,6 @@ import org.apache.doris.analysis.DecimalLiteral; import org.apache.doris.analysis.Expr; import org.apache.doris.analysis.FloatLiteral; -import org.apache.doris.analysis.FunctionCallExpr; import org.apache.doris.analysis.InPredicate; import org.apache.doris.analysis.IntLiteral; import org.apache.doris.analysis.IsNullPredicate; @@ -42,12 +41,12 @@ import com.google.common.collect.BoundType; import com.google.common.collect.Range; import com.google.common.collect.RangeSet; -import com.lakesoul.shaded.org.apache.arrow.vector.types.pojo.Field; -import com.lakesoul.shaded.org.apache.arrow.vector.types.pojo.Schema; import io.substrait.expression.Expression; import io.substrait.extension.DefaultExtensionCatalog; import io.substrait.type.Type; import io.substrait.type.TypeCreator; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; import java.io.IOException; import java.time.Instant; @@ -64,7 +63,6 @@ import java.util.Set; import java.util.stream.Collectors; - public class LakeSoulUtils { public static final String FILE_NAMES = "file_paths"; @@ -417,19 +415,10 @@ private static Expression convertBinaryExpr(Expr dorisExpr, Schema tableSchema) func = "lt:any_any"; break; case INVALID_OPCODE: - if (dorisExpr instanceof FunctionCallExpr) { - String name = dorisExpr.getExprName().toLowerCase(); - String s = literalExpr.getStringValue(); - if (name.equals("like") && !s.startsWith("%") && s.endsWith("%")) { - namespace = DefaultExtensionCatalog.FUNCTIONS_STRING; - func = "like:bool"; - break; - } - } else if (dorisExpr instanceof IsNullPredicate) { + if (dorisExpr instanceof IsNullPredicate) { if (((IsNullPredicate) dorisExpr).isNotNull()) { namespace = DefaultExtensionCatalog.FUNCTIONS_COMPARISON; func = "is_not_null:any"; - } else { namespace = DefaultExtensionCatalog.FUNCTIONS_COMPARISON; func = "is_null:any"; @@ -458,7 +447,9 @@ public static Object extractDorisLiteral(Type type, LiteralExpr expr) { if (dateLiteral.getType().isDatetimeV2() || dateLiteral.getType().isDatetime()) { return null; } - return dateLiteral.getLongValue(); + return (int) LocalDate.of((int) dateLiteral.getYear(), + (int) dateLiteral.getMonth(), + 
(int) dateLiteral.getDay()).toEpochDay(); } if (type instanceof Type.TimestampTZ || type instanceof Type.Timestamp) { return dateLiteral.getLongValue(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/source/LakeSoulScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/source/LakeSoulScanNode.java index fd36bfd52bdfac2..7f4d0aca4da7779 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/source/LakeSoulScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/source/LakeSoulScanNode.java @@ -17,8 +17,6 @@ package org.apache.doris.datasource.lakesoul.source; - - import org.apache.doris.analysis.TupleDescriptor; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.DdlException; @@ -29,6 +27,7 @@ import org.apache.doris.datasource.TableFormatType; import org.apache.doris.datasource.lakesoul.LakeSoulExternalTable; import org.apache.doris.datasource.lakesoul.LakeSoulUtils; +import org.apache.doris.datasource.property.constants.OssProperties; import org.apache.doris.datasource.property.constants.S3Properties; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.spi.Split; @@ -39,6 +38,8 @@ import org.apache.doris.thrift.TLakeSoulFileDesc; import org.apache.doris.thrift.TTableFormatFileDesc; +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONObject; import com.dmetasoul.lakesoul.lakesoul.io.substrait.SubstraitUtil; import com.dmetasoul.lakesoul.meta.DBUtil; import com.dmetasoul.lakesoul.meta.DataFileInfo; @@ -46,15 +47,13 @@ import com.dmetasoul.lakesoul.meta.LakeSoulOptions; import com.dmetasoul.lakesoul.meta.entity.PartitionInfo; import com.dmetasoul.lakesoul.meta.entity.TableInfo; +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.Lists; -import com.lakesoul.shaded.com.alibaba.fastjson.JSON; -import com.lakesoul.shaded.com.alibaba.fastjson.JSONObject; -import com.lakesoul.shaded.com.fasterxml.jackson.core.type.TypeReference; -import com.lakesoul.shaded.com.fasterxml.jackson.databind.ObjectMapper; -import com.lakesoul.shaded.org.apache.arrow.vector.types.pojo.Field; -import com.lakesoul.shaded.org.apache.arrow.vector.types.pojo.Schema; import io.substrait.proto.Plan; import lombok.SneakyThrows; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -69,6 +68,7 @@ public class LakeSoulScanNode extends FileQueryScanNode { private static final Logger LOG = LogManager.getLogger(LakeSoulScanNode.class); + protected LakeSoulExternalTable lakeSoulExternalTable; String tableName; @@ -197,7 +197,13 @@ public void setLakeSoulParams(TFileRangeDesc rangeDesc, LakeSoulSplit lakeSoulSp if (catalogProps.get(S3Properties.Env.ENDPOINT) != null) { options.put(LakeSoulUtils.FS_S3A_ENDPOINT, catalogProps.get(S3Properties.Env.ENDPOINT)); - options.put(LakeSoulUtils.FS_S3A_PATH_STYLE_ACCESS, "true"); + if (!options.containsKey(OssProperties.ENDPOINT)) { + // Aliyun OSS requires virtual host style access + options.put(LakeSoulUtils.FS_S3A_PATH_STYLE_ACCESS, "false"); + } else { + // use path style access for all other s3 compatible storage services + options.put(LakeSoulUtils.FS_S3A_PATH_STYLE_ACCESS, "true"); + } if (catalogProps.get(S3Properties.Env.ACCESS_KEY) != null) { options.put(LakeSoulUtils.FS_S3A_ACCESS_KEY, 
catalogProps.get(S3Properties.Env.ACCESS_KEY)); } @@ -289,8 +295,6 @@ public List getSplits() throws UserException { } } return splits; - } - } diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/lakesoul/LakeSoulPredicateTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/lakesoul/LakeSoulPredicateTest.java index aebd74f5e020f52..016819a382be786 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/lakesoul/LakeSoulPredicateTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/lakesoul/LakeSoulPredicateTest.java @@ -37,14 +37,14 @@ import com.dmetasoul.lakesoul.lakesoul.io.substrait.SubstraitUtil; import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.Lists; -import com.lakesoul.shaded.org.apache.arrow.vector.types.DateUnit; -import com.lakesoul.shaded.org.apache.arrow.vector.types.FloatingPointPrecision; -import com.lakesoul.shaded.org.apache.arrow.vector.types.TimeUnit; -import com.lakesoul.shaded.org.apache.arrow.vector.types.pojo.ArrowType; -import com.lakesoul.shaded.org.apache.arrow.vector.types.pojo.Field; -import com.lakesoul.shaded.org.apache.arrow.vector.types.pojo.FieldType; -import com.lakesoul.shaded.org.apache.arrow.vector.types.pojo.Schema; import io.substrait.expression.Expression; +import org.apache.arrow.vector.types.DateUnit; +import org.apache.arrow.vector.types.FloatingPointPrecision; +import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.types.pojo.Schema; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; diff --git a/fe/pom.xml b/fe/pom.xml index e1ca62dc5c62715..96c152b7dc1854c 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -318,7 +318,7 @@ under the License. 
2.7.4-11 3.0.0-8 - 2.6.1 + 2.6.2 1.13.1 3.2.2 diff --git a/regression-test/suites/external_table_p0/lakesoul/test_lakesoul_filter.groovy b/regression-test/suites/external_table_p0/lakesoul/test_lakesoul_filter.groovy index 799e8ba61bb2d46..01c5d981c49175c 100644 --- a/regression-test/suites/external_table_p0/lakesoul/test_lakesoul_filter.groovy +++ b/regression-test/suites/external_table_p0/lakesoul/test_lakesoul_filter.groovy @@ -53,6 +53,22 @@ suite("test_lakesoul_filter", "p0,external,doris,external_docker,external_docker sql """select * from nation where n_regionkey = 0 and n_nationkey > 0;""" sql """select * from nation where n_regionkey = 0;""" + + // non-selecting query + sql """select count(*) from customer;""" + + // filter by non-partition column + sql """select count(*) from customer where c_mktsegment='BUILDING';""" + + // filter by partition column + sql """select count(*) from customer where c_nationkey=19;""" + + // filter by both partition and non-partition column + sql """select count(*) from customer where c_mktsegment='BUILDING' and c_nationkey=19;""" + + sql """select * from lineitem where l_shipdate <= DATE '1992-12-01' limit 10;""" + + sql """select count(*) from part where p_type like 'MEDIUM POLISHED%';""" } } diff --git a/regression-test/suites/external_table_p2/lakesoul/test_external_table_lakesoul.groovy b/regression-test/suites/external_table_p2/lakesoul/test_external_table_lakesoul.groovy index bb85dc687d7c990..104f419e5d0dc59 100644 --- a/regression-test/suites/external_table_p2/lakesoul/test_external_table_lakesoul.groovy +++ b/regression-test/suites/external_table_p2/lakesoul/test_external_table_lakesoul.groovy @@ -55,18 +55,18 @@ suite("test_external_table_lakesoul", "p2,external,lakesoul,external_remote,exte );""" // analyze - sql """use `${catalog_name}`.`${db_name}`""" - - sql q1 - sql q2 - sql q3 - sql q4 - sql q5 - sql q6 - sql q7 - sql q8 - sql q9 - sql q11 + sql """use `${catalog_name}`.`${db_name}`""" + + sql q1 + sql q2 + sql q3 + sql q4 + sql q5 + sql q6 + sql q7 + sql q8 + sql q9 + sql q11 } } From 9b9deb37dab10c2fb32bbabd280208c9abad8d46 Mon Sep 17 00:00:00 2001 From: Lei Zhang <27994433+SWJTU-ZhangLei@users.noreply.github.com> Date: Fri, 9 Aug 2024 12:36:17 +0800 Subject: [PATCH 07/94] [fix](fe) Fix calc cloud qps metric incorrect (#38874) * When calculating qps, we should use `HashMap.put` instead of `HashMap.replace` to record last value --- .../main/java/org/apache/doris/metric/MetricCalculator.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricCalculator.java index 44ba7dd7cea72b3..f4beed94b0c636a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricCalculator.java @@ -130,7 +130,7 @@ private void updateCloudMetrics(long interval) { rps = Double.max(rps, 0); MetricRepo.updateClusterRequestPerSecond(clusterId, rps, metric.getLabels()); MetricRepo.DORIS_METRIC_REGISTER.addMetrics(metric); - clusterLastRequestCounter.replace(clusterId, metric.getValue()); + clusterLastRequestCounter.put(clusterId, metric.getValue()); }); } @@ -142,7 +142,7 @@ private void updateCloudMetrics(long interval) { rps = Double.max(rps, 0); MetricRepo.updateClusterQueryPerSecond(clusterId, rps, metric.getLabels()); MetricRepo.DORIS_METRIC_REGISTER.addMetrics(metric); - clusterLastQueryCounter.replace(clusterId, 
metric.getValue()); + clusterLastQueryCounter.put(clusterId, metric.getValue()); }); } @@ -154,7 +154,7 @@ private void updateCloudMetrics(long interval) { rps = Double.max(rps, 0); MetricRepo.updateClusterQueryErrRate(clusterId, rps, metric.getLabels()); MetricRepo.DORIS_METRIC_REGISTER.addMetrics(metric); - clusterLastQueryCounter.replace(clusterId, metric.getValue()); + clusterLastQueryErrCounter.put(clusterId, metric.getValue()); }); } } From cd0a8eca680df9c1f9c8adf7e62327e34e4e7403 Mon Sep 17 00:00:00 2001 From: Kaijie Chen Date: Fri, 9 Aug 2024 14:31:45 +0800 Subject: [PATCH 08/94] [fix](move-memtable) fix bvar g_load_stream_file_writer_cnt (#39075) Previously the bvar `g_load_stream_file_writer_cnt` is not accurate when error happens. Some `FileWriters` are not supposed to be closed manually. All `FileWriter` in `LoadStreamWriter` is managed by `std::unique_ptr`. A `FileWriter` should be able to close itself and cleanup storage state on its deconstructor. This PR changes the bvar `g_load_stream_file_writer_cnt` to track deconstruction of `FileWriter`, instead of tracking `FileWriter` close. --- be/src/runtime/load_stream_writer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/be/src/runtime/load_stream_writer.cpp b/be/src/runtime/load_stream_writer.cpp index d501de3d53fbb4a..ca78311b8ea2508 100644 --- a/be/src/runtime/load_stream_writer.cpp +++ b/be/src/runtime/load_stream_writer.cpp @@ -84,6 +84,8 @@ LoadStreamWriter::LoadStreamWriter(WriteRequest* context, RuntimeProfile* profil } LoadStreamWriter::~LoadStreamWriter() { + g_load_stream_file_writer_cnt << -_segment_file_writers.size(); + g_load_stream_file_writer_cnt << -_inverted_file_writers.size(); g_load_stream_writer_cnt << -1; } @@ -164,7 +166,6 @@ Status LoadStreamWriter::close_writer(uint32_t segid, FileType file_type) { _is_canceled = true; return st; } - g_load_stream_file_writer_cnt << -1; LOG(INFO) << "file " << segid << " path " << file_writer->path().native() << "closed, written " << file_writer->bytes_appended() << " bytes" << ", file type is " << file_type; From 107f90eb5e26da5f57d5d5e66930e7b478f4edf9 Mon Sep 17 00:00:00 2001 From: Sun Chenyang Date: Fri, 9 Aug 2024 15:01:56 +0800 Subject: [PATCH 09/94] [fix] (nereids) fix Match Expreesion in filter estimation (#39050) ## Proposed changes fix error sql ``` select request from table where request like '1.0' or not request MATCH 'GETA'; ``` --- .../doris/nereids/stats/FilterEstimation.java | 7 +- .../inverted_index_p0/test_or_not_match.out | 5 ++ .../test_or_not_match.groovy | 69 +++++++++++++++++++ 3 files changed, 80 insertions(+), 1 deletion(-) create mode 100644 regression-test/data/inverted_index_p0/test_or_not_match.out create mode 100644 regression-test/suites/inverted_index_p0/test_or_not_match.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index b8e08086d633016..d44e6198170b1a7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -34,6 +34,7 @@ import org.apache.doris.nereids.trees.expressions.LessThan; import org.apache.doris.nereids.trees.expressions.LessThanEqual; import org.apache.doris.nereids.trees.expressions.Like; +import org.apache.doris.nereids.trees.expressions.Match; import org.apache.doris.nereids.trees.expressions.Not; import 
org.apache.doris.nereids.trees.expressions.NullSafeEqual; import org.apache.doris.nereids.trees.expressions.Or; @@ -486,7 +487,8 @@ public Statistics visitNot(Not not, EstimationContext context) { child instanceof EqualPredicate || child instanceof InPredicate || child instanceof IsNull - || child instanceof Like, + || child instanceof Like + || child instanceof Match, "Not-predicate meet unexpected child: %s", child.toSql()); if (child instanceof Like) { rowCount = context.statistics.getRowCount() - childStats.getRowCount(); @@ -509,6 +511,9 @@ public Statistics visitNot(Not not, EstimationContext context) { .setMinExpr(originColStats.minExpr) .setMaxValue(originColStats.maxValue) .setMaxExpr(originColStats.maxExpr); + } else if (child instanceof Match) { + rowCount = context.statistics.getRowCount() - childStats.getRowCount(); + colBuilder.setNdv(Math.max(1.0, originColStats.ndv - childColStats.ndv)); } if (not.child().getInputSlots().size() == 1 && !(child instanceof IsNull)) { // only consider the single column numNull, otherwise, ignore diff --git a/regression-test/data/inverted_index_p0/test_or_not_match.out b/regression-test/data/inverted_index_p0/test_or_not_match.out new file mode 100644 index 000000000000000..22dde7a8bf7a006 --- /dev/null +++ b/regression-test/data/inverted_index_p0/test_or_not_match.out @@ -0,0 +1,5 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +GET / HTTP/1.0 +GET / HTTP/1.0 + diff --git a/regression-test/suites/inverted_index_p0/test_or_not_match.groovy b/regression-test/suites/inverted_index_p0/test_or_not_match.groovy new file mode 100644 index 000000000000000..95af26480c9ea89 --- /dev/null +++ b/regression-test/suites/inverted_index_p0/test_or_not_match.groovy @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
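+// documents-1000.json is streamed in below, then the `like ... or not ... MATCH` query
+// that previously failed in stats estimation (FilterEstimation.visitNot) is run repeatedly
+// and finally checked against the expected two-row result.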
+ +import org.codehaus.groovy.runtime.IOGroovyMethods + +suite("test_or_not_match", "p0") { + def tableName = "test_or_not_match" + sql "DROP TABLE IF EXISTS ${tableName}" + sql """ + CREATE TABLE ${tableName} ( + `@timestamp` int(11) NULL COMMENT "", + `clientip` varchar(20) NULL COMMENT "", + `request` text NULL COMMENT "", + `status` int(11) NULL COMMENT "", + `size` int(11) NULL COMMENT "", + INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`@timestamp`) + COMMENT "OLAP" + DISTRIBUTED BY RANDOM BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + // load the json data + streamLoad { + table "${tableName}" + + // set http request header params + set 'read_json_by_line', 'true' + set 'format', 'json' + file 'documents-1000.json' // import json file + time 10000 // limit inflight 10s + + // if declared a check callback, the default check condition will ignore. + // So you must check all condition + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + for (int i = 0; i < 10; i++) { + sql "select request from ${tableName} where request like '1.0' or not request MATCH 'GETA';" + } + + sql "set enable_nereids_planner = true" + sql "set enable_fallback_to_original_planner = false" + + qt_sql "select request from ${tableName} where request like '1.0' or not request MATCH 'GETA' order by request limit 2;" +} From 948d8ce7c416f2ac808dda15f5f125831996224b Mon Sep 17 00:00:00 2001 From: Pxl Date: Fri, 9 Aug 2024 15:10:30 +0800 Subject: [PATCH 10/94] [Chore](case) remove max in num check case (#39049) ## Proposed changes remove max in num check case --- .../pipeline/cloud_p0/conf/regression-conf-custom.groovy | 1 - .../pipeline/cloud_p1/conf/regression-conf-custom.groovy | 2 -- regression-test/pipeline/external/conf/regression-conf.groovy | 1 - regression-test/pipeline/p0/conf/regression-conf.groovy | 1 - regression-test/pipeline/p1/conf/regression-conf.groovy | 2 -- .../suites/query_p0/join/test_bitmap_filter.groovy | 2 +- regression-test/suites/query_p1/test_big_pad.groovy | 4 ---- 7 files changed, 1 insertion(+), 12 deletions(-) diff --git a/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy b/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy index 9c39476f5d42709..3372a16031fb027 100644 --- a/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy +++ b/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy @@ -25,7 +25,6 @@ excludeSuites = "000_the_start_sentinel_do_not_touch," + // keep this line as th "mv_contain_external_table," + // run on external pipeline "set_replica_status," + // not a case for cloud mode, no need to run "test_be_inject_publish_txn_fail," + // not a case for cloud mode, no need to run - "test_bitmap_filter," + "test_compaction_uniq_cluster_keys_with_delete," + "test_compaction_uniq_keys_cluster_key," + "test_dump_image," + diff --git a/regression-test/pipeline/cloud_p1/conf/regression-conf-custom.groovy b/regression-test/pipeline/cloud_p1/conf/regression-conf-custom.groovy index 2662090b401af50..4f7c4d3b3f53d3d 100644 --- a/regression-test/pipeline/cloud_p1/conf/regression-conf-custom.groovy +++ 
b/regression-test/pipeline/cloud_p1/conf/regression-conf-custom.groovy @@ -3,8 +3,6 @@ testGroups = "p1" excludeSuites = "000_the_start_sentinel_do_not_touch," + // keep this line as the first line "stress_test_insert_into," + "test_analyze_stats_p1," + - "test_big_pad," + - "test_bitmap_filter," + "test_broker_load," + "test_profile," + "test_refresh_mtmv," + diff --git a/regression-test/pipeline/external/conf/regression-conf.groovy b/regression-test/pipeline/external/conf/regression-conf.groovy index f6ea31dffaa3363..395d3a99e63ccff 100644 --- a/regression-test/pipeline/external/conf/regression-conf.groovy +++ b/regression-test/pipeline/external/conf/regression-conf.groovy @@ -63,7 +63,6 @@ excludeGroups = "" // load_stream_fault_injection may cause bad disk excludeSuites = "000_the_start_sentinel_do_not_touch," + // keep this line as the first line - "test_bitmap_filter," + "test_dump_image," + "test_index_failure_injection," + "test_information_schema_external," + diff --git a/regression-test/pipeline/p0/conf/regression-conf.groovy b/regression-test/pipeline/p0/conf/regression-conf.groovy index 61140e8d5956397..d8848b95a6a2aea 100644 --- a/regression-test/pipeline/p0/conf/regression-conf.groovy +++ b/regression-test/pipeline/p0/conf/regression-conf.groovy @@ -63,7 +63,6 @@ excludeGroups = "" // load_stream_fault_injection may cause bad disk excludeSuites = "000_the_start_sentinel_do_not_touch," + // keep this line as the first line - "test_bitmap_filter," + "test_dump_image," + "test_index_failure_injection," + "test_profile," + diff --git a/regression-test/pipeline/p1/conf/regression-conf.groovy b/regression-test/pipeline/p1/conf/regression-conf.groovy index d5207fee03c60e7..6878d533a91024e 100644 --- a/regression-test/pipeline/p1/conf/regression-conf.groovy +++ b/regression-test/pipeline/p1/conf/regression-conf.groovy @@ -54,8 +54,6 @@ testSuites = "" // this suites will not be executed excludeSuites = "000_the_start_sentinel_do_not_touch," + // keep this line as the first line "test_analyze_stats_p1," + - "test_big_pad," + - "test_bitmap_filter," + "test_broker_load," + "test_profile," + "test_refresh_mtmv," + diff --git a/regression-test/suites/query_p0/join/test_bitmap_filter.groovy b/regression-test/suites/query_p0/join/test_bitmap_filter.groovy index 02657d836cb3c23..a0bde17cd1c75e4 100644 --- a/regression-test/suites/query_p0/join/test_bitmap_filter.groovy +++ b/regression-test/suites/query_p0/join/test_bitmap_filter.groovy @@ -86,6 +86,6 @@ suite("test_bitmap_filter", "query_p0") { test { sql "select k1, count(*) from ${tbl1} b1 group by k1 having k1 in (select k2 from ${tbl2} b2) order by k1;" - exception "HAVING clause dose not support in bitmap" + exception "Doris hll, bitmap, array, map, struct, jsonb, variant column must use with specific function, and don't support filter, group by or order by." 
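+        // Note: the expected error is now the generic complex-type message shared by hll, bitmap,
+        // array, map, struct, jsonb and variant columns, rather than the old HAVING-specific
+        // "HAVING clause dose not support in bitmap" text.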
} } diff --git a/regression-test/suites/query_p1/test_big_pad.groovy b/regression-test/suites/query_p1/test_big_pad.groovy index 9c781a5189c49ce..a5650feb34eabfb 100644 --- a/regression-test/suites/query_p1/test_big_pad.groovy +++ b/regression-test/suites/query_p1/test_big_pad.groovy @@ -30,10 +30,6 @@ suite("test_big_pad") { distributed BY hash(k1) buckets 3 properties("replication_num" = "1"); """ - test { - sql "select rpad('a',15000,'asd');" - exception "rpad function the length argument is 15000 exceeded maximum default value" - } qt_sql_rpad"select length(rpad('a',15000,'asd'));" sql "insert into d_table values(1,2000000000,1,'a'),(1,2000000000,1,'a'),(1,2000000000,1,'a');" From 62bb1803f785c3ac2f9dd7c3835adf0e3eca60f5 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Fri, 9 Aug 2024 15:19:50 +0800 Subject: [PATCH 11/94] [fix](local exchange) Fix EOS processing in local exchanger (#39031) --- be/src/pipeline/dependency.cpp | 6 +- be/src/pipeline/dependency.h | 85 ++++- be/src/pipeline/exec/operator.cpp | 5 +- .../local_exchange_sink_operator.cpp | 5 +- .../local_exchange_source_operator.cpp | 11 +- .../local_exchange/local_exchanger.cpp | 297 ++++++++---------- .../pipeline/local_exchange/local_exchanger.h | 122 ++++--- be/src/pipeline/pipeline_fragment_context.cpp | 26 +- 8 files changed, 312 insertions(+), 245 deletions(-) diff --git a/be/src/pipeline/dependency.cpp b/be/src/pipeline/dependency.cpp index 5e1ce79a1eb3eb9..560efec94e1d18b 100644 --- a/be/src/pipeline/dependency.cpp +++ b/be/src/pipeline/dependency.cpp @@ -188,10 +188,12 @@ void LocalExchangeSharedState::sub_running_sink_operators() { } } -void LocalExchangeSharedState::sub_running_source_operators() { +void LocalExchangeSharedState::sub_running_source_operators( + LocalExchangeSourceLocalState& local_state) { std::unique_lock lc(le_lock); if (exchanger->_running_source_operators.fetch_sub(1) == 1) { _set_always_ready(); + exchanger->finalize(local_state); } } @@ -397,4 +399,6 @@ Status AggSharedState::_destroy_agg_status(vectorized::AggregateDataPtr data) { return Status::OK(); } +LocalExchangeSharedState::~LocalExchangeSharedState() = default; + } // namespace doris::pipeline diff --git a/be/src/pipeline/dependency.h b/be/src/pipeline/dependency.h index ee1afaaf55e4261..36f06b91095b8d3 100644 --- a/be/src/pipeline/dependency.h +++ b/be/src/pipeline/dependency.h @@ -49,7 +49,7 @@ class Dependency; class PipelineTask; struct BasicSharedState; using DependencySPtr = std::shared_ptr; -using DependencyMap = std::map>; +class LocalExchangeSourceLocalState; static constexpr auto SLOW_DEPENDENCY_THRESHOLD = 60 * 1000L * 1000L * 1000L; static constexpr auto TIME_UNIT_DEPENDENCY_LOG = 30 * 1000L * 1000L * 1000L; @@ -811,20 +811,21 @@ struct LocalExchangeSharedState : public BasicSharedState { public: ENABLE_FACTORY_CREATOR(LocalExchangeSharedState); LocalExchangeSharedState(int num_instances); + ~LocalExchangeSharedState() override; std::unique_ptr exchanger {}; std::vector mem_trackers; std::atomic mem_usage = 0; // We need to make sure to add mem_usage first and then enqueue, otherwise sub mem_usage may cause negative mem_usage during concurrent dequeue. 
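    // Rough flow of the accounting members above (as used by the methods below): a sink adds to
    // mem_usage before enqueueing a block; once the total goes above
    // config::local_exchange_buffer_mem_limit the sink dependency is blocked, and it becomes
    // ready again when sub_total_mem_usage() brings the usage back under the limit. The
    // LocalMergeExchangeSharedState subclass added below keeps the same bookkeeping per source
    // queue instead of globally.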
std::mutex le_lock; - void create_source_dependencies(int operator_id, int node_id) { + virtual void create_dependencies(int operator_id, int node_id) { for (auto& source_dep : source_deps) { source_dep = std::make_shared(operator_id, node_id, "LOCAL_EXCHANGE_OPERATOR_DEPENDENCY"); source_dep->set_shared_state(this); } - }; + } void sub_running_sink_operators(); - void sub_running_source_operators(); + void sub_running_source_operators(LocalExchangeSourceLocalState& local_state); void _set_always_ready() { for (auto& dep : source_deps) { DCHECK(dep); @@ -836,7 +837,10 @@ struct LocalExchangeSharedState : public BasicSharedState { } } - Dependency* get_dep_by_channel_id(int channel_id) { return source_deps[channel_id].get(); } + virtual std::vector get_dep_by_channel_id(int channel_id) { + return {source_deps[channel_id]}; + } + virtual Dependency* get_sink_dep_by_channel_id(int channel_id) { return nullptr; } void set_ready_to_read(int channel_id) { auto& dep = source_deps[channel_id]; @@ -847,28 +851,79 @@ struct LocalExchangeSharedState : public BasicSharedState { void add_mem_usage(int channel_id, size_t delta, bool update_total_mem_usage = true) { mem_trackers[channel_id]->consume(delta); if (update_total_mem_usage) { - add_total_mem_usage(delta); + add_total_mem_usage(delta, channel_id); } } - void sub_mem_usage(int channel_id, size_t delta, bool update_total_mem_usage = true) { - mem_trackers[channel_id]->release(delta); - if (update_total_mem_usage) { - sub_total_mem_usage(delta); - } - } + void sub_mem_usage(int channel_id, size_t delta) { mem_trackers[channel_id]->release(delta); } - void add_total_mem_usage(size_t delta) { + virtual void add_total_mem_usage(size_t delta, int channel_id = 0) { if (mem_usage.fetch_add(delta) + delta > config::local_exchange_buffer_mem_limit) { sink_deps.front()->block(); } } - void sub_total_mem_usage(size_t delta) { - if (mem_usage.fetch_sub(delta) - delta <= config::local_exchange_buffer_mem_limit) { + virtual void sub_total_mem_usage(size_t delta, int channel_id = 0) { + auto prev_usage = mem_usage.fetch_sub(delta); + DCHECK_GE(prev_usage - delta, 0) << "prev_usage: " << prev_usage << " delta: " << delta + << " channel_id: " << channel_id; + if (prev_usage - delta <= config::local_exchange_buffer_mem_limit) { sink_deps.front()->set_ready(); } } }; +struct LocalMergeExchangeSharedState : public LocalExchangeSharedState { + LocalMergeExchangeSharedState(int num_instances) + : LocalExchangeSharedState(num_instances), + _queues_mem_usage(num_instances), + _each_queue_limit(config::local_exchange_buffer_mem_limit / num_instances) { + for (size_t i = 0; i < num_instances; i++) { + _queues_mem_usage[i] = 0; + } + } + + void create_dependencies(int operator_id, int node_id) override { + sink_deps.resize(source_deps.size()); + for (size_t i = 0; i < source_deps.size(); i++) { + source_deps[i] = std::make_shared(operator_id, node_id, + "LOCAL_EXCHANGE_OPERATOR_DEPENDENCY"); + source_deps[i]->set_shared_state(this); + sink_deps[i] = std::make_shared( + operator_id, node_id, "LOCAL_EXCHANGE_OPERATOR_SINK_DEPENDENCY", true); + sink_deps[i]->set_shared_state(this); + } + } + + void sub_total_mem_usage(size_t delta, int channel_id) override { + auto prev_usage = _queues_mem_usage[channel_id].fetch_sub(delta); + DCHECK_GE(prev_usage - delta, 0) << "prev_usage: " << prev_usage << " delta: " << delta + << " channel_id: " << channel_id; + if (prev_usage - delta <= _each_queue_limit) { + sink_deps[channel_id]->set_ready(); + } + if (_queues_mem_usage[channel_id] == 
0) { + source_deps[channel_id]->block(); + } + } + void add_total_mem_usage(size_t delta, int channel_id) override { + if (_queues_mem_usage[channel_id].fetch_add(delta) + delta > _each_queue_limit) { + sink_deps[channel_id]->block(); + } + source_deps[channel_id]->set_ready(); + } + + Dependency* get_sink_dep_by_channel_id(int channel_id) override { + return sink_deps[channel_id].get(); + } + + std::vector get_dep_by_channel_id(int channel_id) override { + return source_deps; + } + +private: + std::vector _queues_mem_usage; + const int64_t _each_queue_limit; +}; + } // namespace doris::pipeline diff --git a/be/src/pipeline/exec/operator.cpp b/be/src/pipeline/exec/operator.cpp index 07e0c3cf6407d7f..1e00b9fcbcbc86c 100644 --- a/be/src/pipeline/exec/operator.cpp +++ b/be/src/pipeline/exec/operator.cpp @@ -452,7 +452,10 @@ Status PipelineXLocalState::init(RuntimeState* state, LocalState DCHECK(info.le_state_map.find(_parent->operator_id()) != info.le_state_map.end()); _shared_state = info.le_state_map.at(_parent->operator_id()).first.get(); - _dependency = _shared_state->get_dep_by_channel_id(info.task_idx); + auto deps = _shared_state->get_dep_by_channel_id(info.task_idx); + if (deps.size() == 1) { + _dependency = deps.front().get(); + } _wait_for_dependency_timer = ADD_TIMER_WITH_LEVEL( _runtime_profile, "WaitForDependency[" + _dependency->name() + "]Time", 1); } else if (info.shared_state) { diff --git a/be/src/pipeline/local_exchange/local_exchange_sink_operator.cpp b/be/src/pipeline/local_exchange/local_exchange_sink_operator.cpp index ba51a2da39be70a..97acd2a8070931c 100644 --- a/be/src/pipeline/local_exchange/local_exchange_sink_operator.cpp +++ b/be/src/pipeline/local_exchange/local_exchange_sink_operator.cpp @@ -27,8 +27,9 @@ LocalExchangeSinkLocalState::~LocalExchangeSinkLocalState() = default; std::vector LocalExchangeSinkLocalState::dependencies() const { auto deps = Base::dependencies(); - auto exchanger_deps = _exchanger->local_sink_state_dependency(_channel_id); - for (auto* dep : exchanger_deps) { + + auto dep = _shared_state->get_sink_dep_by_channel_id(_channel_id); + if (dep != nullptr) { deps.push_back(dep); } return deps; diff --git a/be/src/pipeline/local_exchange/local_exchange_source_operator.cpp b/be/src/pipeline/local_exchange/local_exchange_source_operator.cpp index 6b0cca2d71a969e..32e93fbc5b24327 100644 --- a/be/src/pipeline/local_exchange/local_exchange_source_operator.cpp +++ b/be/src/pipeline/local_exchange/local_exchange_source_operator.cpp @@ -56,7 +56,7 @@ Status LocalExchangeSourceLocalState::close(RuntimeState* state) { _exchanger->close(*this); } if (_shared_state) { - _shared_state->sub_running_source_operators(); + _shared_state->sub_running_source_operators(*this); } return Base::close(state); @@ -64,9 +64,12 @@ Status LocalExchangeSourceLocalState::close(RuntimeState* state) { std::vector LocalExchangeSourceLocalState::dependencies() const { auto deps = Base::dependencies(); - auto exchanger_deps = _exchanger->local_state_dependency(_channel_id); - for (auto* dep : exchanger_deps) { - deps.push_back(dep); + auto le_deps = _shared_state->get_dep_by_channel_id(_channel_id); + if (le_deps.size() > 1) { + // If this is a local merge exchange, we should use all dependencies here. 
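+        // (One dependency per upstream queue: LocalMergeExchangeSharedState::create_dependencies()
+        // builds a source and a sink dependency for every channel, and the merge-sort source
+        // registers all of them, presumably because the k-way merge needs input from every child
+        // queue before it can emit rows.)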
+ for (auto& dep : le_deps) { + deps.push_back(dep.get()); + } } return deps; } diff --git a/be/src/pipeline/local_exchange/local_exchanger.cpp b/be/src/pipeline/local_exchange/local_exchanger.cpp index 647ddcfba2d87e8..e256419688e7745 100644 --- a/be/src/pipeline/local_exchange/local_exchanger.cpp +++ b/be/src/pipeline/local_exchange/local_exchanger.cpp @@ -27,28 +27,74 @@ namespace doris::pipeline { template -bool Exchanger::_enqueue_data_and_set_ready(int channel_id, +void Exchanger::_enqueue_data_and_set_ready(int channel_id, LocalExchangeSinkLocalState& local_state, BlockType&& block) { + size_t allocated_bytes = 0; + // PartitionedBlock is used by shuffle exchanger. + // PartitionedBlock will be push into multiple queues with different row ranges, so it will be + // referenced multiple times. Otherwise, we only ref the block once because it is only push into + // one queue. + if constexpr (std::is_same_v) { + allocated_bytes = block.first->data_block.allocated_bytes(); + } else { + block->ref(1); + allocated_bytes = block->data_block.allocated_bytes(); + } std::unique_lock l(_m); + local_state._shared_state->add_mem_usage(channel_id, allocated_bytes, + !std::is_same_v); if (_data_queue[channel_id].enqueue(std::move(block))) { local_state._shared_state->set_ready_to_read(channel_id); - return true; + } else { + local_state._shared_state->sub_mem_usage(channel_id, allocated_bytes); + // `enqueue(block)` return false iff this queue's source operator is already closed so we + // just unref the block. + if constexpr (std::is_same_v) { + block.first->unref(local_state._shared_state, allocated_bytes); + } else { + block->unref(local_state._shared_state, allocated_bytes); + } } - return false; } template bool Exchanger::_dequeue_data(LocalExchangeSourceLocalState& local_state, - BlockType& block, bool* eos) { + BlockType& block, bool* eos, + vectorized::Block* data_block) { + return _dequeue_data(local_state, block, eos, data_block, local_state._channel_id); +} + +template +bool Exchanger::_dequeue_data(LocalExchangeSourceLocalState& local_state, + BlockType& block, bool* eos, vectorized::Block* data_block, + int channel_id) { bool all_finished = _running_sink_operators == 0; - if (_data_queue[local_state._channel_id].try_dequeue(block)) { + if (_data_queue[channel_id].try_dequeue(block)) { + if constexpr (std::is_same_v) { + local_state._shared_state->sub_mem_usage(channel_id, + block.first->data_block.allocated_bytes()); + } else { + local_state._shared_state->sub_mem_usage(channel_id, + block->data_block.allocated_bytes()); + data_block->swap(block->data_block); + block->unref(local_state._shared_state, data_block->allocated_bytes()); + } return true; } else if (all_finished) { *eos = true; } else { std::unique_lock l(_m); - if (_data_queue[local_state._channel_id].try_dequeue(block)) { + if (_data_queue[channel_id].try_dequeue(block)) { + if constexpr (std::is_same_v) { + local_state._shared_state->sub_mem_usage(channel_id, + block.first->data_block.allocated_bytes()); + } else { + local_state._shared_state->sub_mem_usage(channel_id, + block->data_block.allocated_bytes()); + data_block->swap(block->data_block); + block->unref(local_state._shared_state, data_block->allocated_bytes()); + } return true; } COUNTER_UPDATE(local_state._get_block_failed_counter, 1); @@ -76,12 +122,11 @@ Status ShuffleExchanger::sink(RuntimeState* state, vectorized::Block* in_block, void ShuffleExchanger::close(LocalExchangeSourceLocalState& local_state) { PartitionedBlock partitioned_block; + bool eos; + 
vectorized::Block block; _data_queue[local_state._channel_id].set_eos(); - while (_data_queue[local_state._channel_id].try_dequeue(partitioned_block)) { - auto block_wrapper = partitioned_block.first; - local_state._shared_state->sub_mem_usage( - local_state._channel_id, block_wrapper->data_block.allocated_bytes(), false); - block_wrapper->unref(local_state._shared_state); + while (_dequeue_data(local_state, partitioned_block, &eos, &block)) { + partitioned_block.first->unref(local_state._shared_state); } } @@ -95,17 +140,15 @@ Status ShuffleExchanger::get_block(RuntimeState* state, vectorized::Block* block const auto* offset_start = partitioned_block.second.row_idxs->data() + partitioned_block.second.offset_start; auto block_wrapper = partitioned_block.first; - local_state._shared_state->sub_mem_usage( - local_state._channel_id, block_wrapper->data_block.allocated_bytes(), false); RETURN_IF_ERROR(mutable_block.add_rows(&block_wrapper->data_block, offset_start, offset_start + partitioned_block.second.length)); block_wrapper->unref(local_state._shared_state); - } while (mutable_block.rows() < state->batch_size() && - _data_queue[local_state._channel_id].try_dequeue(partitioned_block)); + } while (mutable_block.rows() < state->batch_size() && !*eos && + _dequeue_data(local_state, partitioned_block, eos, block)); return Status::OK(); }; - if (_dequeue_data(local_state, partitioned_block, eos)) { + if (_dequeue_data(local_state, partitioned_block, eos, block)) { SCOPED_TIMER(local_state._copy_data_timer); mutable_block = vectorized::VectorizedUtils::build_mutable_mem_reuse_block( block, partitioned_block.first->data_block); @@ -135,11 +178,11 @@ Status ShuffleExchanger::_split_rows(RuntimeState* state, const uint32_t* __rest } vectorized::Block data_block; - std::shared_ptr new_block_wrapper; + std::shared_ptr new_block_wrapper; if (_free_blocks.try_dequeue(data_block)) { - new_block_wrapper = ShuffleBlockWrapper::create_shared(std::move(data_block)); + new_block_wrapper = BlockWrapper::create_shared(std::move(data_block)); } else { - new_block_wrapper = ShuffleBlockWrapper::create_shared(block->clone_empty()); + new_block_wrapper = BlockWrapper::create_shared(block->clone_empty()); } new_block_wrapper->data_block.swap(*block); @@ -157,15 +200,8 @@ Status ShuffleExchanger::_split_rows(RuntimeState* state, const uint32_t* __rest uint32_t start = local_state._partition_rows_histogram[it.first]; uint32_t size = local_state._partition_rows_histogram[it.first + 1] - start; if (size > 0) { - local_state._shared_state->add_mem_usage( - it.second, new_block_wrapper->data_block.allocated_bytes(), false); - - if (!_enqueue_data_and_set_ready(it.second, local_state, - {new_block_wrapper, {row_idx, start, size}})) { - local_state._shared_state->sub_mem_usage( - it.second, new_block_wrapper->data_block.allocated_bytes(), false); - new_block_wrapper->unref(local_state._shared_state); - } + _enqueue_data_and_set_ready(it.second, local_state, + {new_block_wrapper, {row_idx, start, size}}); } else { new_block_wrapper->unref(local_state._shared_state); } @@ -176,15 +212,8 @@ Status ShuffleExchanger::_split_rows(RuntimeState* state, const uint32_t* __rest uint32_t start = local_state._partition_rows_histogram[i]; uint32_t size = local_state._partition_rows_histogram[i + 1] - start; if (size > 0) { - local_state._shared_state->add_mem_usage( - i % _num_sources, new_block_wrapper->data_block.allocated_bytes(), false); - if (!_enqueue_data_and_set_ready(i % _num_sources, local_state, - {new_block_wrapper, {row_idx, 
start, size}})) { - local_state._shared_state->sub_mem_usage( - i % _num_sources, new_block_wrapper->data_block.allocated_bytes(), - false); - new_block_wrapper->unref(local_state._shared_state); - } + _enqueue_data_and_set_ready(i % _num_sources, local_state, + {new_block_wrapper, {row_idx, start, size}}); } else { new_block_wrapper->unref(local_state._shared_state); } @@ -197,14 +226,8 @@ Status ShuffleExchanger::_split_rows(RuntimeState* state, const uint32_t* __rest uint32_t start = local_state._partition_rows_histogram[i]; uint32_t size = local_state._partition_rows_histogram[i + 1] - start; if (size > 0) { - local_state._shared_state->add_mem_usage( - map[i], new_block_wrapper->data_block.allocated_bytes(), false); - if (!_enqueue_data_and_set_ready(map[i], local_state, - {new_block_wrapper, {row_idx, start, size}})) { - local_state._shared_state->sub_mem_usage( - map[i], new_block_wrapper->data_block.allocated_bytes(), false); - new_block_wrapper->unref(local_state._shared_state); - } + _enqueue_data_and_set_ready(map[i], local_state, + {new_block_wrapper, {row_idx, start, size}}); } else { new_block_wrapper->unref(local_state._shared_state); } @@ -217,40 +240,32 @@ Status ShuffleExchanger::_split_rows(RuntimeState* state, const uint32_t* __rest Status PassthroughExchanger::sink(RuntimeState* state, vectorized::Block* in_block, bool eos, LocalExchangeSinkLocalState& local_state) { vectorized::Block new_block; + BlockWrapperSPtr wrapper; if (!_free_blocks.try_dequeue(new_block)) { new_block = {in_block->clone_empty()}; } new_block.swap(*in_block); + wrapper = BlockWrapper::create_shared(std::move(new_block)); auto channel_id = (local_state._channel_id++) % _num_partitions; - size_t memory_usage = new_block.allocated_bytes(); - local_state._shared_state->add_mem_usage(channel_id, memory_usage); - if (!_enqueue_data_and_set_ready(channel_id, local_state, std::move(new_block))) { - local_state._shared_state->sub_mem_usage(channel_id, memory_usage); - } + _enqueue_data_and_set_ready(channel_id, local_state, std::move(wrapper)); return Status::OK(); } void PassthroughExchanger::close(LocalExchangeSourceLocalState& local_state) { vectorized::Block next_block; + BlockWrapperSPtr wrapper; + bool eos; _data_queue[local_state._channel_id].set_eos(); - while (_data_queue[local_state._channel_id].try_dequeue(next_block)) { - local_state._shared_state->sub_mem_usage(local_state._channel_id, - next_block.allocated_bytes()); + while (_dequeue_data(local_state, wrapper, &eos, &next_block)) { + next_block = vectorized::Block(); } } Status PassthroughExchanger::get_block(RuntimeState* state, vectorized::Block* block, bool* eos, LocalExchangeSourceLocalState& local_state) { - vectorized::Block next_block; - if (_dequeue_data(local_state, next_block, eos)) { - block->swap(next_block); - local_state._shared_state->sub_mem_usage(local_state._channel_id, block->allocated_bytes()); - if (_free_block_limit == 0 || - _free_blocks.size_approx() < _free_block_limit * _num_sources) { - _free_blocks.enqueue(std::move(next_block)); - } - } + BlockWrapperSPtr next_block; + _dequeue_data(local_state, next_block, eos, block); return Status::OK(); } @@ -258,7 +273,9 @@ Status PassToOneExchanger::sink(RuntimeState* state, vectorized::Block* in_block LocalExchangeSinkLocalState& local_state) { vectorized::Block new_block(in_block->clone_empty()); new_block.swap(*in_block); - _enqueue_data_and_set_ready(0, local_state, std::move(new_block)); + + BlockWrapperSPtr wrapper = 
BlockWrapper::create_shared(std::move(new_block)); + _enqueue_data_and_set_ready(0, local_state, std::move(wrapper)); return Status::OK(); } @@ -269,10 +286,8 @@ Status PassToOneExchanger::get_block(RuntimeState* state, vectorized::Block* blo *eos = true; return Status::OK(); } - vectorized::Block next_block; - if (_dequeue_data(local_state, next_block, eos)) { - *block = std::move(next_block); - } + BlockWrapperSPtr next_block; + _dequeue_data(local_state, next_block, eos, block); return Status::OK(); } @@ -282,21 +297,39 @@ Status LocalMergeSortExchanger::sink(RuntimeState* state, vectorized::Block* in_ if (!_free_blocks.try_dequeue(new_block)) { new_block = {in_block->clone_empty()}; } - new_block.swap(*in_block); DCHECK_LE(local_state._channel_id, _data_queue.size()); - size_t memory_usage = new_block.allocated_bytes(); - add_mem_usage(local_state, memory_usage); - - if (!_enqueue_data_and_set_ready(local_state._channel_id, local_state, std::move(new_block))) { - sub_mem_usage(local_state, memory_usage); - } + new_block.swap(*in_block); + _enqueue_data_and_set_ready(local_state._channel_id, local_state, + BlockWrapper::create_shared(std::move(new_block))); if (eos) { - _queue_deps[local_state._channel_id]->set_always_ready(); + local_state._shared_state->source_deps[local_state._channel_id]->set_always_ready(); } return Status::OK(); } +void ExchangerBase::finalize(LocalExchangeSourceLocalState& local_state) { + DCHECK(_running_source_operators == 0); + vectorized::Block block; + while (_free_blocks.try_dequeue(block)) { + // do nothing + } +} +void LocalMergeSortExchanger::finalize(LocalExchangeSourceLocalState& local_state) { + BlockWrapperSPtr next_block; + vectorized::Block block; + bool eos; + int id = 0; + for (auto& data_queue : _data_queue) { + data_queue.set_eos(); + while (_dequeue_data(local_state, next_block, &eos, &block, id)) { + block = vectorized::Block(); + } + id++; + } + ExchangerBase::finalize(local_state); +} + Status LocalMergeSortExchanger::build_merger(RuntimeState* state, LocalExchangeSourceLocalState& local_state) { RETURN_IF_ERROR(_sort_source->build_merger(state, _merger, local_state.profile())); @@ -304,18 +337,8 @@ Status LocalMergeSortExchanger::build_merger(RuntimeState* state, for (int channel_id = 0; channel_id < _num_partitions; channel_id++) { vectorized::BlockSupplier block_supplier = [&, id = channel_id](vectorized::Block* block, bool* eos) { - vectorized::Block next_block; - bool all_finished = _running_sink_operators == 0; - if (_data_queue[id].try_dequeue(next_block)) { - block->swap(next_block); - if (_free_block_limit == 0 || - _free_blocks.size_approx() < _free_block_limit * _num_sources) { - _free_blocks.enqueue(std::move(next_block)); - } - sub_mem_usage(local_state, id, block->allocated_bytes()); - } else if (all_finished) { - *eos = true; - } + BlockWrapperSPtr next_block; + _dequeue_data(local_state, next_block, eos, block, id); return Status::OK(); }; child_block_suppliers.push_back(block_supplier); @@ -349,63 +372,13 @@ Status LocalMergeSortExchanger::get_block(RuntimeState* state, vectorized::Block return Status::OK(); } -void LocalMergeSortExchanger::sub_mem_usage(LocalExchangeSinkLocalState& local_state, - int64_t delta) { - const auto channel_id = local_state._channel_id; - local_state._shared_state->mem_trackers[channel_id]->release(delta); - if (_queues_mem_usege[channel_id].fetch_sub(delta) > _each_queue_limit) { - _sink_deps[channel_id]->set_ready(); - } - // if queue empty , block this queue - if (_queues_mem_usege[channel_id] 
== 0) { - _queue_deps[channel_id]->block(); - } -} - -void LocalMergeSortExchanger::add_mem_usage(LocalExchangeSinkLocalState& local_state, - int64_t delta) { - const auto channel_id = local_state._channel_id; - local_state._shared_state->mem_trackers[channel_id]->consume(delta); - if (_queues_mem_usege[channel_id].fetch_add(delta) > _each_queue_limit) { - _sink_deps[channel_id]->block(); - } - _queue_deps[channel_id]->set_ready(); -} - -void LocalMergeSortExchanger::sub_mem_usage(LocalExchangeSourceLocalState& local_state, - int channel_id, int64_t delta) { - local_state._shared_state->mem_trackers[channel_id]->release(delta); - if (_queues_mem_usege[channel_id].fetch_sub(delta) <= _each_queue_limit) { - _sink_deps[channel_id]->set_ready(); - } - // if queue empty , block this queue - if (_queues_mem_usege[channel_id] == 0) { - _queue_deps[channel_id]->block(); - } -} - -std::vector LocalMergeSortExchanger::local_sink_state_dependency(int channel_id) { - DCHECK(_sink_deps[channel_id]); - return {_sink_deps[channel_id].get()}; -} - -std::vector LocalMergeSortExchanger::local_state_dependency(int channel_id) { - if (channel_id != 0) { - return {}; - } - std::vector deps; - for (auto depSptr : _queue_deps) { - deps.push_back(depSptr.get()); - } - return deps; -} - Status BroadcastExchanger::sink(RuntimeState* state, vectorized::Block* in_block, bool eos, LocalExchangeSinkLocalState& local_state) { for (size_t i = 0; i < _num_partitions; i++) { auto mutable_block = vectorized::MutableBlock::create_unique(in_block->clone_empty()); RETURN_IF_ERROR(mutable_block->add_rows(in_block, 0, in_block->rows())); - _enqueue_data_and_set_ready(i, local_state, mutable_block->to_block()); + _enqueue_data_and_set_ready(i, local_state, + BlockWrapper::create_shared(mutable_block->to_block())); } return Status::OK(); @@ -413,18 +386,18 @@ Status BroadcastExchanger::sink(RuntimeState* state, vectorized::Block* in_block void BroadcastExchanger::close(LocalExchangeSourceLocalState& local_state) { vectorized::Block next_block; + bool eos; + BlockWrapperSPtr wrapper; _data_queue[local_state._channel_id].set_eos(); - while (_data_queue[local_state._channel_id].try_dequeue(next_block)) { - // do nothing + while (_dequeue_data(local_state, wrapper, &eos, &next_block)) { + next_block = vectorized::Block(); } } Status BroadcastExchanger::get_block(RuntimeState* state, vectorized::Block* block, bool* eos, LocalExchangeSourceLocalState& local_state) { - vectorized::Block next_block; - if (_dequeue_data(local_state, next_block, eos)) { - *block = std::move(next_block); - } + BlockWrapperSPtr next_block; + _dequeue_data(local_state, next_block, eos, block); return Status::OK(); } @@ -437,12 +410,8 @@ Status AdaptivePassthroughExchanger::_passthrough_sink(RuntimeState* state, } new_block.swap(*in_block); auto channel_id = (local_state._channel_id++) % _num_partitions; - size_t memory_usage = new_block.allocated_bytes(); - local_state._shared_state->add_mem_usage(channel_id, memory_usage); - - if (!_enqueue_data_and_set_ready(channel_id, local_state, std::move(new_block))) { - local_state._shared_state->sub_mem_usage(channel_id, memory_usage); - } + _enqueue_data_and_set_ready(channel_id, local_state, + BlockWrapper::create_shared(std::move(new_block))); return Status::OK(); } @@ -497,11 +466,8 @@ Status AdaptivePassthroughExchanger::_split_rows(RuntimeState* state, RETURN_IF_ERROR(mutable_block->add_rows(block, start, size)); auto new_block = mutable_block->to_block(); - size_t memory_usage = new_block.allocated_bytes(); - 
local_state._shared_state->add_mem_usage(i, memory_usage); - if (!_enqueue_data_and_set_ready(i, local_state, std::move(new_block))) { - local_state._shared_state->sub_mem_usage(i, memory_usage); - } + _enqueue_data_and_set_ready(i, local_state, + BlockWrapper::create_shared(std::move(new_block))); } } return Status::OK(); @@ -522,16 +488,19 @@ Status AdaptivePassthroughExchanger::sink(RuntimeState* state, vectorized::Block Status AdaptivePassthroughExchanger::get_block(RuntimeState* state, vectorized::Block* block, bool* eos, LocalExchangeSourceLocalState& local_state) { + BlockWrapperSPtr next_block; + _dequeue_data(local_state, next_block, eos, block); + return Status::OK(); +} + +void AdaptivePassthroughExchanger::close(LocalExchangeSourceLocalState& local_state) { vectorized::Block next_block; - if (_dequeue_data(local_state, next_block, eos)) { - block->swap(next_block); - if (_free_block_limit == 0 || - _free_blocks.size_approx() < _free_block_limit * _num_sources) { - _free_blocks.enqueue(std::move(next_block)); - } - local_state._shared_state->sub_mem_usage(local_state._channel_id, block->allocated_bytes()); + bool eos; + BlockWrapperSPtr wrapper; + _data_queue[local_state._channel_id].set_eos(); + while (_dequeue_data(local_state, wrapper, &eos, &next_block)) { + next_block = vectorized::Block(); } - return Status::OK(); } } // namespace doris::pipeline diff --git a/be/src/pipeline/local_exchange/local_exchanger.h b/be/src/pipeline/local_exchange/local_exchanger.h index afdebd21101f9aa..dfb5c31fff87bd2 100644 --- a/be/src/pipeline/local_exchange/local_exchanger.h +++ b/be/src/pipeline/local_exchange/local_exchanger.h @@ -24,9 +24,22 @@ namespace doris::pipeline { class LocalExchangeSourceLocalState; class LocalExchangeSinkLocalState; -struct ShuffleBlockWrapper; +struct BlockWrapper; class SortSourceOperatorX; +/** + * One exchanger is hold by one `LocalExchangeSharedState`. And one `LocalExchangeSharedState` is + * shared by all local exchange sink operators and source operators with the same id. + * + * In exchanger, two block queues is maintained, one is data block queue and another is free block queue. + * + * In details, data block queue has queues as many as source operators. Each source operator will get + * data block from the corresponding queue. Data blocks is push into the queue by sink operators. One + * sink operator will push blocks into one or more queues. + * + * Free block is used to reuse the allocated memory. To reduce the memory limit, we also use a conf + * to limit the size of free block queue. + */ class ExchangerBase { public: ExchangerBase(int running_sink_operators, int num_partitions, int free_block_limit) @@ -50,16 +63,17 @@ class ExchangerBase { virtual Status sink(RuntimeState* state, vectorized::Block* in_block, bool eos, LocalExchangeSinkLocalState& local_state) = 0; virtual ExchangeType get_type() const = 0; + // Called if a local exchanger source operator are closed. Free the unused data block in data_queue. virtual void close(LocalExchangeSourceLocalState& local_state) = 0; - - virtual std::vector local_sink_state_dependency(int channel_id) { return {}; } - virtual std::vector local_state_dependency(int channel_id) { return {}; } + // Called if all local exchanger source operators are closed. We free the memory in + // `_free_blocks` here. 
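+    // (LocalMergeSortExchanger overrides finalize() to first drain every per-channel data queue
+    // and only then fall through to this base implementation, which empties `_free_blocks`.)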
+ virtual void finalize(LocalExchangeSourceLocalState& local_state); virtual std::string data_queue_debug_string(int i) = 0; protected: friend struct LocalExchangeSharedState; - friend struct ShuffleBlockWrapper; + friend struct BlockWrapper; friend class LocalExchangeSourceLocalState; friend class LocalExchangeSinkOperatorX; friend class LocalExchangeSinkLocalState; @@ -78,7 +92,7 @@ struct PartitionedRowIdxs { uint32_t length; }; -using PartitionedBlock = std::pair, PartitionedRowIdxs>; +using PartitionedBlock = std::pair, PartitionedRowIdxs>; template struct BlockQueue { @@ -108,6 +122,8 @@ struct BlockQueue { void set_eos() { eos = true; } }; +using BlockWrapperSPtr = std::shared_ptr; + template class Exchanger : public ExchangerBase { public: @@ -123,9 +139,13 @@ class Exchanger : public ExchangerBase { } protected: - bool _enqueue_data_and_set_ready(int channel_id, LocalExchangeSinkLocalState& local_state, + // Enqueue data block and set downstream source operator to read. + void _enqueue_data_and_set_ready(int channel_id, LocalExchangeSinkLocalState& local_state, BlockType&& block); - bool _dequeue_data(LocalExchangeSourceLocalState& local_state, BlockType& block, bool* eos); + bool _dequeue_data(LocalExchangeSourceLocalState& local_state, BlockType& block, bool* eos, + vectorized::Block* data_block); + bool _dequeue_data(LocalExchangeSourceLocalState& local_state, BlockType& block, bool* eos, + vectorized::Block* data_block, int channel_id); std::vector> _data_queue; private: @@ -135,10 +155,33 @@ class Exchanger : public ExchangerBase { class LocalExchangeSourceLocalState; class LocalExchangeSinkLocalState; -struct ShuffleBlockWrapper { - ENABLE_FACTORY_CREATOR(ShuffleBlockWrapper); - ShuffleBlockWrapper(vectorized::Block&& data_block_) : data_block(std::move(data_block_)) {} +/** + * `BlockWrapper` is used to wrap a data block with a reference count. + * + * In function `unref()`, if `ref_count` decremented to 0, which means this block is not needed by + * operators, so we put it into `_free_blocks` to reuse its memory if needed and refresh memory usage + * in current queue. + * + * Note: `ref_count` will be larger than 1 only if this block is shared between multiple queues in + * shuffle exchanger. 
+ */ +struct BlockWrapper { + ENABLE_FACTORY_CREATOR(BlockWrapper); + BlockWrapper(vectorized::Block&& data_block_) : data_block(std::move(data_block_)) {} + ~BlockWrapper() { DCHECK_EQ(ref_count.load(), 0); } void ref(int delta) { ref_count += delta; } + void unref(LocalExchangeSharedState* shared_state, size_t allocated_bytes) { + if (ref_count.fetch_sub(1) == 1) { + shared_state->sub_total_mem_usage(allocated_bytes); + if (shared_state->exchanger->_free_block_limit == 0 || + shared_state->exchanger->_free_blocks.size_approx() < + shared_state->exchanger->_free_block_limit * + shared_state->exchanger->_num_sources) { + data_block.clear_column_data(); + shared_state->exchanger->_free_blocks.enqueue(std::move(data_block)); + } + } + } void unref(LocalExchangeSharedState* shared_state) { if (ref_count.fetch_sub(1) == 1) { shared_state->sub_total_mem_usage(data_block.allocated_bytes()); @@ -197,12 +240,12 @@ class BucketShuffleExchanger final : public ShuffleExchanger { ExchangeType get_type() const override { return ExchangeType::BUCKET_HASH_SHUFFLE; } }; -class PassthroughExchanger final : public Exchanger { +class PassthroughExchanger final : public Exchanger { public: ENABLE_FACTORY_CREATOR(PassthroughExchanger); PassthroughExchanger(int running_sink_operators, int num_partitions, int free_block_limit) - : Exchanger(running_sink_operators, num_partitions, - free_block_limit) { + : Exchanger(running_sink_operators, num_partitions, + free_block_limit) { _data_queue.resize(num_partitions); } ~PassthroughExchanger() override = default; @@ -215,12 +258,12 @@ class PassthroughExchanger final : public Exchanger { void close(LocalExchangeSourceLocalState& local_state) override; }; -class PassToOneExchanger final : public Exchanger { +class PassToOneExchanger final : public Exchanger { public: ENABLE_FACTORY_CREATOR(PassToOneExchanger); PassToOneExchanger(int running_sink_operators, int num_partitions, int free_block_limit) - : Exchanger(running_sink_operators, num_partitions, - free_block_limit) { + : Exchanger(running_sink_operators, num_partitions, + free_block_limit) { _data_queue.resize(num_partitions); } ~PassToOneExchanger() override = default; @@ -233,25 +276,14 @@ class PassToOneExchanger final : public Exchanger { void close(LocalExchangeSourceLocalState& local_state) override {} }; -class LocalMergeSortExchanger final : public Exchanger { +class LocalMergeSortExchanger final : public Exchanger { public: ENABLE_FACTORY_CREATOR(LocalMergeSortExchanger); LocalMergeSortExchanger(std::shared_ptr sort_source, int running_sink_operators, int num_partitions, int free_block_limit) - : Exchanger(running_sink_operators, num_partitions, - free_block_limit), - _sort_source(std::move(sort_source)), - _queues_mem_usege(num_partitions), - _each_queue_limit(config::local_exchange_buffer_mem_limit / num_partitions) { + : Exchanger(running_sink_operators, num_partitions, free_block_limit), + _sort_source(std::move(sort_source)) { _data_queue.resize(num_partitions); - for (size_t i = 0; i < num_partitions; i++) { - _queues_mem_usege[i] = 0; - _sink_deps.push_back( - std::make_shared(0, 0, "LOCAL_MERGE_SORT_SINK_DEPENDENCY", true)); - _queue_deps.push_back( - std::make_shared(0, 0, "LOCAL_MERGE_SORT_QUEUE_DEPENDENCY")); - _queue_deps.back()->block(); - } } ~LocalMergeSortExchanger() override = default; Status sink(RuntimeState* state, vectorized::Block* in_block, bool eos, @@ -263,33 +295,21 @@ class LocalMergeSortExchanger final : public Exchanger { Status build_merger(RuntimeState* statem, 
LocalExchangeSourceLocalState& local_state); - std::vector local_sink_state_dependency(int channel_id) override; - - std::vector local_state_dependency(int channel_id) override; - - void add_mem_usage(LocalExchangeSinkLocalState& local_state, int64_t delta); - void sub_mem_usage(LocalExchangeSinkLocalState& local_state, int64_t delta); - void sub_mem_usage(LocalExchangeSourceLocalState& local_state, int channel_id, int64_t delta); void close(LocalExchangeSourceLocalState& local_state) override {} + void finalize(LocalExchangeSourceLocalState& local_state) override; private: - // only channel_id = 0 , build _merger and use it - std::unique_ptr _merger; std::shared_ptr _sort_source; - std::vector _sink_deps; std::vector _queues_mem_usege; - // if cur queue is empty, block this queue - std::vector _queue_deps; - const int64_t _each_queue_limit; }; -class BroadcastExchanger final : public Exchanger { +class BroadcastExchanger final : public Exchanger { public: ENABLE_FACTORY_CREATOR(BroadcastExchanger); BroadcastExchanger(int running_sink_operators, int num_partitions, int free_block_limit) - : Exchanger(running_sink_operators, num_partitions, - free_block_limit) { + : Exchanger(running_sink_operators, num_partitions, + free_block_limit) { _data_queue.resize(num_partitions); } ~BroadcastExchanger() override = default; @@ -304,13 +324,13 @@ class BroadcastExchanger final : public Exchanger { //The code in AdaptivePassthroughExchanger is essentially // a copy of ShuffleExchanger and PassthroughExchanger. -class AdaptivePassthroughExchanger : public Exchanger { +class AdaptivePassthroughExchanger : public Exchanger { public: ENABLE_FACTORY_CREATOR(AdaptivePassthroughExchanger); AdaptivePassthroughExchanger(int running_sink_operators, int num_partitions, int free_block_limit) - : Exchanger(running_sink_operators, num_partitions, - free_block_limit) { + : Exchanger(running_sink_operators, num_partitions, + free_block_limit) { _data_queue.resize(num_partitions); } Status sink(RuntimeState* state, vectorized::Block* in_block, bool eos, @@ -320,7 +340,7 @@ class AdaptivePassthroughExchanger : public Exchanger { LocalExchangeSourceLocalState& local_state) override; ExchangeType get_type() const override { return ExchangeType::ADAPTIVE_PASSTHROUGH; } - void close(LocalExchangeSourceLocalState& local_state) override {} + void close(LocalExchangeSourceLocalState& local_state) override; private: Status _passthrough_sink(RuntimeState* state, vectorized::Block* in_block, bool eos, diff --git a/be/src/pipeline/pipeline_fragment_context.cpp b/be/src/pipeline/pipeline_fragment_context.cpp index a2f26ac0a009a1d..6f7a59c0f9818c0 100644 --- a/be/src/pipeline/pipeline_fragment_context.cpp +++ b/be/src/pipeline/pipeline_fragment_context.cpp @@ -697,7 +697,10 @@ Status PipelineFragmentContext::_add_local_exchange_impl( is_shuffled_hash_join, shuffle_idx_to_instance_idx)); // 2. Create and initialize LocalExchangeSharedState. - auto shared_state = LocalExchangeSharedState::create_shared(_num_instances); + std::shared_ptr shared_state = + data_distribution.distribution_type == ExchangeType::LOCAL_MERGE_SORT + ? 
LocalMergeExchangeSharedState::create_shared(_num_instances) + : LocalExchangeSharedState::create_shared(_num_instances); switch (data_distribution.distribution_type) { case ExchangeType::HASH_SHUFFLE: shared_state->exchanger = ShuffleExchanger::create_unique( @@ -730,11 +733,20 @@ Status PipelineFragmentContext::_add_local_exchange_impl( : 0); break; case ExchangeType::PASS_TO_ONE: - shared_state->exchanger = BroadcastExchanger::create_unique( - cur_pipe->num_tasks(), _num_instances, - _runtime_state->query_options().__isset.local_exchange_free_blocks_limit - ? _runtime_state->query_options().local_exchange_free_blocks_limit - : 0); + if (_runtime_state->enable_share_hash_table_for_broadcast_join()) { + // If shared hash table is enabled for BJ, hash table will be built by only one task + shared_state->exchanger = PassToOneExchanger::create_unique( + cur_pipe->num_tasks(), _num_instances, + _runtime_state->query_options().__isset.local_exchange_free_blocks_limit + ? _runtime_state->query_options().local_exchange_free_blocks_limit + : 0); + } else { + shared_state->exchanger = BroadcastExchanger::create_unique( + cur_pipe->num_tasks(), _num_instances, + _runtime_state->query_options().__isset.local_exchange_free_blocks_limit + ? _runtime_state->query_options().local_exchange_free_blocks_limit + : 0); + } break; case ExchangeType::LOCAL_MERGE_SORT: { auto child_op = cur_pipe->sink_x()->child_x(); @@ -788,7 +800,7 @@ Status PipelineFragmentContext::_add_local_exchange_impl( } operator_xs.insert(operator_xs.begin(), source_op); - shared_state->create_source_dependencies(source_op->operator_id(), source_op->node_id()); + shared_state->create_dependencies(source_op->operator_id(), source_op->node_id()); // 5. Set children for two pipelines separately. std::vector> new_children; From 9e011c92924efe08187f069ab583e326d1b6af1c Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Fri, 9 Aug 2024 15:21:39 +0800 Subject: [PATCH 12/94] [fix](Nereids) set operation output nullable maybe wrong (#39109) when first regulator child output nullable is not right, we may get wrong nullable output, and lead be crash --- .../nereids/rules/rewrite/AdjustNullable.java | 8 +-- .../adjust_nullable/set_operation.groovy | 69 +++++++++++++++++-- 2 files changed, 66 insertions(+), 11 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AdjustNullable.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AdjustNullable.java index 1f47f8bfc216283..e387218c47c5d12 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AdjustNullable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/AdjustNullable.java @@ -26,7 +26,6 @@ import org.apache.doris.nereids.trees.expressions.OrderExpression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotReference; -import org.apache.doris.nereids.trees.expressions.functions.ExpressionTrait; import org.apache.doris.nereids.trees.expressions.functions.Function; import org.apache.doris.nereids.trees.expressions.visitor.DefaultExpressionRewriter; import org.apache.doris.nereids.trees.plans.Plan; @@ -59,7 +58,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.stream.Collectors; /** * because some rule could change output's nullable. 
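 * For set operations, the hunk below stops seeding the output nullability from the first regular
 * child (whose own nullable flags may already be wrong); it starts from all-false instead and
 * derives the final flags from every child's output.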
@@ -168,8 +166,10 @@ public Plan visitLogicalSetOperation(LogicalSetOperation setOperation, Map> newChildrenOutputs = ImmutableList.builder(); List inputNullable = null; if (!setOperation.children().isEmpty()) { - inputNullable = setOperation.getRegularChildOutput(0).stream() - .map(ExpressionTrait::nullable).collect(Collectors.toList()); + inputNullable = Lists.newArrayListWithCapacity(setOperation.getOutputs().size()); + for (int i = 0; i < setOperation.getOutputs().size(); i++) { + inputNullable.add(false); + } for (int i = 0; i < setOperation.arity(); i++) { List childOutput = setOperation.child(i).getOutput(); List setChildOutput = setOperation.getRegularChildOutput(i); diff --git a/regression-test/suites/nereids_rules_p0/adjust_nullable/set_operation.groovy b/regression-test/suites/nereids_rules_p0/adjust_nullable/set_operation.groovy index 4e0c4089d86a6c3..a0e2d843dd572d0 100644 --- a/regression-test/suites/nereids_rules_p0/adjust_nullable/set_operation.groovy +++ b/regression-test/suites/nereids_rules_p0/adjust_nullable/set_operation.groovy @@ -18,29 +18,84 @@ suite("test_set_operation_adjust_nullable") { sql """ - DROP TABLE IF EXISTS t1 + DROP TABLE IF EXISTS set_operation_t1 """ sql """ - DROP TABLE IF EXISTS t2 + DROP TABLE IF EXISTS set_operation_t2 """ sql """ - CREATE TABLE t1(c1 varchar) DISTRIBUTED BY hash(c1) PROPERTIES ("replication_num" = "1"); + CREATE TABLE set_operation_t1(c1 varchar) DISTRIBUTED BY hash(c1) PROPERTIES ("replication_num" = "1"); """ sql """ - CREATE TABLE t2(c2 date) DISTRIBUTED BY hash(c2) PROPERTIES ("replication_num" = "1"); + CREATE TABLE set_operation_t2(c2 date) DISTRIBUTED BY hash(c2) PROPERTIES ("replication_num" = "1"); """ sql """ - insert into t1 values('+06-00'); + insert into set_operation_t1 values('+06-00'); """ sql """ - insert into t2 values('1990-11-11'); + insert into set_operation_t2 values('1990-11-11'); """ sql """ - SELECT c1, c1 FROM t1 MINUS SELECT c2, c2 FROM t2; + SELECT c1, c1 FROM set_operation_t1 MINUS SELECT c2, c2 FROM set_operation_t2; + """ + + // do not use regulator child output nullable as init nullable info + + sql """ + DROP TABLE IF EXISTS set_operation_t1 + """ + sql """ + DROP TABLE IF EXISTS set_operation_t2 + """ + + sql """ + create table set_operation_t1 ( + pk int, + c1 char(25) not null , + c2 varchar(100) null , + ) + distributed by hash(pk) buckets 10 + properties("replication_num" = "1"); + """ + + sql """insert into set_operation_t1 values (1, '1', '1');""" + + sql """ + create table set_operation_t2 ( + c3 varchar(100) not null , + pk int + ) + distributed by hash(pk) buckets 10 + properties("replication_num" = "1"); + """ + + sql """insert into set_operation_t2 values ('1', 1);""" + + sql """ + select + c2, + c1 + from + set_operation_t1 + order by + 1, + 2 asc + limit + 0 + union distinct + select + c3, + c3 + from + set_operation_t2 + except + select + 'LDvlqYTfrq', + 'rVdUjeSaJW'; """ } From 46a56c10d25cb257e0e451b932fbb017ba2215e4 Mon Sep 17 00:00:00 2001 From: zhangdong <493738387@qq.com> Date: Fri, 9 Aug 2024 16:20:08 +0800 Subject: [PATCH 13/94] [fix](mtmv)fix can not show create mtmv use follower fe (#38794) err msg: internal error processing forward --- .../trees/plans/commands/ShowCreateMTMVCommand.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowCreateMTMVCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowCreateMTMVCommand.java index 
eb244be7afca448..7da1df6af6f30ad 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowCreateMTMVCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ShowCreateMTMVCommand.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.trees.plans.commands; +import org.apache.doris.analysis.StmtType; import org.apache.doris.nereids.trees.plans.PlanType; import org.apache.doris.nereids.trees.plans.commands.info.ShowCreateMTMVInfo; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; @@ -28,7 +29,7 @@ /** * resume mtmv */ -public class ShowCreateMTMVCommand extends Command implements ForwardWithSync, NotAllowFallback { +public class ShowCreateMTMVCommand extends Command implements NoForward, NotAllowFallback { private final ShowCreateMTMVInfo showCreateMTMVInfo; public ShowCreateMTMVCommand(ShowCreateMTMVInfo showCreateMTMVInfo) { @@ -46,4 +47,9 @@ public void run(ConnectContext ctx, StmtExecutor executor) throws Exception { public R accept(PlanVisitor visitor, C context) { return visitor.visitShowCreateMTMVCommand(this, context); } + + @Override + public StmtType stmtType() { + return StmtType.SHOW; + } } From 998629732633f70076e12b7608e01aa08a56e47b Mon Sep 17 00:00:00 2001 From: airborne12 Date: Fri, 9 Aug 2024 16:54:58 +0800 Subject: [PATCH 14/94] [Fix](regression) remove useless properties when create table in test_array_with_large_dataset (#39090) ## Proposed changes Useless properties in regression case may cause error --- .../test_array_with_large_dataset.groovy | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/regression-test/suites/inverted_index_p2/test_array_with_large_dataset.groovy b/regression-test/suites/inverted_index_p2/test_array_with_large_dataset.groovy index 4d2f53d53533f4d..5de718339b9c013 100644 --- a/regression-test/suites/inverted_index_p2/test_array_with_large_dataset.groovy +++ b/regression-test/suites/inverted_index_p2/test_array_with_large_dataset.groovy @@ -67,17 +67,7 @@ suite("test_array_with_large_dataset", "p2"){ COMMENT 'OLAP' DISTRIBUTED BY HASH(`id`) BUCKETS 16 PROPERTIES ( - "replication_allocation" = "tag.location.default: 1", - "min_load_replica_num" = "-1", - "is_being_synced" = "false", - "storage_medium" = "hdd", - "storage_format" = "V2", - "inverted_index_storage_format" = "V1", - "light_schema_change" = "true", - "disable_auto_compaction" = "false", - "enable_single_replica_compaction" = "false", - "group_commit_interval_ms" = "10000", - "group_commit_data_bytes" = "134217728" + "replication_allocation" = "tag.location.default: 1" ); """ From 3c78aed430840d777d00da82e3d0f9a0dc87ee80 Mon Sep 17 00:00:00 2001 From: Pxl Date: Fri, 9 Aug 2024 17:06:11 +0800 Subject: [PATCH 15/94] [Bug](materialized-view) fix analyze where clause failed on mv (#39061) ## Proposed changes fix analyze where clause failed on mv do not analyze slot after replaceSlot to avoid duplicate columns in desc --- .../org/apache/doris/analysis/SlotRef.java | 5 ++ .../data/mv_p0/routine_load_mapping/test2 | 0 .../routine_load_mapping.groovy | 66 +++++++++++++++++++ 3 files changed, 71 insertions(+) create mode 100644 regression-test/data/mv_p0/routine_load_mapping/test2 diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java index 6bf11b0a953e907..2fe2b7b411500a4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/analysis/SlotRef.java @@ -679,6 +679,11 @@ public Expr getResultValue(boolean forPushDownPredicatesToView) throws AnalysisE @Override public void replaceSlot(TupleDescriptor tuple) { + // do not analyze slot after replaceSlot to avoid duplicate columns in desc desc = tuple.getColumnSlot(col); + type = desc.getType(); + if (!isAnalyzed) { + analysisDone(); + } } } diff --git a/regression-test/data/mv_p0/routine_load_mapping/test2 b/regression-test/data/mv_p0/routine_load_mapping/test2 new file mode 100644 index 000000000000000..e69de29bb2d1d64 diff --git a/regression-test/suites/mv_p0/routine_load_mapping/routine_load_mapping.groovy b/regression-test/suites/mv_p0/routine_load_mapping/routine_load_mapping.groovy index 1188e3031c0ff76..ca3a3560e01cfe0 100644 --- a/regression-test/suites/mv_p0/routine_load_mapping/routine_load_mapping.groovy +++ b/regression-test/suites/mv_p0/routine_load_mapping/routine_load_mapping.groovy @@ -49,4 +49,70 @@ suite ("routine_load_mapping") { qt_select "select * from test order by 1,2,3;" qt_select_mv "select * from test index m_view order by 1,2;" + + + sql """ DROP TABLE IF EXISTS rt_new; """ + + sql """ + CREATE TABLE `rt_new` ( + `battery_id` VARCHAR(50) NULL , + `create_time` DATETIME(3) NULL , + `imei` VARCHAR(50) NULL , + `event_id` VARCHAR(50) NULL , + `event_name` VARCHAR(50) NULL, + `heart_type` INT NULL +) ENGINE=OLAP +DUPLICATE KEY(`battery_id`, `create_time`) +PARTITION BY RANGE(`create_time`) +(PARTITION p20240421 VALUES [('2024-04-21 00:00:00'), ('2024-04-22 00:00:00')), +PARTITION p20240422 VALUES [('2024-04-22 00:00:00'), ('2024-04-23 00:00:00')), +PARTITION p20240804 VALUES [('2024-08-04 00:00:00'), ('2024-08-05 00:00:00'))) +DISTRIBUTED BY HASH(`battery_id`) BUCKETS AUTO +PROPERTIES ( +"replication_allocation" = "tag.location.default: 1", +"file_cache_ttl_seconds" = "0", +"is_being_synced" = "false", +"dynamic_partition.enable" = "true", +"dynamic_partition.time_unit" = "DAY", +"dynamic_partition.time_zone" = "Asia/Shanghai", +"dynamic_partition.start" = "-2147483648", +"dynamic_partition.end" = "3", +"dynamic_partition.prefix" = "p", +"dynamic_partition.buckets" = "10", +"dynamic_partition.create_history_partition" = "true", +"dynamic_partition.history_partition_num" = "100", +"dynamic_partition.hot_partition_num" = "0", +"dynamic_partition.reserved_history_periods" = "NULL", +"storage_medium" = "hdd", +"storage_format" = "V2", +"inverted_index_storage_format" = "V2", +"light_schema_change" = "true", +"disable_auto_compaction" = "false", +"enable_single_replica_compaction" = "false", +"group_commit_interval_ms" = "10000", +"group_commit_data_bytes" = "134217728" +); + """ + + createMV("""CREATE MATERIALIZED VIEW location_rt_mv AS + SELECT + battery_id, + create_time + FROM + rt_new + WHERE + heart_type = 1 + ;""") + + sql """ ALTER TABLE rt_new MODIFY COLUMN event_id VARCHAR(51) NULL;""" + Thread.sleep(1000) + + streamLoad { + table "rt_new" + set 'column_separator', ',' + set 'columns', '`battery_id`,`create_time`,`imei`,`event_id`,`event_name`,`heart_type`' + + file './test2' + time 10000 // limit inflight 10s + } } From 08c9e0518d8f00f2d6faa5ab6d354deaab483fa8 Mon Sep 17 00:00:00 2001 From: seawinde <149132972+seawinde@users.noreply.github.com> Date: Fri, 9 Aug 2024 17:09:29 +0800 Subject: [PATCH 16/94] [fix](mtmv) Fix rewrite by materialized view fail when query hive table (#38909) mv def is select l_orderkey, l_partkey, o_custkey, l_shipdate, o_orderdate from 
${hive_catalog_name}.${hive_database}.${hive_table} left join ${internal_catalog}.${olap_db}.${olap_table} on l_orderkey = o_orderkey if we query the sql as following, it will rewrite fail by mv, the fail info is `mv can not offer any partition for query` select l_orderkey, l_partkey, o_custkey, l_shipdate, o_orderdate from ${hive_catalog_name}.${hive_database}.${hive_table} left join ${internal_catalog}.${olap_db}.${olap_table} on l_orderkey = o_orderkey This pr fix this problem. it would be rewritten by mv successfully. --- .../mv/AbstractMaterializedViewRule.java | 4 + .../rules/exploration/mv/StructInfo.java | 12 +- .../external_table/part_partition_invalid.out | 70 +++++ .../part_partition_invalid.groovy | 284 ++++++++++++++++++ 4 files changed, 365 insertions(+), 5 deletions(-) create mode 100644 regression-test/data/nereids_rules_p0/mv/external_table/part_partition_invalid.out create mode 100644 regression-test/suites/nereids_rules_p0/mv/external_table/part_partition_invalid.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java index b34faaf1250cc1e..857fd0e51b9b680 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java @@ -431,6 +431,10 @@ protected Pair>, Map>> Map> queryUsedBaseTablePartitions = new LinkedHashMap<>(); queryUsedBaseTablePartitions.put(relatedPartitionTable, new HashSet<>()); queryPlan.accept(new StructInfo.QueryScanPartitionsCollector(), queryUsedBaseTablePartitions); + // Bail out, not check invalid partition if not olap scan, support later + if (queryUsedBaseTablePartitions.isEmpty()) { + return Pair.of(ImmutableMap.of(), ImmutableMap.of()); + } Set queryUsedBaseTablePartitionNameSet = queryUsedBaseTablePartitions.get(relatedPartitionTable) .stream() .map(Partition::getName) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/StructInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/StructInfo.java index eeb2192565359ad..fa29d4d0e123be7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/StructInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/StructInfo.java @@ -67,7 +67,6 @@ import java.util.ArrayList; import java.util.BitSet; -import java.util.HashSet; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; @@ -740,14 +739,17 @@ public Plan visitLogicalCatalogRelation(LogicalCatalogRelation catalogRelation, if (!targetTablePartitionMap.containsKey(relatedPartitionTable)) { return catalogRelation; } - // todo Support other type partition table if (catalogRelation instanceof LogicalOlapScan) { + // Handle olap table LogicalOlapScan logicalOlapScan = (LogicalOlapScan) catalogRelation; + Set tablePartitions = targetTablePartitionMap.get(relatedPartitionTable); for (Long partitionId : logicalOlapScan.getSelectedPartitionIds()) { - Set partitions = targetTablePartitionMap.computeIfAbsent(relatedPartitionTable, - key -> new HashSet<>()); - partitions.add(logicalOlapScan.getTable().getPartition(partitionId)); + tablePartitions.add(logicalOlapScan.getTable().getPartition(partitionId)); } + } else { + // todo Support other type partition table + // Not support to 
partition check now when query external catalog table, support later. + targetTablePartitionMap.clear(); } return catalogRelation; } diff --git a/regression-test/data/nereids_rules_p0/mv/external_table/part_partition_invalid.out b/regression-test/data/nereids_rules_p0/mv/external_table/part_partition_invalid.out new file mode 100644 index 000000000000000..b2cd5c559ca77d1 --- /dev/null +++ b/regression-test/data/nereids_rules_p0/mv/external_table/part_partition_invalid.out @@ -0,0 +1,70 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !query_sql -- +1 2 1 2023-10-17 2023-10-17 +2 2 2 2023-10-18 2023-10-18 +3 2 3 2023-10-19 2023-10-19 + +-- !query_mv_directly -- +1 2 1 2023-10-17 2023-10-17 +2 2 2 2023-10-18 2023-10-18 +3 2 3 2023-10-19 2023-10-19 + +-- !after_modify_data_without_refresh_catalog -- +1 2 1 2023-10-17 2023-10-17 +2 2 2 2023-10-18 2023-10-18 +3 2 3 2023-10-19 2023-10-19 + +-- !after_modify_and_without_refresh_catalog_19 -- +3 2 3 2023-10-19 2023-10-19 + +-- !after_modify_and_without_refresh_catalog_18 -- +2 2 2 2023-10-18 2023-10-18 + +-- !after_modify_data_and_refresh_catalog -- +1 2 1 2023-10-17 2023-10-17 +2 2 2 2023-10-18 2023-10-18 +3 2 3 2023-10-19 2023-10-19 + +-- !after_modify_and_refresh_catalog_19 -- +3 2 3 2023-10-19 2023-10-19 + +-- !after_modify_and_refresh_catalog_18 -- +2 2 2 2023-10-18 2023-10-18 + +-- !after_modify_data_and_refresh_catalog_and_mv -- +1 2 1 2023-10-17 2023-10-17 +2 2 2 2023-10-18 2023-10-18 +3 2 3 2023-10-19 2023-10-19 +3 2 3 2023-10-19 2023-10-19 + +-- !after_add_data_without_refresh_catalog -- +1 2 1 2023-10-17 2023-10-17 +2 2 2 2023-10-18 2023-10-18 +3 2 3 2023-10-19 2023-10-19 +3 2 3 2023-10-19 2023-10-19 + +-- !after_add_and_without_refresh_catalog_19 -- +3 2 3 2023-10-19 2023-10-19 +3 2 3 2023-10-19 2023-10-19 + +-- !after_add_and_without_refresh_catalog_20 -- + +-- !after_add_data_with_refresh_catalog -- +1 2 1 2023-10-17 2023-10-17 +2 2 2 2023-10-18 2023-10-18 +3 2 3 2023-10-19 2023-10-19 +3 2 3 2023-10-19 2023-10-19 + +-- !after_add_and_refresh_catalog_19 -- +3 2 3 2023-10-19 2023-10-19 +3 2 3 2023-10-19 2023-10-19 + +-- !after_add_and_refresh_catalog_20 -- + +-- !after_add_data_and_refresh_catalog_and_mv -- +\N \N 7 \N 2023-10-20 +1 2 1 2023-10-17 2023-10-17 +2 2 2 2023-10-18 2023-10-18 +3 2 3 2023-10-19 2023-10-19 +3 2 3 2023-10-19 2023-10-19 + diff --git a/regression-test/suites/nereids_rules_p0/mv/external_table/part_partition_invalid.groovy b/regression-test/suites/nereids_rules_p0/mv/external_table/part_partition_invalid.groovy new file mode 100644 index 000000000000000..d2e6f18824def8f --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv/external_table/part_partition_invalid.groovy @@ -0,0 +1,284 @@ +package mv.external_table +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("part_partition_invalid", "p0,external") { + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("diable Hive test. then doesn't test mv rewrite") + return; + } + // prepare catalog + def suite_name = "part_partition_invalid"; + def externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + def hms_port = context.config.otherConfigs.get("hive2HmsPort") + def hive_catalog_name = "${suite_name}_catalog" + def hive_database = "${suite_name}_db" + def hive_table = "${suite_name}_orders" + + sql """drop catalog if exists ${hive_catalog_name}""" + sql """ + create catalog if not exists ${hive_catalog_name} properties ( + "type"="hms", + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}' + );""" + + sql """switch ${hive_catalog_name};""" + sql """drop table if exists ${hive_catalog_name}.${hive_database}.${hive_table}""" + sql """ drop database if exists ${hive_database}""" + sql """ create database ${hive_database}""" + sql """use ${hive_database}""" + sql """ + CREATE TABLE IF NOT EXISTS ${hive_table} ( + o_orderkey integer, + o_custkey integer, + o_orderstatus char(1), + o_totalprice decimalv3(15,2), + o_orderpriority char(15), + o_clerk char(15), + o_shippriority integer, + o_comment varchar(79), + o_orderdate date + ) ENGINE=hive + PARTITION BY list(o_orderdate)() + PROPERTIES ( + "replication_num" = "1", + "file_format"="orc", + "compression"="zlib" + ); + """ + + sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table} values(1, 1, 'ok', 99.5, 'a', 'b', 1, 'yy', '2023-10-17');""" + sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table} values(2, 2, 'ok', 109.2, 'c','d',2, 'mm', '2023-10-18');""" + sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table} values(3, 3, 'ok', 99.5, 'a', 'b', 1, 'yy', '2023-10-19');""" + + // prepare table and data in olap + def internal_catalog = "internal" + def olap_db = context.config.getDbNameByFile(context.file) + def olap_table = "${suite_name}_lineitem" + + sql """switch ${internal_catalog};""" + sql "use ${olap_db};" + sql "SET enable_nereids_planner=true;" + sql "set runtime_filter_mode=OFF"; + sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject';" + + sql """ + drop table if exists ${olap_table} + """ + + sql """ + CREATE TABLE IF NOT EXISTS ${olap_table} ( + l_orderkey integer not null, + l_partkey integer not null, + l_suppkey integer not null, + l_linenumber integer not null, + l_quantity decimalv3(15,2) not null, + l_extendedprice decimalv3(15,2) not null, + l_discount decimalv3(15,2) not null, + l_tax decimalv3(15,2) not null, + l_returnflag char(1) not null, + l_linestatus char(1) not null, + l_shipdate date not null, + l_commitdate date not null, + l_receiptdate date not null, + l_shipinstruct char(25) not null, + l_shipmode char(10) not null, + l_comment varchar(44) not null + ) + DUPLICATE KEY(l_orderkey, l_partkey, l_suppkey, l_linenumber) + PARTITION BY RANGE(l_shipdate) + (FROM ('2023-10-01') TO ('2023-10-30') INTERVAL 1 DAY) + DISTRIBUTED BY HASH(l_orderkey) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ); + """ + + sql """ + insert into ${olap_table} values + (1, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy'), + (2, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', '2023-10-18', 
'a', 'b', 'yyyyyyyyy'), + (3, 2, 3, 6, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-10-19', '2023-10-19', '2023-10-19', 'c', 'd', 'xxxxxxxxx'); + """ + + def query_sql = """ + select l_orderkey, l_partkey, o_custkey, l_shipdate, o_orderdate + from ${hive_catalog_name}.${hive_database}.${hive_table} + left join ${internal_catalog}.${olap_db}.${olap_table} on l_orderkey = o_orderkey + """ + order_qt_query_sql """${query_sql}""" + + // create partition mtmv, related partition is hive catalog + def mv_name = 'mv_join' + sql """drop materialized view if exists ${mv_name}""" + sql """ + CREATE MATERIALIZED VIEW ${mv_name} + BUILD IMMEDIATE REFRESH AUTO ON MANUAL + partition by(`o_orderdate`) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1', 'grace_period' = '0') + AS ${query_sql} + """ + + sql """REFRESH MATERIALIZED VIEW ${mv_name} complete""" + waitingMTMVTaskFinished(getJobName(olap_db, mv_name)) + order_qt_query_mv_directly """select * from ${mv_name};""" + + // test query rewrite by mv, should fail ,because materialized_view_rewrite_enable_contain_external_table + // is false default + explain { + sql(""" ${query_sql}""") + notContains("${mv_name}(${mv_name})") + } + sql "SET materialized_view_rewrite_enable_contain_external_table=true" + explain { + sql(""" ${query_sql}""") + contains("${mv_name}(${mv_name})") + } + + // data change in external table doesn't influence query rewrite, + // if want to use new data in external table should be refresh manually + sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table} values(3, 3, 'ok', 99.5, 'a', 'b', 1, 'yy', '2023-10-19');""" + explain { + sql(""" ${query_sql}""") + contains("${mv_name}(${mv_name})") + } + order_qt_after_modify_data_without_refresh_catalog """ ${query_sql}""" + + explain { + sql(""" + ${query_sql} where o_orderdate = '2023-10-19'; + """) + // query invalid partition data, should hit mv, because not check now. + contains("${mv_name}(${mv_name})") + } + order_qt_after_modify_and_without_refresh_catalog_19 """ ${query_sql} where o_orderdate = '2023-10-19';""" + + explain { + sql(""" + ${query_sql} where o_orderdate = '2023-10-18'; + """) + // query valid partition data, should hit mv + contains("${mv_name}(${mv_name})") + } + order_qt_after_modify_and_without_refresh_catalog_18 """ ${query_sql} where o_orderdate = '2023-10-18';""" + + // refresh catalog cache + sql """ REFRESH CATALOG ${hive_catalog_name} PROPERTIES("invalid_cache" = "true"); """ + explain { + sql(""" ${query_sql}""") + contains("${mv_name}(${mv_name})") + } + order_qt_after_modify_data_and_refresh_catalog """ ${query_sql}""" + + explain { + sql(""" + ${query_sql} where o_orderdate = '2023-10-19'; + """) + // query invalid partition data, should hit mv, because not check now. 
+ contains("${mv_name}(${mv_name})") + } + order_qt_after_modify_and_refresh_catalog_19 """ ${query_sql} where o_orderdate = '2023-10-19';""" + + explain { + sql(""" + ${query_sql} where o_orderdate = '2023-10-18'; + """) + // query valid partition data, should hit mv + contains("${mv_name}(${mv_name})") + } + order_qt_after_modify_and_refresh_catalog_18 """ ${query_sql} where o_orderdate = '2023-10-18';""" + + // refresh manually + sql """ REFRESH CATALOG ${hive_catalog_name} PROPERTIES("invalid_cache" = "true"); """ + sql """REFRESH MATERIALIZED VIEW ${mv_name} auto""" + waitingMTMVTaskFinished(getJobName(olap_db, mv_name)) + explain { + sql(""" ${query_sql}""") + contains("${mv_name}(${mv_name})") + } + order_qt_after_modify_data_and_refresh_catalog_and_mv """ ${query_sql}""" + + // test after hive add partition + sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table} values(6, 7, 'ok', 29.5, 'x', 'y', 6, 'ss', '2023-10-20');""" + explain { + sql(""" ${query_sql}""") + contains("${mv_name}(${mv_name})") + } + order_qt_after_add_data_without_refresh_catalog """ ${query_sql}""" + + explain { + sql(""" + ${query_sql} + """) + // query invalid partition data, should hit mv, because not check now. + contains("${mv_name}(${mv_name})") + } + order_qt_after_add_and_without_refresh_catalog_19 """ ${query_sql} where o_orderdate = '2023-10-19';""" + + explain { + sql(""" + ${query_sql} where o_orderdate = '2023-10-20'; + """) + // query valid partition data, should hit mv + notContains("${mv_name}(${mv_name})") + } + order_qt_after_add_and_without_refresh_catalog_20 """ ${query_sql} where o_orderdate = '2023-10-20';""" + + // refresh catalog cache + sql """ REFRESH CATALOG ${hive_catalog_name} PROPERTIES("invalid_cache" = "true"); """ + explain { + sql(""" ${query_sql}""") + contains("${mv_name}(${mv_name})") + } + order_qt_after_add_data_with_refresh_catalog """ ${query_sql}""" + + explain { + sql(""" + ${query_sql} where o_orderdate = '2023-10-19'; + """) + // query invalid partition data, should hit mv, because not check now. 
+ contains("${mv_name}(${mv_name})") + } + order_qt_after_add_and_refresh_catalog_19 """ ${query_sql} where o_orderdate = '2023-10-19';""" + + explain { + sql(""" + ${query_sql} where o_orderdate = '2023-10-20'; + """) + // query valid partition data, should hit mv + notContains("${mv_name}(${mv_name})") + } + order_qt_after_add_and_refresh_catalog_20 """ ${query_sql} where o_orderdate = '2023-10-20';""" + + // refresh manually + sql """ REFRESH CATALOG ${hive_catalog_name} PROPERTIES("invalid_cache" = "true"); """ + sql """REFRESH MATERIALIZED VIEW ${mv_name} auto""" + waitingMTMVTaskFinished(getJobName(olap_db, mv_name)) + explain { + sql(""" ${query_sql}""") + contains("${mv_name}(${mv_name})") + } + order_qt_after_add_data_and_refresh_catalog_and_mv """ ${query_sql}""" + + sql """drop table if exists ${hive_catalog_name}.${hive_database}.${hive_table}""" + sql """drop table if exists ${internal_catalog}.${olap_db}.${olap_table}""" + sql """drop database if exists ${hive_catalog_name}.${hive_database}""" + sql """drop materialized view if exists ${internal_catalog}.${olap_db}.${mv_name};""" + sql """drop catalog if exists ${hive_catalog_name}""" +} From 63f6aec730b02242da6ee37a5013b437dcbc8f46 Mon Sep 17 00:00:00 2001 From: zhangdong <493738387@qq.com> Date: Fri, 9 Aug 2024 17:09:41 +0800 Subject: [PATCH 17/94] [enhance](mtmv)Disable mtmv list rollup (#38124) Prohibit creating materialized views on partitions where the base table is a list partition --- .../mtmv/MTMVPartitionExprDateTrunc.java | 2 + .../test_hive_limit_partition_mtmv.out | 5 - .../mtmv_p0/test_rollup_partition_mtmv.out | 37 -- .../suites/mtmv_p0/test_build_mtmv.groovy | 12 +- .../test_hive_limit_partition_mtmv.groovy | 45 +-- .../mtmv_p0/test_rollup_partition_mtmv.groovy | 334 +----------------- 6 files changed, 39 insertions(+), 396 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionExprDateTrunc.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionExprDateTrunc.java index f16252cede0a10a..0fcfaa909e66c29 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionExprDateTrunc.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionExprDateTrunc.java @@ -78,6 +78,8 @@ public void analyze(MTMVPartitionInfo mvPartitionInfo) throws AnalysisException "partitionColumnType should be date/datetime " + "when PartitionType is range and expr is date_trunc"); } + } else { + throw new AnalysisException("date_trunc only support range partition"); } } diff --git a/regression-test/data/mtmv_p0/test_hive_limit_partition_mtmv.out b/regression-test/data/mtmv_p0/test_hive_limit_partition_mtmv.out index 1a8731d6a1c4461..48e60bb46cc0271 100644 --- a/regression-test/data/mtmv_p0/test_hive_limit_partition_mtmv.out +++ b/regression-test/data/mtmv_p0/test_hive_limit_partition_mtmv.out @@ -11,11 +11,6 @@ 2 20380101 sh 5 20380102 bj --- !mtmv_datetrunc -- -1 20380101 bj -2 20380101 sh -5 20380102 bj - -- !select_base_table -- 1 bj 20380101 2 sh 20380101 diff --git a/regression-test/data/mtmv_p0/test_rollup_partition_mtmv.out b/regression-test/data/mtmv_p0/test_rollup_partition_mtmv.out index 5f01fb0ee29ad9f..f828e65cc70b392 100644 --- a/regression-test/data/mtmv_p0/test_rollup_partition_mtmv.out +++ b/regression-test/data/mtmv_p0/test_rollup_partition_mtmv.out @@ -1,41 +1,4 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this --- !date_list_month -- -1 2020-01-01 2020-01-01 -2 2020-01-02 2020-01-02 -3 2020-02-01 2020-02-01 - --- !date_list_month_partition_by_column -- -2020-01-01 1 2020-01-01 2020-01-01 -2020-01-01 2 2020-01-02 2020-01-02 -2020-02-01 3 2020-02-01 2020-02-01 - --- !date_list_month_level -- -2020-01-01 1 2020-01-01 2020-01-01 -2020-01-02 2 2020-01-02 2020-01-02 -2020-02-01 3 2020-02-01 2020-02-01 - --- !date_list_month_level_agg -- -2020-01-01 1 1 -2020-01-02 2 1 -2020-02-01 3 1 - --- !date_list_month_level_agg_multi -- -2020-01-01 2020-01-01 1 -2020-01-02 2020-01-02 1 -2020-02-01 2020-02-01 1 - --- !date_list_month_level_agg -- -2020-01-01 1 -2020-01-02 1 -2020-02-01 1 - --- !date_list_year_partition_by_column -- - --- !string_list_month -- -1 2020==01==01 -2 2020==01==02 -3 2020==02==01 - -- !date_range_month -- 1 2020-01-01 2020-01-01 2 2020-01-02 2020-01-02 diff --git a/regression-test/suites/mtmv_p0/test_build_mtmv.groovy b/regression-test/suites/mtmv_p0/test_build_mtmv.groovy index 580b9f9bbfc2b76..ae1af18fdc5816c 100644 --- a/regression-test/suites/mtmv_p0/test_build_mtmv.groovy +++ b/regression-test/suites/mtmv_p0/test_build_mtmv.groovy @@ -500,12 +500,12 @@ suite("test_build_mtmv") { sql """ DROP MATERIALIZED VIEW ${mvName} """ - def jobs = sql """select count(1) from jobs("type"="mv") where name= '${jobName}'""" - println jobs - assertEquals(jobs.get(0).get(0), 0); - def tasks = sql """select count(1) from tasks("type"="mv") where jobname = '${jobName}'""" - println tasks - assertEquals(tasks.get(0).get(0), 0); + def jobs = sql """select * from jobs("type"="mv") where MvName= '${mvName}'""" + log.info(jobs.toString()) + assertEquals(0, jobs.size()); + def tasks = sql """select * from tasks("type"="mv") where MvName = '${mvName}'""" + log.info(tasks.toString()) + assertEquals(0, tasks.size()); // test bitmap sql """drop table if exists `${tableName}`""" diff --git a/regression-test/suites/mtmv_p0/test_hive_limit_partition_mtmv.groovy b/regression-test/suites/mtmv_p0/test_hive_limit_partition_mtmv.groovy index 1e6c49bb50af0e9..ea5c2ff28445915 100644 --- a/regression-test/suites/mtmv_p0/test_hive_limit_partition_mtmv.groovy +++ b/regression-test/suites/mtmv_p0/test_hive_limit_partition_mtmv.groovy @@ -115,34 +115,23 @@ suite("test_hive_limit_partition_mtmv", "p0,external,hive,external_docker,extern // date trunc sql """drop materialized view if exists ${mvName};""" - sql """ - CREATE MATERIALIZED VIEW ${mvName} - BUILD DEFERRED REFRESH AUTO ON MANUAL - partition by (date_trunc(`day`,'month')) - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ( - 'replication_num' = '1', - 'partition_sync_limit'='2', - 'partition_sync_time_unit'='MONTH', - 'partition_date_format'='%Y%m%d' - ) - AS - SELECT k1,day,region FROM ${catalog_name}.${hive_database}.${hive_table}; - """ - showPartitionsResult = sql """show partitions from ${mvName}""" - logger.info("showPartitionsResult: " + showPartitionsResult.toString()) - assertEquals(1, showPartitionsResult.size()) - assertTrue(showPartitionsResult.toString().contains("_20380101")) - assertTrue(showPartitionsResult.toString().contains("_20380102")) - - // refresh complete - sql """ - REFRESH MATERIALIZED VIEW ${mvName} complete - """ - jobName = getJobName(dbName, mvName); - waitingMTMVTaskFinished(jobName) - order_qt_mtmv_datetrunc "SELECT * FROM ${mvName} order by k1,day,region" - + test { + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + partition by 
(date_trunc(`day`,'month')) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ( + 'replication_num' = '1', + 'partition_sync_limit'='2', + 'partition_sync_time_unit'='MONTH', + 'partition_date_format'='%Y%m%d' + ) + AS + SELECT k1,day,region FROM ${catalog_name}.${hive_database}.${hive_table}; + """ + exception "only support" + } // date type sql """drop materialized view if exists ${mvName};""" diff --git a/regression-test/suites/mtmv_p0/test_rollup_partition_mtmv.groovy b/regression-test/suites/mtmv_p0/test_rollup_partition_mtmv.groovy index 21c77bc1e2473d7..514c784534c23d7 100644 --- a/regression-test/suites/mtmv_p0/test_rollup_partition_mtmv.groovy +++ b/regression-test/suites/mtmv_p0/test_rollup_partition_mtmv.groovy @@ -46,326 +46,20 @@ suite("test_rollup_partition_mtmv") { insert into ${tableName} values(1,"2020-01-01", "2020-01-01"),(2,"2020-01-02", "2020-01-02"),(3,"2020-02-01", "2020-02-01"); """ - // list date month - sql """ - CREATE MATERIALIZED VIEW ${mvName} - BUILD DEFERRED REFRESH AUTO ON MANUAL - partition by (date_trunc(`k2`,'month')) - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ( - 'replication_num' = '1' - ) - AS - SELECT * FROM ${tableName}; - """ - showPartitionsResult = sql """show partitions from ${mvName}""" - logger.info("showPartitionsResult: " + showPartitionsResult.toString()) - assertEquals(2, showPartitionsResult.size()) - - sql """ - REFRESH MATERIALIZED VIEW ${mvName} AUTO - """ - def jobName = getJobName(dbName, mvName); - log.info(jobName) - waitingMTMVTaskFinished(jobName) - order_qt_date_list_month "SELECT * FROM ${mvName} order by k1,k2" - - sql """drop materialized view if exists ${mvName};""" - sql """ - CREATE MATERIALIZED VIEW ${mvName} - BUILD IMMEDIATE REFRESH AUTO ON MANUAL - partition by (month_alias) - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ( - 'replication_num' = '1' - ) - AS - SELECT date_trunc(`k2`,'month') as month_alias, * FROM ${tableName}; - """ - def date_list_month_partitions = sql """show partitions from ${mvName}""" - logger.info("showPartitionsResult: " + date_list_month_partitions.toString()) - assertEquals(2, date_list_month_partitions.size()) - waitingMTMVTaskFinished(getJobName(dbName, mvName)) - order_qt_date_list_month_partition_by_column "SELECT * FROM ${mvName}" - - sql """drop materialized view if exists ${mvName};""" - sql """ - CREATE MATERIALIZED VIEW ${mvName} - BUILD IMMEDIATE REFRESH AUTO ON MANUAL - partition by (date_trunc(month_alias, 'month')) - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ( - 'replication_num' = '1' - ) - AS - SELECT date_trunc(`k2`,'day') as month_alias, * FROM ${tableName}; - """ - def date_list_month_partitions_level = sql """show partitions from ${mvName}""" - logger.info("showPartitionsResult: " + date_list_month_partitions_level.toString()) - assertEquals(2, date_list_month_partitions_level.size()) - waitingMTMVTaskFinished(getJobName(dbName, mvName)) - order_qt_date_list_month_level "SELECT * FROM ${mvName}" - - - sql """drop materialized view if exists ${mvName};""" - sql """ - CREATE MATERIALIZED VIEW ${mvName} - BUILD IMMEDIATE REFRESH AUTO ON MANUAL - partition by (date_trunc(month_alias, 'month')) - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ( - 'replication_num' = '1' - ) - AS - SELECT date_trunc(`k2`,'day') as month_alias, k1, count(*) FROM ${tableName} group by month_alias, k1; - """ - def date_list_month_partitions_level_agg = sql """show partitions from ${mvName}""" - logger.info("showPartitionsResult: " + date_list_month_partitions_level_agg.toString()) - 
assertEquals(2, date_list_month_partitions_level_agg.size()) - waitingMTMVTaskFinished(getJobName(dbName, mvName)) - order_qt_date_list_month_level_agg "SELECT * FROM ${mvName}" - - - sql """drop materialized view if exists ${mvName};""" - sql """ - CREATE MATERIALIZED VIEW ${mvName} - BUILD IMMEDIATE REFRESH AUTO ON MANUAL - partition by (date_trunc(month_alias, 'month')) - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ( - 'replication_num' = '1' - ) - AS - SELECT date_trunc(`k2`,'day') as month_alias, k3, count(*) FROM ${tableName} group by date_trunc(`k2`,'day'), k3; - """ - def date_list_month_partitions_level_agg_multi = sql """show partitions from ${mvName}""" - logger.info("showPartitionsResult: " + date_list_month_partitions_level_agg_multi.toString()) - assertEquals(2, date_list_month_partitions_level_agg_multi.size()) - waitingMTMVTaskFinished(getJobName(dbName, mvName)) - order_qt_date_list_month_level_agg_multi "SELECT * FROM ${mvName}" - - - sql """drop materialized view if exists ${mvName};""" - sql """ - CREATE MATERIALIZED VIEW ${mvName} - BUILD IMMEDIATE REFRESH AUTO ON MANUAL - partition by (date_trunc(month_alias, 'month')) - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ( - 'replication_num' = '1' - ) - AS - SELECT date_trunc(`k2`,'day') as month_alias, count(*) FROM ${tableName} group by k2; - """ - def date_list_month_partitions_level_agg_direct = sql """show partitions from ${mvName}""" - logger.info("showPartitionsResult: " + date_list_month_partitions_level_agg_direct.toString()) - assertEquals(2, date_list_month_partitions_level_agg_direct.size()) - waitingMTMVTaskFinished(getJobName(dbName, mvName)) - order_qt_date_list_month_level_agg "SELECT * FROM ${mvName}" - - - - // mv partition level should be higher or equal then query, should fail - sql """drop materialized view if exists ${mvName};""" - try { - sql """ - CREATE MATERIALIZED VIEW ${mvName} - BUILD IMMEDIATE REFRESH AUTO ON MANUAL - partition by (date_trunc(month_alias, 'day')) - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ( - 'replication_num' = '1' - ) - AS - SELECT date_trunc(`k2`,'month') as month_alias, * FROM ${tableName}; - """ - Assert.fail(); - } catch (Exception e) { - log.info(e.getMessage()) - assertTrue(e.getMessage().contains("partition column time unit level should be greater than sql select column")) - } - - // mv partition use a column not in mv sql select, should fail - sql """drop materialized view if exists ${mvName};""" - try { - sql """ - CREATE MATERIALIZED VIEW ${mvName} - BUILD IMMEDIATE REFRESH AUTO ON MANUAL - partition by (date_trunc(`k2`, 'month')) - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ( - 'replication_num' = '1' - ) - AS - SELECT date_trunc(`k2`,'day') as month_alias FROM ${tableName}; - """ - Assert.fail(); - } catch (Exception e) { - log.info(e.getMessage()) - assertTrue(e.getMessage().contains("partition column can not find from sql select column")) - } - - sql """drop materialized view if exists ${mvName};""" - // list date year - sql """ - CREATE MATERIALIZED VIEW ${mvName} - BUILD DEFERRED REFRESH AUTO ON MANUAL - partition by (date_trunc(`k2`,'year')) - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ( - 'replication_num' = '1' - ) - AS - SELECT * FROM ${tableName}; - """ - showPartitionsResult = sql """show partitions from ${mvName}""" - logger.info("showPartitionsResult: " + showPartitionsResult.toString()) - assertEquals(1, showPartitionsResult.size()) - - sql """drop materialized view if exists ${mvName};""" - // list date year - sql """ - CREATE MATERIALIZED 
VIEW ${mvName} - BUILD IMMEDIATE REFRESH AUTO ON MANUAL - partition by (year_alias) - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ( - 'replication_num' = '1' - ) - AS - SELECT date_trunc(`k2`,'year') as year_alias, * FROM ${tableName}; - """ - def date_list_year_partitions = sql """show partitions from ${mvName}""" - assertEquals(1, date_list_year_partitions.size()) - order_qt_date_list_year_partition_by_column "SELECT * FROM ${mvName}" - - // list string month - sql """drop table if exists `${tableName}`""" - sql """drop materialized view if exists ${mvName};""" - sql """ - CREATE TABLE `${tableName}` ( - `k1` LARGEINT NOT NULL COMMENT '\"用户id\"', - `k2` varchar(200) NOT NULL COMMENT '\"数据灌入日期时间\"' - ) ENGINE=OLAP - DUPLICATE KEY(`k1`) - COMMENT 'OLAP' - PARTITION BY list(`k2`) - ( - PARTITION p_20200101 VALUES IN ("2020==01==01"), - PARTITION p_20200102 VALUES IN ("2020==01==02"), - PARTITION p_20200201 VALUES IN ("2020==02==01") - ) - DISTRIBUTED BY HASH(`k1`) BUCKETS 2 - PROPERTIES ('replication_num' = '1') ; - """ - sql """ - insert into ${tableName} values(1,"2020==01==01"),(2,"2020==01==02"),(3,"2020==02==01"); - """ - - sql """ - CREATE MATERIALIZED VIEW ${mvName} - BUILD DEFERRED REFRESH AUTO ON MANUAL - partition by (date_trunc(`k2`,'month')) - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ( - 'replication_num' = '1', - 'partition_date_format'='%Y==%m==%d' - ) - AS - SELECT * FROM ${tableName}; - """ - showPartitionsResult = sql """show partitions from ${mvName}""" - logger.info("showPartitionsResult: " + showPartitionsResult.toString()) - assertEquals(2, showPartitionsResult.size()) - - sql """ - REFRESH MATERIALIZED VIEW ${mvName} AUTO - """ - jobName = getJobName(dbName, mvName); - log.info(jobName) - waitingMTMVTaskFinished(jobName) - order_qt_string_list_month "SELECT * FROM ${mvName} order by k1,k2" - - - sql """drop materialized view if exists ${mvName};""" - try { - sql """ - CREATE MATERIALIZED VIEW ${mvName} - BUILD IMMEDIATE REFRESH AUTO ON MANUAL - partition by (date_trunc(month_alias, 'month')) - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ( - 'replication_num' = '1' - ) - AS - SELECT date_trunc(`k2`,'day') as month_alias, * FROM ${tableName}; - """ - Assert.fail(); - } catch (Exception e) { - log.info(e.getMessage()) - assertTrue(e.getMessage().contains("use invalid implicit expression")) - } - - // mv partition level should be higher or equal then query, should fail - sql """drop materialized view if exists ${mvName};""" - try { - sql """ - CREATE MATERIALIZED VIEW ${mvName} - BUILD IMMEDIATE REFRESH AUTO ON MANUAL - partition by (date_trunc(month_alias, 'day')) - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ( - 'replication_num' = '1' - ) - AS - SELECT date_trunc(`k2`,'month') as month_alias, * FROM ${tableName}; - """ - Assert.fail(); - } catch (Exception e) { - log.info(e.getMessage()) - assertTrue(e.getMessage().contains("use invalid implicit expression")) - } - - // mv partition use a column not in mv sql select, should fail - sql """drop materialized view if exists ${mvName};""" - try { - sql """ - CREATE MATERIALIZED VIEW ${mvName} - BUILD IMMEDIATE REFRESH AUTO ON MANUAL - partition by (date_trunc(`k2`, 'month')) - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ( - 'replication_num' = '1' - ) - AS - SELECT date_trunc(`k2`,'day') as month_alias FROM ${tableName}; - """ - Assert.fail(); - } catch (Exception e) { - log.info(e.getMessage()) - assertTrue(e.getMessage().contains("partition column can not find from sql select column")) - } - - // mv partition 
column type is date, base table is string, partition mapping fail - // support later - sql """drop materialized view if exists ${mvName};""" - try { - sql """ - CREATE MATERIALIZED VIEW ${mvName} - BUILD IMMEDIATE REFRESH AUTO ON MANUAL - partition by (month_alias) - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ( - 'replication_num' = '1', - 'partition_date_format'='%Y==%m==%d' - ) - AS - SELECT date_trunc(`k2`,'month') as month_alias, * FROM ${tableName}; - """ - Assert.fail(); - } catch (Exception e) { - log.info(e.getMessage()) - } + test { + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + partition by (date_trunc(`k2`,'month')) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ( + 'replication_num' = '1' + ) + AS + SELECT * FROM ${tableName}; + """ + exception "only support" + } // range date month sql """drop table if exists `${tableName}`""" From a9800b4e81bdb3e602d6cbcbf4c3ab6984f6c65f Mon Sep 17 00:00:00 2001 From: zhangdong <493738387@qq.com> Date: Fri, 9 Aug 2024 17:53:20 +0800 Subject: [PATCH 18/94] [enhance](mtmv) change mysql table type of mtmv to table (#38797) mv1 is MTMV mysql> SHOW FULL TABLES WHERE Table_type = 'BASE TABLE'; +--------------+------------+ | Tables_in_zd | Table_type | +--------------+------------+ | mv1 | BASE TABLE | | com3 | BASE TABLE | +--------------+------------+ --- .../java/org/apache/doris/catalog/TableIf.java | 2 +- .../suites/mtmv_p0/test_build_mtmv.groovy | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java index a052f30ee59ab13..ea23a7ddbad27bc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java @@ -489,7 +489,6 @@ public String toMysqlType() { return "SYSTEM VIEW"; case INLINE_VIEW: case VIEW: - case MATERIALIZED_VIEW: return "VIEW"; case OLAP: case MYSQL: @@ -505,6 +504,7 @@ public String toMysqlType() { case ES_EXTERNAL_TABLE: case ICEBERG_EXTERNAL_TABLE: case PAIMON_EXTERNAL_TABLE: + case MATERIALIZED_VIEW: return "BASE TABLE"; default: return null; diff --git a/regression-test/suites/mtmv_p0/test_build_mtmv.groovy b/regression-test/suites/mtmv_p0/test_build_mtmv.groovy index ae1af18fdc5816c..17b4d4704857937 100644 --- a/regression-test/suites/mtmv_p0/test_build_mtmv.groovy +++ b/regression-test/suites/mtmv_p0/test_build_mtmv.groovy @@ -101,6 +101,21 @@ suite("test_build_mtmv") { logger.info("showDataResult: " + showDataResult.toString()) assertTrue(showDataResult.toString().contains("${mvName}")) + // show full tables + def showFullTablesResult = sql """SHOW FULL TABLES WHERE Table_type = 'BASE TABLE';""" + logger.info("showFullTablesResult: " + showFullTablesResult.toString()) + assertTrue(showFullTablesResult.toString().contains("${mvName}")) + + // views should not contains mtmv + def selectViewsResult = sql """ SELECT * from INFORMATION_SCHEMA.VIEWS;""" + logger.info("selectViewsResult: " + selectViewsResult.toString()) + assertFalse(selectViewsResult.toString().contains("${mvName}")) + + // views should not contains mtmv + def selectTablesResult = sql """ SELECT * from INFORMATION_SCHEMA.TABLES;""" + logger.info("selectTablesResult: " + selectTablesResult.toString()) + assertTrue(selectTablesResult.toString().contains("${mvName}")) + // if not exist try { sql """ From 34f7c66a1ca10f3570e5c0715875cc1390609fb9 Mon Sep 17 00:00:00 2001 From: zzzxl 
<33418555+zzzxl1993@users.noreply.github.com> Date: Fri, 9 Aug 2024 19:49:30 +0800 Subject: [PATCH 19/94] [fix](inverted index) multi match distinguishes the inverted index v1 and v2 (#39149) ## Proposed changes 1. reversed Index v1 and v2 have different column names --- be/src/vec/functions/function_multi_match.cpp | 12 ++- .../test_index_multi_match.out | 24 +++++ .../test_index_multi_match.groovy | 95 ++++++++++--------- 3 files changed, 83 insertions(+), 48 deletions(-) diff --git a/be/src/vec/functions/function_multi_match.cpp b/be/src/vec/functions/function_multi_match.cpp index ba7fa887f19a962..4fc8103a2d38a50 100644 --- a/be/src/vec/functions/function_multi_match.cpp +++ b/be/src/vec/functions/function_multi_match.cpp @@ -169,9 +169,15 @@ Status FunctionMultiMatch::eval_inverted_index(FunctionContext* context, auto single_result = std::make_shared(); StringRef query_value(match_param->query.data()); - RETURN_IF_ERROR(index_reader->query(opts.stats, opts.runtime_state, - std::to_string(column.unique_id()), &query_value, - query_type, single_result)); + auto index_version = tablet_schema->get_inverted_index_storage_format(); + if (index_version == InvertedIndexStorageFormatPB::V1) { + RETURN_IF_ERROR(index_reader->query(opts.stats, opts.runtime_state, column_name, + &query_value, query_type, single_result)); + } else if (index_version == InvertedIndexStorageFormatPB::V2) { + RETURN_IF_ERROR(index_reader->query(opts.stats, opts.runtime_state, + std::to_string(column.unique_id()), &query_value, + query_type, single_result)); + } (*result) |= (*single_result); } diff --git a/regression-test/data/inverted_index_p0/test_index_multi_match.out b/regression-test/data/inverted_index_p0/test_index_multi_match.out index 0a2ed2730b48a41..77e3c86623e17a8 100644 --- a/regression-test/data/inverted_index_p0/test_index_multi_match.out +++ b/regression-test/data/inverted_index_p0/test_index_multi_match.out @@ -23,3 +23,27 @@ -- !sql -- 44 +-- !sql -- +178 + +-- !sql -- +180 + +-- !sql -- +859 + +-- !sql -- +44 + +-- !sql -- +178 + +-- !sql -- +180 + +-- !sql -- +859 + +-- !sql -- +44 + diff --git a/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy b/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy index f08dd984a67e96e..90f9f7a751bfc60 100644 --- a/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy +++ b/regression-test/suites/inverted_index_p0/test_index_multi_match.groovy @@ -19,51 +19,37 @@ suite("test_index_multi_match", "p0"){ def indexTbName1 = "test_index_multi_match_1" def indexTbName2 = "test_index_multi_match_2" + def indexTbName3 = "test_index_multi_match_3" + def indexTbName4 = "test_index_multi_match_4" sql "DROP TABLE IF EXISTS ${indexTbName1}" sql "DROP TABLE IF EXISTS ${indexTbName2}" + sql "DROP TABLE IF EXISTS ${indexTbName3}" + sql "DROP TABLE IF EXISTS ${indexTbName4}" - sql """ - CREATE TABLE ${indexTbName1} ( - `@timestamp` int(11) NULL COMMENT "", - `clientip` text NULL COMMENT "", - `request` text NULL COMMENT "", - `status` text NULL COMMENT "", - `size` text NULL COMMENT "", - INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '', - INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '', - INDEX status_idx (`status`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '', - INDEX size_idx (`size`) USING INVERTED PROPERTIES("parser" = "english", 
"support_phrase" = "true") COMMENT '' - ) ENGINE=OLAP - DUPLICATE KEY(`@timestamp`) - COMMENT "OLAP" - DISTRIBUTED BY RANDOM BUCKETS 1 - PROPERTIES ( - "replication_allocation" = "tag.location.default: 1", - "disable_auto_compaction" = "true" - ); - """ - - sql """ - CREATE TABLE ${indexTbName2} ( - `@timestamp` int(11) NULL COMMENT "", - `clientip` text NULL COMMENT "", - `request` text NULL COMMENT "", - `status` text NULL COMMENT "", - `size` text NULL COMMENT "", - INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '', - INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '', - INDEX status_idx (`status`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '', - INDEX size_idx (`size`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '' - ) ENGINE=OLAP - DUPLICATE KEY(`@timestamp`) - COMMENT "OLAP" - DISTRIBUTED BY RANDOM BUCKETS 1 - PROPERTIES ( - "replication_allocation" = "tag.location.default: 1", - "disable_auto_compaction" = "true" - ); - """ + def create_table = {table_name, idx_version -> + sql """ + CREATE TABLE ${table_name} ( + `@timestamp` int(11) NULL COMMENT "", + `clientip` text NULL COMMENT "", + `request` text NULL COMMENT "", + `status` text NULL COMMENT "", + `size` text NULL COMMENT "", + INDEX clientip_idx (`clientip`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '', + INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '', + INDEX status_idx (`status`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '', + INDEX size_idx (`size`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`@timestamp`) + COMMENT "OLAP" + DISTRIBUTED BY RANDOM BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "inverted_index_storage_format" = "${idx_version}", + "disable_auto_compaction" = "true" + ); + """ + } def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false, expected_succ_rows = -1, load_to_single_tablet = 'true' -> @@ -103,20 +89,39 @@ suite("test_index_multi_match", "p0"){ } try { + create_table(indexTbName1, 'V1') + create_table(indexTbName2, 'V2') + create_table(indexTbName3, 'V1') + create_table(indexTbName4, 'V2') + load_httplogs_data.call(indexTbName1, 'test_index_multi_match_1', 'true', 'json', 'documents-1000.json') load_httplogs_data.call(indexTbName2, 'test_index_multi_match_2', 'true', 'json', 'documents-1000.json') + load_httplogs_data.call(indexTbName3, 'test_index_multi_match_3', 'true', 'json', 'documents-1000.json') + load_httplogs_data.call(indexTbName4, 'test_index_multi_match_4', 'true', 'json', 'documents-1000.json') sql "sync" + sql """ set enable_common_expr_pushdown = true """ + qt_sql """ select count() from ${indexTbName1} where (clientip match_phrase_prefix '2'); """ qt_sql """ select count() from ${indexTbName1} where (clientip match_phrase_prefix '2' or request match_phrase_prefix '2'); """ qt_sql """ select count() from ${indexTbName1} where (clientip match_phrase_prefix '2' or request match_phrase_prefix '2' or status match_phrase_prefix '2' or size match_phrase_prefix '2'); """ qt_sql """ select count() from ${indexTbName1} where (clientip match_phrase_prefix 'a' or request 
match_phrase_prefix 'a' or status match_phrase_prefix 'a' or size match_phrase_prefix 'a'); """ - qt_sql """ select count() from ${indexTbName2} where multi_match(clientip, '', 'phrase_prefix', '2'); """ - qt_sql """ select count() from ${indexTbName2} where multi_match(clientip, 'request', 'phrase_prefix', '2'); """ - qt_sql """ select count() from ${indexTbName2} where multi_match(clientip, 'request, status, size', 'phrase_prefix', '2'); """ - qt_sql """ select count() from ${indexTbName2} where multi_match(clientip, 'request, status, size', 'phrase_prefix', 'a'); """ + qt_sql """ select count() from ${indexTbName2} where (clientip match_phrase_prefix '2'); """ + qt_sql """ select count() from ${indexTbName2} where (clientip match_phrase_prefix '2' or request match_phrase_prefix '2'); """ + qt_sql """ select count() from ${indexTbName2} where (clientip match_phrase_prefix '2' or request match_phrase_prefix '2' or status match_phrase_prefix '2' or size match_phrase_prefix '2'); """ + qt_sql """ select count() from ${indexTbName2} where (clientip match_phrase_prefix 'a' or request match_phrase_prefix 'a' or status match_phrase_prefix 'a' or size match_phrase_prefix 'a'); """ + + qt_sql """ select count() from ${indexTbName3} where multi_match(clientip, '', 'phrase_prefix', '2'); """ + qt_sql """ select count() from ${indexTbName3} where multi_match(clientip, 'request', 'phrase_prefix', '2'); """ + qt_sql """ select count() from ${indexTbName3} where multi_match(clientip, 'request, status, size', 'phrase_prefix', '2'); """ + qt_sql """ select count() from ${indexTbName3} where multi_match(clientip, 'request, status, size', 'phrase_prefix', 'a'); """ + + qt_sql """ select count() from ${indexTbName4} where multi_match(clientip, '', 'phrase_prefix', '2'); """ + qt_sql """ select count() from ${indexTbName4} where multi_match(clientip, 'request', 'phrase_prefix', '2'); """ + qt_sql """ select count() from ${indexTbName4} where multi_match(clientip, 'request, status, size', 'phrase_prefix', '2'); """ + qt_sql """ select count() from ${indexTbName4} where multi_match(clientip, 'request, status, size', 'phrase_prefix', 'a'); """ } finally { //try_sql("DROP TABLE IF EXISTS ${testTable}") From c9cdaf855fcf4ede46bc733f119095cb964752f1 Mon Sep 17 00:00:00 2001 From: amory Date: Fri, 9 Aug 2024 21:26:32 +0800 Subject: [PATCH 20/94] [fix](outfile) fix jsonb boolean outfile (#39136) ## Proposed changes this pr: https://github.com/apache/doris/pull/38683 omit some outfile to be fixed Issue Number: close #xxx --- .../jsonb_p0/test_jsonb_load_and_function.out | 76 +++++++++---------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/regression-test/data/nereids_p0/jsonb_p0/test_jsonb_load_and_function.out b/regression-test/data/nereids_p0/jsonb_p0/test_jsonb_load_and_function.out index 51c904364cb502b..8a7fa8e0da0e870 100644 --- a/regression-test/data/nereids_p0/jsonb_p0/test_jsonb_load_and_function.out +++ b/regression-test/data/nereids_p0/jsonb_p0/test_jsonb_load_and_function.out @@ -7714,11 +7714,11 @@ 2 null \N 3 true true 4 false false -5 100 \N -6 10000 \N -7 1000000000 \N -8 1152921504606846976 \N -9 6.18 \N +5 100 true +6 10000 true +7 1000000000 true +8 1152921504606846976 true +9 6.18 true 10 "abcd" \N 11 {} \N 12 {"k1":"v31","k2":300} \N @@ -7731,9 +7731,9 @@ 26 \N \N 27 {"k1":"v1","k2":200} \N 28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} \N -29 12524337771678448270 \N -30 -9223372036854775808 \N -31 18446744073709551615 \N +29 12524337771678448270 true +30 -9223372036854775808 
true +31 18446744073709551615 true 32 {"":"v1"} \N 33 {"":1,"":"v1"} \N 34 {"":1,"ab":"v1","":"v1","":2} \N @@ -7741,13 +7741,13 @@ -- !select -- 1 \N \N 2 null \N -3 true \N -4 false \N +3 true 1 +4 false 0 5 100 100 6 10000 10000 7 1000000000 -13824 8 1152921504606846976 0 -9 6.18 \N +9 6.18 6 10 "abcd" \N 11 {} \N 12 {"k1":"v31","k2":300} \N @@ -7770,13 +7770,13 @@ -- !select -- 1 \N \N 2 null \N -3 true \N -4 false \N +3 true 1 +4 false 0 5 100 100 6 10000 10000 7 1000000000 1000000000 8 1152921504606846976 0 -9 6.18 \N +9 6.18 6 10 "abcd" \N 11 {} \N 12 {"k1":"v31","k2":300} \N @@ -7799,13 +7799,13 @@ -- !select -- 1 \N \N 2 null \N -3 true \N -4 false \N +3 true 1 +4 false 0 5 100 100 6 10000 10000 7 1000000000 1000000000 8 1152921504606846976 1152921504606846976 -9 6.18 \N +9 6.18 6 10 "abcd" \N 11 {} \N 12 {"k1":"v31","k2":300} \N @@ -7828,8 +7828,8 @@ -- !select -- 1 \N \N 2 null \N -3 true \N -4 false \N +3 true 1.0 +4 false 0.0 5 100 100.0 6 10000 10000.0 7 1000000000 1.0E9 @@ -7888,11 +7888,11 @@ 2 null \N 3 true true 4 false false -5 100 \N -6 10000 \N -7 1000000000 \N -8 1152921504606846976 \N -9 6.18 \N +5 100 true +6 10000 true +7 1000000000 true +8 1152921504606846976 true +9 6.18 true 10 "abcd" \N 11 {} \N 12 {"k1":"v31","k2":300} \N @@ -7905,9 +7905,9 @@ 26 \N \N 27 {"k1":"v1","k2":200} \N 28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} \N -29 12524337771678448270 \N -30 -9223372036854775808 \N -31 18446744073709551615 \N +29 12524337771678448270 true +30 -9223372036854775808 true +31 18446744073709551615 true 32 {"":"v1"} \N 33 {"":1,"":"v1"} \N 34 {"":1,"ab":"v1","":"v1","":2} \N @@ -7915,13 +7915,13 @@ -- !select -- 1 \N \N 2 null \N -3 true \N -4 false \N +3 true 1 +4 false 0 5 100 100 6 10000 10000 7 1000000000 -13824 8 1152921504606846976 0 -9 6.18 \N +9 6.18 6 10 "abcd" \N 11 {} \N 12 {"k1":"v31","k2":300} \N @@ -7944,13 +7944,13 @@ -- !select -- 1 \N \N 2 null \N -3 true \N -4 false \N +3 true 1 +4 false 0 5 100 100 6 10000 10000 7 1000000000 1000000000 8 1152921504606846976 0 -9 6.18 \N +9 6.18 6 10 "abcd" \N 11 {} \N 12 {"k1":"v31","k2":300} \N @@ -7973,13 +7973,13 @@ -- !select -- 1 \N \N 2 null \N -3 true \N -4 false \N +3 true 1 +4 false 0 5 100 100 6 10000 10000 7 1000000000 1000000000 8 1152921504606846976 1152921504606846976 -9 6.18 \N +9 6.18 6 10 "abcd" \N 11 {} \N 12 {"k1":"v31","k2":300} \N @@ -8002,8 +8002,8 @@ -- !select -- 1 \N \N 2 null \N -3 true \N -4 false \N +3 true 1 +4 false 0 5 100 100 6 10000 10000 7 1000000000 1000000000 From 0e23c2d25bb50f12e9930103e2c4918a31a3f60c Mon Sep 17 00:00:00 2001 From: yujun Date: Fri, 9 Aug 2024 21:34:14 +0800 Subject: [PATCH 21/94] [improvement](balance) don't balance tablet which has unfinish alter job (#39121) Improvement: don't balance tablets that have unfished alter job. Also fix partition rebalancer may balance colocate tablets. 
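For reviewers, the per-rebalancer checks are consolidated into a single eligibility guard on the base class. The sketch below is a simplified rendering of the new `Rebalancer.canBalanceTablet` introduced in the diff that follows (class context is elided; in the real method `colocateTableIndex` and `recycleBin` are fetched from `Env`, with `recycleBin` left null in clone unit tests, and `alterTableIds` is refreshed by `TabletScheduler` from `Alter.getUnfinishedAlterTableIds()` on every load-statistic update):

```java
// Simplified sketch: a tablet may take part in balancing only when all checks pass.
protected boolean canBalanceTablet(TabletMeta tabletMeta) {
    return tabletMeta != null                                   // tablet meta still exists
            // skip tablets whose table has an unfinished schema change,
            // index change or rollup/materialized-view job
            && !alterTableIds.contains(tabletMeta.getTableId())
            // colocate tablets are only balanced by rebalancers that opt in
            // (DiskRebalancer sets canBalanceColocateTable = true)
            && (canBalanceColocateTable
                    || !colocateTableIndex.isColocateTable(tabletMeta.getTableId()))
            // never move tablets of partitions sitting in the recycle bin
            && (recycleBin == null
                    || !recycleBin.isRecyclePartition(tabletMeta.getDbId(),
                            tabletMeta.getTableId(), tabletMeta.getPartitionId()));
}
```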
--- .../java/org/apache/doris/alter/Alter.java | 23 ++++++++++++++ .../doris/alter/SchemaChangeHandler.java | 4 +++ .../apache/doris/clone/BeLoadRebalancer.java | 23 +------------- .../apache/doris/clone/DiskRebalancer.java | 14 ++------- .../doris/clone/PartitionRebalancer.java | 2 +- .../org/apache/doris/clone/Rebalancer.java | 30 ++++++++++++++++++- .../apache/doris/clone/TabletScheduler.java | 4 +++ 7 files changed, 64 insertions(+), 36 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java b/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java index 8e8be7c567e48b0..1fcb4fe65c38a34 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java @@ -81,6 +81,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -89,6 +90,7 @@ import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; public class Alter { private static final Logger LOG = LogManager.getLogger(Alter.class); @@ -905,6 +907,27 @@ private void processModifyMinLoadReplicaNum(Database db, OlapTable olapTable, Al } } + public Set getUnfinishedAlterTableIds() { + Set unfinishedTableIds = Sets.newHashSet(); + for (AlterJobV2 job : schemaChangeHandler.getAlterJobsV2().values()) { + if (!job.isDone()) { + unfinishedTableIds.add(job.getTableId()); + } + } + for (IndexChangeJob job : ((SchemaChangeHandler) schemaChangeHandler).getIndexChangeJobs().values()) { + if (!job.isDone()) { + unfinishedTableIds.add(job.getTableId()); + } + } + for (AlterJobV2 job : materializedViewHandler.getAlterJobsV2().values()) { + if (!job.isDone()) { + unfinishedTableIds.add(job.getTableId()); + } + } + + return unfinishedTableIds; + } + public AlterHandler getSchemaChangeHandler() { return schemaChangeHandler; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java index 9bcfa1cde04de47..1a4900a3fd3be17 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java @@ -1800,6 +1800,10 @@ private void changeTableState(long dbId, long tableId, OlapTableState olapTableS } } + public Map getIndexChangeJobs() { + return indexChangeJobs; + } + public List> getAllIndexChangeJobInfos() { List> indexChangeJobInfos = new LinkedList<>(); for (IndexChangeJob indexChangeJob : ImmutableList.copyOf(indexChangeJobs.values())) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java index 0da7428e4225212..78452000ca50dd8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/BeLoadRebalancer.java @@ -17,9 +17,6 @@ package org.apache.doris.clone; -import org.apache.doris.catalog.CatalogRecycleBin; -import org.apache.doris.catalog.ColocateTableIndex; -import org.apache.doris.catalog.Env; import org.apache.doris.catalog.Replica; import org.apache.doris.catalog.TabletInvertedIndex; import org.apache.doris.catalog.TabletMeta; @@ -31,7 +28,6 @@ import org.apache.doris.clone.TabletSchedCtx.Priority; import 
org.apache.doris.clone.TabletScheduler.PathSlot; import org.apache.doris.common.Config; -import org.apache.doris.common.FeConstants; import org.apache.doris.common.Pair; import org.apache.doris.system.Backend; import org.apache.doris.system.SystemInfoService; @@ -120,15 +116,7 @@ protected List selectAlternativeTabletsForCluster( LOG.info("get number of low load paths: {}, with medium: {}", numOfLowPaths, medium); List alternativeTabletInfos = Lists.newArrayList(); - - // Clone ut mocked env, but CatalogRecycleBin is not mockable (it extends from Thread) - // so in clone ut recycleBin need to set to null. - CatalogRecycleBin recycleBin = null; - if (!FeConstants.runningUnitTest) { - recycleBin = Env.getCurrentRecycleBin(); - } int clusterAvailableBEnum = infoService.getAllBackendIds(true).size(); - ColocateTableIndex colocateTableIndex = Env.getCurrentColocateIndex(); List> lowBETablets = lowBEs.stream() .map(beStat -> Sets.newHashSet(invertedIndex.getTabletIdsByBackendId(beStat.getBeId()))) .collect(Collectors.toList()); @@ -230,11 +218,7 @@ protected List selectAlternativeTabletsForCluster( long replicaDataSize = replica.getDataSize(); if (remainingPaths.containsKey(replicaPathHash)) { TabletMeta tabletMeta = invertedIndex.getTabletMeta(tabletId); - if (tabletMeta == null) { - continue; - } - - if (colocateTableIndex.isColocateTable(tabletMeta.getTableId())) { + if (!canBalanceTablet(tabletMeta)) { continue; } @@ -245,11 +229,6 @@ protected List selectAlternativeTabletsForCluster( continue; } - if (recycleBin != null && recycleBin.isRecyclePartition(tabletMeta.getDbId(), - tabletMeta.getTableId(), tabletMeta.getPartitionId())) { - continue; - } - boolean isFit = lowBEs.stream().anyMatch(be -> be.isFit(replicaDataSize, medium, null, false) == BalanceStatus.OK); if (!isFit) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/DiskRebalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/DiskRebalancer.java index 96eef52d5978706..a8448b8ffd2f94a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/DiskRebalancer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/DiskRebalancer.java @@ -17,7 +17,6 @@ package org.apache.doris.clone; -import org.apache.doris.catalog.CatalogRecycleBin; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.Replica; import org.apache.doris.catalog.TabletInvertedIndex; @@ -59,6 +58,7 @@ public class DiskRebalancer extends Rebalancer { public DiskRebalancer(SystemInfoService infoService, TabletInvertedIndex invertedIndex, Map backendsWorkingSlots) { super(infoService, invertedIndex, backendsWorkingSlots); + canBalanceColocateTable = true; } public List filterByPrioBackends(List bes) { @@ -163,12 +163,6 @@ protected List selectAlternativeTabletsForCluster( return alternativeTablets; } - // Clone ut mocked env, but CatalogRecycleBin is not mockable (it extends from Thread) - // so in clone ut recycleBin need to set to null. - CatalogRecycleBin recycleBin = null; - if (!FeConstants.runningUnitTest) { - recycleBin = Env.getCurrentRecycleBin(); - } Set alternativeTabletIds = Sets.newHashSet(); Set unbalancedBEs = Sets.newHashSet(); // choose tablets from backends randomly. 
@@ -243,11 +237,7 @@ protected List selectAlternativeTabletsForCluster( long replicaPathHash = replica.getPathHash(); if (remainingPaths.containsKey(replicaPathHash)) { TabletMeta tabletMeta = invertedIndex.getTabletMeta(tabletId); - if (tabletMeta == null) { - continue; - } - if (recycleBin != null && recycleBin.isRecyclePartition(tabletMeta.getDbId(), - tabletMeta.getTableId(), tabletMeta.getPartitionId())) { + if (!canBalanceTablet(tabletMeta)) { continue; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java index 7095ad8dc54315c..5af920c74fdde27 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java @@ -138,7 +138,7 @@ protected List selectAlternativeTabletsForCluster( invertedIndex.getTabletIdsByBackendIdAndStorageMedium(move.toBe, medium)); BiPredicate canMoveTablet = (Long tabletId, TabletMeta tabletMeta) -> { - return tabletMeta != null + return canBalanceTablet(tabletMeta) && tabletMeta.getPartitionId() == move.partitionId && tabletMeta.getIndexId() == move.indexId && !invalidIds.contains(tabletId) diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/Rebalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/Rebalancer.java index 682c29159898954..af8bc6d67fc9d5c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/Rebalancer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/Rebalancer.java @@ -17,9 +17,14 @@ package org.apache.doris.clone; +import org.apache.doris.catalog.CatalogRecycleBin; +import org.apache.doris.catalog.ColocateTableIndex; +import org.apache.doris.catalog.Env; import org.apache.doris.catalog.TabletInvertedIndex; +import org.apache.doris.catalog.TabletMeta; import org.apache.doris.clone.TabletScheduler.PathSlot; import org.apache.doris.common.Config; +import org.apache.doris.common.FeConstants; import org.apache.doris.resource.Tag; import org.apache.doris.system.Backend; import org.apache.doris.system.SystemInfoService; @@ -29,13 +34,14 @@ import com.google.common.collect.HashBasedTable; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import com.google.common.collect.Table; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.util.List; import java.util.Map; - +import java.util.Set; /* * Rebalancer is responsible for @@ -61,6 +67,9 @@ public abstract class Rebalancer { // be id -> end time of prio protected Map prioBackends = Maps.newConcurrentMap(); + protected boolean canBalanceColocateTable = false; + private Set alterTableIds = Sets.newHashSet(); + // tag -> (medium, timestamp) private Table lastPickTimeTable = HashBasedTable.create(); @@ -106,6 +115,21 @@ protected boolean unPickOverLongTime(Tag tag, TStorageMedium medium) { return lastPickTime == null || now - lastPickTime >= Config.be_rebalancer_idle_seconds * 1000L; } + protected boolean canBalanceTablet(TabletMeta tabletMeta) { + // Clone ut mocked env, but CatalogRecycleBin is not mockable (it extends from Thread) + // so in clone ut recycleBin need to set to null. 
+ ColocateTableIndex colocateTableIndex = Env.getCurrentColocateIndex(); + CatalogRecycleBin recycleBin = null; + if (!FeConstants.runningUnitTest) { + recycleBin = Env.getCurrentRecycleBin(); + } + return tabletMeta != null + && !alterTableIds.contains(tabletMeta.getTableId()) + && (canBalanceColocateTable || !colocateTableIndex.isColocateTable(tabletMeta.getTableId())) + && (recycleBin == null || !recycleBin.isRecyclePartition(tabletMeta.getDbId(), + tabletMeta.getTableId(), tabletMeta.getPartitionId())); + } + public AgentTask createBalanceTask(TabletSchedCtx tabletCtx) throws SchedException { completeSchedCtx(tabletCtx); @@ -139,6 +163,10 @@ public void updateLoadStatistic(Map statisticMap) { this.statisticMap = statisticMap; } + public void updateAlterTableIds(Set alterTableIds) { + this.alterTableIds = alterTableIds; + } + public void addPrioBackends(List backends, long timeoutS) { long currentTimeMillis = System.currentTimeMillis(); for (Backend backend : backends) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java index 7c58d6acc5321cd..a83308a650bad74 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java @@ -358,6 +358,10 @@ private void updateLoadStatistics() { rebalancer.updateLoadStatistic(statisticMap); diskRebalancer.updateLoadStatistic(statisticMap); + Set alterTableIds = Env.getCurrentEnv().getAlterInstance().getUnfinishedAlterTableIds(); + rebalancer.updateAlterTableIds(alterTableIds); + diskRebalancer.updateAlterTableIds(alterTableIds); + lastStatUpdateTime = System.currentTimeMillis(); } From bc4390d60be68fc4415505d81a949effe945ebb9 Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Fri, 9 Aug 2024 21:44:32 +0800 Subject: [PATCH 22/94] [fix](delete) Fix delete stmt on MOW table doesn't use partial update in Nereids planner (#38751) ## Proposed changes 1. Fix nereids planner don't use partial update for delete statement on merge-on-write table introduced in https://github.com/apache/doris/pull/36782 2. add `IS_PARTIAL_UPDATE: true` in `OlapTableSink.getExplainString()` 3. don't convert the delete stmt to partial update if the table has cluster key. --- .../plans/commands/DeleteFromCommand.java | 20 ++++-- .../apache/doris/planner/OlapTableSink.java | 1 + .../data/compaction/test_full_compaction.out | 4 +- .../test_full_compaction_by_table_id.out | 4 +- ...out => test_delete_predicate_on_value.out} | 48 ++++++-------- .../delete/delete_mow_partial_update.out | 24 +++++-- .../test_new_partial_update_delete.out | 4 +- .../test_partial_update_delete.out | 22 +++++-- ...ction_uniq_cluster_keys_with_delete.groovy | 3 +- ... 
=> test_delete_predicate_on_value.groovy} | 65 +++++++++---------- .../delete/delete_mow_partial_update.groovy | 24 ++++++- .../ddl/customer_create.sql | 3 +- .../ssb_unique_load_zstd/ddl/date_create.sql | 3 +- .../ddl/lineorder_create.sql | 3 +- .../ssb_unique_load_zstd/ddl/part_create.sql | 3 +- .../ddl/supplier_create.sql | 3 +- .../ddl/supplier_sequence_create.sql | 3 +- .../test_pk_uk_case.groovy | 3 +- .../ddl/customer_create.sql | 3 +- .../ssb_unique_load_zstd/ddl/date_create.sql | 3 +- .../ddl/lineorder_create.sql | 3 +- .../ssb_unique_load_zstd/ddl/part_create.sql | 3 +- .../ddl/supplier_create.sql | 3 +- .../ddl/supplier_sequence_create.sql | 3 +- .../test_pk_uk_case.groovy | 3 +- .../test_new_partial_update_delete.groovy | 4 +- .../test_partial_update_delete.groovy | 6 +- 27 files changed, 164 insertions(+), 107 deletions(-) rename regression-test/data/delete_p0/{test_delete_on_value.out => test_delete_predicate_on_value.out} (60%) rename regression-test/suites/delete_p0/{test_delete_on_value.groovy => test_delete_predicate_on_value.groovy} (76%) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteFromCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteFromCommand.java index 0778765f0bd4881..bbedc4fe8d3cde8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteFromCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/DeleteFromCommand.java @@ -400,26 +400,32 @@ public LogicalPlan completeQueryPlan(ConnectContext ctx, LogicalPlan logicalQuer List cols = Lists.newArrayList(); boolean isMow = targetTable.getEnableUniqueKeyMergeOnWrite(); String tableName = tableAlias != null ? tableAlias : targetTable.getName(); + boolean hasClusterKey = targetTable.getBaseSchema().stream().anyMatch(Column::isClusterKey); + // currently cluster key doesn't support partial update, so we can't convert + // a delete stmt to partial update load if the table has cluster key for (Column column : targetTable.getFullSchema()) { + NamedExpression expr = null; if (column.getName().equalsIgnoreCase(Column.DELETE_SIGN)) { - selectLists.add(new UnboundAlias(new TinyIntLiteral(((byte) 1)), Column.DELETE_SIGN)); + expr = new UnboundAlias(new TinyIntLiteral(((byte) 1)), Column.DELETE_SIGN); } else if (column.getName().equalsIgnoreCase(Column.SEQUENCE_COL) && targetTable.getSequenceMapCol() != null) { - selectLists.add(new UnboundSlot(tableName, targetTable.getSequenceMapCol())); + expr = new UnboundSlot(tableName, targetTable.getSequenceMapCol()); } else if (column.isKey()) { - selectLists.add(new UnboundSlot(tableName, column.getName())); + expr = new UnboundSlot(tableName, column.getName()); } else if (!isMow && (!column.isVisible() || (!column.isAllowNull() && !column.hasDefaultValue()))) { - selectLists.add(new UnboundSlot(tableName, column.getName())); + expr = new UnboundSlot(tableName, column.getName()); + } else if (hasClusterKey) { + expr = new UnboundSlot(tableName, column.getName()); } else { - selectLists.add(new UnboundSlot(tableName, column.getName())); + continue; } + selectLists.add(expr); cols.add(column.getName()); } logicalQuery = new LogicalProject<>(selectLists, logicalQuery); - boolean isPartialUpdate = targetTable.getEnableUniqueKeyMergeOnWrite() - && cols.size() < targetTable.getColumns().size(); + boolean isPartialUpdate = isMow && !hasClusterKey && cols.size() < targetTable.getColumns().size(); logicalQuery = handleCte(logicalQuery); // make 
UnboundTableSink return UnboundTableSinkCreator.createUnboundTableSink(nameParts, cols, ImmutableList.of(), diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java index 621ba63a20d6768..6c2cb8bd130a0f8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapTableSink.java @@ -242,6 +242,7 @@ public String getExplainString(String prefix, TExplainLevel explainLevel) { } strBuilder.append(prefix + " TUPLE ID: " + tupleDescriptor.getId() + "\n"); strBuilder.append(prefix + " " + DataPartition.RANDOM.getExplainString(explainLevel)); + strBuilder.append(prefix + " IS_PARTIAL_UPDATE: " + isPartialUpdate); return strBuilder.toString(); } diff --git a/regression-test/data/compaction/test_full_compaction.out b/regression-test/data/compaction/test_full_compaction.out index 7098be6d89d2e68..b25fdad93145cc3 100644 --- a/regression-test/data/compaction/test_full_compaction.out +++ b/regression-test/data/compaction/test_full_compaction.out @@ -32,12 +32,12 @@ 2 2 2 20 2 200 -3 100 +3 0 3 100 3 300 -- !select_final -- 1 100 2 200 -3 100 +3 0 diff --git a/regression-test/data/compaction/test_full_compaction_by_table_id.out b/regression-test/data/compaction/test_full_compaction_by_table_id.out index 7098be6d89d2e68..b25fdad93145cc3 100644 --- a/regression-test/data/compaction/test_full_compaction_by_table_id.out +++ b/regression-test/data/compaction/test_full_compaction_by_table_id.out @@ -32,12 +32,12 @@ 2 2 2 20 2 200 -3 100 +3 0 3 100 3 300 -- !select_final -- 1 100 2 200 -3 100 +3 0 diff --git a/regression-test/data/delete_p0/test_delete_on_value.out b/regression-test/data/delete_p0/test_delete_predicate_on_value.out similarity index 60% rename from regression-test/data/delete_p0/test_delete_on_value.out rename to regression-test/data/delete_p0/test_delete_predicate_on_value.out index 4a6e652aef38879..e54cb9a6b4a4402 100644 --- a/regression-test/data/delete_p0/test_delete_on_value.out +++ b/regression-test/data/delete_p0/test_delete_predicate_on_value.out @@ -1,5 +1,5 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this --- !sql -- +-- !sql_1 -- 1 1 1 2 2 2 3 3 3 @@ -10,7 +10,7 @@ 8 8 8 9 9 9 --- !sql -- +-- !sql_1 -- 1 1 1 2 2 2 3 3 3 @@ -20,19 +20,24 @@ 8 8 8 9 9 9 --- !sql -- +-- !sql_1 -- 1 1 1 2 2 2 8 8 8 9 9 9 --- !sql -- +-- !skip_delete_predicate_sql_1 -- 1 1 1 0 2 2 2 0 +3 3 3 0 +4 4 4 0 +5 5 5 0 +6 6 6 0 +7 7 7 0 8 8 8 0 9 9 9 0 --- !sql -- +-- !sql_1 -- 1 1 1 2 2 2 4 4 4 @@ -40,57 +45,44 @@ 8 8 8 9 9 9 --- !sql -- +-- !sql_1 -- 1 1 1 2 2 2 4 4 4 8 8 8 9 9 9 --- !sql -- +-- !skip_delete_predicate_sql_1 -- 1 1 1 0 2 2 2 0 3 3 3 0 -3 3 3 1 4 4 4 0 -4 4 4 0 -4 4 4 1 -5 5 5 0 5 5 5 0 -5 5 5 1 -5 5 5 1 6 6 6 0 -6 6 6 1 7 7 7 0 -7 7 7 1 8 8 8 0 9 9 9 0 --- !sql -- +-- !sql_3 -- 1 1 10 --- !sql -- +-- !skip_delete_predicate_sql_3 -- 1 1 5 0 3 5 1 1 10 0 2 10 --- !sql -- +-- !sql_3 -- --- !sql -- +-- !skip_delete_predicate_sql_3 -- 1 1 5 0 3 5 1 1 10 0 2 10 -1 1 10 1 4 10 --- !sql -- +-- !sql_4 -- 1 1 10 --- !sql -- -1 1 5 0 3 5 -1 1 10 0 2 10 - --- !sql -- +-- !sql_4 -- --- !sql -- +-- !skip_delete_predicate_sql_4 -- +1 \N \N 1 4 10 1 1 5 0 3 5 1 1 10 0 2 10 -1 1 10 1 4 10 diff --git a/regression-test/data/nereids_p0/delete/delete_mow_partial_update.out b/regression-test/data/nereids_p0/delete/delete_mow_partial_update.out index b4237a038036db1..54b9aa4cee9bae1 100644 --- a/regression-test/data/nereids_p0/delete/delete_mow_partial_update.out +++ b/regression-test/data/nereids_p0/delete/delete_mow_partial_update.out @@ -7,20 +7,26 @@ 5 5 -- !sql -- +2 2 4 4 5 5 -- !sql_skip_delete_predicate -- +2 2 4 4 5 5 -- !sql -- +2 2 +5 5 + +-- !sql -- +1 \N 1 1 1 0 -1 1 1 2 2 0 -2 2 1 +3 \N 1 3 3 0 -3 3 1 +4 \N 1 4 4 0 5 5 0 @@ -53,20 +59,26 @@ 5 5 -- !sql -- +2 2 4 4 5 5 -- !sql_skip_delete_predicate -- +2 2 4 4 5 5 -- !sql -- +2 2 +5 5 + +-- !sql -- +1 \N 1 1 1 0 -1 1 1 2 2 0 -2 2 1 +3 \N 1 3 3 0 -3 3 1 +4 \N 1 4 4 0 5 5 0 diff --git a/regression-test/data/unique_with_mow_p0/partial_update/test_new_partial_update_delete.out b/regression-test/data/unique_with_mow_p0/partial_update/test_new_partial_update_delete.out index 318f28c8000ccb6..36507eff4fdc018 100644 --- a/regression-test/data/unique_with_mow_p0/partial_update/test_new_partial_update_delete.out +++ b/regression-test/data/unique_with_mow_p0/partial_update/test_new_partial_update_delete.out @@ -5,7 +5,7 @@ -- !sql2 -- -- !sql3 -- -1 1 1 1 1 1 +1 \N \N \N \N 1 -- !sql4 -- 1 2 \N \N \N @@ -57,7 +57,7 @@ -- !sql2 -- -- !sql3 -- -1 1 1 1 1 1 +1 \N \N \N \N 1 -- !sql4 -- 1 2 \N \N \N diff --git a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete.out b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete.out index c16e954d73309c9..e1623d424601815 100644 --- a/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete.out +++ b/regression-test/data/unique_with_mow_p0/partial_update/test_partial_update_delete.out @@ -6,17 +6,22 @@ 4 4 4 4 4 5 5 5 5 5 +-- !sql -- +2 2 2 2 2 +4 4 4 4 4 +5 5 5 5 5 + -- !sql -- 4 4 4 4 4 5 5 5 5 5 -- !with_delete_sign -- +1 \N \N 0 \N 1 1 1 1 1 1 0 -1 1 1 1 1 1 +2 \N \N 0 \N 1 2 2 2 2 2 0 -2 2 2 2 2 1 +3 \N \N 0 \N 1 3 3 3 3 3 0 -3 3 3 3 3 1 4 4 4 4 4 0 5 5 5 5 5 0 @@ -48,17 +53,22 @@ 4 4 4 4 4 5 5 5 5 5 +-- !sql -- +2 2 2 2 2 +4 4 4 4 4 +5 5 5 5 5 + -- !sql -- 4 4 4 4 4 5 5 5 5 5 -- !with_delete_sign -- +1 \N \N 0 \N 1 1 1 1 1 1 0 -1 1 1 1 1 1 +2 \N \N 0 \N 1 2 2 2 2 2 0 -2 2 2 2 2 1 +3 \N \N 0 \N 1 3 3 3 3 3 0 -3 3 3 3 3 1 4 4 4 4 4 0 5 5 5 5 5 0 diff --git 
a/regression-test/suites/compaction/test_compaction_uniq_cluster_keys_with_delete.groovy b/regression-test/suites/compaction/test_compaction_uniq_cluster_keys_with_delete.groovy index 932835ec3c8644a..0a6199bb406ba82 100644 --- a/regression-test/suites/compaction/test_compaction_uniq_cluster_keys_with_delete.groovy +++ b/regression-test/suites/compaction/test_compaction_uniq_cluster_keys_with_delete.groovy @@ -66,7 +66,8 @@ suite("test_compaction_uniq_cluster_keys_with_delete") { DISTRIBUTED BY HASH(`user_id`) PROPERTIES ( "replication_num" = "1", - "enable_unique_key_merge_on_write" = "true" + "enable_unique_key_merge_on_write" = "true", + "enable_mow_light_delete" = "true" ); """ diff --git a/regression-test/suites/delete_p0/test_delete_on_value.groovy b/regression-test/suites/delete_p0/test_delete_predicate_on_value.groovy similarity index 76% rename from regression-test/suites/delete_p0/test_delete_on_value.groovy rename to regression-test/suites/delete_p0/test_delete_predicate_on_value.groovy index d980fb09ca10658..fc013930bea1bc9 100644 --- a/regression-test/suites/delete_p0/test_delete_on_value.groovy +++ b/regression-test/suites/delete_p0/test_delete_predicate_on_value.groovy @@ -15,11 +15,8 @@ // specific language governing permissions and limitations // under the License. -suite("test_delete_on_value") { +suite("test_delete_predicate_on_value") { - sql "set skip_storage_engine_merge=false;" - sql "set skip_delete_bitmap=false;" - sql "set skip_delete_predicate=false;" def tableName = "test_delete_on_value" sql """ DROP TABLE IF EXISTS ${tableName} """ sql """ CREATE TABLE ${tableName} ( @@ -33,25 +30,27 @@ suite("test_delete_on_value") { PROPERTIES ( "replication_num" = "1", "disable_auto_compaction" = "true", - "enable_unique_key_merge_on_write" = "true" + "enable_unique_key_merge_on_write" = "true", + "enable_mow_light_delete" = "true" );""" sql """ insert into ${tableName} values(1,1,1),(2,2,2),(3,3,3),(4,4,4),(5,5,5),(6,6,6),(7,7,7),(8,8,8),(9,9,9); """ - qt_sql "select * from ${tableName} order by x,y,z;" + qt_sql_1 "select * from ${tableName} order by x,y,z;" sql "delete from ${tableName} where y=4;" - qt_sql "select * from ${tableName} order by x,y,z;" + qt_sql_1 "select * from ${tableName} order by x,y,z;" sql "delete from ${tableName} where z>=3 and z<=7;" - qt_sql "select * from ${tableName} order by x,y,z;" + qt_sql_1 "select * from ${tableName} order by x,y,z;" sql "set skip_delete_predicate=true;" - qt_sql "select x,y,z,__DORIS_DELETE_SIGN__ from ${tableName} order by x,y,z,__DORIS_DELETE_SIGN__;" + qt_skip_delete_predicate_sql_1 "select x,y,z,__DORIS_DELETE_SIGN__ from ${tableName} order by x,y,z,__DORIS_DELETE_SIGN__;" sql "set skip_delete_predicate=false;" sql "insert into ${tableName} values(4,4,4),(5,5,5);" - qt_sql "select * from ${tableName} order by x,y,z;" + qt_sql_1 "select * from ${tableName} order by x,y,z;" sql "delete from ${tableName} where y=5;" - qt_sql "select * from ${tableName} order by x,y,z;" - sql "set skip_storage_engine_merge=true;" - sql "set skip_delete_bitmap=true;" + qt_sql_1 "select * from ${tableName} order by x,y,z;" + sql "set skip_delete_predicate=true;" - qt_sql "select x,y,z,__DORIS_DELETE_SIGN__ from ${tableName} order by x,y,z,__DORIS_DELETE_SIGN__;" + qt_skip_delete_predicate_sql_1 "select x,y,z,__DORIS_DELETE_SIGN__ from ${tableName} order by x,y,z,__DORIS_DELETE_SIGN__;" + sql "set skip_storage_engine_merge=false;" + sql "DROP TABLE IF EXISTS ${tableName};" @@ -75,9 +74,6 @@ suite("test_delete_on_value") { exception "delete 
predicate on value column only supports Unique table with merge-on-write enabled and Duplicate table, but Table[test_delete_on_value2] is an Aggregate table." } - sql "set skip_storage_engine_merge=false;" - sql "set skip_delete_bitmap=false;" - sql "set skip_delete_predicate=false;" def tableName3 = "test_delete_on_value_with_seq_col" sql """ DROP TABLE IF EXISTS ${tableName3} """ sql """ CREATE TABLE ${tableName3} ( @@ -92,29 +88,34 @@ suite("test_delete_on_value") { "disable_auto_compaction" = "true", "replication_num" = "1", "enable_unique_key_merge_on_write" = "true", + "enable_mow_light_delete" = "true", "function_column.sequence_col" = "z" );""" sql "insert into ${tableName3} values(1,1,10);" sql "insert into ${tableName3} values(1,1,5);" - qt_sql "select * from ${tableName3} order by x,y,z;" + qt_sql_3 "select * from ${tableName3} order by x,y,z;" + sql "set skip_storage_engine_merge=true;" sql "set skip_delete_bitmap=true;" sql "set skip_delete_predicate=true;" - qt_sql "select * from ${tableName3} order by x,y,z;" + qt_skip_delete_predicate_sql_3 "select * from ${tableName3} order by x,y,z;" sql "set skip_storage_engine_merge=false;" sql "set skip_delete_bitmap=false;" sql "set skip_delete_predicate=false;" + sql "delete from ${tableName3} where z>=10;" - qt_sql "select * from ${tableName3} order by x,y,z;" + qt_sql_3 "select * from ${tableName3} order by x,y,z;" + sql "set skip_storage_engine_merge=true;" sql "set skip_delete_bitmap=true;" sql "set skip_delete_predicate=true;" - qt_sql "select * from ${tableName3} order by x,y,z;" - sql "DROP TABLE IF EXISTS ${tableName3}" - + qt_skip_delete_predicate_sql_3 "select * from ${tableName3} order by x,y,z;" sql "set skip_storage_engine_merge=false;" sql "set skip_delete_bitmap=false;" sql "set skip_delete_predicate=false;" + sql "DROP TABLE IF EXISTS ${tableName3}" + + def tableName4 = "test_delete_on_value_with_seq_col_mor" sql """ DROP TABLE IF EXISTS ${tableName4} """ sql """ CREATE TABLE ${tableName4} ( @@ -134,23 +135,17 @@ suite("test_delete_on_value") { // test mor table sql "insert into ${tableName4} values(1,1,10);" sql "insert into ${tableName4} values(1,1,5);" - qt_sql "select * from ${tableName4} order by x,y,z;" - sql "set skip_storage_engine_merge=true;" - sql "set skip_delete_bitmap=true;" - sql "set skip_delete_predicate=true;" - qt_sql "select * from ${tableName4} order by x,y,z;" - sql "set skip_storage_engine_merge=false;" - sql "set skip_delete_bitmap=false;" - sql "set skip_delete_predicate=false;" + qt_sql_4 "select * from ${tableName4} order by x,y,z;" sql "delete from ${tableName4} where z>=10;" - qt_sql "select * from ${tableName4} order by x,y,z;" + qt_sql_4 "select * from ${tableName4} order by x,y,z;" + sql "set skip_storage_engine_merge=true;" sql "set skip_delete_bitmap=true;" sql "set skip_delete_predicate=true;" - qt_sql "select * from ${tableName4} order by x,y,z;" - sql "DROP TABLE IF EXISTS ${tableName4};" - + qt_skip_delete_predicate_sql_4 "select * from ${tableName4} order by x,y,z;" sql "set skip_storage_engine_merge=false;" sql "set skip_delete_bitmap=false;" sql "set skip_delete_predicate=false;" + + sql "DROP TABLE IF EXISTS ${tableName4};" } diff --git a/regression-test/suites/nereids_p0/delete/delete_mow_partial_update.groovy b/regression-test/suites/nereids_p0/delete/delete_mow_partial_update.groovy index bfb27ce14ba7b32..87a93ae73a6e21d 100644 --- a/regression-test/suites/nereids_p0/delete/delete_mow_partial_update.groovy +++ 
b/regression-test/suites/nereids_p0/delete/delete_mow_partial_update.groovy @@ -60,7 +60,12 @@ suite('nereids_delete_mow_partial_update') { sql "insert into ${tableName1} values(1, 1), (2, 2), (3, 3), (4, 4), (5, 5);" qt_sql "select * from ${tableName1} order by uid;" - sql "insert into ${tableName2} values(1), (2), (3);" + sql "insert into ${tableName2} values(1), (3);" + explain { + // delete from using command should use partial update + sql "delete from ${tableName1} A using ${tableName2} B where A.uid=B.uid;" + contains "IS_PARTIAL_UPDATE: true" + } sql "delete from ${tableName1} A using ${tableName2} B where A.uid=B.uid;" qt_sql "select * from ${tableName1} order by uid;" // when using parital update insert stmt for delete stmt, it will use delete bitmap or delete sign rather than @@ -68,6 +73,23 @@ suite('nereids_delete_mow_partial_update') { sql "set skip_delete_predicate=true;" sql "sync" qt_sql_skip_delete_predicate "select * from ${tableName1} order by uid;" + sql "set skip_delete_predicate=false;" + sql "sync" + + explain { + // delete from command should use partial update + sql "delete from ${tableName1} where ${tableName1}.uid=2;" + contains "IS_PARTIAL_UPDATE: true" + } + + explain { + // delete from command should use partial update + sql "delete from ${tableName1} where ${tableName1}.v1=4;" + contains "IS_PARTIAL_UPDATE: true" + } + + sql "delete from ${tableName1} where ${tableName1}.v1=4;" + qt_sql "select * from ${tableName1} order by uid;" sql "set skip_delete_sign=true;" sql "set skip_storage_engine_merge=true;" diff --git a/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/customer_create.sql b/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/customer_create.sql index 789c8fd79b89723..53c1b1033284935 100644 --- a/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/customer_create.sql +++ b/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/customer_create.sql @@ -15,5 +15,6 @@ PROPERTIES ( "compression"="zstd", "replication_num" = "1", "disable_auto_compaction" = "true", -"enable_unique_key_merge_on_write" = "true" +"enable_unique_key_merge_on_write" = "true", +"enable_mow_light_delete" = "true" ); diff --git a/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/date_create.sql b/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/date_create.sql index cf6b4b6a73739f8..b96d1038c74b405 100644 --- a/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/date_create.sql +++ b/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/date_create.sql @@ -24,5 +24,6 @@ PROPERTIES ( "compression"="zstd", "replication_num" = "1", "disable_auto_compaction" = "true", -"enable_unique_key_merge_on_write" = "true" +"enable_unique_key_merge_on_write" = "true", +"enable_mow_light_delete" = "true" ); diff --git a/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/lineorder_create.sql b/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/lineorder_create.sql index 2dff3181c9d686c..37ba740fa0697ad 100644 --- a/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/lineorder_create.sql +++ b/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/lineorder_create.sql @@ -32,5 +32,6 @@ PROPERTIES ( "compression"="zstd", "replication_num" = "1", "disable_auto_compaction" = "true", -"enable_unique_key_merge_on_write" = "true" +"enable_unique_key_merge_on_write" = "true", 
+"enable_mow_light_delete" = "true" ); diff --git a/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/part_create.sql b/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/part_create.sql index b1b01bcaeca32df..f8b9438a192a2d4 100644 --- a/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/part_create.sql +++ b/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/part_create.sql @@ -16,5 +16,6 @@ PROPERTIES ( "compression"="zstd", "replication_num" = "1", "disable_auto_compaction" = "true", -"enable_unique_key_merge_on_write" = "true" +"enable_unique_key_merge_on_write" = "true", +"enable_mow_light_delete" = "true" ); diff --git a/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/supplier_create.sql b/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/supplier_create.sql index 53b607a53ffb019..a40662617f6a58f 100644 --- a/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/supplier_create.sql +++ b/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/supplier_create.sql @@ -14,5 +14,6 @@ PROPERTIES ( "compression"="zstd", "replication_num" = "1", "disable_auto_compaction" = "true", -"enable_unique_key_merge_on_write" = "true" +"enable_unique_key_merge_on_write" = "true", +"enable_mow_light_delete" = "true" ); diff --git a/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/supplier_sequence_create.sql b/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/supplier_sequence_create.sql index 9fef263bf07312e..ef1136bbfda22b1 100644 --- a/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/supplier_sequence_create.sql +++ b/regression-test/suites/unique_with_mow_c_p0/ssb_unique_load_zstd/ddl/supplier_sequence_create.sql @@ -15,5 +15,6 @@ PROPERTIES ( "compression"="zstd", "replication_num" = "1", "disable_auto_compaction" = "true", -"enable_unique_key_merge_on_write" = "true" +"enable_unique_key_merge_on_write" = "true", +"enable_mow_light_delete" = "true" ); diff --git a/regression-test/suites/unique_with_mow_c_p0/test_pk_uk_case.groovy b/regression-test/suites/unique_with_mow_c_p0/test_pk_uk_case.groovy index c0cb1add123c994..f4bc7b2f205ba9c 100644 --- a/regression-test/suites/unique_with_mow_c_p0/test_pk_uk_case.groovy +++ b/regression-test/suites/unique_with_mow_c_p0/test_pk_uk_case.groovy @@ -61,7 +61,8 @@ suite("test_pk_uk_case_cluster_key") { PROPERTIES ( "replication_num" = "1", "disable_auto_compaction" = "true", - "enable_unique_key_merge_on_write" = "true" + "enable_unique_key_merge_on_write" = "true", + "enable_mow_light_delete" = "true" ) """ diff --git a/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/customer_create.sql b/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/customer_create.sql index 0bf16f3911ad520..30df14525cf71cb 100644 --- a/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/customer_create.sql +++ b/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/customer_create.sql @@ -15,5 +15,6 @@ PROPERTIES ( "compression"="zstd", "replication_num" = "1", "disable_auto_compaction" = "true", -"enable_unique_key_merge_on_write" = "true" +"enable_unique_key_merge_on_write" = "true", +"enable_mow_light_delete" = "true" ); diff --git a/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/date_create.sql b/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/date_create.sql index 
32b4e24f6cbbb31..ffd796f227a2878 100644 --- a/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/date_create.sql +++ b/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/date_create.sql @@ -24,5 +24,6 @@ PROPERTIES ( "compression"="zstd", "replication_num" = "1", "disable_auto_compaction" = "true", -"enable_unique_key_merge_on_write" = "true" +"enable_unique_key_merge_on_write" = "true", +"enable_mow_light_delete" = "true" ); diff --git a/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/lineorder_create.sql b/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/lineorder_create.sql index 8cb2ae73098772c..0945fe0af46982a 100644 --- a/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/lineorder_create.sql +++ b/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/lineorder_create.sql @@ -32,5 +32,6 @@ PROPERTIES ( "compression"="zstd", "replication_num" = "1", "disable_auto_compaction" = "true", -"enable_unique_key_merge_on_write" = "true" +"enable_unique_key_merge_on_write" = "true", +"enable_mow_light_delete" = "true" ); diff --git a/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/part_create.sql b/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/part_create.sql index 722b7eba1a826fa..86e906b4c2b2e61 100644 --- a/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/part_create.sql +++ b/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/part_create.sql @@ -16,5 +16,6 @@ PROPERTIES ( "compression"="zstd", "replication_num" = "1", "disable_auto_compaction" = "true", -"enable_unique_key_merge_on_write" = "true" +"enable_unique_key_merge_on_write" = "true", +"enable_mow_light_delete" = "true" ); diff --git a/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/supplier_create.sql b/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/supplier_create.sql index aa798357e819d3b..404e4987b444b8b 100644 --- a/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/supplier_create.sql +++ b/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/supplier_create.sql @@ -14,5 +14,6 @@ PROPERTIES ( "compression"="zstd", "replication_num" = "1", "disable_auto_compaction" = "true", -"enable_unique_key_merge_on_write" = "true" +"enable_unique_key_merge_on_write" = "true", +"enable_mow_light_delete" = "true" ); diff --git a/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/supplier_sequence_create.sql b/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/supplier_sequence_create.sql index fd109360fda7a2e..74fa9c46baa222f 100644 --- a/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/supplier_sequence_create.sql +++ b/regression-test/suites/unique_with_mow_c_p2/ssb_unique_load_zstd/ddl/supplier_sequence_create.sql @@ -15,5 +15,6 @@ PROPERTIES ( "compression"="zstd", "replication_num" = "1", "disable_auto_compaction" = "true", -"enable_unique_key_merge_on_write" = "true" +"enable_unique_key_merge_on_write" = "true", +"enable_mow_light_delete" = "true" ); diff --git a/regression-test/suites/unique_with_mow_c_p2/test_pk_uk_case.groovy b/regression-test/suites/unique_with_mow_c_p2/test_pk_uk_case.groovy index 2ff883ecb5087be..c962dc9109ef8b2 100644 --- a/regression-test/suites/unique_with_mow_c_p2/test_pk_uk_case.groovy +++ b/regression-test/suites/unique_with_mow_c_p2/test_pk_uk_case.groovy @@ -60,7 +60,8 @@ 
suite("test_pk_uk_case") { DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 1 PROPERTIES ( "replication_num" = "1", - "enable_unique_key_merge_on_write" = "true" + "enable_unique_key_merge_on_write" = "true", + "enable_mow_light_delete" = "true" ) """ diff --git a/regression-test/suites/unique_with_mow_p0/partial_update/test_new_partial_update_delete.groovy b/regression-test/suites/unique_with_mow_p0/partial_update/test_new_partial_update_delete.groovy index bcd8d5f2842f973..f82510914b58656 100644 --- a/regression-test/suites/unique_with_mow_p0/partial_update/test_new_partial_update_delete.groovy +++ b/regression-test/suites/unique_with_mow_p0/partial_update/test_new_partial_update_delete.groovy @@ -97,7 +97,7 @@ suite('test_new_partial_update_delete') { // empty qt_sql2 "select * from ${tableName1} order by k1;" sql "set show_hidden_columns = true;" - // 1,1,1,1,1,1 + // 1,null,null,null,null,1 qt_sql3 "select k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__ from ${tableName1} order by k1;" sql "set show_hidden_columns = false;" sql "set enable_unique_key_partial_update=true;" @@ -210,7 +210,7 @@ suite('test_new_partial_update_delete') { // empty qt_sql22 "select * from ${tableName2} order by k1;" sql "set show_hidden_columns = true;" - // 1,1,1,1,1,1 + // 1,null,null,null,1 qt_sql23 "select k1,c1,c2,c3,c4,__DORIS_DELETE_SIGN__ from ${tableName2} order by k1;" sql "set show_hidden_columns = false;" sql "set enable_unique_key_partial_update=true;" diff --git a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_delete.groovy b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_delete.groovy index 38720646b2b5330..0d83d94f91c9187 100644 --- a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_delete.groovy +++ b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_delete.groovy @@ -56,9 +56,13 @@ suite('test_partial_update_delete') { sql "insert into ${tableName1} values(1,1,1,1,1),(2,2,2,2,2),(3,3,3,3,3),(4,4,4,4,4),(5,5,5,5,5);" qt_sql "select * from ${tableName1} order by k1;" - sql "insert into ${tableName2} values(1),(2),(3);" + sql "insert into ${tableName2} values(1),(3);" sql "delete from ${tableName1} A using ${tableName2} B where A.k1=B.k;" qt_sql "select * from ${tableName1} order by k1;" + + sql "delete from ${tableName1} where c2=2;" + qt_sql "select * from ${tableName1} order by k1;" + sql "set skip_delete_sign=true;" sql "set skip_storage_engine_merge=true;" sql "set skip_delete_bitmap=true;" From f9c7c035cfdeba58635b102976637761582398cc Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Sat, 10 Aug 2024 09:31:56 +0800 Subject: [PATCH 23/94] [Fix](merge-on-write) Fix FE may use the staled response to wrongly commit txn (#39018) ## Problem consider the following scenarios for merge-on-write table in cloud mode ### Scenario 1: Load-Load Conflict 1. load txn1 tries to commit version n and gets the delete bitmap update lock 2. load txn1 begins to calculate delete bitmap on BEs, this is a heavy calculating process and lasts long 3. load txn2 tries to commit version n and gets the delete bitmap update lock because load txn1's delete bitmap update lock has expired 4. load txn1's delete bitmap update lock expires and load txn2 get the delete bitmap update lock 5. load txn2 commits successfully with version n and release the delete bitmap update lock 6. load txn1 fails to commit due to timeout of the calculation of delete bitmap 7. 
load txn1 retries the commit process with version n+1, gets the delete bitmap update lock and sends a delete bitmap calculation task to BEs 8. BE fails to register this new calculation task because a task with the same signature (txn_id) is already running in the task_worker_pool 9. BE finishes the delete bitmap calculation and reports a success status to FE 10. load txn1 commits successfully with version n+1 Finally, load txn1 fails to calculate the delete bitmap for the version n rowset written by load txn2 ### Scenario 2: Load-Compaction Conflict 1. load txn tries to commit and gets the delete bitmap update lock 2. load txn collects rowset_ids and submits a delete bitmap calculation task for the diff rowsets to the threadpool. But the threadpool is full, so the task is queued in the threadpool. 3. load txn's delete bitmap update lock expires and a compaction job on the same tablet finishes successfully. 4. load txn fails to commit due to timeout of the delete bitmap calculation 5. load txn retries the commit process, gets the delete bitmap update lock and sends a delete bitmap calculation task to BEs 6. BE fails to register this new calculation task because a task with the same signature (txn_id) is already running in the task_worker_pool 7. BE finishes the delete bitmap calculation and reports a success status to FE 8. load txn commits successfully Finally, load txn fails to calculate the delete bitmap for the rowset produced by the compaction ## Solution The root cause of the above failures is that when the commit process is retried many times, FE may use a stale success response from a previous attempt on the BEs and wrongly commit the txn. One solution is to have FE attach a unique id to the delete bitmap calculation task sent to BE and have BE echo it in the response, so that FE can check whether the response belongs to the latest task. However, if the delete bitmap calculation always takes longer than its timeout, FE will retry the commit process indefinitely, which causes a live lock. This PR lets the BE's response carry the compaction stats (to catch load-compaction conflicts) and versions (to catch load-load conflicts) from the task request, and lets the FE compare them with the current task's values to know whether any compaction or load finished in the window since the current load first acquired the delete bitmap update lock (i.e., after that lock expired). If so, the current txn should retry or abort. If not, the current txn can commit successfully.
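To make the staleness check concrete, below is a minimal, hedged Java sketch of the comparison FE performs when a BE reports a finished delete bitmap task. It is not the real FE code: `PartitionInfo` is a stand-in for the thrift partition info carried in `resp_partitions`, and the task/latch plumbing is omitted; the actual logic lives in `CalcDeleteBitmapTask.isFinishRequestStale()` and `MasterImpl.finishCalcDeleteBitmap()` in the diff below.

```java
import java.util.List;
import java.util.Objects;

// Sketch only: FE remembers the partition infos (versions + compaction stats)
// it attached to the delete bitmap calculation task, and trusts a BE finish
// report only if the report echoes exactly the same infos.
final class CalcDeleteBitmapTaskSketch {

    // Stand-in for the thrift partition info: version plus compaction stats.
    record PartitionInfo(long partitionId, long version,
                         long baseCompactionCnt, long cumulativeCompactionCnt,
                         long cumulativePoint) {}

    private final List<PartitionInfo> partitionInfos;

    CalcDeleteBitmapTaskSketch(List<PartitionInfo> partitionInfos) {
        this.partitionInfos = List.copyOf(partitionInfos);
    }

    // A response is stale if it was produced for an earlier attempt, i.e. the
    // echoed partition infos differ from those of the current task.
    boolean isFinishRequestStale(List<PartitionInfo> respPartitionInfos) {
        return !Objects.equals(respPartitionInfos, partitionInfos);
    }

    public static void main(String[] args) {
        var current = new CalcDeleteBitmapTaskSketch(
                List.of(new PartitionInfo(10L, 5L, 2L, 7L, 30L)));
        // Echoes an older version/compaction state -> stale, must not be
        // used to commit; the txn is retried instead.
        var staleResp = List.of(new PartitionInfo(10L, 4L, 2L, 6L, 28L));
        System.out.println(current.isFinishRequestStale(staleResp));   // true
        System.out.println(current.isFinishRequestStale(
                List.of(new PartitionInfo(10L, 5L, 2L, 7L, 30L))));    // false
    }
}
```

When the echoed infos do not match, FE treats the report as stale and fails the attempt with DELETE_BITMAP_LOCK_ERROR so the commit is retried, rather than committing on the outdated response.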
--- be/src/agent/task_worker_pool.cpp | 1 + .../cloud_engine_calc_delete_bitmap_task.cpp | 16 ++- be/src/cloud/cloud_meta_mgr.cpp | 23 +++- be/src/cloud/cloud_tablet.cpp | 9 +- .../cloud/cloud_txn_delete_bitmap_cache.cpp | 19 ++- be/src/cloud/cloud_txn_delete_bitmap_cache.h | 11 +- be/src/olap/base_tablet.cpp | 28 ++-- be/src/olap/base_tablet.h | 1 - be/src/olap/tablet_meta.cpp | 10 ++ be/src/olap/tablet_meta.h | 2 + be/src/olap/txn_manager.h | 32 +++-- .../org/apache/doris/master/MasterImpl.java | 11 ++ .../doris/task/CalcDeleteBitmapTask.java | 4 + gensrc/thrift/MasterService.thrift | 2 + ...ow_stale_resp_load_compaction_conflict.out | 16 +++ ...loud_mow_stale_resp_load_load_conflict.out | 16 +++ ...stale_resp_load_compaction_conflict.groovy | 129 ++++++++++++++++++ ...d_mow_stale_resp_load_load_conflict.groovy | 97 +++++++++++++ 18 files changed, 391 insertions(+), 36 deletions(-) create mode 100644 regression-test/data/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.out create mode 100644 regression-test/data/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.out create mode 100644 regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.groovy create mode 100644 regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.groovy diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index 7bbd602f571ede8..8a6378794e92d58 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -2052,6 +2052,7 @@ void calc_delete_bitmap_callback(CloudStorageEngine& engine, const TAgentTaskReq finish_task_request.__set_signature(req.signature); finish_task_request.__set_report_version(s_report_version); finish_task_request.__set_error_tablet_ids(error_tablet_ids); + finish_task_request.__set_resp_partitions(calc_delete_bitmap_req.partitions); finish_task(finish_task_request); remove_task_info(req.task_type, req.signature); diff --git a/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp b/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp index 5d1a957d14df19d..b6c9aa318f387c0 100644 --- a/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp +++ b/be/src/cloud/cloud_engine_calc_delete_bitmap_task.cpp @@ -186,9 +186,10 @@ Status CloudTabletCalcDeleteBitmapTask::handle() const { std::shared_ptr partial_update_info; std::shared_ptr publish_status; int64_t txn_expiration; + TxnPublishInfo previous_publish_info; Status status = _engine.txn_delete_bitmap_cache().get_tablet_txn_info( _transaction_id, _tablet_id, &rowset, &delete_bitmap, &rowset_ids, &txn_expiration, - &partial_update_info, &publish_status); + &partial_update_info, &publish_status, &previous_publish_info); if (status != Status::OK()) { LOG(WARNING) << "failed to get tablet txn info. 
tablet_id=" << _tablet_id << ", txn_id=" << _transaction_id << ", status=" << status; @@ -204,8 +205,19 @@ Status CloudTabletCalcDeleteBitmapTask::handle() const { txn_info.rowset_ids = rowset_ids; txn_info.partial_update_info = partial_update_info; txn_info.publish_status = publish_status; + txn_info.publish_info = {.publish_version = _version, + .base_compaction_cnt = _ms_base_compaction_cnt, + .cumulative_compaction_cnt = _ms_cumulative_compaction_cnt, + .cumulative_point = _ms_cumulative_point}; auto update_delete_bitmap_time_us = 0; - if (txn_info.publish_status && (*(txn_info.publish_status) == PublishStatus::SUCCEED)) { + if (txn_info.publish_status && (*(txn_info.publish_status) == PublishStatus::SUCCEED) && + _version == previous_publish_info.publish_version && + _ms_base_compaction_cnt == previous_publish_info.base_compaction_cnt && + _ms_cumulative_compaction_cnt == previous_publish_info.cumulative_compaction_cnt && + _ms_cumulative_point == previous_publish_info.cumulative_point) { + // if version or compaction stats can't match, it means that this is a retry and there are + // compaction or other loads finished successfully on the same tablet. So the previous publish + // is stale and we should re-calculate the delete bitmap LOG(INFO) << "tablet=" << _tablet_id << ",txn=" << _transaction_id << ",publish_status=SUCCEED,not need to recalculate and update delete_bitmap."; } else { diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp index e743ea9b12c8ce5..88725b177863f60 100644 --- a/be/src/cloud/cloud_meta_mgr.cpp +++ b/be/src/cloud/cloud_meta_mgr.cpp @@ -559,14 +559,29 @@ bool CloudMetaMgr::sync_tablet_delete_bitmap_by_cache(CloudTablet* tablet, int64 } txn_processed.insert(txn_id); DeleteBitmapPtr tmp_delete_bitmap; - RowsetIdUnorderedSet tmp_rowset_ids; std::shared_ptr publish_status = std::make_shared(PublishStatus::INIT); CloudStorageEngine& engine = ExecEnv::GetInstance()->storage_engine().to_cloud(); Status status = engine.txn_delete_bitmap_cache().get_delete_bitmap( - txn_id, tablet->tablet_id(), &tmp_delete_bitmap, &tmp_rowset_ids, &publish_status); - if (status.ok() && *(publish_status.get()) == PublishStatus::SUCCEED) { - delete_bitmap->merge(*tmp_delete_bitmap); + txn_id, tablet->tablet_id(), &tmp_delete_bitmap, nullptr, &publish_status); + // CloudMetaMgr::sync_tablet_delete_bitmap_by_cache() is called after we sync rowsets from meta services. + // If the control flows reaches here, it's gauranteed that the rowsets is commited in meta services, so we can + // use the delete bitmap from cache directly if *publish_status == PublishStatus::SUCCEED without checking other + // stats(version or compaction stats) + if (status.ok() && *publish_status == PublishStatus::SUCCEED) { + // tmp_delete_bitmap contains sentinel marks, we should remove it before merge it to delete bitmap. 
+ // Also, the version of delete bitmap key in tmp_delete_bitmap is DeleteBitmap::TEMP_VERSION_COMMON, + // we should replace it with the rowset's real version + DCHECK(rs_meta.start_version() == rs_meta.end_version()); + int64_t rowset_version = rs_meta.start_version(); + for (const auto& [delete_bitmap_key, bitmap_value] : tmp_delete_bitmap->delete_bitmap) { + // skip sentinel mark, which is used for delete bitmap correctness check + if (std::get<1>(delete_bitmap_key) != DeleteBitmap::INVALID_SEGMENT_ID) { + delete_bitmap->merge({std::get<0>(delete_bitmap_key), + std::get<1>(delete_bitmap_key), rowset_version}, + bitmap_value); + } + } engine.txn_delete_bitmap_cache().remove_unused_tablet_txn_info(txn_id, tablet->tablet_id()); } else { diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp index 2c6b841be546f84..7f308ddb7bec7e1 100644 --- a/be/src/cloud/cloud_tablet.cpp +++ b/be/src/cloud/cloud_tablet.cpp @@ -680,8 +680,13 @@ Status CloudTablet::save_delete_bitmap(const TabletTxnInfo* txn_info, int64_t tx RETURN_IF_ERROR(_engine.meta_mgr().update_delete_bitmap( *this, txn_id, COMPACTION_DELETE_BITMAP_LOCK_ID, new_delete_bitmap.get())); - _engine.txn_delete_bitmap_cache().update_tablet_txn_info( - txn_id, tablet_id(), new_delete_bitmap, cur_rowset_ids, PublishStatus::SUCCEED); + + // store the delete bitmap with sentinel marks in txn_delete_bitmap_cache because if the txn is retried for some reason, + // it will use the delete bitmap from txn_delete_bitmap_cache when re-calculating the delete bitmap, during which it will do + // delete bitmap correctness check. If we store the new_delete_bitmap, the delete bitmap correctness check will fail + _engine.txn_delete_bitmap_cache().update_tablet_txn_info(txn_id, tablet_id(), delete_bitmap, + cur_rowset_ids, PublishStatus::SUCCEED, + txn_info->publish_info); return Status::OK(); } diff --git a/be/src/cloud/cloud_txn_delete_bitmap_cache.cpp b/be/src/cloud/cloud_txn_delete_bitmap_cache.cpp index 583992e76f7aba7..c6a3b54edc3f67f 100644 --- a/be/src/cloud/cloud_txn_delete_bitmap_cache.cpp +++ b/be/src/cloud/cloud_txn_delete_bitmap_cache.cpp @@ -27,6 +27,7 @@ #include "cpp/sync_point.h" #include "olap/olap_common.h" #include "olap/tablet_meta.h" +#include "olap/txn_manager.h" namespace doris { @@ -54,7 +55,7 @@ Status CloudTxnDeleteBitmapCache::get_tablet_txn_info( TTransactionId transaction_id, int64_t tablet_id, RowsetSharedPtr* rowset, DeleteBitmapPtr* delete_bitmap, RowsetIdUnorderedSet* rowset_ids, int64_t* txn_expiration, std::shared_ptr* partial_update_info, - std::shared_ptr* publish_status) { + std::shared_ptr* publish_status, TxnPublishInfo* previous_publish_info) { { std::shared_lock rlock(_rwlock); TxnKey key(transaction_id, tablet_id); @@ -68,6 +69,7 @@ Status CloudTxnDeleteBitmapCache::get_tablet_txn_info( *txn_expiration = iter->second.txn_expiration; *partial_update_info = iter->second.partial_update_info; *publish_status = iter->second.publish_status; + *previous_publish_info = iter->second.publish_info; } RETURN_IF_ERROR( get_delete_bitmap(transaction_id, tablet_id, delete_bitmap, rowset_ids, nullptr)); @@ -96,7 +98,9 @@ Status CloudTxnDeleteBitmapCache::get_delete_bitmap( handle == nullptr ? 
nullptr : reinterpret_cast(value(handle)); if (val) { *delete_bitmap = val->delete_bitmap; - *rowset_ids = val->rowset_ids; + if (rowset_ids) { + *rowset_ids = val->rowset_ids; + } // must call release handle to reduce the reference count, // otherwise there will be memory leak release(handle); @@ -153,12 +157,17 @@ void CloudTxnDeleteBitmapCache::update_tablet_txn_info(TTransactionId transactio int64_t tablet_id, DeleteBitmapPtr delete_bitmap, const RowsetIdUnorderedSet& rowset_ids, - PublishStatus publish_status) { + PublishStatus publish_status, + TxnPublishInfo publish_info) { { std::unique_lock wlock(_rwlock); TxnKey txn_key(transaction_id, tablet_id); - CHECK(_txn_map.count(txn_key) > 0); - *(_txn_map[txn_key].publish_status.get()) = publish_status; + CHECK(_txn_map.contains(txn_key)); + TxnVal& txn_val = _txn_map[txn_key]; + *(txn_val.publish_status) = publish_status; + if (publish_status == PublishStatus::SUCCEED) { + txn_val.publish_info = publish_info; + } } std::string key_str = fmt::format("{}/{}", transaction_id, tablet_id); CacheKey key(key_str); diff --git a/be/src/cloud/cloud_txn_delete_bitmap_cache.h b/be/src/cloud/cloud_txn_delete_bitmap_cache.h index 5012db6b8e5bf3f..75577ae2e3fee0a 100644 --- a/be/src/cloud/cloud_txn_delete_bitmap_cache.h +++ b/be/src/cloud/cloud_txn_delete_bitmap_cache.h @@ -42,7 +42,8 @@ class CloudTxnDeleteBitmapCache : public LRUCachePolicyTrackingManual { RowsetSharedPtr* rowset, DeleteBitmapPtr* delete_bitmap, RowsetIdUnorderedSet* rowset_ids, int64_t* txn_expiration, std::shared_ptr* partial_update_info, - std::shared_ptr* publish_status); + std::shared_ptr* publish_status, + TxnPublishInfo* previous_publish_info); void set_tablet_txn_info(TTransactionId transaction_id, int64_t tablet_id, DeleteBitmapPtr delete_bitmap, const RowsetIdUnorderedSet& rowset_ids, @@ -52,12 +53,16 @@ class CloudTxnDeleteBitmapCache : public LRUCachePolicyTrackingManual { void update_tablet_txn_info(TTransactionId transaction_id, int64_t tablet_id, DeleteBitmapPtr delete_bitmap, const RowsetIdUnorderedSet& rowset_ids, - PublishStatus publish_status); + PublishStatus publish_status, TxnPublishInfo publish_info = {}); void remove_expired_tablet_txn_info(); void remove_unused_tablet_txn_info(TTransactionId transaction_id, int64_t tablet_id); + // !!!ATTENTION!!!: the delete bitmap stored in CloudTxnDeleteBitmapCache contains sentinel marks, + // and the version in BitmapKey is DeleteBitmap::TEMP_VERSION_COMMON. 
+ // when using delete bitmap from this cache, the caller should manually remove these marks if don't need it + // and should replace versions in BitmapKey by the correct version Status get_delete_bitmap(TTransactionId transaction_id, int64_t tablet_id, DeleteBitmapPtr* delete_bitmap, RowsetIdUnorderedSet* rowset_ids, std::shared_ptr* publish_status); @@ -88,6 +93,8 @@ class CloudTxnDeleteBitmapCache : public LRUCachePolicyTrackingManual { int64_t txn_expiration; std::shared_ptr partial_update_info; std::shared_ptr publish_status = nullptr; + // used to determine if the retry needs to re-calculate the delete bitmap + TxnPublishInfo publish_info; TxnVal() : txn_expiration(0) {}; TxnVal(RowsetSharedPtr rowset_, int64_t txn_expiration_, std::shared_ptr partial_update_info_, diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index 4ca36684383939a..0fb12dd074f8b06 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -1208,17 +1208,6 @@ Status BaseTablet::check_delete_bitmap_correctness(DeleteBitmapPtr delete_bitmap return Status::OK(); } -void BaseTablet::_remove_sentinel_mark_from_delete_bitmap(DeleteBitmapPtr delete_bitmap) { - for (auto it = delete_bitmap->delete_bitmap.begin(), end = delete_bitmap->delete_bitmap.end(); - it != end;) { - if (std::get<1>(it->first) == DeleteBitmap::INVALID_SEGMENT_ID) { - it = delete_bitmap->delete_bitmap.erase(it); - } else { - ++it; - } - } -} - Status BaseTablet::update_delete_bitmap(const BaseTabletSPtr& self, TabletTxnInfo* txn_info, int64_t txn_id, int64_t txn_expiration) { SCOPED_BVAR_LATENCY(g_tablet_update_delete_bitmap_latency); @@ -1296,6 +1285,21 @@ Status BaseTablet::update_delete_bitmap(const BaseTabletSPtr& self, TabletTxnInf } } + DBUG_EXECUTE_IF("BaseTablet::update_delete_bitmap.enable_spin_wait", { + auto token = dp->param("token", "invalid_token"); + while (DebugPoints::instance()->is_enable("BaseTablet::update_delete_bitmap.block")) { + auto block_dp = DebugPoints::instance()->get_debug_point( + "BaseTablet::update_delete_bitmap.block"); + if (block_dp) { + auto wait_token = block_dp->param("wait_token", ""); + if (wait_token != token) { + break; + } + } + std::this_thread::sleep_for(std::chrono::milliseconds(50)); + } + }); + if (!rowsets_skip_alignment.empty()) { auto token = self->calc_delete_bitmap_executor()->create_token(); // set rowset_writer to nullptr to skip the alignment process @@ -1544,7 +1548,7 @@ Status BaseTablet::update_delete_bitmap_without_lock( if (!st.ok()) { LOG(WARNING) << fmt::format("delete bitmap correctness check failed in publish phase!"); } - self->_remove_sentinel_mark_from_delete_bitmap(delete_bitmap); + delete_bitmap->remove_sentinel_marks(); } for (auto& iter : delete_bitmap->delete_bitmap) { self->_tablet_meta->delete_bitmap().merge( diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h index f958d398fd5d00f..d329c786fc97817 100644 --- a/be/src/olap/base_tablet.h +++ b/be/src/olap/base_tablet.h @@ -289,7 +289,6 @@ class BaseTablet { static void _rowset_ids_difference(const RowsetIdUnorderedSet& cur, const RowsetIdUnorderedSet& pre, RowsetIdUnorderedSet* to_add, RowsetIdUnorderedSet* to_del); - static void _remove_sentinel_mark_from_delete_bitmap(DeleteBitmapPtr delete_bitmap); Status _capture_consistent_rowsets_unlocked(const std::vector& version_path, std::vector* rowsets) const; diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index a3526781dddd879..ed9a446551d00e6 100644 --- a/be/src/olap/tablet_meta.cpp +++ 
b/be/src/olap/tablet_meta.cpp @@ -1080,6 +1080,16 @@ bool DeleteBitmap::contains_agg_without_cache(const BitmapKey& bmk, uint32_t row return false; } +void DeleteBitmap::remove_sentinel_marks() { + for (auto it = delete_bitmap.begin(), end = delete_bitmap.end(); it != end;) { + if (std::get<1>(it->first) == DeleteBitmap::INVALID_SEGMENT_ID) { + it = delete_bitmap.erase(it); + } else { + ++it; + } + } +} + int DeleteBitmap::set(const BitmapKey& bmk, const roaring::Roaring& segment_delete_bitmap) { std::lock_guard l(lock); auto [_, inserted] = delete_bitmap.insert_or_assign(bmk, segment_delete_bitmap); diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index 32c6fde568c87b2..bb6b5b8cd51725b 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -516,6 +516,8 @@ class DeleteBitmap { */ std::shared_ptr get_agg(const BitmapKey& bmk) const; + void remove_sentinel_marks(); + class AggCachePolicy : public LRUCachePolicyTrackingManual { public: AggCachePolicy(size_t capacity) diff --git a/be/src/olap/txn_manager.h b/be/src/olap/txn_manager.h index 5a0a74c76a28256..5944bbf0fc31368 100644 --- a/be/src/olap/txn_manager.h +++ b/be/src/olap/txn_manager.h @@ -63,6 +63,13 @@ enum class TxnState { }; enum class PublishStatus { INIT = 0, PREPARE = 1, SUCCEED = 2 }; +struct TxnPublishInfo { + int64_t publish_version {-1}; + int64_t base_compaction_cnt {-1}; + int64_t cumulative_compaction_cnt {-1}; + int64_t cumulative_point {-1}; +}; + struct TabletTxnInfo { PUniqueId load_id; RowsetSharedPtr rowset; @@ -74,24 +81,33 @@ struct TabletTxnInfo { int64_t creation_time; bool ingest {false}; std::shared_ptr partial_update_info; + + // for cloud only, used to determine if a retry CloudTabletCalcDeleteBitmapTask + // needs to re-calculate the delete bitmap std::shared_ptr publish_status; - TxnState state {TxnState::PREPARED}; + TxnPublishInfo publish_info; + TxnState state {TxnState::PREPARED}; TabletTxnInfo() = default; TabletTxnInfo(PUniqueId load_id, RowsetSharedPtr rowset) - : load_id(load_id), rowset(rowset), creation_time(UnixSeconds()) {} + : load_id(std::move(load_id)), + rowset(std::move(rowset)), + creation_time(UnixSeconds()) {} TabletTxnInfo(PUniqueId load_id, RowsetSharedPtr rowset, bool ingest_arg) - : load_id(load_id), rowset(rowset), creation_time(UnixSeconds()), ingest(ingest_arg) {} + : load_id(std::move(load_id)), + rowset(std::move(rowset)), + creation_time(UnixSeconds()), + ingest(ingest_arg) {} TabletTxnInfo(PUniqueId load_id, RowsetSharedPtr rowset, bool merge_on_write, - DeleteBitmapPtr delete_bitmap, const RowsetIdUnorderedSet& ids) - : load_id(load_id), - rowset(rowset), + DeleteBitmapPtr delete_bitmap, RowsetIdUnorderedSet ids) + : load_id(std::move(load_id)), + rowset(std::move(rowset)), unique_key_merge_on_write(merge_on_write), - delete_bitmap(delete_bitmap), - rowset_ids(ids), + delete_bitmap(std::move(delete_bitmap)), + rowset_ids(std::move(ids)), creation_time(UnixSeconds()) {} void prepare() { state = TxnState::PREPARED; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java index 0eef0c684d60625..4e01f3a5058774b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java @@ -671,6 +671,17 @@ private void finishCalcDeleteBitmap(AgentTask task, TFinishTaskRequest request) "backend: " + task.getBackendId() + ", error_tablet_size: " + request.getErrorTabletIdsSize() + ", 
err_msg: " + request.getTaskStatus().getErrorMsgs().toString()); + } else if (request.isSetRespPartitions() + && calcDeleteBitmapTask.isFinishRequestStale(request.getRespPartitions())) { + LOG.warn("get staled response from backend: {}, report version: {}. calcDeleteBitmapTask's" + + "partitionInfos: {}. response's partitionInfos: {}", task.getBackendId(), + request.getReportVersion(), + calcDeleteBitmapTask.getCalcDeleteBimapPartitionInfos().toString(), + request.getRespPartitions().toString()); + // DELETE_BITMAP_LOCK_ERROR will be retried + calcDeleteBitmapTask.countDownToZero(TStatusCode.DELETE_BITMAP_LOCK_ERROR, + "get staled response from backend " + task.getBackendId() + ", report version: " + + request.getReportVersion()); } else { calcDeleteBitmapTask.countDownLatch(task.getBackendId(), calcDeleteBitmapTask.getTransactionId()); if (LOG.isDebugEnabled()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/task/CalcDeleteBitmapTask.java b/fe/fe-core/src/main/java/org/apache/doris/task/CalcDeleteBitmapTask.java index 4188cf61849a918..49a653c7a32c26c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/task/CalcDeleteBitmapTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/task/CalcDeleteBitmapTask.java @@ -79,6 +79,10 @@ public void countDownToZero(TStatusCode code, String errMsg) { } } + public boolean isFinishRequestStale(List respPartitionInfos) { + return !respPartitionInfos.equals(partitionInfos); + } + public void setLatch(MarkedCountDownLatch latch) { this.latch = latch; } diff --git a/gensrc/thrift/MasterService.thrift b/gensrc/thrift/MasterService.thrift index 1db7a109f55078b..ecedf0ee1afad53 100644 --- a/gensrc/thrift/MasterService.thrift +++ b/gensrc/thrift/MasterService.thrift @@ -72,6 +72,8 @@ struct TFinishTaskRequest { 17: optional map succ_tablets 18: optional map table_id_to_delta_num_rows 19: optional map> table_id_to_tablet_id_to_delta_num_rows + // for Cloud mow table only, used by FE to check if the response is for the latest request + 20: optional list resp_partitions; } struct TTablet { diff --git a/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.out b/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.out new file mode 100644 index 000000000000000..09882a909b391b7 --- /dev/null +++ b/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.out @@ -0,0 +1,16 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +1 1 1 +2 2 2 +3 3 3 + +-- !sql -- +1 1 1 +2 2 2 +3 3 3 + +-- !sql -- +1 999 999 +2 888 888 +3 3 3 + diff --git a/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.out b/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.out new file mode 100644 index 000000000000000..6fd2178fd94ac93 --- /dev/null +++ b/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.out @@ -0,0 +1,16 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !sql -- +1 1 1 +2 2 2 +3 3 3 + +-- !sql -- +1 666 666 +2 555 555 +3 3 3 + +-- !sql -- +1 999 999 +2 888 888 +3 3 3 + diff --git a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.groovy b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.groovy new file mode 100644 index 000000000000000..8f4fa45700b81fd --- /dev/null +++ b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_compaction_conflict.groovy @@ -0,0 +1,129 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.junit.Assert +import java.util.concurrent.TimeUnit +import org.awaitility.Awaitility + +suite("test_cloud_mow_stale_resp_load_compaction_conflict", "nonConcurrent") { + if (!isCloudMode()) { + return + } + + def customFeConfig = [ + delete_bitmap_lock_expiration_seconds : 10, + calculate_delete_bitmap_task_timeout_seconds : 15, + ] + + setFeConfigTemporary(customFeConfig) { + + def table1 = "test_cloud_mow_stale_resp_load_compaction_conflict" + sql "DROP TABLE IF EXISTS ${table1} FORCE;" + sql """ CREATE TABLE IF NOT EXISTS ${table1} ( + `k1` int NOT NULL, + `c1` int, + `c2` int + )UNIQUE KEY(k1) + DISTRIBUTED BY HASH(k1) BUCKETS 1 + PROPERTIES ( + "enable_unique_key_merge_on_write" = "true", + "disable_auto_compaction" = "true", + "replication_num" = "1"); """ + + sql "insert into ${table1} values(1,1,1);" + sql "insert into ${table1} values(2,2,2);" + sql "insert into ${table1} values(3,3,3);" + sql "sync;" + order_qt_sql "select * from ${table1};" + + + def beNodes = sql_return_maparray("show backends;") + def tabletStat = sql_return_maparray("show tablets from ${table1};").get(0) + def tabletBackendId = tabletStat.BackendId + def tabletId = tabletStat.TabletId + def tabletBackend; + for (def be : beNodes) { + if (be.BackendId == tabletBackendId) { + tabletBackend = be + break; + } + } + logger.info("tablet ${tabletId} on backend ${tabletBackend.Host} with backendId=${tabletBackend.BackendId}"); + + + try { + GetDebugPoint().clearDebugPointsForAllFEs() + GetDebugPoint().clearDebugPointsForAllBEs() + + // block the first load + GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.enable_spin_wait", [token: "token1"]) + GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.block", [wait_token: "token1"]) + + // the first load + t1 = Thread.start { + sql "insert into ${table1} values(1,999,999),(2,888,888);" + } + + // wait util the first load's delete bitmap update lock expired + // to ensure that the second load can take the delete bitmap update lock + // Config.delete_bitmap_lock_expiration_seconds = 10s + Thread.sleep(11 * 1000) + + // 
trigger full compaction on tablet + logger.info("trigger compaction on another BE ${tabletBackend.Host} with backendId=${tabletBackend.BackendId}") + def (code, out, err) = be_run_full_compaction(tabletBackend.Host, tabletBackend.HttpPort, tabletId) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + Assert.assertEquals(code, 0) + def compactJson = parseJson(out.trim()) + Assert.assertEquals("success", compactJson.status.toLowerCase()) + + // wait for full compaction to complete + Awaitility.await().atMost(3, TimeUnit.SECONDS).pollDelay(200, TimeUnit.MILLISECONDS).pollInterval(100, TimeUnit.MILLISECONDS).until( + { + (code, out, err) = be_get_compaction_status(tabletBackend.Host, tabletBackend.HttpPort, tabletId) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + Assert.assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + Assert.assertEquals("success", compactionStatus.status.toLowerCase()) + return !compactionStatus.run_status + } + ) + order_qt_sql "select * from ${table1};" + + + // keep waiting util the delete bitmap calculation timeout(Config.calculate_delete_bitmap_task_timeout_seconds = 15s) + // and the coordinator BE will retry to commit the first load's txn + Thread.sleep(15 * 1000) + + // let the first partial update load finish + GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.block") + t1.join() + + Thread.sleep(1000) + + order_qt_sql "select * from ${table1};" + + } catch(Exception e) { + logger.info(e.getMessage()) + throw e + } finally { + GetDebugPoint().clearDebugPointsForAllBEs() + } + + sql "DROP TABLE IF EXISTS ${table1};" + } +} diff --git a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.groovy b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.groovy new file mode 100644 index 000000000000000..377ff70cf2101dd --- /dev/null +++ b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_stale_resp_load_load_conflict.groovy @@ -0,0 +1,97 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_cloud_mow_stale_resp_load_load_conflict", "nonConcurrent") { + if (!isCloudMode()) { + return + } + + def customFeConfig = [ + delete_bitmap_lock_expiration_seconds : 10, + calculate_delete_bitmap_task_timeout_seconds : 15, + ] + + setFeConfigTemporary(customFeConfig) { + + def table1 = "test_cloud_mow_stale_resp_load_load_conflict" + sql "DROP TABLE IF EXISTS ${table1} FORCE;" + sql """ CREATE TABLE IF NOT EXISTS ${table1} ( + `k1` int NOT NULL, + `c1` int, + `c2` int + )UNIQUE KEY(k1) + DISTRIBUTED BY HASH(k1) BUCKETS 1 + PROPERTIES ( + "enable_unique_key_merge_on_write" = "true", + "disable_auto_compaction" = "true", + "replication_num" = "1"); """ + + sql "insert into ${table1} values(1,1,1);" + sql "insert into ${table1} values(2,2,2);" + sql "insert into ${table1} values(3,3,3);" + sql "sync;" + order_qt_sql "select * from ${table1};" + + try { + GetDebugPoint().clearDebugPointsForAllFEs() + GetDebugPoint().clearDebugPointsForAllBEs() + + // block the first load + GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.enable_spin_wait", [token: "token1"]) + GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.block", [wait_token: "token1"]) + + // the first load + t1 = Thread.start { + sql "insert into ${table1} values(1,999,999),(2,888,888);" + } + + // wait util the first load's delete bitmap update lock expired + // to ensure that the second load can take the delete bitmap update lock + // Config.delete_bitmap_lock_expiration_seconds = 10s + Thread.sleep(11 * 1000) + + // the second load + GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.enable_spin_wait", [token: "token2"]) + Thread.sleep(200) + + sql "insert into ${table1}(k1,c1,c2) values(1,666,666),(2,555,555);" + + order_qt_sql "select * from ${table1};" + + + // keep waiting util the delete bitmap calculation timeout(Config.calculate_delete_bitmap_task_timeout_seconds = 15s) + // and the coordinator BE will retry to commit the first load's txn + Thread.sleep(15 * 1000) + + // let the first partial update load finish + GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.block") + t1.join() + + Thread.sleep(1000) + + order_qt_sql "select * from ${table1};" + + } catch(Exception e) { + logger.info(e.getMessage()) + throw e + } finally { + GetDebugPoint().clearDebugPointsForAllBEs() + } + + sql "DROP TABLE IF EXISTS ${table1};" + } +} From 703a0dc48a9aaec529cb37d3c8351177925489dc Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Sat, 10 Aug 2024 09:44:14 +0800 Subject: [PATCH 24/94] [Fix](regression) Fix flaky case `test_schema_change_unique_mow` (#39173) ## Proposed changes --- .../test_schema_change_unique_mow.groovy | 99 ++++++++----------- 1 file changed, 42 insertions(+), 57 deletions(-) diff --git a/regression-test/suites/schema_change_p0/test_schema_change_unique_mow.groovy b/regression-test/suites/schema_change_p0/test_schema_change_unique_mow.groovy index ad4a19d8d94b120..71941b87882e90f 100644 --- a/regression-test/suites/schema_change_p0/test_schema_change_unique_mow.groovy +++ b/regression-test/suites/schema_change_p0/test_schema_change_unique_mow.groovy @@ -32,6 +32,8 @@ import org.apache.http.client.methods.RequestBuilder import org.apache.http.entity.StringEntity import org.apache.http.client.methods.CloseableHttpResponse import org.apache.http.util.EntityUtils +import java.util.concurrent.TimeUnit +import org.awaitility.Awaitility suite("test_schema_change_unique_mow", "p0") { def tableName3 = 
"test_all_unique_mow" @@ -97,40 +99,31 @@ suite("test_schema_change_unique_mow", "p0") { execStreamLoad() sql """ alter table ${tableName3} modify column k4 string NULL""" - sleep(10) - int max_try_num = 60 - while (max_try_num--) { - String res = getJobState(tableName3) - if (res == "FINISHED" || res == "CANCELLED") { - assertEquals("FINISHED", res) - sleep(3000) - break - } else { - execStreamLoad() - if (max_try_num < 1) { - println "test timeout," + "state:" + res - assertEquals("FINISHED",res) + + Awaitility.await().atMost(12, TimeUnit.SECONDS).pollDelay(10, TimeUnit.MILLISECONDS).pollInterval(10, TimeUnit.MILLISECONDS).until( + { + String res = getJobState(tableName3) + if (res == "FINISHED" || res == "CANCELLED") { + assertEquals("FINISHED", res) + return true } + execStreamLoad() + return false } - } + ) sql """ alter table ${tableName3} modify column k2 bigint(11) key NULL""" - sleep(10) - max_try_num = 60 - while (max_try_num--) { - String res = getJobState(tableName3) - if (res == "FINISHED" || res == "CANCELLED") { - assertEquals("FINISHED", res) - sleep(3000) - break - } else { - execStreamLoad() - if (max_try_num < 1) { - println "test timeout," + "state:" + res - assertEquals("FINISHED",res) + Awaitility.await().atMost(12, TimeUnit.SECONDS).pollDelay(10, TimeUnit.MILLISECONDS).pollInterval(10, TimeUnit.MILLISECONDS).until( + { + String res = getJobState(tableName3) + if (res == "FINISHED" || res == "CANCELLED") { + assertEquals("FINISHED", res) + return true } + execStreamLoad() + return false } - } + ) /* sql """ create materialized view view_1 as select k2, k1, k4, k5 from ${tableName3} """ @@ -153,47 +146,39 @@ suite("test_schema_change_unique_mow", "p0") { */ sql """ alter table ${tableName3} modify column k5 string NULL""" - sleep(10) - max_try_num = 60 - while (max_try_num--) { - String res = getJobState(tableName3) - if (res == "FINISHED" || res == "CANCELLED") { - assertEquals("FINISHED", res) - sleep(3000) - break - } else { - execStreamLoad() - if (max_try_num < 1) { - println "test timeout," + "state:" + res - assertEquals("FINISHED",res) + Awaitility.await().atMost(12, TimeUnit.SECONDS).pollDelay(10, TimeUnit.MILLISECONDS).pollInterval(10, TimeUnit.MILLISECONDS).until( + { + String res = getJobState(tableName3) + if (res == "FINISHED" || res == "CANCELLED") { + assertEquals("FINISHED", res) + return true } + execStreamLoad() + return false } - } + ) sql """ alter table ${tableName3} add column v14 int NOT NULL default "1" after k13 """ sql """ insert into ${tableName3} values (10001, 2, 3, 4, 5, 6.6, 1.7, 8.8, 'a', 'b', 'c', '2021-10-30', '2021-10-30 00:00:00', 10086) """ sql """ alter table ${tableName3} modify column v14 int NULL default "1" """ - sleep(10) - max_try_num = 6000 - while (max_try_num--) { - String res = getJobState(tableName3) - if (res == "FINISHED" || res == "CANCELLED") { - assertEquals("FINISHED", res) - sleep(3000) - break - } else { + + int cnt = 6000 + Awaitility.await().atMost(20, TimeUnit.SECONDS).pollDelay(10, TimeUnit.MILLISECONDS).pollInterval(10, TimeUnit.MILLISECONDS).until( + { + String res = getJobState(tableName3) + if (res == "FINISHED" || res == "CANCELLED") { + assertEquals("FINISHED", res) + return true + } + cnt--; int val = 100000 + max_try_num sql """ insert into ${tableName3} values (${val}, 2, 3, 4, 5, 6.6, 1.7, 8.8, 'a', 'b', 'c', '2021-10-30', '2021-10-30 00:00:00', 9527) """ - sleep(10) - if (max_try_num < 1) { - println "test timeout," + "state:" + res - assertEquals("FINISHED",res) - } + return false } - } + ) sql 
""" alter table ${tableName3} drop column v14 """ execStreamLoad() From 7bab58a7de010297f11d9756c942f1dcced25c6c Mon Sep 17 00:00:00 2001 From: Pxl Date: Sat, 10 Aug 2024 13:49:54 +0800 Subject: [PATCH 25/94] [Chore](runtime-filter) avoid dcheck fail when rf merge failed (#39172) ## Proposed changes avoid dcheck fail when rf merge failed there is a difference in the logic of calculating bf size between 2.1 and 3.0, so the merge will fail this pr is to prevent core dump caused by dcheck failure during rolling upgrade. --- be/src/runtime/runtime_filter_mgr.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/be/src/runtime/runtime_filter_mgr.cpp b/be/src/runtime/runtime_filter_mgr.cpp index 625b487d0ee1f31..8eb3ab5eebf8d73 100644 --- a/be/src/runtime/runtime_filter_mgr.cpp +++ b/be/src/runtime/runtime_filter_mgr.cpp @@ -396,12 +396,7 @@ Status RuntimeFilterMergeControllerEntity::merge(const PMergeFilterRequest* requ RuntimeFilterWrapperHolder holder; RETURN_IF_ERROR(IRuntimeFilter::create_wrapper(¶ms, holder.getHandle())); - auto st = cnt_val->filter->merge_from(holder.getHandle()->get()); - if (!st) { - // prevent error ignored - DCHECK(false) << st.msg(); - return st; - } + RETURN_IF_ERROR(cnt_val->filter->merge_from(holder.getHandle()->get())); cnt_val->arrive_id.insert(UniqueId(request->fragment_instance_id())); merged_size = cnt_val->arrive_id.size(); From 947397e999429104ce941e13df1d7369f4077160 Mon Sep 17 00:00:00 2001 From: zhangstar333 <87313068+zhangstar333@users.noreply.github.com> Date: Sat, 10 Aug 2024 18:27:31 +0800 Subject: [PATCH 26/94] [Bug](rf) fix rf of in filter cast data as different class type maybe return wrong result (#39026) two point have changed: 1. in batch_assign function: const std::string& string_value = column.stringval(); if call **insert(&string_value)**, will cast as string_ref: reinterpret_cast(data), this maybe error; ``` void insert(const void* data) override { if (data == nullptr) { _contains_null = true; return; } const auto* value = reinterpret_cast(data); std::string str_value(value->data, value->size); _set.insert(str_value); } ``` 2. in batch_copy function, will cast void_value as T* but the it->get_value() return is StringRef, so need change T as StringRef ``` template void batch_copy(PInFilter* filter, HybridSetBase::IteratorBase* it, void (*set_func)(PColumnValue*, const T*)) { while (it->has_next()) { const void* void_value = it->get_value(); auto origin_value = reinterpret_cast(void_value); set_func(filter->add_values(), origin_value); it->next(); } } ``` --- be/src/exprs/runtime_filter.cpp | 12 ++++++++---- .../data/query_p0/join/test_runtimefilter_2.out | 9 +++++++++ .../suites/query_p0/join/test_runtimefilter_2.groovy | 11 +++++++++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp index c6fd3338b14656b..5a241326f9099b1 100644 --- a/be/src/exprs/runtime_filter.cpp +++ b/be/src/exprs/runtime_filter.cpp @@ -694,8 +694,10 @@ class RuntimePredicateWrapper { case TYPE_CHAR: case TYPE_STRING: { batch_assign(in_filter, [](std::shared_ptr& set, PColumnValue& column) { - const auto& string_val_ref = column.stringval(); - set->insert(&string_val_ref); + const std::string& string_value = column.stringval(); + // string_value is std::string, call insert(data, size) function in StringSet will not cast as StringRef + // so could avoid some cast error at different class object. 
+ set->insert((void*)string_value.data(), string_value.size()); }); break; } @@ -1630,8 +1632,10 @@ void IRuntimeFilter::to_protobuf(PInFilter* filter) { case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_STRING: { - batch_copy(filter, it, [](PColumnValue* column, const std::string* value) { - column->set_stringval(*value); + //const void* void_value = it->get_value(); + //Now the get_value return void* is StringRef + batch_copy(filter, it, [](PColumnValue* column, const StringRef* value) { + column->set_stringval(value->to_string()); }); return; } diff --git a/regression-test/data/query_p0/join/test_runtimefilter_2.out b/regression-test/data/query_p0/join/test_runtimefilter_2.out index d6cc7fc59a016b4..005406e6793fa0a 100644 --- a/regression-test/data/query_p0/join/test_runtimefilter_2.out +++ b/regression-test/data/query_p0/join/test_runtimefilter_2.out @@ -2,3 +2,12 @@ -- !select_1 -- aaa +-- !select_2 -- +aaa + +-- !select_3 -- +BSDSAE1018 1 1 true BSDSAE1018 1 true true + +-- !select_4 -- +2 3 BSDSAE1018 + diff --git a/regression-test/suites/query_p0/join/test_runtimefilter_2.groovy b/regression-test/suites/query_p0/join/test_runtimefilter_2.groovy index 6e6e57c6c2da29f..50a61a366b1bd25 100644 --- a/regression-test/suites/query_p0/join/test_runtimefilter_2.groovy +++ b/regression-test/suites/query_p0/join/test_runtimefilter_2.groovy @@ -30,4 +30,15 @@ qt_select_1 """ select "aaa" FROM t_ods_tpisyncjpa4_2 tpisyncjpa4 inner join ( SELECT USER_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpa4_2 WHERE STATE_ID = '1' GROUP BY USER_ID ) jpa4 on tpisyncjpa4.USER_ID = jpa4.USER_ID; """ + sql """set runtime_filter_type='IN';""" + qt_select_2 """ + select "aaa" FROM t_ods_tpisyncjpa4_2 tpisyncjpa4 inner join ( SELECT USER_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpa4_2 WHERE STATE_ID = '1' GROUP BY USER_ID ) jpa4 on tpisyncjpa4.USER_ID = jpa4.USER_ID; + """ + qt_select_3 """ + select *, tpisyncjpp1.POST_ID=jpp1.POST_ID, tpisyncjpp1.INTERNAL_CODE=jpp1.INTERNAL_CODE from ( select tpisyncjpp1.POST_ID,tpisyncjpp1.INTERNAL_CODE as INTERNAL_CODE, tpisyncjpp1.STATE_ID, tpisyncjpp1.STATE_ID ='1' from ( select tpisyncjpa4.* from t_ods_tpisyncjpa4_2 tpisyncjpa4 inner join [broadcast] ( SELECT USER_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpa4_2 WHERE STATE_ID = '1' GROUP BY USER_ID )jpa4 on tpisyncjpa4.USER_ID=jpa4.USER_ID and tpisyncjpa4.INTERNAL_CODE=jpa4.INTERNAL_CODE where tpisyncjpa4.STATE_ID ='1' ) tpisyncjpa4 inner join [broadcast] t_ods_tpisyncjpp1_2 tpisyncjpp1 where tpisyncjpa4.USER_ID = tpisyncjpp1.USER_ID AND tpisyncjpp1.STATE_ID ='1' AND tpisyncjpp1.POST_ID='BSDSAE1018' ) tpisyncjpp1 inner join [broadcast] ( SELECT POST_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpp1_2 WHERE STATE_ID = '1' GROUP BY POST_ID )jpp1 on tpisyncjpp1.POST_ID=jpp1.POST_ID and tpisyncjpp1.INTERNAL_CODE=jpp1.INTERNAL_CODE; + """ + qt_select_4 """ + select DISTINCT tpisyncjpa4.USER_ID as USER_ID, tpisyncjpa4.USER_NAME as USER_NAME, tpisyncjpp1.POST_ID AS "T4_POST_ID" FROM t_ods_tpisyncjpa4_2 tpisyncjpa4 cross join [shuffle] t_ods_tpisyncjpp1_2 tpisyncjpp1 inner join ( SELECT USER_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpa4_2 WHERE STATE_ID = '1' GROUP BY USER_ID )jpa4 on tpisyncjpa4.USER_ID=jpa4.USER_ID and tpisyncjpa4.INTERNAL_CODE=jpa4.INTERNAL_CODE inner join [shuffle] ( SELECT POST_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpp1_2 WHERE STATE_ID = '1' GROUP BY POST_ID )jpp1 on tpisyncjpp1.POST_ID=jpp1.POST_ID and 
tpisyncjpp1.INTERNAL_CODE=jpp1.INTERNAL_CODE where tpisyncjpa4.USER_ID = tpisyncjpp1.USER_ID AND tpisyncjpp1.STATE_ID ='1' AND tpisyncjpa4.STATE_ID ='1' AND tpisyncjpp1.POST_ID='BSDSAE1018'; + """ + } \ No newline at end of file From 0e9951f9cb4a609cb88ef47b50334a724dd17cb6 Mon Sep 17 00:00:00 2001 From: zhangstar333 <87313068+zhangstar333@users.noreply.github.com> Date: Sat, 10 Aug 2024 18:31:37 +0800 Subject: [PATCH 27/94] [Bug](partition-topn) fix partition-topn calculate partition input rows have error (#39100) 1. fix the _sorted_partition_input_rows calculate have error, it's should only update the rows which have been emplace into hash table, not include the rows which is pass through. 2. add some counter in profile could get some info of about input/output rows have been do partition-topn. --- .../pipeline/exec/partition_sort_sink_operator.cpp | 12 ++++++++---- be/src/pipeline/exec/partition_sort_sink_operator.h | 3 ++- .../pipeline/exec/partition_sort_source_operator.cpp | 8 +++++--- .../pipeline/exec/partition_sort_source_operator.h | 6 +++--- 4 files changed, 18 insertions(+), 11 deletions(-) diff --git a/be/src/pipeline/exec/partition_sort_sink_operator.cpp b/be/src/pipeline/exec/partition_sort_sink_operator.cpp index 62dafd548492054..404d9095f96a9f2 100644 --- a/be/src/pipeline/exec/partition_sort_sink_operator.cpp +++ b/be/src/pipeline/exec/partition_sort_sink_operator.cpp @@ -115,6 +115,8 @@ Status PartitionSortSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo _selector_block_timer = ADD_TIMER(_profile, "SelectorBlockTime"); _emplace_key_timer = ADD_TIMER(_profile, "EmplaceKeyTime"); _passthrough_rows_counter = ADD_COUNTER(_profile, "PassThroughRowsCounter", TUnit::UNIT); + _sorted_partition_input_rows_counter = + ADD_COUNTER(_profile, "SortedPartitionInputRows", TUnit::UNIT); _partition_sort_info = std::make_shared( &_vsort_exec_exprs, p._limit, 0, p._pool, p._is_asc_order, p._nulls_first, p._child_x->row_desc(), state, _profile, p._has_global_limit, p._partition_inner_limit, @@ -173,7 +175,6 @@ Status PartitionSortSinkOperatorX::sink(RuntimeState* state, vectorized::Block* SCOPED_TIMER(local_state.exec_time_counter()); if (current_rows > 0) { COUNTER_UPDATE(local_state.rows_input_counter(), (int64_t)input_block->rows()); - local_state.child_input_rows = local_state.child_input_rows + current_rows; if (UNLIKELY(_partition_exprs_num == 0)) { if (UNLIKELY(local_state._value_places.empty())) { local_state._value_places.push_back(_pool->add(new PartitionBlocks( @@ -185,10 +186,9 @@ Status PartitionSortSinkOperatorX::sink(RuntimeState* state, vectorized::Block* //if is TWO_PHASE_GLOBAL, must be sort all data thought partition num threshold have been exceeded. if (_topn_phase != TPartTopNPhase::TWO_PHASE_GLOBAL && local_state._num_partition > config::partition_topn_partition_threshold && - local_state.child_input_rows < 10000 * local_state._num_partition) { + local_state._sorted_partition_input_rows < 10000 * local_state._num_partition) { { - COUNTER_UPDATE(local_state._passthrough_rows_counter, - (int64_t)input_block->rows()); + COUNTER_UPDATE(local_state._passthrough_rows_counter, (int64_t)current_rows); std::lock_guard lock(local_state._shared_state->buffer_mutex); local_state._shared_state->blocks_buffer.push(std::move(*input_block)); // buffer have data, source could read this. 
@@ -198,6 +198,8 @@ Status PartitionSortSinkOperatorX::sink(RuntimeState* state, vectorized::Block* RETURN_IF_ERROR(_split_block_by_partition(input_block, local_state, eos)); RETURN_IF_CANCELLED(state); input_block->clear_column_data(); + local_state._sorted_partition_input_rows = + local_state._sorted_partition_input_rows + current_rows; } } } @@ -220,6 +222,8 @@ Status PartitionSortSinkOperatorX::sink(RuntimeState* state, vectorized::Block* } COUNTER_SET(local_state._hash_table_size_counter, int64_t(local_state._num_partition)); + COUNTER_SET(local_state._sorted_partition_input_rows_counter, + local_state._sorted_partition_input_rows); //so all data from child have sink completed { std::unique_lock lc(local_state._shared_state->sink_eos_lock); diff --git a/be/src/pipeline/exec/partition_sort_sink_operator.h b/be/src/pipeline/exec/partition_sort_sink_operator.h index b7e83763f1dd944..25ad0309bdeac83 100644 --- a/be/src/pipeline/exec/partition_sort_sink_operator.h +++ b/be/src/pipeline/exec/partition_sort_sink_operator.h @@ -224,7 +224,7 @@ class PartitionSortSinkLocalState : public PipelineXSinkLocalState _value_places; int _num_partition = 0; std::vector _partition_columns; @@ -238,6 +238,7 @@ class PartitionSortSinkLocalState : public PipelineXSinkLocalStateempty()) { COUNTER_UPDATE(local_state.blocks_returned_counter(), 1); - COUNTER_UPDATE(local_state.rows_returned_counter(), output_block->rows()); + local_state._num_rows_returned += output_block->rows(); } return Status::OK(); } @@ -79,7 +81,7 @@ Status PartitionSortSourceOperatorX::get_block(RuntimeState* state, vectorized:: } if (!output_block->empty()) { COUNTER_UPDATE(local_state.blocks_returned_counter(), 1); - COUNTER_UPDATE(local_state.rows_returned_counter(), output_block->rows()); + local_state._num_rows_returned += output_block->rows(); } return Status::OK(); } @@ -98,7 +100,7 @@ Status PartitionSortSourceOperatorX::get_sorted_block(RuntimeState* state, //current sort have eos, so get next idx auto rows = local_state._shared_state->partition_sorts[local_state._sort_idx] ->get_output_rows(); - local_state._num_rows_returned += rows; + COUNTER_UPDATE(local_state._sorted_partition_output_rows_counter, rows); local_state._shared_state->partition_sorts[local_state._sort_idx].reset(nullptr); local_state._sort_idx++; } diff --git a/be/src/pipeline/exec/partition_sort_source_operator.h b/be/src/pipeline/exec/partition_sort_source_operator.h index 4b5589c0e8f0cd1..1f75e1f49d4cf76 100644 --- a/be/src/pipeline/exec/partition_sort_source_operator.h +++ b/be/src/pipeline/exec/partition_sort_source_operator.h @@ -34,14 +34,14 @@ class PartitionSortSourceLocalState final ENABLE_FACTORY_CREATOR(PartitionSortSourceLocalState); using Base = PipelineXLocalState; PartitionSortSourceLocalState(RuntimeState* state, OperatorXBase* parent) - : PipelineXLocalState(state, parent), - _get_sorted_timer(nullptr) {} + : PipelineXLocalState(state, parent) {} Status init(RuntimeState* state, LocalStateInfo& info) override; private: friend class PartitionSortSourceOperatorX; - RuntimeProfile::Counter* _get_sorted_timer; + RuntimeProfile::Counter* _get_sorted_timer = nullptr; + RuntimeProfile::Counter* _sorted_partition_output_rows_counter = nullptr; std::atomic _sort_idx = 0; }; From 289100b063a3f739851477c74970e4b19a56b1db Mon Sep 17 00:00:00 2001 From: Mingyu Chen Date: Sat, 10 Aug 2024 21:22:05 +0800 Subject: [PATCH 28/94] [opt](catalog) modify some meta cache logic (#38506) 1. 
Add a new FE config `max_meta_object_cache_num` to control the meta number of db/table in external catalog 2. Shorten the expire time of some cache to make data more refresh --- .../java/org/apache/doris/common/Config.java | 20 +++++++++++-------- .../doris/datasource/ExternalCatalog.java | 4 +--- .../doris/datasource/ExternalDatabase.java | 2 +- .../datasource/hive/HiveMetaStoreCache.java | 2 +- .../source/HudiCachedPartitionProcessor.java | 4 ++-- .../iceberg/IcebergMetadataCache.java | 8 ++++---- .../doris/datasource/metacache/MetaCache.java | 2 +- 7 files changed, 22 insertions(+), 20 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index de3c8c2c6621350..3588a3a0bffddbd 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1979,16 +1979,20 @@ public class Config extends ConfigBase { * Max cache num of hive partition. * Decrease this value if FE's memory is small */ - @ConfField(mutable = false, masterOnly = false) - public static long max_hive_partition_cache_num = 100000; + @ConfField(description = {"Hive Metastore 表级别分区缓存的最大数量。", + "Max cache number of partition at table level in Hive Metastore."}) + public static long max_hive_partition_cache_num = 10000; - @ConfField(mutable = false, masterOnly = false, description = {"Hive表名缓存的最大数量。", - "Max cache number of hive table name list."}) - public static long max_hive_table_cache_num = 1000; + @ConfField(description = {"Hudi/Iceberg 表级别缓存的最大数量。", + "Max cache number of hudi/iceberg table."}) + public static long max_external_table_cache_num = 1000; - @ConfField(mutable = false, masterOnly = false, description = { - "Hive分区表缓存的最大数量", "Max cache number of hive partition table" - }) + @ConfField(description = {"External Catalog 中,Database 和 Table 的实例缓存的最大数量。", + "Max cache number of database and table instance in external catalog."}) + public static long max_meta_object_cache_num = 1000; + + @ConfField(description = {"Hive分区表缓存的最大数量", + "Max cache number of hive partition table"}) public static long max_hive_partition_table_cache_num = 1000; @ConfField(mutable = false, masterOnly = false, description = {"获取Hive分区值时候的最大返回数量,-1代表没有限制。", diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java index 4f587ab74659584..2dfcec1d8aef4d7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalCatalog.java @@ -238,7 +238,7 @@ public final synchronized void makeSureInitialized() { name, OptionalLong.of(86400L), OptionalLong.of(Config.external_cache_expire_time_minutes_after_access * 60L), - Config.max_hive_table_cache_num, + Config.max_meta_object_cache_num, ignored -> getFilteredDatabaseNames(), dbName -> Optional.ofNullable( buildDbForInit(dbName, Util.genIdByName(name, dbName), logType)), @@ -660,8 +660,6 @@ protected ExternalDatabase buildDbForInit(String dbName return new IcebergExternalDatabase(this, dbId, dbName); case MAX_COMPUTE: return new MaxComputeExternalDatabase(this, dbId, dbName); - //case HUDI: - //return new HudiExternalDatabase(this, dbId, dbName); case LAKESOUL: return new LakeSoulExternalDatabase(this, dbId, dbName); case TEST: diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalDatabase.java 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalDatabase.java index b564a17ce8687b9..dc6f9aaea73d8c5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalDatabase.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalDatabase.java @@ -145,7 +145,7 @@ public final synchronized void makeSureInitialized() { name, OptionalLong.of(86400L), OptionalLong.of(Config.external_cache_expire_time_minutes_after_access * 60L), - Config.max_hive_table_cache_num, + Config.max_meta_object_cache_num, ignored -> listTableNames(), tableName -> Optional.ofNullable( buildTableForInit(tableName, diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java index da88a03f2ebabe4..631362a5b417019 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java @@ -177,7 +177,7 @@ public void setNewFileCache() { CacheFactory fileCacheFactory = new CacheFactory( OptionalLong.of(fileMetaCacheTtlSecond >= HMSExternalCatalog.FILE_META_CACHE_TTL_DISABLE_CACHE - ? fileMetaCacheTtlSecond : 86400L), + ? fileMetaCacheTtlSecond : 28800L), OptionalLong.of(Config.external_cache_expire_time_minutes_after_access * 60L), Config.max_external_file_cache_num, false, diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java index d9c1c2082717671..2372b88e0db32e6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiCachedPartitionProcessor.java @@ -52,9 +52,9 @@ public HudiCachedPartitionProcessor(long catalogId, ExecutorService executor) { this.catalogId = catalogId; this.executor = executor; CacheFactory partitionCacheFactory = new CacheFactory( - OptionalLong.of(86400L), + OptionalLong.of(28800L), OptionalLong.of(Config.external_cache_expire_time_minutes_after_access * 60), - Config.max_hive_table_cache_num, + Config.max_external_table_cache_num, false, null); this.partitionCache = partitionCacheFactory.buildCache(key -> new TablePartitionValues(), null, executor); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java index dc11a6cacc24cf4..13bd9650978f81b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergMetadataCache.java @@ -55,17 +55,17 @@ public class IcebergMetadataCache { public IcebergMetadataCache(ExecutorService executor) { CacheFactory snapshotListCacheFactory = new CacheFactory( - OptionalLong.of(86400L), + OptionalLong.of(28800L), OptionalLong.of(Config.external_cache_expire_time_minutes_after_access * 60), - Config.max_hive_table_cache_num, + Config.max_external_table_cache_num, false, null); this.snapshotListCache = snapshotListCacheFactory.buildCache(key -> loadSnapshots(key), null, executor); CacheFactory tableCacheFactory = new CacheFactory( - OptionalLong.of(86400L), + OptionalLong.of(28800L), OptionalLong.of(Config.external_cache_expire_time_minutes_after_access * 60), - 
Config.max_hive_table_cache_num, + Config.max_external_table_cache_num, false, null); this.tableCache = tableCacheFactory.buildCache(key -> loadTable(key), null, executor); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/metacache/MetaCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/metacache/MetaCache.java index c251db3a5c19f79..e3ad8668fb55258 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/metacache/MetaCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/metacache/MetaCache.java @@ -57,7 +57,7 @@ public MetaCache(String name, CacheFactory namesCacheFactory = new CacheFactory( expireAfterWriteSec, refreshAfterWriteSec, - maxSize, + 1, // names cache has one and only one entry true, null); CacheFactory objCacheFactory = new CacheFactory( From de31d99d667556747e9f17af7ea3337ce28f192c Mon Sep 17 00:00:00 2001 From: Siyang Tang <82279870+TangSiyang2001@users.noreply.github.com> Date: Sat, 10 Aug 2024 23:31:08 +0800 Subject: [PATCH 29/94] [fix](schema-change) Fix potential data race when a schema change jobs is set to cancelled but the table state is still SCHEMA_CHANGE (#39164) ## Proposed changes Set job cancel state after table state changed to normal. --- .../java/org/apache/doris/alter/SchemaChangeJobV2.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java index f0d0df319c9390e..539ebcb6834f19e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeJobV2.java @@ -755,11 +755,11 @@ protected synchronized boolean cancelImpl(String errMsg) { pruneMeta(); this.errMsg = errMsg; this.finishedTimeMs = System.currentTimeMillis(); - LOG.info("cancel {} job {}, err: {}", this.type, jobId, errMsg); - Env.getCurrentEnv().getEditLog().logAlterJob(this); - changeTableState(dbId, tableId, OlapTableState.NORMAL); LOG.info("set table's state to NORMAL when cancel, table id: {}, job id: {}", tableId, jobId); + jobState = JobState.CANCELLED; + Env.getCurrentEnv().getEditLog().logAlterJob(this); + LOG.info("cancel {} job {}, err: {}", this.type, jobId, errMsg); postProcessShadowIndex(); return true; @@ -797,8 +797,6 @@ private void cancelInternal() { } } } - - jobState = JobState.CANCELLED; } // Check whether transactions of the given database which txnId is less than 'watershedTxnId' are finished From 47bbca9284cf4e3f83eb5092de3b49b9af6726cb Mon Sep 17 00:00:00 2001 From: zy-kkk Date: Sat, 10 Aug 2024 23:32:40 +0800 Subject: [PATCH 30/94] [fix](regression) fix some jdbc datasource docker health check (#39141) Verify that the database table has been created by adding the database table to the SQL in the health check script. 
--- .../clickhouse/clickhouse.yaml.tpl | 2 +- .../clickhouse/init/05-check.sql | 23 +++++++++++++++++++ .../docker-compose/mariadb/init/05-check.sql | 21 +++++++++++++++++ .../mariadb/mariadb-10.yaml.tpl | 2 +- .../docker-compose/mysql/init/05-check.sql | 21 +++++++++++++++++ .../docker-compose/mysql/mysql-5.7.yaml.tpl | 2 +- .../docker-compose/oracle/init/05-check.sql | 21 +++++++++++++++++ .../docker-compose/oracle/oracle-11.yaml.tpl | 2 +- .../postgresql/init/05-check.sql | 21 +++++++++++++++++ .../postgresql/postgresql-14.yaml.tpl | 2 +- 10 files changed, 112 insertions(+), 5 deletions(-) create mode 100644 docker/thirdparties/docker-compose/clickhouse/init/05-check.sql create mode 100644 docker/thirdparties/docker-compose/mariadb/init/05-check.sql create mode 100644 docker/thirdparties/docker-compose/mysql/init/05-check.sql create mode 100644 docker/thirdparties/docker-compose/oracle/init/05-check.sql create mode 100644 docker/thirdparties/docker-compose/postgresql/init/05-check.sql diff --git a/docker/thirdparties/docker-compose/clickhouse/clickhouse.yaml.tpl b/docker/thirdparties/docker-compose/clickhouse/clickhouse.yaml.tpl index ba0501f3792512d..c4a919e1f158089 100644 --- a/docker/thirdparties/docker-compose/clickhouse/clickhouse.yaml.tpl +++ b/docker/thirdparties/docker-compose/clickhouse/clickhouse.yaml.tpl @@ -30,7 +30,7 @@ services: ports: - ${DOCKER_CLICKHOUSE_EXTERNAL_HTTP_PORT}:8123 healthcheck: - test: ["CMD-SHELL", "clickhouse-client --password=123456 --query 'SELECT 1'"] + test: ["CMD-SHELL", "clickhouse-client --password=123456 --query 'SELECT 1 FROM doris_test.deadline'"] interval: 30s timeout: 10s retries: 5 diff --git a/docker/thirdparties/docker-compose/clickhouse/init/05-check.sql b/docker/thirdparties/docker-compose/clickhouse/init/05-check.sql new file mode 100644 index 000000000000000..a06851aa7d2400e --- /dev/null +++ b/docker/thirdparties/docker-compose/clickhouse/init/05-check.sql @@ -0,0 +1,23 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +CREATE TABLE doris_test.deadline +( + id Int64 +) +ENGINE = MergeTree +ORDER BY id; diff --git a/docker/thirdparties/docker-compose/mariadb/init/05-check.sql b/docker/thirdparties/docker-compose/mariadb/init/05-check.sql new file mode 100644 index 000000000000000..56541c08f486a2e --- /dev/null +++ b/docker/thirdparties/docker-compose/mariadb/init/05-check.sql @@ -0,0 +1,21 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. 
You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +CREATE TABLE doris_test.deadline +( + id int +); diff --git a/docker/thirdparties/docker-compose/mariadb/mariadb-10.yaml.tpl b/docker/thirdparties/docker-compose/mariadb/mariadb-10.yaml.tpl index f6af7ffed845129..f22b1a0b0af4fee 100644 --- a/docker/thirdparties/docker-compose/mariadb/mariadb-10.yaml.tpl +++ b/docker/thirdparties/docker-compose/mariadb/mariadb-10.yaml.tpl @@ -30,7 +30,7 @@ services: ports: - ${DOCKER_MARIADB_EXTERNAL_PORT}:3306 healthcheck: - test: mysqladmin ping -h 127.0.0.1 -u root --password=$$MARIADB_ROOT_PASSWORD + test: mysqladmin ping -h 127.0.0.1 -u root --password=$$MARIADB_ROOT_PASSWORD && mysql -h 127.0.0.1 -u root --password=$$MARIADB_ROOT_PASSWORD -e "SELECT 1 FROM doris_test.deadline;" interval: 5s timeout: 60s retries: 120 diff --git a/docker/thirdparties/docker-compose/mysql/init/05-check.sql b/docker/thirdparties/docker-compose/mysql/init/05-check.sql new file mode 100644 index 000000000000000..56541c08f486a2e --- /dev/null +++ b/docker/thirdparties/docker-compose/mysql/init/05-check.sql @@ -0,0 +1,21 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +CREATE TABLE doris_test.deadline +( + id int +); diff --git a/docker/thirdparties/docker-compose/mysql/mysql-5.7.yaml.tpl b/docker/thirdparties/docker-compose/mysql/mysql-5.7.yaml.tpl index 6c6ad81e41148c9..c335fecace7b52f 100644 --- a/docker/thirdparties/docker-compose/mysql/mysql-5.7.yaml.tpl +++ b/docker/thirdparties/docker-compose/mysql/mysql-5.7.yaml.tpl @@ -30,7 +30,7 @@ services: ports: - ${DOCKER_MYSQL_57_EXTERNAL_PORT}:3306 healthcheck: - test: mysqladmin ping -h 127.0.0.1 -u root --password=$$MYSQL_ROOT_PASSWORD + test: mysqladmin ping -h 127.0.0.1 -u root --password=$$MYSQL_ROOT_PASSWORD && mysql -h 127.0.0.1 -u root --password=$$MYSQL_ROOT_PASSWORD -e "SELECT 1 FROM doris_test.deadline;" interval: 5s timeout: 60s retries: 120 diff --git a/docker/thirdparties/docker-compose/oracle/init/05-check.sql b/docker/thirdparties/docker-compose/oracle/init/05-check.sql new file mode 100644 index 000000000000000..640c51fe1805d59 --- /dev/null +++ b/docker/thirdparties/docker-compose/oracle/init/05-check.sql @@ -0,0 +1,21 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. 
See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. + +CREATE TABLE doris_test.deadline +( + id number(5) +); diff --git a/docker/thirdparties/docker-compose/oracle/oracle-11.yaml.tpl b/docker/thirdparties/docker-compose/oracle/oracle-11.yaml.tpl index 9a1dc15da6c33bd..c9fb89016fa71ce 100644 --- a/docker/thirdparties/docker-compose/oracle/oracle-11.yaml.tpl +++ b/docker/thirdparties/docker-compose/oracle/oracle-11.yaml.tpl @@ -25,7 +25,7 @@ services: - ${DOCKER_ORACLE_EXTERNAL_PORT}:1521 privileged: true healthcheck: - test: [ "CMD", "bash", "-c", "echo 'select 1 from dual;' | ORACLE_HOME=/u01/app/oracle/product/11.2.0/xe /u01/app/oracle/product/11.2.0/xe/bin/sqlplus -s DORIS_TEST/123456@localhost"] + test: [ "CMD", "bash", "-c", "echo 'SELECT 1 FROM doris_test.deadline;' | ORACLE_HOME=/u01/app/oracle/product/11.2.0/xe /u01/app/oracle/product/11.2.0/xe/bin/sqlplus -s DORIS_TEST/123456@localhost" ] interval: 20s timeout: 60s retries: 120 diff --git a/docker/thirdparties/docker-compose/postgresql/init/05-check.sql b/docker/thirdparties/docker-compose/postgresql/init/05-check.sql new file mode 100644 index 000000000000000..56541c08f486a2e --- /dev/null +++ b/docker/thirdparties/docker-compose/postgresql/init/05-check.sql @@ -0,0 +1,21 @@ +-- Licensed to the Apache Software Foundation (ASF) under one +-- or more contributor license agreements. See the NOTICE file +-- distributed with this work for additional information +-- regarding copyright ownership. The ASF licenses this file +-- to you under the Apache License, Version 2.0 (the +-- "License"); you may not use this file except in compliance +-- with the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, +-- software distributed under the License is distributed on an +-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +-- KIND, either express or implied. See the License for the +-- specific language governing permissions and limitations +-- under the License. 
+ +CREATE TABLE doris_test.deadline +( + id int +); diff --git a/docker/thirdparties/docker-compose/postgresql/postgresql-14.yaml.tpl b/docker/thirdparties/docker-compose/postgresql/postgresql-14.yaml.tpl index ebc8ed185fa2838..9cea58777ecea49 100644 --- a/docker/thirdparties/docker-compose/postgresql/postgresql-14.yaml.tpl +++ b/docker/thirdparties/docker-compose/postgresql/postgresql-14.yaml.tpl @@ -26,7 +26,7 @@ services: ports: - ${DOCKER_PG_14_EXTERNAL_PORT}:5432 healthcheck: - test: ["CMD-SHELL", "pg_isready -U postgres"] + test: [ "CMD-SHELL", "pg_isready -U postgres && psql -U postgres -c 'SELECT 1 FROM doris_test.deadline;'" ] interval: 5s timeout: 60s retries: 120 From a1ba0a1983c180ed0959c613f69965a597244f73 Mon Sep 17 00:00:00 2001 From: Mryange <59914473+Mryange@users.noreply.github.com> Date: Sun, 11 Aug 2024 06:30:30 +0800 Subject: [PATCH 31/94] [fix](function) MicroSecondsSub without scale (#38945) ## Proposed changes Added the computeSignature function for millisecond/microsecond calculation functions to generate parameters and return values with the appropriate precision. Modified the microSecondsAdd function, which was used for constant folding, because constant folding uses the precision of the parameters for calculation. However, for millisecond/microsecond calculations, it is necessary to set the precision to the maximum to ensure correct display. before ``` mysql> SELECT MICROSECONDS_SUB('2010-11-30 23:50:50', 2); +-------------------------------------------------------------------+ | microseconds_sub(cast('2010-11-30 23:50:50' as DATETIMEV2(0)), 2) | +-------------------------------------------------------------------+ | 2010-11-30 23:50:49 | +-------------------------------------------------------------------+ ``` now ``` mysql> SELECT MICROSECONDS_SUB('2010-11-30 23:50:50', 2); +-------------------------------------------------------------------+ | microseconds_sub(cast('2010-11-30 23:50:50' as DATETIMEV2(0)), 2) | +-------------------------------------------------------------------+ | 2010-11-30 23:50:49.999998 | +-------------------------------------------------------------------+ ``` --- .../executable/DateTimeArithmetic.java | 24 ++++++++++++++ .../functions/scalar/MicroSecondsAdd.java | 11 +++++-- .../functions/scalar/MicroSecondsSub.java | 11 +++++-- .../functions/scalar/MilliSecondsAdd.java | 11 +++++-- .../functions/scalar/MilliSecondsSub.java | 11 +++++-- .../literal/DateTimeV2Literal.java | 8 ++++- .../rules/expression/FoldConstantTest.java | 19 +++++++++--- .../test_from_millisecond_microsecond.out | 16 ++++++++++ .../test_from_millisecond_microsecond.groovy | 31 +++++++++++++++++++ 9 files changed, 125 insertions(+), 17 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeArithmetic.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeArithmetic.java index 033bff2afd33ddc..c10181a1040db4b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeArithmetic.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/DateTimeArithmetic.java @@ -218,6 +218,30 @@ public static Expression microSecondsAdd(DateTimeV2Literal date, IntegerLiteral return date.plusMicroSeconds(microSecond.getValue()); } + /** + * datetime arithmetic function microseconds_sub. 
+ */ + @ExecFunction(name = "microseconds_sub", argTypes = { "DATETIMEV2", "INT" }, returnType = "DATETIMEV2") + public static Expression microSecondsSub(DateTimeV2Literal date, IntegerLiteral microSecond) { + return date.plusMicroSeconds(-microSecond.getValue()); + } + + /** + * datetime arithmetic function milliseconds_add. + */ + @ExecFunction(name = "milliseconds_add", argTypes = { "DATETIMEV2", "INT" }, returnType = "DATETIMEV2") + public static Expression milliSecondsAdd(DateTimeV2Literal date, IntegerLiteral milliSecond) { + return date.plusMilliSeconds(milliSecond.getValue()); + } + + /** + * datetime arithmetic function milliseconds_sub. + */ + @ExecFunction(name = "milliseconds_sub", argTypes = { "DATETIMEV2", "INT" }, returnType = "DATETIMEV2") + public static Expression milliSecondsSub(DateTimeV2Literal date, IntegerLiteral milliSecond) { + return date.plusMilliSeconds(-milliSecond.getValue()); + } + /** * datetime arithmetic function years-sub. */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsAdd.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsAdd.java index 059577143ef0f38..8d792259440dd2f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsAdd.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsAdd.java @@ -38,9 +38,8 @@ public class MicroSecondsAdd extends ScalarFunction implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { private static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT) - .args(DateTimeV2Type.SYSTEM_DEFAULT, IntegerType.INSTANCE) - ); + FunctionSignature.ret(DateTimeV2Type.MAX) + .args(DateTimeV2Type.MAX, IntegerType.INSTANCE)); public MicroSecondsAdd(Expression arg0, Expression arg1) { super("microseconds_add", arg0, arg1); @@ -57,6 +56,12 @@ public List getSignatures() { return SIGNATURES; } + @Override + public FunctionSignature computeSignature(FunctionSignature signature) { + signature = super.computeSignature(signature); + return signature.withArgumentType(0, DateTimeV2Type.MAX).withReturnType(DateTimeV2Type.MAX); + } + @Override public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMicroSecondsAdd(this, context); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsSub.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsSub.java index c0b99eade7914aa..2894d1fffc902f3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsSub.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MicroSecondsSub.java @@ -38,9 +38,8 @@ public class MicroSecondsSub extends ScalarFunction implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { private static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT) - .args(DateTimeV2Type.SYSTEM_DEFAULT, IntegerType.INSTANCE) - ); + FunctionSignature.ret(DateTimeV2Type.MAX) + .args(DateTimeV2Type.MAX, IntegerType.INSTANCE)); public MicroSecondsSub(Expression arg0, Expression arg1) { super("microseconds_sub", arg0, arg1); @@ -57,6 +56,12 @@ public List getSignatures() { return SIGNATURES; } + @Override + 
public FunctionSignature computeSignature(FunctionSignature signature) { + signature = super.computeSignature(signature); + return signature.withArgumentType(0, DateTimeV2Type.MAX).withReturnType(DateTimeV2Type.MAX); + } + @Override public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMicroSecondsSub(this, context); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsAdd.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsAdd.java index ddf58907e884733..1cb56b13f84ed53 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsAdd.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsAdd.java @@ -38,9 +38,8 @@ public class MilliSecondsAdd extends ScalarFunction implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { private static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT) - .args(DateTimeV2Type.SYSTEM_DEFAULT, IntegerType.INSTANCE) - ); + FunctionSignature.ret(DateTimeV2Type.MAX) + .args(DateTimeV2Type.MAX, IntegerType.INSTANCE)); public MilliSecondsAdd(Expression arg0, Expression arg1) { super("milliseconds_add", arg0, arg1); @@ -57,6 +56,12 @@ public List getSignatures() { return SIGNATURES; } + @Override + public FunctionSignature computeSignature(FunctionSignature signature) { + signature = super.computeSignature(signature); + return signature.withArgumentType(0, DateTimeV2Type.MAX).withReturnType(DateTimeV2Type.MAX); + } + @Override public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMilliSecondsAdd(this, context); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsSub.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsSub.java index eb96aa59ac25a10..42891b7e7e0b223 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsSub.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MilliSecondsSub.java @@ -38,9 +38,8 @@ public class MilliSecondsSub extends ScalarFunction implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { private static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT) - .args(DateTimeV2Type.SYSTEM_DEFAULT, IntegerType.INSTANCE) - ); + FunctionSignature.ret(DateTimeV2Type.MAX) + .args(DateTimeV2Type.MAX, IntegerType.INSTANCE)); public MilliSecondsSub(Expression arg0, Expression arg1) { super("milliseconds_sub", arg0, arg1); @@ -57,6 +56,12 @@ public List getSignatures() { return SIGNATURES; } + @Override + public FunctionSignature computeSignature(FunctionSignature signature) { + signature = super.computeSignature(signature); + return signature.withArgumentType(0, DateTimeV2Type.MAX).withReturnType(DateTimeV2Type.MAX); + } + @Override public R accept(ExpressionVisitor visitor, C context) { return visitor.visitMilliSecondsSub(this, context); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeV2Literal.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeV2Literal.java index a769bd03717c905..a3457f2463d65cc 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeV2Literal.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateTimeV2Literal.java @@ -215,8 +215,14 @@ public Expression plusSeconds(long seconds) { return fromJavaDateType(toJavaDateType().plusSeconds(seconds), getDataType().getScale()); } + // When performing addition or subtraction with MicroSeconds, the precision must + // be set to 6 to display it completely. public Expression plusMicroSeconds(long microSeconds) { - return fromJavaDateType(toJavaDateType().plusNanos(microSeconds * 1000L), getDataType().getScale()); + return fromJavaDateType(toJavaDateType().plusNanos(microSeconds * 1000L), 6); + } + + public Expression plusMilliSeconds(long microSeconds) { + return plusMicroSeconds(microSeconds * 1000L); } /** diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java index 6e4febe0ea64612..4a5a5e9065c2b77 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/FoldConstantTest.java @@ -549,12 +549,17 @@ void testDateTimeV2TypeDateTimeArithmeticFunctions() { VarcharLiteral format = new VarcharLiteral("%Y-%m-%d"); String[] answer = { - "'2000-01-30 23:59:59'", "'1999-12-01 23:59:59'", "'2029-12-31 23:59:59'", "'1969-12-31 23:59:59'", - "'2002-06-30 23:59:59'", "'1997-06-30 23:59:59'", "'2000-01-30 23:59:59'", "'1999-12-01 23:59:59'", + "'2000-01-30 23:59:59'", "'1999-12-01 23:59:59'", "'2029-12-31 23:59:59'", + "'1969-12-31 23:59:59'", + "'2002-06-30 23:59:59'", "'1997-06-30 23:59:59'", "'2000-01-30 23:59:59'", + "'1999-12-01 23:59:59'", "'2000-01-02 05:59:59'", "'1999-12-30 17:59:59'", "'2000-01-01 00:29:59'", - "'1999-12-31 23:29:59'", "'2000-01-01 00:00:29'", "'1999-12-31 23:59:29'", "'1999-12-31 23:59:59'", + "'1999-12-31 23:29:59'", "'2000-01-01 00:00:29'", "'1999-12-31 23:59:29'", + "'1999-12-31 23:59:59.000030'", "'1999-12-31 23:59:58.999970'", "'1999-12-31 23:59:59.030000'", + "'1999-12-31 23:59:58.970000'", "1999", "4", "12", "6", "31", "365", "31", "23", "59", "59", - "'1999-12-31'", "'1999-12-27'", "'1999-12-31'", "'1999-12-31'", "730484", "'1999-12-31'", "'1999-12-31'" + "'1999-12-31'", "'1999-12-27'", "'1999-12-31'", "'1999-12-31'", "730484", "'1999-12-31'", + "'1999-12-31'" }; int answerIdx = 0; @@ -578,6 +583,12 @@ void testDateTimeV2TypeDateTimeArithmeticFunctions() { answer[answerIdx++]); Assertions.assertEquals(DateTimeArithmetic.microSecondsAdd(dateLiteral, integerLiteral).toSql(), answer[answerIdx++]); + Assertions.assertEquals(DateTimeArithmetic.microSecondsSub(dateLiteral, integerLiteral).toSql(), + answer[answerIdx++]); + Assertions.assertEquals(DateTimeArithmetic.milliSecondsAdd(dateLiteral, integerLiteral).toSql(), + answer[answerIdx++]); + Assertions.assertEquals(DateTimeArithmetic.milliSecondsSub(dateLiteral, integerLiteral).toSql(), + answer[answerIdx++]); Assertions.assertEquals(DateTimeExtractAndTransform.year(dateLiteral).toSql(), answer[answerIdx++]); Assertions.assertEquals(DateTimeExtractAndTransform.quarter(dateLiteral).toSql(), answer[answerIdx++]); diff --git a/regression-test/data/correctness/test_from_millisecond_microsecond.out b/regression-test/data/correctness/test_from_millisecond_microsecond.out index 50cdb55766dfed1..84b1f2d5359a24e 100644 --- 
a/regression-test/data/correctness/test_from_millisecond_microsecond.out +++ b/regression-test/data/correctness/test_from_millisecond_microsecond.out @@ -159,3 +159,19 @@ -- !sql -- \N +-- !sql_all_constent -- +2010-11-30T23:50:50.000002 2010-11-30T23:50:49.999998 2010-11-30T23:50:50.002 2010-11-30T23:50:49.998 + +-- !sql_all_constent -- +2010-11-30T23:50:50.000002 2010-11-30T23:50:49.999998 2010-11-30T23:50:50.002 2010-11-30T23:50:49.998 + +-- !select_null_datetime -- +1 2023-01-01T00:00:00.000002 2022-12-31T23:59:59.999998 2023-01-01T00:00:00.002 2022-12-31T23:59:59.998 +2 2023-01-01T00:00:00.123002 2023-01-01T00:00:00.122998 2023-01-01T00:00:00.125 2023-01-01T00:00:00.121 +3 2023-01-01T00:00:00.123458 2023-01-01T00:00:00.123454 2023-01-01T00:00:00.125456 2023-01-01T00:00:00.121456 + +-- !select_null_datetime -- +1 2023-01-01T00:00:00.000002 2022-12-31T23:59:59.999998 2023-01-01T00:00:00.002 2022-12-31T23:59:59.998 +2 2023-01-01T00:00:00.123002 2023-01-01T00:00:00.122998 2023-01-01T00:00:00.125 2023-01-01T00:00:00.121 +3 2023-01-01T00:00:00.123458 2023-01-01T00:00:00.123454 2023-01-01T00:00:00.125456 2023-01-01T00:00:00.121456 + diff --git a/regression-test/suites/correctness/test_from_millisecond_microsecond.groovy b/regression-test/suites/correctness/test_from_millisecond_microsecond.groovy index 48df29472f6a4b0..afbb386931b0c6e 100644 --- a/regression-test/suites/correctness/test_from_millisecond_microsecond.groovy +++ b/regression-test/suites/correctness/test_from_millisecond_microsecond.groovy @@ -316,4 +316,35 @@ suite("test_from_millisecond_microsecond") { qt_sql " select from_second(-1) " qt_sql " select from_microsecond(253402271999999999) " qt_sql " select from_microsecond(253402272000000000) " + + + qt_sql_all_constent """ + select microseconds_add('2010-11-30 23:50:50', 2) , microseconds_sub('2010-11-30 23:50:50', 2) , milliseconds_add('2010-11-30 23:50:50', 2) , milliseconds_sub('2010-11-30 23:50:50', 2); + """ + + qt_sql_all_constent """ + select microseconds_add(cast('2010-11-30 23:50:50' as DATETIME(3)), 2) , microseconds_sub(cast('2010-11-30 23:50:50' as DATETIME(3)), 2) , milliseconds_add(cast('2010-11-30 23:50:50' as DATETIME(3)), 2) , milliseconds_sub(cast('2010-11-30 23:50:50' as DATETIME(3)), 2); + """ + + qt_select_null_datetime """ + select + id, + microseconds_add(t,2), + microseconds_sub(t,2), + milliseconds_add(t,2), + milliseconds_sub(t,2) + from millimicro + order by id; + """ + + qt_select_null_datetime """ + select + id, + microseconds_add(cast(t as DATETIME(3)),2), + microseconds_sub(cast(t as DATETIME(3)),2), + milliseconds_add(cast(t as DATETIME(3)),2), + milliseconds_sub(cast(t as DATETIME(3)),2) + from millimicro + order by id; + """ } From 7697f02b890976523d851cfb5dd3a93adb0d2826 Mon Sep 17 00:00:00 2001 From: Jerry Hu Date: Sun, 11 Aug 2024 06:32:23 +0800 Subject: [PATCH 32/94] [fix](ub) undefined behavior in FixedContainer (#39191) ## Proposed changes Undefined behavior occurs if there is a null value in the list. 
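For reference, the IN / NOT IN shapes below are enough to reach this path; they are the same queries added to the `inpredicate` regression suite by this patch, and `supplier` is the table that suite already uses.

```sql
-- NULL literals inside IN / NOT IN value lists; the string-column case is the
-- one that hit the StringRef::eq null-pointer UB reported below.
SELECT * FROM supplier WHERE s_suppkey IN (15, NULL);
SELECT * FROM supplier WHERE s_nation NOT IN ('PERU', 'ETHIOPIA', NULL);
```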
``` /root/doris/be/src/vec/common/string_ref.h:271:54: runtime error: null pointer passed as argument 2, which is declared to never be null /var/local/ldb-toolchain/bin/../usr/include/string.h:64:33: note: nonnull attribute specified here #0 0x5616d072245d in doris::StringRef::eq(doris::StringRef const&) const /root/doris/be/src/vec/common/string_ref.h:271:41 #1 0x5616d072245d in doris::StringRef::operator==(doris::StringRef const&) const /root/doris/be/src/vec/common/string_ref.h:274:60 #2 0x5616d072245d in doris::FixedContainer::find(doris::StringRef const&) const /root/doris/be/src/exprs/hybrid_set.h:76:36 #3 0x5616d072245d in void doris::StringValueSet>::_find_batch(doris::vectorized::IColumn const&, unsigned long, doris::vectorized::PODArray, 16ul, 15ul> const*, doris::vectorized::PODArray, 16ul, 15ul>&) /root/doris/be/src/exprs/hybrid_set.h:688:63 #4 0x5616d0747857 in doris::vectorized::FunctionIn::execute_impl(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long) const /root/doris/be/src/vec/functions/in.h:170:21 #5 0x5616c741fa3a in doris::vectorized::DefaultExecutable::execute_impl(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long) const /root/doris/be/src/vec/functions/function.h:462:26 #6 0x5616cbb5b650 in doris::vectorized::PreparedFunctionImpl::_execute_skipped_constant_deal(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, bool) const /root/doris/be/src/vec/functions/function.cpp #7 0x5616cbb4e14e in doris::vectorized::PreparedFunctionImpl::execute_without_low_cardinality_columns(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, bool) const /root/doris/be/src/vec/functions/function.cpp:244:12 #8 0x5616cbb4e3c2 in doris::vectorized::PreparedFunctionImpl::execute(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, bool) const /root/doris/be/src/vec/functions/function.cpp:250:12 #9 0x5616c741cd68 in doris::vectorized::IFunctionBase::execute(doris::FunctionContext*, doris::vectorized::Block&, std::vector> const&, unsigned long, unsigned long, bool) const /root/doris/be/src/vec/functions/function.h:190:19 #10 0x5616c74cf712 in doris::vectorized::VInPredicate::execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*) /root/doris/be/src/vec/exprs/vin_predicate.cpp:130:5 #11 0x5616c740d5c0 in doris::vectorized::VectorizedFnCall::_do_execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*, std::vector>&) /root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:183:9 #12 0x5616c740ecf5 in doris::vectorized::VectorizedFnCall::execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*) /root/doris/be/src/vec/exprs/vectorized_fn_call.cpp:215:12 #13 0x5616c7462e24 in doris::vectorized::VCompoundPred::execute(doris::vectorized::VExprContext*, doris::vectorized::Block*, int*) /root/doris/be/src/vec/exprs/vcompound_pred.h:127:38 #14 0x5616c74bccec in doris::vectorized::VExprContext::execute(doris::vectorized::Block*, int*) /root/doris/be/src/vec/exprs/vexpr_context.cpp:54:5 #15 0x5616c74c1dcc in doris::vectorized::VExprContext::execute_conjuncts(std::vector, std::allocator>> const&, std::vector, 16ul, 15ul>, std::allocator, 16ul, 15ul>>> const*, bool, doris::vectorized::Block*, doris::vectorized::PODArray, 16ul, 15ul>, bool) /root/doris/be/src/vec/exprs/vexpr_context.cpp:169:9 #16 0x5616c74c5108 
in doris::vectorized::VExprContext::execute_conjuncts_and_filter_block(std::vector, std::allocator>> const&, doris::vectorized::Block*, std::vector>&, int, doris::vectorized::PODArray, 16ul, 15ul>&) /root/doris/be/src/vec/exprs/vexpr_context.cpp:322:5 #17 0x5616ad8a7f1a in doris::segment_v2::SegmentIterator::_execute_common_expr(unsigned short*, unsigned short&, doris::vectorized::Block*) /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2680:5 #18 0x5616ad89e86e in doris::segment_v2::SegmentIterator::_next_batch_internal(doris::vectorized::Block*) /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2582:25 #19 0x5616ad892f5c in doris::segment_v2::SegmentIterator::next_batch(doris::vectorized::Block*)::$_0::operator()() const /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2315:9 #20 0x5616ad892f5c in doris::segment_v2::SegmentIterator::next_batch(doris::vectorized::Block*) /root/doris/be/src/olap/rowset/segment_v2/segment_iterator.cpp:2314:19 #21 0x5616ad6dd9cc in doris::segment_v2::LazyInitSegmentIterator::next_batch(doris::vectorized::Block*) /root/doris/be/src/olap/rowset/segment_v2/lazy_init_segment_iterator.h:44:33 #22 0x5616ad269d67 in doris::BetaRowsetReader::next_block(doris::vectorized::Block*) /root/doris/be/src/olap/rowset/beta_rowset_reader.cpp:380:29 #23 0x5616de6de110 in doris::vectorized::VCollectIterator::Level0Iterator::_refresh() /root/doris/be/src/vec/olap/vcollect_iterator.h #24 0x5616de6c967f in doris::vectorized::VCollectIterator::Level0Iterator::refresh_current_row() /root/doris/be/src/vec/olap/vcollect_iterator.cpp:514:24 #25 0x5616de6ca8a6 in doris::vectorized::VCollectIterator::Level0Iterator::ensure_first_row_ref() /root/doris/be/src/vec/olap/vcollect_iterator.cpp:493:14 #26 0x5616de6d7008 in doris::vectorized::VCollectIterator::Level1Iterator::ensure_first_row_ref() /root/doris/be/src/vec/olap/vcollect_iterator.cpp:692:27 #27 0x5616de6bd200 in doris::vectorized::VCollectIterator::build_heap(std::vector, std::allocator>>&) /root/doris/be/src/vec/olap/vcollect_iterator.cpp:186:9 #28 0x5616de651b6c in doris::vectorized::BlockReader::_init_collect_iter(doris::TabletReader::ReaderParams const&) /root/doris/be/src/vec/olap/block_reader.cpp:157:5 #29 0x5616de65526f in doris::vectorized::BlockReader::init(doris::TabletReader::ReaderParams const&) /root/doris/be/src/vec/olap/block_reader.cpp:229:19 #30 0x5616e175a0f9 in doris::vectorized::NewOlapScanner::open(doris::RuntimeState*) /root/doris/be/src/vec/exec/scan/new_olap_scanner.cpp:237:32 #31 0x5616c736ad34 in doris::vectorized::ScannerScheduler::_scanner_scan(std::shared_ptr, std::shared_ptr) /root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:236:5 #32 0x5616c736f05e in doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::'lambda'()::operator()() const::'lambda'()::operator()() const /root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:176:21 #33 0x5616c736f05e in doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::'lambda'()::operator()() const /root/doris/be/src/vec/exec/scan/scanner_scheduler.cpp:175:31 #34 0x5616c736f05e in void std::_invoke_impl, std::shared_ptr)::$_1::operator()() const::'lambda'()&>(std::_invoke_other, doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::'lambda'()&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:61:14 #35 0x5616c736f05e in 
std::enable_if, std::shared_ptr)::$1::operator()() const::'lambda'()&>, void>::type std::_invoke_r, std::shared_ptr)::$_1::operator()() const::'lambda'()&>(doris::vectorized::ScannerScheduler::submit(std::shared_ptr, std::shared_ptr)::$_1::operator()() const::'lambda'()&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/invoke.h:111:2 #36 0x5616c736f05e in std::_Function_handler, std::shared_ptr)::$_1::operator()() const::'lambda'()>::_M_invoke(std::_Any_data const&) /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291:9 #37 0x5616aeed6a3b in doris::ThreadPool::dispatch_thread() /root/doris/be/src/util/threadpool.cpp:543:24 #38 0x5616aeeae4f7 in doris::Thread::supervise_thread(void*) /root/doris/be/src/util/thread.cpp:498:5 #39 0x7f7e663e3ac2 in start_thread nptl/pthread_create.c:442:8 #40 0x7f7e6647584f misc/../sysdeps/unix/sysv/linux/x86_64/clone3.S:81 SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /root/doris/be/src/vec/common/string_ref.h:271:54 in ``` --- be/src/exprs/hybrid_set.h | 35 +++++++++++++++++++ be/src/vec/functions/in.h | 2 +- .../data/nereids_syntax_p0/inpredicate.out | 9 +++++ .../nereids_syntax_p0/inpredicate.groovy | 16 +++++++++ 4 files changed, 61 insertions(+), 1 deletion(-) diff --git a/be/src/exprs/hybrid_set.h b/be/src/exprs/hybrid_set.h index b75cc81ebf1f144..f0977a652b1cbe6 100644 --- a/be/src/exprs/hybrid_set.h +++ b/be/src/exprs/hybrid_set.h @@ -17,7 +17,13 @@ #pragma once +#include + +#include + +#include "common/exception.h" #include "common/object_pool.h" +#include "common/status.h" #include "exprs/runtime_filter.h" #include "runtime/decimalv2_value.h" #include "runtime/define_primitive_type.h" @@ -60,8 +66,16 @@ class FixedContainer { } } + void check_size() { + if (N != _size) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, + "invalid size of FixedContainer<{}>: {}", N, _size); + } + } + // Use '|' instead of '||' has better performance by test. ALWAYS_INLINE bool find(const T& value) const { + DCHECK_EQ(N, _size); if constexpr (N == 0) { return false; } @@ -144,6 +158,12 @@ class FixedContainer { size_t _size {}; }; +template +struct IsFixedContainer : std::false_type {}; + +template +struct IsFixedContainer> : std::true_type {}; + /** * Dynamic Container uses phmap::flat_hash_set. 
* @tparam T Element Type @@ -354,6 +374,11 @@ class HybridSet : public HybridSetBase { if constexpr (is_nullable) { null_map_data = null_map->data(); } + + if constexpr (IsFixedContainer::value) { + _set.check_size(); + } + auto* __restrict result_data = results.data(); for (size_t i = 0; i < rows; ++i) { if constexpr (!is_nullable && !is_negative) { @@ -507,6 +532,11 @@ class StringSet : public HybridSetBase { if constexpr (is_nullable) { null_map_data = null_map->data(); } + + if constexpr (IsFixedContainer::value) { + _set.check_size(); + } + auto* __restrict result_data = results.data(); for (size_t i = 0; i < rows; ++i) { const auto& string_data = col.get_data_at(i).to_string(); @@ -675,6 +705,11 @@ class StringValueSet : public HybridSetBase { if constexpr (is_nullable) { null_map_data = null_map->data(); } + + if constexpr (IsFixedContainer::value) { + _set.check_size(); + } + auto* __restrict result_data = results.data(); for (size_t i = 0; i < rows; ++i) { uint32_t len = offset[i] - offset[i - 1]; diff --git a/be/src/vec/functions/in.h b/be/src/vec/functions/in.h index b25ad8eeb67e06e..9b5c5bb023aee25 100644 --- a/be/src/vec/functions/in.h +++ b/be/src/vec/functions/in.h @@ -114,7 +114,7 @@ class FunctionIn : public IFunction { context->get_arg_type(0)->type == PrimitiveType::TYPE_VARCHAR || context->get_arg_type(0)->type == PrimitiveType::TYPE_STRING) { // the StringValue's memory is held by FunctionContext, so we can use StringValueSet here directly - state->hybrid_set.reset(create_string_value_set((size_t)(context->get_num_args() - 1))); + state->hybrid_set.reset(create_string_value_set(get_size_with_out_null(context))); } else { state->hybrid_set.reset( create_set(context->get_arg_type(0)->type, get_size_with_out_null(context))); diff --git a/regression-test/data/nereids_syntax_p0/inpredicate.out b/regression-test/data/nereids_syntax_p0/inpredicate.out index cee03178b5cba61..ac6219c69cee609 100644 --- a/regression-test/data/nereids_syntax_p0/inpredicate.out +++ b/regression-test/data/nereids_syntax_p0/inpredicate.out @@ -31,3 +31,12 @@ 29 Supplier#000000029 VVSymB3fbwaN ARGENTINA4 ARGENTINA AMERICA 11-773-203-7342 9 Supplier#000000009 ,gJ6K2MKveYxQT IRAN 6 IRAN MIDDLE EAST 20-338-906-3675 +-- !in_predicate_11 -- +15 Supplier#000000015 DF35PepL5saAK INDIA 0 INDIA ASIA 18-687-542-7601 + +-- !in_predicate_12 -- + +-- !in_predicate_13 -- + +-- !in_predicate_14 -- + diff --git a/regression-test/suites/nereids_syntax_p0/inpredicate.groovy b/regression-test/suites/nereids_syntax_p0/inpredicate.groovy index 3cdf096519c4fa7..bf4ec9787f9e07b 100644 --- a/regression-test/suites/nereids_syntax_p0/inpredicate.groovy +++ b/regression-test/suites/nereids_syntax_p0/inpredicate.groovy @@ -61,5 +61,21 @@ suite("inpredicate") { order_qt_in_predicate_10 """ SELECT * FROM supplier WHERE s_suppkey not in (15); """ + + order_qt_in_predicate_11 """ + SELECT * FROM supplier WHERE s_suppkey in (15, null); + """ + + order_qt_in_predicate_12 """ + SELECT * FROM supplier WHERE s_suppkey not in (15, null); + """ + + order_qt_in_predicate_13 """ + SELECT * FROM supplier WHERE s_nation in ('PERU', 'ETHIOPIA', null); + """ + + order_qt_in_predicate_14 """ + SELECT * FROM supplier WHERE s_nation not in ('PERU', 'ETHIOPIA', null); + """ } From 46fceed7d7f17c87355af17b76acbb910c20dd36 Mon Sep 17 00:00:00 2001 From: starocean999 <40539150+starocean999@users.noreply.github.com> Date: Sun, 11 Aug 2024 10:59:40 +0800 Subject: [PATCH 33/94] [fix](nereids)semi join transpose rule produce wrong plan if there is mark 
join (#39152) --- .../LogicalJoinSemiJoinTransposeProject.java | 4 ++++ .../SemiJoinSemiJoinTransposeProject.java | 1 + ...gicalJoinSemiJoinTransposeProjectTest.java | 24 +++++++++++++++++++ .../SemiJoinSemiJoinTransposeProjectTest.java | 6 ++--- .../nereids/util/LogicalPlanBuilder.java | 11 +++++++++ 5 files changed, 43 insertions(+), 3 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/LogicalJoinSemiJoinTransposeProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/LogicalJoinSemiJoinTransposeProject.java index a0e2b83cc1b39a1..0531c6e54aca771 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/LogicalJoinSemiJoinTransposeProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/LogicalJoinSemiJoinTransposeProject.java @@ -68,6 +68,8 @@ public List buildRules() { .forEach(e -> topUsedExprIds.addAll(e.getInputSlotExprIds())); bottomJoin.getOtherJoinConjuncts() .forEach(e -> topUsedExprIds.addAll(e.getInputSlotExprIds())); + bottomJoin.getMarkJoinConjuncts() + .forEach(e -> topUsedExprIds.addAll(e.getInputSlotExprIds())); Plan newBottomJoin = topJoin.withChildrenNoContext(a, c, null); Plan left = CBOUtils.newProject(topUsedExprIds, newBottomJoin); Plan right = CBOUtils.newProjectIfNeeded(topUsedExprIds, b); @@ -100,6 +102,8 @@ public List buildRules() { .forEach(e -> topUsedExprIds.addAll(e.getInputSlotExprIds())); bottomJoin.getOtherJoinConjuncts() .forEach(e -> topUsedExprIds.addAll(e.getInputSlotExprIds())); + bottomJoin.getMarkJoinConjuncts() + .forEach(e -> topUsedExprIds.addAll(e.getInputSlotExprIds())); Plan newBottomJoin = topJoin.withChildrenNoContext(a, b, null); Plan left = CBOUtils.newProject(topUsedExprIds, newBottomJoin); Plan right = CBOUtils.newProjectIfNeeded(topUsedExprIds, c); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProject.java index b4a5b177f8c0a94..359d6e13552c189 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProject.java @@ -104,6 +104,7 @@ public Rule build() { topProject.getProjects().forEach(expr -> topUsedExprIds.addAll(expr.getInputSlotExprIds())); bottomSemi.getHashJoinConjuncts().forEach(e -> topUsedExprIds.addAll(e.getInputSlotExprIds())); bottomSemi.getOtherJoinConjuncts().forEach(e -> topUsedExprIds.addAll(e.getInputSlotExprIds())); + bottomSemi.getMarkJoinConjuncts().forEach(e -> topUsedExprIds.addAll(e.getInputSlotExprIds())); Plan left = CBOUtils.newProject(topUsedExprIds, newBottomSemi); Plan right = CBOUtils.newProjectIfNeeded(topUsedExprIds, b); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/LogicalJoinSemiJoinTransposeProjectTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/LogicalJoinSemiJoinTransposeProjectTest.java index 27e162f4af79f15..70f6de6c320ac07 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/LogicalJoinSemiJoinTransposeProjectTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/LogicalJoinSemiJoinTransposeProjectTest.java @@ -105,4 +105,28 @@ public void generateTopProject() { ) ); } + 
+ @Test + public void generateTopProjectMarkJoin() { + LogicalPlan topJoin1 = new LogicalPlanBuilder(scan1) + .markJoinWithMarkConjuncts(scan2, JoinType.LEFT_SEMI_JOIN, Pair.of(0, 0)) // t1.id = t2.id + .project(ImmutableList.of(1)) + .join(scan3, JoinType.INNER_JOIN, Pair.of(0, 0)) // t1.id = t3.id + .project(ImmutableList.of(0)) + .build(); + + PlanChecker.from(MemoTestUtils.createConnectContext(), topJoin1) + .applyExploration(LogicalJoinSemiJoinTransposeProject.INSTANCE.buildRules()) + .matchesExploration( + logicalProject( + leftSemiLogicalJoin( + logicalProject(innerLogicalJoin( + logicalOlapScan().when(scan -> scan.getTable().getName().equals("t1")), + logicalOlapScan().when(scan -> scan.getTable().getName().equals("t3")) + )), + logicalProject(logicalOlapScan().when(scan -> scan.getTable().getName().equals("t2"))) + ) + ) + ); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProjectTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProjectTest.java index dd654a2f42839de..d37be0a1a13fe5a 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProjectTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/exploration/join/SemiJoinSemiJoinTransposeProjectTest.java @@ -74,10 +74,10 @@ public void testSemiProjectSemiCommute() { @Test public void testSemiProjectSemiCommuteMarkJoin() { LogicalPlan topJoin = new LogicalPlanBuilder(scan1) - .markJoin(scan2, JoinType.LEFT_SEMI_JOIN, Pair.of(0, 0)) + .markJoinWithMarkConjuncts(scan2, JoinType.LEFT_SEMI_JOIN, Pair.of(0, 0)) .project(ImmutableList.of(0, 2)) - .markJoin(scan3, JoinType.LEFT_SEMI_JOIN, Pair.of(0, 1)) - .projectAll() + .markJoinWithMarkConjuncts(scan3, JoinType.LEFT_SEMI_JOIN, Pair.of(0, 1)) + .project(ImmutableList.of(1, 2)) .build(); PlanChecker.from(MemoTestUtils.createConnectContext(), topJoin) .applyExploration(SemiJoinSemiJoinTransposeProject.INSTANCE.build()) diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/LogicalPlanBuilder.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/LogicalPlanBuilder.java index ba81fa7e4a39cf1..ccdcd2532795719 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/LogicalPlanBuilder.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/LogicalPlanBuilder.java @@ -120,6 +120,17 @@ public LogicalPlanBuilder markJoin(LogicalPlan right, JoinType joinType, Pair hashOnSlots) { + ImmutableList markConjuncts = ImmutableList.of( + new EqualTo(this.plan.getOutput().get(hashOnSlots.first), right.getOutput().get(hashOnSlots.second))); + + LogicalJoin join = new LogicalJoin<>(joinType, Collections.emptyList(), + Collections.emptyList(), new ArrayList<>(markConjuncts), + new DistributeHint(DistributeType.NONE), Optional.of(new MarkJoinSlotReference("fake")), + this.plan, right, null); + return from(join); + } + public LogicalPlanBuilder join(LogicalPlan right, JoinType joinType, Pair hashOnSlots) { ImmutableList hashConjuncts = ImmutableList.of( new EqualTo(this.plan.getOutput().get(hashOnSlots.first), right.getOutput().get(hashOnSlots.second))); From 476a159dea4c99cc19c1bcd065a91806ecfc5d89 Mon Sep 17 00:00:00 2001 From: Sun Chenyang Date: Sun, 11 Aug 2024 11:30:32 +0800 Subject: [PATCH 34/94] [fix] (topn) fix uncleared block in topn_next() (#39119) When reusing blocks in topn, the extra columns were not cleaned up. 
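As a concrete illustration, the query shape below is taken from the new `topn_clear_block` regression case; `dup_httplogs` is the duplicate-key table with inverted indexes and delete predicates that the case creates.

```sql
-- ORDER BY ... LIMIT drives _topn_next(); before this fix the reused block
-- could still carry the internal TEMP columns from the previous round.
SELECT clientip
FROM dup_httplogs
WHERE clientip NOT IN (NULL, '') OR clientip IN ('17.0.0.0')
ORDER BY id
LIMIT 2;
```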
--- be/src/vec/olap/vcollect_iterator.cpp | 32 +++---- .../inverted_index_p0/topn_clear_block.out | 5 ++ .../inverted_index_p0/topn_clear_block.groovy | 84 +++++++++++++++++++ 3 files changed, 105 insertions(+), 16 deletions(-) create mode 100644 regression-test/data/inverted_index_p0/topn_clear_block.out create mode 100644 regression-test/suites/inverted_index_p0/topn_clear_block.groovy diff --git a/be/src/vec/olap/vcollect_iterator.cpp b/be/src/vec/olap/vcollect_iterator.cpp index ceeede7c7c337cd..0d5e595b404f28c 100644 --- a/be/src/vec/olap/vcollect_iterator.cpp +++ b/be/src/vec/olap/vcollect_iterator.cpp @@ -256,18 +256,21 @@ Status VCollectIterator::_topn_next(Block* block) { return Status::Error(""); } + // clear TEMP columns to avoid column align problem + auto clear_temp_columns = [](Block* block) { + auto all_column_names = block->get_names(); + for (auto& name : all_column_names) { + if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) { + // clear TEMP columns from block to prevent from storage engine merge with this + // fake column + block->erase(name); + } + } + }; + + clear_temp_columns(block); auto clone_block = block->clone_empty(); MutableBlock mutable_block = vectorized::MutableBlock::build_mutable_block(&clone_block); - // clear TEMP columns to avoid column align problem in mutable_block.add_rows bellow - auto all_column_names = mutable_block.get_names(); - for (auto& name : all_column_names) { - if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) { - mutable_block.erase(name); - // clear TEMP columns from block to prevent from storage engine merge with this - // fake column - block->erase(name); - } - } if (!_reader->_reader_context.read_orderby_key_columns) { return Status::Error( @@ -301,6 +304,8 @@ Status VCollectIterator::_topn_next(Block* block) { if (status.is()) { eof = true; if (block->rows() == 0) { + // clear TEMP columns to avoid column align problem in segment iterator + clear_temp_columns(block); break; } } else { @@ -312,12 +317,7 @@ Status VCollectIterator::_topn_next(Block* block) { RETURN_IF_ERROR(VExprContext::filter_block( _reader->_reader_context.filter_block_conjuncts, block, block->columns())); // clear TMPE columns to avoid column align problem in mutable_block.add_rows bellow - auto all_column_names = block->get_names(); - for (auto& name : all_column_names) { - if (name.rfind(BeConsts::BLOCK_TEMP_COLUMN_PREFIX, 0) == 0) { - block->erase(name); - } - } + clear_temp_columns(block); // update read rows read_rows += block->rows(); diff --git a/regression-test/data/inverted_index_p0/topn_clear_block.out b/regression-test/data/inverted_index_p0/topn_clear_block.out new file mode 100644 index 000000000000000..6f6227298abbb85 --- /dev/null +++ b/regression-test/data/inverted_index_p0/topn_clear_block.out @@ -0,0 +1,5 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +17.0.0.0 +17.0.0.0 + diff --git a/regression-test/suites/inverted_index_p0/topn_clear_block.groovy b/regression-test/suites/inverted_index_p0/topn_clear_block.groovy new file mode 100644 index 000000000000000..7486a658d605c91 --- /dev/null +++ b/regression-test/suites/inverted_index_p0/topn_clear_block.groovy @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_clear_block") { + + // load data + def load_data = { loadTableName, fileName -> + streamLoad { + table loadTableName + set 'read_json_by_line', 'true' + set 'format', 'json' + file fileName + time 10000 + + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals("success", json.Status.toLowerCase()) + assertEquals(json.NumberTotalRows, json.NumberLoadedRows) + assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0) + } + } + } + + sql """ set enable_match_without_inverted_index = false; """ + // sql """ set + def dupTableName = "dup_httplogs" + sql """ drop table if exists ${dupTableName} """ + // create table + sql """ + CREATE TABLE IF NOT EXISTS dup_httplogs + ( + `id` bigint NOT NULL AUTO_INCREMENT(100), + `@timestamp` int(11) NULL, + `clientip` varchar(20) NULL, + `request` text NULL, + `status` int(11) NULL, + `size` int(11) NULL, + INDEX clientip_idx (`clientip`) USING INVERTED COMMENT '', + INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "unicode", "support_phrase" = "true") COMMENT '', + INDEX status_idx (`status`) USING INVERTED COMMENT '', + INDEX size_idx (`size`) USING INVERTED COMMENT '' + ) DUPLICATE KEY(`id`) + DISTRIBUTED BY HASH (`id`) BUCKETS 32 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "compaction_policy" = "time_series", + "inverted_index_storage_format" = "v2", + "compression" = "ZSTD", + "disable_auto_compaction" = "true" + ); + """ + + load_data.call(dupTableName, 'documents-1000.json'); + load_data.call(dupTableName, 'documents-1000.json'); + load_data.call(dupTableName, 'documents-1000.json'); + load_data.call(dupTableName, 'documents-1000.json'); + load_data.call(dupTableName, 'documents-1000.json'); + sql """ delete from dup_httplogs where clientip = '40.135.0.0'; """ + sql """ delete from dup_httplogs where status = 304; """ + sql """ delete from dup_httplogs where size = 24736; """ + sql """ delete from dup_httplogs where request = 'GET /images/hm_bg.jpg HTTP/1.0'; """ + + sql """ sync """ + + qt_sql """ SELECT clientip from ${dupTableName} WHERE clientip NOT IN (NULL, '') or clientip IN ('17.0.0.0') ORDER BY id LIMIT 2 """ +} \ No newline at end of file From 505fa54b7cb51e411d1142c0f378d613d4853a86 Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Sun, 11 Aug 2024 17:40:14 +0800 Subject: [PATCH 35/94] [Fix](regression) Fix typo in `test_schema_change_unique_mow` (#39196) ## Proposed changes fix typo in https://github.com/apache/doris/pull/39173 --- .../schema_change_p0/test_schema_change_unique_mow.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/regression-test/suites/schema_change_p0/test_schema_change_unique_mow.groovy b/regression-test/suites/schema_change_p0/test_schema_change_unique_mow.groovy index 71941b87882e90f..6bf9c79372e9729 100644 --- 
a/regression-test/suites/schema_change_p0/test_schema_change_unique_mow.groovy +++ b/regression-test/suites/schema_change_p0/test_schema_change_unique_mow.groovy @@ -173,7 +173,7 @@ suite("test_schema_change_unique_mow", "p0") { return true } cnt--; - int val = 100000 + max_try_num + int val = 100000 + cnt sql """ insert into ${tableName3} values (${val}, 2, 3, 4, 5, 6.6, 1.7, 8.8, 'a', 'b', 'c', '2021-10-30', '2021-10-30 00:00:00', 9527) """ return false From e2a9c6da0ea73787172bea4f39caab7d20a09a15 Mon Sep 17 00:00:00 2001 From: meiyi Date: Sun, 11 Aug 2024 17:47:50 +0800 Subject: [PATCH 36/94] [fix](regression) Fix some insert_p2 cases (#39134) 1. txn load is not supported for mow in cloud mode 2. the `txn_insert_with_schema_change` wait for some specified state of schema change, rather than schema change is done --- .../txn_insert_concurrent_insert_mow.groovy | 5 +++++ .../txn_insert_concurrent_insert_ud.groovy | 5 +++++ .../txn_insert_concurrent_insert_update.groovy | 5 +++++ .../txn_insert_with_schema_change.groovy | 17 ++++++++++++++--- 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/regression-test/suites/insert_p2/txn_insert_concurrent_insert_mow.groovy b/regression-test/suites/insert_p2/txn_insert_concurrent_insert_mow.groovy index cf16dd955409e9a..f8a971db75ee2f9 100644 --- a/regression-test/suites/insert_p2/txn_insert_concurrent_insert_mow.groovy +++ b/regression-test/suites/insert_p2/txn_insert_concurrent_insert_mow.groovy @@ -22,6 +22,11 @@ import java.util.concurrent.TimeUnit import java.util.concurrent.CompletableFuture suite("txn_insert_concurrent_insert_mow") { + if (isCloudMode()) { + logger.info("cloud txn load does not support mow") + return + } + def tableName = "txn_insert_concurrent_insert_mow" List errors = new ArrayList<>() diff --git a/regression-test/suites/insert_p2/txn_insert_concurrent_insert_ud.groovy b/regression-test/suites/insert_p2/txn_insert_concurrent_insert_ud.groovy index 7f2f3831149af4e..fe1f5533701df8e 100644 --- a/regression-test/suites/insert_p2/txn_insert_concurrent_insert_ud.groovy +++ b/regression-test/suites/insert_p2/txn_insert_concurrent_insert_ud.groovy @@ -23,6 +23,11 @@ import java.util.concurrent.CompletableFuture // test update and delete command suite("txn_insert_concurrent_insert_ud") { + if (isCloudMode()) { + logger.info("cloud txn load does not support mow") + return + } + def tableName = "txn_insert_concurrent_insert_ud" List errors = new ArrayList<>() diff --git a/regression-test/suites/insert_p2/txn_insert_concurrent_insert_update.groovy b/regression-test/suites/insert_p2/txn_insert_concurrent_insert_update.groovy index 37a9fb8697ddbba..a5d0bcd114b5685 100644 --- a/regression-test/suites/insert_p2/txn_insert_concurrent_insert_update.groovy +++ b/regression-test/suites/insert_p2/txn_insert_concurrent_insert_update.groovy @@ -23,6 +23,11 @@ import java.util.concurrent.CompletableFuture // test partial columns update suite("txn_insert_concurrent_insert_update") { + if (isCloudMode()) { + logger.info("cloud txn load does not support mow") + return + } + def tableName = "txn_insert_concurrent_insert_update" List errors = new ArrayList<>() diff --git a/regression-test/suites/insert_p2/txn_insert_with_schema_change.groovy b/regression-test/suites/insert_p2/txn_insert_with_schema_change.groovy index e16be16c6d2e28c..3fef927c322843b 100644 --- a/regression-test/suites/insert_p2/txn_insert_with_schema_change.groovy +++ b/regression-test/suites/insert_p2/txn_insert_with_schema_change.groovy @@ -96,10 +96,21 @@ 
suite("txn_insert_with_schema_change") { def getAlterTableState = { tName, job_state -> def retry = 0 sql "use ${dbName};" - waitForSchemaChangeDone { - sql """ SHOW ALTER TABLE COLUMN WHERE tablename='${tName}' ORDER BY createtime DESC LIMIT 1 """ - time 600 + def last_state = "" + while (true) { + sleep(2000) + def state = sql """ show alter table column where tablename = "${tName}" order by CreateTime desc limit 1""" + logger.info("alter table state: ${state}") + last_state = state[0][9] + if (state.size() > 0 && last_state == job_state) { + return + } + retry++ + if (retry >= 10 || last_state == "FINISHED" || last_state == "CANCELLED") { + break + } } + assertTrue(false, "alter table job state is ${last_state}, not ${job_state} after retry ${retry} times") } // sqls size is 2 From 269fc7b52c1b077b77d7c32267349e6f9d612fc1 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Sun, 11 Aug 2024 20:26:29 +0800 Subject: [PATCH 37/94] [Fix](multi-catalog) Fix not throw error when call close() in hive/iceberg writer. (#38987) ## Proposed changes [Fix] (multi-catalog) Fix not throw error when call close() in hive/iceberg writer. When the file writer closes(), it will sync buffer to commit. Therefore, sometimes data is written only when close() is called, which can expose some errors. For example, hdfs_file_writer. Therefore, this error needs to be captured in the entire close process. --- .../writer/iceberg/viceberg_partition_writer.cpp | 14 ++++++++------ .../sink/writer/iceberg/viceberg_table_writer.cpp | 10 +++++++--- be/src/vec/sink/writer/vhive_partition_writer.cpp | 14 ++++++++------ be/src/vec/sink/writer/vhive_table_writer.cpp | 10 +++++++--- 4 files changed, 30 insertions(+), 18 deletions(-) diff --git a/be/src/vec/sink/writer/iceberg/viceberg_partition_writer.cpp b/be/src/vec/sink/writer/iceberg/viceberg_partition_writer.cpp index 9cf7af32204e2fe..924adf68145a7a8 100644 --- a/be/src/vec/sink/writer/iceberg/viceberg_partition_writer.cpp +++ b/be/src/vec/sink/writer/iceberg/viceberg_partition_writer.cpp @@ -101,24 +101,26 @@ Status VIcebergPartitionWriter::open(RuntimeState* state, RuntimeProfile* profil } Status VIcebergPartitionWriter::close(const Status& status) { + Status result_status; if (_file_format_transformer != nullptr) { - Status st = _file_format_transformer->close(); - if (!st.ok()) { + result_status = _file_format_transformer->close(); + if (!result_status.ok()) { LOG(WARNING) << fmt::format("_file_format_transformer close failed, reason: {}", - st.to_string()); + result_status.to_string()); } } - if (!status.ok() && _fs != nullptr) { + bool status_ok = result_status.ok() && status.ok(); + if (!status_ok && _fs != nullptr) { auto path = fmt::format("{}/{}", _write_info.write_path, _file_name); Status st = _fs->delete_file(path); if (!st.ok()) { LOG(WARNING) << fmt::format("Delete file {} failed, reason: {}", path, st.to_string()); } } - if (status.ok()) { + if (status_ok) { _state->iceberg_commit_datas().emplace_back(_build_iceberg_commit_data()); } - return Status::OK(); + return result_status; } Status VIcebergPartitionWriter::write(vectorized::Block& block) { diff --git a/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp b/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp index 898b71d1d9af86c..a116cfb7f39fdd6 100644 --- a/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp +++ b/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp @@ -273,15 +273,19 @@ Status VIcebergTableWriter::_filter_block(doris::vectorized::Block& block, } Status 
VIcebergTableWriter::close(Status status) { + Status result_status; int64_t partitions_to_writers_size = _partitions_to_writers.size(); { SCOPED_RAW_TIMER(&_close_ns); for (const auto& pair : _partitions_to_writers) { Status st = pair.second->close(status); - if (st != Status::OK()) { + if (!st.ok()) { LOG(WARNING) << fmt::format("partition writer close failed for partition {}", st.to_string()); - continue; + if (result_status.ok()) { + result_status = st; + continue; + } } } _partitions_to_writers.clear(); @@ -297,7 +301,7 @@ Status VIcebergTableWriter::close(Status status) { COUNTER_SET(_close_timer, _close_ns); COUNTER_SET(_write_file_counter, _write_file_count); } - return Status::OK(); + return result_status; } std::string VIcebergTableWriter::_partition_to_path(const doris::iceberg::StructLike& data) { diff --git a/be/src/vec/sink/writer/vhive_partition_writer.cpp b/be/src/vec/sink/writer/vhive_partition_writer.cpp index 5322bde57f13e2a..0d6767b6196defb 100644 --- a/be/src/vec/sink/writer/vhive_partition_writer.cpp +++ b/be/src/vec/sink/writer/vhive_partition_writer.cpp @@ -117,24 +117,26 @@ Status VHivePartitionWriter::open(RuntimeState* state, RuntimeProfile* profile) } Status VHivePartitionWriter::close(const Status& status) { + Status result_status; if (_file_format_transformer != nullptr) { - Status st = _file_format_transformer->close(); - if (!st.ok()) { + result_status = _file_format_transformer->close(); + if (!result_status.ok()) { LOG(WARNING) << fmt::format("_file_format_transformer close failed, reason: {}", - st.to_string()); + result_status.to_string()); } } - if (!status.ok() && _fs != nullptr) { + bool status_ok = result_status.ok() && status.ok(); + if (!status_ok && _fs != nullptr) { auto path = fmt::format("{}/{}", _write_info.write_path, _file_name); Status st = _fs->delete_file(path); if (!st.ok()) { LOG(WARNING) << fmt::format("Delete file {} failed, reason: {}", path, st.to_string()); } } - if (status.ok()) { + if (status_ok) { _state->hive_partition_updates().emplace_back(_build_partition_update()); } - return Status::OK(); + return result_status; } Status VHivePartitionWriter::write(vectorized::Block& block) { diff --git a/be/src/vec/sink/writer/vhive_table_writer.cpp b/be/src/vec/sink/writer/vhive_table_writer.cpp index 53f70b6b31aa94c..091560ff8ce3ca8 100644 --- a/be/src/vec/sink/writer/vhive_table_writer.cpp +++ b/be/src/vec/sink/writer/vhive_table_writer.cpp @@ -242,15 +242,19 @@ Status VHiveTableWriter::_filter_block(doris::vectorized::Block& block, } Status VHiveTableWriter::close(Status status) { + Status result_status; int64_t partitions_to_writers_size = _partitions_to_writers.size(); { SCOPED_RAW_TIMER(&_close_ns); for (const auto& pair : _partitions_to_writers) { Status st = pair.second->close(status); - if (st != Status::OK()) { + if (!st.ok()) { LOG(WARNING) << fmt::format("partition writer close failed for partition {}", st.to_string()); - continue; + if (result_status.ok()) { + result_status = st; + continue; + } } } _partitions_to_writers.clear(); @@ -266,7 +270,7 @@ Status VHiveTableWriter::close(Status status) { COUNTER_SET(_close_timer, _close_ns); COUNTER_SET(_write_file_counter, _write_file_count); } - return Status::OK(); + return result_status; } std::shared_ptr VHiveTableWriter::_create_partition_writer( From 343f971768706726ce1a6d93c02a20191548cc36 Mon Sep 17 00:00:00 2001 From: wuwenchi Date: Sun, 11 Aug 2024 20:32:29 +0800 Subject: [PATCH 38/94] [improvement](iceberg)]support doris's char/varchar to iceberg's string (#38807) ## 
Proposed changes Support doris's `char`/`varchar` to iceberg's `string`. --- .../iceberg/DorisTypeToIcebergType.java | 2 +- .../test_iceberg_support_char_varchar.out | 19 ++++ .../test_iceberg_support_char_varchar.groovy | 90 +++++++++++++++++++ 3 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 regression-test/data/external_table_p0/iceberg/write/test_iceberg_support_char_varchar.out create mode 100644 regression-test/suites/external_table_p0/iceberg/write/test_iceberg_support_char_varchar.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/DorisTypeToIcebergType.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/DorisTypeToIcebergType.java index 52e4b6cf17a3c18..d4d62cd4c5f1f27 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/DorisTypeToIcebergType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/DorisTypeToIcebergType.java @@ -110,7 +110,7 @@ public Type atomic(org.apache.doris.catalog.Type atomic) { return Types.FloatType.get(); } else if (primitiveType.equals(PrimitiveType.DOUBLE)) { return Types.DoubleType.get(); - } else if (primitiveType.equals(PrimitiveType.STRING)) { + } else if (primitiveType.isCharFamily()) { return Types.StringType.get(); } else if (primitiveType.equals(PrimitiveType.DATE) || primitiveType.equals(PrimitiveType.DATEV2)) { diff --git a/regression-test/data/external_table_p0/iceberg/write/test_iceberg_support_char_varchar.out b/regression-test/data/external_table_p0/iceberg/write/test_iceberg_support_char_varchar.out new file mode 100644 index 000000000000000..6edcc50c0ac7063 --- /dev/null +++ b/regression-test/data/external_table_p0/iceberg/write/test_iceberg_support_char_varchar.out @@ -0,0 +1,19 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !qt01 -- +a b 1 + +-- !qt02 -- +a b 1 + +-- !qt03 -- +a b 1 + +-- !qt01 -- +a b 1 + +-- !qt02 -- +a b 1 + +-- !qt03 -- +a b 1 + diff --git a/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_support_char_varchar.groovy b/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_support_char_varchar.groovy new file mode 100644 index 000000000000000..2a756a360d62c15 --- /dev/null +++ b/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_support_char_varchar.groovy @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_iceberg_support_char_varchar", "p0,external,iceberg,external_docker,external_docker_iceberg") { + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable hive test.") + return; + } + + for (String hivePrefix : ["hive2", "hive3"]) { + setHivePrefix(hivePrefix) + String hms_port = context.config.otherConfigs.get(hivePrefix + "HmsPort") + String hdfs_port = context.config.otherConfigs.get(hivePrefix + "HdfsPort") + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String iceberg_catalog_name = "test_iceberg_support_char_varchar_iceberg_${hivePrefix}" + String hive_catalog_name = "test_iceberg_support_char_varchar_hive_${hivePrefix}" + + String db = "write_test" + String tb_iceberg = "tb_iceberg_support_char_varchar_iceberg" + String tb_hive = "tb_iceberg_support_char_varchar_hive" + String tb = "tb_iceberg_support_char_varchar_doris" + + try { + + sql """drop catalog if exists ${iceberg_catalog_name}""" + sql """create catalog if not exists ${iceberg_catalog_name} properties ( + 'type'='iceberg', + 'iceberg.catalog.type'='hms', + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}', + 'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}', + 'use_meta_cache' = 'true' + );""" + sql """drop catalog if exists ${hive_catalog_name}""" + sql """create catalog if not exists ${hive_catalog_name} properties ( + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}', + 'fs.defaultFS' = 'hdfs://${externalEnvIp}:${hdfs_port}', + 'use_meta_cache' = 'true' + );""" + + sql """set enable_fallback_to_original_planner=false;""" + + sql """ create database if not exists internal.${db} """ + sql """ drop table if exists internal.${db}.${tb} """ + sql """ drop table if exists ${hive_catalog_name}.${db}.${tb_hive} """ + sql """ drop table if exists ${iceberg_catalog_name}.${db}.${tb_iceberg} """ + sql """ create table internal.${db}.${tb} (v1 varchar(20), v2 char(10), v3 int) DISTRIBUTED BY HASH(`v1`) BUCKETS 1 PROPERTIES ("replication_allocation" = "tag.location.default: 1") """ + sql """ create table ${hive_catalog_name}.${db}.${tb_hive} (v1 varchar(20), v2 char(10), v3 int); """ + sql """ create table ${iceberg_catalog_name}.${db}.${tb_iceberg} (v1 varchar(20), v2 char(10), v3 int); """ + + sql """ insert into internal.${db}.${tb} values ('a', 'b', 1)""" + sql """ insert into ${hive_catalog_name}.${db}.${tb_hive} values ('a', 'b', 1) """ + sql """ insert into ${iceberg_catalog_name}.${db}.${tb_iceberg} values ('a', 'b', 1) """ + + qt_qt01 """ select * from ${iceberg_catalog_name}.${db}.${tb_iceberg} """ + + // ctas from doris + sql """ drop table ${iceberg_catalog_name}.${db}.${tb_iceberg} """ + sql """ create table ${iceberg_catalog_name}.${db}.${tb_iceberg} as select * from internal.${db}.${tb} """ + qt_qt02 """ select * from ${iceberg_catalog_name}.${db}.${tb_iceberg} """ + + // ctas from hive + sql """ drop table ${iceberg_catalog_name}.${db}.${tb_iceberg} """ + sql """ create table ${iceberg_catalog_name}.${db}.${tb_iceberg} as select * from ${hive_catalog_name}.${db}.${tb_hive} """ + qt_qt03 """ select * from ${iceberg_catalog_name}.${db}.${tb_iceberg} """ + + } finally { + sql """drop table if exists internal.${db}.${tb}""" + sql """drop table if exists ${hive_catalog_name}.${db}.${tb_hive}""" + sql """drop table if exists ${iceberg_catalog_name}.${db}.${tb_iceberg}""" + sql """drop catalog if exists ${iceberg_catalog_name}""" + sql 
"""drop catalog if exists ${hive_catalog_name}""" + } + } +} From 649158b5a0dc4847eec203ef9cf05d7a4cc8994c Mon Sep 17 00:00:00 2001 From: Tiewei Fang <43782773+BePPPower@users.noreply.github.com> Date: Sun, 11 Aug 2024 21:01:55 +0800 Subject: [PATCH 39/94] [fix](test) fix p2 regression test of export (#39088) --- .../suites/export_p2/test_export_with_parallelism.groovy | 2 +- regression-test/suites/export_p2/test_export_with_s3.groovy | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/regression-test/suites/export_p2/test_export_with_parallelism.groovy b/regression-test/suites/export_p2/test_export_with_parallelism.groovy index 629fd83d29739b4..dca7f4c67c45634 100644 --- a/regression-test/suites/export_p2/test_export_with_parallelism.groovy +++ b/regression-test/suites/export_p2/test_export_with_parallelism.groovy @@ -129,7 +129,7 @@ suite("test_export_with_parallelism", "p2") { // check data correctness sql """ insert into ${table_load_name} select * from s3( - "uri" = "http://${bucket}.${s3_endpoint}${outfile_url_list.substring(5 + bucket.length())}.${file_suffix}", + "uri" = "http://${bucket}.${s3_endpoint}${outfile_url_list.get(j).substring(5 + bucket.length())}.${file_suffix}", "s3.access_key"= "${ak}", "s3.secret_key" = "${sk}", "format" = "${format}", diff --git a/regression-test/suites/export_p2/test_export_with_s3.groovy b/regression-test/suites/export_p2/test_export_with_s3.groovy index 530da95a16f7f6f..25e148204008338 100644 --- a/regression-test/suites/export_p2/test_export_with_s3.groovy +++ b/regression-test/suites/export_p2/test_export_with_s3.groovy @@ -109,6 +109,7 @@ suite("test_export_with_s3", "p2") { "ACCESS_KEY"= "${ak}", "SECRET_KEY" = "${sk}", "format" = "${format}", + "column_separator" = ",", "provider" = "${getS3Provider()}", "region" = "${region}" ); From c35e931b85a49a918ac4c38560b70a5939e6ddd2 Mon Sep 17 00:00:00 2001 From: wuwenchi Date: Sun, 11 Aug 2024 22:20:07 +0800 Subject: [PATCH 40/94] [improvement](iceberg)add some description for `show create` (#39179) ## Proposed changes 1. add `location` and `properties` for `show create table`. 2. add `location` for `show create database`. 
--- .../java/org/apache/doris/catalog/Env.java | 18 ++++- .../org/apache/doris/catalog/TableIf.java | 3 +- .../iceberg/IcebergExternalDatabase.java | 11 +++ .../org/apache/doris/qe/ShowExecutor.java | 8 ++ .../iceberg/test_iceberg_show_create.groovy | 73 +++++++++++++++++++ 5 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 regression-test/suites/external_table_p0/iceberg/test_iceberg_show_create.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index 7cbbc644733337d..907f25739208a0e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -138,6 +138,7 @@ import org.apache.doris.datasource.es.EsRepository; import org.apache.doris.datasource.hive.HiveTransactionMgr; import org.apache.doris.datasource.hive.event.MetastoreEventsProcessor; +import org.apache.doris.datasource.iceberg.IcebergExternalTable; import org.apache.doris.deploy.DeployManager; import org.apache.doris.deploy.impl.AmbariDeployManager; import org.apache.doris.deploy.impl.K8sDeployManager; @@ -317,6 +318,7 @@ import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -3917,6 +3919,20 @@ public static void getDdlStmt(DdlStmt ddlStmt, String dbName, TableIf table, Lis sb.append("\"table\" = \"").append(jdbcTable.getJdbcTable()).append("\",\n"); sb.append("\"table_type\" = \"").append(jdbcTable.getJdbcTypeName()).append("\""); sb.append("\n)"); + } else if (table.getType() == TableType.ICEBERG_EXTERNAL_TABLE) { + addTableComment(table, sb); + org.apache.iceberg.Table icebergTable = ((IcebergExternalTable) table).getIcebergTable(); + sb.append("\nLOCATION '").append(icebergTable.location()).append("'"); + sb.append("\nPROPERTIES ("); + Iterator> iterator = icebergTable.properties().entrySet().iterator(); + while (iterator.hasNext()) { + Entry prop = iterator.next(); + sb.append("\n \"").append(prop.getKey()).append("\" = \"").append(prop.getValue()).append("\""); + if (iterator.hasNext()) { + sb.append(","); + } + } + sb.append("\n)"); } createTableStmt.add(sb + ";"); @@ -6371,7 +6387,7 @@ public void compactTable(AdminCompactTableStmt stmt) throws DdlException { AgentTaskExecutor.submit(batchTask); } - private static void addTableComment(Table table, StringBuilder sb) { + private static void addTableComment(TableIf table, StringBuilder sb) { if (StringUtils.isNotBlank(table.getComment())) { sb.append("\nCOMMENT '").append(table.getComment(true)).append("'"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java index ea23a7ddbad27bc..cb9d780d55a80e4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java @@ -428,7 +428,8 @@ default boolean needReadLockWhenPlan() { * Doris table type. 
*/ enum TableType { - MYSQL, ODBC, OLAP, SCHEMA, INLINE_VIEW, VIEW, BROKER, ELASTICSEARCH, HIVE, ICEBERG, @Deprecated HUDI, JDBC, + MYSQL, ODBC, OLAP, SCHEMA, INLINE_VIEW, VIEW, BROKER, ELASTICSEARCH, HIVE, + @Deprecated ICEBERG, @Deprecated HUDI, JDBC, TABLE_VALUED_FUNCTION, HMS_EXTERNAL_TABLE, ES_EXTERNAL_TABLE, MATERIALIZED_VIEW, JDBC_EXTERNAL_TABLE, ICEBERG_EXTERNAL_TABLE, TEST_EXTERNAL_TABLE, PAIMON_EXTERNAL_TABLE, MAX_COMPUTE_EXTERNAL_TABLE, HUDI_EXTERNAL_TABLE, TRINO_CONNECTOR_EXTERNAL_TABLE, LAKESOUl_EXTERNAL_TABLE; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalDatabase.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalDatabase.java index 16ac6b01d4082d2..f56183972e36d22 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalDatabase.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/IcebergExternalDatabase.java @@ -21,6 +21,11 @@ import org.apache.doris.datasource.ExternalDatabase; import org.apache.doris.datasource.InitDatabaseLog; +import org.apache.iceberg.catalog.Namespace; +import org.apache.iceberg.catalog.SupportsNamespaces; + +import java.util.Map; + public class IcebergExternalDatabase extends ExternalDatabase { public IcebergExternalDatabase(ExternalCatalog extCatalog, Long id, String name) { @@ -31,4 +36,10 @@ public IcebergExternalDatabase(ExternalCatalog extCatalog, Long id, String name) protected IcebergExternalTable buildTableForInit(String tableName, long tblId, ExternalCatalog catalog) { return new IcebergExternalTable(tblId, tableName, name, (IcebergExternalCatalog) extCatalog); } + + public String getLocation() { + Map props = ((SupportsNamespaces) ((IcebergExternalCatalog) getCatalog()).getCatalog()) + .loadNamespaceMetadata(Namespace.of(name)); + return props.getOrDefault("location", ""); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java index a3d1ca313aef94c..56a88bbd6505c77 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ShowExecutor.java @@ -199,6 +199,8 @@ import org.apache.doris.datasource.hive.HMSExternalCatalog; import org.apache.doris.datasource.hive.HMSExternalTable; import org.apache.doris.datasource.hive.HiveMetaStoreClientHelper; +import org.apache.doris.datasource.iceberg.IcebergExternalCatalog; +import org.apache.doris.datasource.iceberg.IcebergExternalDatabase; import org.apache.doris.datasource.maxcompute.MaxComputeExternalCatalog; import org.apache.doris.job.manager.JobManager; import org.apache.doris.load.DeleteHandler; @@ -1099,6 +1101,12 @@ private void handleShowCreateDb() throws AnalysisException { .append(" LOCATION '") .append(db.getLocationUri()) .append("'"); + } else if (catalog instanceof IcebergExternalCatalog) { + IcebergExternalDatabase db = (IcebergExternalDatabase) catalog.getDbOrAnalysisException(showStmt.getDb()); + sb.append("CREATE DATABASE `").append(showStmt.getDb()).append("`") + .append(" LOCATION '") + .append(db.getLocation()) + .append("'"); } else { DatabaseIf db = catalog.getDbOrAnalysisException(showStmt.getDb()); sb.append("CREATE DATABASE `").append(ClusterNamespace.getNameFromFullName(showStmt.getDb())).append("`"); diff --git a/regression-test/suites/external_table_p0/iceberg/test_iceberg_show_create.groovy b/regression-test/suites/external_table_p0/iceberg/test_iceberg_show_create.groovy new file mode 100644 
index 000000000000000..8065998fa711994 --- /dev/null +++ b/regression-test/suites/external_table_p0/iceberg/test_iceberg_show_create.groovy @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_iceberg_show_create", "p0,external,doris,external_docker,external_docker_doris") { + String enabled = context.config.otherConfigs.get("enableIcebergTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable iceberg test.") + return + } + + String rest_port = context.config.otherConfigs.get("iceberg_rest_uri_port") + String minio_port = context.config.otherConfigs.get("iceberg_minio_port") + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String catalog_name = "test_iceberg_show_create" + + sql """drop catalog if exists ${catalog_name}""" + sql """ + CREATE CATALOG ${catalog_name} PROPERTIES ( + 'type'='iceberg', + 'iceberg.catalog.type'='rest', + 'uri' = 'http://${externalEnvIp}:${rest_port}', + "s3.access_key" = "admin", + "s3.secret_key" = "password", + "s3.endpoint" = "http://${externalEnvIp}:${minio_port}", + "s3.region" = "us-east-1" + );""" + + sql """ switch ${catalog_name} """ + + String db1 = "test_db1" + String db2 = "test_db2" + String tb1 = "test_tb1" + + sql """ drop table if exists ${db1}.${tb1} """ + sql """ drop database if exists ${db1} """ + sql """ drop database if exists ${db2} """ + + sql """ create database ${db1} properties ('location'='s3a://warehouse/wh/${db1}') """ + sql """ create database ${db2} """ + + String result = "" + result = sql "show create database ${db1}" + logger.info("${result}") + assertTrue(result.toString().containsIgnoreCase("s3a://warehouse/wh/${db1}")) + + result = sql "show create database ${db2}" + logger.info("${result}") + assertTrue(result.toString().containsIgnoreCase("s3a://warehouse/wh/${db2}")) + + sql """ create table ${db1}.${tb1} (id int) """ + result = sql "show create table ${db1}.${tb1}" + logger.info("${result}") + assertTrue(result.toString().containsIgnoreCase("s3a://warehouse/wh/${db1}/${tb1}")) + + sql """ drop table ${db1}.${tb1} """ + sql """ drop database ${db1} """ + sql """ drop database ${db2} """ + +} From dc8bd00bcdb12651e975134b580c4fc851321213 Mon Sep 17 00:00:00 2001 From: daidai <2017501503@qq.com> Date: Sun, 11 Aug 2024 22:21:01 +0800 Subject: [PATCH 41/94] [case](iceberg)append iceberg schema change case. 
(#38766) --- .../docker-compose/iceberg/entrypoint.sh.tpl | 23 +- .../docker-compose/iceberg/iceberg.yaml.tpl | 25 +- .../create_preinstalled_scripts/run01.sql | 77 ++ .../create_preinstalled_scripts/run02.sql | 75 ++ .../create_preinstalled_scripts/run03.sql | 75 ++ .../create_preinstalled_scripts/run04.sql | 80 ++ .../create_preinstalled_scripts/run05.sql} | 0 .../iceberg/spark-init-paimon.sql | 1 - .../thirdparties/run-thirdparties-docker.sh | 1 + .../iceberg/iceberg_schema_change.out | 1016 ++++++++++++++--- .../iceberg/iceberg_schema_change.groovy | 59 +- 11 files changed, 1221 insertions(+), 211 deletions(-) mode change 100755 => 100644 docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl create mode 100644 docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run01.sql create mode 100644 docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run02.sql create mode 100644 docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run03.sql create mode 100644 docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run04.sql rename docker/thirdparties/docker-compose/iceberg/{spark-init-iceberg.sql => scripts/create_preinstalled_scripts/run05.sql} (100%) delete mode 100644 docker/thirdparties/docker-compose/iceberg/spark-init-paimon.sql diff --git a/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl b/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl old mode 100755 new mode 100644 index 642ffdb4c4264da..1af170ff91ba7d8 --- a/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl +++ b/docker/thirdparties/docker-compose/iceberg/entrypoint.sh.tpl @@ -23,12 +23,17 @@ start-worker.sh spark://doris--spark-iceberg:7077 start-history-server.sh start-thriftserver.sh --driver-java-options "-Dderby.system.home=/tmp/derby" -# Entrypoint, for example notebook, pyspark or spark-sql -if [[ $# -gt 0 ]]; then - eval "$1" -fi - -# Avoid container exit -while true; do - sleep 1 -done + + +ls /mnt/scripts/create_preinstalled_scripts/*.sql | xargs -n 1 -I {} bash -c ' + START_TIME=$(date +%s) + spark-sql --master spark://doris--spark-iceberg:7077 -f {} + END_TIME=$(date +%s) + EXECUTION_TIME=$((END_TIME - START_TIME)) + echo "Script: {} executed in $EXECUTION_TIME seconds" +' + + +touch /mnt/SUCCESS; + +tail -f /dev/null diff --git a/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl b/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl index 10c86d742e67777..38491f645a9a3a4 100644 --- a/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl +++ b/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl @@ -31,8 +31,7 @@ services: - ./data/output/spark-warehouse:/home/iceberg/warehouse - ./data/output/spark-notebooks:/home/iceberg/notebooks/notebooks - ./data:/mnt/data - - ./spark-init-iceberg.sql:/mnt/spark-init-iceberg.sql - - ./spark-init-paimon.sql:/mnt/spark-init-paimon.sql + - ./scripts:/mnt/scripts - ./spark-defaults.conf:/opt/spark/conf/spark-defaults.conf - ./data/input/jars/paimon-spark-3.5-0.8.0.jar:/opt/spark/jars/paimon-spark-3.5-0.8.0.jar - ./data/input/jars/paimon-s3-0.8.0.jar:/opt/spark/jars/paimon-s3-0.8.0.jar @@ -40,15 +39,14 @@ services: - AWS_ACCESS_KEY_ID=admin - AWS_SECRET_ACCESS_KEY=password - AWS_REGION=us-east-1 - entrypoint: > - /bin/sh -c " - spark-sql --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions -f /mnt/spark-init-iceberg.sql 2>&1; - spark-sql --conf 
spark.sql.extensions=org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions -f /mnt/spark-init-paimon.sql 2>&1; - tail -f /dev/null - " + entrypoint: /bin/sh /mnt/scripts/entrypoint.sh networks: - doris--iceberg - + healthcheck: + test: ["CMD", "ls", "/mnt/SUCCESS"] + interval: 5s + timeout: 120s + retries: 120 rest: image: tabulario/iceberg-rest container_name: doris--iceberg-rest @@ -105,6 +103,15 @@ services: mc cp -r /mnt/data/input/minio/warehouse/* minio/warehouse/; tail -f /dev/null " + + iceberg-hello-world: + image: hello-world + container_name: doris--iceberg-hello-world + depends_on: + spark-iceberg: + condition: service_healthy + network_mode: "host" + networks: doris--iceberg: ipam: diff --git a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run01.sql b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run01.sql new file mode 100644 index 000000000000000..e5f70bc6366eb1c --- /dev/null +++ b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run01.sql @@ -0,0 +1,77 @@ + +use demo.test_db; + +drop table if exists complex_orc_v1_schema_change; +CREATE TABLE complex_orc_v1_schema_change ( + id int, + col1 array, + col2 array, + col3 array, + col4 map, + col5 map, + col6 map, + col7 struct, + col8 int, + col9 float, + col10 decimal(12,5), + col_del int +)USING iceberg +TBLPROPERTIES( + 'write.format.default' = 'orc', + 'format-version'='1'); + +INSERT INTO complex_orc_v1_schema_change +VALUES + (1, array(1, 2, 3), array(1.1, 2.2, 3.3), array(10.1234, 20.5678, 30.9876),map(1, 100, 2, 200), map(1, 1.1, 2, 2.2), map(1, 10.12345, 2, 20.98765),named_struct('x', 1, 'y', 1.1, 'z', 10.12345),10, 10.5, 100.56789, 999), + (2, array(4, 5), array(4.4, 5.5), array(40.1234, 50.5678),map(3, 300, 4, 400), map(3, 3.3, 4, 4.4), map(3, 30.12345, 4, 40.98765),named_struct('x', 2, 'y', 2.2, 'z', 20.98765),20, 20.5, 200.56789, 888), + (3, array(6), array(6.6), array(60.1234),map(5, 500), map(5, 5.5), map(5, 50.12345),named_struct('x', 3, 'y', 3.3, 'z', 30.12345),30, 30.5, 300.56789, 777), + (4, array(7,7,7), array(7.1,7.2,7.3), array(10,30),map(2,4), map(6,7), map(8,9),named_struct('x', 4, 'y', 4.4, 'z', 40.98765),40, 40.5, 400.56789, 666), + (5, NULL, NULL, NULL,NULL, NULL, NULL,named_struct('x', 5, 'y', 5.5, 'z', 50.12345),50, 50.5, 500.56789, 555), + (6, array(7, 8), array(7.7, 8.8), array(70.1234, 80.5678),map(6, 600, 7, 700), map(6, 6.6, 7, 7.7), map(6, 60.12345, 7, 70.98765),named_struct('x', 6, 'y', 6.6, 'z', 60.12345),60, 60.5, 600.56789, 444), + (7, array(9, 10), array(9.9, 10.10), array(90.1234, 100.5678),map(8, 800, 9, 900), map(8, 8.8, 9, 9.9), map(8, 80.12345, 9, 90.98765),named_struct('x', 7, 'y', 7.7, 'z', 70.98765),70, 70.5, 700.56789, 333), + (8, array(11, 12), array(11.11, 12.12), array(110.1234, 120.5678),map(10, 1000, 11, 1100), map(10, 10.10, 11, 11.11), map(10, 100.12345, 11, 110.98765),named_struct('x', 8, 'y', 8.8, 'z', 80.12345),80, 80.5, 800.56789, 222), + (9, array(13, 14), array(13.13, 14.14), array(130.1234, 140.5678),map(12, 1200, 13, 1300), map(12, 12.12, 13, 13.13), map(12, 120.12345, 13, 130.98765),named_struct('x', 9, 'y', 9.9, 'z', 90.12345),90, 90.5, 900.56789, 111), + (10, array(15, 16), array(15.15, 16.16), array(150.1234, 160.5678),map(14, 1400, 15, 1500), map(14, 14.14, 15, 15.15), map(14, 140.12345, 15, 150.98765),named_struct('x', 10, 'y', 10.10, 'z', 100.12345),100, 100.5, 1000.56789, 0); + + +ALTER TABLE complex_orc_v1_schema_change CHANGE COLUMN col1.element type 
bigint; +ALTER TABLE complex_orc_v1_schema_change CHANGE COLUMN col2.element type double; +ALTER TABLE complex_orc_v1_schema_change CHANGE COLUMN col3.element type decimal(20,4); +ALTER TABLE complex_orc_v1_schema_change CHANGE COLUMN col4.value type bigint; +ALTER TABLE complex_orc_v1_schema_change CHANGE COLUMN col5.value type double; +ALTER TABLE complex_orc_v1_schema_change CHANGE COLUMN col6.value type decimal(20,5); +ALTER TABLE complex_orc_v1_schema_change CHANGE COLUMN col7.x type bigint; +ALTER TABLE complex_orc_v1_schema_change CHANGE COLUMN col7.y type double; +ALTER TABLE complex_orc_v1_schema_change CHANGE COLUMN col7.z type decimal(20,5); +alter table complex_orc_v1_schema_change CHANGE COLUMN col8 col8 bigint; +alter table complex_orc_v1_schema_change CHANGE COLUMN col9 col9 double; +alter table complex_orc_v1_schema_change CHANGE COLUMN col10 col10 decimal(20,5); +alter table complex_orc_v1_schema_change drop column col7.z; +alter table complex_orc_v1_schema_change add column col7.add double; +alter table complex_orc_v1_schema_change change column col7.add first; +alter table complex_orc_v1_schema_change rename COLUMN col1 to rename_col1; +alter table complex_orc_v1_schema_change rename COLUMN col2 to rename_col2; +alter table complex_orc_v1_schema_change rename COLUMN col3 to rename_col3; +alter table complex_orc_v1_schema_change rename COLUMN col4 to rename_col4; +alter table complex_orc_v1_schema_change rename COLUMN col5 to rename_col5; +alter table complex_orc_v1_schema_change rename COLUMN col6 to rename_col6; +alter table complex_orc_v1_schema_change rename COLUMN col7 to rename_col7; +alter table complex_orc_v1_schema_change rename COLUMN col8 to rename_col8; +alter table complex_orc_v1_schema_change rename COLUMN col9 to rename_col9; +alter table complex_orc_v1_schema_change rename COLUMN col10 to rename_col10; +alter table complex_orc_v1_schema_change drop column col_del; +alter table complex_orc_v1_schema_change CHANGE COLUMN rename_col8 first; +alter table complex_orc_v1_schema_change CHANGE COLUMN rename_col9 after rename_col8; +alter table complex_orc_v1_schema_change CHANGE COLUMN rename_col10 after rename_col9; +alter table complex_orc_v1_schema_change add column col_add int; +alter table complex_orc_v1_schema_change add column col_add2 int; + + +INSERT INTO complex_orc_v1_schema_change (id, rename_col8, rename_col9, rename_col10,rename_col1, rename_col2, rename_col3,rename_col4, rename_col5, rename_col6,rename_col7, col_add, col_add2) +VALUES + (11,100, 11.1, 110.12345,array(11, 12, 13), array(11.1, 12.2, 13.3), array(110.1234, 120.5678, 130.9876),map(11, 1100, 12, 1200), map(11, 11.1, 12, 12.2), map(11, 110.12345, 12, 120.98765),named_struct('add', 11.1, 'x', 11, 'y', 11.1),110, 120), + (12,200, 22.2, 220.12345,array(14, 15), array(14.4, 15.5), array(140.1234, 150.5678),map(13, 1300, 14, 1400), map(13, 13.3, 14, 14.4), map(13, 130.12345, 14, 140.98765),named_struct('add', 22.2, 'x', 12, 'y', 12.2),130, 140), + (13,300, 33.3, 330.12345,array(16), array(16.6), array(160.1234),map(15, 1500), map(15, 15.5), map(15, 150.12345),named_struct('add', 33.3, 'x', 13, 'y', 13.3),150, 160), + (14,400, 44.4, 440.12345,array(), array(), array(),map(), map(), map(),named_struct('add', 44.4, 'x', 14, 'y', 14.4),170, 180), + (15,500, 55.5, 550.12345,NULL, NULL, NULL,NULL, NULL, NULL,named_struct('add', 55.5, 'x', 15, 'y', 15.5),190, 200); + diff --git a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run02.sql 
b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run02.sql new file mode 100644 index 000000000000000..500e3d200fbe1db --- /dev/null +++ b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run02.sql @@ -0,0 +1,75 @@ +use demo.test_db; + +drop table if exists complex_orc_v2_schema_change; +CREATE TABLE complex_orc_v2_schema_change ( + id int, + col1 array, + col2 array, + col3 array, + col4 map, + col5 map, + col6 map, + col7 struct, + col8 int, + col9 float, + col10 decimal(12,5), + col_del int +)USING iceberg +TBLPROPERTIES( + 'write.format.default' = 'orc', + 'format-version'='2'); + +INSERT INTO complex_orc_v2_schema_change +VALUES + (1, array(1, 2, 3), array(1.1, 2.2, 3.3), array(10.1234, 20.5678, 30.9876),map(1, 100, 2, 200), map(1, 1.1, 2, 2.2), map(1, 10.12345, 2, 20.98765),named_struct('x', 1, 'y', 1.1, 'z', 10.12345),10, 10.5, 100.56789, 999), + (2, array(4, 5), array(4.4, 5.5), array(40.1234, 50.5678),map(3, 300, 4, 400), map(3, 3.3, 4, 4.4), map(3, 30.12345, 4, 40.98765),named_struct('x', 2, 'y', 2.2, 'z', 20.98765),20, 20.5, 200.56789, 888), + (3, array(6), array(6.6), array(60.1234),map(5, 500), map(5, 5.5), map(5, 50.12345),named_struct('x', 3, 'y', 3.3, 'z', 30.12345),30, 30.5, 300.56789, 777), + (4, array(7,7,7), array(7.1,7.2,7.3), array(10,30),map(2,4), map(6,7), map(8,9),named_struct('x', 4, 'y', 4.4, 'z', 40.98765),40, 40.5, 400.56789, 666), + (5, NULL, NULL, NULL,NULL, NULL, NULL,named_struct('x', 5, 'y', 5.5, 'z', 50.12345),50, 50.5, 500.56789, 555), + (6, array(7, 8), array(7.7, 8.8), array(70.1234, 80.5678),map(6, 600, 7, 700), map(6, 6.6, 7, 7.7), map(6, 60.12345, 7, 70.98765),named_struct('x', 6, 'y', 6.6, 'z', 60.12345),60, 60.5, 600.56789, 444), + (7, array(9, 10), array(9.9, 10.10), array(90.1234, 100.5678),map(8, 800, 9, 900), map(8, 8.8, 9, 9.9), map(8, 80.12345, 9, 90.98765),named_struct('x', 7, 'y', 7.7, 'z', 70.98765),70, 70.5, 700.56789, 333), + (8, array(11, 12), array(11.11, 12.12), array(110.1234, 120.5678),map(10, 1000, 11, 1100), map(10, 10.10, 11, 11.11), map(10, 100.12345, 11, 110.98765),named_struct('x', 8, 'y', 8.8, 'z', 80.12345),80, 80.5, 800.56789, 222), + (9, array(13, 14), array(13.13, 14.14), array(130.1234, 140.5678),map(12, 1200, 13, 1300), map(12, 12.12, 13, 13.13), map(12, 120.12345, 13, 130.98765),named_struct('x', 9, 'y', 9.9, 'z', 90.12345),90, 90.5, 900.56789, 111), + (10, array(15, 16), array(15.15, 16.16), array(150.1234, 160.5678),map(14, 1400, 15, 1500), map(14, 14.14, 15, 15.15), map(14, 140.12345, 15, 150.98765),named_struct('x', 10, 'y', 10.10, 'z', 100.12345),100, 100.5, 1000.56789, 0); + + +ALTER TABLE complex_orc_v2_schema_change CHANGE COLUMN col1.element type bigint; +ALTER TABLE complex_orc_v2_schema_change CHANGE COLUMN col2.element type double; +ALTER TABLE complex_orc_v2_schema_change CHANGE COLUMN col3.element type decimal(20,4); +ALTER TABLE complex_orc_v2_schema_change CHANGE COLUMN col4.value type bigint; +ALTER TABLE complex_orc_v2_schema_change CHANGE COLUMN col5.value type double; +ALTER TABLE complex_orc_v2_schema_change CHANGE COLUMN col6.value type decimal(20,5); +ALTER TABLE complex_orc_v2_schema_change CHANGE COLUMN col7.x type bigint; +ALTER TABLE complex_orc_v2_schema_change CHANGE COLUMN col7.y type double; +ALTER TABLE complex_orc_v2_schema_change CHANGE COLUMN col7.z type decimal(20,5); +alter table complex_orc_v2_schema_change CHANGE COLUMN col8 col8 bigint; +alter table complex_orc_v2_schema_change CHANGE COLUMN col9 col9 double; +alter 
table complex_orc_v2_schema_change CHANGE COLUMN col10 col10 decimal(20,5); +alter table complex_orc_v2_schema_change drop column col7.z; +alter table complex_orc_v2_schema_change add column col7.add double; +alter table complex_orc_v2_schema_change change column col7.add first; +alter table complex_orc_v2_schema_change rename COLUMN col1 to rename_col1; +alter table complex_orc_v2_schema_change rename COLUMN col2 to rename_col2; +alter table complex_orc_v2_schema_change rename COLUMN col3 to rename_col3; +alter table complex_orc_v2_schema_change rename COLUMN col4 to rename_col4; +alter table complex_orc_v2_schema_change rename COLUMN col5 to rename_col5; +alter table complex_orc_v2_schema_change rename COLUMN col6 to rename_col6; +alter table complex_orc_v2_schema_change rename COLUMN col7 to rename_col7; +alter table complex_orc_v2_schema_change rename COLUMN col8 to rename_col8; +alter table complex_orc_v2_schema_change rename COLUMN col9 to rename_col9; +alter table complex_orc_v2_schema_change rename COLUMN col10 to rename_col10; +alter table complex_orc_v2_schema_change drop column col_del; +alter table complex_orc_v2_schema_change CHANGE COLUMN rename_col8 first; +alter table complex_orc_v2_schema_change CHANGE COLUMN rename_col9 after rename_col8; +alter table complex_orc_v2_schema_change CHANGE COLUMN rename_col10 after rename_col9; +alter table complex_orc_v2_schema_change add column col_add int; +alter table complex_orc_v2_schema_change add column col_add2 int; + + +INSERT INTO complex_orc_v2_schema_change (id, rename_col8, rename_col9, rename_col10,rename_col1, rename_col2, rename_col3,rename_col4, rename_col5, rename_col6,rename_col7, col_add, col_add2) +VALUES + (11,100, 11.1, 110.12345,array(11, 12, 13), array(11.1, 12.2, 13.3), array(110.1234, 120.5678, 130.9876),map(11, 1100, 12, 1200), map(11, 11.1, 12, 12.2), map(11, 110.12345, 12, 120.98765),named_struct('add', 11.1, 'x', 11, 'y', 11.1),110, 120), + (12,200, 22.2, 220.12345,array(14, 15), array(14.4, 15.5), array(140.1234, 150.5678),map(13, 1300, 14, 1400), map(13, 13.3, 14, 14.4), map(13, 130.12345, 14, 140.98765),named_struct('add', 22.2, 'x', 12, 'y', 12.2),130, 140), + (13,300, 33.3, 330.12345,array(16), array(16.6), array(160.1234),map(15, 1500), map(15, 15.5), map(15, 150.12345),named_struct('add', 33.3, 'x', 13, 'y', 13.3),150, 160), + (14,400, 44.4, 440.12345,array(), array(), array(),map(), map(), map(),named_struct('add', 44.4, 'x', 14, 'y', 14.4),170, 180), + (15,500, 55.5, 550.12345,NULL, NULL, NULL,NULL, NULL, NULL,named_struct('add', 55.5, 'x', 15, 'y', 15.5),190, 200); diff --git a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run03.sql b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run03.sql new file mode 100644 index 000000000000000..0860783249ccb4b --- /dev/null +++ b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run03.sql @@ -0,0 +1,75 @@ +use demo.test_db; + +drop table if exists complex_parquet_v2_schema_change; +CREATE TABLE complex_parquet_v2_schema_change ( + id int, + col1 array, + col2 array, + col3 array, + col4 map, + col5 map, + col6 map, + col7 struct, + col8 int, + col9 float, + col10 decimal(12,5), + col_del int +)USING iceberg +TBLPROPERTIES( + 'write.format.default' = 'parquet', + 'format-version'='2'); + +INSERT INTO complex_parquet_v2_schema_change +VALUES + (1, array(1, 2, 3), array(1.1, 2.2, 3.3), array(10.1234, 20.5678, 30.9876),map(1, 100, 2, 200), map(1, 1.1, 2, 2.2), map(1, 
10.12345, 2, 20.98765),named_struct('x', 1, 'y', 1.1, 'z', 10.12345),10, 10.5, 100.56789, 999), + (2, array(4, 5), array(4.4, 5.5), array(40.1234, 50.5678),map(3, 300, 4, 400), map(3, 3.3, 4, 4.4), map(3, 30.12345, 4, 40.98765),named_struct('x', 2, 'y', 2.2, 'z', 20.98765),20, 20.5, 200.56789, 888), + (3, array(6), array(6.6), array(60.1234),map(5, 500), map(5, 5.5), map(5, 50.12345),named_struct('x', 3, 'y', 3.3, 'z', 30.12345),30, 30.5, 300.56789, 777), + (4, array(7,7,7), array(7.1,7.2,7.3), array(10,30),map(2,4), map(6,7), map(8,9),named_struct('x', 4, 'y', 4.4, 'z', 40.98765),40, 40.5, 400.56789, 666), + (5, NULL, NULL, NULL,NULL, NULL, NULL,named_struct('x', 5, 'y', 5.5, 'z', 50.12345),50, 50.5, 500.56789, 555), + (6, array(7, 8), array(7.7, 8.8), array(70.1234, 80.5678),map(6, 600, 7, 700), map(6, 6.6, 7, 7.7), map(6, 60.12345, 7, 70.98765),named_struct('x', 6, 'y', 6.6, 'z', 60.12345),60, 60.5, 600.56789, 444), + (7, array(9, 10), array(9.9, 10.10), array(90.1234, 100.5678),map(8, 800, 9, 900), map(8, 8.8, 9, 9.9), map(8, 80.12345, 9, 90.98765),named_struct('x', 7, 'y', 7.7, 'z', 70.98765),70, 70.5, 700.56789, 333), + (8, array(11, 12), array(11.11, 12.12), array(110.1234, 120.5678),map(10, 1000, 11, 1100), map(10, 10.10, 11, 11.11), map(10, 100.12345, 11, 110.98765),named_struct('x', 8, 'y', 8.8, 'z', 80.12345),80, 80.5, 800.56789, 222), + (9, array(13, 14), array(13.13, 14.14), array(130.1234, 140.5678),map(12, 1200, 13, 1300), map(12, 12.12, 13, 13.13), map(12, 120.12345, 13, 130.98765),named_struct('x', 9, 'y', 9.9, 'z', 90.12345),90, 90.5, 900.56789, 111), + (10, array(15, 16), array(15.15, 16.16), array(150.1234, 160.5678),map(14, 1400, 15, 1500), map(14, 14.14, 15, 15.15), map(14, 140.12345, 15, 150.98765),named_struct('x', 10, 'y', 10.10, 'z', 100.12345),100, 100.5, 1000.56789, 0); + + +ALTER TABLE complex_parquet_v2_schema_change CHANGE COLUMN col1.element type bigint; +ALTER TABLE complex_parquet_v2_schema_change CHANGE COLUMN col2.element type double; +ALTER TABLE complex_parquet_v2_schema_change CHANGE COLUMN col3.element type decimal(20,4); +ALTER TABLE complex_parquet_v2_schema_change CHANGE COLUMN col4.value type bigint; +ALTER TABLE complex_parquet_v2_schema_change CHANGE COLUMN col5.value type double; +ALTER TABLE complex_parquet_v2_schema_change CHANGE COLUMN col6.value type decimal(20,5); +ALTER TABLE complex_parquet_v2_schema_change CHANGE COLUMN col7.x type bigint; +ALTER TABLE complex_parquet_v2_schema_change CHANGE COLUMN col7.y type double; +ALTER TABLE complex_parquet_v2_schema_change CHANGE COLUMN col7.z type decimal(20,5); +alter table complex_parquet_v2_schema_change CHANGE COLUMN col8 col8 bigint; +alter table complex_parquet_v2_schema_change CHANGE COLUMN col9 col9 double; +alter table complex_parquet_v2_schema_change CHANGE COLUMN col10 col10 decimal(20,5); +alter table complex_parquet_v2_schema_change drop column col7.z; +alter table complex_parquet_v2_schema_change add column col7.add double; +alter table complex_parquet_v2_schema_change change column col7.add first; +alter table complex_parquet_v2_schema_change rename COLUMN col1 to rename_col1; +alter table complex_parquet_v2_schema_change rename COLUMN col2 to rename_col2; +alter table complex_parquet_v2_schema_change rename COLUMN col3 to rename_col3; +alter table complex_parquet_v2_schema_change rename COLUMN col4 to rename_col4; +alter table complex_parquet_v2_schema_change rename COLUMN col5 to rename_col5; +alter table complex_parquet_v2_schema_change rename COLUMN col6 to rename_col6; 
+alter table complex_parquet_v2_schema_change rename COLUMN col7 to rename_col7; +alter table complex_parquet_v2_schema_change rename COLUMN col8 to rename_col8; +alter table complex_parquet_v2_schema_change rename COLUMN col9 to rename_col9; +alter table complex_parquet_v2_schema_change rename COLUMN col10 to rename_col10; +alter table complex_parquet_v2_schema_change drop column col_del; +alter table complex_parquet_v2_schema_change CHANGE COLUMN rename_col8 first; +alter table complex_parquet_v2_schema_change CHANGE COLUMN rename_col9 after rename_col8; +alter table complex_parquet_v2_schema_change CHANGE COLUMN rename_col10 after rename_col9; +alter table complex_parquet_v2_schema_change add column col_add int; +alter table complex_parquet_v2_schema_change add column col_add2 int; + + +INSERT INTO complex_parquet_v2_schema_change (id, rename_col8, rename_col9, rename_col10,rename_col1, rename_col2, rename_col3,rename_col4, rename_col5, rename_col6,rename_col7, col_add, col_add2) +VALUES + (11,100, 11.1, 110.12345,array(11, 12, 13), array(11.1, 12.2, 13.3), array(110.1234, 120.5678, 130.9876),map(11, 1100, 12, 1200), map(11, 11.1, 12, 12.2), map(11, 110.12345, 12, 120.98765),named_struct('add', 11.1, 'x', 11, 'y', 11.1),110, 120), + (12,200, 22.2, 220.12345,array(14, 15), array(14.4, 15.5), array(140.1234, 150.5678),map(13, 1300, 14, 1400), map(13, 13.3, 14, 14.4), map(13, 130.12345, 14, 140.98765),named_struct('add', 22.2, 'x', 12, 'y', 12.2),130, 140), + (13,300, 33.3, 330.12345,array(16), array(16.6), array(160.1234),map(15, 1500), map(15, 15.5), map(15, 150.12345),named_struct('add', 33.3, 'x', 13, 'y', 13.3),150, 160), + (14,400, 44.4, 440.12345,array(), array(), array(),map(), map(), map(),named_struct('add', 44.4, 'x', 14, 'y', 14.4),170, 180), + (15,500, 55.5, 550.12345,NULL, NULL, NULL,NULL, NULL, NULL,named_struct('add', 55.5, 'x', 15, 'y', 15.5),190, 200); diff --git a/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run04.sql b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run04.sql new file mode 100644 index 000000000000000..d5ba0048f39641a --- /dev/null +++ b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run04.sql @@ -0,0 +1,80 @@ + +use demo.test_db; + +drop table if exists complex_parquet_v1_schema_change; +CREATE TABLE complex_parquet_v1_schema_change ( + id int, + col1 array, + col2 array, + col3 array, + col4 map, + col5 map, + col6 map, + col7 struct, + col8 int, + col9 float, + col10 decimal(12,5), + col_del int +)USING iceberg +TBLPROPERTIES( + 'write.format.default' = 'parquet', + 'format-version'='1'); + +INSERT INTO complex_parquet_v1_schema_change +VALUES + (1, array(1, 2, 3), array(1.1, 2.2, 3.3), array(10.1234, 20.5678, 30.9876),map(1, 100, 2, 200), map(1, 1.1, 2, 2.2), map(1, 10.12345, 2, 20.98765),named_struct('x', 1, 'y', 1.1, 'z', 10.12345),10, 10.5, 100.56789, 999), + (2, array(4, 5), array(4.4, 5.5), array(40.1234, 50.5678),map(3, 300, 4, 400), map(3, 3.3, 4, 4.4), map(3, 30.12345, 4, 40.98765),named_struct('x', 2, 'y', 2.2, 'z', 20.98765),20, 20.5, 200.56789, 888), + (3, array(6), array(6.6), array(60.1234),map(5, 500), map(5, 5.5), map(5, 50.12345),named_struct('x', 3, 'y', 3.3, 'z', 30.12345),30, 30.5, 300.56789, 777), + (4, array(7,7,7), array(7.1,7.2,7.3), array(10,30),map(2,4), map(6,7), map(8,9),named_struct('x', 4, 'y', 4.4, 'z', 40.98765),40, 40.5, 400.56789, 666), + (5, NULL, NULL, NULL,NULL, NULL, NULL,named_struct('x', 5, 'y', 5.5, 'z', 
50.12345),50, 50.5, 500.56789, 555), + (6, array(7, 8), array(7.7, 8.8), array(70.1234, 80.5678),map(6, 600, 7, 700), map(6, 6.6, 7, 7.7), map(6, 60.12345, 7, 70.98765),named_struct('x', 6, 'y', 6.6, 'z', 60.12345),60, 60.5, 600.56789, 444), + (7, array(9, 10), array(9.9, 10.10), array(90.1234, 100.5678),map(8, 800, 9, 900), map(8, 8.8, 9, 9.9), map(8, 80.12345, 9, 90.98765),named_struct('x', 7, 'y', 7.7, 'z', 70.98765),70, 70.5, 700.56789, 333), + (8, array(11, 12), array(11.11, 12.12), array(110.1234, 120.5678),map(10, 1000, 11, 1100), map(10, 10.10, 11, 11.11), map(10, 100.12345, 11, 110.98765),named_struct('x', 8, 'y', 8.8, 'z', 80.12345),80, 80.5, 800.56789, 222), + (9, array(13, 14), array(13.13, 14.14), array(130.1234, 140.5678),map(12, 1200, 13, 1300), map(12, 12.12, 13, 13.13), map(12, 120.12345, 13, 130.98765),named_struct('x', 9, 'y', 9.9, 'z', 90.12345),90, 90.5, 900.56789, 111), + (10, array(15, 16), array(15.15, 16.16), array(150.1234, 160.5678),map(14, 1400, 15, 1500), map(14, 14.14, 15, 15.15), map(14, 140.12345, 15, 150.98765),named_struct('x', 10, 'y', 10.10, 'z', 100.12345),100, 100.5, 1000.56789, 0); + + +ALTER TABLE complex_parquet_v1_schema_change CHANGE COLUMN col1.element type bigint; +ALTER TABLE complex_parquet_v1_schema_change CHANGE COLUMN col2.element type double; +ALTER TABLE complex_parquet_v1_schema_change CHANGE COLUMN col3.element type decimal(20,4); +ALTER TABLE complex_parquet_v1_schema_change CHANGE COLUMN col4.value type bigint; +ALTER TABLE complex_parquet_v1_schema_change CHANGE COLUMN col5.value type double; +ALTER TABLE complex_parquet_v1_schema_change CHANGE COLUMN col6.value type decimal(20,5); +ALTER TABLE complex_parquet_v1_schema_change CHANGE COLUMN col7.x type bigint; +ALTER TABLE complex_parquet_v1_schema_change CHANGE COLUMN col7.y type double; +ALTER TABLE complex_parquet_v1_schema_change CHANGE COLUMN col7.z type decimal(20,5); +alter table complex_parquet_v1_schema_change CHANGE COLUMN col8 col8 bigint; +alter table complex_parquet_v1_schema_change CHANGE COLUMN col9 col9 double; +alter table complex_parquet_v1_schema_change CHANGE COLUMN col10 col10 decimal(20,5); +alter table complex_parquet_v1_schema_change drop column col7.z; +alter table complex_parquet_v1_schema_change add column col7.add double; +alter table complex_parquet_v1_schema_change change column col7.add first; +alter table complex_parquet_v1_schema_change rename COLUMN col1 to rename_col1; +alter table complex_parquet_v1_schema_change rename COLUMN col2 to rename_col2; +alter table complex_parquet_v1_schema_change rename COLUMN col3 to rename_col3; +alter table complex_parquet_v1_schema_change rename COLUMN col4 to rename_col4; +alter table complex_parquet_v1_schema_change rename COLUMN col5 to rename_col5; +alter table complex_parquet_v1_schema_change rename COLUMN col6 to rename_col6; +alter table complex_parquet_v1_schema_change rename COLUMN col7 to rename_col7; +alter table complex_parquet_v1_schema_change rename COLUMN col8 to rename_col8; +alter table complex_parquet_v1_schema_change rename COLUMN col9 to rename_col9; +alter table complex_parquet_v1_schema_change rename COLUMN col10 to rename_col10; +alter table complex_parquet_v1_schema_change drop column col_del; +alter table complex_parquet_v1_schema_change CHANGE COLUMN rename_col8 first; +alter table complex_parquet_v1_schema_change CHANGE COLUMN rename_col9 after rename_col8; +alter table complex_parquet_v1_schema_change CHANGE COLUMN rename_col10 after rename_col9; +alter table 
complex_parquet_v1_schema_change add column col_add int; +alter table complex_parquet_v1_schema_change add column col_add2 int; + + +INSERT INTO complex_parquet_v1_schema_change (id, rename_col8, rename_col9, rename_col10,rename_col1, rename_col2, rename_col3,rename_col4, rename_col5, rename_col6,rename_col7, col_add, col_add2) +VALUES + (11,100, 11.1, 110.12345,array(11, 12, 13), array(11.1, 12.2, 13.3), array(110.1234, 120.5678, 130.9876),map(11, 1100, 12, 1200), map(11, 11.1, 12, 12.2), map(11, 110.12345, 12, 120.98765),named_struct('add', 11.1, 'x', 11, 'y', 11.1),110, 120), + (12,200, 22.2, 220.12345,array(14, 15), array(14.4, 15.5), array(140.1234, 150.5678),map(13, 1300, 14, 1400), map(13, 13.3, 14, 14.4), map(13, 130.12345, 14, 140.98765),named_struct('add', 22.2, 'x', 12, 'y', 12.2),130, 140), + (13,300, 33.3, 330.12345,array(16), array(16.6), array(160.1234),map(15, 1500), map(15, 15.5), map(15, 150.12345),named_struct('add', 33.3, 'x', 13, 'y', 13.3),150, 160), + (14,400, 44.4, 440.12345,array(), array(), array(),map(), map(), map(),named_struct('add', 44.4, 'x', 14, 'y', 14.4),170, 180), + (15,500, 55.5, 550.12345,NULL, NULL, NULL,NULL, NULL, NULL,named_struct('add', 55.5, 'x', 15, 'y', 15.5),190, 200); + + + + diff --git a/docker/thirdparties/docker-compose/iceberg/spark-init-iceberg.sql b/docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run05.sql similarity index 100% rename from docker/thirdparties/docker-compose/iceberg/spark-init-iceberg.sql rename to docker/thirdparties/docker-compose/iceberg/scripts/create_preinstalled_scripts/run05.sql diff --git a/docker/thirdparties/docker-compose/iceberg/spark-init-paimon.sql b/docker/thirdparties/docker-compose/iceberg/spark-init-paimon.sql deleted file mode 100644 index c868c4f7b1950ec..000000000000000 --- a/docker/thirdparties/docker-compose/iceberg/spark-init-paimon.sql +++ /dev/null @@ -1 +0,0 @@ --- create database if not exists paimon.test_paimon_db; diff --git a/docker/thirdparties/run-thirdparties-docker.sh b/docker/thirdparties/run-thirdparties-docker.sh index 27250b2a07420e3..0da1f251ed4cdbc 100755 --- a/docker/thirdparties/run-thirdparties-docker.sh +++ b/docker/thirdparties/run-thirdparties-docker.sh @@ -398,6 +398,7 @@ if [[ "${RUN_ICEBERG}" -eq 1 ]]; then cp "${ROOT}"/docker-compose/iceberg/entrypoint.sh.tpl "${ROOT}"/docker-compose/iceberg/entrypoint.sh sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/iceberg/iceberg.yaml sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/iceberg/entrypoint.sh + cp "${ROOT}"/docker-compose/iceberg/entrypoint.sh "${ROOT}"/docker-compose/iceberg/scripts/entrypoint.sh sudo docker compose -f "${ROOT}"/docker-compose/iceberg/iceberg.yaml --env-file "${ROOT}"/docker-compose/iceberg/iceberg.env down if [[ "${STOP}" -ne 1 ]]; then if [[ ! -d "${ICEBERG_DIR}/data" ]]; then diff --git a/regression-test/data/external_table_p0/iceberg/iceberg_schema_change.out b/regression-test/data/external_table_p0/iceberg/iceberg_schema_change.out index 06fa6aec7cf4625..ca5adb14283e2bb 100644 --- a/regression-test/data/external_table_p0/iceberg/iceberg_schema_change.out +++ b/regression-test/data/external_table_p0/iceberg/iceberg_schema_change.out @@ -1,50 +1,49 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !parquet_v1_1 -- -rename_col8 BIGINT Yes true \N -rename_col9 DOUBLE Yes true \N -rename_col10 DECIMAL(20, 5) Yes true \N -id INT Yes true \N -rename_col1 ARRAY Yes true \N -rename_col2 ARRAY Yes true \N -rename_col3 ARRAY Yes true \N -rename_col4 MAP Yes true \N -rename_col5 MAP Yes true \N -rename_col6 MAP Yes true \N -rename_col7 STRUCT Yes true \N -col_add INT Yes true \N -col_add2 INT Yes true \N +rename_col8 bigint Yes true \N +rename_col9 double Yes true \N +rename_col10 decimal(20,5) Yes true \N +id int Yes true \N +rename_col1 array Yes true \N +rename_col2 array Yes true \N +rename_col3 array Yes true \N +rename_col4 map Yes true \N +rename_col5 map Yes true \N +rename_col6 map Yes true \N +rename_col7 struct Yes true \N +col_add int Yes true \N +col_add2 int Yes true \N -- !parquet_v1_2 -- -1 1.2000000476837158 1.12345 1 [1, 2, 3] [1.100000023841858, 2.200000047683716, 3.299999952316284] [1.1234, 2.2345, 3.3456] {1:10, 2:20} {1:1.100000023841858, 2:2.200000047683716} {1:1.12345, 2:2.23456} {"add":null, "x":1, "y":1.100000023841858} \N \N -1 1.2000000476837158 1.12345 2 [4, 5, 6] [4.400000095367432, 5.5, 6.599999904632568] [4.4567, 5.5678, 6.6789] {3:30, 4:40} {3:3.299999952316284, 4:4.400000095367432} {3:3.34567, 4:4.45678} {"add":null, "x":2, "y":2.200000047683716} \N \N -1 1.2000000476837158 1.12345 3 [7, 8, 9] [7.699999809265137, 8.800000190734863, 9.899999618530273] [7.7890, 8.8901, 9.9012] {5:50, 6:60} {5:5.5, 6:6.599999904632568} {5:5.56789, 6:6.67890} {"add":null, "x":3, "y":3.299999952316284} \N \N -1 1.2000000476837158 1.12345 4 [10, 11, 12] [10.100000381469727, 11.109999656677246, 12.119999885559082] [10.1011, 11.1112, 12.1213] {7:70, 8:80} {7:7.699999809265137, 8:8.800000190734863} {7:7.78901, 8:8.89012} {"add":null, "x":4, "y":4.400000095367432} \N \N -1 1.2000000476837158 1.12345 5 [13, 14, 15] [13.130000114440918, 14.140000343322754, 15.149999618530273] [13.1314, 14.1415, 15.1516] {9:90, 10:100} {9:9.899999618530273, 10:10.100000381469727} {9:9.89012, 10:10.10123} {"add":null, "x":5, "y":5.5} \N \N -21447483648 1.7976931348623157E308 1234567890.12345 6 [21447483648, 21474483649, 21474483650] [1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308] [1234567890.1235, 1234567890.2346, 1234567890.3457] {214748348:2147483648, 24748649:214743383649} {214748648:1.7976931348623157e+308, 27483649:1.7976931348623157e+308} {214743648:1234567890.12345, 21474649:1234567890.23456} {"add":1234567890.12345, "x":214748223648, "y":1.7976931346232156e+308} 1 2 -2144748345648 1.7976931348623157E308 1234567890.23456 7 [2144748345648, 214742435483649, 214742435483650] [1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308] [12345673890.1235, 12345367890.2346, 12344567890.3457] {214748348:2147483632148, 24748649:213144743383649} {214748648:1.717693623157e+308, 27483649:1.7976931348623157e+308} {214743648:1234567890.12345, 21474649:1234567890.23456} {"add":1234567890.12345, "x":214743338223648, "y":1.7976931346232156e+308} 2 3 -21447483648 1.7976931348623157E308 1234567890.12345 8 [21447483648, 21474483649, 21474483650] [1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308] [1234567890.1235, 1234567890.2346, 1234567890.3457] {214748348:2147483648, 24748649:214743383649} {214748648:1.7976931348623157e+308, 27483649:1.7976931348623157e+308} {214743648:1234567890.12345, 21474649:1234567890.23456} {"add":1234567890.12345, "x":214748223648, "y":1.7976931346232156e+308} 3 4 
-2144748345648 1.7976931348623157E308 1234567890.23456 9 [2144748345648, 214742435483649, 214742435483650, 214742435483650, 214742435483650, 214742435483650] [1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308] [12345673890.1235, 12345367890.2346, 12344567890.3457] {214748348:2147483632148, 24748649:213144743383649} {214748648:1.717693623157e+308, 27483649:1.7976931348623157e+308} {214743648:1234567890.12345, 21474649:1234567890.23456} {"add":1234567890.12345, "x":214743338223648, "y":1.7976931346232156e+308} 4 5 +10 10.5 100.56789 1 [1, 2, 3] [1.100000023841858, 2.200000047683716, 3.299999952316284] [10.1234, 20.5678, 30.9876] {1:100, 2:200} {1:1.100000023841858, 2:2.200000047683716} {1:10.12345, 2:20.98765} {"add":null, "x":1, "y":1.100000023841858} \N \N +20 20.5 200.56789 2 [4, 5] [4.400000095367432, 5.5] [40.1234, 50.5678] {3:300, 4:400} {3:3.299999952316284, 4:4.400000095367432} {3:30.12345, 4:40.98765} {"add":null, "x":2, "y":2.200000047683716} \N \N +30 30.5 300.56789 3 [6] [6.599999904632568] [60.1234] {5:500} {5:5.5} {5:50.12345} {"add":null, "x":3, "y":3.299999952316284} \N \N +40 40.5 400.56789 4 [7, 7, 7] [7.099999904632568, 7.199999809265137, 7.300000190734863] [10.0000, 30.0000] {2:4} {6:7} {8:9.00000} {"add":null, "x":4, "y":4.400000095367432} \N \N +50 50.5 500.56789 5 \N \N \N \N \N \N {"add":null, "x":5, "y":5.5} \N \N +60 60.5 600.56789 6 [7, 8] [7.699999809265137, 8.800000190734863] [70.1234, 80.5678] {6:600, 7:700} {6:6.599999904632568, 7:7.699999809265137} {6:60.12345, 7:70.98765} {"add":null, "x":6, "y":6.599999904632568} \N \N +70 70.5 700.56789 7 [9, 10] [9.899999618530273, 10.100000381469727] [90.1234, 100.5678] {8:800, 9:900} {8:8.800000190734863, 9:9.899999618530273} {8:80.12345, 9:90.98765} {"add":null, "x":7, "y":7.699999809265137} \N \N +80 80.5 800.56789 8 [11, 12] [11.109999656677246, 12.119999885559082] [110.1234, 120.5678] {10:1000, 11:1100} {10:10.100000381469727, 11:11.109999656677246} {10:100.12345, 11:110.98765} {"add":null, "x":8, "y":8.800000190734863} \N \N +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +100 11.1 110.12345 11 [11, 12, 13] [11.1, 12.2, 13.3] [110.1234, 120.5678, 130.9876] {11:1100, 12:1200} {11:11.1, 12:12.2} {11:110.12345, 12:120.98765} {"add":11.1, "x":11, "y":11.1} 110 120 +200 22.2 220.12345 12 [14, 15] [14.4, 15.5] [140.1234, 150.5678] {13:1300, 14:1400} {13:13.3, 14:14.4} {13:130.12345, 14:140.98765} {"add":22.2, "x":12, "y":12.2} 130 140 +300 33.3 330.12345 13 [16] [16.6] [160.1234] {15:1500} {15:15.5} {15:150.12345} {"add":33.3, "x":13, "y":13.3} 150 160 +400 44.4 440.12345 14 [] [] [] {} {} {} {"add":44.4, "x":14, "y":14.4} 170 180 +500 55.5 550.12345 15 \N \N \N \N \N \N {"add":55.5, "x":15, "y":15.5} 190 200 -- !parquet_v1_3 -- -9 +15 -- !parquet_v1_4 -- -6 -7 -8 -9 -- !parquet_v1_5 -- -1 -2 -3 -4 +110 +130 +150 +170 +190 -- !parquet_v1_6 -- -6 -7 -8 -9 -- !parquet_v1_7 -- {"add":null, "x":1, "y":1.100000023841858} @@ -52,21 +51,39 @@ col_add2 INT Yes true \N {"add":null, "x":3, "y":3.299999952316284} {"add":null, "x":4, "y":4.400000095367432} {"add":null, "x":5, "y":5.5} 
-{"add":1234567890.12345, "x":214748223648, "y":1.7976931346232156e+308} -{"add":1234567890.12345, "x":214743338223648, "y":1.7976931346232156e+308} -{"add":1234567890.12345, "x":214748223648, "y":1.7976931346232156e+308} -{"add":1234567890.12345, "x":214743338223648, "y":1.7976931346232156e+308} +{"add":null, "x":6, "y":6.599999904632568} +{"add":null, "x":7, "y":7.699999809265137} +{"add":null, "x":8, "y":8.800000190734863} +{"add":null, "x":9, "y":9.899999618530273} +{"add":null, "x":10, "y":10.100000381469727} +{"add":11.1, "x":11, "y":11.1} +{"add":22.2, "x":12, "y":12.2} +{"add":33.3, "x":13, "y":13.3} +{"add":44.4, "x":14, "y":14.4} +{"add":55.5, "x":15, "y":15.5} -- !parquet_v1_8 -- -3 -4 -5 +\N +\N +\N +\N +120 +140 +160 +180 +200 -- !parquet_v1_9 -- -9 1 -8 1 -7 1 -6 1 +15 1 +14 1 +13 1 +12 1 +11 1 +10 0 +9 0 +8 0 +7 0 +6 0 5 0 4 0 3 0 @@ -75,52 +92,196 @@ col_add2 INT Yes true \N -- !parquet_v1_10 -- +-- !parquet_v1_11 -- +3 [10.1234, 20.5678, 30.9876] +3 [110.1234, 120.5678, 130.9876] + +-- !parquet_v1_12 -- +3 [1.100000023841858, 2.200000047683716, 3.299999952316284] +2 [4.400000095367432, 5.5] +1 [6.599999904632568] +3 [7.099999904632568, 7.199999809265137, 7.300000190734863] +\N \N +2 [7.699999809265137, 8.800000190734863] +2 [9.899999618530273, 10.100000381469727] +2 [11.109999656677246, 12.119999885559082] +2 [13.130000114440918, 14.140000343322754] +2 [15.149999618530273, 16.15999984741211] +3 [11.1, 12.2, 13.3] +2 [14.4, 15.5] +1 [16.6] +0 [] +\N \N + +-- !parquet_v1_13 -- +3 [7.099999904632568, 7.199999809265137, 7.300000190734863] +2 [7.699999809265137, 8.800000190734863] +2 [9.899999618530273, 10.100000381469727] +2 [11.109999656677246, 12.119999885559082] +2 [13.130000114440918, 14.140000343322754] +2 [15.149999618530273, 16.15999984741211] +3 [11.1, 12.2, 13.3] +2 [14.4, 15.5] +1 [16.6] + +-- !parquet_v1_14 -- +2 [11.109999656677246, 12.119999885559082] +2 [13.130000114440918, 14.140000343322754] +2 [15.149999618530273, 16.15999984741211] +3 [11.1, 12.2, 13.3] +2 [14.4, 15.5] +1 [16.6] + +-- !parquet_v1_15 -- +3 [1, 2, 3] +2 [4, 5] +1 [6] +3 [7, 7, 7] +\N \N +2 [7, 8] +2 [9, 10] +2 [11, 12] +2 [13, 14] +2 [15, 16] +3 [11, 12, 13] +2 [14, 15] +1 [16] +0 [] +\N \N + +-- !parquet_v1_16 -- +50 50.5 500.56789 5 \N \N \N \N \N \N {"add":null, "x":5, "y":5.5} \N \N +60 60.5 600.56789 6 [7, 8] [7.699999809265137, 8.800000190734863] [70.1234, 80.5678] {6:600, 7:700} {6:6.599999904632568, 7:7.699999809265137} {6:60.12345, 7:70.98765} {"add":null, "x":6, "y":6.599999904632568} \N \N +70 70.5 700.56789 7 [9, 10] [9.899999618530273, 10.100000381469727] [90.1234, 100.5678] {8:800, 9:900} {8:8.800000190734863, 9:9.899999618530273} {8:80.12345, 9:90.98765} {"add":null, "x":7, "y":7.699999809265137} \N \N +80 80.5 800.56789 8 [11, 12] [11.109999656677246, 12.119999885559082] [110.1234, 120.5678] {10:1000, 11:1100} {10:10.100000381469727, 11:11.109999656677246} {10:100.12345, 11:110.98765} {"add":null, "x":8, "y":8.800000190734863} \N \N +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +500 55.5 550.12345 15 \N \N \N \N \N \N {"add":55.5, "x":15, "y":15.5} 190 200 
+ +-- !parquet_v1_17 -- +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +100 11.1 110.12345 11 [11, 12, 13] [11.1, 12.2, 13.3] [110.1234, 120.5678, 130.9876] {11:1100, 12:1200} {11:11.1, 12:12.2} {11:110.12345, 12:120.98765} {"add":11.1, "x":11, "y":11.1} 110 120 +200 22.2 220.12345 12 [14, 15] [14.4, 15.5] [140.1234, 150.5678] {13:1300, 14:1400} {13:13.3, 14:14.4} {13:130.12345, 14:140.98765} {"add":22.2, "x":12, "y":12.2} 130 140 +300 33.3 330.12345 13 [16] [16.6] [160.1234] {15:1500} {15:15.5} {15:150.12345} {"add":33.3, "x":13, "y":13.3} 150 160 + +-- !parquet_v1_18 -- +80 80.5 800.56789 8 [11, 12] [11.109999656677246, 12.119999885559082] [110.1234, 120.5678] {10:1000, 11:1100} {10:10.100000381469727, 11:11.109999656677246} {10:100.12345, 11:110.98765} {"add":null, "x":8, "y":8.800000190734863} \N \N +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +100 11.1 110.12345 11 [11, 12, 13] [11.1, 12.2, 13.3] [110.1234, 120.5678, 130.9876] {11:1100, 12:1200} {11:11.1, 12:12.2} {11:110.12345, 12:120.98765} {"add":11.1, "x":11, "y":11.1} 110 120 +200 22.2 220.12345 12 [14, 15] [14.4, 15.5] [140.1234, 150.5678] {13:1300, 14:1400} {13:13.3, 14:14.4} {13:130.12345, 14:140.98765} {"add":22.2, "x":12, "y":12.2} 130 140 +300 33.3 330.12345 13 [16] [16.6] [160.1234] {15:1500} {15:15.5} {15:150.12345} {"add":33.3, "x":13, "y":13.3} 150 160 + +-- !parquet_v1_19 -- +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +11.1 +22.2 +33.3 +44.4 +55.5 + +-- !parquet_v1_20 -- +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 + +-- !parquet_v1_21 -- +3 [10.1234, 20.5678, 30.9876] +2 [40.1234, 50.5678] +1 [60.1234] +2 [10.0000, 30.0000] +\N \N +2 [70.1234, 80.5678] +2 [90.1234, 100.5678] +2 [110.1234, 120.5678] +2 [130.1234, 140.5678] +2 [150.1234, 160.5678] +3 [110.1234, 120.5678, 130.9876] +2 [140.1234, 150.5678] +1 [160.1234] +0 [] +\N \N + +-- !parquet_v1_22 -- +50 50.5 500.56789 5 \N \N \N \N \N \N {"add":null, "x":5, "y":5.5} \N \N +60 60.5 600.56789 6 [7, 8] [7.699999809265137, 8.800000190734863] [70.1234, 80.5678] {6:600, 7:700} {6:6.599999904632568, 7:7.699999809265137} {6:60.12345, 7:70.98765} {"add":null, "x":6, "y":6.599999904632568} \N \N +70 70.5 700.56789 7 [9, 10] [9.899999618530273, 10.100000381469727] [90.1234, 100.5678] {8:800, 9:900} {8:8.800000190734863, 9:9.899999618530273} {8:80.12345, 9:90.98765} {"add":null, "x":7, "y":7.699999809265137} \N \N +80 80.5 800.56789 8 [11, 12] [11.109999656677246, 12.119999885559082] [110.1234, 120.5678] {10:1000, 11:1100} {10:10.100000381469727, 11:11.109999656677246} {10:100.12345, 11:110.98765} {"add":null, "x":8, "y":8.800000190734863} \N \N +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 
140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +100 11.1 110.12345 11 [11, 12, 13] [11.1, 12.2, 13.3] [110.1234, 120.5678, 130.9876] {11:1100, 12:1200} {11:11.1, 12:12.2} {11:110.12345, 12:120.98765} {"add":11.1, "x":11, "y":11.1} 110 120 +200 22.2 220.12345 12 [14, 15] [14.4, 15.5] [140.1234, 150.5678] {13:1300, 14:1400} {13:13.3, 14:14.4} {13:130.12345, 14:140.98765} {"add":22.2, "x":12, "y":12.2} 130 140 +300 33.3 330.12345 13 [16] [16.6] [160.1234] {15:1500} {15:15.5} {15:150.12345} {"add":33.3, "x":13, "y":13.3} 150 160 +400 44.4 440.12345 14 [] [] [] {} {} {} {"add":44.4, "x":14, "y":14.4} 170 180 +500 55.5 550.12345 15 \N \N \N \N \N \N {"add":55.5, "x":15, "y":15.5} 190 200 + -- !parquet_v2_1 -- -rename_col8 BIGINT Yes true \N -rename_col9 DOUBLE Yes true \N -rename_col10 DECIMAL(20, 5) Yes true \N -id INT Yes true \N -rename_col1 ARRAY Yes true \N -rename_col2 ARRAY Yes true \N -rename_col3 ARRAY Yes true \N -rename_col4 MAP Yes true \N -rename_col5 MAP Yes true \N -rename_col6 MAP Yes true \N -rename_col7 STRUCT Yes true \N -col_add INT Yes true \N -col_add2 INT Yes true \N +rename_col8 bigint Yes true \N +rename_col9 double Yes true \N +rename_col10 decimal(20,5) Yes true \N +id int Yes true \N +rename_col1 array Yes true \N +rename_col2 array Yes true \N +rename_col3 array Yes true \N +rename_col4 map Yes true \N +rename_col5 map Yes true \N +rename_col6 map Yes true \N +rename_col7 struct Yes true \N +col_add int Yes true \N +col_add2 int Yes true \N -- !parquet_v2_2 -- -1 1.2000000476837158 1.12345 1 [1, 2, 3] [1.100000023841858, 2.200000047683716, 3.299999952316284] [1.1234, 2.2345, 3.3456] {1:10, 2:20} {1:1.100000023841858, 2:2.200000047683716} {1:1.12345, 2:2.23456} {"add":null, "x":1, "y":1.100000023841858} \N \N -1 1.2000000476837158 1.12345 2 [4, 5, 6] [4.400000095367432, 5.5, 6.599999904632568] [4.4567, 5.5678, 6.6789] {3:30, 4:40} {3:3.299999952316284, 4:4.400000095367432} {3:3.34567, 4:4.45678} {"add":null, "x":2, "y":2.200000047683716} \N \N -1 1.2000000476837158 1.12345 3 [7, 8, 9] [7.699999809265137, 8.800000190734863, 9.899999618530273] [7.7890, 8.8901, 9.9012] {5:50, 6:60} {5:5.5, 6:6.599999904632568} {5:5.56789, 6:6.67890} {"add":null, "x":3, "y":3.299999952316284} \N \N -1 1.2000000476837158 1.12345 4 [10, 11, 12] [10.100000381469727, 11.109999656677246, 12.119999885559082] [10.1011, 11.1112, 12.1213] {7:70, 8:80} {7:7.699999809265137, 8:8.800000190734863} {7:7.78901, 8:8.89012} {"add":null, "x":4, "y":4.400000095367432} \N \N -1 1.2000000476837158 1.12345 5 [13, 14, 15] [13.130000114440918, 14.140000343322754, 15.149999618530273] [13.1314, 14.1415, 15.1516] {9:90, 10:100} {9:9.899999618530273, 10:10.100000381469727} {9:9.89012, 10:10.10123} {"add":null, "x":5, "y":5.5} \N \N -21447483648 1.7976931348623157E308 1234567890.12345 6 [21447483648, 21474483649, 21474483650] [1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308] [1234567890.1235, 1234567890.2346, 1234567890.3457] {214748348:2147483648, 24748649:214743383649} {214748648:1.7976931348623157e+308, 27483649:1.7976931348623157e+308} {214743648:1234567890.12345, 21474649:1234567890.23456} {"add":1234567890.12345, "x":214748223648, 
"y":1.7976931346232156e+308} 1 2 -2144748345648 1.7976931348623157E308 1234567890.23456 7 [2144748345648, 214742435483649, 214742435483650] [1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308] [12345673890.1235, 12345367890.2346, 12344567890.3457] {214748348:2147483632148, 24748649:213144743383649} {214748648:1.717693623157e+308, 27483649:1.7976931348623157e+308} {214743648:1234567890.12345, 21474649:1234567890.23456} {"add":1234567890.12345, "x":214743338223648, "y":1.7976931346232156e+308} 2 3 -21447483648 1.7976931348623157E308 1234567890.12345 8 [21447483648, 21474483649, 21474483650] [1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308] [1234567890.1235, 1234567890.2346, 1234567890.3457] {214748348:2147483648, 24748649:214743383649} {214748648:1.7976931348623157e+308, 27483649:1.7976931348623157e+308} {214743648:1234567890.12345, 21474649:1234567890.23456} {"add":1234567890.12345, "x":214748223648, "y":1.7976931346232156e+308} 3 4 -2144748345648 1.7976931348623157E308 1234567890.23456 9 [2144748345648, 214742435483649, 214742435483650, 214742435483650, 214742435483650, 214742435483650] [1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308] [12345673890.1235, 12345367890.2346, 12344567890.3457] {214748348:2147483632148, 24748649:213144743383649} {214748648:1.717693623157e+308, 27483649:1.7976931348623157e+308} {214743648:1234567890.12345, 21474649:1234567890.23456} {"add":1234567890.12345, "x":214743338223648, "y":1.7976931346232156e+308} 4 5 +10 10.5 100.56789 1 [1, 2, 3] [1.100000023841858, 2.200000047683716, 3.299999952316284] [10.1234, 20.5678, 30.9876] {1:100, 2:200} {1:1.100000023841858, 2:2.200000047683716} {1:10.12345, 2:20.98765} {"add":null, "x":1, "y":1.100000023841858} \N \N +20 20.5 200.56789 2 [4, 5] [4.400000095367432, 5.5] [40.1234, 50.5678] {3:300, 4:400} {3:3.299999952316284, 4:4.400000095367432} {3:30.12345, 4:40.98765} {"add":null, "x":2, "y":2.200000047683716} \N \N +30 30.5 300.56789 3 [6] [6.599999904632568] [60.1234] {5:500} {5:5.5} {5:50.12345} {"add":null, "x":3, "y":3.299999952316284} \N \N +40 40.5 400.56789 4 [7, 7, 7] [7.099999904632568, 7.199999809265137, 7.300000190734863] [10.0000, 30.0000] {2:4} {6:7} {8:9.00000} {"add":null, "x":4, "y":4.400000095367432} \N \N +50 50.5 500.56789 5 \N \N \N \N \N \N {"add":null, "x":5, "y":5.5} \N \N +60 60.5 600.56789 6 [7, 8] [7.699999809265137, 8.800000190734863] [70.1234, 80.5678] {6:600, 7:700} {6:6.599999904632568, 7:7.699999809265137} {6:60.12345, 7:70.98765} {"add":null, "x":6, "y":6.599999904632568} \N \N +70 70.5 700.56789 7 [9, 10] [9.899999618530273, 10.100000381469727] [90.1234, 100.5678] {8:800, 9:900} {8:8.800000190734863, 9:9.899999618530273} {8:80.12345, 9:90.98765} {"add":null, "x":7, "y":7.699999809265137} \N \N +80 80.5 800.56789 8 [11, 12] [11.109999656677246, 12.119999885559082] [110.1234, 120.5678] {10:1000, 11:1100} {10:10.100000381469727, 11:11.109999656677246} {10:100.12345, 11:110.98765} {"add":null, "x":8, "y":8.800000190734863} \N \N +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +100 11.1 110.12345 11 
[11, 12, 13] [11.1, 12.2, 13.3] [110.1234, 120.5678, 130.9876] {11:1100, 12:1200} {11:11.1, 12:12.2} {11:110.12345, 12:120.98765} {"add":11.1, "x":11, "y":11.1} 110 120 +200 22.2 220.12345 12 [14, 15] [14.4, 15.5] [140.1234, 150.5678] {13:1300, 14:1400} {13:13.3, 14:14.4} {13:130.12345, 14:140.98765} {"add":22.2, "x":12, "y":12.2} 130 140 +300 33.3 330.12345 13 [16] [16.6] [160.1234] {15:1500} {15:15.5} {15:150.12345} {"add":33.3, "x":13, "y":13.3} 150 160 +400 44.4 440.12345 14 [] [] [] {} {} {} {"add":44.4, "x":14, "y":14.4} 170 180 +500 55.5 550.12345 15 \N \N \N \N \N \N {"add":55.5, "x":15, "y":15.5} 190 200 -- !parquet_v2_3 -- -9 +15 -- !parquet_v2_4 -- -6 -7 -8 -9 -- !parquet_v2_5 -- -1 -2 -3 -4 +110 +130 +150 +170 +190 -- !parquet_v2_6 -- -6 -7 -8 -9 -- !parquet_v2_7 -- {"add":null, "x":1, "y":1.100000023841858} @@ -128,21 +289,39 @@ col_add2 INT Yes true \N {"add":null, "x":3, "y":3.299999952316284} {"add":null, "x":4, "y":4.400000095367432} {"add":null, "x":5, "y":5.5} -{"add":1234567890.12345, "x":214748223648, "y":1.7976931346232156e+308} -{"add":1234567890.12345, "x":214743338223648, "y":1.7976931346232156e+308} -{"add":1234567890.12345, "x":214748223648, "y":1.7976931346232156e+308} -{"add":1234567890.12345, "x":214743338223648, "y":1.7976931346232156e+308} +{"add":null, "x":6, "y":6.599999904632568} +{"add":null, "x":7, "y":7.699999809265137} +{"add":null, "x":8, "y":8.800000190734863} +{"add":null, "x":9, "y":9.899999618530273} +{"add":null, "x":10, "y":10.100000381469727} +{"add":11.1, "x":11, "y":11.1} +{"add":22.2, "x":12, "y":12.2} +{"add":33.3, "x":13, "y":13.3} +{"add":44.4, "x":14, "y":14.4} +{"add":55.5, "x":15, "y":15.5} -- !parquet_v2_8 -- -3 -4 -5 +\N +\N +\N +\N +120 +140 +160 +180 +200 -- !parquet_v2_9 -- -9 1 -8 1 -7 1 -6 1 +15 1 +14 1 +13 1 +12 1 +11 1 +10 0 +9 0 +8 0 +7 0 +6 0 5 0 4 0 3 0 @@ -151,52 +330,196 @@ col_add2 INT Yes true \N -- !parquet_v2_10 -- +-- !parquet_v2_11 -- +3 [10.1234, 20.5678, 30.9876] +3 [110.1234, 120.5678, 130.9876] + +-- !parquet_v2_12 -- +3 [1.100000023841858, 2.200000047683716, 3.299999952316284] +2 [4.400000095367432, 5.5] +1 [6.599999904632568] +3 [7.099999904632568, 7.199999809265137, 7.300000190734863] +\N \N +2 [7.699999809265137, 8.800000190734863] +2 [9.899999618530273, 10.100000381469727] +2 [11.109999656677246, 12.119999885559082] +2 [13.130000114440918, 14.140000343322754] +2 [15.149999618530273, 16.15999984741211] +3 [11.1, 12.2, 13.3] +2 [14.4, 15.5] +1 [16.6] +0 [] +\N \N + +-- !parquet_v2_13 -- +3 [7.099999904632568, 7.199999809265137, 7.300000190734863] +2 [7.699999809265137, 8.800000190734863] +2 [9.899999618530273, 10.100000381469727] +2 [11.109999656677246, 12.119999885559082] +2 [13.130000114440918, 14.140000343322754] +2 [15.149999618530273, 16.15999984741211] +3 [11.1, 12.2, 13.3] +2 [14.4, 15.5] +1 [16.6] + +-- !parquet_v2_14 -- +2 [11.109999656677246, 12.119999885559082] +2 [13.130000114440918, 14.140000343322754] +2 [15.149999618530273, 16.15999984741211] +3 [11.1, 12.2, 13.3] +2 [14.4, 15.5] +1 [16.6] + +-- !parquet_v2_15 -- +3 [1, 2, 3] +2 [4, 5] +1 [6] +3 [7, 7, 7] +\N \N +2 [7, 8] +2 [9, 10] +2 [11, 12] +2 [13, 14] +2 [15, 16] +3 [11, 12, 13] +2 [14, 15] +1 [16] +0 [] +\N \N + +-- !parquet_v2_16 -- +50 50.5 500.56789 5 \N \N \N \N \N \N {"add":null, "x":5, "y":5.5} \N \N +60 60.5 600.56789 6 [7, 8] [7.699999809265137, 8.800000190734863] [70.1234, 80.5678] {6:600, 7:700} {6:6.599999904632568, 7:7.699999809265137} {6:60.12345, 7:70.98765} {"add":null, "x":6, "y":6.599999904632568} \N \N +70 70.5 
700.56789 7 [9, 10] [9.899999618530273, 10.100000381469727] [90.1234, 100.5678] {8:800, 9:900} {8:8.800000190734863, 9:9.899999618530273} {8:80.12345, 9:90.98765} {"add":null, "x":7, "y":7.699999809265137} \N \N +80 80.5 800.56789 8 [11, 12] [11.109999656677246, 12.119999885559082] [110.1234, 120.5678] {10:1000, 11:1100} {10:10.100000381469727, 11:11.109999656677246} {10:100.12345, 11:110.98765} {"add":null, "x":8, "y":8.800000190734863} \N \N +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +500 55.5 550.12345 15 \N \N \N \N \N \N {"add":55.5, "x":15, "y":15.5} 190 200 + +-- !parquet_v2_17 -- +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +100 11.1 110.12345 11 [11, 12, 13] [11.1, 12.2, 13.3] [110.1234, 120.5678, 130.9876] {11:1100, 12:1200} {11:11.1, 12:12.2} {11:110.12345, 12:120.98765} {"add":11.1, "x":11, "y":11.1} 110 120 +200 22.2 220.12345 12 [14, 15] [14.4, 15.5] [140.1234, 150.5678] {13:1300, 14:1400} {13:13.3, 14:14.4} {13:130.12345, 14:140.98765} {"add":22.2, "x":12, "y":12.2} 130 140 +300 33.3 330.12345 13 [16] [16.6] [160.1234] {15:1500} {15:15.5} {15:150.12345} {"add":33.3, "x":13, "y":13.3} 150 160 + +-- !parquet_v2_18 -- +80 80.5 800.56789 8 [11, 12] [11.109999656677246, 12.119999885559082] [110.1234, 120.5678] {10:1000, 11:1100} {10:10.100000381469727, 11:11.109999656677246} {10:100.12345, 11:110.98765} {"add":null, "x":8, "y":8.800000190734863} \N \N +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +100 11.1 110.12345 11 [11, 12, 13] [11.1, 12.2, 13.3] [110.1234, 120.5678, 130.9876] {11:1100, 12:1200} {11:11.1, 12:12.2} {11:110.12345, 12:120.98765} {"add":11.1, "x":11, "y":11.1} 110 120 +200 22.2 220.12345 12 [14, 15] [14.4, 15.5] [140.1234, 150.5678] {13:1300, 14:1400} {13:13.3, 14:14.4} {13:130.12345, 14:140.98765} {"add":22.2, "x":12, "y":12.2} 130 140 +300 33.3 330.12345 13 [16] [16.6] [160.1234] {15:1500} {15:15.5} {15:150.12345} {"add":33.3, "x":13, "y":13.3} 150 160 + +-- !parquet_v2_19 -- +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +11.1 +22.2 +33.3 +44.4 +55.5 + +-- !parquet_v2_20 -- +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 + +-- !parquet_v2_21 -- +3 [10.1234, 20.5678, 30.9876] +2 [40.1234, 50.5678] +1 [60.1234] +2 [10.0000, 30.0000] +\N \N +2 [70.1234, 80.5678] +2 [90.1234, 100.5678] +2 
[110.1234, 120.5678] +2 [130.1234, 140.5678] +2 [150.1234, 160.5678] +3 [110.1234, 120.5678, 130.9876] +2 [140.1234, 150.5678] +1 [160.1234] +0 [] +\N \N + +-- !parquet_v2_22 -- +50 50.5 500.56789 5 \N \N \N \N \N \N {"add":null, "x":5, "y":5.5} \N \N +60 60.5 600.56789 6 [7, 8] [7.699999809265137, 8.800000190734863] [70.1234, 80.5678] {6:600, 7:700} {6:6.599999904632568, 7:7.699999809265137} {6:60.12345, 7:70.98765} {"add":null, "x":6, "y":6.599999904632568} \N \N +70 70.5 700.56789 7 [9, 10] [9.899999618530273, 10.100000381469727] [90.1234, 100.5678] {8:800, 9:900} {8:8.800000190734863, 9:9.899999618530273} {8:80.12345, 9:90.98765} {"add":null, "x":7, "y":7.699999809265137} \N \N +80 80.5 800.56789 8 [11, 12] [11.109999656677246, 12.119999885559082] [110.1234, 120.5678] {10:1000, 11:1100} {10:10.100000381469727, 11:11.109999656677246} {10:100.12345, 11:110.98765} {"add":null, "x":8, "y":8.800000190734863} \N \N +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +100 11.1 110.12345 11 [11, 12, 13] [11.1, 12.2, 13.3] [110.1234, 120.5678, 130.9876] {11:1100, 12:1200} {11:11.1, 12:12.2} {11:110.12345, 12:120.98765} {"add":11.1, "x":11, "y":11.1} 110 120 +200 22.2 220.12345 12 [14, 15] [14.4, 15.5] [140.1234, 150.5678] {13:1300, 14:1400} {13:13.3, 14:14.4} {13:130.12345, 14:140.98765} {"add":22.2, "x":12, "y":12.2} 130 140 +300 33.3 330.12345 13 [16] [16.6] [160.1234] {15:1500} {15:15.5} {15:150.12345} {"add":33.3, "x":13, "y":13.3} 150 160 +400 44.4 440.12345 14 [] [] [] {} {} {} {"add":44.4, "x":14, "y":14.4} 170 180 +500 55.5 550.12345 15 \N \N \N \N \N \N {"add":55.5, "x":15, "y":15.5} 190 200 + -- !orc_v1_1 -- -rename_col8 BIGINT Yes true \N -rename_col9 DOUBLE Yes true \N -rename_col10 DECIMAL(20, 5) Yes true \N -id INT Yes true \N -rename_col1 ARRAY Yes true \N -rename_col2 ARRAY Yes true \N -rename_col3 ARRAY Yes true \N -rename_col4 MAP Yes true \N -rename_col5 MAP Yes true \N -rename_col6 MAP Yes true \N -rename_col7 STRUCT Yes true \N -col_add INT Yes true \N -col_add2 INT Yes true \N +rename_col8 bigint Yes true \N +rename_col9 double Yes true \N +rename_col10 decimal(20,5) Yes true \N +id int Yes true \N +rename_col1 array Yes true \N +rename_col2 array Yes true \N +rename_col3 array Yes true \N +rename_col4 map Yes true \N +rename_col5 map Yes true \N +rename_col6 map Yes true \N +rename_col7 struct Yes true \N +col_add int Yes true \N +col_add2 int Yes true \N -- !orc_v1_2 -- -1 1.2000000476837158 1.12345 1 [1, 2, 3] [1.100000023841858, 2.200000047683716, 3.299999952316284] [1.1234, 2.2345, 3.3456] {1:10, 2:20} {1:1.100000023841858, 2:2.200000047683716} {1:1.12345, 2:2.23456} {"add":null, "x":1, "y":1.100000023841858} \N \N -1 1.2000000476837158 1.12345 2 [4, 5, 6] [4.400000095367432, 5.5, 6.599999904632568] [4.4567, 5.5678, 6.6789] {3:30, 4:40} {3:3.299999952316284, 4:4.400000095367432} {3:3.34567, 4:4.45678} {"add":null, "x":2, "y":2.200000047683716} \N \N -1 1.2000000476837158 1.12345 3 [7, 8, 9] [7.699999809265137, 8.800000190734863, 9.899999618530273] [7.7890, 8.8901, 9.9012] {5:50, 6:60} {5:5.5, 6:6.599999904632568} {5:5.56789, 6:6.67890} {"add":null, 
"x":3, "y":3.299999952316284} \N \N -1 1.2000000476837158 1.12345 4 [10, 11, 12] [10.100000381469727, 11.109999656677246, 12.119999885559082] [10.1011, 11.1112, 12.1213] {7:70, 8:80} {7:7.699999809265137, 8:8.800000190734863} {7:7.78901, 8:8.89012} {"add":null, "x":4, "y":4.400000095367432} \N \N -1 1.2000000476837158 1.12345 5 [13, 14, 15] [13.130000114440918, 14.140000343322754, 15.149999618530273] [13.1314, 14.1415, 15.1516] {9:90, 10:100} {9:9.899999618530273, 10:10.100000381469727} {9:9.89012, 10:10.10123} {"add":null, "x":5, "y":5.5} \N \N -21447483648 1.7976931348623157E308 1234567890.12345 6 [21447483648, 21474483649, 21474483650] [1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308] [1234567890.1235, 1234567890.2346, 1234567890.3457] {214748348:2147483648, 24748649:214743383649} {214748648:1.7976931348623157e+308, 27483649:1.7976931348623157e+308} {214743648:1234567890.12345, 21474649:1234567890.23456} {"add":1234567890.12345, "x":214748223648, "y":1.7976931346232156e+308} 1 2 -2144748345648 1.7976931348623157E308 1234567890.23456 7 [2144748345648, 214742435483649, 214742435483650] [1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308] [12345673890.1235, 12345367890.2346, 12344567890.3457] {214748348:2147483632148, 24748649:213144743383649} {214748648:1.717693623157e+308, 27483649:1.7976931348623157e+308} {214743648:1234567890.12345, 21474649:1234567890.23456} {"add":1234567890.12345, "x":214743338223648, "y":1.7976931346232156e+308} 2 3 -21447483648 1.7976931348623157E308 1234567890.12345 8 [21447483648, 21474483649, 21474483650] [1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308] [1234567890.1235, 1234567890.2346, 1234567890.3457] {214748348:2147483648, 24748649:214743383649} {214748648:1.7976931348623157e+308, 27483649:1.7976931348623157e+308} {214743648:1234567890.12345, 21474649:1234567890.23456} {"add":1234567890.12345, "x":214748223648, "y":1.7976931346232156e+308} 3 4 -2144748345648 1.7976931348623157E308 1234567890.23456 9 [2144748345648, 214742435483649, 214742435483650, 214742435483650, 214742435483650, 214742435483650] [1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308] [12345673890.1235, 12345367890.2346, 12344567890.3457] {214748348:2147483632148, 24748649:213144743383649} {214748648:1.717693623157e+308, 27483649:1.7976931348623157e+308} {214743648:1234567890.12345, 21474649:1234567890.23456} {"add":1234567890.12345, "x":214743338223648, "y":1.7976931346232156e+308} 4 5 +10 10.5 100.56789 1 [1, 2, 3] [1.100000023841858, 2.200000047683716, 3.299999952316284] [10.1234, 20.5678, 30.9876] {1:100, 2:200} {1:1.100000023841858, 2:2.200000047683716} {1:10.12345, 2:20.98765} {"add":null, "x":1, "y":1.100000023841858} \N \N +20 20.5 200.56789 2 [4, 5] [4.400000095367432, 5.5] [40.1234, 50.5678] {3:300, 4:400} {3:3.299999952316284, 4:4.400000095367432} {3:30.12345, 4:40.98765} {"add":null, "x":2, "y":2.200000047683716} \N \N +30 30.5 300.56789 3 [6] [6.599999904632568] [60.1234] {5:500} {5:5.5} {5:50.12345} {"add":null, "x":3, "y":3.299999952316284} \N \N +40 40.5 400.56789 4 [7, 7, 7] [7.099999904632568, 7.199999809265137, 7.300000190734863] [10.0000, 30.0000] {2:4} {6:7} {8:9.00000} {"add":null, "x":4, "y":4.400000095367432} \N \N +50 50.5 500.56789 5 \N \N \N \N \N \N {"add":null, "x":5, "y":5.5} \N \N +60 60.5 600.56789 6 [7, 8] [7.699999809265137, 8.800000190734863] [70.1234, 80.5678] {6:600, 7:700} {6:6.599999904632568, 7:7.699999809265137} {6:60.12345, 7:70.98765} {"add":null, 
"x":6, "y":6.599999904632568} \N \N +70 70.5 700.56789 7 [9, 10] [9.899999618530273, 10.100000381469727] [90.1234, 100.5678] {8:800, 9:900} {8:8.800000190734863, 9:9.899999618530273} {8:80.12345, 9:90.98765} {"add":null, "x":7, "y":7.699999809265137} \N \N +80 80.5 800.56789 8 [11, 12] [11.109999656677246, 12.119999885559082] [110.1234, 120.5678] {10:1000, 11:1100} {10:10.100000381469727, 11:11.109999656677246} {10:100.12345, 11:110.98765} {"add":null, "x":8, "y":8.800000190734863} \N \N +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +100 11.1 110.12345 11 [11, 12, 13] [11.1, 12.2, 13.3] [110.1234, 120.5678, 130.9876] {11:1100, 12:1200} {11:11.1, 12:12.2} {11:110.12345, 12:120.98765} {"add":11.1, "x":11, "y":11.1} 110 120 +200 22.2 220.12345 12 [14, 15] [14.4, 15.5] [140.1234, 150.5678] {13:1300, 14:1400} {13:13.3, 14:14.4} {13:130.12345, 14:140.98765} {"add":22.2, "x":12, "y":12.2} 130 140 +300 33.3 330.12345 13 [16] [16.6] [160.1234] {15:1500} {15:15.5} {15:150.12345} {"add":33.3, "x":13, "y":13.3} 150 160 +400 44.4 440.12345 14 [] [] [] {} {} {} {"add":44.4, "x":14, "y":14.4} 170 180 +500 55.5 550.12345 15 \N \N \N \N \N \N {"add":55.5, "x":15, "y":15.5} 190 200 -- !orc_v1_3 -- -9 +15 -- !orc_v1_4 -- -6 -7 -8 -9 -- !orc_v1_5 -- -1 -2 -3 -4 +110 +130 +150 +170 +190 -- !orc_v1_6 -- -6 -7 -8 -9 -- !orc_v1_7 -- {"add":null, "x":1, "y":1.100000023841858} @@ -204,21 +527,39 @@ col_add2 INT Yes true \N {"add":null, "x":3, "y":3.299999952316284} {"add":null, "x":4, "y":4.400000095367432} {"add":null, "x":5, "y":5.5} -{"add":1234567890.12345, "x":214748223648, "y":1.7976931346232156e+308} -{"add":1234567890.12345, "x":214743338223648, "y":1.7976931346232156e+308} -{"add":1234567890.12345, "x":214748223648, "y":1.7976931346232156e+308} -{"add":1234567890.12345, "x":214743338223648, "y":1.7976931346232156e+308} +{"add":null, "x":6, "y":6.599999904632568} +{"add":null, "x":7, "y":7.699999809265137} +{"add":null, "x":8, "y":8.800000190734863} +{"add":null, "x":9, "y":9.899999618530273} +{"add":null, "x":10, "y":10.100000381469727} +{"add":11.1, "x":11, "y":11.1} +{"add":22.2, "x":12, "y":12.2} +{"add":33.3, "x":13, "y":13.3} +{"add":44.4, "x":14, "y":14.4} +{"add":55.5, "x":15, "y":15.5} -- !orc_v1_8 -- -3 -4 -5 +\N +\N +\N +\N +120 +140 +160 +180 +200 -- !orc_v1_9 -- -9 1 -8 1 -7 1 -6 1 +15 1 +14 1 +13 1 +12 1 +11 1 +10 0 +9 0 +8 0 +7 0 +6 0 5 0 4 0 3 0 @@ -227,52 +568,196 @@ col_add2 INT Yes true \N -- !orc_v1_10 -- +-- !orc_v1_11 -- +3 [10.1234, 20.5678, 30.9876] +3 [110.1234, 120.5678, 130.9876] + +-- !orc_v1_12 -- +3 [1.100000023841858, 2.200000047683716, 3.299999952316284] +2 [4.400000095367432, 5.5] +1 [6.599999904632568] +3 [7.099999904632568, 7.199999809265137, 7.300000190734863] +\N \N +2 [7.699999809265137, 8.800000190734863] +2 [9.899999618530273, 10.100000381469727] +2 [11.109999656677246, 12.119999885559082] +2 [13.130000114440918, 14.140000343322754] +2 [15.149999618530273, 16.15999984741211] +3 [11.1, 12.2, 13.3] +2 [14.4, 15.5] +1 [16.6] +0 [] +\N \N + +-- !orc_v1_13 -- +3 [7.099999904632568, 7.199999809265137, 7.300000190734863] +2 [7.699999809265137, 
8.800000190734863] +2 [9.899999618530273, 10.100000381469727] +2 [11.109999656677246, 12.119999885559082] +2 [13.130000114440918, 14.140000343322754] +2 [15.149999618530273, 16.15999984741211] +3 [11.1, 12.2, 13.3] +2 [14.4, 15.5] +1 [16.6] + +-- !orc_v1_14 -- +2 [11.109999656677246, 12.119999885559082] +2 [13.130000114440918, 14.140000343322754] +2 [15.149999618530273, 16.15999984741211] +3 [11.1, 12.2, 13.3] +2 [14.4, 15.5] +1 [16.6] + +-- !orc_v1_15 -- +3 [1, 2, 3] +2 [4, 5] +1 [6] +3 [7, 7, 7] +\N \N +2 [7, 8] +2 [9, 10] +2 [11, 12] +2 [13, 14] +2 [15, 16] +3 [11, 12, 13] +2 [14, 15] +1 [16] +0 [] +\N \N + +-- !orc_v1_16 -- +50 50.5 500.56789 5 \N \N \N \N \N \N {"add":null, "x":5, "y":5.5} \N \N +60 60.5 600.56789 6 [7, 8] [7.699999809265137, 8.800000190734863] [70.1234, 80.5678] {6:600, 7:700} {6:6.599999904632568, 7:7.699999809265137} {6:60.12345, 7:70.98765} {"add":null, "x":6, "y":6.599999904632568} \N \N +70 70.5 700.56789 7 [9, 10] [9.899999618530273, 10.100000381469727] [90.1234, 100.5678] {8:800, 9:900} {8:8.800000190734863, 9:9.899999618530273} {8:80.12345, 9:90.98765} {"add":null, "x":7, "y":7.699999809265137} \N \N +80 80.5 800.56789 8 [11, 12] [11.109999656677246, 12.119999885559082] [110.1234, 120.5678] {10:1000, 11:1100} {10:10.100000381469727, 11:11.109999656677246} {10:100.12345, 11:110.98765} {"add":null, "x":8, "y":8.800000190734863} \N \N +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +500 55.5 550.12345 15 \N \N \N \N \N \N {"add":55.5, "x":15, "y":15.5} 190 200 + +-- !orc_v1_17 -- +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +100 11.1 110.12345 11 [11, 12, 13] [11.1, 12.2, 13.3] [110.1234, 120.5678, 130.9876] {11:1100, 12:1200} {11:11.1, 12:12.2} {11:110.12345, 12:120.98765} {"add":11.1, "x":11, "y":11.1} 110 120 +200 22.2 220.12345 12 [14, 15] [14.4, 15.5] [140.1234, 150.5678] {13:1300, 14:1400} {13:13.3, 14:14.4} {13:130.12345, 14:140.98765} {"add":22.2, "x":12, "y":12.2} 130 140 +300 33.3 330.12345 13 [16] [16.6] [160.1234] {15:1500} {15:15.5} {15:150.12345} {"add":33.3, "x":13, "y":13.3} 150 160 + +-- !orc_v1_18 -- +80 80.5 800.56789 8 [11, 12] [11.109999656677246, 12.119999885559082] [110.1234, 120.5678] {10:1000, 11:1100} {10:10.100000381469727, 11:11.109999656677246} {10:100.12345, 11:110.98765} {"add":null, "x":8, "y":8.800000190734863} \N \N +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 
15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +100 11.1 110.12345 11 [11, 12, 13] [11.1, 12.2, 13.3] [110.1234, 120.5678, 130.9876] {11:1100, 12:1200} {11:11.1, 12:12.2} {11:110.12345, 12:120.98765} {"add":11.1, "x":11, "y":11.1} 110 120 +200 22.2 220.12345 12 [14, 15] [14.4, 15.5] [140.1234, 150.5678] {13:1300, 14:1400} {13:13.3, 14:14.4} {13:130.12345, 14:140.98765} {"add":22.2, "x":12, "y":12.2} 130 140 +300 33.3 330.12345 13 [16] [16.6] [160.1234] {15:1500} {15:15.5} {15:150.12345} {"add":33.3, "x":13, "y":13.3} 150 160 + +-- !orc_v1_19 -- +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +11.1 +22.2 +33.3 +44.4 +55.5 + +-- !orc_v1_20 -- +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 + +-- !orc_v1_21 -- +3 [10.1234, 20.5678, 30.9876] +2 [40.1234, 50.5678] +1 [60.1234] +2 [10.0000, 30.0000] +\N \N +2 [70.1234, 80.5678] +2 [90.1234, 100.5678] +2 [110.1234, 120.5678] +2 [130.1234, 140.5678] +2 [150.1234, 160.5678] +3 [110.1234, 120.5678, 130.9876] +2 [140.1234, 150.5678] +1 [160.1234] +0 [] +\N \N + +-- !orc_v1_22 -- +50 50.5 500.56789 5 \N \N \N \N \N \N {"add":null, "x":5, "y":5.5} \N \N +60 60.5 600.56789 6 [7, 8] [7.699999809265137, 8.800000190734863] [70.1234, 80.5678] {6:600, 7:700} {6:6.599999904632568, 7:7.699999809265137} {6:60.12345, 7:70.98765} {"add":null, "x":6, "y":6.599999904632568} \N \N +70 70.5 700.56789 7 [9, 10] [9.899999618530273, 10.100000381469727] [90.1234, 100.5678] {8:800, 9:900} {8:8.800000190734863, 9:9.899999618530273} {8:80.12345, 9:90.98765} {"add":null, "x":7, "y":7.699999809265137} \N \N +80 80.5 800.56789 8 [11, 12] [11.109999656677246, 12.119999885559082] [110.1234, 120.5678] {10:1000, 11:1100} {10:10.100000381469727, 11:11.109999656677246} {10:100.12345, 11:110.98765} {"add":null, "x":8, "y":8.800000190734863} \N \N +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +100 11.1 110.12345 11 [11, 12, 13] [11.1, 12.2, 13.3] [110.1234, 120.5678, 130.9876] {11:1100, 12:1200} {11:11.1, 12:12.2} {11:110.12345, 12:120.98765} {"add":11.1, "x":11, "y":11.1} 110 120 +200 22.2 220.12345 12 [14, 15] [14.4, 15.5] [140.1234, 150.5678] {13:1300, 14:1400} {13:13.3, 14:14.4} {13:130.12345, 14:140.98765} {"add":22.2, "x":12, "y":12.2} 130 140 +300 33.3 330.12345 13 [16] [16.6] [160.1234] {15:1500} {15:15.5} {15:150.12345} {"add":33.3, "x":13, "y":13.3} 150 160 +400 44.4 440.12345 14 [] [] [] {} {} {} {"add":44.4, "x":14, "y":14.4} 170 180 +500 55.5 550.12345 15 \N \N \N \N \N \N {"add":55.5, "x":15, "y":15.5} 190 200 + -- !orc_v2_1 -- -rename_col8 BIGINT Yes true \N -rename_col9 DOUBLE Yes true \N -rename_col10 DECIMAL(20, 5) Yes true \N -id INT Yes true \N -rename_col1 ARRAY Yes true \N -rename_col2 ARRAY Yes true \N -rename_col3 ARRAY Yes true \N -rename_col4 MAP Yes true \N -rename_col5 MAP Yes true \N -rename_col6 MAP Yes true \N -rename_col7 STRUCT Yes true \N -col_add INT Yes true \N -col_add2 INT Yes true \N +rename_col8 bigint Yes true \N +rename_col9 double Yes true \N +rename_col10 decimal(20,5) Yes true \N +id int Yes true \N +rename_col1 array Yes true \N +rename_col2 array Yes true \N 
+rename_col3 array Yes true \N +rename_col4 map Yes true \N +rename_col5 map Yes true \N +rename_col6 map Yes true \N +rename_col7 struct Yes true \N +col_add int Yes true \N +col_add2 int Yes true \N -- !orc_v2_2 -- -1 1.2000000476837158 1.12345 1 [1, 2, 3] [1.100000023841858, 2.200000047683716, 3.299999952316284] [1.1234, 2.2345, 3.3456] {1:10, 2:20} {1:1.100000023841858, 2:2.200000047683716} {1:1.12345, 2:2.23456} {"add":null, "x":1, "y":1.100000023841858} \N \N -1 1.2000000476837158 1.12345 2 [4, 5, 6] [4.400000095367432, 5.5, 6.599999904632568] [4.4567, 5.5678, 6.6789] {3:30, 4:40} {3:3.299999952316284, 4:4.400000095367432} {3:3.34567, 4:4.45678} {"add":null, "x":2, "y":2.200000047683716} \N \N -1 1.2000000476837158 1.12345 3 [7, 8, 9] [7.699999809265137, 8.800000190734863, 9.899999618530273] [7.7890, 8.8901, 9.9012] {5:50, 6:60} {5:5.5, 6:6.599999904632568} {5:5.56789, 6:6.67890} {"add":null, "x":3, "y":3.299999952316284} \N \N -1 1.2000000476837158 1.12345 4 [10, 11, 12] [10.100000381469727, 11.109999656677246, 12.119999885559082] [10.1011, 11.1112, 12.1213] {7:70, 8:80} {7:7.699999809265137, 8:8.800000190734863} {7:7.78901, 8:8.89012} {"add":null, "x":4, "y":4.400000095367432} \N \N -1 1.2000000476837158 1.12345 5 [13, 14, 15] [13.130000114440918, 14.140000343322754, 15.149999618530273] [13.1314, 14.1415, 15.1516] {9:90, 10:100} {9:9.899999618530273, 10:10.100000381469727} {9:9.89012, 10:10.10123} {"add":null, "x":5, "y":5.5} \N \N -21447483648 1.7976931348623157E308 1234567890.12345 6 [21447483648, 21474483649, 21474483650] [1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308] [1234567890.1235, 1234567890.2346, 1234567890.3457] {214748348:2147483648, 24748649:214743383649} {214748648:1.7976931348623157e+308, 27483649:1.7976931348623157e+308} {214743648:1234567890.12345, 21474649:1234567890.23456} {"add":1234567890.12345, "x":214748223648, "y":1.7976931346232156e+308} 1 2 -2144748345648 1.7976931348623157E308 1234567890.23456 7 [2144748345648, 214742435483649, 214742435483650] [1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308] [12345673890.1235, 12345367890.2346, 12344567890.3457] {214748348:2147483632148, 24748649:213144743383649} {214748648:1.717693623157e+308, 27483649:1.7976931348623157e+308} {214743648:1234567890.12345, 21474649:1234567890.23456} {"add":1234567890.12345, "x":214743338223648, "y":1.7976931346232156e+308} 2 3 -21447483648 1.7976931348623157E308 1234567890.12345 8 [21447483648, 21474483649, 21474483650] [1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308] [1234567890.1235, 1234567890.2346, 1234567890.3457] {214748348:2147483648, 24748649:214743383649} {214748648:1.7976931348623157e+308, 27483649:1.7976931348623157e+308} {214743648:1234567890.12345, 21474649:1234567890.23456} {"add":1234567890.12345, "x":214748223648, "y":1.7976931346232156e+308} 3 4 -2144748345648 1.7976931348623157E308 1234567890.23456 9 [2144748345648, 214742435483649, 214742435483650, 214742435483650, 214742435483650, 214742435483650] [1.7976931348623157e+308, 1.7976931348623157e+308, 1.7976931348623157e+308] [12345673890.1235, 12345367890.2346, 12344567890.3457] {214748348:2147483632148, 24748649:213144743383649} {214748648:1.717693623157e+308, 27483649:1.7976931348623157e+308} {214743648:1234567890.12345, 21474649:1234567890.23456} {"add":1234567890.12345, "x":214743338223648, "y":1.7976931346232156e+308} 4 5 +10 10.5 100.56789 1 [1, 2, 3] [1.100000023841858, 2.200000047683716, 3.299999952316284] [10.1234, 20.5678, 
30.9876] {1:100, 2:200} {1:1.100000023841858, 2:2.200000047683716} {1:10.12345, 2:20.98765} {"add":null, "x":1, "y":1.100000023841858} \N \N +20 20.5 200.56789 2 [4, 5] [4.400000095367432, 5.5] [40.1234, 50.5678] {3:300, 4:400} {3:3.299999952316284, 4:4.400000095367432} {3:30.12345, 4:40.98765} {"add":null, "x":2, "y":2.200000047683716} \N \N +30 30.5 300.56789 3 [6] [6.599999904632568] [60.1234] {5:500} {5:5.5} {5:50.12345} {"add":null, "x":3, "y":3.299999952316284} \N \N +40 40.5 400.56789 4 [7, 7, 7] [7.099999904632568, 7.199999809265137, 7.300000190734863] [10.0000, 30.0000] {2:4} {6:7} {8:9.00000} {"add":null, "x":4, "y":4.400000095367432} \N \N +50 50.5 500.56789 5 \N \N \N \N \N \N {"add":null, "x":5, "y":5.5} \N \N +60 60.5 600.56789 6 [7, 8] [7.699999809265137, 8.800000190734863] [70.1234, 80.5678] {6:600, 7:700} {6:6.599999904632568, 7:7.699999809265137} {6:60.12345, 7:70.98765} {"add":null, "x":6, "y":6.599999904632568} \N \N +70 70.5 700.56789 7 [9, 10] [9.899999618530273, 10.100000381469727] [90.1234, 100.5678] {8:800, 9:900} {8:8.800000190734863, 9:9.899999618530273} {8:80.12345, 9:90.98765} {"add":null, "x":7, "y":7.699999809265137} \N \N +80 80.5 800.56789 8 [11, 12] [11.109999656677246, 12.119999885559082] [110.1234, 120.5678] {10:1000, 11:1100} {10:10.100000381469727, 11:11.109999656677246} {10:100.12345, 11:110.98765} {"add":null, "x":8, "y":8.800000190734863} \N \N +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +100 11.1 110.12345 11 [11, 12, 13] [11.1, 12.2, 13.3] [110.1234, 120.5678, 130.9876] {11:1100, 12:1200} {11:11.1, 12:12.2} {11:110.12345, 12:120.98765} {"add":11.1, "x":11, "y":11.1} 110 120 +200 22.2 220.12345 12 [14, 15] [14.4, 15.5] [140.1234, 150.5678] {13:1300, 14:1400} {13:13.3, 14:14.4} {13:130.12345, 14:140.98765} {"add":22.2, "x":12, "y":12.2} 130 140 +300 33.3 330.12345 13 [16] [16.6] [160.1234] {15:1500} {15:15.5} {15:150.12345} {"add":33.3, "x":13, "y":13.3} 150 160 +400 44.4 440.12345 14 [] [] [] {} {} {} {"add":44.4, "x":14, "y":14.4} 170 180 +500 55.5 550.12345 15 \N \N \N \N \N \N {"add":55.5, "x":15, "y":15.5} 190 200 -- !orc_v2_3 -- -9 +15 -- !orc_v2_4 -- -6 -7 -8 -9 -- !orc_v2_5 -- -1 -2 -3 -4 +110 +130 +150 +170 +190 -- !orc_v2_6 -- -6 -7 -8 -9 -- !orc_v2_7 -- {"add":null, "x":1, "y":1.100000023841858} @@ -280,21 +765,39 @@ col_add2 INT Yes true \N {"add":null, "x":3, "y":3.299999952316284} {"add":null, "x":4, "y":4.400000095367432} {"add":null, "x":5, "y":5.5} -{"add":1234567890.12345, "x":214748223648, "y":1.7976931346232156e+308} -{"add":1234567890.12345, "x":214743338223648, "y":1.7976931346232156e+308} -{"add":1234567890.12345, "x":214748223648, "y":1.7976931346232156e+308} -{"add":1234567890.12345, "x":214743338223648, "y":1.7976931346232156e+308} +{"add":null, "x":6, "y":6.599999904632568} +{"add":null, "x":7, "y":7.699999809265137} +{"add":null, "x":8, "y":8.800000190734863} +{"add":null, "x":9, "y":9.899999618530273} +{"add":null, "x":10, "y":10.100000381469727} +{"add":11.1, "x":11, "y":11.1} +{"add":22.2, "x":12, "y":12.2} +{"add":33.3, "x":13, "y":13.3} +{"add":44.4, "x":14, "y":14.4} +{"add":55.5, "x":15, 
"y":15.5} -- !orc_v2_8 -- -3 -4 -5 +\N +\N +\N +\N +120 +140 +160 +180 +200 -- !orc_v2_9 -- -9 1 -8 1 -7 1 -6 1 +15 1 +14 1 +13 1 +12 1 +11 1 +10 0 +9 0 +8 0 +7 0 +6 0 5 0 4 0 3 0 @@ -303,3 +806,148 @@ col_add2 INT Yes true \N -- !orc_v2_10 -- +-- !orc_v2_11 -- +3 [10.1234, 20.5678, 30.9876] +3 [110.1234, 120.5678, 130.9876] + +-- !orc_v2_12 -- +3 [1.100000023841858, 2.200000047683716, 3.299999952316284] +2 [4.400000095367432, 5.5] +1 [6.599999904632568] +3 [7.099999904632568, 7.199999809265137, 7.300000190734863] +\N \N +2 [7.699999809265137, 8.800000190734863] +2 [9.899999618530273, 10.100000381469727] +2 [11.109999656677246, 12.119999885559082] +2 [13.130000114440918, 14.140000343322754] +2 [15.149999618530273, 16.15999984741211] +3 [11.1, 12.2, 13.3] +2 [14.4, 15.5] +1 [16.6] +0 [] +\N \N + +-- !orc_v2_13 -- +3 [7.099999904632568, 7.199999809265137, 7.300000190734863] +2 [7.699999809265137, 8.800000190734863] +2 [9.899999618530273, 10.100000381469727] +2 [11.109999656677246, 12.119999885559082] +2 [13.130000114440918, 14.140000343322754] +2 [15.149999618530273, 16.15999984741211] +3 [11.1, 12.2, 13.3] +2 [14.4, 15.5] +1 [16.6] + +-- !orc_v2_14 -- +2 [11.109999656677246, 12.119999885559082] +2 [13.130000114440918, 14.140000343322754] +2 [15.149999618530273, 16.15999984741211] +3 [11.1, 12.2, 13.3] +2 [14.4, 15.5] +1 [16.6] + +-- !orc_v2_15 -- +3 [1, 2, 3] +2 [4, 5] +1 [6] +3 [7, 7, 7] +\N \N +2 [7, 8] +2 [9, 10] +2 [11, 12] +2 [13, 14] +2 [15, 16] +3 [11, 12, 13] +2 [14, 15] +1 [16] +0 [] +\N \N + +-- !orc_v2_16 -- +50 50.5 500.56789 5 \N \N \N \N \N \N {"add":null, "x":5, "y":5.5} \N \N +60 60.5 600.56789 6 [7, 8] [7.699999809265137, 8.800000190734863] [70.1234, 80.5678] {6:600, 7:700} {6:6.599999904632568, 7:7.699999809265137} {6:60.12345, 7:70.98765} {"add":null, "x":6, "y":6.599999904632568} \N \N +70 70.5 700.56789 7 [9, 10] [9.899999618530273, 10.100000381469727] [90.1234, 100.5678] {8:800, 9:900} {8:8.800000190734863, 9:9.899999618530273} {8:80.12345, 9:90.98765} {"add":null, "x":7, "y":7.699999809265137} \N \N +80 80.5 800.56789 8 [11, 12] [11.109999656677246, 12.119999885559082] [110.1234, 120.5678] {10:1000, 11:1100} {10:10.100000381469727, 11:11.109999656677246} {10:100.12345, 11:110.98765} {"add":null, "x":8, "y":8.800000190734863} \N \N +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +500 55.5 550.12345 15 \N \N \N \N \N \N {"add":55.5, "x":15, "y":15.5} 190 200 + +-- !orc_v2_17 -- +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +100 11.1 110.12345 11 [11, 12, 13] [11.1, 12.2, 13.3] [110.1234, 120.5678, 130.9876] {11:1100, 12:1200} {11:11.1, 12:12.2} {11:110.12345, 12:120.98765} {"add":11.1, "x":11, "y":11.1} 110 120 +200 22.2 
220.12345 12 [14, 15] [14.4, 15.5] [140.1234, 150.5678] {13:1300, 14:1400} {13:13.3, 14:14.4} {13:130.12345, 14:140.98765} {"add":22.2, "x":12, "y":12.2} 130 140 +300 33.3 330.12345 13 [16] [16.6] [160.1234] {15:1500} {15:15.5} {15:150.12345} {"add":33.3, "x":13, "y":13.3} 150 160 + +-- !orc_v2_18 -- +80 80.5 800.56789 8 [11, 12] [11.109999656677246, 12.119999885559082] [110.1234, 120.5678] {10:1000, 11:1100} {10:10.100000381469727, 11:11.109999656677246} {10:100.12345, 11:110.98765} {"add":null, "x":8, "y":8.800000190734863} \N \N +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +100 11.1 110.12345 11 [11, 12, 13] [11.1, 12.2, 13.3] [110.1234, 120.5678, 130.9876] {11:1100, 12:1200} {11:11.1, 12:12.2} {11:110.12345, 12:120.98765} {"add":11.1, "x":11, "y":11.1} 110 120 +200 22.2 220.12345 12 [14, 15] [14.4, 15.5] [140.1234, 150.5678] {13:1300, 14:1400} {13:13.3, 14:14.4} {13:130.12345, 14:140.98765} {"add":22.2, "x":12, "y":12.2} 130 140 +300 33.3 330.12345 13 [16] [16.6] [160.1234] {15:1500} {15:15.5} {15:150.12345} {"add":33.3, "x":13, "y":13.3} 150 160 + +-- !orc_v2_19 -- +\N +\N +\N +\N +\N +\N +\N +\N +\N +\N +11.1 +22.2 +33.3 +44.4 +55.5 + +-- !orc_v2_20 -- +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 +11 +12 +13 +14 +15 + +-- !orc_v2_21 -- +3 [10.1234, 20.5678, 30.9876] +2 [40.1234, 50.5678] +1 [60.1234] +2 [10.0000, 30.0000] +\N \N +2 [70.1234, 80.5678] +2 [90.1234, 100.5678] +2 [110.1234, 120.5678] +2 [130.1234, 140.5678] +2 [150.1234, 160.5678] +3 [110.1234, 120.5678, 130.9876] +2 [140.1234, 150.5678] +1 [160.1234] +0 [] +\N \N + +-- !orc_v2_22 -- +50 50.5 500.56789 5 \N \N \N \N \N \N {"add":null, "x":5, "y":5.5} \N \N +60 60.5 600.56789 6 [7, 8] [7.699999809265137, 8.800000190734863] [70.1234, 80.5678] {6:600, 7:700} {6:6.599999904632568, 7:7.699999809265137} {6:60.12345, 7:70.98765} {"add":null, "x":6, "y":6.599999904632568} \N \N +70 70.5 700.56789 7 [9, 10] [9.899999618530273, 10.100000381469727] [90.1234, 100.5678] {8:800, 9:900} {8:8.800000190734863, 9:9.899999618530273} {8:80.12345, 9:90.98765} {"add":null, "x":7, "y":7.699999809265137} \N \N +80 80.5 800.56789 8 [11, 12] [11.109999656677246, 12.119999885559082] [110.1234, 120.5678] {10:1000, 11:1100} {10:10.100000381469727, 11:11.109999656677246} {10:100.12345, 11:110.98765} {"add":null, "x":8, "y":8.800000190734863} \N \N +90 90.5 900.56789 9 [13, 14] [13.130000114440918, 14.140000343322754] [130.1234, 140.5678] {12:1200, 13:1300} {12:12.119999885559082, 13:13.130000114440918} {12:120.12345, 13:130.98765} {"add":null, "x":9, "y":9.899999618530273} \N \N +100 100.5 1000.56789 10 [15, 16] [15.149999618530273, 16.15999984741211] [150.1234, 160.5678] {14:1400, 15:1500} {14:14.140000343322754, 15:15.149999618530273} {14:140.12345, 15:150.98765} {"add":null, "x":10, "y":10.100000381469727} \N \N +100 11.1 110.12345 11 [11, 12, 13] [11.1, 12.2, 13.3] [110.1234, 120.5678, 130.9876] {11:1100, 12:1200} {11:11.1, 12:12.2} {11:110.12345, 12:120.98765} {"add":11.1, "x":11, "y":11.1} 110 120 +200 22.2 220.12345 12 [14, 15] [14.4, 15.5] [140.1234, 150.5678] {13:1300, 14:1400} {13:13.3, 14:14.4} {13:130.12345, 14:140.98765} 
{"add":22.2, "x":12, "y":12.2} 130 140 +300 33.3 330.12345 13 [16] [16.6] [160.1234] {15:1500} {15:15.5} {15:150.12345} {"add":33.3, "x":13, "y":13.3} 150 160 +400 44.4 440.12345 14 [] [] [] {} {} {} {"add":44.4, "x":14, "y":14.4} 170 180 +500 55.5 550.12345 15 \N \N \N \N \N \N {"add":55.5, "x":15, "y":15.5} 190 200 + diff --git a/regression-test/suites/external_table_p0/iceberg/iceberg_schema_change.groovy b/regression-test/suites/external_table_p0/iceberg/iceberg_schema_change.groovy index 12e15736779e9ae..907225c2912430a 100644 --- a/regression-test/suites/external_table_p0/iceberg/iceberg_schema_change.groovy +++ b/regression-test/suites/external_table_p0/iceberg/iceberg_schema_change.groovy @@ -23,8 +23,6 @@ suite("iceberg_schema_change", "p0,external,doris,external_docker,external_docke return } - // TODO 找当时的人看下怎么构造的这个表 - return String rest_port = context.config.otherConfigs.get("iceberg_rest_uri_port") String minio_port = context.config.otherConfigs.get("iceberg_minio_port") @@ -46,7 +44,7 @@ suite("iceberg_schema_change", "p0,external,doris,external_docker,external_docke logger.info("catalog " + catalog_name + " created") sql """switch ${catalog_name};""" logger.info("switched to catalog " + catalog_name) - sql """ use multi_catalog;""" + sql """ use test_db;""" qt_parquet_v1_1 """ desc complex_parquet_v1_schema_change ;""" @@ -58,8 +56,19 @@ suite("iceberg_schema_change", "p0,external,doris,external_docker,external_docke qt_parquet_v1_7 """ select rename_col7 from complex_parquet_v1_schema_change order by id; """ qt_parquet_v1_8 """ select col_add2 from complex_parquet_v1_schema_change where id >=7 order by id; """ qt_parquet_v1_9 """ select id,count(col_add) from complex_parquet_v1_schema_change group by id order by id desc ; """ - qt_parquet_v1_10 """ select col_add from complex_parquet_v1_schema_change where col_add -1 = col_add2 order by id; """ - + qt_parquet_v1_10 """ select col_add from complex_parquet_v1_schema_change where col_add -1 = col_add2 order by id; """ + qt_parquet_v1_11 """ select array_size(rename_col3),rename_col3 from complex_parquet_v1_schema_change where array_size(rename_col3) > 2 order by id; """ + qt_parquet_v1_12 """ select array_size(rename_col2),rename_col2 from complex_parquet_v1_schema_change order by id; """ + qt_parquet_v1_13 """ select array_size(rename_col2),rename_col2 from complex_parquet_v1_schema_change where rename_col2[1] > 7 order by id; """ + qt_parquet_v1_14 """ select array_size(rename_col2),rename_col2 from complex_parquet_v1_schema_change where rename_col2[1] > 7 and id > 7 order by id; """ + qt_parquet_v1_15 """ select array_size(rename_col1),rename_col1 from complex_parquet_v1_schema_change order by id; """ + qt_parquet_v1_16 """ select * from complex_parquet_v1_schema_change where rename_col10 > 500 order by id ; """ + qt_parquet_v1_17 """ select * from complex_parquet_v1_schema_change where map_keys(rename_col4)[1] > 10 order by id; """ + qt_parquet_v1_18 """ select * from complex_parquet_v1_schema_change where map_values(rename_col5)[1] > 10 order by id; """ + qt_parquet_v1_19 """ select struct_element(rename_col7,"add") from complex_parquet_v1_schema_change order by id; """ + qt_parquet_v1_20 """ select struct_element(rename_col7,"x") from complex_parquet_v1_schema_change order by id; """ + qt_parquet_v1_21 """ select array_size(rename_col3),rename_col3 from complex_parquet_v1_schema_change order by id; """ + qt_parquet_v1_22 """ select * from complex_parquet_v1_schema_change where rename_col8 + rename_col9 > 100 order by 
id;""" qt_parquet_v2_1 """ desc complex_parquet_v2_schema_change ;""" @@ -72,8 +81,18 @@ suite("iceberg_schema_change", "p0,external,doris,external_docker,external_docke qt_parquet_v2_8 """ select col_add2 from complex_parquet_v2_schema_change where id >=7 order by id; """ qt_parquet_v2_9 """ select id,count(col_add) from complex_parquet_v2_schema_change group by id order by id desc ; """ qt_parquet_v2_10 """ select col_add from complex_parquet_v2_schema_change where col_add -1 = col_add2 order by id; """ - - + qt_parquet_v2_11 """ select array_size(rename_col3),rename_col3 from complex_parquet_v2_schema_change where array_size(rename_col3) > 2 order by id; """ + qt_parquet_v2_12 """ select array_size(rename_col2),rename_col2 from complex_parquet_v2_schema_change order by id; """ + qt_parquet_v2_13 """ select array_size(rename_col2),rename_col2 from complex_parquet_v2_schema_change where rename_col2[1] > 7 order by id; """ + qt_parquet_v2_14 """ select array_size(rename_col2),rename_col2 from complex_parquet_v2_schema_change where rename_col2[1] > 7 and id > 7 order by id; """ + qt_parquet_v2_15 """ select array_size(rename_col1),rename_col1 from complex_parquet_v2_schema_change order by id; """ + qt_parquet_v2_16 """ select * from complex_parquet_v2_schema_change where rename_col10 > 500 order by id ; """ + qt_parquet_v2_17 """ select * from complex_parquet_v2_schema_change where map_keys(rename_col4)[1] > 10 order by id; """ + qt_parquet_v2_18 """ select * from complex_parquet_v2_schema_change where map_values(rename_col5)[1] > 10 order by id; """ + qt_parquet_v2_19 """ select struct_element(rename_col7,"add") from complex_parquet_v2_schema_change order by id; """ + qt_parquet_v2_20 """ select struct_element(rename_col7,"x") from complex_parquet_v2_schema_change order by id; """ + qt_parquet_v2_21 """ select array_size(rename_col3),rename_col3 from complex_parquet_v2_schema_change order by id; """ + qt_parquet_v2_22 """ select * from complex_parquet_v2_schema_change where rename_col8 + rename_col9 > 100 order by id;""" qt_orc_v1_1 """ desc complex_orc_v1_schema_change ;""" @@ -86,7 +105,18 @@ suite("iceberg_schema_change", "p0,external,doris,external_docker,external_docke qt_orc_v1_8 """ select col_add2 from complex_orc_v1_schema_change where id >=7 order by id; """ qt_orc_v1_9 """ select id,count(col_add) from complex_orc_v1_schema_change group by id order by id desc ; """ qt_orc_v1_10 """ select col_add from complex_orc_v1_schema_change where col_add -1 = col_add2 order by id; """ - + qt_orc_v1_11 """ select array_size(rename_col3),rename_col3 from complex_orc_v1_schema_change where array_size(rename_col3) > 2 order by id; """ + qt_orc_v1_12 """ select array_size(rename_col2),rename_col2 from complex_orc_v1_schema_change order by id; """ + qt_orc_v1_13 """ select array_size(rename_col2),rename_col2 from complex_orc_v1_schema_change where rename_col2[1] > 7 order by id; """ + qt_orc_v1_14 """ select array_size(rename_col2),rename_col2 from complex_orc_v1_schema_change where rename_col2[1] > 7 and id > 7 order by id; """ + qt_orc_v1_15 """ select array_size(rename_col1),rename_col1 from complex_orc_v1_schema_change order by id; """ + qt_orc_v1_16 """ select * from complex_orc_v1_schema_change where rename_col10 > 500 order by id ; """ + qt_orc_v1_17 """ select * from complex_orc_v1_schema_change where map_keys(rename_col4)[1] > 10 order by id; """ + qt_orc_v1_18 """ select * from complex_orc_v1_schema_change where map_values(rename_col5)[1] > 10 order by id; """ + qt_orc_v1_19 """ select 
struct_element(rename_col7,"add") from complex_orc_v1_schema_change order by id; """ + qt_orc_v1_20 """ select struct_element(rename_col7,"x") from complex_orc_v1_schema_change order by id; """ + qt_orc_v1_21 """ select array_size(rename_col3),rename_col3 from complex_orc_v1_schema_change order by id; """ + qt_orc_v1_22 """ select * from complex_orc_v1_schema_change where rename_col8 + rename_col9 > 100 order by id;""" qt_orc_v2_1 """ desc complex_orc_v2_schema_change ;""" @@ -99,6 +129,19 @@ suite("iceberg_schema_change", "p0,external,doris,external_docker,external_docke qt_orc_v2_8 """ select col_add2 from complex_orc_v2_schema_change where id >=7 order by id; """ qt_orc_v2_9 """ select id,count(col_add) from complex_orc_v2_schema_change group by id order by id desc ; """ qt_orc_v2_10 """ select col_add from complex_orc_v2_schema_change where col_add -1 = col_add2 order by id; """ + qt_orc_v2_11 """ select array_size(rename_col3),rename_col3 from complex_orc_v2_schema_change where array_size(rename_col3) > 2 order by id; """ + qt_orc_v2_12 """ select array_size(rename_col2),rename_col2 from complex_orc_v2_schema_change order by id; """ + qt_orc_v2_13 """ select array_size(rename_col2),rename_col2 from complex_orc_v2_schema_change where rename_col2[1] > 7 order by id; """ + qt_orc_v2_14 """ select array_size(rename_col2),rename_col2 from complex_orc_v2_schema_change where rename_col2[1] > 7 and id > 7 order by id; """ + qt_orc_v2_15 """ select array_size(rename_col1),rename_col1 from complex_orc_v2_schema_change order by id; """ + qt_orc_v2_16 """ select * from complex_orc_v2_schema_change where rename_col10 > 500 order by id ; """ + qt_orc_v2_17 """ select * from complex_orc_v2_schema_change where map_keys(rename_col4)[1] > 10 order by id; """ + qt_orc_v2_18 """ select * from complex_orc_v2_schema_change where map_values(rename_col5)[1] > 10 order by id; """ + qt_orc_v2_19 """ select struct_element(rename_col7,"add") from complex_orc_v2_schema_change order by id; """ + qt_orc_v2_20 """ select struct_element(rename_col7,"x") from complex_orc_v2_schema_change order by id; """ + qt_orc_v2_21 """ select array_size(rename_col3),rename_col3 from complex_orc_v2_schema_change order by id; """ + qt_orc_v2_22 """ select * from complex_orc_v2_schema_change where rename_col8 + rename_col9 > 100 order by id;""" + } /* From d0793fadf3b5b091c1fb788744ab0e1075b84fa1 Mon Sep 17 00:00:00 2001 From: sparrow <38098988+biohazard4321@users.noreply.github.com> Date: Sun, 11 Aug 2024 23:43:35 +0800 Subject: [PATCH 42/94] [Fix](broker)fix change afs and bos properties not working cause of hadoop filesystem cache. (#39117) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Due to hadoop filesystem cache changing afs and bos properties not working,so add some cache disable config to them. 
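
As context for this fix (an editorial sketch, not part of the patch): Hadoop's `FileSystem.get()` caches instances per scheme/authority/UGI, so when the broker rebuilds its `Configuration` with new AFS/BOS credentials or endpoints it can still be handed the stale cached instance. Setting `fs.<scheme>.impl.disable.cache=true` forces a fresh instance that picks up the updated properties. The snippet below illustrates that behavior under stated assumptions: the endpoint values are made up, and it assumes a BOS filesystem implementation (`org.apache.hadoop.fs.bos.BaiduBosFileSystem`) is on the classpath.

```java
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class FsCacheSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.bos.impl", "org.apache.hadoop.fs.bos.BaiduBosFileSystem");
        conf.set("fs.bos.endpoint", "http://old.endpoint.example"); // hypothetical value
        FileSystem first = FileSystem.get(URI.create("bos://bucket/"), conf);

        // Change a property; with the default cache, get() still returns the
        // instance that was created with the old endpoint.
        conf.set("fs.bos.endpoint", "http://new.endpoint.example"); // hypothetical value
        FileSystem cached = FileSystem.get(URI.create("bos://bucket/"), conf);
        System.out.println(first == cached); // true: stale cached instance

        // Disabling the per-scheme cache makes get() construct a new instance
        // that reads the updated configuration.
        conf.set("fs.bos.impl.disable.cache", "true");
        FileSystem fresh = FileSystem.get(URI.create("bos://bucket/"), conf);
        System.out.println(first == fresh); // false
    }
}
```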
--- .../java/org/apache/doris/broker/hdfs/FileSystemManager.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs_brokers/apache_hdfs_broker/src/main/java/org/apache/doris/broker/hdfs/FileSystemManager.java b/fs_brokers/apache_hdfs_broker/src/main/java/org/apache/doris/broker/hdfs/FileSystemManager.java index ad1a0bb03cc48f9..44b900c058ded6b 100644 --- a/fs_brokers/apache_hdfs_broker/src/main/java/org/apache/doris/broker/hdfs/FileSystemManager.java +++ b/fs_brokers/apache_hdfs_broker/src/main/java/org/apache/doris/broker/hdfs/FileSystemManager.java @@ -152,6 +152,7 @@ public class FileSystemManager { private static final String FS_BOS_SECRET_KEY = "fs.bos.secret.access.key"; private static final String FS_BOS_ENDPOINT = "fs.bos.endpoint"; private static final String FS_BOS_IMPL = "fs.bos.impl"; + private static final String FS_BOS_IMPL_DISABLE_CACHE = "fs.bos.impl.disable.cache"; private static final String FS_BOS_MULTIPART_UPLOADS_BLOCK_SIZE = "fs.bos.multipart.uploads.block.size"; // arguments for afs @@ -159,6 +160,7 @@ public class FileSystemManager { private static final String HADOOP_JOB_UGI = "hadoop.job.ugi"; private static final String FS_DEFAULT_NAME = "fs.default.name"; private static final String FS_AFS_IMPL = "fs.afs.impl"; + private static final String FS_AFS_IMPL_DISABLE_CACHE = "fs.afs.impl.disable.cache"; private static final String DFS_AGENT_PORT = "dfs.agent.port"; private static final String DFS_CLIENT_AUTH_METHOD = "dfs.client.auth.method"; private static final String DFS_RPC_TIMEOUT = "dfs.rpc.timeout"; @@ -847,6 +849,7 @@ public BrokerFileSystem getBOSFileSystem(String path, Map proper conf.set(FS_BOS_SECRET_KEY, secretKey); conf.set(FS_BOS_ENDPOINT, endpoint); conf.set(FS_BOS_IMPL, "org.apache.hadoop.fs.bos.BaiduBosFileSystem"); + conf.set(FS_BOS_IMPL_DISABLE_CACHE, "true"); conf.set(FS_BOS_MULTIPART_UPLOADS_BLOCK_SIZE, multiPartUploadBlockSize); FileSystem bosFileSystem = FileSystem.get(pathUri.getUri(), conf); fileSystem.setFileSystem(bosFileSystem); @@ -1011,6 +1014,7 @@ private BrokerFileSystem getAfsFileSystem(String path, Map prope conf.set(HADOOP_JOB_GROUP_NAME, group); conf.set(FS_DEFAULT_NAME, host); conf.set(FS_AFS_IMPL, properties.getOrDefault(FS_AFS_IMPL, "org.apache.hadoop.fs.LiteFileSystem")); + conf.set(FS_AFS_IMPL_DISABLE_CACHE, "true"); conf.set(DFS_CLIENT_AUTH_METHOD, properties.getOrDefault(DFS_CLIENT_AUTH_METHOD, "3")); conf.set(DFS_AGENT_PORT, properties.getOrDefault(DFS_AGENT_PORT, "20001")); conf.set(DFS_RPC_TIMEOUT, properties.getOrDefault(DFS_RPC_TIMEOUT, "300000")); From c77bca3321cda492bf9ddc1ef3cfd78cc68a6f8b Mon Sep 17 00:00:00 2001 From: wangbo Date: Mon, 12 Aug 2024 09:31:21 +0800 Subject: [PATCH 43/94] [Improment]Add workload group resource usage (#39177) ## Proposed changes ``` mysql [information_schema]>select BE_ID,WORKLOAD_GROUP_ID,CPU_USAGE,LOCAL_SCAN_BYTES_PER_SECOND/1024/1024 as scan_io_mb, MEMORY_USAGE_BYTES/1024/1024 mem_mb from workload_group_resource_usage; +-------+-------------------+-----------+--------------------+-------------------+ | BE_ID | WORKLOAD_GROUP_ID | CPU_USAGE | scan_io_mb | mem_mb | +-------+-------------------+-----------+--------------------+-------------------+ | 10005 | 62053 | 61.41% | 1516.4589414596558 | 27970.84313774109 | | 10005 | 1 | 0.00% | 0 | 0 | +-------+-------------------+-----------+--------------------+-------------------+ ``` --- be/src/exec/schema_scanner.cpp | 3 + .../schema_backend_active_tasks.cpp | 2 +- ..._workload_group_resource_usage_scanner.cpp | 90 +++++++++++++++++++ 
...ma_workload_group_resource_usage_scanner.h | 49 ++++++++++ be/src/io/fs/local_file_reader.cpp | 3 + be/src/olap/olap_common.h | 1 + be/src/pipeline/pipeline_task.cpp | 1 + be/src/runtime/query_context.h | 6 ++ be/src/runtime/thread_context.h | 36 +++++--- .../runtime/workload_group/workload_group.cpp | 28 +++++- .../runtime/workload_group/workload_group.h | 19 ++++ .../workload_group/workload_group_manager.cpp | 47 ++++++++++ .../workload_group/workload_group_manager.h | 6 ++ .../workload_management/io_throttle.cpp | 40 ++++++--- .../runtime/workload_management/io_throttle.h | 18 ++-- be/src/vec/exec/scan/vscanner.cpp | 9 ++ be/src/vec/exec/scan/vscanner.h | 6 +- .../vec/sink/writer/async_result_writer.cpp | 7 ++ .../doris/analysis/SchemaTableType.java | 5 +- .../org/apache/doris/catalog/SchemaTable.java | 10 +++ .../BackendPartitionedSchemaScanNode.java | 1 + gensrc/thrift/Descriptors.thrift | 3 +- .../jdbc/test_mariadb_jdbc_catalog.out | 1 + .../jdbc/test_mysql_jdbc_catalog.out | 1 + .../jdbc/test_mysql_jdbc_catalog_nereids.out | 1 + .../jdbc/test_mysql_jdbc_driver5_catalog.out | 1 + 26 files changed, 346 insertions(+), 48 deletions(-) create mode 100644 be/src/exec/schema_scanner/schema_workload_group_resource_usage_scanner.cpp create mode 100644 be/src/exec/schema_scanner/schema_workload_group_resource_usage_scanner.h diff --git a/be/src/exec/schema_scanner.cpp b/be/src/exec/schema_scanner.cpp index 2ddb3db295b487c..a1021b616cc9ac1 100644 --- a/be/src/exec/schema_scanner.cpp +++ b/be/src/exec/schema_scanner.cpp @@ -49,6 +49,7 @@ #include "exec/schema_scanner/schema_variables_scanner.h" #include "exec/schema_scanner/schema_views_scanner.h" #include "exec/schema_scanner/schema_workload_group_privileges.h" +#include "exec/schema_scanner/schema_workload_group_resource_usage_scanner.h" #include "exec/schema_scanner/schema_workload_groups_scanner.h" #include "exec/schema_scanner/schema_workload_sched_policy_scanner.h" #include "olap/hll.h" @@ -230,6 +231,8 @@ std::unique_ptr SchemaScanner::create(TSchemaTableType::type type return SchemaTableOptionsScanner::create_unique(); case TSchemaTableType::SCH_WORKLOAD_GROUP_PRIVILEGES: return SchemaWorkloadGroupPrivilegesScanner::create_unique(); + case TSchemaTableType::SCH_WORKLOAD_GROUP_RESOURCE_USAGE: + return SchemaBackendWorkloadGroupResourceUsage::create_unique(); default: return SchemaDummyScanner::create_unique(); break; diff --git a/be/src/exec/schema_scanner/schema_backend_active_tasks.cpp b/be/src/exec/schema_scanner/schema_backend_active_tasks.cpp index b35e84a9f9c9f49..74e95f4203217cb 100644 --- a/be/src/exec/schema_scanner/schema_backend_active_tasks.cpp +++ b/be/src/exec/schema_scanner/schema_backend_active_tasks.cpp @@ -27,7 +27,7 @@ namespace doris { std::vector SchemaBackendActiveTasksScanner::_s_tbls_columns = { // name, type, size - {"BE_ID", TYPE_BIGINT, sizeof(StringRef), false}, + {"BE_ID", TYPE_BIGINT, sizeof(int64_t), false}, {"FE_HOST", TYPE_VARCHAR, sizeof(StringRef), false}, {"QUERY_ID", TYPE_VARCHAR, sizeof(StringRef), false}, {"TASK_TIME_MS", TYPE_BIGINT, sizeof(int64_t), false}, diff --git a/be/src/exec/schema_scanner/schema_workload_group_resource_usage_scanner.cpp b/be/src/exec/schema_scanner/schema_workload_group_resource_usage_scanner.cpp new file mode 100644 index 000000000000000..ca339044e98a5f5 --- /dev/null +++ b/be/src/exec/schema_scanner/schema_workload_group_resource_usage_scanner.cpp @@ -0,0 +1,90 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license 
agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "exec/schema_scanner/schema_workload_group_resource_usage_scanner.h" + +#include +#include + +#include "runtime/exec_env.h" +#include "runtime/runtime_state.h" +#include "runtime/workload_group/workload_group_manager.h" +#include "vec/common/string_ref.h" +#include "vec/core/block.h" +#include "vec/data_types/data_type_factory.hpp" + +namespace doris { +std::vector SchemaBackendWorkloadGroupResourceUsage::_s_tbls_columns = { + // name, type, size + {"BE_ID", TYPE_BIGINT, sizeof(int64_t), false}, + {"WORKLOAD_GROUP_ID", TYPE_BIGINT, sizeof(int64_t), false}, + {"MEMORY_USAGE_BYTES", TYPE_BIGINT, sizeof(int64_t), false}, + {"CPU_USAGE_PERCENT", TYPE_DOUBLE, sizeof(double), false}, + {"LOCAL_SCAN_BYTES_PER_SECOND", TYPE_BIGINT, sizeof(int64_t), false}, + {"REMOTE_SCAN_BYTES_PER_SECOND", TYPE_BIGINT, sizeof(int64_t), false}, +}; + +SchemaBackendWorkloadGroupResourceUsage::SchemaBackendWorkloadGroupResourceUsage() + : SchemaScanner(_s_tbls_columns, TSchemaTableType::SCH_WORKLOAD_GROUP_RESOURCE_USAGE) {} + +SchemaBackendWorkloadGroupResourceUsage::~SchemaBackendWorkloadGroupResourceUsage() {} + +Status SchemaBackendWorkloadGroupResourceUsage::start(RuntimeState* state) { + _block_rows_limit = state->batch_size(); + return Status::OK(); +} + +Status SchemaBackendWorkloadGroupResourceUsage::get_next_block_internal(vectorized::Block* block, + bool* eos) { + if (!_is_init) { + return Status::InternalError("Used before initialized."); + } + + if (nullptr == block || nullptr == eos) { + return Status::InternalError("input pointer is nullptr."); + } + + if (_block == nullptr) { + _block = vectorized::Block::create_unique(); + + for (int i = 0; i < _s_tbls_columns.size(); ++i) { + TypeDescriptor descriptor(_s_tbls_columns[i].type); + auto data_type = + vectorized::DataTypeFactory::instance().create_data_type(descriptor, true); + _block->insert(vectorized::ColumnWithTypeAndName(data_type->create_column(), data_type, + _s_tbls_columns[i].name)); + } + + ExecEnv::GetInstance()->workload_group_mgr()->get_wg_resource_usage(_block.get()); + _total_rows = _block->rows(); + } + + if (_row_idx == _total_rows) { + *eos = true; + return Status::OK(); + } + + int current_batch_rows = std::min(_block_rows_limit, _total_rows - _row_idx); + vectorized::MutableBlock mblock = vectorized::MutableBlock::build_mutable_block(block); + RETURN_IF_ERROR(mblock.add_rows(_block.get(), _row_idx, current_batch_rows)); + _row_idx += current_batch_rows; + + *eos = _row_idx == _total_rows; + return Status::OK(); +} + +} // namespace doris \ No newline at end of file diff --git a/be/src/exec/schema_scanner/schema_workload_group_resource_usage_scanner.h b/be/src/exec/schema_scanner/schema_workload_group_resource_usage_scanner.h new file mode 100644 index 000000000000000..236dd69999fbb37 --- /dev/null +++ 
b/be/src/exec/schema_scanner/schema_workload_group_resource_usage_scanner.h @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "common/status.h" +#include "exec/schema_scanner.h" + +namespace doris { +class RuntimeState; +namespace vectorized { +class Block; +} // namespace vectorized + +class SchemaBackendWorkloadGroupResourceUsage : public SchemaScanner { + ENABLE_FACTORY_CREATOR(SchemaBackendWorkloadGroupResourceUsage); + +public: + SchemaBackendWorkloadGroupResourceUsage(); + ~SchemaBackendWorkloadGroupResourceUsage() override; + + Status start(RuntimeState* state) override; + Status get_next_block_internal(vectorized::Block* block, bool* eos) override; + + static std::vector _s_tbls_columns; + +private: + int _block_rows_limit = 4096; + int _row_idx = 0; + int _total_rows = 0; + std::unique_ptr _block = nullptr; +}; +}; // namespace doris \ No newline at end of file diff --git a/be/src/io/fs/local_file_reader.cpp b/be/src/io/fs/local_file_reader.cpp index 35cf2cc627e5e7e..b4f144a633048e4 100644 --- a/be/src/io/fs/local_file_reader.cpp +++ b/be/src/io/fs/local_file_reader.cpp @@ -67,6 +67,7 @@ void BeConfDataDirReader::init_be_conf_data_dir( data_dir_info.path = store_paths[i].path; data_dir_info.storage_medium = store_paths[i].storage_medium; data_dir_info.data_dir_type = DataDirType::OLAP_DATA_DIR; + data_dir_info.bvar_name = "local_data_dir_" + std::to_string(i); be_config_data_dir_list.push_back(data_dir_info); } @@ -75,6 +76,7 @@ void BeConfDataDirReader::init_be_conf_data_dir( data_dir_info.path = spill_store_paths[i].path; data_dir_info.storage_medium = spill_store_paths[i].storage_medium; data_dir_info.data_dir_type = doris::DataDirType::SPILL_DISK_DIR; + data_dir_info.bvar_name = "spill_data_dir_" + std::to_string(i); be_config_data_dir_list.push_back(data_dir_info); } @@ -83,6 +85,7 @@ void BeConfDataDirReader::init_be_conf_data_dir( data_dir_info.path = cache_paths[i].path; data_dir_info.storage_medium = TStorageMedium::REMOTE_CACHE; data_dir_info.data_dir_type = doris::DataDirType::DATA_CACHE_DIR; + data_dir_info.bvar_name = "local_cache_dir_" + std::to_string(i); be_config_data_dir_list.push_back(data_dir_info); } } diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index df2c3b3e145c57c..b6e336722f3eeb6 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -71,6 +71,7 @@ struct DataDirInfo { bool is_used = false; // whether available mark TStorageMedium::type storage_medium = TStorageMedium::HDD; // Storage medium type: SSD|HDD DataDirType data_dir_type = DataDirType::OLAP_DATA_DIR; + std::string bvar_name; }; struct PredicateFilterInfo { int type = 0; diff --git a/be/src/pipeline/pipeline_task.cpp b/be/src/pipeline/pipeline_task.cpp 
index da581629e21940c..8692075622a9066 100644 --- a/be/src/pipeline/pipeline_task.cpp +++ b/be/src/pipeline/pipeline_task.cpp @@ -302,6 +302,7 @@ Status PipelineTask::execute(bool* eos) { if (cpu_qs) { cpu_qs->add_cpu_nanos(delta_cpu_time); } + query_context()->update_wg_cpu_adder(delta_cpu_time); }}; if (_wait_to_start()) { return Status::OK(); diff --git a/be/src/runtime/query_context.h b/be/src/runtime/query_context.h index 1bbccfd33b74033..3241010c20ede56 100644 --- a/be/src/runtime/query_context.h +++ b/be/src/runtime/query_context.h @@ -250,6 +250,12 @@ class QueryContext { // only for file scan node std::map file_scan_range_params_map; + void update_wg_cpu_adder(int64_t delta_cpu_time) { + if (_workload_group != nullptr) { + _workload_group->update_cpu_adder(delta_cpu_time); + } + } + private: int _timeout_second; TUniqueId _query_id; diff --git a/be/src/runtime/thread_context.h b/be/src/runtime/thread_context.h index 8cf6892dd39e8a7..c54b1a6892bd9ee 100644 --- a/be/src/runtime/thread_context.h +++ b/be/src/runtime/thread_context.h @@ -114,20 +114,22 @@ __VA_ARGS__; \ } while (0) -#define LIMIT_LOCAL_SCAN_IO(data_dir, bytes_read) \ - std::shared_ptr iot = nullptr; \ - if (auto* t_ctx = doris::thread_context(true)) { \ - iot = t_ctx->get_local_scan_io_throttle(data_dir); \ - } \ - if (iot) { \ - iot->acquire(-1); \ - } \ - Defer defer { \ - [&]() { \ - if (iot) { \ - iot->update_next_io_time(*bytes_read); \ - } \ - } \ +#define LIMIT_LOCAL_SCAN_IO(data_dir, bytes_read) \ + std::shared_ptr iot = nullptr; \ + auto* t_ctx = doris::thread_context(true); \ + if (t_ctx) { \ + iot = t_ctx->get_local_scan_io_throttle(data_dir); \ + } \ + if (iot) { \ + iot->acquire(-1); \ + } \ + Defer defer { \ + [&]() { \ + if (iot) { \ + iot->update_next_io_time(*bytes_read); \ + t_ctx->update_total_local_scan_io_adder(*bytes_read); \ + } \ + } \ } #define LIMIT_REMOTE_SCAN_IO(bytes_read) \ @@ -276,6 +278,12 @@ class ThreadContext { return nullptr; } + void update_total_local_scan_io_adder(size_t bytes_read) { + if (std::shared_ptr wg_ptr = _wg_wptr.lock()) { + wg_ptr->update_total_local_scan_io_adder(bytes_read); + } + } + int thread_local_handle_count = 0; int skip_memory_check = 0; int skip_large_memory_check = 0; diff --git a/be/src/runtime/workload_group/workload_group.cpp b/be/src/runtime/workload_group/workload_group.cpp index dffaf3a940c68c0..b2f9541231a30c5 100644 --- a/be/src/runtime/workload_group/workload_group.cpp +++ b/be/src/runtime/workload_group/workload_group.cpp @@ -69,9 +69,18 @@ WorkloadGroup::WorkloadGroup(const WorkloadGroupInfo& tg_info) _remote_scan_bytes_per_second(tg_info.remote_read_bytes_per_second) { std::vector& data_dir_list = io::BeConfDataDirReader::be_config_data_dir_list; for (const auto& data_dir : data_dir_list) { - _scan_io_throttle_map[data_dir.path] = std::make_shared(); - } - _remote_scan_io_throttle = std::make_shared(); + _scan_io_throttle_map[data_dir.path] = + std::make_shared(_name, data_dir.bvar_name + "_read_bytes"); + } + _remote_scan_io_throttle = std::make_shared(_name, "remote_read_bytes"); + _mem_used_status = std::make_unique>(_name, "memory_used", 0); + _cpu_usage_adder = std::make_unique>(_name, "cpu_usage_adder"); + _cpu_usage_per_second = std::make_unique>>( + _name, "cpu_usage", _cpu_usage_adder.get(), 10); + _total_local_scan_io_adder = + std::make_unique>(_name, "total_local_read_bytes"); + _total_local_scan_io_per_second = std::make_unique>>( + _name, "total_local_read_bytes_per_second", _total_local_scan_io_adder.get(), 1); } std::string 
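The metric members wired up in the WorkloadGroup constructor above follow the usual bvar pattern: an Adder accumulates a running total, a PerSecond window derives a rate from it, and a Status holds a point-in-time gauge. A minimal standalone sketch of that wiring, assuming only the bvar headers shipped with brpc (the prefix and the sample values are illustrative):

```cpp
#include <bvar/bvar.h>

#include <string>

// Sketch of the per-workload-group metrics: Adder = running total,
// PerSecond = rate derived from an Adder over a sliding window,
// Status = point-in-time gauge that is overwritten in place.
struct WgMetrics {
    std::string prefix = "example_wg";  // stands in for the group name
    bvar::Status<int64_t> mem_used{prefix, "memory_used", 0};
    bvar::Adder<int64_t> cpu_adder{prefix, "cpu_usage_adder"};
    bvar::PerSecond<bvar::Adder<int64_t>> cpu_per_second{
            prefix, "cpu_usage", &cpu_adder, 10};  // 10s window, as in the patch
};

int main() {
    WgMetrics m;
    m.mem_used.set_value(128 * 1024 * 1024);  // gauge: overwrite
    m.cpu_adder << 2000000000;                // counter: add consumed CPU ns
    // The PerSecond window needs some wall-clock time before its get_value()
    // is meaningful; in the patch it is sampled by the schema scanner.
    return m.mem_used.get_value() > 0 ? 0 : 1;
}
```

These are the values the new getters (get_mem_used(), get_cpu_usage(), get_local_scan_bytes_per_second()) read back when the schema table is filled.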
WorkloadGroup::debug_string() const { @@ -136,6 +145,7 @@ int64_t WorkloadGroup::make_memory_tracker_snapshots( } } refresh_memory(used_memory); + _mem_used_status->set_value(used_memory); return used_memory; } @@ -585,6 +595,18 @@ std::shared_ptr WorkloadGroup::get_remote_scan_io_throttle() { return _remote_scan_io_throttle; } +void WorkloadGroup::update_cpu_adder(int64_t delta_cpu_time) { + (*_cpu_usage_adder) << (uint64_t)delta_cpu_time; +} + +void WorkloadGroup::update_total_local_scan_io_adder(size_t scan_bytes) { + (*_total_local_scan_io_adder) << scan_bytes; +} + +int64_t WorkloadGroup::get_remote_scan_bytes_per_second() { + return _remote_scan_io_throttle->get_bvar_io_per_second(); +} + void WorkloadGroup::try_stop_schedulers() { std::lock_guard wlock(_task_sched_lock); if (_task_sched) { diff --git a/be/src/runtime/workload_group/workload_group.h b/be/src/runtime/workload_group/workload_group.h index 0cb8355400815c2..3561098b6ce29c1 100644 --- a/be/src/runtime/workload_group/workload_group.h +++ b/be/src/runtime/workload_group/workload_group.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include #include @@ -195,6 +196,17 @@ class WorkloadGroup : public std::enable_shared_from_this { void upsert_scan_io_throttle(WorkloadGroupInfo* tg_info); + void update_cpu_adder(int64_t delta_cpu_time); + + void update_total_local_scan_io_adder(size_t scan_bytes); + + int64_t get_mem_used() { return _mem_used_status->get_value(); } + uint64_t get_cpu_usage() { return _cpu_usage_per_second->get_value(); } + int64_t get_local_scan_bytes_per_second() { + return _total_local_scan_io_per_second->get_value(); + } + int64_t get_remote_scan_bytes_per_second(); + private: mutable std::shared_mutex _mutex; // lock _name, _version, _cpu_share, _memory_limit const uint64_t _id; @@ -234,6 +246,13 @@ class WorkloadGroup : public std::enable_shared_from_this { std::map> _scan_io_throttle_map; std::shared_ptr _remote_scan_io_throttle {nullptr}; + + // bvar metric + std::unique_ptr> _mem_used_status; + std::unique_ptr> _cpu_usage_adder; + std::unique_ptr>> _cpu_usage_per_second; + std::unique_ptr> _total_local_scan_io_adder; + std::unique_ptr>> _total_local_scan_io_per_second; }; using WorkloadGroupPtr = std::shared_ptr; diff --git a/be/src/runtime/workload_group/workload_group_manager.cpp b/be/src/runtime/workload_group/workload_group_manager.cpp index 6a196497e724a8d..07e0f7bc23f44aa 100644 --- a/be/src/runtime/workload_group/workload_group_manager.cpp +++ b/be/src/runtime/workload_group/workload_group_manager.cpp @@ -28,6 +28,7 @@ #include "util/mem_info.h" #include "util/threadpool.h" #include "util/time.h" +#include "vec/core/block.h" #include "vec/exec/scan/scanner_scheduler.h" namespace doris { @@ -257,6 +258,52 @@ void WorkloadGroupMgr::refresh_wg_weighted_memory_limit() { } } +void WorkloadGroupMgr::get_wg_resource_usage(vectorized::Block* block) { + auto insert_int_value = [&](int col_index, int64_t int_val, vectorized::Block* block) { + vectorized::MutableColumnPtr mutable_col_ptr; + mutable_col_ptr = std::move(*block->get_by_position(col_index).column).assume_mutable(); + auto* nullable_column = + reinterpret_cast(mutable_col_ptr.get()); + vectorized::IColumn* col_ptr = &nullable_column->get_nested_column(); + reinterpret_cast*>(col_ptr)->insert_value( + int_val); + nullable_column->get_null_map_data().emplace_back(0); + }; + + auto insert_double_value = [&](int col_index, double double_val, vectorized::Block* block) { + vectorized::MutableColumnPtr mutable_col_ptr; + mutable_col_ptr = 
std::move(*block->get_by_position(col_index).column).assume_mutable(); + auto* nullable_column = + reinterpret_cast(mutable_col_ptr.get()); + vectorized::IColumn* col_ptr = &nullable_column->get_nested_column(); + reinterpret_cast*>(col_ptr)->insert_value( + double_val); + nullable_column->get_null_map_data().emplace_back(0); + }; + + int64_t be_id = ExecEnv::GetInstance()->master_info()->backend_id; + int cpu_num = CpuInfo::num_cores(); + cpu_num = cpu_num <= 0 ? 1 : cpu_num; + uint64_t total_cpu_time_ns_per_second = cpu_num * 1000000000ll; + + std::shared_lock r_lock(_group_mutex); + block->reserve(_workload_groups.size()); + for (const auto& [id, wg] : _workload_groups) { + insert_int_value(0, be_id, block); + insert_int_value(1, wg->id(), block); + insert_int_value(2, wg->get_mem_used(), block); + + double cpu_usage_p = + (double)wg->get_cpu_usage() / (double)total_cpu_time_ns_per_second * 100; + cpu_usage_p = std::round(cpu_usage_p * 100.0) / 100.0; + + insert_double_value(3, cpu_usage_p, block); + + insert_int_value(4, wg->get_local_scan_bytes_per_second(), block); + insert_int_value(5, wg->get_remote_scan_bytes_per_second(), block); + } +} + void WorkloadGroupMgr::stop() { for (auto iter = _workload_groups.begin(); iter != _workload_groups.end(); iter++) { iter->second->try_stop_schedulers(); diff --git a/be/src/runtime/workload_group/workload_group_manager.h b/be/src/runtime/workload_group/workload_group_manager.h index f7f02bf63e6997b..15740d061adc94a 100644 --- a/be/src/runtime/workload_group/workload_group_manager.h +++ b/be/src/runtime/workload_group/workload_group_manager.h @@ -27,6 +27,10 @@ namespace doris { class CgroupCpuCtl; +namespace vectorized { +class Block; +} // namespace vectorized + namespace pipeline { class TaskScheduler; class MultiCoreTaskQueue; @@ -56,6 +60,8 @@ class WorkloadGroupMgr { void refresh_wg_weighted_memory_limit(); + void get_wg_resource_usage(vectorized::Block* block); + private: std::shared_mutex _group_mutex; std::unordered_map _workload_groups; diff --git a/be/src/runtime/workload_management/io_throttle.cpp b/be/src/runtime/workload_management/io_throttle.cpp index 3a8256eee3746dd..dacfa29012f59fe 100644 --- a/be/src/runtime/workload_management/io_throttle.cpp +++ b/be/src/runtime/workload_management/io_throttle.cpp @@ -17,12 +17,19 @@ #include "runtime/workload_management/io_throttle.h" +#include "util/defer_op.h" #include "util/time.h" namespace doris { +IOThrottle::IOThrottle(std::string prefix, std::string name) { + _io_adder = std::make_unique>(prefix, name); + _io_adder_per_second = std::make_unique>>( + prefix, name + "_per_second", _io_adder.get(), 1); +} + bool IOThrottle::acquire(int64_t block_timeout_ms) { - if (_io_bytes_per_second < 0) { + if (_io_bytes_per_second_limit < 0) { return true; } @@ -42,7 +49,7 @@ bool IOThrottle::acquire(int64_t block_timeout_ms) { } bool IOThrottle::try_acquire() { - if (_io_bytes_per_second < 0) { + if (_io_bytes_per_second_limit < 0) { return true; } std::unique_lock w_lock(_mutex); @@ -50,24 +57,31 @@ bool IOThrottle::try_acquire() { } void IOThrottle::update_next_io_time(int64_t io_bytes) { - if (_io_bytes_per_second <= 0 || io_bytes <= 0) { + Defer defer {[&]() { + if (io_bytes > 0) { + (*_io_adder) << io_bytes; + } + }}; + if (_io_bytes_per_second_limit <= 0 || io_bytes <= 0) { return; } - int64_t read_bytes_per_second = _io_bytes_per_second; - std::unique_lock w_lock(_mutex); - double io_bytes_float = static_cast(io_bytes); - double ret = (io_bytes_float / static_cast(read_bytes_per_second)) * 
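The CPU_USAGE_PERCENT column produced in get_wg_resource_usage() above is the group's CPU nanoseconds per wall-clock second divided by the backend's budget of cpu_num * 1e9 ns, rounded to two decimals. A small worked example of the same arithmetic (the 8-core count and the 2.5e9 ns sample are made up for illustration):

```cpp
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
    // Same arithmetic as get_wg_resource_usage(): an 8-core backend has a
    // budget of 8 * 1e9 CPU nanoseconds per wall-clock second.
    int cpu_num = 8;
    uint64_t total_cpu_time_ns_per_second = cpu_num * 1000000000LL;

    // Suppose the group's PerSecond window reports 2.5e9 ns of CPU per second.
    uint64_t wg_cpu_ns_per_second = 2500000000ULL;

    double cpu_usage_p = (double)wg_cpu_ns_per_second /
                         (double)total_cpu_time_ns_per_second * 100;
    cpu_usage_p = std::round(cpu_usage_p * 100.0) / 100.0;  // keep 2 decimals

    std::printf("CPU_USAGE_PERCENT = %.2f\n", cpu_usage_p);  // prints 31.25
}
```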
- static_cast(MICROS_PER_SEC); - int64_t current_time = GetCurrentTimeMicros(); + int64_t read_bytes_per_second = _io_bytes_per_second_limit; + { + std::unique_lock w_lock(_mutex); + double io_bytes_float = static_cast(io_bytes); + double ret = (io_bytes_float / static_cast(read_bytes_per_second)) * + static_cast(MICROS_PER_SEC); + int64_t current_time = GetCurrentTimeMicros(); - if (current_time > _next_io_time_micros) { - _next_io_time_micros = current_time; + if (current_time > _next_io_time_micros) { + _next_io_time_micros = current_time; + } + _next_io_time_micros += ret < 1 ? static_cast(1) : static_cast(ret); } - _next_io_time_micros += ret < 1 ? static_cast(1) : static_cast(ret); } void IOThrottle::set_io_bytes_per_second(int64_t io_bytes_per_second) { - _io_bytes_per_second = io_bytes_per_second; + _io_bytes_per_second_limit = io_bytes_per_second; } }; // namespace doris \ No newline at end of file diff --git a/be/src/runtime/workload_management/io_throttle.h b/be/src/runtime/workload_management/io_throttle.h index 691255d23c48c47..ce62c65d7a9eeb2 100644 --- a/be/src/runtime/workload_management/io_throttle.h +++ b/be/src/runtime/workload_management/io_throttle.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include @@ -25,16 +26,9 @@ namespace doris { -class IOThrottle; - -struct IOThrottleCtx { - IOThrottle* io_throttle = nullptr; - int io_block_timeout; -}; - class IOThrottle { public: - IOThrottle() = default; + IOThrottle(std::string prefix, std::string name); ~IOThrottle() = default; @@ -47,12 +41,16 @@ class IOThrottle { void set_io_bytes_per_second(int64_t read_bytes_per_second); - int64_t get_io_bytes_per_second() { return _io_bytes_per_second; } + size_t get_bvar_io_per_second() { return _io_adder_per_second->get_value(); } private: std::mutex _mutex; std::condition_variable wait_condition; int64_t _next_io_time_micros {0}; - std::atomic _io_bytes_per_second {10485760}; + std::atomic _io_bytes_per_second_limit {10485760}; + + // bvar monitor + std::unique_ptr> _io_adder; + std::unique_ptr>> _io_adder_per_second; }; }; // namespace doris \ No newline at end of file diff --git a/be/src/vec/exec/scan/vscanner.cpp b/be/src/vec/exec/scan/vscanner.cpp index f4210b79ea803ee..6232be473026a6b 100644 --- a/be/src/vec/exec/scan/vscanner.cpp +++ b/be/src/vec/exec/scan/vscanner.cpp @@ -262,4 +262,13 @@ void VScanner::_collect_profile_before_close() { _state->update_num_rows_load_unselected(_counter.num_rows_unselected); } +void VScanner::update_scan_cpu_timer() { + int64_t cpu_time = _cpu_watch.elapsed_time(); + _scan_cpu_timer += cpu_time; + _query_statistics->add_cpu_nanos(cpu_time); + if (_state && _state->get_query_ctx()) { + _state->get_query_ctx()->update_wg_cpu_adder(cpu_time); + } +} + } // namespace doris::vectorized diff --git a/be/src/vec/exec/scan/vscanner.h b/be/src/vec/exec/scan/vscanner.h index 5eae2a089544cfa..19c37f6fc21e552 100644 --- a/be/src/vec/exec/scan/vscanner.h +++ b/be/src/vec/exec/scan/vscanner.h @@ -127,11 +127,7 @@ class VScanner { int64_t get_scanner_wait_worker_timer() const { return _scanner_wait_worker_timer; } - void update_scan_cpu_timer() { - int64_t cpu_time = _cpu_watch.elapsed_time(); - _scan_cpu_timer += cpu_time; - _query_statistics->add_cpu_nanos(cpu_time); - } + void update_scan_cpu_timer(); RuntimeState* runtime_state() { return _state; } diff --git a/be/src/vec/sink/writer/async_result_writer.cpp b/be/src/vec/sink/writer/async_result_writer.cpp index e5fe8f589d595b8..16dcbc648fb9be3 100644 --- 
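For reference, the pacing rule that update_next_io_time() applies under its lock: each completed read pushes the next permitted IO forward by io_bytes / limit seconds, expressed in microseconds with a floor of 1 us. A standalone sketch of just that computation, assuming MICROS_PER_SEC is 1,000,000:

```cpp
#include <cstdint>
#include <cstdio>

// Delay accrued by one read of `io_bytes` against a byte-per-second limit,
// mirroring the computation inside IOThrottle::update_next_io_time().
int64_t pacing_delay_micros(int64_t io_bytes, int64_t limit_bytes_per_second) {
    constexpr int64_t MICROS_PER_SEC = 1000000;
    if (limit_bytes_per_second <= 0 || io_bytes <= 0) {
        return 0;  // unlimited throttle or nothing read: nothing to accrue
    }
    double ret = static_cast<double>(io_bytes) /
                 static_cast<double>(limit_bytes_per_second) *
                 static_cast<double>(MICROS_PER_SEC);
    return ret < 1 ? static_cast<int64_t>(1) : static_cast<int64_t>(ret);
}

int main() {
    // A 4 MiB read against the 10 MiB/s default limit -> ~400 ms of pacing.
    std::printf("%lld us\n",
                (long long)pacing_delay_micros(4 << 20, 10485760));
}
```

acquire() then waits until that deadline before the next read is allowed, which is how the per-second limit is enforced.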
a/be/src/vec/sink/writer/async_result_writer.cpp +++ b/be/src/vec/sink/writer/async_result_writer.cpp @@ -110,6 +110,13 @@ void AsyncResultWriter::process_block(RuntimeState* state, RuntimeProfile* profi DCHECK(_dependency); if (_writer_status.ok()) { while (true) { + ThreadCpuStopWatch cpu_time_stop_watch; + cpu_time_stop_watch.start(); + Defer defer {[&]() { + if (state && state->get_query_ctx()) { + state->get_query_ctx()->update_wg_cpu_adder(cpu_time_stop_watch.elapsed_time()); + } + }}; if (!_eos && _data_queue.empty() && _writer_status.ok()) { std::unique_lock l(_m); while (!_eos && _data_queue.empty() && _writer_status.ok()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/SchemaTableType.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/SchemaTableType.java index d0a3a3728b2d6ba..7c661861b3fbb65 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/SchemaTableType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/SchemaTableType.java @@ -80,7 +80,10 @@ public enum SchemaTableType { SCH_TABLE_OPTIONS("TABLE_OPTIONS", "TABLE_OPTIONS", TSchemaTableType.SCH_TABLE_OPTIONS), SCH_WORKLOAD_GROUP_PRIVILEGES("WORKLOAD_GROUP_PRIVILEGES", - "WORKLOAD_GROUP_PRIVILEGES", TSchemaTableType.SCH_WORKLOAD_GROUP_PRIVILEGES); + "WORKLOAD_GROUP_PRIVILEGES", TSchemaTableType.SCH_WORKLOAD_GROUP_PRIVILEGES), + + SCH_WORKLOAD_GROUP_RESOURCE_USAGE("WORKLOAD_GROUP_RESOURCE_USAGE", + "WORKLOAD_GROUP_RESOURCE_USAGE", TSchemaTableType.SCH_WORKLOAD_GROUP_RESOURCE_USAGE); private static final String dbName = "INFORMATION_SCHEMA"; private static SelectList fullSelectLists; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/SchemaTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/SchemaTable.java index 8802d2665269e7c..311588719ca9882 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/SchemaTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/SchemaTable.java @@ -540,6 +540,16 @@ public class SchemaTable extends Table { .column("IS_GRANTABLE", ScalarType.createVarchar(IS_GRANTABLE_LEN)) .build()) ) + .put("workload_group_resource_usage", + new SchemaTable(SystemIdGenerator.getNextId(), "workload_group_resource_usage", TableType.SCHEMA, + builder().column("BE_ID", ScalarType.createType(PrimitiveType.BIGINT)) + .column("WORKLOAD_GROUP_ID", ScalarType.createType(PrimitiveType.BIGINT)) + .column("MEMORY_USAGE_BYTES", ScalarType.createType(PrimitiveType.BIGINT)) + .column("CPU_USAGE_PERCENT", ScalarType.createType(PrimitiveType.DOUBLE)) + .column("LOCAL_SCAN_BYTES_PER_SECOND", ScalarType.createType(PrimitiveType.BIGINT)) + .column("REMOTE_SCAN_BYTES_PER_SECOND", ScalarType.createType(PrimitiveType.BIGINT)) + .build()) + ) .build(); private boolean fetchAllFe = false; diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/BackendPartitionedSchemaScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/BackendPartitionedSchemaScanNode.java index ab2798e2ba799a0..cf5c85e98b7d85e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/BackendPartitionedSchemaScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/BackendPartitionedSchemaScanNode.java @@ -65,6 +65,7 @@ public class BackendPartitionedSchemaScanNode extends SchemaScanNode { BEACKEND_ID_COLUMN_SET.add("backend_id"); BACKEND_TABLE.add("backend_active_tasks"); + BACKEND_TABLE.add("workload_group_resource_usage"); BEACKEND_ID_COLUMN_SET.add("be_id"); } diff --git a/gensrc/thrift/Descriptors.thrift b/gensrc/thrift/Descriptors.thrift 
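The AsyncResultWriter change above charges each block's CPU time to the workload group through a scope-exit guard, so every return path in the loop body is accounted for. A rough standalone model of that pattern, using clock_gettime(CLOCK_THREAD_CPUTIME_ID) in place of ThreadCpuStopWatch and a hand-rolled scope-exit helper in place of Defer (names here are illustrative, not Doris APIs):

```cpp
#include <time.h>

#include <cstdint>
#include <cstdio>

// Stand-in for doris::Defer: run a callable when the scope is left, so the
// accounting happens on every exit path of the loop body.
template <typename F>
struct ScopeExit {
    F f;
    ~ScopeExit() { f(); }
};
template <typename F>
ScopeExit(F) -> ScopeExit<F>;

// Thread CPU time in nanoseconds, the quantity ThreadCpuStopWatch measures.
int64_t thread_cpu_ns() {
    timespec ts;
    clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);
    return int64_t(ts.tv_sec) * 1000000000 + ts.tv_nsec;
}

int64_t g_wg_cpu_adder = 0;  // stands in for WorkloadGroup::update_cpu_adder

void process_one_block() {
    int64_t start = thread_cpu_ns();
    ScopeExit defer{[&] { g_wg_cpu_adder += thread_cpu_ns() - start; }};
    // ... the actual sink/write work would happen here ...
    volatile int64_t sink = 0;
    for (int i = 0; i < 1000000; ++i) sink = sink + i;
}

int main() {
    process_one_block();
    std::printf("charged %lld ns to the workload group\n",
                (long long)g_wg_cpu_adder);
}
```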
index 20042adc42ebd09..f97d09f68a5d1ca 100644 --- a/gensrc/thrift/Descriptors.thrift +++ b/gensrc/thrift/Descriptors.thrift @@ -134,7 +134,8 @@ enum TSchemaTableType { SCH_PROCS_PRIV, SCH_WORKLOAD_POLICY, SCH_TABLE_OPTIONS, - SCH_WORKLOAD_GROUP_PRIVILEGES; + SCH_WORKLOAD_GROUP_PRIVILEGES, + SCH_WORKLOAD_GROUP_RESOURCE_USAGE; } enum THdfsCompression { diff --git a/regression-test/data/external_table_p0/jdbc/test_mariadb_jdbc_catalog.out b/regression-test/data/external_table_p0/jdbc/test_mariadb_jdbc_catalog.out index c828848a8faf249..a2949c016cbc355 100644 --- a/regression-test/data/external_table_p0/jdbc/test_mariadb_jdbc_catalog.out +++ b/regression-test/data/external_table_p0/jdbc/test_mariadb_jdbc_catalog.out @@ -60,6 +60,7 @@ triggers user_privileges views workload_group_privileges +workload_group_resource_usage workload_groups workload_policy diff --git a/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog.out b/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog.out index ee5cb342440a247..625eda53c5c78e4 100644 --- a/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog.out +++ b/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog.out @@ -224,6 +224,7 @@ triggers user_privileges views workload_group_privileges +workload_group_resource_usage workload_groups workload_policy diff --git a/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog_nereids.out b/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog_nereids.out index 0d7e953567f8ec6..816d2a86d0b33e8 100644 --- a/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog_nereids.out +++ b/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog_nereids.out @@ -192,6 +192,7 @@ triggers user_privileges views workload_group_privileges +workload_group_resource_usage workload_groups workload_policy diff --git a/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_driver5_catalog.out b/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_driver5_catalog.out index 953b425394c154c..3a9de0eab350111 100644 --- a/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_driver5_catalog.out +++ b/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_driver5_catalog.out @@ -234,6 +234,7 @@ triggers user_privileges views workload_group_privileges +workload_group_resource_usage workload_groups workload_policy From 88b518b4df2ad871b0d4d2f776aef027fc2ecab1 Mon Sep 17 00:00:00 2001 From: kkop <2449402815@qq.com> Date: Mon, 12 Aug 2024 09:35:33 +0800 Subject: [PATCH 44/94] [enhancement](regression-test) agg schema key modify case (#39198) ## Proposed changes Issue Number: close #xxx --- .../test_agg_schema_key_change_modify.groovy | 1111 ++++++++ .../test_agg_schema_key_change_modify1.groovy | 2422 +++++++++++++++++ 2 files changed, 3533 insertions(+) create mode 100644 regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify.groovy create mode 100644 regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify1.groovy diff --git a/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify.groovy b/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify.groovy new file mode 100644 index 000000000000000..9bf9520b9c151a2 --- /dev/null +++ b/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify.groovy @@ -0,0 +1,1111 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_agg_schema_key_change_modify","p0") { + def tbName1 = "test_agg_schema_key_change_modify1" + def tbName2 = "test_agg_schema_key_change_modify_1" + def initTable1 = "" + def initTableData1 = "" + + /** + * Test the agg model by modify a key type + */ + + sql """ DROP TABLE IF EXISTS ${tbName1} """ + def getTableStatusSql = " SHOW ALTER TABLE COLUMN WHERE IndexName='${tbName1}' ORDER BY createtime DESC LIMIT 1 " + def initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `is_teacher` BOOLEAN COMMENT \"是否是老师\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `is_teacher`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + def initTableData = "insert into ${tbName1} values(123456789, 'Alice', 0, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 0, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 1, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 0, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + + //TODO Test the agg model by modify a key type from BOOLEAN to TINYINT + def errorMessage="errCode = 2, detailMessage = Can not change BOOLEAN to TINYINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_teacher TINYINT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from BOOLEAN to SMALLINT + errorMessage="errCode = 2, detailMessage = Can not change BOOLEAN to SMALLINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_teacher SMALLINT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from BOOLEAN to INT + errorMessage="errCode = 2, detailMessage = Can not change BOOLEAN to INT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_teacher INT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from BOOLEAN to BIGINT + errorMessage="errCode = 2, detailMessage = Can not change BOOLEAN to BIGINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_teacher BIGINT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + + //TODO Test the agg model by modify a key type from BOOLEAN to FLOAT + errorMessage="errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_teacher FLOAT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from BOOLEAN to DECIMAL + errorMessage="errCode = 2, detailMessage = Can not change BOOLEAN to DECIMAL32" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_teacher DECIMAL KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from BOOLEAN to CHAR + errorMessage="errCode = 2, detailMessage = Can not change BOOLEAN to CHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_teacher CHAR KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + //TODO Test the agg model by modify a key type from BOOLEAN to STRING + errorMessage="errCode = 2, detailMessage = String Type should not be used in key column[is_teacher]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_teacher STRING KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from BOOLEAN to VARCHAR + errorMessage="errCode = 2, detailMessage = Can not change BOOLEAN to VARCHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_teacher VARCHAR(32) KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + /** + * Test the agg model by modify a key type from TINYINT to other type + */ + sql """ DROP TABLE IF EXISTS ${tbName1} """ + initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `is_student` TINYINT COMMENT \"是否是学生\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `is_student`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData = "insert into ${tbName1} values(123456789, 'Alice', 1, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 1, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 1, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 1, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 0, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', 1, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + + //TODO Test the agg model by modify a key type from TINYINT to BOOLEAN + errorMessage="errCode = 2, detailMessage = Can not change TINYINT to BOOLEAN" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_student BOOLEAN key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', false, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Data doubling Test the agg model by modify a key type from TINYINT to SMALLINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_student SMALLINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 2, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false,"${tbName1}") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + + //Test the agg model by modify a key type from TINYINT to INT + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_student INT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 2, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false,"${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `is_student` INT COMMENT \"是否是学生\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `is_student`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', 1, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 1, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 1, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 1, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 0, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 2, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', 1, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}","${tbName2}","is_student") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the agg model by modify a key type from TINYINT to BIGINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_student BIGINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 3, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false,"${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `is_student` BIGINT COMMENT \"是否是学生\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `is_student`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', 1, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 1, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 1, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 1, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 0, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 3, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', 1, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}","${tbName2}","is_student") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + + //Test the agg model by modify a key type from TINYINT to LARGEINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_student LARGEINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false,"${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `is_student` LARGEINT COMMENT \"是否是学生\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `is_student`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', 1, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 1, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 1, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 1, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 0, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', 1, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}","${tbName2}","is_student") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + + //TODO Test the agg model by modify a key type from TINYINT to FLOAT + errorMessage="errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_student FLOAT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.2, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from TINYINT to DOUBLE + errorMessage="errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_student DOUBLE key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from TINYINT to DECIMAL + errorMessage="errCode = 2, detailMessage = Can not change TINYINT to DECIMAL32" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_student DECIMAL key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + //TODO Test the agg model by modify a key type from TINYINT to CHAR + errorMessage="errCode = 2, detailMessage = Can not change TINYINT to CHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_student CHAR(15) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'char', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Data doubling Test the agg model by modify a key type from TINYINT to VARCHAR + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_student VARCHAR(100) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'varchar', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false,"${tbName1}") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + //Test the agg model by modify a key type from TINYINT to STRING + errorMessage="errCode = 2, detailMessage = String Type should not be used in key column[is_student]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column is_student STRING key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + /** + * Test the agg model by modify a key type from SMALLINT to other type + */ + sql """ DROP TABLE IF EXISTS ${tbName1} """ + initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `car_number` SMALLINT COMMENT \"市民卡\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `car_number`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData = "insert into ${tbName1} values(123456789, 'Alice', 13243, 'Beijing', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 13445, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 15768, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 14243, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 10768, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 14325, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', 15686, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + + //TODO Test the agg model by modify a key type from SMALLINT to BOOLEAN + errorMessage="errCode = 2, detailMessage = Can not change SMALLINT to BOOLEAN" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column car_number BOOLEAN key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', false, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + + // TODO Test the agg model by modify a key type from SMALLINT to TINYINT + errorMessage="errCode = 2, detailMessage = Can not change SMALLINT to TINYINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column car_number TINYINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 2, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Data doubling Test the agg model by modify a key type from SMALLINT to INT + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column car_number INT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 3, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false,"${tbName1}") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + //Test the agg model by modify a key type from SMALLINT to BIGINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column car_number BIGINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 4, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false,"${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `car_number` BIGINT COMMENT \"市民卡\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `car_number`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', 13243, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 13445, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 15768, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 14243, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 10768, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 14325, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 4, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', 15686, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}","${tbName2}","car_number") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + //Test the agg model by modify a key type from SMALLINT to LARGEINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column car_number LARGEINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 5, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false,"${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `car_number` LARGEINT COMMENT \"市民卡\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `car_number`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', 13243, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 13445, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 15768, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 14243, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 10768, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 14325, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 5, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', 15686, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}","${tbName2}","car_number") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + //TODO Test the agg model by modify a key type from SMALLINT to FLOAT + errorMessage="errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column car_number FLOAT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.2, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from SMALLINT to DOUBLE + errorMessage="errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column car_number DOUBLE key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from SMALLINT to DECIMAL + errorMessage="errCode = 2, detailMessage = Can not change SMALLINT to DECIMAL32" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column car_number DECIMAL key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + //TODO Test the agg model by modify a key type from SMALLINT to CHAR + errorMessage="errCode = 2, detailMessage = Can not change SMALLINT to CHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column car_number CHAR(15) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'casd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //Test the agg model by modify a key type from SMALLINT to VARCHAR + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column car_number VARCHAR(100) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'vasd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false,"${tbName1}") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + //Test the agg model by modify a key type from SMALLINT to STRING + errorMessage="errCode = 2, detailMessage = String Type should not be used in key column[car_number]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column car_number STRING key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + /** + * Test the agg model by modify a key type from INT to other type + */ + sql """ DROP TABLE IF EXISTS ${tbName1} """ + initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `sn_number` INT COMMENT \"sn卡\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`,`sn_number`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData = "insert into ${tbName1} values(123456789, 'Alice', 2147483641, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 214748364, 'Shanghai', 30, 1, 13998765432, 'No. 
456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 2147483441, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 2147483141, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 2127483141, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 2124483141, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', 2123483141, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + + //TODO Test the agg model by modify a key type from INT to BOOLEAN + errorMessage = "errCode = 2, detailMessage = Can not change INT to BOOLEAN" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number BOOLEAN key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', false, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + // TODO Test the agg model by modify a key type from INT to TINYINT + errorMessage = "errCode = 2, detailMessage = Can not change INT to TINYINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number TINYINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 2, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the agg model by modify a key type from INT to SMALLINT + errorMessage = "errCode = 2, detailMessage = Can not change INT to SMALLINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number SMALLINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 3, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + //TODO Data doubling Test the agg model by modify a key type from INT to BIGINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number BIGINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 4, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + //Test the agg model by modify a key type from INT to LARGEINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number LARGEINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 5, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `sn_number` LARGEINT COMMENT \"sn卡\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`,`sn_number`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', 2147483641, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 214748364, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 2147483441, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 2147483141, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 2127483141, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 2124483141, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 5, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', 2123483141, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}","${tbName2}","sn_number") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the agg model by modify a key type from INT to FLOAT + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number FLOAT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.2, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the agg model by modify a key type from INT to DOUBLE + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number DOUBLE key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //TODO Test the agg model by modify a key type from INT to DECIMAL + errorMessage = "errCode = 2, detailMessage = Can not change INT to DECIMAL128" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number DECIMAL(38,0) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + + }, errorMessage) + + //TODO Test the agg model by modify a key type from INT to CHAR + errorMessage = "errCode = 2, detailMessage = Can not change INT to CHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number CHAR(15) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'casd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the agg model by modify a key type from INT to VARCHAR + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number VARCHAR(100) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'vasd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + //Test the agg model by modify a key type from INT to VARCHAR + errorMessage = "errCode = 2, detailMessage = Can not change from wider type int to narrower type varchar(2)" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number VARCHAR(2) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'v1asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + //Test the agg model by modify a key type from INT to STRING + errorMessage = "errCode = 2, detailMessage = String Type should not be used in key column[sn_number]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number STRING key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + + /** + * Test the agg model by modify a key type from BIGINT to other type + */ + sql """ DROP TABLE IF EXISTS ${tbName1} """ + initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `sn_number` BIGINT COMMENT \"sn卡\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`,`sn_number`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData = "insert into ${tbName1} values(123456789, 'Alice', 2147483641, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 214748364, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 2147483441, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 2147483141, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 2127483141, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 2124483141, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', 2123483141, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + + //TODO Test the agg model by modify a key type from BIGINT to BOOLEAN + errorMessage = "errCode = 2, detailMessage = Can not change BIGINT to BOOLEAN" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number BOOLEAN key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', false, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + // TODO Test the agg model by modify a key type from BIGINT to TINYINT + errorMessage = "errCode = 2, detailMessage = Can not change BIGINT to TINYINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number TINYINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 2, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the agg model by modify a key type from BIGINT to SMALLINT + errorMessage = "errCode = 2, detailMessage = Can not change BIGINT to SMALLINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number SMALLINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 3, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + //Test the agg model by modify a key type from BIGINT to INT + errorMessage = "errCode = 2, detailMessage = Can not change BIGINT to INT" + expectException({ + sql initTable + sql initTableData + + sql """ alter table ${tbName1} MODIFY column sn_number INT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 4, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //TODO Data doubling Test the agg model by modify a key type from BIGINT to LARGEINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number LARGEINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 5, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + //Test the agg model by modify a key type from BIGINT to FLOAT + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number FLOAT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.2, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the agg model by modify a key type from BIGINT to DOUBLE + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number DOUBLE key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //TODO Test the agg model by modify a key type from BIGINT to DECIMAL + errorMessage = "errCode = 2, detailMessage = Can not change BIGINT to DECIMAL128" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number DECIMAL(38,0) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + + }, errorMessage) + + //TODO Test the agg model by modify a key type from BIGINT to CHAR + errorMessage = "errCode = 2, detailMessage = Can not change BIGINT to CHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number CHAR(15) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'casd', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the agg model by modify a key type from BIGINT to VARCHAR + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number VARCHAR(100) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'vasd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + //Test the agg model by modify a key type from BIGINT to VARCHAR + errorMessage = "errCode = 2, detailMessage = Can not change from wider type bigint to narrower type varchar(2)" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number VARCHAR(2) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'v1asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + //Test the agg model by modify a key type from BIGINT to STRING + errorMessage = "errCode = 2, detailMessage = String Type should not be used in key column[sn_number]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number STRING key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + +} diff --git a/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify1.groovy b/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify1.groovy new file mode 100644 index 000000000000000..13dff5950bc34ad --- /dev/null +++ b/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify1.groovy @@ -0,0 +1,2422 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_agg_schema_key_change_modify1","p0") { + def tbName1 = "test_agg_schema_key_change_modify1" + def tbName2 = "test_agg_schema_key_change_modify_1" + /** + * Test the agg model by modify a value type + */ + + sql """ DROP TABLE IF EXISTS ${tbName1} """ + def getTableStatusSql = " SHOW ALTER TABLE COLUMN WHERE IndexName='${tbName1}' ORDER BY createtime DESC LIMIT 1 " + def initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `is_teacher` BOOLEAN COMMENT \"是否是老师\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `is_teacher`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + def initTableData = "insert into ${tbName1} values(123456789, 'Alice', 0, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 0, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 1, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 0, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + def initTable1 = "" + def initTableData1 = "" + + /** + * Test the agg model by modify a key type from LARGEINT to other type + */ + sql """ DROP TABLE IF EXISTS ${tbName1} """ + initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `sn_number` LARGEINT COMMENT \"sn卡\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`,`sn_number`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData = "insert into ${tbName1} values(123456789, 'Alice', 2147483641, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 214748364, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 2147483441, 'Guangzhou', 28, 0, 13724681357, 'No. 
789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 2147483141, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 2127483141, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 2124483141, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', 2123483141, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + + //TODO Test the agg model by modify a key type from LARGEINT to BOOLEAN + errorMessage = "errCode = 2, detailMessage = Can not change LARGEINT to BOOLEAN" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number BOOLEAN key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', false, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + // TODO Test the agg model by modify a key type from LARGEINT to TINYINT + errorMessage = "errCode = 2, detailMessage = Can not change LARGEINT to TINYINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number TINYINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 2, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the agg model by modify a key type from LARGEINT to SMALLINT + errorMessage = "errCode = 2, detailMessage = Can not change LARGEINT to SMALLINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number SMALLINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 3, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + //Test the agg model by modify a key type from LARGEINT to INT + errorMessage = "errCode = 2, detailMessage = Can not change LARGEINT to INT" + expectException({ + sql initTable + sql initTableData + + sql """ alter table ${tbName1} MODIFY column sn_number INT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 4, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the agg model by modify a key type from LARGEINT to BIGINT + errorMessage = "errCode = 2, detailMessage = Can not change LARGEINT to BIGINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number BIGINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 5, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + //Test the agg model by modify a key type from LARGEINT to FLOAT + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." 
+ expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number FLOAT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.2, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the agg model by modify a key type from LARGEINT to DOUBLE + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number DOUBLE key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //TODO Test the agg model by modify a key type from LARGEINT to DECIMAL + errorMessage = "errCode = 2, detailMessage = Can not change LARGEINT to DECIMAL128" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number DECIMAL(38,0) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + + }, errorMessage) + + //TODO Test the agg model by modify a key type from LARGEINT to CHAR + errorMessage = "errCode = 2, detailMessage = Can not change LARGEINT to CHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number CHAR(15) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //TODO Data doubling Test the agg model by modify a key type from LARGEINT to VARCHAR + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number VARCHAR(100) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'vasd', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable2 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `sn_number` VARCHAR(100) COMMENT \"sn卡\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`,`sn_number`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData2 = "insert into ${tbName2} values(123456789, 'Alice', 2147483641, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 214748364, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 2147483441, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 2147483141, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 2127483141, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 2124483141, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 'vasd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', 2123483141, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + + sql initTable2 + sql initTableData2 +// checkTableData("${tbName1}","${tbName2}","sn_number") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //TODO Test the agg model by modify a key type from LARGEINT to VARCHAR + //Test the agg model by modify a key type from LARGEINT to VARCHAR + errorMessage = "errCode = 2, detailMessage = Can not change from wider type largeint to narrower type varchar(2)" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number VARCHAR(2) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + //TODO Test the agg model by modify a key type from LARGEINT to STRING + //Test the agg model by modify a key type from LARGEINT to STRING + errorMessage = "errCode = 2, detailMessage = String Type should not be used in key column[sn_number]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column sn_number STRING key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + /** + * Test the agg model by modify a key type from FLOAT to other type + */ + sql """ DROP TABLE IF EXISTS ${tbName1} """ + initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `score` FLOAT COMMENT \"分数\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`,`score`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData = "insert into ${tbName1} values(123456789, 'Alice', 1.8, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 1.89, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 2.6, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 3.9, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 4.2, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 2.5, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', 2.1, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + + //TODO Test the agg model by modify a key type from FLOAT to BOOLEAN + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score BOOLEAN key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', false, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + // TODO Test the agg model by modify a key type from FLOAT to TINYINT + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score TINYINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 2, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the agg model by modify a key type from FLOAT to SMALLINT + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score SMALLINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 3, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + //Test the agg model by modify a key type from FLOAT to INT + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + + sql """ alter table ${tbName1} MODIFY column score INT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 4, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the agg model by modify a key type from FLOAT to BIGINT + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score BIGINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 545645, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + //Test the agg model by modify a key type from FLOAT to LARGEINT + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score FLOAT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 156546, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the agg model by modify a key type from FLOAT to DOUBLE + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score DOUBLE key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //TODO Test the agg model by modify a key type from FLOAT to DECIMAL + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score DECIMAL(38,0) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + + }, errorMessage) + + //TODO Test the agg model by modify a key type from FLOAT to CHAR + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score CHAR(15) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + //TODO Test the agg model by modify a key type from FLOAT to VARCHAR + //Test the agg model by modify a key type from FLOAT to VARCHAR + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score VARCHAR(100) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + //TODO Test the agg model by modify a key type from FLOAT to STRING + //Test the agg model by modify a key type from FLOAT to STRING + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score STRING key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + /** + * Test the agg model by modify a key type from DOUBLE to other type + */ + sql """ DROP TABLE IF EXISTS ${tbName1} """ + initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `score` DOUBLE COMMENT \"分数\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`,`score`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData = "insert into ${tbName1} values(123456789, 'Alice', 1.8, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 1.89, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 2.6, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 3.9, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 4.2, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 2.5, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', 2.1, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + + //Test the agg model by modify a key type from DOUBLE to BOOLEAN + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." 
+ expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score BOOLEAN key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', false, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + // Test the agg model by modify a key type from DOUBLE to TINYINT + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score TINYINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 2, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the agg model by modify a key type from DOUBLE to SMALLINT + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score SMALLINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 3, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + //Test the agg model by modify a key type from DOUBLE to INT + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + + sql """ alter table ${tbName1} MODIFY column score INT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 4, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the agg model by modify a key type from DOUBLE to BIGINT + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score BIGINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 545645, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + //Test the agg model by modify a key type from DOUBLE to LARGEINT + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score FLOAT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 156546, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the agg model by modify a key type from DOUBLE to FLOAT + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." 
+ expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score FLOAT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + // Test the agg model by modify a key type from DOUBLE to DECIMAL + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score DECIMAL(38,0) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + + }, errorMessage) + + //TODO Test the agg model by modify a key type from DOUBLE to CHAR + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score CHAR(15) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + //TODO Test the agg model by modify a key type from DOUBLE to VARCHAR + //Test the agg model by modify a key type from DOUBLE to VARCHAR + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score VARCHAR(100) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + //Test the agg model by modify a key type from DOUBLE to STRING + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column score STRING key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + /** + * Test the agg model by modify a key type from DECIMAL to other type + */ + sql """ DROP TABLE IF EXISTS ${tbName1} """ + initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `rice` DECIMAL(38,10) COMMENT \"米粒\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`,`rice`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData = "insert into ${tbName1} values(123456789, 'Alice', 1.8, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 1.89, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 2.6, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 3.9, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 4.2, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 2.5, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', 2.1, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + + //Test the agg model by modify a key type from DECIMAL to BOOLEAN + errorMessage = "errCode = 2, detailMessage = Can not change DECIMAL128 to BOOLEAN" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column rice BOOLEAN key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', false, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + // Test the agg model by modify a key type from DECIMAL to TINYINT + errorMessage = "errCode = 2, detailMessage = Can not change DECIMAL128 to TINYINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column rice TINYINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 2, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the agg model by modify a key type from DECIMAL to SMALLINT + errorMessage = "errCode = 2, detailMessage = Can not change DECIMAL128 to SMALLINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column rice SMALLINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 3, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + //Test the agg model by modify a key type from DECIMAL to INT + errorMessage = "errCode = 2, detailMessage = Can not change DECIMAL128 to INT" + expectException({ + sql initTable + sql initTableData + + sql """ alter table ${tbName1} MODIFY column rice INT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 4, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the agg model by modify a key type from DECIMAL to BIGINT + errorMessage = "errCode = 2, detailMessage = Can not change DECIMAL128 to BIGINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column rice BIGINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 545645, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + //Test the agg model by modify a key type from DECIMAL to LARGEINT + errorMessage = "errCode = 2, detailMessage = Can not change DECIMAL128 to LARGEINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column rice LARGEINT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 156546, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the agg model by modify a key type from DECIMAL to FLOAT + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column rice FLOAT key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + // Test the agg model by modify a key type from DECIMAL to DOUBLE + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column rice DOUBLE key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + + }, errorMessage) + + //Test the agg model by modify a key type from DECIMAL to CHAR + errorMessage = "errCode = 2, detailMessage = Can not change DECIMAL128 to CHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column rice CHAR(15) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + //TODO Data doubling Test the agg model by modify a key type from DECIMAL to VARCHAR + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column rice VARCHAR(100) key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable2 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `rice` VARCHAR(100) COMMENT \"米粒\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`,`rice`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData2 = "insert into ${tbName2} values(123456789, 'Alice', '1.8000000000', 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', '1.8900000000', 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', '2.6000000000', 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', '3.9000000000', 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', '4.2000000000', 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', '2.5000000000', 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', '2.1000000000', 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + + sql initTable2 + sql initTableData2 +// checkTableData("${tbName1}","${tbName2}","rice") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + //Test the agg model by modify a key type from DECIMAL to STRING + errorMessage = "errCode = 2, detailMessage = String Type should not be used in key column[rice]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column rice STRING key """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + /** + * Test the agg model by modify a key type from DATE to other type + */ + + sql """ DROP TABLE IF EXISTS ${tbName1} """ + getTableStatusSql = " SHOW ALTER TABLE COLUMN WHERE IndexName='${tbName1}' ORDER BY createtime DESC LIMIT 1 " + initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `login_time` DATE COMMENT \"用户登陆时间\",\n" + + " `is_teacher` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否是老师\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `login_time`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData = "insert into ${tbName1} values(123456789, 'Alice', '2022-01-01', 0, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', '2022-01-01 12:00:00', 0, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', '2022-01-01 12:00:00', 1, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', '2022-01-01 12:00:00', 0, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', '2022-01-01 12:00:00', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', '2022-01-01 12:00:00', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', '2022-01-01 12:00:00', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + + //TODO Test the agg model by modify a key type from DATE to BOOLEAN + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to BOOLEAN" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time BOOLEAN KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 0, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + //TODO Test the agg model by modify a key type from DATE to TINYINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to TINYINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time TINYINT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + //TODO Test the agg model by modify a key type from DATE to SMALLINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to SMALLINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time SMALLINT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATE to INT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to INT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time INT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from DATE to BIGINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to BIGINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time BIGINT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATE to FLOAT + errorMessage="errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time FLOAT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from DATE to DECIMAL + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to DECIMAL32" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time DECIMAL KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATE to CHAR + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to CHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time CHAR KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + //TODO Test the agg model by modify a key type from DATE to STRING + errorMessage="errCode = 2, detailMessage = String Type should not be used in key column[login_time]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time STRING KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATE to VARCHAR + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to VARCHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time VARCHAR(32) KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + /** + * Test the agg model by modify a key type from DATETIME to other type + */ + + + initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `login_time` DATETIME COMMENT \"用户登陆时间\",\n" + + " `is_teacher` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否是老师\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `login_time`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData = "insert into ${tbName1} values(123456789, 'Alice', 0, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 0, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-01-01', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 1, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou','2022-01-01', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 0, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-01-01', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-01-01', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-01-01', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 
222 Street, Xian', '2022-01-01', '2022-07-07 22:00:00');" + + //TODO Test the agg model by modify a key type from DATETIME to BOOLEAN + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to BOOLEAN" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time BOOLEAN KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 0, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + //TODO Test the agg model by modify a key type from DATETIME to TINYINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to TINYINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time TINYINT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + //TODO Test the agg model by modify a key type from DATETIME to SMALLINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to SMALLINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time SMALLINT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATETIME to INT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to INT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time INT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from DATETIME to BIGINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to BIGINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time BIGINT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATETIME to FLOAT + errorMessage="errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time FLOAT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from DATETIME to DECIMAL + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to DECIMAL32" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time DECIMAL KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATETIME to CHAR + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to CHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time CHAR KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + //TODO Test the agg model by modify a key type from DATETIME to STRING + errorMessage="errCode = 2, detailMessage = String Type should not be used in key column[login_time]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time STRING KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATETIME to VARCHAR + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to VARCHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time VARCHAR(32) KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + /** + * Test the agg model by modify a key type from CHAR to other type + */ + + + initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` CHAR(255) NOT NULL COMMENT \"用户昵称\",\n" + + " `login_time` DATETIME COMMENT \"用户登陆时间\",\n" + + " `is_teacher` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否是老师\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `login_time`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData = "insert into ${tbName1} values(123456789, 'Alice', 0, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 0, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-01-01', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 1, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou','2022-01-01', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 0, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-01-01', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-01-01', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-01-01', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-01-01', '2022-07-07 22:00:00');" + + //TODO Test the agg model by modify a key type from CHAR to BOOLEAN + errorMessage="errCode = 2, detailMessage = Can not change VARCHAR to BOOLEAN" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username BOOLEAN KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 0, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + //TODO Test the agg model by modify a key type from CHAR to TINYINT + errorMessage="errCode = 2, detailMessage = Can not change default value" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username TINYINT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + //TODO Test the agg model by modify a key type from CHAR to SMALLINT + errorMessage="errCode = 2, detailMessage = Can not change default value" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username SMALLINT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from CHAR to INT + errorMessage="errCode = 2, detailMessage = Can not change default value" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username INT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from CHAR to BIGINT + errorMessage="errCode = 2, detailMessage = Can not change default value" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username BIGINT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + + //TODO Test the agg model by modify a key type from CHAR to FLOAT + errorMessage="errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username FLOAT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from CHAR to DECIMAL + errorMessage="errCode = 2, detailMessage = Can not change VARCHAR to DECIMAL32" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username DECIMAL KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from CHAR to DATETIME + errorMessage="errCode = 2, detailMessage = date literal [0] is invalid: null" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username DATETIME KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + //TODO Test the agg model by modify a key type from CHAR to STRING + errorMessage="errCode = 2, detailMessage = String Type should not be used in key column[username]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username STRING KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from CHAR to VARCHAR + errorMessage="errCode = 2, detailMessage = Can not change default value" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username VARCHAR(32) KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + /** + * Test the agg model by modify a key type from varchar to other type + */ + + + initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR NOT NULL COMMENT \"用户昵称\",\n" + + " `login_time` DATETIME COMMENT \"用户登陆时间\",\n" + + " `is_teacher` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否是老师\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `login_time`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData = "insert into ${tbName1} values(123456789, 'Alice', 0, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 0, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-01-01', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 1, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou','2022-01-01', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 0, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-01-01', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-01-01', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-01-01', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 
222 Street, Xian', '2022-01-01', '2022-07-07 22:00:00');" + + //TODO Test the agg model by modify a key type from VARCHAR to BOOLEAN + errorMessage="errCode = 2, detailMessage = Can not change VARCHAR to BOOLEAN" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username BOOLEAN KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 0, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + //TODO Test the agg model by modify a key type from VARCHAR to TINYINT + errorMessage="errCode = 2, detailMessage = Can not change default value" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username TINYINT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + //TODO Test the agg model by modify a key type from VARCHAR to SMALLINT + errorMessage="errCode = 2, detailMessage = Can not change default value" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username SMALLINT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from VARCHAR to INT + errorMessage="errCode = 2, detailMessage = Can not change default value" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username INT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from VARCHAR to BIGINT + errorMessage="errCode = 2, detailMessage = Can not change default value" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username BIGINT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + + //TODO Test the agg model by modify a key type from VARCHAR to FLOAT + errorMessage="errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username FLOAT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from VARCHAR to DECIMAL + errorMessage="errCode = 2, detailMessage = Can not change VARCHAR to DECIMAL32" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username DECIMAL KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from VARCHAR to DATETIME + errorMessage="errCode = 2, detailMessage = date literal [0] is invalid: null" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username DATETIME KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + //TODO Test the agg model by modify a key type from VARCHAR to STRING + errorMessage="errCode = 2, detailMessage = String Type should not be used in key column[username]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username STRING KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from VARCHAR to CHAR + errorMessage="errCode = 2, detailMessage = Can not change VARCHAR to CHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column username CHAR(32) KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + + /** + * Test the agg model by modify a key type from DATE to other type + */ + + sql """ DROP TABLE IF EXISTS ${tbName1} """ + getTableStatusSql = " SHOW ALTER TABLE COLUMN WHERE IndexName='${tbName1}' ORDER BY createtime DESC LIMIT 1 " + initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `login_time` DATE COMMENT \"用户登陆时间\",\n" + + " `is_teacher` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否是老师\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `login_time`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData = "insert into ${tbName1} values(123456789, 'Alice', '2022-01-01', 0, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', '2022-01-01 12:00:00', 0, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', '2022-01-01 12:00:00', 1, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', '2022-01-01 12:00:00', 0, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', '2022-01-01 12:00:00', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', '2022-01-01 12:00:00', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', '2022-01-01 12:00:00', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + + //TODO Test the agg model by modify a key type from DATE to BOOLEAN + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to BOOLEAN" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time BOOLEAN KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 0, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + //TODO Test the agg model by modify a key type from DATE to TINYINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to TINYINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time TINYINT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + //TODO Test the agg model by modify a key type from DATE to SMALLINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to SMALLINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time SMALLINT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATE to INT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to INT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time INT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + + //TODO Test the agg model by modify a key type from DATE to BIGINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to BIGINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time BIGINT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATE to FLOAT + errorMessage="errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time FLOAT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from DATE to DECIMAL + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to DECIMAL32" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time DECIMAL KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATE to CHAR + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to CHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time CHAR KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + //TODO Test the agg model by modify a key type from DATE to STRING + errorMessage="errCode = 2, detailMessage = String Type should not be used in key column[login_time]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time STRING KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATE to VARCHAR + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to VARCHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time VARCHAR(32) KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATE to BIGINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to BIGINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time BIGINT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATE to FLOAT + errorMessage="errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time FLOAT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from DATE to DECIMAL + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to DECIMAL32" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time DECIMAL KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATE to CHAR + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to CHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time CHAR KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + //TODO Test the agg model by modify a key type from DATE to STRING + errorMessage="errCode = 2, detailMessage = String Type should not be used in key column[login_time]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time STRING KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATE to VARCHAR + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to VARCHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time VARCHAR(32) KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + /** + * Test the agg model by modify a key type from DATEV2 to other type + */ + + sql """ DROP TABLE IF EXISTS ${tbName1} """ + getTableStatusSql = " SHOW ALTER TABLE COLUMN WHERE IndexName='${tbName1}' ORDER BY createtime DESC LIMIT 1 " + initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `login_time` DATEV2 COMMENT \"用户登陆时间\",\n" + + " `is_teacher` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否是老师\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `login_time`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData = "insert into ${tbName1} values(123456789, 'Alice', '2022-01-01', 0, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', '2022-01-01 12:00:00', 0, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', '2022-01-01 12:00:00', 1, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', '2022-01-01 12:00:00', 0, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', '2022-01-01 12:00:00', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', '2022-01-01 12:00:00', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', '2022-01-01 12:00:00', 0, 'Xian', 29, 0, 13333333333, 'No. 
222 Street, Xian', '2022-07-07 22:00:00');" + + //TODO Test the agg model by modify a key type from DATEV2 to BOOLEAN + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to BOOLEAN" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time BOOLEAN KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 0, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + //TODO Test the agg model by modify a key type from DATEV2 to TINYINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to TINYINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time TINYINT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + //TODO Test the agg model by modify a key type from DATEV2 to SMALLINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to SMALLINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time SMALLINT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATEV2 to INT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to INT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time INT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + + //TODO Test the agg model by modify a key type from DATEV2 to BIGINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to BIGINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time BIGINT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATEV2 to FLOAT + errorMessage="errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time FLOAT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from DATEV2 to DECIMAL + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to DECIMAL32" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time DECIMAL KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATEV2 to CHAR + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to CHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time CHAR KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + //TODO Test the agg model by modify a key type from DATEV2 to STRING + errorMessage="errCode = 2, detailMessage = String Type should not be used in key column[login_time]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time STRING KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATEV2 to VARCHAR + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to VARCHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time VARCHAR(32) KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATEV2 to BIGINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to BIGINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time BIGINT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATEV2 to FLOAT + errorMessage="errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time FLOAT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from DATEV2 to DECIMAL + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to DECIMAL32" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time DECIMAL KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATEV2 to CHAR + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to CHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time CHAR KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + //TODO Test the agg model by modify a key type from DATEV2 to STRING + errorMessage="errCode = 2, detailMessage = String Type should not be used in key column[login_time]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time STRING KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATEV2 to VARCHAR + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to VARCHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time VARCHAR(32) KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + /** + * Test the agg model by modify a key type from DATETIMEV2 to other type + */ + + + initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `login_time` DATETIMEV2 COMMENT \"用户登陆时间\",\n" + + " `is_teacher` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否是老师\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `login_time`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData = "insert into ${tbName1} values(123456789, 'Alice', 0, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 0, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-01-01', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 1, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou','2022-01-01', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 0, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-01-01', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-01-01', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-01-01', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-01-01', '2022-07-07 22:00:00');" + + //TODO Test the agg model by modify a key type from DATETIMEV2 to BOOLEAN + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to BOOLEAN" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time BOOLEAN KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 0, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + //TODO Test the agg model by modify a key type from DATETIMEV2 to TINYINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to TINYINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time TINYINT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + //TODO Test the agg model by modify a key type from DATETIMEV2 to SMALLINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to SMALLINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time SMALLINT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATETIMEV2 to INT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to INT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time INT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from DATETIMEV2 to BIGINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to BIGINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time BIGINT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATETIMEV2 to FLOAT + errorMessage="errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time FLOAT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from DATETIMEV2 to DECIMAL + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to DECIMAL32" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time DECIMAL KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATETIMEV2 to CHAR + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to CHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time CHAR KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + //TODO Test the agg model by modify a key type from DATETIMEV2 to STRING + errorMessage="errCode = 2, detailMessage = String Type should not be used in key column[login_time]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time STRING KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATETIMEV2 to VARCHAR + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to VARCHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time VARCHAR(32) KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + /** + * Test the agg model by modify a key type from DATETIME to other type + */ + + + initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `login_time` DATETIME COMMENT \"用户登陆时间\",\n" + + " `is_teacher` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否是老师\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE_IF_NOT_NULL COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `login_time`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData = "insert into ${tbName1} values(123456789, 'Alice', 0, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 0, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-01-01', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 1, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou','2022-01-01', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 0, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-01-01', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-01-01', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-01-01', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 
222 Street, Xian', '2022-01-01', '2022-07-07 22:00:00');" + + //TODO Test the agg model by modify a key type from DATETIME to BOOLEAN + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to BOOLEAN" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time BOOLEAN KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 0, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + //TODO Test the agg model by modify a key type from DATETIME to TINYINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to TINYINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time TINYINT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + //TODO Test the agg model by modify a key type from DATETIME to SMALLINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to SMALLINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time SMALLINT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATETIME to INT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to INT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time INT KEY DEFAULT "1" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from DATETIME to BIGINT + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to BIGINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time BIGINT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATETIME to FLOAT + errorMessage="errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time FLOAT KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + + //TODO Test the agg model by modify a key type from DATETIME to DECIMAL + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to DECIMAL32" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time DECIMAL KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1.0, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATETIME to CHAR + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to CHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time CHAR KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + + },errorMessage) + + //TODO Test the agg model by modify a key type from DATETIME to STRING + errorMessage="errCode = 2, detailMessage = String Type should not be used in key column[login_time]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time STRING KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + + + //TODO Test the agg model by modify a key type from DATETIME to VARCHAR + errorMessage="errCode = 2, detailMessage = Can not change DATEV2 to VARCHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} MODIFY column login_time VARCHAR(32) KEY DEFAULT "0" """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true,"${tbName1}") + },errorMessage) + +} From b3c94328bc6be3f3e3fbb0c52287a8b2d5d0e19f Mon Sep 17 00:00:00 2001 From: kkop <2449402815@qq.com> Date: Mon, 12 Aug 2024 09:36:38 +0800 Subject: [PATCH 45/94] [enhancement](regression-test) agg schema key/value drop case (#39098) --- .../test_agg_schema_key_drop.groovy | 608 ++++++++++ .../test_agg_schema_value_drop.groovy | 1069 +++++++++++++++++ 2 files changed, 1677 insertions(+) create mode 100644 regression-test/suites/schema_change_p0/test_agg_schema_key_drop.groovy create mode 100644 regression-test/suites/schema_change_p0/test_agg_schema_value_drop.groovy diff --git a/regression-test/suites/schema_change_p0/test_agg_schema_key_drop.groovy b/regression-test/suites/schema_change_p0/test_agg_schema_key_drop.groovy new file mode 100644 index 000000000000000..81f3ce563bc8ba7 --- /dev/null +++ b/regression-test/suites/schema_change_p0/test_agg_schema_key_drop.groovy @@ -0,0 +1,608 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_agg_schema_key_drop", "p0") { + def tbName1 = "test_agg_schema_key_drop" + def tbName2 = "test_agg_schema_key_drop_1" + sql """ DROP TABLE IF EXISTS ${tbName1} """ + def getTableStatusSql = " SHOW ALTER TABLE COLUMN WHERE IndexName='${tbName1}' ORDER BY createtime DESC LIMIT 1 " + def errorMessage = "" + def insertSql = "insert into ${tbName1} values(923456689, 'Alice', '四川省', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00');" + + + /** + * Test the aggregate model by drop a key type + */ + + sql """ DROP TABLE IF EXISTS ${tbName1} """ + def initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) COMMENT \"分数\",\n" + + " `city` CHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT COMMENT \"用户年龄\",\n" + + " `sex` TINYINT COMMENT \"用户性别\",\n" + + " `phone` LARGEINT COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN COMMENT \"是否完成\",\n" + + " `t_int` INT COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME COMMENT \"用户注册时间\"\n" + + " )\n" + + " aggregate KEY(`user_id`, `username`, `score`, `city`, `age`, `sex`, `phone`,`is_ok`, `t_int`, `t_bigint`, `t_date`, `t_datev2`, `t_datetimev2`, `t_datetime`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + def initTableData = "insert into ${tbName1} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00');" + + def initTable1 = "" + def initTableData1 = "" + //Test the aggregate model by drop a key type from BOOLEAN + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column is_ok """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, 60, 6000000000, '2024-06-16', '2024-06-16', 
'2024-06-16 17:00:00', '2024-06-16 17:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) COMMENT \"分数\",\n" + + " `city` CHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT COMMENT \"用户年龄\",\n" + + " `sex` TINYINT COMMENT \"用户性别\",\n" + + " `phone` LARGEINT COMMENT \"用户电话\",\n" + + " `t_int` INT COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME COMMENT \"用户注册时间\"\n" + + " )\n" + + " aggregate KEY(`user_id`, `username`, `score`, `city`, `age`, `sex`, `phone`, `t_int`, `t_bigint`, `t_date`, `t_datev2`, `t_datetimev2`, `t_datetime`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "t_int") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the aggregate model by drop a key type from TINYINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column sex """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) COMMENT \"分数\",\n" + + " `city` CHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT COMMENT \"用户年龄\",\n" + + " `phone` LARGEINT COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN COMMENT \"是否完成\",\n" + + " `t_int` INT COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME COMMENT \"用户注册时间\"\n" + + " )\n" + + " aggregate 
KEY(`user_id`, `username`, `score`, `city`, `age`, `phone`,`is_ok`, `t_int`, `t_bigint`, `t_date`, `t_datev2`, `t_datetimev2`, `t_datetime`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "phone") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the aggregate model by drop a key type from SMALLINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column age """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) COMMENT \"分数\",\n" + + " `city` CHAR(20) COMMENT \"用户所在城市\",\n" + + " `sex` TINYINT COMMENT \"用户性别\",\n" + + " `phone` LARGEINT COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN COMMENT \"是否完成\",\n" + + " `t_int` INT COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME COMMENT \"用户注册时间\"\n" + + " )\n" + + " aggregate KEY(`user_id`, `username`, `score`, `city`, `sex`, `phone`,`is_ok`, `t_int`, `t_bigint`, `t_date`, `t_datev2`, `t_datetimev2`, `t_datetime`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 2, 5556667778, true, 40, 4000000000, '2024-06-14', 
'2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00')," + + " (5, 'David Wilson', 88.9, 'Seattle', 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "sex") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //TODO Test the aggregate model by drop a key type from INT + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column t_int """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) COMMENT \"分数\",\n" + + " `city` CHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT COMMENT \"用户年龄\",\n" + + " `sex` TINYINT COMMENT \"用户性别\",\n" + + " `phone` LARGEINT COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN COMMENT \"是否完成\",\n" + + " `t_bigint` BIGINT COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME COMMENT \"用户注册时间\"\n" + + " )\n" + + " aggregate KEY(`user_id`, `username`, `score`, `city`, `age`, `sex`, `phone`,`is_ok`, `t_bigint`, `t_date`, `t_datev2`, `t_datetimev2`, `t_datetime`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, true, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, false, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, true, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "t_bigint") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //TODO Test the aggregate model by drop a key type from BIGINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column t_bigint """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 
600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) COMMENT \"分数\",\n" + + " `city` CHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT COMMENT \"用户年龄\",\n" + + " `sex` TINYINT COMMENT \"用户性别\",\n" + + " `phone` LARGEINT COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN COMMENT \"是否完成\",\n" + + " `t_int` INT COMMENT \"测试int\",\n" + + " `t_date` DATE COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME COMMENT \"用户注册时间\"\n" + + " )\n" + + " aggregate KEY(`user_id`, `username`, `score`, `city`, `age`, `sex`, `phone`,`is_ok`, `t_int`, `t_date`, `t_datev2`, `t_datetimev2`, `t_datetime`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, true, 10, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, false, 20, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, true, 30, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 50, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "t_date") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the aggregate model by drop a key type from LARGEINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column phone """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) COMMENT \"分数\",\n" + + " `city` CHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT COMMENT \"用户年龄\",\n" + + " `sex` TINYINT COMMENT \"用户性别\",\n" + + " `is_ok` BOOLEAN COMMENT \"是否完成\",\n" + + " `t_int` INT COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME COMMENT \"用户注册时间\"\n" + + " )\n" + + " aggregate KEY(`user_id`, `username`, `score`, `city`, `age`, `sex`, `is_ok`, `t_int`, `t_bigint`, `t_date`, `t_datev2`, `t_datetimev2`, `t_datetime`)\n" + + " DISTRIBUTED BY 
HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "is_ok") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the aggregate model by drop a key type from DATE + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column t_date """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) COMMENT \"分数\",\n" + + " `city` CHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT COMMENT \"用户年龄\",\n" + + " `sex` TINYINT COMMENT \"用户性别\",\n" + + " `phone` LARGEINT COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN COMMENT \"是否完成\",\n" + + " `t_int` INT COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT COMMENT \"测试BIGINT\",\n" + + " `t_datev2` DATEV2 COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME COMMENT \"用户注册时间\"\n" + + " )\n" + + " aggregate KEY(`user_id`, `username`, `score`, `city`, `age`, `sex`, `phone`,`is_ok`, `t_int`, `t_bigint`, `t_datev2`, `t_datetimev2`, `t_datetime`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 50, 
5000000000, '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "t_datev2") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the aggregate model by drop a key type from DATEV2 + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column t_datev2 """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) COMMENT \"分数\",\n" + + " `city` CHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT COMMENT \"用户年龄\",\n" + + " `sex` TINYINT COMMENT \"用户性别\",\n" + + " `phone` LARGEINT COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN COMMENT \"是否完成\",\n" + + " `t_int` INT COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE COMMENT \"测试DATE\",\n" + + " `t_datetimev2` DATETIMEV2 COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME COMMENT \"用户注册时间\"\n" + + " )\n" + + " aggregate KEY(`user_id`, `username`, `score`, `city`, `age`, `sex`, `phone`,`is_ok`, `t_int`, `t_bigint`, `t_date`, `t_datetimev2`, `t_datetime`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "t_datetimev2") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + //Test the aggregate model by drop a key type from t_datetimev2 + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column t_datetimev2 """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) COMMENT \"分数\",\n" + + " `city` CHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` 
SMALLINT COMMENT \"用户年龄\",\n" + + " `sex` TINYINT COMMENT \"用户性别\",\n" + + " `phone` LARGEINT COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN COMMENT \"是否完成\",\n" + + " `t_int` INT COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 COMMENT \"测试DATEV2\",\n" + + " `t_datetime` DATETIME COMMENT \"用户注册时间\"\n" + + " )\n" + + " aggregate KEY(`user_id`, `username`, `score`, `city`, `age`, `sex`, `phone`,`is_ok`, `t_int`, `t_bigint`, `t_date`, `t_datev2`, `t_datetime`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "t_datetime") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the aggregate model by drop a key type from t_datetime + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column t_datetime """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) COMMENT \"分数\",\n" + + " `city` CHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT COMMENT \"用户年龄\",\n" + + " `sex` TINYINT COMMENT \"用户性别\",\n" + + " `phone` LARGEINT COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN COMMENT \"是否完成\",\n" + + " `t_int` INT COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 COMMENT \"用户注册时间\"\n" + + " )\n" + + " aggregate KEY(`user_id`, `username`, `score`, `city`, `age`, `sex`, `phone`,`is_ok`, `t_int`, `t_bigint`, `t_date`, `t_datev2`, `t_datetimev2`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 
35, 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "t_datetimev2") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the aggregate model by drop a key type from CHAR + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column city """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) COMMENT \"分数\",\n" + + " `age` SMALLINT COMMENT \"用户年龄\",\n" + + " `sex` TINYINT COMMENT \"用户性别\",\n" + + " `phone` LARGEINT COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN COMMENT \"是否完成\",\n" + + " `t_int` INT COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME COMMENT \"用户注册时间\"\n" + + " )\n" + + " aggregate KEY(`user_id`, `username`, `score`, `age`, `sex`, `phone`,`is_ok`, `t_int`, `t_bigint`, `t_date`, `t_datev2`, `t_datetimev2`, `t_datetime`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 25, 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00')," + + " (2, 'Jane Smith', 85.2, 30, 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00')," + + " (3, 'Mike Johnson', 77.8, 35, 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00')," + + " (4, 'Emily Brown', 92.0, 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00')," + + " (6, 'Sophia Lee', 91.3, 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00')," + + " (5, 'David Wilson', 88.9, 32, 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "age") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the aggregate model by drop a key type from VARCHAR + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column username """ + insertSql = "insert into ${tbName1} values(6, 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 
17:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `score` DECIMAL(38,10) COMMENT \"分数\",\n" + + " `city` CHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT COMMENT \"用户年龄\",\n" + + " `sex` TINYINT COMMENT \"用户性别\",\n" + + " `phone` LARGEINT COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN COMMENT \"是否完成\",\n" + + " `t_int` INT COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME COMMENT \"用户注册时间\"\n" + + " )\n" + + " aggregate KEY(`user_id`, `score`, `city`, `age`, `sex`, `phone`,`is_ok`, `t_int`, `t_bigint`, `t_date`, `t_datev2`, `t_datetimev2`, `t_datetime`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 95.5, 'New York', 25, 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00')," + + " (2, 85.2, 'Los Angeles', 30, 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00')," + + " (3, 77.8, 'Chicago', 35, 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00')," + + " (4, 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00')," + + " (6, 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00')," + + " (5, 88.9, 'Seattle', 32, 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "age") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + +} diff --git a/regression-test/suites/schema_change_p0/test_agg_schema_value_drop.groovy b/regression-test/suites/schema_change_p0/test_agg_schema_value_drop.groovy new file mode 100644 index 000000000000000..7f5c77192f5f361 --- /dev/null +++ b/regression-test/suites/schema_change_p0/test_agg_schema_value_drop.groovy @@ -0,0 +1,1069 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_agg_schema_value_drop", "p0") { + def tbName1 = "test_agg_schema_value_drop" + def tbName2 = "test_agg_schema_value_drop_1" + sql """ DROP TABLE IF EXISTS ${tbName1} """ + def initTable1 = "" + def initTableData1 = "" + def getTableStatusSql = " SHOW ALTER TABLE COLUMN WHERE IndexName='${tbName1}' ORDER BY createtime DESC LIMIT 1 " + def errorMessage = "" + def insertSql = "insert into ${tbName1} values(923456689, 'Alice', '四川省', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00');" + + + /** + * Test the AGGREGATE model by drop a value type + */ + + + sql """ DROP TABLE IF EXISTS ${tbName1} """ + def initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) REPLACE_IF_NOT_NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"分数\",\n" + + " `city` CHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否完成\",\n" + + " `t_int` INT REPLACE_IF_NOT_NULL COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT REPLACE_IF_NOT_NULL COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE REPLACE_IF_NOT_NULL COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\",\n" + + " `t_string` STRING REPLACE_IF_NOT_NULL COMMENT \"测试string\",\n" + + " `m` Map REPLACE_IF_NOT_NULL COMMENT \"\",\n" + + " `j` JSON REPLACE_IF_NOT_NULL COMMENT \"\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + def initTableData = "insert into ${tbName1} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00', 'Test String 1', {'a': 100, 'b': 200}, '[\"abc\", \"def\"]')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00', 'Test String 2', {'a': 200, 'b': 200}, '[\"abc\", \"def\"]')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00', 'Test String 3', {'a': 300, 'b': 200}, '[\"abc\", \"def\"]')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00', 'Test String 4', {'a': 400, 'b': 200}, '[\"abc\", \"def\"]')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00', 'Test String 5', {'a': 500, 'b': 200}, '[\"abc\", \"def\"]');" + + // Test the AGGREGATE model by drop a value type from BOOLEAN + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column is_ok """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, 
'{\"k1\":\"v1\", \"k2\": 200}'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) REPLACE_IF_NOT_NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"分数\",\n" + + " `city` CHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `t_int` INT REPLACE_IF_NOT_NULL COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT REPLACE_IF_NOT_NULL COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE REPLACE_IF_NOT_NULL COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\",\n" + + " `t_string` STRING REPLACE_IF_NOT_NULL COMMENT \"测试string\",\n" + + " `m` Map REPLACE_IF_NOT_NULL COMMENT \"\",\n" + + " `j` JSON REPLACE_IF_NOT_NULL COMMENT \"\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00', 'Test String 1', {'a': 100, 'b': 200}, '[\"abc\", \"def\"]')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00', 'Test String 2', {'a': 200, 'b': 200}, '[\"abc\", \"def\"]')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00', 'Test String 3', {'a': 300, 'b': 200}, '[\"abc\", \"def\"]')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00', 'Test String 4', {'a': 400, 'b': 200}, '[\"abc\", \"def\"]')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\\\"k1\\\":\\\"v1\\\", \\\"k2\\\": 200}')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00', 'Test String 5', {'a': 500, 'b': 200}, '[\"abc\", \"def\"]');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "t_int") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + // Test the AGGREGATE model by drop a value type from TINYINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column sex """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\"k1\":\"v1\", \"k2\": 200}'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + 
initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) REPLACE_IF_NOT_NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"分数\",\n" + + " `city` CHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否完成\",\n" + + " `t_int` INT REPLACE_IF_NOT_NULL COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT REPLACE_IF_NOT_NULL COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE REPLACE_IF_NOT_NULL COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\",\n" + + " `t_string` STRING REPLACE_IF_NOT_NULL COMMENT \"测试string\",\n" + + " `m` Map REPLACE_IF_NOT_NULL COMMENT \"\",\n" + + " `j` JSON REPLACE_IF_NOT_NULL COMMENT \"\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00', 'Test String 1', {'a': 100, 'b': 200}, '[\"abc\", \"def\"]')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00', 'Test String 2', {'a': 200, 'b': 200}, '[\"abc\", \"def\"]')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00', 'Test String 3', {'a': 300, 'b': 200}, '[\"abc\", \"def\"]')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00', 'Test String 4', {'a': 400, 'b': 200}, '[\"abc\", \"def\"]')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 400, 'b': 200}, '[\"abc\", \"def\"]')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00', 'Test String 5', {'a': 500, 'b': 200}, '[\"abc\", \"def\"]');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "phone") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + // Test the AGGREGATE model by drop a value type from SMALLINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column age """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\"k1\":\"v1\", \"k2\": 200}'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) REPLACE_IF_NOT_NULL COMMENT \"用户昵称\",\n" + 
+ " `score` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"分数\",\n" + + " `city` CHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否完成\",\n" + + " `t_int` INT REPLACE_IF_NOT_NULL COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT REPLACE_IF_NOT_NULL COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE REPLACE_IF_NOT_NULL COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\",\n" + + " `t_string` STRING REPLACE_IF_NOT_NULL COMMENT \"测试string\",\n" + + " `m` Map REPLACE_IF_NOT_NULL COMMENT \"\",\n" + + " `j` JSON REPLACE_IF_NOT_NULL COMMENT \"\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00', 'Test String 1', {'a': 100, 'b': 200}, '[\"abc\", \"def\"]')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00', 'Test String 2', {'a': 200, 'b': 200}, '[\"abc\", \"def\"]')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00', 'Test String 3', {'a': 300, 'b': 200}, '[\"abc\", \"def\"]')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00', 'Test String 4', {'a': 400, 'b': 200}, '[\"abc\", \"def\"]')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 400, 'b': 200}, '[\"abc\", \"def\"]')," + + " (5, 'David Wilson', 88.9, 'Seattle', 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00', 'Test String 5', {'a': 500, 'b': 200}, '[\"abc\", \"def\"]');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "sex") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + // Test the AGGREGATE model by drop a value type from INT + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column t_int """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\"k1\":\"v1\", \"k2\": 200}'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) REPLACE_IF_NOT_NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"分数\",\n" + + " `city` CHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` 
TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否完成\",\n" + + " `t_bigint` BIGINT REPLACE_IF_NOT_NULL COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE REPLACE_IF_NOT_NULL COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\",\n" + + " `t_string` STRING REPLACE_IF_NOT_NULL COMMENT \"测试string\",\n" + + " `m` Map REPLACE_IF_NOT_NULL COMMENT \"\",\n" + + " `j` JSON REPLACE_IF_NOT_NULL COMMENT \"\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, true, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00', 'Test String 1', {'a': 100, 'b': 200}, '[\"abc\", \"def\"]')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, false, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00', 'Test String 2', {'a': 200, 'b': 200}, '[\"abc\", \"def\"]')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, true, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00', 'Test String 3', {'a': 300, 'b': 200}, '[\"abc\", \"def\"]')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00', 'Test String 4', {'a': 400, 'b': 200}, '[\"abc\", \"def\"]')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\\\"k1\\\":\\\"v1\\\", \\\"k2\\\": 200}')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00', 'Test String 5', {'a': 500, 'b': 200}, '[\"abc\", \"def\"]');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "t_bigint") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + // Test the AGGREGATE model by drop a value type from BIGINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column t_bigint """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\"k1\":\"v1\", \"k2\": 200}'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) REPLACE_IF_NOT_NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"分数\",\n" + + " `city` CHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否完成\",\n" + + 
" `t_int` INT REPLACE_IF_NOT_NULL COMMENT \"测试int\",\n" + + " `t_date` DATE REPLACE_IF_NOT_NULL COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\",\n" + + " `t_string` STRING REPLACE_IF_NOT_NULL COMMENT \"测试string\",\n" + + " `m` Map REPLACE_IF_NOT_NULL COMMENT \"\",\n" + + " `j` JSON REPLACE_IF_NOT_NULL COMMENT \"\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, true, 10, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00', 'Test String 1', {'a': 100, 'b': 200}, '[\"abc\", \"def\"]')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, false, 20, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00', 'Test String 2', {'a': 200, 'b': 200}, '[\"abc\", \"def\"]')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, true, 30, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00', 'Test String 3', {'a': 300, 'b': 200}, '[\"abc\", \"def\"]')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00', 'Test String 4', {'a': 400, 'b': 200}, '[\"abc\", \"def\"]')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\\\"k1\\\":\\\"v1\\\", \\\"k2\\\": 200}')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 50, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00', 'Test String 5', {'a': 500, 'b': 200}, '[\"abc\", \"def\"]');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "t_date") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + // Test the AGGREGATE model by drop a value type from LARGEINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column phone """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\"k1\":\"v1\", \"k2\": 200}'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) REPLACE_IF_NOT_NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"分数\",\n" + + " `city` CHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `is_ok` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否完成\",\n" + + " `t_int` INT REPLACE_IF_NOT_NULL COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT REPLACE_IF_NOT_NULL COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE REPLACE_IF_NOT_NULL COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 
REPLACE_IF_NOT_NULL COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\",\n" + + " `t_string` STRING REPLACE_IF_NOT_NULL COMMENT \"测试string\",\n" + + " `m` Map REPLACE_IF_NOT_NULL COMMENT \"\",\n" + + " `j` JSON REPLACE_IF_NOT_NULL COMMENT \"\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00', 'Test String 1', {'a': 100, 'b': 200}, '[\"abc\", \"def\"]')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00', 'Test String 2', {'a': 200, 'b': 200}, '[\"abc\", \"def\"]')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00', 'Test String 3', {'a': 300, 'b': 200}, '[\"abc\", \"def\"]')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00', 'Test String 4', {'a': 400, 'b': 200}, '[\"abc\", \"def\"]')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\\\"k1\\\":\\\"v1\\\", \\\"k2\\\": 200}')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00', 'Test String 5', {'a': 500, 'b': 200}, '[\"abc\", \"def\"]');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "is_ok") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + // Test the AGGREGATE model by drop a value type from DATE + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column t_date """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\"k1\":\"v1\", \"k2\": 200}'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) REPLACE_IF_NOT_NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"分数\",\n" + + " `city` CHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否完成\",\n" + + " `t_int` INT REPLACE_IF_NOT_NULL COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT REPLACE_IF_NOT_NULL COMMENT \"测试BIGINT\",\n" + + " `t_datev2` DATEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\",\n" + + " `t_string` STRING REPLACE_IF_NOT_NULL COMMENT \"测试string\",\n" + + " `m` Map REPLACE_IF_NOT_NULL COMMENT \"\",\n" 
+ + " `j` JSON REPLACE_IF_NOT_NULL COMMENT \"\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00', 'Test String 1', {'a': 100, 'b': 200}, '[\"abc\", \"def\"]')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00', 'Test String 2', {'a': 200, 'b': 200}, '[\"abc\", \"def\"]')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00', 'Test String 3', {'a': 300, 'b': 200}, '[\"abc\", \"def\"]')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00', 'Test String 4', {'a': 400, 'b': 200}, '[\"abc\", \"def\"]')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\\\"k1\\\":\\\"v1\\\", \\\"k2\\\": 200}')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00', 'Test String 5', {'a': 500, 'b': 200}, '[\"abc\", \"def\"]');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "t_datev2") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + // Test the AGGREGATE model by drop a value type from DATEV2 + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column t_datev2 """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\"k1\":\"v1\", \"k2\": 200}'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) REPLACE_IF_NOT_NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"分数\",\n" + + " `city` CHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否完成\",\n" + + " `t_int` INT REPLACE_IF_NOT_NULL COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT REPLACE_IF_NOT_NULL COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE REPLACE_IF_NOT_NULL COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\",\n" + + " `t_string` STRING REPLACE_IF_NOT_NULL COMMENT \"测试string\",\n" + + " `m` Map REPLACE_IF_NOT_NULL COMMENT \"\",\n" + + " `j` JSON REPLACE_IF_NOT_NULL COMMENT \"\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + 
initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00', 'Test String 1', {'a': 100, 'b': 200}, '[\"abc\", \"def\"]')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00', 'Test String 2', {'a': 200, 'b': 200}, '[\"abc\", \"def\"]')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00', 'Test String 3', {'a': 300, 'b': 200}, '[\"abc\", \"def\"]')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00', 'Test String 4', {'a': 400, 'b': 200}, '[\"abc\", \"def\"]')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\\\"k1\\\":\\\"v1\\\", \\\"k2\\\": 200}')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00', 'Test String 5', {'a': 500, 'b': 200}, '[\"abc\", \"def\"]');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "t_datetimev2") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + // Test the AGGREGATE model by drop a value type from t_datetimev2 + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column t_datetimev2 """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\"k1\":\"v1\", \"k2\": 200}'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) REPLACE_IF_NOT_NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"分数\",\n" + + " `city` CHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否完成\",\n" + + " `t_int` INT REPLACE_IF_NOT_NULL COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT REPLACE_IF_NOT_NULL COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE REPLACE_IF_NOT_NULL COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATEV2\",\n" + + " `t_datetime` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\",\n" + + " `t_string` STRING REPLACE_IF_NOT_NULL COMMENT \"测试string\",\n" + + " `m` Map REPLACE_IF_NOT_NULL COMMENT \"\",\n" + + " `j` JSON REPLACE_IF_NOT_NULL COMMENT \"\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', 'Test String 1', {'a': 100, 'b': 200}, '[\"abc\", \"def\"]')," + + " (2, 'Jane 
Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', 'Test String 2', {'a': 200, 'b': 200}, '[\"abc\", \"def\"]')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', 'Test String 3', {'a': 300, 'b': 200}, '[\"abc\", \"def\"]')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', 'Test String 4', {'a': 400, 'b': 200}, '[\"abc\", \"def\"]')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\\\"k1\\\":\\\"v1\\\", \\\"k2\\\": 200}')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', 'Test String 5', {'a': 500, 'b': 200}, '[\"abc\", \"def\"]');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "t_datetime") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + // Test the AGGREGATE model by drop a value type from t_datetimev2 + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column t_datetimev2 """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\"k1\":\"v1\", \"k2\": 200}'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) REPLACE_IF_NOT_NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"分数\",\n" + + " `city` CHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否完成\",\n" + + " `t_int` INT REPLACE_IF_NOT_NULL COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT REPLACE_IF_NOT_NULL COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE REPLACE_IF_NOT_NULL COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEv2 REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\",\n" + + " `t_string` STRING REPLACE_IF_NOT_NULL COMMENT \"测试string\",\n" + + " `m` Map REPLACE_IF_NOT_NULL COMMENT \"\",\n" + + " `j` JSON REPLACE_IF_NOT_NULL COMMENT \"\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', 'Test String 1', {'a': 100, 'b': 200}, '[\"abc\", \"def\"]')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', 'Test String 2', {'a': 200, 'b': 200}, '[\"abc\", \"def\"]')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 
11:15:00', 'Test String 3', {'a': 300, 'b': 200}, '[\"abc\", \"def\"]')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', 'Test String 4', {'a': 400, 'b': 200}, '[\"abc\", \"def\"]')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\\\"k1\\\":\\\"v1\\\", \\\"k2\\\": 200}')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', 'Test String 5', {'a': 500, 'b': 200}, '[\"abc\", \"def\"]');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "t_string") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + // Test the AGGREGATE model by drop a value type from t_datetime + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column t_datetime """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\"k1\":\"v1\", \"k2\": 200}'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) REPLACE_IF_NOT_NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"分数\",\n" + + " `city` CHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否完成\",\n" + + " `t_int` INT REPLACE_IF_NOT_NULL COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT REPLACE_IF_NOT_NULL COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE REPLACE_IF_NOT_NULL COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEv2 REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\",\n" + + " `t_string` STRING REPLACE_IF_NOT_NULL COMMENT \"测试string\",\n" + + " `m` Map REPLACE_IF_NOT_NULL COMMENT \"\",\n" + + " `j` JSON REPLACE_IF_NOT_NULL COMMENT \"\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', 'Test String 1', {'a': 100, 'b': 200}, '[\"abc\", \"def\"]')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', 'Test String 2', {'a': 200, 'b': 200}, '[\"abc\", \"def\"]')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', 'Test String 3', {'a': 300, 'b': 200}, '[\"abc\", \"def\"]')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', 'Test String 4', {'a': 400, 'b': 200}, '[\"abc\", \"def\"]')," + + " (6, 'Sophia Lee', 91.3, 
'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\\\"k1\\\":\\\"v1\\\", \\\"k2\\\": 200}')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', 'Test String 5', {'a': 500, 'b': 200}, '[\"abc\", \"def\"]');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "t_string") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + // Test the AGGREGATE model by drop a value type from CHAR + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column city """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\"k1\":\"v1\", \"k2\": 200}'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) REPLACE_IF_NOT_NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"分数\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否完成\",\n" + + " `t_int` INT REPLACE_IF_NOT_NULL COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT REPLACE_IF_NOT_NULL COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE REPLACE_IF_NOT_NULL COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\",\n" + + " `t_string` STRING REPLACE_IF_NOT_NULL COMMENT \"测试string\",\n" + + " `m` Map REPLACE_IF_NOT_NULL COMMENT \"\",\n" + + " `j` JSON REPLACE_IF_NOT_NULL COMMENT \"\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 25, 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00', 'Test String 1', {'a': 100, 'b': 200}, '[\"abc\", \"def\"]')," + + " (2, 'Jane Smith', 85.2, 30, 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00', 'Test String 2', {'a': 200, 'b': 200}, '[\"abc\", \"def\"]')," + + " (3, 'Mike Johnson', 77.8, 35, 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00', 'Test String 3', {'a': 300, 'b': 200}, '[\"abc\", \"def\"]')," + + " (4, 'Emily Brown', 92.0, 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00', 'Test String 4', {'a': 400, 'b': 200}, '[\"abc\", \"def\"]')," + + " (6, 'Sophia Lee', 91.3, 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\\\"k1\\\":\\\"v1\\\", \\\"k2\\\": 200}')," + + " (5, 'David Wilson', 88.9, 32, 1, 9998887776, false, 50, 
5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00', 'Test String 5', {'a': 500, 'b': 200}, '[\"abc\", \"def\"]');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "age") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + // Test the AGGREGATE model by drop a value type from VARCHAR + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column username """ + insertSql = "insert into ${tbName1} values(6, 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\"k1\":\"v1\", \"k2\": 200}'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `score` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"分数\",\n" + + " `city` CHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否完成\",\n" + + " `t_int` INT REPLACE_IF_NOT_NULL COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT REPLACE_IF_NOT_NULL COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE REPLACE_IF_NOT_NULL COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\",\n" + + " `t_string` STRING REPLACE_IF_NOT_NULL COMMENT \"测试string\",\n" + + " `m` Map REPLACE_IF_NOT_NULL COMMENT \"\",\n" + + " `j` JSON REPLACE_IF_NOT_NULL COMMENT \"\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 95.5, 'New York', 25, 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00', 'Test String 1', {'a': 100, 'b': 200}, '[\"abc\", \"def\"]')," + + " (2, 85.2, 'Los Angeles', 30, 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00', 'Test String 2', {'a': 200, 'b': 200}, '[\"abc\", \"def\"]')," + + " (3, 77.8, 'Chicago', 35, 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00', 'Test String 3', {'a': 300, 'b': 200}, '[\"abc\", \"def\"]')," + + " (4, 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00', 'Test String 4', {'a': 400, 'b': 200}, '[\"abc\", \"def\"]')," + + " (6, 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 500, 'b': 200}, '{\\\"k1\\\":\\\"v1\\\", \\\"k2\\\": 200}')," + + " (5, 88.9, 'Seattle', 32, 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00', 'Test String 5', {'a': 500, 'b': 200}, '[\"abc\", \"def\"]');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "score") + sql """ DROP 
TABLE IF EXISTS ${tbName1} """ + + + // Test the AGGREGATE model by drop a value type from STRING + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column t_string """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', {'a': 500, 'b': 200}, '{\"k1\":\"v1\", \"k2\": 200}'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) REPLACE_IF_NOT_NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"分数\",\n" + + " `city` CHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否完成\",\n" + + " `t_int` INT REPLACE_IF_NOT_NULL COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT REPLACE_IF_NOT_NULL COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE REPLACE_IF_NOT_NULL COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\",\n" + + " `m` Map REPLACE_IF_NOT_NULL COMMENT \"\",\n" + + " `j` JSON REPLACE_IF_NOT_NULL COMMENT \"\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00', {'a': 100, 'b': 200}, '[\"abc\", \"def\"]')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00', {'a': 200, 'b': 200}, '[\"abc\", \"def\"]')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00', {'a': 300, 'b': 200}, '[\"abc\", \"def\"]')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00', {'a': 400, 'b': 200}, '[\"abc\", \"def\"]')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', {'a': 500, 'b': 200}, '{\\\"k1\\\":\\\"v1\\\", \\\"k2\\\": 200}')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00', {'a': 500, 'b': 200}, '[\"abc\", \"def\"]');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "score") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + // Test the AGGREGATE model by drop a value type from Map + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column m """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 
7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', '{\"k1\":\"v1\", \"k2\": 200}'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) REPLACE_IF_NOT_NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"分数\",\n" + + " `city` CHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否完成\",\n" + + " `t_int` INT REPLACE_IF_NOT_NULL COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT REPLACE_IF_NOT_NULL COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE REPLACE_IF_NOT_NULL COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\",\n" + + " `t_string` STRING REPLACE_IF_NOT_NULL COMMENT \"测试string\",\n" + + " `j` JSON REPLACE_IF_NOT_NULL COMMENT \"\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00', 'Test String 1', '[\"abc\", \"def\"]')," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00', 'Test String 2', '[\"abc\", \"def\"]')," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00', 'Test String 3', '[\"abc\", \"def\"]')," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00', 'Test String 4', '[\"abc\", \"def\"]')," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', '{\\\"k1\\\":\\\"v1\\\", \\\"k2\\\": 200}')," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00', 'Test String 5', '[\"abc\", \"def\"]');" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "score") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + // Test the AGGREGATE model by drop a value type from JSON + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column j """ + insertSql = "insert into ${tbName1} values(6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 100, 'b': 200}); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ 
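+ // Rebuild the reference table without the dropped JSON column j, then compare the remaining row data below.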
+ initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) REPLACE_IF_NOT_NULL COMMENT \"用户昵称\",\n" + + " `score` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"分数\",\n" + + " `city` CHAR(20) REPLACE_IF_NOT_NULL COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT REPLACE_IF_NOT_NULL COMMENT \"用户年龄\",\n" + + " `sex` TINYINT REPLACE_IF_NOT_NULL COMMENT \"用户性别\",\n" + + " `phone` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"用户电话\",\n" + + " `is_ok` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"是否完成\",\n" + + " `t_int` INT REPLACE_IF_NOT_NULL COMMENT \"测试int\",\n" + + " `t_bigint` BIGINT REPLACE_IF_NOT_NULL COMMENT \"测试BIGINT\",\n" + + " `t_date` DATE REPLACE_IF_NOT_NULL COMMENT \"测试DATE\",\n" + + " `t_datev2` DATEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATEV2\",\n" + + " `t_datetimev2` DATETIMEV2 REPLACE_IF_NOT_NULL COMMENT \"测试DATETIMEV2\",\n" + + " `t_datetime` DATETIME REPLACE_IF_NOT_NULL COMMENT \"用户注册时间\",\n" + + " `t_string` STRING REPLACE_IF_NOT_NULL COMMENT \"测试string\",\n" + + " `m` Map REPLACE_IF_NOT_NULL COMMENT \"\",\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 'John Doe', 95.5, 'New York', 25, 1, 1234567890, true, 10, 1000000000, '2024-06-11', '2024-06-11', '2024-06-11 08:30:00', '2024-06-11 08:30:00', 'Test String 1', {'a': 100, 'b': 200})," + + " (2, 'Jane Smith', 85.2, 'Los Angeles', 30, 2, 9876543210, false, 20, 2000000000, '2024-06-12', '2024-06-12', '2024-06-12 09:45:00', '2024-06-12 09:45:00', 'Test String 2', {'a': 100, 'b': 200})," + + " (3, 'Mike Johnson', 77.8, 'Chicago', 35, 1, 1112223334, true, 30, 3000000000, '2024-06-13', '2024-06-13', '2024-06-13 11:15:00', '2024-06-13 11:15:00', 'Test String 3', {'a': 100, 'b': 200})," + + " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00', 'Test String 4', {'a': 100, 'b': 200})," + + " (6, 'Sophia Lee', 91.3, 'Boston', 29, 2, 7778889990, true, 60, 6000000000, '2024-06-16', '2024-06-16', '2024-06-16 17:00:00', '2024-06-16 17:00:00', 'Test String 6', {'a': 100, 'b': 200})," + + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00', 'Test String 5', {'a': 100, 'b': 200});" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "user_id") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + + + initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `t_decimal` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"测试decimal\",\n" + + " `t_float` FLOAT REPLACE_IF_NOT_NULL COMMENT \"测试float\",\n" + + " `t_double` DOUBLE REPLACE_IF_NOT_NULL COMMENT \"测试double\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData = "insert into ${tbName1} values(1, 123.4567890123, 123.45, 1234.5678901234)," + + " (2, 234.5678901234, 234.56, 2345.6789012345)," + + " (3, 345.6789012345, 345.67, 3456.7890123456)," + + " (4, 456.7890123456, 456.78, 4567.8901234567)," + + " (5, 567.8901234567, 567.89, 5678.9012345678);" + + + // Test the 
AGGREGATE model by drop a value type from DECIMAL + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column t_decimal """ + insertSql = "insert into ${tbName1} values(6, 678.90, 6789.0123456789); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `t_float` FLOAT REPLACE_IF_NOT_NULL COMMENT \"测试float\",\n" + + " `t_double` DOUBLE REPLACE_IF_NOT_NULL COMMENT \"测试double\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 123.45, 1234.5678901234)," + + " (2, 234.56, 2345.6789012345)," + + " (3, 345.67, 3456.7890123456)," + + " (4, 456.78, 4567.8901234567)," + + " (6, 678.90, 6789.0123456789)," + + " (5, 567.89, 5678.9012345678);" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "t_float") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + // Test the AGGREGATE model by drop a value type from FLOAT + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column t_float """ + insertSql = "insert into ${tbName1} values(6, 678.9012345678, 6789.0123456789); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `t_decimal` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"测试decimal\",\n" + + " `t_double` DOUBLE REPLACE_IF_NOT_NULL COMMENT \"测试double\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 123.4567890123, 1234.5678901234)," + + " (2, 234.5678901234, 2345.6789012345)," + + " (3, 345.6789012345, 3456.7890123456)," + + " (4, 456.7890123456, 4567.8901234567)," + + " (6, 678.9012345678, 6789.0123456789)," + + " (5, 567.8901234567, 5678.9012345678);" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "t_double") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + // Test the AGGREGATE model by drop a value type from DOUBLE + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column t_double """ + insertSql = "insert into ${tbName1} values(6, 678.9012345678, 678.90); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `t_decimal` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"测试decimal\",\n" + + " `t_float` FLOAT REPLACE_IF_NOT_NULL COMMENT \"测试float\",\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, 123.4567890123, 123.45)," + + " (2, 234.5678901234, 234.56)," + + " (3, 345.6789012345, 345.67)," + + " 
(4, 456.7890123456, 456.78)," + + " (6, 678.9012345678, 678.90)," + + " (5, 567.8901234567, 567.89);" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "user_id") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `c_array` ARRAY REPLACE_IF_NOT_NULL COMMENT \"测试ARRAY\",\n" + + " `s_info` STRUCT REPLACE_IF_NOT_NULL COMMENT \"测试STRUCT\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData = "insert into ${tbName1} values(1, [1,7,8], struct(1, 'sn1', 'sa1'))," + + " (2, [2,7,8], struct(2, 'sn2', 'sa2'))," + + " (3, [3,7,8], struct(3, 'sn3', 'sa3'))," + + " (4, [4,7,8], struct(4, 'sn4', 'sa4'))," + + " (5, [5,7,8], struct(5, 'sn5', 'sa5'));" + + + // Test the AGGREGATE model by drop a value type from ARRAY + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column c_array """ + insertSql = "insert into ${tbName1} values(6, struct(6, 'sn6', 'sa6')); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `s_info` STRUCT REPLACE_IF_NOT_NULL COMMENT \"测试STRUCT\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(1, struct(1, 'sn1', 'sa1'))," + + " (2, struct(1, 'sn2', 'sa2'))," + + " (3, struct(1, 'sn3', 'sa3'))," + + " (4, struct(1, 'sn4', 'sa4'))," + + " (6, struct(1, 'sn6', 'sa6'))," + + " (5, struct(5, 'sn5', 'sa5'));" + + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "user_id") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + // Test the AGGREGATE model by drop a value type from STRUCT + errorMessage = "errCode = 2, detailMessage = can not cast from origin type STRUCT to target type=ARRAY" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} DROP column s_info """ + insertSql = "insert into ${tbName1} values(6, struct(6, 'sn6', 'sa6')); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + +} From 319c7bec1b100d3256e4dcec75ce4d5276fbe40d Mon Sep 17 00:00:00 2001 From: kkop <2449402815@qq.com> Date: Mon, 12 Aug 2024 09:37:31 +0800 Subject: [PATCH 46/94] [enhancement](regression-test) agg schema key add case (#38914) --- .../test_agg_schema_key_add.groovy | 518 ++++++++++++++++++ 1 file changed, 518 insertions(+) create mode 100644 regression-test/suites/schema_change_p0/test_agg_schema_key_add.groovy diff --git a/regression-test/suites/schema_change_p0/test_agg_schema_key_add.groovy b/regression-test/suites/schema_change_p0/test_agg_schema_key_add.groovy new file mode 100644 index 000000000000000..715bbd2a296f882 --- /dev/null +++ b/regression-test/suites/schema_change_p0/test_agg_schema_key_add.groovy @@ -0,0 +1,518 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_agg_schema_key_add", "p0") { + def tbName1 = "test_agg_schema_key_change_add1" + def tbName2 = "test_agg_schema_key_change_add2" + + //Test the AGGREGATE model by adding a key column + sql """ DROP TABLE IF EXISTS ${tbName1} """ + def initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `city` VARCHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `city`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + def initTableData = "insert into ${tbName1} values(123456789, 'Alice', 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + + def initTable1 = "" + def initTableData1 = "" + def insertSql = "" + def getTableStatusSql = " SHOW ALTER TABLE COLUMN WHERE IndexName='${tbName1}' ORDER BY createtime DESC LIMIT 1 " + def errorMessage + //Test the AGGREGATE model by adding a key column with VARCHAR + errorMessage = "errCode = 2, detailMessage = Key column can not set aggregation type: province" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column province VARCHAR(20) KEY REPLACE DEFAULT "广东省" AFTER username """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '四川省', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00');" + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + }, errorMessage) + + + //Test the AGGREGATE model by adding a key column with BOOLEAN + errorMessage = "errCode = 2, detailMessage = Key column can not set aggregation type: special_area" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column special_area BOOLEAN KEY REPLACE DEFAULT "0" AFTER username """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + }, errorMessage) + + + //Test the AGGREGATE model by adding a key column with TINYINT + errorMessage = "errCode = 2, detailMessage = Key column can not set aggregation type: special_area" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column special_area TINYINT KEY REPLACE DEFAULT "0" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + }, errorMessage) + + + //Test the AGGREGATE model by adding a key column with SMALLINT + errorMessage = "errCode = 2, detailMessage = Key column can not set aggregation type: area_num" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column area_num SMALLINT KEY REPLACE DEFAULT "999" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', 567, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + }, errorMessage) + + + + //Test the AGGREGATE model by adding a key column with INT + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column house_price INT KEY DEFAULT "999" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', 22536, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `house_price` SMALLINT NULL COMMENT \"房子价格\",\n" + + " `city` VARCHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `house_price`, `city`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', 999, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 999, 'Shanghai', 30, 1, 13998765432, 'No. 
456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 999, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 999, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 999, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 999, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 22536, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 11:56:00')," + + " (789012345, 'Grace', 999, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "house_price") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the AGGREGATE model by adding a key column with BIGINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column house_price1 BIGINT KEY DEFAULT "99999991" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', 88889494646, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `house_price1` BIGINT NULL COMMENT \"房子价格\",\n" + + " `city` VARCHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `house_price1`, `city`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', 99999991, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 99999991, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 99999991, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 99999991, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 99999991, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 99999991, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 88889494646, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', 99999991, 'Xian', 29, 0, 13333333333, 'No. 
222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "house_price1") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the AGGREGATE model by adding a key column with LARGEINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column car_price LARGEINT KEY DEFAULT "9999" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', 555888555, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00');" + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `car_price` LARGEINT NULL COMMENT \"车价格\",\n" + + " `city` VARCHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `car_price`, `city`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', 9999, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 9999, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 9999, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 9999, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 9999, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 9999, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 555888555, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', 9999, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "car_price") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //TODO Test the AGGREGATE model by adding a key column with FLOAT + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column phone FLOAT KEY DEFAULT "166.6" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', 189.9, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00');" + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //TODO Test the AGGREGATE model by adding a key column with DOUBLE + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." 
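+ // DOUBLE keys are rejected just like FLOAT ("use decimal instead"); the DECIMAL(38,10) key case below is the supported alternative.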
+ expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column watch DOUBLE KEY DEFAULT "166.689" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', 189.479, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + + }, errorMessage) + + + //TODO (Double the amount of data)Test the AGGREGATE model by adding a key column with DECIMAL + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column watch DECIMAL(38,10) KEY DEFAULT "16899.6464689" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', 16499.6464689, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00');" + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `watch` DECIMAL(38,10) NULL COMMENT \"车价格\",\n" + + " `city` VARCHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `watch`, `city`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', 16899.6464689, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 16899.6464689, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 16899.6464689, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 16899.6464689, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 16899.6464689, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 16899.6464689, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 16899.6464689, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', 16499.6464689, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "watch") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + + + //Test the AGGREGATE model by adding a key column with DATE + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column watch DATE KEY DEFAULT "1997-01-01" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', \"2024-01-01\", 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `watch` DATE NULL COMMENT \"手表日期\",\n" + + " `city` VARCHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `watch`, `city`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', '1997-01-01', 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', '1997-01-01', 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', '1997-01-01', 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', '1997-01-01', 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', '1997-01-01', 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', '1997-01-01', 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', '2024-01-01', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', '1997-01-01', 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "watch") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the AGGREGATE model by adding a key column with DATETIME + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column anniversary DATETIME KEY DEFAULT "1997-01-01 00:00:00" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', \"2024-01-04 09:00:00\", 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `anniversary` DATETIME NULL COMMENT \"手表日期\",\n" + + " `city` VARCHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `anniversary`, `city`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', '1997-01-01 00:00:00', 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', '1997-01-01 00:00:00', 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', '1997-01-01 00:00:00', 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', '1997-01-01 00:00:00', 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', '1997-01-01 00:00:00', 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', '1997-01-01 00:00:00', 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', '2024-01-04 09:00:00', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', '1997-01-01 00:00:00', 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "anniversary") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the AGGREGATE model by adding a key column with CHAR + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column teacher CHAR KEY DEFAULT "F" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', 'T', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `teacher` CHAR NULL COMMENT \"老师\",\n" + + " `city` VARCHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`, `teacher`, `city`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', 'F', 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 'F', 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 'F', 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 'F', 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 'F', 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 'F', 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 'T', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', 'F', 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "teacher") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //TODO Test the AGGREGATE model by adding a key column with STRING + errorMessage = "errCode = 2, detailMessage = String Type should not be used in key column[comment]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column comment STRING KEY DEFAULT "我是小说家" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', '我是侦探家', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //TODO Test the AGGREGATE model by adding a key column with bitmap + errorMessage = "errCode = 2, detailMessage = Key column can not set complex type:device_id" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column device_id bitmap KEY DEFAULT "to_bitmap(243)" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', to_bitmap(243), 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + + }, errorMessage) + + + //TODO Test the AGGREGATE model by adding a key column with Map + errorMessage = "errCode = 2, detailMessage = Map can only be used in the non-key column of the duplicate table at present." 
+ expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column m Map KEY DEFAULT "{'a': 100, 'b': 200}" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', '{'a': 100, 'b': 200}', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //TODO Test the AGGREGATE model by adding a column with JSON type none default value + errorMessage = "errCode = 2, detailMessage = JSONB or VARIANT type should not be used in key column[j]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column j JSON DEFAULT '{\"a\": 300}' AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', '{\"k1\":\"v31\", \"k2\": 300}', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //TODO Test the AGGREGATE model by adding a key column with JSON + errorMessage = "errCode = 2, detailMessage = JSONB or VARIANT type should not be used in key column[j]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column j JSON KEY DEFAULT "{'a': 100, 'b': 200}" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', '{\"k1\":\"v31\", \"k2\": 300}', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + +} From 11fddf185577c97fe7a2b569631d8a182aa04c2c Mon Sep 17 00:00:00 2001 From: minghong Date: Mon, 12 Aug 2024 09:58:49 +0800 Subject: [PATCH 47/94] [fix](tools) tpcds-tools: report benchmark result before cleanup (#38864) ## Proposed changes Cleanup may fail, and that blocks printing the hot/cold run summary; report the result before running cleanup. Issue Number: close #xxx --- tools/tpcds-tools/bin/run-tpcds-queries.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/tpcds-tools/bin/run-tpcds-queries.sh b/tools/tpcds-tools/bin/run-tpcds-queries.sh index ab84712a3eac555..8669ba8073ad2d5 100755 --- a/tools/tpcds-tools/bin/run-tpcds-queries.sh +++ b/tools/tpcds-tools/bin/run-tpcds-queries.sh @@ -130,6 +130,7 @@ get_session_variable() { } backup_session_variables_file="${CURDIR}/../conf/opt/backup_session_variables.sql" backup_session_variables() { + rm -f "${backup_session_variables_file}" touch "${backup_session_variables_file}" while IFS= read -r line; do k="${line/set global /}" done < <(grep -v '^ *#' <"${TPCDS_OPT_CONF}") } clean_up() { + echo "restore session variables:" + cat "${backup_session_variables_file}" mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e"source ${backup_session_variables_file};" - rm -f "${backup_session_variables_file}" } backup_session_variables @@ -200,8 +202,8 @@ for i in ${query_array[@]}; do fi done -clean_up - echo "Total cold run time: ${cold_run_sum} ms" echo "Total hot run time: ${best_hot_run_sum} ms" echo 'Finish tpcds queries.' 
+ +clean_up From f7be580faecb37836b8a54d4416b00d232276281 Mon Sep 17 00:00:00 2001 From: minghong Date: Mon, 12 Aug 2024 10:33:14 +0800 Subject: [PATCH 48/94] [opt](nereids) get table row count according to BE reported num and analyze result (#38880) ## Proposed changes We have 2 sources for the table rowCount: 1. the row count reported by BE; 2. the row count set by the analyzer. When the reported row count is no more than zero, we use the row count set by the analyzer. For an olap table, TableStatsMeta.getRowCount(indexId) returns the row count set by the analyzer; for an external table, TableStatsMeta.getRowCount(indexId) does not exist, so we use the max of ColumnStatistics.count. Issue Number: close #xxx --- .../translator/PhysicalPlanTranslator.java | 9 +- .../implementation/AggregateStrategies.java | 3 +- .../doris/nereids/stats/StatsCalculator.java | 168 ++++--- .../plans/physical/PhysicalOlapScan.java | 12 +- .../apache/doris/planner/OlapScanNode.java | 4 +- .../data/mv_p0/ssb/q_1_1/q_1_1.out | 383 +++++++++++++++ .../agg_optimize_when_uniform.out | 4 +- .../test_count_on_index.groovy | 7 +- .../partition_prune/partition_prune.groovy | 448 +++++++++--------- .../multiple_no_where.groovy | 178 +------ .../ssb/multiple_ssb/multiple_ssb.groovy | 11 +- .../multiple_ssb_between.groovy | 11 +- .../suites/mv_p0/ssb/q_1_1/q_1_1.groovy | 8 - .../suites/mv_p0/ssb/q_2_1/q_2_1.groovy | 22 +- .../suites/mv_p0/ssb/q_3_1/q_3_1.groovy | 8 - .../suites/mv_p0/ssb/q_4_1/q_4_1.groovy | 24 +- .../suites/mv_p0/ssb/q_4_1_r1/q_4_1_r1.groovy | 8 - .../agg_optimize_when_uniform.groovy | 3 +- .../mv/partition_mv_rewrite.groovy | 109 +++-- 19 files changed, 823 insertions(+), 597 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 2dfbe1dd0fa6757..55d99e6b50fc7d6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -276,7 +276,11 @@ public PlanFragment translatePlan(PhysicalPlan physicalPlan) { } } for (ScanNode scanNode : context.getScanNodes()) { - Utils.execWithUncheckedException(scanNode::finalizeForNereids); + try { + scanNode.finalizeForNereids(); + } catch (Exception e) { + throw new RuntimeException(e.getMessage(), e); + } } return rootFragment; } @@ -834,6 +838,9 @@ public PlanFragment visitPhysicalOlapScan(PhysicalOlapScan olapScan, PlanTransla .map(context::findSlotRef).collect(Collectors.toList()); dataPartition = new DataPartition(TPartitionType.HASH_PARTITIONED, partitionExprs); } + if (olapScan.getStats() != null) { + olapScanNode.setCardinality((long) olapScan.getStats().getRowCount()); + } // TODO: maybe we could have a better way to create fragment PlanFragment planFragment = createPlanFragment(olapScanNode, dataPartition, olapScan); context.addPlanFragment(planFragment); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java index 16846c0213f58ca..4a546e80c7c93d8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/implementation/AggregateStrategies.java @@ -253,8 +253,7 @@ public List buildRules() { logicalProject( logicalFileScan() )
- ) - .when(agg -> agg.isNormalized() && enablePushDownNoGroupAgg()) + ).when(agg -> agg.isNormalized() && enablePushDownNoGroupAgg()) .thenApply(ctx -> { LogicalAggregate> agg = ctx.root; LogicalProject project = agg.child(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 0a356f0a42f3460..ab06a8c7fb8f4f2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -53,6 +53,7 @@ import org.apache.doris.nereids.trees.plans.algebra.OlapScan; import org.apache.doris.nereids.trees.plans.algebra.PartitionTopN; import org.apache.doris.nereids.trees.plans.algebra.Project; +import org.apache.doris.nereids.trees.plans.algebra.Relation; import org.apache.doris.nereids.trees.plans.algebra.Repeat; import org.apache.doris.nereids.trees.plans.algebra.SetOperation; import org.apache.doris.nereids.trees.plans.algebra.TopN; @@ -115,6 +116,7 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalPartitionTopN; import org.apache.doris.nereids.trees.plans.physical.PhysicalProject; import org.apache.doris.nereids.trees.plans.physical.PhysicalQuickSort; +import org.apache.doris.nereids.trees.plans.physical.PhysicalRelation; import org.apache.doris.nereids.trees.plans.physical.PhysicalRepeat; import org.apache.doris.nereids.trees.plans.physical.PhysicalSchemaScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalSink; @@ -307,7 +309,6 @@ public Statistics visitLogicalFilter(LogicalFilter filter, Void private long computeDeltaRowCount(OlapScan olapScan, SlotReference slot) { AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(olapScan.getTable().getId()); - long deltaRowCount = 0; if (tableMeta != null) { ColStatsMeta colMeta = tableMeta.findColumnStatsMeta( @@ -329,24 +330,22 @@ private long computeDeltaRowCount(OlapScan olapScan, SlotReference slot) { return deltaRowCount; } - private void adjustColStats(CatalogRelation catalogRelation, SlotReference slot, + private void adjustColStats(OlapScan olapScan, SlotReference slot, ColumnStatisticBuilder builder) { if (builder.getAvgSizeByte() <= 0) { builder.setAvgSizeByte(slot.getDataType().toCatalogDataType().getSlotSize()); } - if (catalogRelation instanceof OlapScan) { - OlapScan olapScan = (OlapScan) catalogRelation; - long delta = computeDeltaRowCount(olapScan, slot); - if (delta > 0) { - builder.setCount(builder.getCount() + delta); - // clear min-max to avoid error estimation - // for example, after yesterday data loaded, user send query about yesterday immediately. - // since yesterday data are not analyzed, the max date is before yesterday, and hence optimizer - // estimates the filter result is zero - builder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY) - .setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY); - } + long delta = computeDeltaRowCount(olapScan, slot); + if (delta > 0) { + builder.setCount(builder.getCount() + delta); + // clear min-max to avoid error estimation + // for example, after yesterday data loaded, user send query about yesterday immediately. 
+ // since yesterday data are not analyzed, the max date is before yesterday, and hence optimizer + // estimates the filter result is zero + builder.setMinExpr(null).setMinValue(Double.NEGATIVE_INFINITY) + .setMaxExpr(null).setMaxValue(Double.POSITIVE_INFINITY); } + } private ColumnStatistic getColumnStatsFromTableCache(CatalogRelation catalogRelation, SlotReference slot) { @@ -357,19 +356,18 @@ private ColumnStatistic getColumnStatsFromTableCache(CatalogRelation catalogRela return getColumnStatistic(catalogRelation.getTable(), slot.getName(), idxId); } - private ColumnStatistic getColumnStatsFromPartitionCache(CatalogRelation catalogRelation, SlotReference slot, + private ColumnStatistic getColumnStatsFromPartitionCache(OlapScan catalogRelation, SlotReference slot, List partitionNames) { - long idxId = -1; - if (catalogRelation instanceof OlapScan) { - idxId = ((OlapScan) catalogRelation).getSelectedIndexId(); - } + long idxId = catalogRelation.getSelectedIndexId(); + return getColumnStatistic(catalogRelation.getTable(), slot.getName(), idxId, partitionNames); } private long getSelectedPartitionRowCount(OlapScan olapScan) { long partRowCountSum = 0; for (long id : olapScan.getSelectedPartitionIds()) { - long partRowCount = olapScan.getTable().getPartition(id).getBaseIndex().getRowCount(); + long partRowCount = olapScan.getTable().getPartition(id) + .getIndex(olapScan.getSelectedIndexId()).getRowCount(); // if we cannot get any partition's rowCount, return -1 to fallback to table level stats if (partRowCount <= 0) { return -1; @@ -399,17 +397,31 @@ private void checkIfUnknownStatsUsedAsKey(StatisticsBuilder builder) { } } - private Statistics computeOlapScan(LogicalOlapScan olapScan) { + private Statistics computeOlapScan(OlapScan olapScan) { OlapTable olapTable = olapScan.getTable(); + double tableRowCount = olapTable.getRowCountForIndex(olapScan.getSelectedIndexId()); + if (tableRowCount <= 0) { + AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); + TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(olapScan.getTable().getId()); + if (tableMeta != null) { + // create-view after analyzing, we may get -1 for this view row count + tableRowCount = Math.max(1, tableMeta.getRowCount(olapScan.getSelectedIndexId())); + } else { + tableRowCount = 1; + } + } if (olapScan.getSelectedIndexId() != olapScan.getTable().getBaseIndexId() || olapTable instanceof MTMV) { // mv is selected, return its estimated stats Optional optStats = cascadesContext.getStatementContext() - .getStatistics(olapScan.getRelationId()); + .getStatistics(((Relation) olapScan).getRelationId()); if (optStats.isPresent()) { - double actualRowCount = olapScan.getTable().getRowCountForNereids(); + double selectedPartitionsRowCount = getSelectedPartitionRowCount(olapScan); + if (selectedPartitionsRowCount == -1) { + selectedPartitionsRowCount = tableRowCount; + } // if estimated mv rowCount is more than actual row count, fall back to base table stats - if (actualRowCount > optStats.get().getRowCount()) { + if (selectedPartitionsRowCount > optStats.get().getRowCount()) { return optStats.get(); } } @@ -421,76 +433,78 @@ private Statistics computeOlapScan(LogicalOlapScan olapScan) { if (StatisticConstants.isSystemTable(olapTable) || !FeConstants.enableInternalSchemaDb || ConnectContext.get() == null || ConnectContext.get().getSessionVariable().internalSession) { - for (Slot slot : olapScan.getOutput()) { + for (Slot slot : ((Plan) olapScan).getOutput()) { builder.putColumnStatistics(slot, 
ColumnStatistic.UNKNOWN); } setHasUnknownColStatsInStatementContext(); - builder.setRowCount(olapTable.getRowCountForNereids()); + builder.setRowCount(tableRowCount); return builder.build(); } - // for regression shape test, get row count from columnStats.count + // for regression shape test if (ConnectContext.get() == null || !ConnectContext.get().getSessionVariable().enableStats) { // get row count from any visible slotReference's colStats - double rowCount = 1; - for (Slot slot : olapScan.getOutput()) { - if (isVisibleSlotReference(slot)) { - ColumnStatistic cache = getColumnStatistic(olapTable, slot.getName(), - olapScan.getSelectedIndexId()); - rowCount = Math.max(rowCount, cache.count); - } + for (Slot slot : ((Plan) olapScan).getOutput()) { builder.putColumnStatistics(slot, - new ColumnStatisticBuilder(ColumnStatistic.UNKNOWN).setCount(rowCount).build()); + new ColumnStatisticBuilder(ColumnStatistic.UNKNOWN).setCount(tableRowCount).build()); } setHasUnknownColStatsInStatementContext(); - return builder.setRowCount(rowCount).build(); + return builder.setRowCount(tableRowCount).build(); } // build Stats for olapScan - // if slot is not slotReference or is invisible, use UNKNOWN - List outputSlotReferences = new ArrayList<>(); - for (Slot slot : olapScan.getOutput()) { + // if slot is invisible, use UNKNOWN + List visibleOutputSlots = new ArrayList<>(); + for (Slot slot : ((Plan) olapScan).getOutput()) { if (isVisibleSlotReference(slot)) { - outputSlotReferences.add((SlotReference) slot); + visibleOutputSlots.add((SlotReference) slot); } else { builder.putColumnStatistics(slot, ColumnStatistic.UNKNOWN); } } - // build col stats for outputSlotReferences + if (!olapScan.getSelectedPartitionIds().isEmpty()) { - double rowCount = getSelectedPartitionRowCount(olapScan); - // if partition row count is not available, fallback to table stats - if (rowCount > 0) { + // partition pruned + double selectedPartitionsRowCount = getSelectedPartitionRowCount(olapScan); + if (selectedPartitionsRowCount > 0) { List selectedPartitionNames = new ArrayList<>(olapScan.getSelectedPartitionIds().size()); olapScan.getSelectedPartitionIds().forEach(id -> { selectedPartitionNames.add(olapScan.getTable().getPartition(id).getName()); }); - for (SlotReference slot : outputSlotReferences) { + for (SlotReference slot : visibleOutputSlots) { ColumnStatistic cache = getColumnStatsFromPartitionCache(olapScan, slot, selectedPartitionNames); ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache); + colStatsBuilder.setCount(selectedPartitionsRowCount); + adjustColStats(olapScan, slot, colStatsBuilder); + builder.putColumnStatistics(slot, colStatsBuilder.build()); + } + checkIfUnknownStatsUsedAsKey(builder); + builder.setRowCount(selectedPartitionsRowCount); + } else { + // if partition row count is invalid (-1), fallback to table stats + for (SlotReference slot : visibleOutputSlots) { + ColumnStatistic cache = getColumnStatsFromTableCache((CatalogRelation) olapScan, slot); + ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache); + colStatsBuilder.setCount(tableRowCount); adjustColStats(olapScan, slot, colStatsBuilder); builder.putColumnStatistics(slot, colStatsBuilder.build()); - rowCount = Math.max(rowCount, colStatsBuilder.getCount()); } checkIfUnknownStatsUsedAsKey(builder); - return builder.setRowCount(rowCount).build(); + builder.setRowCount(tableRowCount); } - } - - // get table level stats - double rowCount = olapScan.getTable().getRowCountForNereids(); - for (SlotReference 
slot : outputSlotReferences) { - ColumnStatistic cache = getColumnStatsFromTableCache(olapScan, slot); - ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache); - if (cache.isUnKnown) { - colStatsBuilder.setCount(rowCount); + } else { + // get table level stats + for (SlotReference slot : visibleOutputSlots) { + ColumnStatistic cache = getColumnStatsFromTableCache((CatalogRelation) olapScan, slot); + ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache); + colStatsBuilder.setCount(tableRowCount); + adjustColStats(olapScan, slot, colStatsBuilder); + builder.putColumnStatistics(slot, colStatsBuilder.build()); } - adjustColStats(olapScan, slot, colStatsBuilder); - builder.putColumnStatistics(slot, colStatsBuilder.build()); - rowCount = Math.max(rowCount, colStatsBuilder.getCount()); + checkIfUnknownStatsUsedAsKey(builder); + builder.setRowCount(tableRowCount); } - checkIfUnknownStatsUsedAsKey(builder); - return builder.setRowCount(rowCount).build(); + return builder.build(); } @Override @@ -659,7 +673,7 @@ public Statistics visitPhysicalOneRowRelation(PhysicalOneRowRelation oneRowRelat @Override public Statistics visitPhysicalOlapScan(PhysicalOlapScan olapScan, Void context) { - return computeCatalogRelation(olapScan); + return computeOlapScan(olapScan); } @Override @@ -681,7 +695,9 @@ public Statistics visitPhysicalFileScan(PhysicalFileScan fileScan, Void context) @Override public Statistics visitPhysicalStorageLayerAggregate( PhysicalStorageLayerAggregate storageLayerAggregate, Void context) { - return storageLayerAggregate.getRelation().accept(this, context); + PhysicalRelation relation = storageLayerAggregate.getRelation(); + return relation.accept(this, context); + } @Override @@ -1029,6 +1045,8 @@ private ColumnStatistic getColumnStatistic(TableIf table, String colName, long i */ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { StatisticsBuilder builder = new StatisticsBuilder(); + double tableRowCount = catalogRelation.getTable().getRowCount(); + // for FeUt, use ColumnStatistic.UNKNOWN if (!FeConstants.enableInternalSchemaDb || ConnectContext.get() == null @@ -1050,19 +1068,27 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { } Set slotSet = slotSetBuilder.build(); - double rowCount = catalogRelation.getTable().getRowCountForNereids(); + if (tableRowCount <= 0) { + // try to get row count from col stats + for (SlotReference slot : slotSet) { + ColumnStatistic cache = getColumnStatsFromTableCache(catalogRelation, slot); + tableRowCount = Math.max(cache.count, tableRowCount); + } + } + for (SlotReference slot : slotSet) { - ColumnStatistic cache = getColumnStatsFromTableCache(catalogRelation, slot); - ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache); - if (cache.isUnKnown) { - colStatsBuilder.setCount(rowCount); + ColumnStatistic cache; + if (ConnectContext.get() != null && ! 
ConnectContext.get().getSessionVariable().enableStats) { + cache = ColumnStatistic.UNKNOWN; + } else { + cache = getColumnStatsFromTableCache(catalogRelation, slot); } - adjustColStats(catalogRelation, slot, colStatsBuilder); - rowCount = Math.max(rowCount, colStatsBuilder.getCount()); + ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache); + colStatsBuilder.setCount(tableRowCount); builder.putColumnStatistics(slot, colStatsBuilder.build()); } checkIfUnknownStatsUsedAsKey(builder); - return builder.setRowCount(rowCount).build(); + return builder.setRowCount(tableRowCount).build(); } private Statistics computeTopN(TopN topN) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java index 76713a51e292094..81787c4cfe0c575 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalOlapScan.java @@ -122,7 +122,17 @@ public String toString() { if (!getAppliedRuntimeFilters().isEmpty()) { getAppliedRuntimeFilters().forEach(rf -> builder.append(" RF").append(rf.getId().asInt())); } - return Utils.toSqlString("PhysicalOlapScan[" + table.getName() + "]" + getGroupIdWithPrefix(), + String index = ""; + if (selectedIndexId != getTable().getBaseIndexId()) { + index = "(" + selectedIndexId + ")"; + } + String partitions = ""; + int partitionCount = this.table.getPartitionNames().size(); + if (selectedPartitionIds.size() != partitionCount) { + partitions = " partitions(" + selectedPartitionIds.size() + "/" + partitionCount + ")"; + } + return Utils.toSqlString("PhysicalOlapScan[" + table.getName() + index + partitions + "]" + + getGroupIdWithPrefix(), "stats", statistics, "RFs", builder ); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index ddd6c0f719e7788..dffbba37cfe03bb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -1729,8 +1729,6 @@ public String getSelectedIndexName() { public void finalizeForNereids() { computeNumNodes(); computeStatsForNereids(); - // NOTICE: must call here to get selected tablet row count to let block rules work well. - mockRowCountInStatistic(); } private void computeStatsForNereids() { @@ -1738,7 +1736,7 @@ private void computeStatsForNereids() { avgRowSize = totalBytes / (float) cardinality * COMPRESSION_RATIO; capCardinalityAtLimit(); } - // when node scan has no data, cardinality should be 0 instead of a invalid + // when node scan has no data, cardinality should be 0 instead of an invalid // value after computeStats() cardinality = cardinality == -1 ? 
0 : cardinality; } diff --git a/regression-test/data/mv_p0/ssb/q_1_1/q_1_1.out b/regression-test/data/mv_p0/ssb/q_1_1/q_1_1.out index 1f27d180c736ce2..d0962795f0b490e 100644 --- a/regression-test/data/mv_p0/ssb/q_1_1/q_1_1.out +++ b/regression-test/data/mv_p0/ssb/q_1_1/q_1_1.out @@ -6,6 +6,389 @@ -- !select_mv -- 4 +-- !abc -- +root:Group[@4] + logical expressions: + id:349#4 cost=null estRows=1 children=[@3 ] (plan=LogicalResultSink[350] ( outputExprs=[revenue#38] )) + physical expressions: + id:354#4 cost=0 [0/0/0/] estRows=-1 children=[@3 ] (plan=PhysicalResultSink[355] ( outputExprs=[revenue#38] )) + enforcers: + chosen expression id: 354 + chosen properties: GATHER + stats + rows=1.0 + tupleSize=4.0 + width=1 + revenue#38 -> ndv=0.0204, min=2.000000(null), max=6.000000(null), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + lowest Plan(cost, properties, plan, childrenRequires) + + 4.046506802721089 ANY + id:354#4 cost=0 [0/0/0/] estRows=-1 children=[@3 ] (plan=PhysicalResultSink[355] ( outputExprs=[revenue#38] )) + [GATHER] + + 4.046506802721089 GATHER + id:354#4 cost=0 [0/0/0/] estRows=-1 children=[@3 ] (plan=PhysicalResultSink[355] ( outputExprs=[revenue#38] )) + [GATHER] + struct info map +StructInfoMap{ groupExpressionMap = {}, infoMap = {}} + + +Group[@0] + logical expressions: + id:337#0 cost=null estRows=2 children=[ ] (plan=LogicalOlapScan ( qualified=internal.regression_test_mv_p0_ssb_q_1_1.lineorder_flat, indexName=, selectedIndexId=346536, preAgg=ON )) + physical expressions: + id:383#0 cost=2 [2/0/0/] estRows=-1 children=[ ] (plan=PhysicalOlapScan[lineorder_flat]@0 ( stats=null )) + enforcers: + stats + rows=2.0 + tupleSize=170.0 + width=1 + LO_ORDERDATE#0 -> ndv=1.0000, min=19930101.000000(19930101), max=19930101.000000(19930101), count=2.0000, numNulls=0.0000, avgSizeByte=4.000000 + LO_ORDERKEY#1 -> ndv=2.0000, min=1.000000(1), max=2.000000(2), count=2.0000, numNulls=0.0000, avgSizeByte=4.000000 + LO_LINENUMBER#2 -> ndv=2.0000, min=1.000000(1), max=2.000000(2), count=2.0000, numNulls=0.0000, avgSizeByte=1.000000 + LO_CUSTKEY#3 -> ndv=2.0000, min=1.000000(1), max=2.000000(2), count=2.0000, numNulls=0.0000, avgSizeByte=4.000000 + LO_PARTKEY#4 -> ndv=2.0000, min=1.000000(1), max=2.000000(2), count=2.0000, numNulls=0.0000, avgSizeByte=4.000000 + LO_SUPPKEY#5 -> ndv=2.0000, min=1.000000(1), max=2.000000(2), count=2.0000, numNulls=0.0000, avgSizeByte=4.000000 + LO_ORDERPRIORITY#6 -> ndv=2.0000, min=13792273858822144.000000(1), max=14073748835532800.000000(2), count=2.0000, numNulls=0.0000, avgSizeByte=1.000000 + LO_SHIPPRIORITY#7 -> ndv=2.0000, min=1.000000(1), max=2.000000(2), count=2.0000, numNulls=0.0000, avgSizeByte=1.000000 + LO_QUANTITY#8 -> ndv=2.0000, min=1.000000(1), max=2.000000(2), count=2.0000, numNulls=0.0000, avgSizeByte=1.000000 + LO_EXTENDEDPRICE#9 -> ndv=2.0000, min=1.000000(1), max=2.000000(2), count=2.0000, numNulls=0.0000, avgSizeByte=4.000000 + LO_ORDTOTALPRICE#10 -> ndv=2.0000, min=1.000000(1), max=2.000000(2), count=2.0000, numNulls=0.0000, avgSizeByte=4.000000 + LO_DISCOUNT#11 -> ndv=2.0000, min=2.000000(2), max=100.000000(100), count=2.0000, numNulls=0.0000, avgSizeByte=1.000000 + LO_REVENUE#12 -> ndv=2.0000, min=1.000000(1), max=2.000000(2), count=2.0000, numNulls=0.0000, avgSizeByte=4.000000 + LO_SUPPLYCOST#13 -> ndv=2.0000, min=1.000000(1), max=2.000000(2), count=2.0000, numNulls=0.0000, avgSizeByte=4.000000 + LO_TAX#14 -> ndv=2.0000, min=1.000000(1), max=2.000000(2), count=2.0000, numNulls=0.0000, avgSizeByte=1.000000 + LO_COMMITDATE#15 -> 
ndv=1.0000, min=20230609000000.000000(2023-06-09), max=20230609000000.000000(2023-06-09), count=2.0000, numNulls=0.0000, avgSizeByte=4.000000 + LO_SHIPMODE#16 -> ndv=1.0000, min=32484424388800356.000000(shipmode), max=32484424388800356.000000(shipmode), count=2.0000, numNulls=0.0000, avgSizeByte=8.000000 + C_NAME#17 -> ndv=1.0000, min=31069369912000512.000000(name), max=31069369912000512.000000(name), count=2.0000, numNulls=0.0000, avgSizeByte=4.000000 + C_ADDRESS#18 -> ndv=1.0000, min=27413455319692148.000000(address), max=27413455319692148.000000(address), count=2.0000, numNulls=0.0000, avgSizeByte=7.000000 + C_CITY#19 -> ndv=1.0000, min=27981971661520896.000000(city), max=27981971661520896.000000(city), count=2.0000, numNulls=0.0000, avgSizeByte=4.000000 + C_NATION#20 -> ndv=1.0000, min=31069400051183104.000000(nation), max=31069400051183104.000000(nation), count=2.0000, numNulls=0.0000, avgSizeByte=6.000000 + C_REGION#21 -> ndv=2.0000, min=18380833614807872.000000(AMERICA), max=32199642169961984.000000(region), count=2.0000, numNulls=0.0000, avgSizeByte=6.500000 + C_PHONE#22 -> ndv=1.0000, min=31640025194364928.000000(phone), max=31640025194364928.000000(phone), count=2.0000, numNulls=0.0000, avgSizeByte=5.000000 + C_MKTSEGMENT#23 -> ndv=1.0000, min=30798920357865324.000000(mktsegment), max=30798920357865324.000000(mktsegment), count=2.0000, numNulls=0.0000, avgSizeByte=10.000000 + S_NAME#24 -> ndv=1.0000, min=31069369912000512.000000(name), max=31069369912000512.000000(name), count=2.0000, numNulls=0.0000, avgSizeByte=4.000000 + S_ADDRESS#25 -> ndv=1.0000, min=27413455319692148.000000(address), max=27413455319692148.000000(address), count=2.0000, numNulls=0.0000, avgSizeByte=7.000000 + S_CITY#26 -> ndv=1.0000, min=27981971661520896.000000(city), max=27981971661520896.000000(city), count=2.0000, numNulls=0.0000, avgSizeByte=4.000000 + S_NATION#27 -> ndv=1.0000, min=31069400051183104.000000(nation), max=31069400051183104.000000(nation), count=2.0000, numNulls=0.0000, avgSizeByte=6.000000 + S_REGION#28 -> ndv=2.0000, min=18380833614807872.000000(AMERICA), max=32199642169961984.000000(region), count=2.0000, numNulls=0.0000, avgSizeByte=6.500000 + S_PHONE#29 -> ndv=1.0000, min=31640025194364928.000000(phone), max=31640025194364928.000000(phone), count=2.0000, numNulls=0.0000, avgSizeByte=5.000000 + P_NAME#30 -> ndv=1.0000, min=31069369912000512.000000(name), max=31069369912000512.000000(name), count=2.0000, numNulls=0.0000, avgSizeByte=4.000000 + P_MFGR#31 -> ndv=2.0000, min=21750845341380864.000000(MFGR#1), max=30793366941728768.000000(mfgr), count=2.0000, numNulls=0.0000, avgSizeByte=5.000000 + P_CATEGORY#32 -> ndv=1.0000, min=27973175239733104.000000(category), max=27973175239733104.000000(category), count=2.0000, numNulls=0.0000, avgSizeByte=8.000000 + P_BRAND#33 -> ndv=1.0000, min=27710310507085824.000000(brand), max=27710310507085824.000000(brand), count=2.0000, numNulls=0.0000, avgSizeByte=5.000000 + P_COLOR#34 -> ndv=1.0000, min=27988534211248128.000000(color), max=27988534211248128.000000(color), count=2.0000, numNulls=0.0000, avgSizeByte=5.000000 + P_TYPE#35 -> ndv=1.0000, min=32784620936232960.000000(type), max=32784620936232960.000000(type), count=2.0000, numNulls=0.0000, avgSizeByte=4.000000 + P_SIZE#36 -> ndv=1.0000, min=4.000000(4), max=4.000000(4), count=2.0000, numNulls=0.0000, avgSizeByte=1.000000 + P_CONTAINER#37 -> ndv=1.0000, min=27988542883981680.000000(container), max=27988542883981680.000000(container), count=2.0000, numNulls=0.0000, avgSizeByte=9.000000 + lowest 
Plan(cost, properties, plan, childrenRequires) + + 2.0 ANY + id:383#0 cost=2 [2/0/0/] estRows=-1 children=[ ] (plan=PhysicalOlapScan[lineorder_flat]@0 ( stats=null )) + [] + + 2.0 DistributionSpecHash ( orderedShuffledColumns=[1], shuffleType=NATURAL, tableId=346535, selectedIndexId=346536, partitionIds=[346534], equivalenceExprIds=[[1]], exprIdToEquivalenceSet={1=0} ) Order: ([]) + id:383#0 cost=2 [2/0/0/] estRows=-1 children=[ ] (plan=PhysicalOlapScan[lineorder_flat]@0 ( stats=null )) + [] + struct info map +StructInfoMap{ groupExpressionMap = {{0}=id:337#0 cost=null estRows=2 children=[ ] (plan=LogicalOlapScan ( qualified=internal.regression_test_mv_p0_ssb_q_1_1.lineorder_flat, indexName=, selectedIndexId=346536, preAgg=ON )):[]}, infoMap = {}} + + +Group[@1] + logical expressions: + id:340#1 cost=null estRows=0.02 children=[@0 ] (plan=LogicalFilter[341] ( predicates=(((((LO_QUANTITY#8 < 25) AND (LO_ORDERDATE#0 >= 19930101)) AND (LO_DISCOUNT#11 >= 1)) AND (LO_ORDERDATE#0 <= 19931231)) AND (LO_DISCOUNT#11 <= 3)) )) + physical expressions: + id:380#1 cost=0 [0/0/0/] estRows=-1 children=[@0 ] (plan=PhysicalFilter[381]@1 ( stats=null, predicates=(((((LO_QUANTITY#8 < 25) AND (LO_ORDERDATE#0 >= 19930101)) AND (LO_DISCOUNT#11 >= 1)) AND (LO_ORDERDATE#0 <= 19931231)) AND (LO_DISCOUNT#11 <= 3)) )) + enforcers: + stats + rows=0.02040816326530612 + tupleSize=170.0 + width=1 + LO_ORDERDATE#0 -> ndv=0.0204, min=19930101.000000(19930101), max=19930101.000000(19930101), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + LO_ORDERKEY#1 -> ndv=0.0204, min=1.000000(1), max=2.000000(2), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + LO_LINENUMBER#2 -> ndv=0.0204, min=1.000000(1), max=2.000000(2), count=0.0204, numNulls=0.0000, avgSizeByte=1.000000 + LO_CUSTKEY#3 -> ndv=0.0204, min=1.000000(1), max=2.000000(2), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + LO_PARTKEY#4 -> ndv=0.0204, min=1.000000(1), max=2.000000(2), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + LO_SUPPKEY#5 -> ndv=0.0204, min=1.000000(1), max=2.000000(2), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + LO_ORDERPRIORITY#6 -> ndv=0.0204, min=13792273858822144.000000(1), max=14073748835532800.000000(2), count=0.0204, numNulls=0.0000, avgSizeByte=1.000000 + LO_SHIPPRIORITY#7 -> ndv=0.0204, min=1.000000(1), max=2.000000(2), count=0.0204, numNulls=0.0000, avgSizeByte=1.000000 + LO_QUANTITY#8 -> ndv=0.0204, min=1.000000(1), max=2.000000(2), count=0.0204, numNulls=0.0000, avgSizeByte=1.000000 + LO_EXTENDEDPRICE#9 -> ndv=0.0204, min=1.000000(1), max=2.000000(2), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + LO_ORDTOTALPRICE#10 -> ndv=0.0204, min=1.000000(1), max=2.000000(2), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + LO_DISCOUNT#11 -> ndv=0.0204, min=2.000000(2), max=3.000000(3), count=0.0204, numNulls=0.0000, avgSizeByte=1.000000 + LO_REVENUE#12 -> ndv=0.0204, min=1.000000(1), max=2.000000(2), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + LO_SUPPLYCOST#13 -> ndv=0.0204, min=1.000000(1), max=2.000000(2), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + LO_TAX#14 -> ndv=0.0204, min=1.000000(1), max=2.000000(2), count=0.0204, numNulls=0.0000, avgSizeByte=1.000000 + LO_COMMITDATE#15 -> ndv=0.0204, min=20230609000000.000000(2023-06-09), max=20230609000000.000000(2023-06-09), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + LO_SHIPMODE#16 -> ndv=0.0204, min=32484424388800356.000000(shipmode), max=32484424388800356.000000(shipmode), count=0.0204, numNulls=0.0000, avgSizeByte=8.000000 + 
C_NAME#17 -> ndv=0.0204, min=31069369912000512.000000(name), max=31069369912000512.000000(name), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + C_ADDRESS#18 -> ndv=0.0204, min=27413455319692148.000000(address), max=27413455319692148.000000(address), count=0.0204, numNulls=0.0000, avgSizeByte=7.000000 + C_CITY#19 -> ndv=0.0204, min=27981971661520896.000000(city), max=27981971661520896.000000(city), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + C_NATION#20 -> ndv=0.0204, min=31069400051183104.000000(nation), max=31069400051183104.000000(nation), count=0.0204, numNulls=0.0000, avgSizeByte=6.000000 + C_REGION#21 -> ndv=0.0204, min=18380833614807872.000000(AMERICA), max=32199642169961984.000000(region), count=0.0204, numNulls=0.0000, avgSizeByte=6.500000 + C_PHONE#22 -> ndv=0.0204, min=31640025194364928.000000(phone), max=31640025194364928.000000(phone), count=0.0204, numNulls=0.0000, avgSizeByte=5.000000 + C_MKTSEGMENT#23 -> ndv=0.0204, min=30798920357865324.000000(mktsegment), max=30798920357865324.000000(mktsegment), count=0.0204, numNulls=0.0000, avgSizeByte=10.000000 + S_NAME#24 -> ndv=0.0204, min=31069369912000512.000000(name), max=31069369912000512.000000(name), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + S_ADDRESS#25 -> ndv=0.0204, min=27413455319692148.000000(address), max=27413455319692148.000000(address), count=0.0204, numNulls=0.0000, avgSizeByte=7.000000 + S_CITY#26 -> ndv=0.0204, min=27981971661520896.000000(city), max=27981971661520896.000000(city), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + S_NATION#27 -> ndv=0.0204, min=31069400051183104.000000(nation), max=31069400051183104.000000(nation), count=0.0204, numNulls=0.0000, avgSizeByte=6.000000 + S_REGION#28 -> ndv=0.0204, min=18380833614807872.000000(AMERICA), max=32199642169961984.000000(region), count=0.0204, numNulls=0.0000, avgSizeByte=6.500000 + S_PHONE#29 -> ndv=0.0204, min=31640025194364928.000000(phone), max=31640025194364928.000000(phone), count=0.0204, numNulls=0.0000, avgSizeByte=5.000000 + P_NAME#30 -> ndv=0.0204, min=31069369912000512.000000(name), max=31069369912000512.000000(name), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + P_MFGR#31 -> ndv=0.0204, min=21750845341380864.000000(MFGR#1), max=30793366941728768.000000(mfgr), count=0.0204, numNulls=0.0000, avgSizeByte=5.000000 + P_CATEGORY#32 -> ndv=0.0204, min=27973175239733104.000000(category), max=27973175239733104.000000(category), count=0.0204, numNulls=0.0000, avgSizeByte=8.000000 + P_BRAND#33 -> ndv=0.0204, min=27710310507085824.000000(brand), max=27710310507085824.000000(brand), count=0.0204, numNulls=0.0000, avgSizeByte=5.000000 + P_COLOR#34 -> ndv=0.0204, min=27988534211248128.000000(color), max=27988534211248128.000000(color), count=0.0204, numNulls=0.0000, avgSizeByte=5.000000 + P_TYPE#35 -> ndv=0.0204, min=32784620936232960.000000(type), max=32784620936232960.000000(type), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + P_SIZE#36 -> ndv=0.0204, min=4.000000(4), max=4.000000(4), count=0.0204, numNulls=0.0000, avgSizeByte=1.000000 + P_CONTAINER#37 -> ndv=0.0204, min=27988542883981680.000000(container), max=27988542883981680.000000(container), count=0.0204, numNulls=0.0000, avgSizeByte=9.000000 + lowest Plan(cost, properties, plan, childrenRequires) + + 2.00058 ANY + id:380#1 cost=0 [0/0/0/] estRows=-1 children=[@0 ] (plan=PhysicalFilter[381]@1 ( stats=null, predicates=(((((LO_QUANTITY#8 < 25) AND (LO_ORDERDATE#0 >= 19930101)) AND (LO_DISCOUNT#11 >= 1)) AND (LO_ORDERDATE#0 <= 19931231)) AND (LO_DISCOUNT#11 
<= 3)) )) + [DistributionSpecHash ( orderedShuffledColumns=[1], shuffleType=NATURAL, tableId=346535, selectedIndexId=346536, partitionIds=[346534], equivalenceExprIds=[[1]], exprIdToEquivalenceSet={1=0} ) Order: ([])] + + 2.00058 DistributionSpecHash ( orderedShuffledColumns=[1], shuffleType=NATURAL, tableId=346535, selectedIndexId=346536, partitionIds=[346534], equivalenceExprIds=[[1]], exprIdToEquivalenceSet={1=0} ) Order: ([]) + id:380#1 cost=0 [0/0/0/] estRows=-1 children=[@0 ] (plan=PhysicalFilter[381]@1 ( stats=null, predicates=(((((LO_QUANTITY#8 < 25) AND (LO_ORDERDATE#0 >= 19930101)) AND (LO_DISCOUNT#11 >= 1)) AND (LO_ORDERDATE#0 <= 19931231)) AND (LO_DISCOUNT#11 <= 3)) )) + [DistributionSpecHash ( orderedShuffledColumns=[1], shuffleType=NATURAL, tableId=346535, selectedIndexId=346536, partitionIds=[346534], equivalenceExprIds=[[1]], exprIdToEquivalenceSet={1=0} ) Order: ([])] + struct info map +StructInfoMap{ groupExpressionMap = {{0}=id:340#1 cost=null estRows=0.02 children=[@0 ] (plan=LogicalFilter[341] ( predicates=(((((LO_QUANTITY#8 < 25) AND (LO_ORDERDATE#0 >= 19930101)) AND (LO_DISCOUNT#11 >= 1)) AND (LO_ORDERDATE#0 <= 19931231)) AND (LO_DISCOUNT#11 <= 3)) )):[{0}]}, infoMap = {}} + + +Group[@2] + logical expressions: + id:343#2 cost=null estRows=0.02 children=[@1 ] (plan=LogicalProject[344] ( distinct=false, projects=[LO_EXTENDEDPRICE#9, LO_DISCOUNT#11], excepts=[] )) + physical expressions: + id:375#2 cost=1 [1/0/0/] estRows=-1 children=[@1 ] (plan=PhysicalProject[376]@2 ( stats=null, projects=[LO_EXTENDEDPRICE#9, LO_DISCOUNT#11] )) + enforcers: + stats + rows=0.02040816326530612 + tupleSize=5.0 + width=1 + LO_EXTENDEDPRICE#9 -> ndv=0.0204, min=1.000000(1), max=2.000000(2), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + LO_DISCOUNT#11 -> ndv=0.0204, min=2.000000(2), max=3.000000(3), count=0.0204, numNulls=0.0000, avgSizeByte=1.000000 + lowest Plan(cost, properties, plan, childrenRequires) + + 3.00058 ANY + id:375#2 cost=1 [1/0/0/] estRows=-1 children=[@1 ] (plan=PhysicalProject[376]@2 ( stats=null, projects=[LO_EXTENDEDPRICE#9, LO_DISCOUNT#11] )) + [DistributionSpecHash ( orderedShuffledColumns=[1], shuffleType=NATURAL, tableId=346535, selectedIndexId=346536, partitionIds=[346534], equivalenceExprIds=[[1]], exprIdToEquivalenceSet={1=0} ) Order: ([])] + + 3.00058 DistributionSpecHash ( orderedShuffledColumns=[1], shuffleType=NATURAL, tableId=346535, selectedIndexId=346536, partitionIds=[346534], equivalenceExprIds=[[1]], exprIdToEquivalenceSet={1=0} ) Order: ([]) + id:375#2 cost=1 [1/0/0/] estRows=-1 children=[@1 ] (plan=PhysicalProject[376]@2 ( stats=null, projects=[LO_EXTENDEDPRICE#9, LO_DISCOUNT#11] )) + [DistributionSpecHash ( orderedShuffledColumns=[1], shuffleType=NATURAL, tableId=346535, selectedIndexId=346536, partitionIds=[346534], equivalenceExprIds=[[1]], exprIdToEquivalenceSet={1=0} ) Order: ([])] + struct info map +StructInfoMap{ groupExpressionMap = {{0}=id:343#2 cost=null estRows=0.02 children=[@1 ] (plan=LogicalProject[344] ( distinct=false, projects=[LO_EXTENDEDPRICE#9, LO_DISCOUNT#11], excepts=[] )):[{0}]}, infoMap = {}} + + +Group[@3] + logical expressions: + id:346#3 cost=null estRows=1 children=[@2 ] (plan=LogicalAggregate[347] ( groupByExpr=[], outputExpr=[sum((LO_EXTENDEDPRICE#9 * LO_DISCOUNT#11)) AS `revenue`#38], hasRepeat=false )) + id:541#3 cost=null estRows=1 children=[@8 ] (plan=LogicalProject[542] ( distinct=false, projects=[sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))#82 AS `revenue`#38], excepts=[] )) + physical expressions: + 
id:370#3 cost=2 [1/1/0/] estRows=1 children=[@5 ] (plan=PhysicalHashAggregate[371]@3 ( aggPhase=GLOBAL, aggMode=BUFFER_TO_RESULT, maybeUseStreaming=false, groupByExpr=[], outputExpr=[sum(partial_sum((LO_EXTENDEDPRICE * LO_DISCOUNT))#81) AS `revenue`#38], partitionExpr=Optional[[]], requireProperties=[GATHER], topnOpt=false, stats=null )) + id:546#3 cost=1 [1/0/0/] estRows=-1 children=[@8 ] (plan=PhysicalProject[547]@3 ( stats=null, projects=[sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))#82 AS `revenue`#38] )) + enforcers: + chosen expression id: 546 + chosen properties: GATHER + stats + rows=1.0 + tupleSize=4.0 + width=1 + revenue#38 -> ndv=0.0204, min=2.000000(null), max=6.000000(null), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + lowest Plan(cost, properties, plan, childrenRequires) + + 4.046506802721089 ANY + id:546#3 cost=1 [1/0/0/] estRows=-1 children=[@8 ] (plan=PhysicalProject[547]@3 ( stats=null, projects=[sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))#82 AS `revenue`#38] )) + [GATHER] + + 4.046506802721089 GATHER + id:546#3 cost=1 [1/0/0/] estRows=-1 children=[@8 ] (plan=PhysicalProject[547]@3 ( stats=null, projects=[sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))#82 AS `revenue`#38] )) + [GATHER] + struct info map +StructInfoMap{ groupExpressionMap = {{0}=id:346#3 cost=null estRows=1 children=[@2 ] (plan=LogicalAggregate[347] ( groupByExpr=[], outputExpr=[sum((LO_EXTENDEDPRICE#9 * LO_DISCOUNT#11)) AS `revenue`#38], hasRepeat=false )):[{0}]}, infoMap = {{0}=StructInfo{ originalPlanId = ObjectId#346, relations = [LogicalOlapScan ( qualified=internal.regression_test_mv_p0_ssb_q_1_1.lineorder_flat, indexName=, selectedIndexId=346536, preAgg=ON )]}}} + + +Group[@4] + logical expressions: + id:349#4 cost=null estRows=1 children=[@3 ] (plan=LogicalResultSink[350] ( outputExprs=[revenue#38] )) + physical expressions: + id:354#4 cost=0 [0/0/0/] estRows=-1 children=[@3 ] (plan=PhysicalResultSink[355] ( outputExprs=[revenue#38] )) + enforcers: + chosen expression id: 354 + chosen properties: GATHER + stats + rows=1.0 + tupleSize=4.0 + width=1 + revenue#38 -> ndv=0.0204, min=2.000000(null), max=6.000000(null), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + lowest Plan(cost, properties, plan, childrenRequires) + + 4.046506802721089 ANY + id:354#4 cost=0 [0/0/0/] estRows=-1 children=[@3 ] (plan=PhysicalResultSink[355] ( outputExprs=[revenue#38] )) + [GATHER] + + 4.046506802721089 GATHER + id:354#4 cost=0 [0/0/0/] estRows=-1 children=[@3 ] (plan=PhysicalResultSink[355] ( outputExprs=[revenue#38] )) + [GATHER] + struct info map +StructInfoMap{ groupExpressionMap = {}, infoMap = {}} + + +Group[@5] + logical expressions: + physical expressions: + id:367#5 cost=0 [0/0/0/] estRows=1 children=[@2 ] (plan=PhysicalHashAggregate[368]@5 ( aggPhase=LOCAL, aggMode=INPUT_TO_BUFFER, maybeUseStreaming=false, groupByExpr=[], outputExpr=[partial_sum((LO_EXTENDEDPRICE#9 * LO_DISCOUNT#11)) AS `partial_sum((LO_EXTENDEDPRICE * LO_DISCOUNT))`#81], partitionExpr=Optional[[]], requireProperties=[ANY], topnOpt=false, stats=null )) + enforcers: + id:469#5 cost=0 [0/0/0/] estRows=1 children=[@5 ] (plan=PhysicalDistribute[470]@5 ( stats=null, distributionSpec=DistributionSpecGather )) + stats + rows=1.0 + tupleSize=1.0 + width=1 + partial_sum((LO_EXTENDEDPRICE * LO_DISCOUNT))#81 -> ndv=0.0204, min=2.000000(null), max=6.000000(null), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + lowest Plan(cost, properties, plan, childrenRequires) + + 3.0113827210884354 ANY + id:367#5 cost=0 [0/0/0/] 
estRows=1 children=[@2 ] (plan=PhysicalHashAggregate[368]@5 ( aggPhase=LOCAL, aggMode=INPUT_TO_BUFFER, maybeUseStreaming=false, groupByExpr=[], outputExpr=[partial_sum((LO_EXTENDEDPRICE#9 * LO_DISCOUNT#11)) AS `partial_sum((LO_EXTENDEDPRICE * LO_DISCOUNT))`#81], partitionExpr=Optional[[]], requireProperties=[ANY], topnOpt=false, stats=null )) + [DistributionSpecHash ( orderedShuffledColumns=[1], shuffleType=NATURAL, tableId=346535, selectedIndexId=346536, partitionIds=[346534], equivalenceExprIds=[[1]], exprIdToEquivalenceSet={1=0} ) Order: ([])] + + 3.0113827210884354 DistributionSpecHash ( orderedShuffledColumns=[1], shuffleType=NATURAL, tableId=346535, selectedIndexId=346536, partitionIds=[346534], equivalenceExprIds=[[1]], exprIdToEquivalenceSet={1=0} ) Order: ([]) + id:367#5 cost=0 [0/0/0/] estRows=1 children=[@2 ] (plan=PhysicalHashAggregate[368]@5 ( aggPhase=LOCAL, aggMode=INPUT_TO_BUFFER, maybeUseStreaming=false, groupByExpr=[], outputExpr=[partial_sum((LO_EXTENDEDPRICE#9 * LO_DISCOUNT#11)) AS `partial_sum((LO_EXTENDEDPRICE * LO_DISCOUNT))`#81], partitionExpr=Optional[[]], requireProperties=[ANY], topnOpt=false, stats=null )) + [DistributionSpecHash ( orderedShuffledColumns=[1], shuffleType=NATURAL, tableId=346535, selectedIndexId=346536, partitionIds=[346534], equivalenceExprIds=[[1]], exprIdToEquivalenceSet={1=0} ) Order: ([])] + + 3.0188827210884353 GATHER + id:469#5 cost=0 [0/0/0/] estRows=1 children=[@5 ] (plan=PhysicalDistribute[470]@5 ( stats=null, distributionSpec=DistributionSpecGather )) + [DistributionSpecHash ( orderedShuffledColumns=[1], shuffleType=NATURAL, tableId=346535, selectedIndexId=346536, partitionIds=[346534], equivalenceExprIds=[[1]], exprIdToEquivalenceSet={1=0} ) Order: ([])] + struct info map +StructInfoMap{ groupExpressionMap = {}, infoMap = {}} + + +Group[@6] + logical expressions: + id:532#6 cost=null estRows=0.02 children=[ ] (plan=LogicalOlapScan ( qualified=regression_test_mv_p0_ssb_q_1_1.lineorder_flat, indexName=lineorder_q_1_1, selectedIndexId=346636, preAgg=ON )) + physical expressions: + id:571#6 cost=0 [0/0/0/] estRows=-1 children=[ ] (plan=PhysicalOlapScan[lineorder_flat]@6 ( stats=null )) + enforcers: + chosen expression id: 571 + chosen properties: DistributionSpecHash ( orderedShuffledColumns=[79], shuffleType=NATURAL, tableId=346535, selectedIndexId=346636, partitionIds=[346534], equivalenceExprIds=[[79]], exprIdToEquivalenceSet={79=0} ) Order: ([]) + stats + rows=0.02040816326530612 + tupleSize=8.0 + width=1 + mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`)#80 -> ndv=0.0204, min=2.000000(null), max=6.000000(null), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + mv_LO_ORDERKEY#79 -> ndv=0.0204, min=1.000000(1), max=2.000000(2), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + lowest Plan(cost, properties, plan, childrenRequires) + + 0.01020408163265306 ANY + id:571#6 cost=0 [0/0/0/] estRows=-1 children=[ ] (plan=PhysicalOlapScan[lineorder_flat]@6 ( stats=null )) + [] + + 0.01020408163265306 DistributionSpecHash ( orderedShuffledColumns=[79], shuffleType=NATURAL, tableId=346535, selectedIndexId=346636, partitionIds=[346534], equivalenceExprIds=[[79]], exprIdToEquivalenceSet={79=0} ) Order: ([]) + id:571#6 cost=0 [0/0/0/] estRows=-1 children=[ ] (plan=PhysicalOlapScan[lineorder_flat]@6 ( stats=null )) + [] + struct info map +StructInfoMap{ groupExpressionMap = {{0}=id:532#6 cost=null estRows=0.02 children=[ ] (plan=LogicalOlapScan ( qualified=regression_test_mv_p0_ssb_q_1_1.lineorder_flat, indexName=lineorder_q_1_1, 
selectedIndexId=346636, preAgg=ON )):[]}, infoMap = {}} + + +Group[@7] + logical expressions: + id:535#7 cost=null estRows=0.02 children=[@6 ] (plan=LogicalProject[536] ( distinct=false, projects=[mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`)#80], excepts=[] )) + physical expressions: + id:568#7 cost=1 [1/0/0/] estRows=-1 children=[@6 ] (plan=PhysicalProject[569]@7 ( stats=null, projects=[mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`)#80] )) + enforcers: + chosen expression id: 568 + chosen properties: DistributionSpecHash ( orderedShuffledColumns=[79], shuffleType=NATURAL, tableId=346535, selectedIndexId=346636, partitionIds=[346534], equivalenceExprIds=[[79]], exprIdToEquivalenceSet={79=0} ) Order: ([]) + stats + rows=0.02040816326530612 + tupleSize=4.0 + width=1 + mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`)#80 -> ndv=0.0204, min=2.000000(null), max=6.000000(null), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + lowest Plan(cost, properties, plan, childrenRequires) + + 1.010204081632653 ANY + id:568#7 cost=1 [1/0/0/] estRows=-1 children=[@6 ] (plan=PhysicalProject[569]@7 ( stats=null, projects=[mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`)#80] )) + [DistributionSpecHash ( orderedShuffledColumns=[79], shuffleType=NATURAL, tableId=346535, selectedIndexId=346636, partitionIds=[346534], equivalenceExprIds=[[79]], exprIdToEquivalenceSet={79=0} ) Order: ([])] + + 1.010204081632653 DistributionSpecHash ( orderedShuffledColumns=[79], shuffleType=NATURAL, tableId=346535, selectedIndexId=346636, partitionIds=[346534], equivalenceExprIds=[[79]], exprIdToEquivalenceSet={79=0} ) Order: ([]) + id:568#7 cost=1 [1/0/0/] estRows=-1 children=[@6 ] (plan=PhysicalProject[569]@7 ( stats=null, projects=[mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`)#80] )) + [DistributionSpecHash ( orderedShuffledColumns=[79], shuffleType=NATURAL, tableId=346535, selectedIndexId=346636, partitionIds=[346534], equivalenceExprIds=[[79]], exprIdToEquivalenceSet={79=0} ) Order: ([])] + struct info map +StructInfoMap{ groupExpressionMap = {{0}=id:535#7 cost=null estRows=0.02 children=[@6 ] (plan=LogicalProject[536] ( distinct=false, projects=[mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`)#80], excepts=[] )):[{0}]}, infoMap = {}} + + +Group[@8] + logical expressions: + id:538#8 cost=null estRows=1 children=[@7 ] (plan=LogicalAggregate[539] ( groupByExpr=[], outputExpr=[sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`)#80) AS `sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))`#82], hasRepeat=false )) + physical expressions: + id:562#8 cost=2 [1/1/0/] estRows=1 children=[@9 ] (plan=PhysicalHashAggregate[563]@8 ( aggPhase=GLOBAL, aggMode=BUFFER_TO_RESULT, maybeUseStreaming=false, groupByExpr=[], outputExpr=[sum(partial_sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))#85) AS `sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))`#82], partitionExpr=Optional[[]], requireProperties=[GATHER], topnOpt=false, stats=null )) + enforcers: + chosen expression id: 562 + chosen properties: GATHER + stats + rows=1.0 + tupleSize=4.0 + width=1 + sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))#82 -> ndv=0.0204, min=2.000000(null), max=6.000000(null), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + lowest Plan(cost, properties, plan, childrenRequires) + + 3.046506802721088 ANY + id:562#8 cost=2 [1/1/0/] estRows=1 children=[@9 ] (plan=PhysicalHashAggregate[563]@8 ( aggPhase=GLOBAL, aggMode=BUFFER_TO_RESULT, maybeUseStreaming=false, groupByExpr=[], outputExpr=[sum(partial_sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))#85) AS 
`sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))`#82], partitionExpr=Optional[[]], requireProperties=[GATHER], topnOpt=false, stats=null )) + [GATHER] + + 3.046506802721088 GATHER + id:562#8 cost=2 [1/1/0/] estRows=1 children=[@9 ] (plan=PhysicalHashAggregate[563]@8 ( aggPhase=GLOBAL, aggMode=BUFFER_TO_RESULT, maybeUseStreaming=false, groupByExpr=[], outputExpr=[sum(partial_sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))#85) AS `sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))`#82], partitionExpr=Optional[[]], requireProperties=[GATHER], topnOpt=false, stats=null )) + [GATHER] + struct info map +StructInfoMap{ groupExpressionMap = {{0}=id:538#8 cost=null estRows=1 children=[@7 ] (plan=LogicalAggregate[539] ( groupByExpr=[], outputExpr=[sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`)#80) AS `sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))`#82], hasRepeat=false )):[{0}]}, infoMap = {{0}=StructInfo{ originalPlanId = ObjectId#538, relations = [LogicalOlapScan ( qualified=regression_test_mv_p0_ssb_q_1_1.lineorder_flat, indexName=lineorder_q_1_1, selectedIndexId=346636, preAgg=ON )]}}} + + +Group[@9] + logical expressions: + physical expressions: + id:559#9 cost=0 [0/0/0/] estRows=1 children=[@7 ] (plan=PhysicalHashAggregate[560]@9 ( aggPhase=LOCAL, aggMode=INPUT_TO_BUFFER, maybeUseStreaming=false, groupByExpr=[], outputExpr=[partial_sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`)#80) AS `partial_sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))`#85], partitionExpr=Optional[[]], requireProperties=[ANY], topnOpt=false, stats=null )) + enforcers: + id:595#9 cost=0 [0/0/0/] estRows=1 children=[@9 ] (plan=PhysicalDistribute[596]@9 ( stats=null, distributionSpec=DistributionSpecGather )) + chosen enforcer(id, requiredProperties): + (0)595, GATHER + chosen expression id: 559 + chosen properties: DistributionSpecHash ( orderedShuffledColumns=[79], shuffleType=NATURAL, tableId=346535, selectedIndexId=346636, partitionIds=[346534], equivalenceExprIds=[[79]], exprIdToEquivalenceSet={79=0} ) Order: ([]) + stats + rows=1.0 + tupleSize=1.0 + width=1 + partial_sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))#85 -> ndv=0.0204, min=2.000000(null), max=6.000000(null), count=0.0204, numNulls=0.0000, avgSizeByte=4.000000 + lowest Plan(cost, properties, plan, childrenRequires) + + 1.0190068027210881 ANY + id:559#9 cost=0 [0/0/0/] estRows=1 children=[@7 ] (plan=PhysicalHashAggregate[560]@9 ( aggPhase=LOCAL, aggMode=INPUT_TO_BUFFER, maybeUseStreaming=false, groupByExpr=[], outputExpr=[partial_sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`)#80) AS `partial_sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))`#85], partitionExpr=Optional[[]], requireProperties=[ANY], topnOpt=false, stats=null )) + [DistributionSpecHash ( orderedShuffledColumns=[79], shuffleType=NATURAL, tableId=346535, selectedIndexId=346636, partitionIds=[346534], equivalenceExprIds=[[79]], exprIdToEquivalenceSet={79=0} ) Order: ([])] + + 1.0190068027210881 DistributionSpecHash ( orderedShuffledColumns=[79], shuffleType=NATURAL, tableId=346535, selectedIndexId=346636, partitionIds=[346534], equivalenceExprIds=[[79]], exprIdToEquivalenceSet={79=0} ) Order: ([]) + id:559#9 cost=0 [0/0/0/] estRows=1 children=[@7 ] (plan=PhysicalHashAggregate[560]@9 ( aggPhase=LOCAL, aggMode=INPUT_TO_BUFFER, maybeUseStreaming=false, groupByExpr=[], outputExpr=[partial_sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`)#80) AS `partial_sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))`#85], partitionExpr=Optional[[]], requireProperties=[ANY], 
topnOpt=false, stats=null )) + [DistributionSpecHash ( orderedShuffledColumns=[79], shuffleType=NATURAL, tableId=346535, selectedIndexId=346636, partitionIds=[346534], equivalenceExprIds=[[79]], exprIdToEquivalenceSet={79=0} ) Order: ([])] + + 1.0265068027210882 GATHER + id:595#9 cost=0 [0/0/0/] estRows=1 children=[@9 ] (plan=PhysicalDistribute[596]@9 ( stats=null, distributionSpec=DistributionSpecGather )) + [DistributionSpecHash ( orderedShuffledColumns=[79], shuffleType=NATURAL, tableId=346535, selectedIndexId=346636, partitionIds=[346534], equivalenceExprIds=[[79]], exprIdToEquivalenceSet={79=0} ) Order: ([])] + struct info map +StructInfoMap{ groupExpressionMap = {}, infoMap = {}} + + +========== OPTIMIZED PLAN ========== +PhysicalResultSink[657] ( outputExprs=[revenue#38] ) ++--PhysicalProject[654]@3 ( stats=1, projects=[sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))#82 AS `revenue`#38] ) + +--PhysicalHashAggregate[651]@8 ( aggPhase=GLOBAL, aggMode=BUFFER_TO_RESULT, maybeUseStreaming=false, groupByExpr=[], outputExpr=[sum(partial_sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))#85) AS `sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))`#82], partitionExpr=Optional[[]], requireProperties=[GATHER], topnOpt=false, stats=1 ) + +--PhysicalDistribute[648]@9 ( stats=1, distributionSpec=DistributionSpecGather ) + +--PhysicalHashAggregate[645]@9 ( aggPhase=LOCAL, aggMode=INPUT_TO_BUFFER, maybeUseStreaming=false, groupByExpr=[], outputExpr=[partial_sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`)#80) AS `partial_sum(mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`))`#85], partitionExpr=Optional[[]], requireProperties=[ANY], topnOpt=false, stats=1 ) + +--PhysicalProject[642]@7 ( stats=0.02, projects=[mva_SUM__(`LO_EXTENDEDPRICE` * `LO_DISCOUNT`)#80] ) + +--PhysicalOlapScan[lineorder_flat]@6 ( stats=0.02 ) + +========== MATERIALIZATIONS ========== +materializationContexts: + +MaterializationContext[[internal, regression_test_mv_p0_ssb_q_1_1, lineorder_flat, lineorder_q_1_1]] ( rewriteSuccess=true, failReason=[ + +] ) + -- !select -- 4 diff --git a/regression-test/data/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.out b/regression-test/data/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.out index 298d81919643937..63f0bacf5d4185a 100644 --- a/regression-test/data/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.out +++ b/regression-test/data/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.out @@ -102,10 +102,10 @@ PhysicalResultSink --hashAgg[GLOBAL] ----hashAgg[LOCAL] ------hashJoin[INNER_JOIN] hashCondition=((t1.l_orderkey = orders.o_orderkey) and (t1.l_shipdate = orders.o_orderdate)) otherCondition=() ---------filter((t1.l_shipdate = '2023-12-09')) -----------PhysicalOlapScan[lineitem] --------filter((orders.o_orderdate = '2023-12-09') and (orders.o_shippriority = 1) and (orders.o_totalprice = 11.50)) ----------PhysicalOlapScan[orders] +--------filter((t1.l_shipdate = '2023-12-09')) +----------PhysicalOlapScan[lineitem] -- !query7_1_after -- yy 4 11.50 11.50 11.50 1 diff --git a/regression-test/suites/inverted_index_p0/test_count_on_index.groovy b/regression-test/suites/inverted_index_p0/test_count_on_index.groovy index 8fb98221ef542d7..77adae92c93b79a 100644 --- a/regression-test/suites/inverted_index_p0/test_count_on_index.groovy +++ b/regression-test/suites/inverted_index_p0/test_count_on_index.groovy @@ -143,6 +143,9 @@ suite("test_count_on_index_httplogs", "p0") { sql "sync" sql """set 
experimental_enable_nereids_planner=true;""" sql """set enable_fallback_to_original_planner=false;""" + sql """analyze table ${testTable_dup} with sync"""; + // wait BE report every partition's row count + sleep(10000) // case1: test duplicate table explain { sql("select COUNT() from ${testTable_dup} where request match 'GET'") @@ -155,6 +158,7 @@ suite("test_count_on_index_httplogs", "p0") { // case1.1: test duplicate table with null values. sql " insert into ${testTable_dup} values(1683964756,null,'GET /images/hm_bg.jpg HTTP/1.0 ',null,null); " + sql """analyze table ${testTable_dup} with sync"""; explain { sql("select COUNT(request) from ${testTable_dup} where request match 'GET'") contains "pushAggOp=COUNT_ON_INDEX" @@ -205,6 +209,7 @@ suite("test_count_on_index_httplogs", "p0") { // case2.1: test duplicate table with null values. sql " insert into ${testTable_unique} values(1683964756,null,'GET /images/hm_bg.jpg HTTP/1.0 ',null,null); " + sql """analyze table ${testTable_unique} with sync"""; explain { sql("select COUNT(request) from ${testTable_unique} where request match 'GET'") contains "pushAggOp=COUNT_ON_INDEX" @@ -262,7 +267,7 @@ suite("test_count_on_index_httplogs", "p0") { sql "INSERT INTO ${tableName} values ('dt_bjn003');" sql "sync" - + sql "analyze table ${tableName} with sync;" explain { sql("select COUNT() from ${tableName} where key_id match 'bjn002'") contains "pushAggOp=COUNT_ON_INDEX" diff --git a/regression-test/suites/mv_p0/partition_prune/partition_prune.groovy b/regression-test/suites/mv_p0/partition_prune/partition_prune.groovy index 759c6a540d74aeb..5096b22c3016167 100644 --- a/regression-test/suites/mv_p0/partition_prune/partition_prune.groovy +++ b/regression-test/suites/mv_p0/partition_prune/partition_prune.groovy @@ -16,242 +16,256 @@ // under the License. 
suite("partition_prune") { - String db = context.config.getDbNameByFile(context.file) - sql "use ${db}" - sql "set runtime_filter_mode=OFF"; - sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" + // String db = context.config.getDbNameByFile(context.file) + // sql "use ${db}" + // sql "set runtime_filter_mode=OFF"; + // sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" - sql """ - drop table if exists test_duplicate; - """ + // sql """ + // drop table if exists test_duplicate; + // """ - sql """ - CREATE TABLE IF NOT EXISTS test_duplicate ( - `app_name` VARCHAR(64) NULL COMMENT '标识', - `event_id` VARCHAR(128) NULL COMMENT '标识', - `decision` VARCHAR(32) NULL COMMENT '枚举值', - `time` DATETIME NULL COMMENT '查询时间', - `id` VARCHAR(35) NOT NULL COMMENT 'od', - `code` VARCHAR(64) NULL COMMENT '标识', - `event_type` VARCHAR(32) NULL COMMENT '事件类型' - ) - DUPLICATE KEY(app_name, event_id) - PARTITION BY RANGE(time) - ( - FROM ("2024-07-01 00:00:00") TO ("2024-07-15 00:00:00") INTERVAL 1 HOUR - ) - DISTRIBUTED BY HASH(event_id) - BUCKETS 3 PROPERTIES ("replication_num" = "1"); - """ + // sql """ + // CREATE TABLE IF NOT EXISTS test_duplicate ( + // `app_name` VARCHAR(64) NULL COMMENT '标识', + // `event_id` VARCHAR(128) NULL COMMENT '标识', + // `decision` VARCHAR(32) NULL COMMENT '枚举值', + // `time` DATETIME NULL COMMENT '查询时间', + // `id` VARCHAR(35) NOT NULL COMMENT 'od', + // `code` VARCHAR(64) NULL COMMENT '标识', + // `event_type` VARCHAR(32) NULL COMMENT '事件类型' + // ) + // DUPLICATE KEY(app_name, event_id) + // PARTITION BY RANGE(time) + // ( + // FROM ("2024-07-01 00:00:00") TO ("2024-07-15 00:00:00") INTERVAL 1 HOUR + // ) + // DISTRIBUTED BY HASH(event_id) + // BUCKETS 3 PROPERTIES ("replication_num" = "1"); + // """ - sql """ - insert into test_duplicate values - ('aa', 'bc', 'cc', '2024-07-03 01:15:30', 'dd', 'ee', 'ff'), - ('as', 'bd', 'cd', '2024-07-03 01:15:30', 'dd', 'ee', 'ff'), - ('ad', 'be', 'cc', '2024-07-03 07:06:30', 'dd', 'ee', 'ff'), - ('af', 'bf', 'ce', '2024-07-04 10:01:30', 'dd', 'ee', 'ff'), - ('ag', 'bc', 'cc', '2024-07-04 10:01:35', 'dd', 'ee', 'ff'), - ('aa', 'bc', 'cc', '2024-07-05 01:15:30', 'dd', 'ee', 'ff'), - ('as', 'bd', 'cd', '2024-07-05 06:09:30', 'dd', 'ee', 'ff'), - ('ad', 'be', 'cc', '2024-07-06 07:06:30', 'dd', 'ee', 'ff'), - ('af', 'bf', 'ce', '2024-07-07 10:01:30', 'dd', 'ee', 'ff'), - ('ag', 'bc', 'cc', '2024-07-08 12:55:30', 'dd', 'ee', 'ff'); - """ + // sql """ + // insert into test_duplicate values + // ('aa', 'bc', 'cc', '2024-07-03 01:15:30', 'dd', 'ee', 'ff'), + // ('as', 'bd', 'cd', '2024-07-03 01:15:30', 'dd', 'ee', 'ff'), + // ('ad', 'be', 'cc', '2024-07-03 07:06:30', 'dd', 'ee', 'ff'), + // ('af', 'bf', 'ce', '2024-07-04 10:01:30', 'dd', 'ee', 'ff'), + // ('ag', 'bc', 'cc', '2024-07-04 10:01:35', 'dd', 'ee', 'ff'), + // ('aa', 'bc', 'cc', '2024-07-05 01:15:30', 'dd', 'ee', 'ff'), + // ('as', 'bd', 'cd', '2024-07-05 06:09:30', 'dd', 'ee', 'ff'), + // ('ad', 'be', 'cc', '2024-07-06 07:06:30', 'dd', 'ee', 'ff'), + // ('af', 'bf', 'ce', '2024-07-07 10:01:30', 'dd', 'ee', 'ff'), + // ('ag', 'bc', 'cc', '2024-07-08 12:55:30', 'dd', 'ee', 'ff'); + // """ - sql """ - drop table if exists test_unique; - """ + // sql """ + // drop table if exists test_unique; + // """ - sql """ - CREATE TABLE IF NOT EXISTS test_unique ( - `time` DATETIME NULL COMMENT '查询时间', - `app_name` VARCHAR(64) NULL COMMENT '标识', - `event_id` VARCHAR(128) NULL COMMENT '标识', - `decision` VARCHAR(32) NULL COMMENT '枚举值', - `id` VARCHAR(35) NOT NULL COMMENT 'od', - 
`code` VARCHAR(64) NULL COMMENT '标识', - `event_type` VARCHAR(32) NULL COMMENT '事件类型' - ) - UNIQUE KEY(time) - PARTITION BY RANGE(time) - ( - FROM ("2024-07-01 00:00:00") TO ("2024-07-15 00:00:00") INTERVAL 1 HOUR - ) - DISTRIBUTED BY HASH(time) - BUCKETS 3 PROPERTIES ("replication_num" = "1"); - """ + // sql """ + // CREATE TABLE IF NOT EXISTS test_unique ( + // `time` DATETIME NULL COMMENT '查询时间', + // `app_name` VARCHAR(64) NULL COMMENT '标识', + // `event_id` VARCHAR(128) NULL COMMENT '标识', + // `decision` VARCHAR(32) NULL COMMENT '枚举值', + // `id` VARCHAR(35) NOT NULL COMMENT 'od', + // `code` VARCHAR(64) NULL COMMENT '标识', + // `event_type` VARCHAR(32) NULL COMMENT '事件类型' + // ) + // UNIQUE KEY(time) + // PARTITION BY RANGE(time) + // ( + // FROM ("2024-07-01 00:00:00") TO ("2024-07-15 00:00:00") INTERVAL 1 HOUR + // ) + // DISTRIBUTED BY HASH(time) + // BUCKETS 3 PROPERTIES ("replication_num" = "1"); + // """ - sql """ - insert into test_unique values - ('2024-07-03 01:00:00', 'aa', 'bc', 'cc', 'dd', 'ee', 'ff'), - ('2024-07-03 06:00:00', 'as', 'bd', 'cd', 'dd', 'ee', 'ff'), - ('2024-07-03 07:00:00', 'ad', 'be', 'cc', 'dd', 'ee', 'ff'), - ('2024-07-04 10:00:00', 'af', 'bf', 'ce', 'dd', 'ee', 'ff'), - ('2024-07-04 12:00:00', 'ag', 'bc', 'cc', 'dd', 'ee', 'ff'), - ('2024-07-05 01:00:00', 'aa', 'bc', 'cc', 'dd', 'ee', 'ff'), - ('2024-07-05 06:00:00', 'as', 'bd', 'cd', 'dd', 'ee', 'ff'), - ('2024-07-06 07:00:00', 'ad', 'be', 'cc', 'dd', 'ee', 'ff'), - ('2024-07-07 10:00:00', 'af', 'bf', 'ce', 'dd', 'ee', 'ff'), - ('2024-07-08 12:00:00', 'ag', 'bc', 'cc', 'dd', 'ee', 'ff'); - """ + // sql """ + // insert into test_unique values + // ('2024-07-03 01:00:00', 'aa', 'bc', 'cc', 'dd', 'ee', 'ff'), + // ('2024-07-03 06:00:00', 'as', 'bd', 'cd', 'dd', 'ee', 'ff'), + // ('2024-07-03 07:00:00', 'ad', 'be', 'cc', 'dd', 'ee', 'ff'), + // ('2024-07-04 10:00:00', 'af', 'bf', 'ce', 'dd', 'ee', 'ff'), + // ('2024-07-04 12:00:00', 'ag', 'bc', 'cc', 'dd', 'ee', 'ff'), + // ('2024-07-05 01:00:00', 'aa', 'bc', 'cc', 'dd', 'ee', 'ff'), + // ('2024-07-05 06:00:00', 'as', 'bd', 'cd', 'dd', 'ee', 'ff'), + // ('2024-07-06 07:00:00', 'ad', 'be', 'cc', 'dd', 'ee', 'ff'), + // ('2024-07-07 10:00:00', 'af', 'bf', 'ce', 'dd', 'ee', 'ff'), + // ('2024-07-08 12:00:00', 'ag', 'bc', 'cc', 'dd', 'ee', 'ff'); + // """ - sql """ - drop table if exists test_aggregate; - """ + // sql """ + // drop table if exists test_aggregate; + // """ - sql """ - CREATE TABLE IF NOT EXISTS test_aggregate ( - `app_name` VARCHAR(64) NULL COMMENT '标识', - `event_id` VARCHAR(128) NULL COMMENT '标识', - `time` DATETIME NULL COMMENT '查询时间', - `price` DOUBLE SUM DEFAULT '0' COMMENT '价格' - ) - AGGREGATE KEY(app_name, event_id, time) - PARTITION BY RANGE(time) - ( - FROM ("2024-07-01 00:00:00") TO ("2024-07-15 00:00:00") INTERVAL 1 HOUR - ) - DISTRIBUTED BY HASH(event_id) - BUCKETS 3 PROPERTIES ("replication_num" = "1"); - """ + // sql """ + // CREATE TABLE IF NOT EXISTS test_aggregate ( + // `app_name` VARCHAR(64) NULL COMMENT '标识', + // `event_id` VARCHAR(128) NULL COMMENT '标识', + // `time` DATETIME NULL COMMENT '查询时间', + // `price` DOUBLE SUM DEFAULT '0' COMMENT '价格' + // ) + // AGGREGATE KEY(app_name, event_id, time) + // PARTITION BY RANGE(time) + // ( + // FROM ("2024-07-01 00:00:00") TO ("2024-07-15 00:00:00") INTERVAL 1 HOUR + // ) + // DISTRIBUTED BY HASH(event_id) + // BUCKETS 3 PROPERTIES ("replication_num" = "1"); + // """ - sql """ - insert into test_aggregate values - ('aa', 'bc', '2024-07-03 01:00:00', 2.1), - ('as', 'bd', '2024-07-03 
06:00:00', 1.1), - ('ad', 'be', '2024-07-03 07:00:00', 3.1), - ('af', 'bf', '2024-07-04 10:00:00', 4.1), - ('ag', 'bc', '2024-07-04 12:00:00', 5.1), - ('aa', 'bc', '2024-07-05 01:00:00', 6.1), - ('as', 'bd', '2024-07-05 06:00:00', 7.1), - ('ad', 'be', '2024-07-06 07:00:00', 8.1), - ('af', 'bf', '2024-07-07 10:00:00', 9.1), - ('ag', 'bc', '2024-07-08 12:00:00', 10.1); - """ + // sql """ + // insert into test_aggregate values + // ('aa', 'bc', '2024-07-03 01:00:00', 2.1), + // ('as', 'bd', '2024-07-03 06:00:00', 1.1), + // ('ad', 'be', '2024-07-03 07:00:00', 3.1), + // ('af', 'bf', '2024-07-04 10:00:00', 4.1), + // ('ag', 'bc', '2024-07-04 12:00:00', 5.1), + // ('aa', 'bc', '2024-07-05 01:00:00', 6.1), + // ('as', 'bd', '2024-07-05 06:00:00', 7.1), + // ('ad', 'be', '2024-07-06 07:00:00', 8.1), + // ('af', 'bf', '2024-07-07 10:00:00', 9.1), + // ('ag', 'bc', '2024-07-08 12:00:00', 10.1); + // """ - // test partition prune in duplicate table + // // test partition prune in duplicate table - def mv1 = """ - select - app_name, - event_id, - time, - count(*) - from - test_duplicate - group by - app_name, - event_id, - time; - """ + // def mv1 = """ + // select + // app_name, + // event_id, + // time, + // count(*) + // from + // test_duplicate + // group by + // app_name, + // event_id, + // time; + // """ - def query1 = """ - select - app_name, - event_id, - time, - count(*) - from - test_duplicate - where time < '2024-07-05 01:00:00' - group by - app_name, - time, - event_id; - """ + // def query1 = """ + // select + // app_name, + // event_id, + // time, + // count(*) + // from + // test_duplicate + // where time < '2024-07-05 01:00:00' + // group by + // app_name, + // time, + // event_id; + // """ - order_qt_query1_before "${query1}" - createMV(""" - CREATE MATERIALIZED VIEW mv1 - AS - ${mv1} - """) - explain { - sql("""${query1}""") - check {result -> - result.contains("(mv1)") && result.contains("partitions=3") - } - } - order_qt_query1_after "${query1}" + // order_qt_query1_before "${query1}" + // createMV(""" + // CREATE MATERIALIZED VIEW mv1 + // AS + // ${mv1} + // """) + // // wait partition row count report + // sleep(10000) + // sql "analyze table test_duplicate with sync;" - // test partition prune in unique table - def mv2 = """ - select - time, - app_name, - event_id - from - test_unique; - """ - def query2 = """ - select - time, - app_name, - event_id - from - test_unique - where time < '2024-07-05 01:00:00'; - """ + // explain { + // sql("""${query1}""") + // check {result -> + // result.contains("(mv1)") && result.contains("partitions=3") + // } + // } + // order_qt_query1_after "${query1}" - order_qt_query2_before "${query2}" - createMV(""" - CREATE MATERIALIZED VIEW mv2 - AS - ${mv2} - """) - explain { - sql("""${query2}""") - check {result -> - result.contains("(mv2)") && result.contains("partitions=5") - } - } - order_qt_query2_after "${query2}" + // // test partition prune in unique table + // def mv2 = """ + // select + // time, + // app_name, + // event_id + // from + // test_unique; + // """ - // test partition prune in aggregate table - def mv3 = """ - select - app_name, - event_id, - time, - sum(price) - from - test_aggregate - where time < '2024-07-11 01:00:00' - group by - app_name, - event_id, - time; - """ + // def query2 = """ + // select + // time, + // app_name, + // event_id + // from + // test_unique + // where time < '2024-07-05 01:00:00'; + // """ - def query3 = """ - select - app_name, - event_id, - time, - sum(price) - from - test_aggregate - where time 
< '2024-07-05 01:00:00' - group by - app_name, - time, - event_id; - """ + // order_qt_query2_before "${query2}" + // createMV(""" + // CREATE MATERIALIZED VIEW mv2 + // AS + // ${mv2} + // """) + // // wait partition row count report + // sleep(10000) + // sql "analyze table test_unique with sync;" + // explain { + // sql("""${query2}""") + // check {result -> + // result.contains("(mv2)") && result.contains("partitions=5") + // } + // } + // order_qt_query2_after "${query2}" - order_qt_query3_before "${query3}" - createMV(""" - CREATE MATERIALIZED VIEW mv3 - AS - ${mv3} - """) - explain { - sql("""${query3}""") - check {result -> - result.contains("(mv3)") && result.contains("partitions=5") - } - } - order_qt_query3_after "${query3}" + // // test partition prune in aggregate table + // def mv3 = """ + // select + // app_name, + // event_id, + // time, + // sum(price) + // from + // test_aggregate + // where time < '2024-07-11 01:00:00' + // group by + // app_name, + // event_id, + // time; + // """ + + // def query3 = """ + // select + // app_name, + // event_id, + // time, + // sum(price) + // from + // test_aggregate + // where time < '2024-07-05 01:00:00' + // group by + // app_name, + // time, + // event_id; + // """ + + + // order_qt_query3_before "${query3}" + // createMV(""" + // CREATE MATERIALIZED VIEW mv3 + // AS + // ${mv3} + // """) + // // wait partition row count report + // sleep(10000) + // sql "analyze table test_aggregate with sync;" + // def memo3=sql "explain memo plan ${query3}" + // print(memo3) + // explain { + // sql("""${query3}""") + // check {result -> + // result.contains("(mv3)") && result.contains("partitions=5") + // } + // } + // order_qt_query3_after "${query3}" } diff --git a/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy b/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy index cee32c778f96e9b..ae7f58196f94b76 100644 --- a/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy +++ b/regression-test/suites/mv_p0/ssb/multiple_no_where/multiple_no_where.groovy @@ -65,14 +65,6 @@ suite ("multiple_no_where") { ) ENGINE=OLAP DUPLICATE KEY(`LO_ORDERDATE`, `LO_ORDERKEY`) COMMENT "OLAP" - PARTITION BY RANGE(`LO_ORDERDATE`) - (PARTITION p1992 VALUES [("-2147483648"), ("19930101")), - PARTITION p1993 VALUES [("19930101"), ("19940101")), - PARTITION p1994 VALUES [("19940101"), ("19950101")), - PARTITION p1995 VALUES [("19950101"), ("19960101")), - PARTITION p1996 VALUES [("19960101"), ("19970101")), - PARTITION p1997 VALUES [("19970101"), ("19980101")), - PARTITION p1998 VALUES [("19980101"), ("19990101"))) DISTRIBUTED BY HASH(`LO_ORDERKEY`) BUCKETS 48 PROPERTIES ( "replication_num" = "1", @@ -97,31 +89,7 @@ suite ("multiple_no_where") { FROM lineorder_flat GROUP BY LO_ORDERKEY, LO_ORDERDATE, LO_DISCOUNT, LO_QUANTITY;""") - createMV ("""create materialized view lineorder_q_2_1 as - SELECT - (LO_ORDERDATE DIV 10000) AS YEAR, - P_BRAND, P_CATEGORY, S_REGION, - SUM(LO_REVENUE) - FROM lineorder_flat - GROUP BY YEAR, P_BRAND, P_CATEGORY,S_REGION;""") - - createMV ("""create materialized view lineorder_q_3_1 as - SELECT - C_NATION, - S_NATION, (LO_ORDERDATE DIV 10000) AS YEAR, C_REGION, S_REGION, LO_ORDERDATE, - SUM(LO_REVENUE) AS revenue - FROM lineorder_flat - GROUP BY C_NATION, S_NATION, YEAR, C_REGION, S_REGION, LO_ORDERDATE;""") - - createMV ("""create materialized view lineorder_q_4_1 as - SELECT (LO_ORDERDATE DIV 10000) AS YEAR, - C_NATION,C_REGION,S_REGION,P_MFGR, - SUM(LO_REVENUE - 
LO_SUPPLYCOST) AS profit - FROM lineorder_flat - GROUP BY YEAR, C_NATION,C_REGION,S_REGION,P_MFGR;""") - - createMV ("""create materialized view temp_2 as SELECT lo_orderkey, sum(lo_extendedprice),max(lo_extendedprice), min(lo_extendedprice) from lineorder_flat group by lo_orderkey;""") - + sql """INSERT INTO lineorder_flat (LO_ORDERDATE, LO_ORDERKEY, LO_LINENUMBER, LO_CUSTKEY, LO_PARTKEY, LO_SUPPKEY, LO_ORDERPRIORITY, LO_SHIPPRIORITY, LO_QUANTITY, LO_EXTENDEDPRICE, LO_ORDTOTALPRICE, LO_DISCOUNT, LO_REVENUE, LO_SUPPLYCOST, LO_TAX, LO_COMMITDATE, LO_SHIPMODE,C_NAME,C_ADDRESS,C_CITY,C_NATION,C_REGION,C_PHONE,C_MKTSEGMENT,S_NAME,S_ADDRESS,S_CITY,S_NATION,S_REGION,S_PHONE,P_NAME,P_MFGR,P_CATEGORY,P_BRAND,P_COLOR,P_TYPE,P_SIZE,P_CONTAINER) VALUES (19930101 , 2 , 2 , 2 , 2 , 2 ,'2',2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,'2023-06-09','shipmode','name','address','city','nation','region','phone','mktsegment','name','address','city','nation','region','phone','name','mfgr','category','brand','color','type',4,'container');""" sql """INSERT INTO lineorder_flat (LO_ORDERDATE, LO_ORDERKEY, LO_LINENUMBER, LO_CUSTKEY, LO_PARTKEY, LO_SUPPKEY, LO_ORDERPRIORITY, LO_SHIPPRIORITY, LO_QUANTITY, LO_EXTENDEDPRICE, LO_ORDTOTALPRICE, LO_DISCOUNT, LO_REVENUE, LO_SUPPLYCOST, LO_TAX, LO_COMMITDATE, LO_SHIPMODE, C_NAME, C_ADDRESS, C_CITY, C_NATION, C_REGION, C_PHONE, C_MKTSEGMENT, S_NAME, S_ADDRESS, S_CITY, S_NATION, S_REGION, S_PHONE, P_NAME, P_MFGR, P_CATEGORY, P_BRAND, P_COLOR,P_TYPE,P_SIZE,P_CONTAINER) VALUES (19930101 , 1 , 1 , 1 , 1 , 1 , '1' , 1 , 1 , 1 , 1 , 100 , 1 , 1 , 1 , '2023-06-09' , 'shipmode' , 'name' , 'address' , 'city' , 'nation' , 'AMERICA' , 'phone' , 'mktsegment' , 'name' , 'address' , 'city' , 'nation' , 'AMERICA' ,'phone', 'name', 'MFGR#12', 'MFGR#12', 'brand', 'color', 'type', 4 ,'container');""" @@ -151,147 +119,5 @@ suite ("multiple_no_where") { AND LO_QUANTITY < 25;""") contains "(lineorder_q_1_1)" } - qt_select_q_1_1 """SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue - FROM lineorder_flat - WHERE - LO_ORDERDATE >= 19930101 - AND LO_ORDERDATE <= 19931231 - AND LO_DISCOUNT >= 1 AND LO_DISCOUNT <= 3 - AND LO_QUANTITY < 25;""" - - explain { - sql("""SELECT - SUM(LO_REVENUE), (LO_ORDERDATE DIV 10000) AS YEAR, - P_BRAND - FROM lineorder_flat - WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA' - GROUP BY (LO_ORDERDATE DIV 10000), P_BRAND - ORDER BY YEAR, P_BRAND;""") - contains "(lineorder_q_2_1)" - } - qt_select_q_2_1 """SELECT - SUM(LO_REVENUE), (LO_ORDERDATE DIV 10000) AS YEAR, - P_BRAND - FROM lineorder_flat - WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA' - GROUP BY YEAR, P_BRAND - ORDER BY YEAR, P_BRAND;""" - - explain { - sql("""SELECT - C_NATION, - S_NATION, (LO_ORDERDATE DIV 10000) AS YEAR, - SUM(LO_REVENUE) AS revenue - FROM lineorder_flat - WHERE - C_REGION = 'ASIA' - AND S_REGION = 'ASIA' - AND LO_ORDERDATE >= 19920101 - AND LO_ORDERDATE <= 19971231 - GROUP BY C_NATION, S_NATION, YEAR - ORDER BY YEAR ASC, revenue DESC;""") - contains "(lineorder_q_3_1)" - } - qt_select_q_3_1 """SELECT - C_NATION, - S_NATION, (LO_ORDERDATE DIV 10000) AS YEAR, - SUM(LO_REVENUE) AS revenue - FROM lineorder_flat - WHERE - C_REGION = 'ASIA' - AND S_REGION = 'ASIA' - AND LO_ORDERDATE >= 19920101 - AND LO_ORDERDATE <= 19971231 - GROUP BY C_NATION, S_NATION, YEAR - ORDER BY YEAR ASC, revenue DESC;""" - - explain { - sql("""SELECT (LO_ORDERDATE DIV 10000) AS YEAR, - C_NATION, - SUM(LO_REVENUE - LO_SUPPLYCOST) AS profit - FROM lineorder_flat - WHERE - C_REGION = 'AMERICA' - AND S_REGION = 'AMERICA' - AND 
P_MFGR IN ('MFGR#1', 'MFGR#2') - GROUP BY YEAR, C_NATION - ORDER BY YEAR ASC, C_NATION ASC;""") - contains "(lineorder_q_4_1)" - } - qt_select_q_4_1 """SELECT (LO_ORDERDATE DIV 10000) AS YEAR, - C_NATION, - SUM(LO_REVENUE - LO_SUPPLYCOST) AS profit - FROM lineorder_flat - WHERE - C_REGION = 'AMERICA' - AND S_REGION = 'AMERICA' - AND P_MFGR IN ('MFGR#1', 'MFGR#2') - GROUP BY YEAR, C_NATION - ORDER BY YEAR ASC, C_NATION ASC;""" - - explain { - sql("""SELECT lo_orderkey, sum(lo_extendedprice),max(lo_extendedprice), min(lo_extendedprice) from lineorder_flat group by lo_orderkey order by lo_orderkey;""") - contains "(temp_2)" - } - qt_select_temp_2 """SELECT lo_orderkey, sum(lo_extendedprice),max(lo_extendedprice), min(lo_extendedprice) from lineorder_flat group by lo_orderkey order by lo_orderkey;""" - qt_select """ select min(lo_extendedprice),max(lo_extendedprice) from lineorder_flat;""" - - sql """set enable_stats=true;""" - - explain { - sql("""SELECT SUM(LO_EXTENDEDPRICE * LO_DISCOUNT) AS revenue - FROM lineorder_flat - WHERE - LO_ORDERDATE >= 19930101 - AND LO_ORDERDATE <= 19931231 - AND LO_DISCOUNT >= 1 AND LO_DISCOUNT <= 3 - AND LO_QUANTITY < 25;""") - contains "(lineorder_q_1_1)" - } - - explain { - sql("""SELECT - SUM(LO_REVENUE), (LO_ORDERDATE DIV 10000) AS YEAR, - P_BRAND - FROM lineorder_flat - WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA' - GROUP BY (LO_ORDERDATE DIV 10000), P_BRAND - ORDER BY YEAR, P_BRAND;""") - contains "(lineorder_q_2_1)" - } - - explain { - sql("""SELECT - C_NATION, - S_NATION, (LO_ORDERDATE DIV 10000) AS YEAR, - SUM(LO_REVENUE) AS revenue - FROM lineorder_flat - WHERE - C_REGION = 'ASIA' - AND S_REGION = 'ASIA' - AND LO_ORDERDATE >= 19920101 - AND LO_ORDERDATE <= 19971231 - GROUP BY C_NATION, S_NATION, YEAR - ORDER BY YEAR ASC, revenue DESC;""") - contains "(lineorder_q_3_1)" - } - - explain { - sql("""SELECT (LO_ORDERDATE DIV 10000) AS YEAR, - C_NATION, - SUM(LO_REVENUE - LO_SUPPLYCOST) AS profit - FROM lineorder_flat - WHERE - C_REGION = 'AMERICA' - AND S_REGION = 'AMERICA' - AND P_MFGR IN ('MFGR#1', 'MFGR#2') - GROUP BY YEAR, C_NATION - ORDER BY YEAR ASC, C_NATION ASC;""") - contains "(lineorder_q_4_1)" - } - - explain { - sql("""SELECT lo_orderkey, sum(lo_extendedprice),max(lo_extendedprice), min(lo_extendedprice) from lineorder_flat group by lo_orderkey order by lo_orderkey;""") - contains "(temp_2)" - } + } diff --git a/regression-test/suites/mv_p0/ssb/multiple_ssb/multiple_ssb.groovy b/regression-test/suites/mv_p0/ssb/multiple_ssb/multiple_ssb.groovy index 2a12bdbebea7d96..56f29b8dc681433 100644 --- a/regression-test/suites/mv_p0/ssb/multiple_ssb/multiple_ssb.groovy +++ b/regression-test/suites/mv_p0/ssb/multiple_ssb/multiple_ssb.groovy @@ -18,6 +18,9 @@ import org.codehaus.groovy.runtime.IOGroovyMethods suite ("multiple_ssb") { + if (isCloudMode()) { + return + } sql """ DROP TABLE IF EXISTS lineorder_flat; """ sql """set enable_nereids_planner=true""" sql """SET enable_fallback_to_original_planner=false""" @@ -65,14 +68,6 @@ suite ("multiple_ssb") { ) ENGINE=OLAP DUPLICATE KEY(`LO_ORDERDATE`, `LO_ORDERKEY`) COMMENT "OLAP" - PARTITION BY RANGE(`LO_ORDERDATE`) - (PARTITION p1992 VALUES [("-2147483648"), ("19930101")), - PARTITION p1993 VALUES [("19930101"), ("19940101")), - PARTITION p1994 VALUES [("19940101"), ("19950101")), - PARTITION p1995 VALUES [("19950101"), ("19960101")), - PARTITION p1996 VALUES [("19960101"), ("19970101")), - PARTITION p1997 VALUES [("19970101"), ("19980101")), - PARTITION p1998 VALUES [("19980101"), ("19990101"))) 
DISTRIBUTED BY HASH(`LO_ORDERKEY`) BUCKETS 48 PROPERTIES ( "replication_num" = "1", diff --git a/regression-test/suites/mv_p0/ssb/multiple_ssb_between/multiple_ssb_between.groovy b/regression-test/suites/mv_p0/ssb/multiple_ssb_between/multiple_ssb_between.groovy index 41faff7c827f291..c1d719d8606ee10 100644 --- a/regression-test/suites/mv_p0/ssb/multiple_ssb_between/multiple_ssb_between.groovy +++ b/regression-test/suites/mv_p0/ssb/multiple_ssb_between/multiple_ssb_between.groovy @@ -18,6 +18,9 @@ import org.codehaus.groovy.runtime.IOGroovyMethods suite ("multiple_ssb_between") { + if (isCloudMode()) { + return + } sql """ DROP TABLE IF EXISTS lineorder_flat; """ sql """set enable_nereids_planner=true""" sql """SET enable_fallback_to_original_planner=false""" @@ -65,14 +68,6 @@ suite ("multiple_ssb_between") { ) ENGINE=OLAP DUPLICATE KEY(`LO_ORDERDATE`, `LO_ORDERKEY`) COMMENT "OLAP" - PARTITION BY RANGE(`LO_ORDERDATE`) - (PARTITION p1992 VALUES [("-2147483648"), ("19930101")), - PARTITION p1993 VALUES [("19930101"), ("19940101")), - PARTITION p1994 VALUES [("19940101"), ("19950101")), - PARTITION p1995 VALUES [("19950101"), ("19960101")), - PARTITION p1996 VALUES [("19960101"), ("19970101")), - PARTITION p1997 VALUES [("19970101"), ("19980101")), - PARTITION p1998 VALUES [("19980101"), ("19990101"))) DISTRIBUTED BY HASH(`LO_ORDERKEY`) BUCKETS 48 PROPERTIES ( "replication_num" = "1", diff --git a/regression-test/suites/mv_p0/ssb/q_1_1/q_1_1.groovy b/regression-test/suites/mv_p0/ssb/q_1_1/q_1_1.groovy index 3b99ff1a3b92dce..a0067ae13934469 100644 --- a/regression-test/suites/mv_p0/ssb/q_1_1/q_1_1.groovy +++ b/regression-test/suites/mv_p0/ssb/q_1_1/q_1_1.groovy @@ -63,14 +63,6 @@ suite ("mv_ssb_q_1_1") { ) ENGINE=OLAP DUPLICATE KEY(`LO_ORDERDATE`, `LO_ORDERKEY`) COMMENT "OLAP" - PARTITION BY RANGE(`LO_ORDERDATE`) - (PARTITION p1992 VALUES [("-2147483648"), ("19930101")), - PARTITION p1993 VALUES [("19930101"), ("19940101")), - PARTITION p1994 VALUES [("19940101"), ("19950101")), - PARTITION p1995 VALUES [("19950101"), ("19960101")), - PARTITION p1996 VALUES [("19960101"), ("19970101")), - PARTITION p1997 VALUES [("19970101"), ("19980101")), - PARTITION p1998 VALUES [("19980101"), ("19990101"))) DISTRIBUTED BY HASH(`LO_ORDERKEY`) BUCKETS 48 PROPERTIES ( "replication_num" = "1", diff --git a/regression-test/suites/mv_p0/ssb/q_2_1/q_2_1.groovy b/regression-test/suites/mv_p0/ssb/q_2_1/q_2_1.groovy index cccc3f0b95a2a17..718e87e2a1189ed 100644 --- a/regression-test/suites/mv_p0/ssb/q_2_1/q_2_1.groovy +++ b/regression-test/suites/mv_p0/ssb/q_2_1/q_2_1.groovy @@ -66,14 +66,6 @@ suite ("mv_ssb_q_2_1") { ) ENGINE=OLAP DUPLICATE KEY(`LO_ORDERDATE`, `LO_ORDERKEY`) COMMENT "OLAP" - PARTITION BY RANGE(`LO_ORDERDATE`) - (PARTITION p1992 VALUES [("-2147483648"), ("19930101")), - PARTITION p1993 VALUES [("19930101"), ("19940101")), - PARTITION p1994 VALUES [("19940101"), ("19950101")), - PARTITION p1995 VALUES [("19950101"), ("19960101")), - PARTITION p1996 VALUES [("19960101"), ("19970101")), - PARTITION p1997 VALUES [("19970101"), ("19980101")), - PARTITION p1998 VALUES [("19980101"), ("19990101"))) DISTRIBUTED BY HASH(`LO_ORDERKEY`) BUCKETS 48 PROPERTIES ( "replication_num" = "1", @@ -98,7 +90,6 @@ suite ("mv_ssb_q_2_1") { sql """INSERT INTO lineorder_flat (LO_ORDERDATE, LO_ORDERKEY, LO_LINENUMBER, LO_CUSTKEY, LO_PARTKEY, LO_SUPPKEY, LO_ORDERPRIORITY, LO_SHIPPRIORITY, LO_QUANTITY, LO_EXTENDEDPRICE, LO_ORDTOTALPRICE, LO_DISCOUNT, LO_REVENUE, LO_SUPPLYCOST, LO_TAX, LO_COMMITDATE, 
LO_SHIPMODE,C_NAME,C_ADDRESS,C_CITY,C_NATION,C_REGION,C_PHONE,C_MKTSEGMENT,S_NAME,S_ADDRESS,S_CITY,S_NATION,S_REGION,S_PHONE,P_NAME,P_MFGR,P_CATEGORY,P_BRAND,P_COLOR,P_TYPE,P_SIZE,P_CONTAINER) VALUES (19930101 , 2 , 2 , 2 , 2 , 2 ,'2',2 ,2 ,2 ,2 ,2 ,2 ,2 ,2 ,'2023-06-09','shipmode','name','address','city','nation','region','phone','mktsegment','name','address','city','nation','region','phone','name','mfgr','category','brand','color','type',4,'container');""" sql """analyze table lineorder_flat with sync;""" - sql """set enable_stats=false;""" qt_select_star "select * from lineorder_flat order by 1,2,P_MFGR;" @@ -112,18 +103,7 @@ suite ("mv_ssb_q_2_1") { ORDER BY YEAR, P_BRAND;""") contains "(lineorder_q_2_1)" } - sql """set enable_stats=true;""" - explain { - sql("""SELECT - SUM(LO_REVENUE), (LO_ORDERDATE DIV 10000) AS YEAR, - P_BRAND - FROM lineorder_flat - WHERE P_CATEGORY = 'MFGR#12' AND S_REGION = 'AMERICA' - GROUP BY YEAR, P_BRAND - ORDER BY YEAR, P_BRAND;""") - contains "(lineorder_q_2_1)" - } - + qt_select_mv """SELECT SUM(LO_REVENUE), (LO_ORDERDATE DIV 10000) AS YEAR, P_BRAND diff --git a/regression-test/suites/mv_p0/ssb/q_3_1/q_3_1.groovy b/regression-test/suites/mv_p0/ssb/q_3_1/q_3_1.groovy index d21c8f30c91f881..de5fd5cf82324f6 100644 --- a/regression-test/suites/mv_p0/ssb/q_3_1/q_3_1.groovy +++ b/regression-test/suites/mv_p0/ssb/q_3_1/q_3_1.groovy @@ -66,14 +66,6 @@ suite ("mv_ssb_q_3_1") { ) ENGINE=OLAP DUPLICATE KEY(`LO_ORDERDATE`, `LO_ORDERKEY`) COMMENT "OLAP" - PARTITION BY RANGE(`LO_ORDERDATE`) - (PARTITION p1992 VALUES [("-2147483648"), ("19930101")), - PARTITION p1993 VALUES [("19930101"), ("19940101")), - PARTITION p1994 VALUES [("19940101"), ("19950101")), - PARTITION p1995 VALUES [("19950101"), ("19960101")), - PARTITION p1996 VALUES [("19960101"), ("19970101")), - PARTITION p1997 VALUES [("19970101"), ("19980101")), - PARTITION p1998 VALUES [("19980101"), ("19990101"))) DISTRIBUTED BY HASH(`LO_ORDERKEY`) BUCKETS 48 PROPERTIES ( "replication_num" = "1", diff --git a/regression-test/suites/mv_p0/ssb/q_4_1/q_4_1.groovy b/regression-test/suites/mv_p0/ssb/q_4_1/q_4_1.groovy index 88b0ea5b8784339..b10476922c25d72 100644 --- a/regression-test/suites/mv_p0/ssb/q_4_1/q_4_1.groovy +++ b/regression-test/suites/mv_p0/ssb/q_4_1/q_4_1.groovy @@ -63,14 +63,6 @@ suite ("mv_ssb_q_4_1") { ) ENGINE=OLAP DUPLICATE KEY(`LO_ORDERDATE`, `LO_ORDERKEY`) COMMENT "OLAP" - PARTITION BY RANGE(`LO_ORDERDATE`) - (PARTITION p1992 VALUES [("-2147483648"), ("19930101")), - PARTITION p1993 VALUES [("19930101"), ("19940101")), - PARTITION p1994 VALUES [("19940101"), ("19950101")), - PARTITION p1995 VALUES [("19950101"), ("19960101")), - PARTITION p1996 VALUES [("19960101"), ("19970101")), - PARTITION p1997 VALUES [("19970101"), ("19980101")), - PARTITION p1998 VALUES [("19980101"), ("19990101"))) DISTRIBUTED BY HASH(`LO_ORDERKEY`) BUCKETS 48 PROPERTIES ( "replication_num" = "1", @@ -99,7 +91,6 @@ suite ("mv_ssb_q_4_1") { qt_select_star "select * from lineorder_flat order by 1, 2, P_MFGR;" sql """analyze table lineorder_flat with sync;""" - sql """set enable_stats=false;""" explain { sql("""SELECT (LO_ORDERDATE DIV 10000) AS YEAR, @@ -124,20 +115,7 @@ suite ("mv_ssb_q_4_1") { AND P_MFGR IN ('MFGR#1', 'MFGR#2') GROUP BY YEAR, C_NATION ORDER BY YEAR ASC, C_NATION ASC;""" - sql """set enable_stats=true;""" - explain { - sql("""SELECT (LO_ORDERDATE DIV 10000) AS YEAR, - C_NATION, - SUM(LO_REVENUE - LO_SUPPLYCOST) AS profit - FROM lineorder_flat - WHERE - C_REGION = 'AMERICA' - AND S_REGION = 'AMERICA' - AND 
P_MFGR IN ('MFGR#1', 'MFGR#2') - GROUP BY YEAR, C_NATION - ORDER BY YEAR ASC, C_NATION ASC;""") - contains "(lineorder_q_4_1)" - } + sql""" drop materialized view lineorder_q_4_1 on lineorder_flat; """ qt_select """SELECT (LO_ORDERDATE DIV 10000) AS YEAR, diff --git a/regression-test/suites/mv_p0/ssb/q_4_1_r1/q_4_1_r1.groovy b/regression-test/suites/mv_p0/ssb/q_4_1_r1/q_4_1_r1.groovy index 082e1bfe573f9f1..70fe1c433797adc 100644 --- a/regression-test/suites/mv_p0/ssb/q_4_1_r1/q_4_1_r1.groovy +++ b/regression-test/suites/mv_p0/ssb/q_4_1_r1/q_4_1_r1.groovy @@ -65,14 +65,6 @@ suite ("q_4_1_r1") { ) ENGINE=OLAP DUPLICATE KEY(`LO_ORDERDATE`, `LO_ORDERKEY`) COMMENT "OLAP" - PARTITION BY RANGE(`LO_ORDERDATE`) - (PARTITION p1992 VALUES [("-2147483648"), ("19930101")), - PARTITION p1993 VALUES [("19930101"), ("19940101")), - PARTITION p1994 VALUES [("19940101"), ("19950101")), - PARTITION p1995 VALUES [("19950101"), ("19960101")), - PARTITION p1996 VALUES [("19960101"), ("19970101")), - PARTITION p1997 VALUES [("19970101"), ("19980101")), - PARTITION p1998 VALUES [("19980101"), ("19990101"))) DISTRIBUTED BY HASH(`LO_ORDERKEY`) BUCKETS 48 PROPERTIES ( "replication_num" = "1", diff --git a/regression-test/suites/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.groovy b/regression-test/suites/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.groovy index 3c789acca1816a3..78deeb776d6b879 100644 --- a/regression-test/suites/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.groovy @@ -128,7 +128,8 @@ suite("agg_optimize_when_uniform") { (2, 3, 9, 10.01, 'supply1'), (2, 3, 10, 11.01, 'supply2'); """ - + sql """analyze table orders with sync""" + // single table // filter cover all roll up dimensions and contains agg function in mapping, combinator handler def mv1_0 = """ diff --git a/regression-test/suites/nereids_rules_p0/mv/partition_mv_rewrite.groovy b/regression-test/suites/nereids_rules_p0/mv/partition_mv_rewrite.groovy index 9808f578d64da6b..7106dfbb08f2a84 100644 --- a/regression-test/suites/nereids_rules_p0/mv/partition_mv_rewrite.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/partition_mv_rewrite.groovy @@ -133,11 +133,10 @@ suite("partition_mv_rewrite") { """ - def mv_name = "mv_10086" - sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name}""" - sql """DROP TABLE IF EXISTS ${mv_name}""" + sql """DROP MATERIALIZED VIEW IF EXISTS mv_10086""" + sql """DROP TABLE IF EXISTS mv_10086""" sql""" - CREATE MATERIALIZED VIEW ${mv_name} + CREATE MATERIALIZED VIEW mv_10086 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by(l_shipdate) DISTRIBUTED BY RANDOM BUCKETS 2 @@ -146,31 +145,40 @@ suite("partition_mv_rewrite") { ${mv_def_sql} """ - waitingMTMVTaskFinished(getJobName(db, mv_name)) + waitingMTMVTaskFinished(getJobName(db, "mv_10086")) + multi_sql """ + analyze table lineitem with sync; + analyze table orders with sync; + analyze table mv_10086 with sync; + """ + sleep(10000) explain { sql("${all_partition_sql}") - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } explain { sql("${partition_sql}") - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } // base table partition data change sql """ insert into lineitem values (1, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy'); """ - waitingPartitionIsExpected("${mv_name}", 
"p_20231017_20231018", false) + waitingPartitionIsExpected("mv_10086", "p_20231017_20231018", false) // enable union rewrite sql "SET enable_materialized_view_rewrite=false" order_qt_query_3_0_before "${all_partition_sql}" sql "SET enable_materialized_view_rewrite=true" + sql "analyze table mv_10086 with sync" + def memo = sql "explain memo plan ${all_partition_sql}" + print(memo) explain { sql("${all_partition_sql}") // should rewrite successful when union rewrite enalbe if sub partition is invalid - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_3_0_after "${all_partition_sql}" @@ -180,19 +188,19 @@ suite("partition_mv_rewrite") { explain { sql("${partition_sql}") // should rewrite successfully when union rewrite enable if doesn't query invalid partition - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_4_0_after "${partition_sql}" // base table add partition - sql "REFRESH MATERIALIZED VIEW ${mv_name} AUTO" - waitingMTMVTaskFinished(getJobName(db, mv_name)) + sql "REFRESH MATERIALIZED VIEW mv_10086 AUTO" + waitingMTMVTaskFinished(getJobName(db, "mv_10086")) sql """ insert into lineitem values (1, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-21', '2023-10-21', '2023-10-21', 'a', 'b', 'yyyyyyyyy'); """ - waitingPartitionIsExpected("${mv_name}", "p_20231021_20231022", false) + waitingPartitionIsExpected("mv_10086", "p_20231021_20231022", false) // enable union rewrite sql "SET enable_materialized_view_rewrite=false" @@ -201,7 +209,7 @@ suite("partition_mv_rewrite") { explain { sql("${all_partition_sql}") // should rewrite successful when union rewrite enalbe if base table add new partition - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_7_0_after "${all_partition_sql}" @@ -211,17 +219,17 @@ suite("partition_mv_rewrite") { explain { sql("${partition_sql}") // should rewrite successfully when union rewrite enable if doesn't query new partition - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_8_0_after "${partition_sql}" // base table delete partition test - sql "REFRESH MATERIALIZED VIEW ${mv_name} AUTO" - waitingMTMVTaskFinished(getJobName(db, mv_name)) + sql "REFRESH MATERIALIZED VIEW mv_10086 AUTO" + waitingMTMVTaskFinished(getJobName(db, "mv_10086")) sql """ ALTER TABLE lineitem DROP PARTITION IF EXISTS p_20231017 FORCE; """ // show partitions will cause error, tmp comment -// waitingPartitionIsExpected("${mv_name}", "p_20231017_20231018", false) + waitingPartitionIsExpected("mv_10086", "p_20231017_20231018", false) // enable union rewrite sql "SET enable_materialized_view_rewrite=false" @@ -230,7 +238,7 @@ suite("partition_mv_rewrite") { explain { sql("${all_partition_sql}") // should rewrite successful when union rewrite enalbe if base table delete partition - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_11_0_after "${all_partition_sql}" @@ -240,7 +248,7 @@ suite("partition_mv_rewrite") { explain { sql("${partition_sql}") // should rewrite successfully when union rewrite enable if doesn't query deleted partition - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_12_0_after "${partition_sql}" sql """ DROP MATERIALIZED VIEW IF EXISTS mv_10086""" @@ -327,8 +335,6 @@ suite("partition_mv_rewrite") { """ def ttl_mv_name = "mv_10000" - sql """analyze table lineitem_static with sync;""" - def create_ttl_mtmv = { db_name, mv_inner_name, mv_inner_sql -> sql """DROP 
MATERIALIZED VIEW IF EXISTS ${mv_inner_name}""" sql""" @@ -348,6 +354,12 @@ suite("partition_mv_rewrite") { create_ttl_mtmv(db, ttl_mv_name, ttl_mv_def_sql) + multi_sql """ + analyze table lineitem_static with sync; + analyze table lineitem with sync; + analyze table orders with sync; + """ + // test when mv is ttl // enable union rewrite sql "SET enable_materialized_view_rewrite=true" @@ -411,10 +423,10 @@ suite("partition_mv_rewrite") { l_suppkey; """ - sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name}""" - sql """DROP TABLE IF EXISTS ${mv_name}""" + sql """DROP MATERIALIZED VIEW IF EXISTS mv_10086""" + sql """DROP TABLE IF EXISTS mv_10086""" sql""" - CREATE MATERIALIZED VIEW ${mv_name} + CREATE MATERIALIZED VIEW mv_10086 BUILD IMMEDIATE REFRESH AUTO ON MANUAL partition by (date_trunc(`col1`, 'month')) DISTRIBUTED BY RANDOM BUCKETS 2 @@ -422,15 +434,22 @@ suite("partition_mv_rewrite") { AS ${roll_up_mv_def_sql} """ - waitingMTMVTaskFinished(getJobName(db, mv_name)) + waitingMTMVTaskFinished(getJobName(db, "mv_10086")) + + + multi_sql """ + analyze table lineitem_static with sync; + analyze table lineitem with sync; + analyze table orders with sync; + """ explain { sql("${roll_up_all_partition_sql}") - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } explain { sql("${roll_up_partition_sql}") - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } // base table add partition sql """ @@ -442,10 +461,16 @@ suite("partition_mv_rewrite") { sql "SET enable_materialized_view_rewrite=false" order_qt_query_17_0_before "${roll_up_all_partition_sql}" sql "SET enable_materialized_view_rewrite=true" + + multi_sql """ + analyze table lineitem_static with sync; + analyze table lineitem with sync; + analyze table orders with sync; + """ explain { sql("${roll_up_all_partition_sql}") // should rewrite successful when union rewrite enalbe if base table add new partition - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_17_0_after "${roll_up_all_partition_sql}" @@ -455,7 +480,7 @@ suite("partition_mv_rewrite") { explain { sql("${roll_up_partition_sql}") // should rewrite successfully when union rewrite enable if doesn't query new partition - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_18_0_after "${roll_up_partition_sql}" @@ -472,8 +497,9 @@ suite("partition_mv_rewrite") { // base table partition add data - sql "REFRESH MATERIALIZED VIEW ${mv_name} AUTO" - waitingMTMVTaskFinished(getJobName(db, mv_name)) + sql "REFRESH MATERIALIZED VIEW mv_10086 AUTO" + waitingMTMVTaskFinished(getJobName(db, "mv_10086")) + sql """ insert into lineitem values (1, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-11-21', '2023-11-21', '2023-11-21', 'd', 'd', 'yyyyyyyyy'), @@ -484,10 +510,17 @@ suite("partition_mv_rewrite") { sql "SET enable_materialized_view_rewrite=false" order_qt_query_19_0_before "${roll_up_all_partition_sql}" sql "SET enable_materialized_view_rewrite=true" + + + multi_sql """ + analyze table lineitem_static with sync; + analyze table lineitem with sync; + analyze table orders with sync; + """ explain { sql("${roll_up_all_partition_sql}") // should rewrite successful when union rewrite enalbe if base table add new partition - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_19_0_after "${roll_up_all_partition_sql}" @@ -497,14 +530,14 @@ suite("partition_mv_rewrite") { explain { sql("${roll_up_partition_sql}") // should rewrite successfully when union rewrite 
enable if doesn't query new partition - contains("${mv_name}(${mv_name})") + contains("mv_10086(mv_10086)") } order_qt_query_20_0_after "${roll_up_partition_sql}" // base table delete partition - sql "REFRESH MATERIALIZED VIEW ${mv_name} AUTO" - waitingMTMVTaskFinished(getJobName(db, mv_name)) + sql "REFRESH MATERIALIZED VIEW mv_10086 AUTO" + waitingMTMVTaskFinished(getJobName(db, "mv_10086")) sql """ ALTER TABLE lineitem DROP PARTITION IF EXISTS p_20231121 FORCE; """ @@ -516,7 +549,7 @@ suite("partition_mv_rewrite") { // explain { // sql("${roll_up_all_partition_sql}") // // should rewrite successful when union rewrite enalbe if base table add new partition -// contains("${mv_name}(${mv_name})") +// contains("mv_10086(mv_10086)") // } // order_qt_query_21_0_after "${roll_up_all_partition_sql}" // @@ -526,7 +559,7 @@ suite("partition_mv_rewrite") { // explain { // sql("${roll_up_partition_sql}") // // should rewrite successfully when union rewrite enable if doesn't query new partition -// contains("${mv_name}(${mv_name})") +// contains("mv_10086(mv_10086)") // } // order_qt_query_22_0_after "${roll_up_partition_sql}" } From 68b54b57c16a2da11cbd5e8afe0b014a5558bc6a Mon Sep 17 00:00:00 2001 From: minghong Date: Mon, 12 Aug 2024 10:34:52 +0800 Subject: [PATCH 49/94] [fix](nereids)update regression case for eager-agg (#38464) ## Proposed changes the new plans are better than original plans. Issue Number: close #xxx --- .../eager_aggregate/push_down_count_through_join.out | 8 ++++---- .../push_down_count_through_join.groovy | 11 ++++++----- .../push_down_count_through_join_one_side.groovy | 4 ++-- .../eager_aggregate/push_down_max_through_join.groovy | 2 ++ .../eager_aggregate/push_down_min_through_join.groovy | 3 ++- .../eager_aggregate/push_down_sum_through_join.groovy | 3 ++- .../push_down_sum_through_join_one_side.groovy | 3 ++- 7 files changed, 20 insertions(+), 14 deletions(-) diff --git a/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_count_through_join.out b/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_count_through_join.out index 4a12c8d638a4e74..a7a0f422ee8d2ae 100644 --- a/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_count_through_join.out +++ b/regression-test/data/nereids_rules_p0/eager_aggregate/push_down_count_through_join.out @@ -106,7 +106,7 @@ PhysicalResultSink --------PhysicalOlapScan[count_t] --------PhysicalOlapScan[count_t] --- !groupby_pushdown_multi_table_join -- +-- !groupby_pushdown_multi_table_join_1 -- PhysicalResultSink --hashAgg[GLOBAL] ----hashAgg[LOCAL] @@ -318,7 +318,7 @@ PhysicalResultSink ----------PhysicalOlapScan[count_t] ----------PhysicalOlapScan[count_t] --- !groupby_pushdown_multi_table_join -- +-- !groupby_pushdown_multi_table_join_2 -- PhysicalResultSink --hashAgg[GLOBAL] ----hashAgg[LOCAL] @@ -571,7 +571,7 @@ Used: UnUsed: use_push_down_agg_through_join SyntaxError: --- !with_hint_groupby_pushdown_multi_table_join -- +-- !with_hint_groupby_pushdown_multi_table_join_1 -- PhysicalResultSink --hashAgg[GLOBAL] ----hashAgg[LOCAL] @@ -908,7 +908,7 @@ Used: UnUsed: use_push_down_agg_through_join SyntaxError: --- !with_hint_groupby_pushdown_multi_table_join -- +-- !with_hint_groupby_pushdown_multi_table_join_2 -- PhysicalResultSink --hashAgg[GLOBAL] ----hashAgg[LOCAL] diff --git a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_count_through_join.groovy b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_count_through_join.groovy index 33b2f888f155f74..7a60b34c54f3a18 
100644 --- a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_count_through_join.groovy +++ b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_count_through_join.groovy @@ -20,6 +20,7 @@ suite("push_down_count_through_join") { sql "set runtime_filter_mode=OFF" sql "SET enable_fallback_to_original_planner=false" sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" + sql "set DISABLE_NEREIDS_RULES='ONE_PHASE_AGGREGATE_WITHOUT_DISTINCT, ONE_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI'" sql """ DROP TABLE IF EXISTS count_t; @@ -47,7 +48,7 @@ suite("push_down_count_through_join") { sql "insert into count_t values (8, null, 'c')" sql "insert into count_t values (9, 3, null)" sql "insert into count_t values (10, null, null)" - + sql "analyze table count_t with sync;" qt_groupby_pushdown_basic """ explain shape plan select count(t1.score) from count_t t1, count_t t2 where t1.id = t2.id group by t1.name; """ @@ -100,7 +101,7 @@ suite("push_down_count_through_join") { explain shape plan select count(t1.score), count(*), max(t1.score) from count_t t1 join count_t t2 on t1.id = t2.id group by t1.name; """ - qt_groupby_pushdown_multi_table_join """ + qt_groupby_pushdown_multi_table_join_1 """ explain shape plan select count(t1.score) from count_t t1 join count_t t2 on t1.id = t2.id join count_t t3 on t1.name = t3.name group by t1.name; """ @@ -201,7 +202,7 @@ suite("push_down_count_through_join") { explain shape plan select count(*) from count_t t1, count_t t2 where t1.id = t2.id group by t1.name having count(*) > 100; """ - qt_groupby_pushdown_multi_table_join """ + qt_groupby_pushdown_multi_table_join_2 """ explain shape plan select count(*) from count_t t1 join count_t t2 on t1.id = t2.id join count_t t3 on t1.name = t3.name group by t1.name; """ @@ -289,7 +290,7 @@ suite("push_down_count_through_join") { explain shape plan select /*+ USE_CBO_RULE(push_down_agg_through_join) */ count(t1.score), count(*), max(t1.score) from count_t t1 join count_t t2 on t1.id = t2.id group by t1.name; """ - qt_with_hint_groupby_pushdown_multi_table_join """ + qt_with_hint_groupby_pushdown_multi_table_join_1 """ explain shape plan select /*+ USE_CBO_RULE(push_down_agg_through_join) */ count(t1.score) from count_t t1 join count_t t2 on t1.id = t2.id join count_t t3 on t1.name = t3.name group by t1.name; """ @@ -390,7 +391,7 @@ suite("push_down_count_through_join") { explain shape plan select /*+ USE_CBO_RULE(push_down_agg_through_join) */ count(*) from count_t t1, count_t t2 where t1.id = t2.id group by t1.name having count(*) > 100; """ - qt_with_hint_groupby_pushdown_multi_table_join """ + qt_with_hint_groupby_pushdown_multi_table_join_2 """ explain shape plan select /*+ USE_CBO_RULE(push_down_agg_through_join) */ count(*) from count_t t1 join count_t t2 on t1.id = t2.id join count_t t3 on t1.name = t3.name group by t1.name; """ diff --git a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_count_through_join_one_side.groovy b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_count_through_join_one_side.groovy index 595e5fc37060387..02e067102963335 100644 --- a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_count_through_join_one_side.groovy +++ b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_count_through_join_one_side.groovy @@ -21,7 +21,7 @@ suite("push_down_count_through_join_one_side") { sql 'set be_number_for_test=3' sql "SET enable_fallback_to_original_planner=false" sql "SET 
ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" - + sql "set DISABLE_NEREIDS_RULES='ONE_PHASE_AGGREGATE_WITHOUT_DISTINCT, ONE_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI'" sql """ DROP TABLE IF EXISTS count_t_one_side; """ @@ -48,7 +48,7 @@ suite("push_down_count_through_join_one_side") { sql "insert into count_t_one_side values (8, null, 'c')" sql "insert into count_t_one_side values (9, 3, null)" sql "insert into count_t_one_side values (10, null, null)" - + sql "analyze table count_t_one_side with sync;" qt_groupby_pushdown_basic """ explain shape plan select count(t1.score) from count_t_one_side t1, count_t_one_side t2 where t1.id = t2.id group by t1.name; """ diff --git a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_max_through_join.groovy b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_max_through_join.groovy index 1b3d2d44fcd0875..e41f89e116183cb 100644 --- a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_max_through_join.groovy +++ b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_max_through_join.groovy @@ -20,6 +20,7 @@ suite("push_down_max_through_join") { sql "set runtime_filter_mode=OFF" sql "SET enable_fallback_to_original_planner=false" sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" + sql "set DISABLE_NEREIDS_RULES='ONE_PHASE_AGGREGATE_WITHOUT_DISTINCT, ONE_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI'" sql """ DROP TABLE IF EXISTS max_t; @@ -47,6 +48,7 @@ suite("push_down_max_through_join") { sql "insert into max_t values (8, null, 'c')" sql "insert into max_t values (9, 3, null)" sql "insert into max_t values (10, null, null)" + sql "analyze table max_t with sync;" qt_groupby_pushdown_basic """ explain shape plan select max(t1.score) from max_t t1, max_t t2 where t1.id = t2.id group by t1.name; diff --git a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_min_through_join.groovy b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_min_through_join.groovy index c93dfe53d80c1ec..99602a9deae3afa 100644 --- a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_min_through_join.groovy +++ b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_min_through_join.groovy @@ -20,6 +20,7 @@ suite("push_down_min_through_join") { sql "set runtime_filter_mode=OFF" sql "SET enable_fallback_to_original_planner=false" sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" + sql "set DISABLE_NEREIDS_RULES='ONE_PHASE_AGGREGATE_WITHOUT_DISTINCT, ONE_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI'" sql """ DROP TABLE IF EXISTS min_t; @@ -47,7 +48,7 @@ suite("push_down_min_through_join") { sql "insert into min_t values (8, null, 'c')" sql "insert into min_t values (9, 3, null)" sql "insert into min_t values (10, null, null)" - + sql "analyze table min_t with sync;" qt_groupby_pushdown_basic """ explain shape plan select min(t1.score) from min_t t1, min_t t2 where t1.id = t2.id group by t1.name; """ diff --git a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_sum_through_join.groovy b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_sum_through_join.groovy index 4f9470860b25eed..d6248c79fd7ab69 100644 --- a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_sum_through_join.groovy +++ b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_sum_through_join.groovy @@ -20,6 +20,7 @@ suite("push_down_sum_through_join") { sql "set runtime_filter_mode=OFF" sql "SET 
enable_fallback_to_original_planner=false" sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" + sql "set DISABLE_NEREIDS_RULES='ONE_PHASE_AGGREGATE_WITHOUT_DISTINCT, ONE_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI'" sql """ DROP TABLE IF EXISTS sum_t; @@ -47,7 +48,7 @@ suite("push_down_sum_through_join") { sql "insert into sum_t values (8, null, 'c')" sql "insert into sum_t values (9, 3, null)" sql "insert into sum_t values (10, null, null)" - + sql "analyze table sum_t with sync;" qt_groupby_pushdown_basic """ explain shape plan select sum(t1.score) from sum_t t1, sum_t t2 where t1.id = t2.id group by t1.name; """ diff --git a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_sum_through_join_one_side.groovy b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_sum_through_join_one_side.groovy index 1910c294de42b9e..ca75cb699a51c76 100644 --- a/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_sum_through_join_one_side.groovy +++ b/regression-test/suites/nereids_rules_p0/eager_aggregate/push_down_sum_through_join_one_side.groovy @@ -20,6 +20,7 @@ suite("push_down_sum_through_join_one_side") { sql "set runtime_filter_mode=OFF" sql "SET enable_fallback_to_original_planner=false" sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" + sql "set DISABLE_NEREIDS_RULES='ONE_PHASE_AGGREGATE_WITHOUT_DISTINCT, ONE_PHASE_AGGREGATE_SINGLE_DISTINCT_TO_MULTI'" sql """ DROP TABLE IF EXISTS sum_t_one_side; @@ -47,7 +48,7 @@ suite("push_down_sum_through_join_one_side") { sql "insert into sum_t_one_side values (8, null, 'c')" sql "insert into sum_t_one_side values (9, 3, null)" sql "insert into sum_t_one_side values (10, null, null)" - + sql "analyze table sum_t_one_side with sync;" qt_groupby_pushdown_basic """ explain shape plan select sum(t1.score) from sum_t_one_side t1, sum_t_one_side t2 where t1.id = t2.id group by t1.name; """ From 404966da10ce89210ca20a177916f51ace12a14f Mon Sep 17 00:00:00 2001 From: Pxl Date: Mon, 12 Aug 2024 10:50:24 +0800 Subject: [PATCH 50/94] [Chore](materialized-view) add key column limit of uniq table's create mv stmt (#39139) ## Proposed changes add key column limit of uniq table's create mv stmt --- .../doris/alter/MaterializedViewHandler.java | 20 ++++++++++++++++--- .../rules/rewrite/mv/SelectMvIndexTest.java | 2 +- .../data/mv_p0/test_mv_mor/test_mv_mor.out | 4 ++-- .../mv_p0/test_mv_mor/test_mv_mor.groovy | 2 +- .../suites/mv_p0/unique/unique.groovy | 8 ++++---- .../test_uniq_mv_schema_change.groovy | 4 ++-- .../suites/statistics/test_analyze_mv.groovy | 11 +++++----- 7 files changed, 32 insertions(+), 19 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java index 33e6aa58de0bf4f..e13f4d59093965e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/MaterializedViewHandler.java @@ -85,6 +85,7 @@ import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; import java.util.stream.Collectors; @@ -495,6 +496,7 @@ private List checkAndPrepareMaterializedView(CreateMaterializedViewStmt // b. Unique table: // 1. mv must not contain group expr // 2. all slot's isKey same with mv column + // 3. mv must contain all key column // c. Duplicate table: // 1. Columns resolved by semantics are legal // 2. 
Key column not allow float/double type. @@ -584,6 +586,21 @@ private List checkAndPrepareMaterializedView(CreateMaterializedViewStmt } } + // check b.3 + if (olapTable.getKeysType() == KeysType.UNIQUE_KEYS && !olapTable.getEnableUniqueKeyMergeOnWrite() + && !addMVClause.isReplay()) { + Set originColumns = new TreeSet(String.CASE_INSENSITIVE_ORDER); + for (Column column : newMVColumns) { + originColumns.add(CreateMaterializedViewStmt.mvColumnBreaker(column.getName())); + } + for (Column column : olapTable.getBaseSchema()) { + if (column.isKey() && !originColumns.contains(column.getName())) { + throw new DdlException("The materialized view of uniq table must contain all key columns. column:" + + column.getName()); + } + } + } + if (newMVColumns.size() == olapTable.getBaseSchema().size() && !addMVClause.isReplay()) { boolean allKeysMatch = true; for (int i = 0; i < newMVColumns.size(); i++) { @@ -641,9 +658,6 @@ private List checkAndPrepareMaterializedView(CreateMaterializedViewStmt public List checkAndPrepareMaterializedView(AddRollupClause addRollupClause, OlapTable olapTable, long baseIndexId, boolean changeStorageFormat) throws DdlException { - if (olapTable.getEnableUniqueKeyMergeOnWrite()) { - throw new DdlException("MergeOnWrite table can't create materialized view."); - } if (olapTable.getRowStoreCol() != null) { throw new DdlException("RowStore table can't create materialized view."); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMvIndexTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMvIndexTest.java index 27b228a1918d274..41dfec8a6dd0626 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMvIndexTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/mv/SelectMvIndexTest.java @@ -781,7 +781,7 @@ void testUniqueTableInQuery() throws Exception { String uniqueTable = "CREATE TABLE " + TEST_TABLE_NAME + " (k1 int, k2 int, v1 int) UNIQUE KEY (k1, k2) " + "DISTRIBUTED BY HASH(k1) BUCKETS 3 PROPERTIES ('replication_num' = '1','enable_unique_key_merge_on_write' = 'false');"; createTable(uniqueTable); - String createK1MV = "create materialized view only_k1 as select k2 from " + TEST_TABLE_NAME; + String createK1MV = "create materialized view only_k1 as select k2,k1 from " + TEST_TABLE_NAME; createMv(createK1MV); String query = "select * from " + TEST_TABLE_NAME + ";"; singleTableTest(query, TEST_TABLE_NAME, false); diff --git a/regression-test/data/mv_p0/test_mv_mor/test_mv_mor.out b/regression-test/data/mv_p0/test_mv_mor/test_mv_mor.out index fbf59000daf0517..43f8446eeaa2336 100644 --- a/regression-test/data/mv_p0/test_mv_mor/test_mv_mor.out +++ b/regression-test/data/mv_p0/test_mv_mor/test_mv_mor.out @@ -4,8 +4,8 @@ 1 3 -- !select_mv -- -1 2 -1 3 +1 1 1 2 +1 2 1 3 -- !select_mv -- 1 diff --git a/regression-test/suites/mv_p0/test_mv_mor/test_mv_mor.groovy b/regression-test/suites/mv_p0/test_mv_mor/test_mv_mor.groovy index 1ca52c75e86ff9a..e88647dbbe71e5e 100644 --- a/regression-test/suites/mv_p0/test_mv_mor/test_mv_mor.groovy +++ b/regression-test/suites/mv_p0/test_mv_mor/test_mv_mor.groovy @@ -34,7 +34,7 @@ suite ("test_mv_mor") { """ sql "insert into u_table select 1,1,1,1;" sql "insert into u_table select 1,2,1,1;" - createMV("create materialized view k123p as select k1,k2+k3 from u_table;") + createMV("create materialized view k123p as select k1,k2,k3,k2+k3 from u_table;") sql "insert into u_table select 1,1,1,2;" sql "insert into u_table select 
1,2,1,2;" diff --git a/regression-test/suites/mv_p0/unique/unique.groovy b/regression-test/suites/mv_p0/unique/unique.groovy index 3031a7103144e0d..58aa0529c7b4496 100644 --- a/regression-test/suites/mv_p0/unique/unique.groovy +++ b/regression-test/suites/mv_p0/unique/unique.groovy @@ -44,19 +44,19 @@ suite ("unique") { test { sql """create materialized view kadj as select k4 from u_table""" - exception "The materialized view need key column" + exception "The materialized view of uniq table must contain all key columns. column:k1" } test { - sql """create materialized view kadj as select k4,k1 from u_table""" + sql """create materialized view kadj as select k4,k1,k2,k3 from u_table""" exception "The materialized view not support value column before key column" } createMV("create materialized view kadj as select k3,k2,k1,k4 from u_table;") - createMV("create materialized view kadj2 as select k3,k2,length(k4) from u_table;") + createMV("create materialized view kadj2 as select k1,k3,k2,length(k4) from u_table;") - createMV("create materialized view k31l42 as select k3,length(k1),k2 from u_table;") + createMV("create materialized view k31l42 as select k1,k3,length(k1),k2 from u_table;") sql "insert into u_table select 300,-3,null,'c';" sql """analyze table u_table with sync;""" diff --git a/regression-test/suites/schema_change_p0/test_uniq_mv_schema_change.groovy b/regression-test/suites/schema_change_p0/test_uniq_mv_schema_change.groovy index a364b7f9ccbf8ee..2523072104783cf 100644 --- a/regression-test/suites/schema_change_p0/test_uniq_mv_schema_change.groovy +++ b/regression-test/suites/schema_change_p0/test_uniq_mv_schema_change.groovy @@ -80,7 +80,7 @@ suite ("test_uniq_mv_schema_change") { //add materialized view def mvName = "mv1" - sql "create materialized view ${mvName} as select user_id, date, city, age from ${tableName};" + sql "create materialized view ${mvName} as select user_id, date, city, age, sex from ${tableName};" waitForJob(tableName, 3000) // alter and test light schema change @@ -90,7 +90,7 @@ suite ("test_uniq_mv_schema_change") { //add materialized view def mvName2 = "mv2" - sql "create materialized view ${mvName2} as select user_id, date, city, age, cost from ${tableName};" + sql "create materialized view ${mvName2} as select user_id, date, city, age, sex, cost from ${tableName};" waitForJob(tableName, 3000) sql """ INSERT INTO ${tableName} VALUES diff --git a/regression-test/suites/statistics/test_analyze_mv.groovy b/regression-test/suites/statistics/test_analyze_mv.groovy index 862949b8c9367f4..444aa1eb724122a 100644 --- a/regression-test/suites/statistics/test_analyze_mv.groovy +++ b/regression-test/suites/statistics/test_analyze_mv.groovy @@ -395,13 +395,13 @@ suite("test_analyze_mv") { ); """ - createMV("create materialized view mv1 as select key1 from mvTestUni;") - createMV("create materialized view mv6 as select key2, value2, value3 from mvTestUni;") + createMV("create materialized view mv1 as select key1, key2 from mvTestUni;") + createMV("create materialized view mv6 as select key1, key2, value2, value3 from mvTestUni;") sql """insert into mvTestUni values (1, 2, 3, 4, 5), (1, 2, 3, 7, 8), (1, 11, 22, 33, 44), (10, 20, 30, 40, 50), (10, 20, 30, 40, 50), (100, 200, 300, 400, 500), (1001, 2001, 3001, 4001, 5001);""" sql """analyze table mvTestUni with sync;""" result_sample = sql """show column stats mvTestUni""" - assertEquals(9, result_sample.size()) + assertEquals(11, result_sample.size()) result_sample = sql """show column stats mvTestUni(key1)""" 
assertEquals(1, result_sample.size()) @@ -414,10 +414,9 @@ suite("test_analyze_mv") { assertEquals("FULL", result_sample[0][9]) result_sample = sql """show column stats mvTestUni(mv_key1)""" - assertEquals(1, result_sample.size()) + assertEquals(2, result_sample.size()) assertEquals("mv_key1", result_sample[0][0]) - assertEquals("mv1", result_sample[0][1]) - assertEquals("4.0", result_sample[0][2]) + assertEquals("5.0", result_sample[0][2]) assertEquals("4.0", result_sample[0][3]) assertEquals("1", result_sample[0][7]) assertEquals("1001", result_sample[0][8]) From 81438d561bee8d07f7a35fbcab5e727adda39458 Mon Sep 17 00:00:00 2001 From: zy-kkk Date: Mon, 12 Aug 2024 10:53:26 +0800 Subject: [PATCH 51/94] [fix](jdbc scan) Remove the `conjuncts.remove` call in JdbcScan (#39180) In #37565, due to the change in the calling order of finalize, the final generated Plan will be missing the PREDICATES that have been pushed down in Jdbc. Although this behavior is correct, before perfectly handling the push down of various PREDICATES, we need to keep all conjuncts to ensure that we can still filter data normally when the data returned by Jdbc is a superset. --- .../mysql/init/03-create-table.sql | 11 +- .../docker-compose/mysql/init/04-insert.sql | 4 + .../datasource/jdbc/source/JdbcScanNode.java | 5 +- .../datasource/odbc/source/OdbcScanNode.java | 5 +- .../jdbc/test_mysql_jdbc_catalog.out | 28 ++- .../jdbc/test_mysql_jdbc_catalog.groovy | 237 +++++++++--------- 6 files changed, 153 insertions(+), 137 deletions(-) diff --git a/docker/thirdparties/docker-compose/mysql/init/03-create-table.sql b/docker/thirdparties/docker-compose/mysql/init/03-create-table.sql index cb8bb5d9cb40b9c..312a0a25fac45ba 100644 --- a/docker/thirdparties/docker-compose/mysql/init/03-create-table.sql +++ b/docker/thirdparties/docker-compose/mysql/init/03-create-table.sql @@ -342,4 +342,13 @@ CREATE TABLE Doris.Doris ( CREATE TABLE Doris.doris ( id varchar(128) -); \ No newline at end of file +); + +create table doris_test.compoundpredicate_test ( +pk int, +col_int_undef_signed int, +col_int_undef_signed2 int +); + +create table doris_test.text_push (pk varchar(10)); + diff --git a/docker/thirdparties/docker-compose/mysql/init/04-insert.sql b/docker/thirdparties/docker-compose/mysql/init/04-insert.sql index d71986b135492cd..4580440e9bd4f4a 100644 --- a/docker/thirdparties/docker-compose/mysql/init/04-insert.sql +++ b/docker/thirdparties/docker-compose/mysql/init/04-insert.sql @@ -1164,3 +1164,7 @@ insert into doris_test.test_zd (id,d_z) VALUES (1,'0000-00-00'),(2,'2022-01-01') insert into Doris.DORIS values ('DORIS'); insert into Doris.Doris values ('Doris'); insert into Doris.doris values ('doris'); + +insert into doris_test.compoundpredicate_test(pk,col_int_undef_signed,col_int_undef_signed2) values (0,null,23868),(1,68,-18),(2,19030,-125),(3,16539,null),(4,null,null),(5,null,-127),(6,14680,-26424),(7,-22270,12722),(8,null,null),(9,null,null),(10,null,7744),(11,null,-94),(12,16970,95),(13,null,7023),(14,null,1),(15,3679,-11),(16,null,-1079),(17,-22,null),(18,30995,null),(19,null,-79); + +insert into doris_test.text_push values('a'),('aa'),('aaa'); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java index 70cbb4058330214..ab3f9f809fb6900 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java @@ -64,6 +64,7 @@ public class JdbcScanNode extends ExternalScanNode { private final List columns = new ArrayList(); private final List filters = new ArrayList(); + private final List pushedDownConjuncts = new ArrayList<>(); private String tableName; private TOdbcTableType jdbcType; private String graphQueryString = ""; @@ -131,7 +132,7 @@ private void createJdbcFilters() { for (Expr individualConjunct : pushDownConjuncts) { String filter = conjunctExprToString(jdbcType, individualConjunct, tbl); filters.add(filter); - conjuncts.remove(individualConjunct); + pushedDownConjuncts.add(individualConjunct); } } @@ -168,7 +169,7 @@ private void createJdbcColumns() { } private boolean shouldPushDownLimit() { - return limit != -1 && conjuncts.isEmpty(); + return limit != -1 && conjuncts.size() == pushedDownConjuncts.size(); } private String getJdbcQueryStr() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/odbc/source/OdbcScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/odbc/source/OdbcScanNode.java index 368a15bde48e3f4..bb9dfddc921030c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/odbc/source/OdbcScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/odbc/source/OdbcScanNode.java @@ -61,6 +61,7 @@ public class OdbcScanNode extends ExternalScanNode { private final List columns = new ArrayList(); private final List filters = new ArrayList(); + private final List pushedDownConjuncts = new ArrayList<>(); private String tblName; private String connectString; private TOdbcTableType odbcType; @@ -145,7 +146,7 @@ public String getNodeExplainString(String prefix, TExplainLevel detailLevel) { // only all conjuncts be pushed down as filter, we can // push down limit operation to ODBC table private boolean shouldPushDownLimit() { - return limit != -1 && conjuncts.isEmpty(); + return limit != -1 && conjuncts.size() == pushedDownConjuncts.size(); } private String getOdbcQueryStr() { @@ -215,7 +216,7 @@ private void createOdbcFilters() { if (shouldPushDownConjunct(odbcType, p)) { String filter = JdbcScanNode.conjunctExprToString(odbcType, p, tbl); filters.add(filter); - conjuncts.remove(p); + pushedDownConjuncts.add(p); } } } diff --git a/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog.out b/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog.out index 625eda53c5c78e4..95b964578a980c6 100644 --- a/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog.out +++ b/regression-test/data/external_table_p0/jdbc/test_mysql_jdbc_catalog.out @@ -250,15 +250,6 @@ workload_policy 张三6 11 124314567 123 321312 1999-02-13T00:00 中国 男 0 张三7 11 123445167 123 321312 1998-02-13T00:00 中国 男 0 --- !test_filter_not_old_plan -- -张三1 11 12345678 123 321312 1999-02-13T00:00 中国 男 0 -张三2 11 12345671 123 321312 1999-02-13T00:00 中国 男 0 -张三3 11 12345673 123 321312 1999-02-13T00:00 中国 男 0 -张三4 11 123456711 123 321312 1999-02-13T00:00 中国 男 0 -张三5 11 1232134567 123 321312 1999-02-13T00:00 中国 男 0 -张三6 11 124314567 123 321312 1999-02-13T00:00 中国 男 0 -张三7 11 123445167 123 321312 1998-02-13T00:00 中国 男 0 - -- !test_insert1 -- doris1 18 @@ -457,3 +448,22 @@ doris -- !sql -- 1 +-- !sql -- +10 \N 7744 +11 \N -94 +12 16970 95 +13 \N 7023 +14 \N 1 +15 3679 -11 +16 \N -1079 +17 -22 \N +18 30995 \N +19 \N -79 +5 \N -127 +6 14680 -26424 +7 -22270 12722 +8 \N \N +9 \N \N + +-- !sql -- + diff --git 
a/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog.groovy b/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog.groovy index b6f744e8d3e52f4..888edcc0fcd322a 100644 --- a/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog.groovy +++ b/regression-test/suites/external_table_p0/jdbc/test_mysql_jdbc_catalog.groovy @@ -133,102 +133,89 @@ suite("test_mysql_jdbc_catalog", "p0,external,mysql,external_docker,external_doc qt_sql """select current_catalog()""" sql """switch ${catalog_name}""" qt_sql """select current_catalog()""" - def res_dbs_log = sql "show databases;" - for(int i = 0;i < res_dbs_log.size();i++) { - def tbs = sql "show tables from `${res_dbs_log[i][0]}`" - log.info( "database = ${res_dbs_log[i][0]} => tables = "+tbs.toString()) - } - try { - - sql """ use ${ex_db_name}""" - - order_qt_ex_tb0 """ select id, name from ${ex_tb0} order by id; """ - sql """ insert into internal.${internal_db_name}.${inDorisTable} select id, name from ${ex_tb0}; """ - order_qt_in_tb """ select id, name from internal.${internal_db_name}.${inDorisTable} order by id; """ - - order_qt_ex_tb1 """ select * from ${ex_tb1} order by id; """ - order_qt_ex_tb2 """ select * from ${ex_tb2} order by id; """ - order_qt_ex_tb3 """ select * from ${ex_tb3} order by game_code; """ - order_qt_ex_tb4 """ select * from ${ex_tb4} order by products_id; """ - order_qt_ex_tb5 """ select * from ${ex_tb5} order by id; """ - order_qt_ex_tb6 """ select * from ${ex_tb6} order by id; """ - order_qt_ex_tb7 """ select * from ${ex_tb7} order by id; """ - order_qt_ex_tb8 """ select * from ${ex_tb8} order by uid; """ - order_qt_ex_tb9 """ select * from ${ex_tb9} order by c_date; """ - order_qt_ex_tb10 """ select * from ${ex_tb10} order by aa; """ - order_qt_ex_tb11 """ select * from ${ex_tb11} order by aa; """ - order_qt_ex_tb12 """ select * from ${ex_tb12} order by cc; """ - order_qt_ex_tb13 """ select * from ${ex_tb13} order by name; """ - order_qt_ex_tb14 """ select * from ${ex_tb14} order by tid; """ - order_qt_ex_tb15 """ select * from ${ex_tb15} order by col1; """ - order_qt_ex_tb16 """ select * from ${ex_tb16} order by id; """ - order_qt_ex_tb17 """ select * from ${ex_tb17} order by id; """ - order_qt_ex_tb18 """ select * from ${ex_tb18} order by num_tinyint; """ - order_qt_ex_tb19 """ select * from ${ex_tb19} order by date_value; """ - order_qt_ex_tb20 """ select * from ${ex_tb20} order by decimal_normal; """ - order_qt_ex_tb21_1 """ select `key`, `id` from ${ex_tb21} where `key` = 2 order by id;""" - order_qt_ex_tb21_2 """ select `key`, `id` from ${ex_tb21} where `key` like 2 order by id;""" - order_qt_ex_tb21_3 """ select `key`, `id` from ${ex_tb21} where `key` in (1,2) order by id;""" - order_qt_ex_tb21_4 """ select `key`, `id` from ${ex_tb21} where abs(`key`) = 2 order by id;""" - order_qt_ex_tb21_5 """ select `key`, `id` from ${ex_tb21} where `key` between 1 and 2 order by id;""" - order_qt_ex_tb21_6 """ select `key`, `id` from ${ex_tb21} where `key` = case when id = 1 then 1 else 0 end order by id;""" - order_qt_ex_tb21_7 """ select (`key` +1) as k, `id` from ${ex_tb21} having abs(k) = 2 order by id;""" - order_qt_ex_tb21_8 """ select `key` as k, `id` from ${ex_tb21} having abs(k) = 2 order by id;""" - order_qt_information_schema """ show tables from information_schema; """ - order_qt_dt """select * from ${dt}; """ - order_qt_dt_null """select * from ${dt_null} order by 1; """ - order_qt_test_dz """select * from ${test_zd} order by 1; """ - order_qt_test_filter_not 
"""select * from ${ex_tb13} where name not like '%张三0%' order by 1; """ - explain { - sql("select `datetime` from all_types where to_date(`datetime`) = '2012-10-25';") - contains """ SELECT `datetime` FROM `doris_test`.`all_types` WHERE (date(`datetime`) = '2012-10-25')""" - } + sql """ use ${ex_db_name}""" - explain { - sql("select /*+ SET_VAR(enable_ext_func_pred_pushdown = false) */ `datetime` from all_types where to_date(`datetime`) = '2012-10-25';") - contains """SELECT `datetime` FROM `doris_test`.`all_types`""" - } + order_qt_ex_tb0 """ select id, name from ${ex_tb0} order by id; """ + sql """ insert into internal.${internal_db_name}.${inDorisTable} select id, name from ${ex_tb0}; """ + order_qt_in_tb """ select id, name from internal.${internal_db_name}.${inDorisTable} order by id; """ + + order_qt_ex_tb1 """ select * from ${ex_tb1} order by id; """ + order_qt_ex_tb2 """ select * from ${ex_tb2} order by id; """ + order_qt_ex_tb3 """ select * from ${ex_tb3} order by game_code; """ + order_qt_ex_tb4 """ select * from ${ex_tb4} order by products_id; """ + order_qt_ex_tb5 """ select * from ${ex_tb5} order by id; """ + order_qt_ex_tb6 """ select * from ${ex_tb6} order by id; """ + order_qt_ex_tb7 """ select * from ${ex_tb7} order by id; """ + order_qt_ex_tb8 """ select * from ${ex_tb8} order by uid; """ + order_qt_ex_tb9 """ select * from ${ex_tb9} order by c_date; """ + order_qt_ex_tb10 """ select * from ${ex_tb10} order by aa; """ + order_qt_ex_tb11 """ select * from ${ex_tb11} order by aa; """ + order_qt_ex_tb12 """ select * from ${ex_tb12} order by cc; """ + order_qt_ex_tb13 """ select * from ${ex_tb13} order by name; """ + order_qt_ex_tb14 """ select * from ${ex_tb14} order by tid; """ + order_qt_ex_tb15 """ select * from ${ex_tb15} order by col1; """ + order_qt_ex_tb16 """ select * from ${ex_tb16} order by id; """ + order_qt_ex_tb17 """ select * from ${ex_tb17} order by id; """ + order_qt_ex_tb18 """ select * from ${ex_tb18} order by num_tinyint; """ + order_qt_ex_tb19 """ select * from ${ex_tb19} order by date_value; """ + order_qt_ex_tb20 """ select * from ${ex_tb20} order by decimal_normal; """ + order_qt_ex_tb21_1 """ select `key`, `id` from ${ex_tb21} where `key` = 2 order by id;""" + order_qt_ex_tb21_2 """ select `key`, `id` from ${ex_tb21} where `key` like 2 order by id;""" + order_qt_ex_tb21_3 """ select `key`, `id` from ${ex_tb21} where `key` in (1,2) order by id;""" + order_qt_ex_tb21_4 """ select `key`, `id` from ${ex_tb21} where abs(`key`) = 2 order by id;""" + order_qt_ex_tb21_5 """ select `key`, `id` from ${ex_tb21} where `key` between 1 and 2 order by id;""" + order_qt_ex_tb21_6 """ select `key`, `id` from ${ex_tb21} where `key` = case when id = 1 then 1 else 0 end order by id;""" + order_qt_ex_tb21_7 """ select (`key` +1) as k, `id` from ${ex_tb21} having abs(k) = 2 order by id;""" + order_qt_ex_tb21_8 """ select `key` as k, `id` from ${ex_tb21} having abs(k) = 2 order by id;""" + order_qt_information_schema """ show tables from information_schema; """ + order_qt_dt """select * from ${dt}; """ + order_qt_dt_null """select * from ${dt_null} order by 1; """ + order_qt_test_dz """select * from ${test_zd} order by 1; """ + order_qt_test_filter_not """select * from ${ex_tb13} where name not like '%张三0%' order by 1; """ + explain { + sql("select `datetime` from all_types where to_date(`datetime`) = '2012-10-25';") + contains """ SELECT `datetime` FROM `doris_test`.`all_types` WHERE (date(`datetime`) = '2012-10-25')""" + } + + explain { + sql("select /*+ 
SET_VAR(enable_ext_func_pred_pushdown = false) */ `datetime` from all_types where to_date(`datetime`) = '2012-10-25';") + contains """SELECT `datetime` FROM `doris_test`.`all_types`""" + } - // test insert - String uuid1 = UUID.randomUUID().toString(); - connect(user=user, password="${pwd}", url=url) { - try { - sql """ insert into ${catalog_name}.${ex_db_name}.${test_insert} values ('${uuid1}', 'doris1', 18) """ - fail() - } catch (Exception e) { - log.info(e.getMessage()) - } + // test insert + String uuid1 = UUID.randomUUID().toString(); + connect(user=user, password="${pwd}", url=url) { + try { + sql """ insert into ${catalog_name}.${ex_db_name}.${test_insert} values ('${uuid1}', 'doris1', 18) """ + fail() + } catch (Exception e) { + log.info(e.getMessage()) } + } - sql """GRANT LOAD_PRIV ON ${catalog_name}.${ex_db_name}.${test_insert} TO ${user}""" + sql """GRANT LOAD_PRIV ON ${catalog_name}.${ex_db_name}.${test_insert} TO ${user}""" - connect(user=user, password="${pwd}", url=url) { - try { - sql """ insert into ${catalog_name}.${ex_db_name}.${test_insert} values ('${uuid1}', 'doris1', 18) """ - } catch (Exception e) { - fail(); - } + connect(user=user, password="${pwd}", url=url) { + try { + sql """ insert into ${catalog_name}.${ex_db_name}.${test_insert} values ('${uuid1}', 'doris1', 18) """ + } catch (Exception e) { + fail(); } - order_qt_test_insert1 """ select name, age from ${test_insert} where id = '${uuid1}' order by age """ - - String uuid2 = UUID.randomUUID().toString(); - sql """ insert into ${test_insert} values ('${uuid2}', 'doris2', 19), ('${uuid2}', 'doris3', 20) """ - order_qt_test_insert2 """ select name, age from ${test_insert} where id = '${uuid2}' order by age """ - - sql """ insert into ${test_insert} select * from ${test_insert} where id = '${uuid2}' """ - order_qt_test_insert3 """ select name, age from ${test_insert} where id = '${uuid2}' order by age """ - - String uuid3 = UUID.randomUUID().toString(); - sql """ INSERT INTO ${test_insert2} VALUES - ('${uuid3}', true, 'abcHa1.12345', '1.123450xkalowadawd', '2022-10-01', 3.14159, 1, 2, 0, 100000, 1.2345678, 24.000, '07:09:51', '2022', '2022-11-27 07:09:51', '2022-11-27 07:09:51'); """ - order_qt_test_insert4 """ select k1,k2,k3,k4,k5,k6,k7,k8,k9,k10,k11,k12,k13,k14,k15 from ${test_insert2} where id = '${uuid3}' """ - } finally { - res_dbs_log = sql "show databases;" - for(int i = 0;i < res_dbs_log.size();i++) { - def tbs = sql "show tables from `${res_dbs_log[i][0]}`" - log.info( "database = ${res_dbs_log[i][0]} => tables = "+tbs.toString()) - } - } + } + order_qt_test_insert1 """ select name, age from ${test_insert} where id = '${uuid1}' order by age """ + + String uuid2 = UUID.randomUUID().toString(); + sql """ insert into ${test_insert} values ('${uuid2}', 'doris2', 19), ('${uuid2}', 'doris3', 20) """ + order_qt_test_insert2 """ select name, age from ${test_insert} where id = '${uuid2}' order by age """ + + sql """ insert into ${test_insert} select * from ${test_insert} where id = '${uuid2}' """ + order_qt_test_insert3 """ select name, age from ${test_insert} where id = '${uuid2}' order by age """ + + String uuid3 = UUID.randomUUID().toString(); + sql """ INSERT INTO ${test_insert2} VALUES + ('${uuid3}', true, 'abcHa1.12345', '1.123450xkalowadawd', '2022-10-01', 3.14159, 1, 2, 0, 100000, 1.2345678, 24.000, '07:09:51', '2022', '2022-11-27 07:09:51', '2022-11-27 07:09:51'); """ + order_qt_test_insert4 """ select k1,k2,k3,k4,k5,k6,k7,k8,k9,k10,k11,k12,k13,k14,k15 from ${test_insert2} where id = '${uuid3}' """ + 
sql """ drop catalog if exists ${catalog_name} """ // test only_specified_database argument @@ -313,41 +300,29 @@ suite("test_mysql_jdbc_catalog", "p0,external,mysql,external_docker,external_doc "jdbc.driver_class" = "com.mysql.cj.jdbc.Driver"); """ sql """ switch ${catalog_name} """ - - res_dbs_log = sql "show databases;" - for(int i = 0;i < res_dbs_log.size();i++) { - def tbs = sql "show tables from `${res_dbs_log[i][0]}`" - log.info( "database = ${res_dbs_log[i][0]} => tables = "+tbs.toString()) - } - try { - sql """ use ${ex_db_name} """ - order_qt_ex_tb1 """ select * from ${ex_tb1} order by id; """ - - // test all types supported by MySQL - sql """use doris_test;""" - qt_mysql_all_types """select * from all_types order by tinyint_u;""" - - // test insert into internal.db.table select * from all_types - sql """ insert into internal.${internal_db_name}.${test_insert_all_types} select * from all_types; """ - order_qt_select_insert_all_types """ select * from internal.${internal_db_name}.${test_insert_all_types} order by tinyint_u; """ - - // test CTAS - sql """ drop table if exists internal.${internal_db_name}.${test_ctas} """ - sql """ create table internal.${internal_db_name}.${test_ctas} - PROPERTIES("replication_num" = "1") - AS select * from all_types; - """ - order_qt_ctas """select * from internal.${internal_db_name}.${test_ctas} order by tinyint_u;""" + sql """ use ${ex_db_name} """ + order_qt_ex_tb1 """ select * from ${ex_tb1} order by id; """ + + // test all types supported by MySQL + sql """use doris_test;""" + qt_mysql_all_types """select * from all_types order by tinyint_u;""" + + // test insert into internal.db.table select * from all_types + sql """ insert into internal.${internal_db_name}.${test_insert_all_types} select * from all_types; """ + order_qt_select_insert_all_types """ select * from internal.${internal_db_name}.${test_insert_all_types} order by tinyint_u; """ + + // test CTAS + sql """ drop table if exists internal.${internal_db_name}.${test_ctas} """ + sql """ create table internal.${internal_db_name}.${test_ctas} + PROPERTIES("replication_num" = "1") + AS select * from all_types; + """ + + order_qt_ctas """select * from internal.${internal_db_name}.${test_ctas} order by tinyint_u;""" + + order_qt_ctas_desc """desc internal.${internal_db_name}.${test_ctas};""" - order_qt_ctas_desc """desc internal.${internal_db_name}.${test_ctas};""" - } finally { - res_dbs_log = sql "show databases;" - for(int i = 0;i < res_dbs_log.size();i++) { - def tbs = sql "show tables from `${res_dbs_log[i][0]}`" - log.info( "database = ${res_dbs_log[i][0]} => tables = "+tbs.toString()) - } - } sql """ drop catalog if exists ${catalog_name} """ // test mysql view @@ -622,6 +597,22 @@ suite("test_mysql_jdbc_catalog", "p0,external,mysql,external_docker,external_doc sql """drop catalog if exists mysql_rename2;""" + sql """drop catalog if exists mysql_conjuncts;""" + + sql """create catalog if not exists mysql_conjuncts properties( + "type"="jdbc", + "user"="root", + "password"="123456", + "jdbc_url" = "jdbc:mysql://${externalEnvIp}:${mysql_port}/doris_test?useSSL=false&zeroDateTimeBehavior=convertToNull", + "driver_url" = "${driver_url}", + "driver_class" = "com.mysql.cj.jdbc.Driver" + );""" + + order_qt_sql """SELECT * FROM mysql_conjuncts.doris_test.compoundpredicate_test WHERE (pk > 4) OR ((pk < 6 OR pk > 7) AND col_int_undef_signed < 1);""" + + order_qt_sql """select * from mysql_conjuncts.doris_test.text_push where pk <=7;""" + + sql """drop catalog if exists mysql_conjuncts;""" } } 
From dee2b70d3e433b2c046198aefeb723440e9befcb Mon Sep 17 00:00:00 2001 From: zhangdong <493738387@qq.com> Date: Mon, 12 Aug 2024 11:05:00 +0800 Subject: [PATCH 52/94] [enhance](mtmv)hive cache add partitionId to partitionName Map (#38525) MTMV needs to get a partitionName by partitionId, but previously only a partitionName=>partitionId map existed; when there are many partitions, obtaining the name from that map by ID can be slow. --- .../org/apache/doris/catalog/OlapTable.java | 10 ---------- .../datasource/hive/HMSExternalTable.java | 20 +++---------------- .../datasource/hive/HiveMetaStoreCache.java | 10 ++++++---- .../apache/doris/mtmv/MTMVPartitionUtil.java | 9 --------- .../apache/doris/mtmv/MTMVRelatedTableIf.java | 9 --------- .../doris/datasource/CatalogMgrTest.java | 5 ++++- .../doris/mtmv/MTMVPartitionUtilTest.java | 8 -------- 7 files changed, 13 insertions(+), 58 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index a1fbbcc1fb14b55..884cd4f4054e692 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -2921,16 +2921,6 @@ public MTMVSnapshotIf getTableSnapshot() { return new MTMVVersionSnapshot(visibleVersion); } - @Override - public String getPartitionName(long partitionId) throws AnalysisException { - readLock(); - try { - return getPartitionOrAnalysisException(partitionId).getName(); - } finally { - readUnlock(); - } - } - private static Cloud.GetVersionResponse getVersionFromMeta(Cloud.GetVersionRequest req) throws RpcException { long startAt = System.nanoTime(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java index 6752ae5dcbe6069..e1afbf3dc0bdd92 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java @@ -48,6 +48,7 @@ import org.apache.doris.thrift.TTableDescriptor; import org.apache.doris.thrift.TTableType; +import com.google.common.collect.BiMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; @@ -732,28 +733,13 @@ public Map getAndCopyPartitionItems() { getDbName(), getName(), getPartitionColumnTypes()); Map res = Maps.newHashMap(); Map idToPartitionItem = hivePartitionValues.getIdToPartitionItem(); + BiMap idToName = hivePartitionValues.getPartitionNameToIdMap().inverse(); for (Entry entry : idToPartitionItem.entrySet()) { - try { - res.put(getPartitionName(entry.getKey()), entry.getValue()); - } catch (AnalysisException e) { - LOG.info("can not get partitionName by: " + entry.getKey()); - } - + res.put(idToName.get(entry.getKey()), entry.getValue()); } return res; } - @Override - public String getPartitionName(long partitionId) throws AnalysisException { - Map partitionNameToIdMap = getHivePartitionValues().getPartitionNameToIdMap(); - for (Entry entry : partitionNameToIdMap.entrySet()) { - if (entry.getValue().equals(partitionId)) { - return entry.getKey(); - } - } - throw new AnalysisException("can not find partition, partitionId: " + partitionId); - } - private HiveMetaStoreCache.HivePartitionValues getHivePartitionValues() { HiveMetaStoreCache cache = Env.getCurrentEnv().getExtMetaCacheMgr() .getMetaStoreCache((HMSExternalCatalog) getCatalog()); diff 
--git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java index 631362a5b417019..312f2382b0d0f56 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java @@ -54,6 +54,8 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Preconditions; import com.google.common.base.Strings; +import com.google.common.collect.BiMap; +import com.google.common.collect.HashBiMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -246,7 +248,7 @@ private HivePartitionValues loadPartitionValues(PartitionValueCacheKey key) { LOG.debug("load #{} partitions for {} in catalog {}", partitionNames.size(), key, catalog.getName()); } Map idToPartitionItem = Maps.newHashMapWithExpectedSize(partitionNames.size()); - Map partitionNameToIdMap = Maps.newHashMapWithExpectedSize(partitionNames.size()); + BiMap partitionNameToIdMap = HashBiMap.create(partitionNames.size()); Map> idToUniqueIdsMap = Maps.newHashMapWithExpectedSize(partitionNames.size()); long idx = 0; for (String partitionName : partitionNames) { @@ -1074,7 +1076,7 @@ public static class HiveFileStatus { @Data public static class HivePartitionValues { private long nextPartitionId; - private Map partitionNameToIdMap; + private BiMap partitionNameToIdMap; private Map> idToUniqueIdsMap; private Map idToPartitionItem; private Map> partitionValuesMap; @@ -1093,7 +1095,7 @@ public HivePartitionValues(Map idToPartitionItem, Map, UniqueId> rangeToId, RangeMap singleColumnRangeMap, long nextPartitionId, - Map partitionNameToIdMap, + BiMap partitionNameToIdMap, Map> idToUniqueIdsMap, Map> singleUidToColumnRangeMap, Map> partitionValuesMap) { @@ -1111,7 +1113,7 @@ public HivePartitionValues(Map idToPartitionItem, public HivePartitionValues copy() { HivePartitionValues copy = new HivePartitionValues(); copy.setNextPartitionId(nextPartitionId); - copy.setPartitionNameToIdMap(partitionNameToIdMap == null ? null : Maps.newHashMap(partitionNameToIdMap)); + copy.setPartitionNameToIdMap(partitionNameToIdMap == null ? null : HashBiMap.create(partitionNameToIdMap)); copy.setIdToUniqueIdsMap(idToUniqueIdsMap == null ? null : Maps.newHashMap(idToUniqueIdsMap)); copy.setIdToPartitionItem(idToPartitionItem == null ? null : Maps.newHashMap(idToPartitionItem)); copy.setPartitionValuesMap(partitionValuesMap == null ? 
null : Maps.newHashMap(partitionValuesMap)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionUtil.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionUtil.java index 61fc3e91651efa5..b07ca6ad1d10bf7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionUtil.java @@ -41,7 +41,6 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -166,14 +165,6 @@ public static Map> generateRelatedPartitionDescs(M return result.getDescs(); } - public static List getPartitionNamesByIds(MTMV mtmv, Collection ids) throws AnalysisException { - List res = Lists.newArrayList(); - for (Long partitionId : ids) { - res.add(mtmv.getPartitionName(partitionId)); - } - return res; - } - public static List getPartitionsIdsByNames(MTMV mtmv, List partitions) throws AnalysisException { mtmv.readLock(); try { diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRelatedTableIf.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRelatedTableIf.java index 8aee7741cee0a08..c34df750de5bdc2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRelatedTableIf.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRelatedTableIf.java @@ -79,15 +79,6 @@ public interface MTMVRelatedTableIf extends TableIf { */ MTMVSnapshotIf getTableSnapshot() throws AnalysisException; - /** - * getPartitionName - * - * @param partitionId - * @return partitionName - * @throws AnalysisException - */ - String getPartitionName(long partitionId) throws AnalysisException; - /** * Does the current type of table allow timed triggering * diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/CatalogMgrTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/CatalogMgrTest.java index 364e1f23b16de14..5f1e19f32846f2a 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/CatalogMgrTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/CatalogMgrTest.java @@ -63,6 +63,8 @@ import com.github.benmanes.caffeine.cache.LoadingCache; import com.google.common.base.Preconditions; +import com.google.common.collect.BiMap; +import com.google.common.collect.HashBiMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Range; @@ -477,6 +479,7 @@ public void testAddSingleColumnPartitionsCache() { partitionValueCacheKey.getTypes()); HivePartitionValues partitionValues = metaStoreCache.getPartitionValues(partitionValueCacheKey); Assert.assertEquals(partitionValues.getPartitionNameToIdMap().size(), 4); + Assert.assertEquals(partitionValues.getPartitionNameToIdMap().inverse().size(), 4); } @Test @@ -520,7 +523,7 @@ private HivePartitionValues loadPartitionValues(PartitionValueCacheKey key, List HiveMetaStoreCache metaStoreCache) { // partition name format: nation=cn/city=beijing Map idToPartitionItem = Maps.newHashMapWithExpectedSize(partitionNames.size()); - Map partitionNameToIdMap = Maps.newHashMapWithExpectedSize(partitionNames.size()); + BiMap partitionNameToIdMap = HashBiMap.create(partitionNames.size()); Map> idToUniqueIdsMap = Maps.newHashMapWithExpectedSize(partitionNames.size()); long idx = 0; for (String partitionName : partitionNames) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVPartitionUtilTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVPartitionUtilTest.java index d40e6db5d447577..d6c4a87f224ca40 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVPartitionUtilTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVPartitionUtilTest.java @@ -99,10 +99,6 @@ public void setUp() throws NoSuchMethodException, SecurityException, AnalysisExc minTimes = 0; result = baseSnapshotIf; - mtmv.getPartitionName(anyLong); - minTimes = 0; - result = "name1"; - mtmv.getRefreshSnapshot(); minTimes = 0; result = refreshSnapshot; @@ -123,10 +119,6 @@ public void setUp() throws NoSuchMethodException, SecurityException, AnalysisExc minTimes = 0; result = baseSnapshotIf; - baseOlapTable.getPartitionName(anyLong); - minTimes = 0; - result = "name1"; - refreshSnapshot.equalsWithRelatedPartition(anyString, anyString, (MTMVSnapshotIf) any); minTimes = 0; result = true; From 1f6e5cba61b8c80c29e7782cdd6ceeaae2a8ef34 Mon Sep 17 00:00:00 2001 From: kkop <2449402815@qq.com> Date: Mon, 12 Aug 2024 11:12:54 +0800 Subject: [PATCH 53/94] [enhancement](regression-test) agg schema value add case (#38968) --- .../test_agg_schema_value_add.out | 11 + .../test_agg_schema_value_add.groovy | 655 ++++++++++++++++++ 2 files changed, 666 insertions(+) create mode 100644 regression-test/data/schema_change_p0/test_agg_schema_value_add.out create mode 100644 regression-test/suites/schema_change_p0/test_agg_schema_value_add.groovy diff --git a/regression-test/data/schema_change_p0/test_agg_schema_value_add.out b/regression-test/data/schema_change_p0/test_agg_schema_value_add.out new file mode 100644 index 000000000000000..fac20be2ecff476 --- /dev/null +++ b/regression-test/data/schema_change_p0/test_agg_schema_value_add.out @@ -0,0 +1,11 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- ! -- +123456789 Alice \N Beijing 25 0 13812345678 No. 123 Street, Beijing 2022-01-01T10:00 +234567890 Bob \N Shanghai 30 1 13998765432 No. 456 Street, Shanghai 2022-02-02T12:00 +345678901 Carol \N Guangzhou 28 0 13724681357 No. 789 Street, Guangzhou 2022-03-03T14:00 +456789012 Dave \N Shenzhen 35 1 13680864279 No. 987 Street, Shenzhen 2022-04-04T16:00 +567890123 Eve \N Chengdu 27 0 13572468091 No. 654 Street, Chengdu 2022-05-05T18:00 +678901234 Frank \N Hangzhou 32 1 13467985213 No. 321 Street, Hangzhou 2022-06-06T20:00 +789012345 Grace \N Xian 29 0 13333333333 No. 222 Street, Xian 2022-07-07T22:00 +923456689 Alice \N Yaan 25 0 13812345678 No. 123 Street, Beijing 2022-01-01T10:00 + diff --git a/regression-test/suites/schema_change_p0/test_agg_schema_value_add.groovy b/regression-test/suites/schema_change_p0/test_agg_schema_value_add.groovy new file mode 100644 index 000000000000000..fc2864114ed4739 --- /dev/null +++ b/regression-test/suites/schema_change_p0/test_agg_schema_value_add.groovy @@ -0,0 +1,655 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_agg_schema_value_add", "p0") { + def tbName1 = "test_agg_model_schema_value_add" + def tbName2 = "test_agg_model_schema_value_add_1" + //Test the AGGREGATE model by adding a value column + sql """ DROP TABLE IF EXISTS ${tbName1} """ + def initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL DEFAULT \"广州\" COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + def initTableData = "insert into ${tbName1} values(123456789, 'Alice', 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + def initTable1 = "" + def initTableData1 = "" + //Test the AGGREGATE model by adding a value column with VARCHAR + + def getTableStatusSql = " SHOW ALTER TABLE COLUMN WHERE IndexName='${tbName1}' ORDER BY createtime DESC LIMIT 1 " + def errorMessage = "" + def insertSql = "" + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column province VARCHAR(20) REPLACE_IF_NOT_NULL DEFAULT "广东省" AFTER username """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', '四川省', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00');" + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `province` VARCHAR(20) REPLACE_IF_NOT_NULL COMMENT \"省份\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL DEFAULT \"广州\" COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', '广东省', 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', '广东省', 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', '广东省', 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', '广东省', 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', '广东省', 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', '广东省', 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', '四川省', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', '广东省', 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "province") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + //Test the AGGREGATE model by adding a value column with BOOLEAN + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column special_area BOOLEAN REPLACE DEFAULT "0" AFTER username """ + insertSql = "insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `special_area` BOOLEAN REPLACE_IF_NOT_NULL COMMENT \"特区\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL DEFAULT \"广州\" COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', '0', 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', '0', 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', '0', 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', '0', 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', '0', 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', '0', 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', '0', 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "special_area") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + //Test the AGGREGATE model by adding a value column with TINYINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column special_area TINYINT REPLACE_IF_NOT_NULL DEFAULT "0" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', 1, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `special_area` TINYINT REPLACE_IF_NOT_NULL DEFAULT \"0\" COMMENT \"特区\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL DEFAULT \"广州\" COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', '0', 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', '0', 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', '0', 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', '0', 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', '0', 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', '0', 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', '1', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', '0', 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "special_area") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the AGGREGATE model by adding a value column with SMALLINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column area_num SMALLINT REPLACE_IF_NOT_NULL DEFAULT "999" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', 567, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `area_num` SMALLINT REPLACE_IF_NOT_NULL DEFAULT \"999\" COMMENT \"地区编号\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL DEFAULT \"广州\" COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', 999, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 999, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 999, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 999, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 999, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 999, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 567, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', 999, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "area_num") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the AGGREGATE model by adding a value column with INT + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column house_price INT DEFAULT "999" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', 22536, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `house_price` INT REPLACE_IF_NOT_NULL DEFAULT \"999\" COMMENT \"房子价格\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL DEFAULT \"广州\" COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', 999, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 999, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 999, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 999, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 999, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 999, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 22536, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 11:56:00')," + + " (789012345, 'Grace', 999, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "house_price") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + //Test the AGGREGATE model by adding a value column with BIGINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column house_price1 BIGINT DEFAULT "99999991" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', 88889494646, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `house_price1` BIGINT REPLACE_IF_NOT_NULL COMMENT \"房子价格\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL DEFAULT \"广州\" COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', 99999991, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 99999991, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 99999991, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 99999991, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 99999991, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 99999991, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 88889494646, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', 99999991, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "house_price1") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the AGGREGATE model by adding a value column with LARGEINT + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column car_price LARGEINT REPLACE_IF_NOT_NULL DEFAULT "9999" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', 555888555, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00');" + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `car_price` LARGEINT REPLACE_IF_NOT_NULL COMMENT \"车价格\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL DEFAULT \"广州\" COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', 9999, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 9999, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 9999, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 9999, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 9999, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 9999, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 555888555, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', 9999, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "car_price") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //TODO Test the AGGREGATE model by adding a value column with FLOAT + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column phone FLOAT DEFAULT "166.68" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', 189.98, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00');" + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the AGGREGATE model by adding a value column with DOUBLE + errorMessage = "errCode = 2, detailMessage = Float or double can not used as a key, use decimal instead." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column watch DOUBLE DEFAULT "166.689" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', 189.479, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + }, errorMessage) + + + //Test the AGGREGATE model by adding a value column with DECIMAL + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column watch DECIMAL(38,10) DEFAULT "16899.6464689" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', 16499.6464689, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00');" + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `watch` DECIMAL(38,10) REPLACE_IF_NOT_NULL COMMENT \"车价格\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL DEFAULT \"广州\" COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', 16899.6464689, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 16899.6464689, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 16899.6464689, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 16899.6464689, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 16899.6464689, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 16899.6464689, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 16499.6464689, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', 16899.6464689, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "watch") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the AGGREGATE model by adding a value column with DATE + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column watch DATE REPLACE_IF_NOT_NULL DEFAULT "1997-01-01" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', \"2024-01-01\", 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `watch` DATE REPLACE_IF_NOT_NULL COMMENT \"手表日期\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL DEFAULT \"广州\" COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', '1997-01-01', 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', '1997-01-01', 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', '1997-01-01', 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', '1997-01-01', 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', '1997-01-01', 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', '1997-01-01', 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', '2024-01-01', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', '1997-01-01', 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "watch") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the AGGREGATE model by adding a value column with DATETIME + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column anniversary DATETIME REPLACE_IF_NOT_NULL DEFAULT "1997-01-01 00:00:00" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', \"2024-01-04 09:00:00\", 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `anniversary` DATETIME REPLACE_IF_NOT_NULL COMMENT \"手表日期\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL DEFAULT \"广州\" COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', '1997-01-01 00:00:00', 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', '1997-01-01 00:00:00', 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', '1997-01-01 00:00:00', 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', '1997-01-01 00:00:00', 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', '1997-01-01 00:00:00', 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', '1997-01-01 00:00:00', 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', '2024-01-04 09:00:00', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', '1997-01-01 00:00:00', 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "anniversary") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the AGGREGATE model by adding a value column with CHAR + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column teacher CHAR REPLACE_IF_NOT_NULL DEFAULT "F" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', 'T', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + + + sql """ DROP TABLE IF EXISTS ${tbName2} """ + initTable1 = " CREATE TABLE IF NOT EXISTS ${tbName2}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `teacher` CHAR REPLACE_IF_NOT_NULL COMMENT \"老师\",\n" + + " `city` VARCHAR(20) REPLACE_IF_NOT_NULL DEFAULT \"广州\" COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT SUM COMMENT \"用户年龄\",\n" + + " `sex` TINYINT MAX COMMENT \"用户性别\",\n" + + " `phone` LARGEINT MAX COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) REPLACE DEFAULT \"青海省西宁市城东区\"COMMENT \"用户地址\",\n" + + " `register_time` DATETIME REPLACE DEFAULT \"1970-01-01 00:00:00\" COMMENT \"用户注册时间\"\n" + + " )\n" + + " AGGREGATE KEY(`user_id`, `username`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\"\n" + + " );" + + initTableData1 = "insert into ${tbName2} values(123456789, 'Alice', 'F', 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 'F', 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 'F', 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 'F', 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 'F', 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 'F', 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (923456689, 'Alice', 'T', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (789012345, 'Grace', 'F', 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + sql initTable1 + sql initTableData1 + checkTableData("${tbName1}", "${tbName2}", "teacher") + sql """ DROP TABLE IF EXISTS ${tbName1} """ + + + //Test the AGGREGATE model by adding a value column with STRING + errorMessage = "errCode = 2, detailMessage = String Type should not be used in key column[comment]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column comment STRING DEFAULT "我是小说家" AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', '我是侦探家', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //TODO Test the AGGREGATE model by adding a value column with HLL + errorMessage = "errCode = 2, detailMessage = can not cast from origin type VARCHAR(1) to target type=HLL" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column comment HLL HLL_UNION AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', '2', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //TODO Test the AGGREGATE model by adding a value column with bitmap + sql """ DROP TABLE IF EXISTS ${tbName1} """ + errorMessage = "errCode = 2, detailMessage = Column count doesn't match value count" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column device_id bitmap BITMAP_UNION AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', to_bitmap(243), 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, true, "${tbName1}") + }, errorMessage) + + + //Test the AGGREGATE model by adding a value column with Map + sql """ DROP TABLE IF EXISTS ${tbName1} """ + errorMessage = "errCode = 2, detailMessage = Map can only be used in the non-key column of the duplicate table at present." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column m Map AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', {'a': 100, 'b': 200}, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + }, errorMessage) + + + //Test the AGGREGATE model by adding a value column with JSON + errorMessage = "errCode = 2, detailMessage = JSONB or VARIANT type should not be used in key column[j]." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column j JSON AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', '{\"k1\":\"v31\", \"k2\": 300}', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + }, errorMessage) + + + //Test the AGGREGATE model by adding a value column with ARRAY + errorMessage = "errCode = 2, detailMessage = Array can only be used in the non-key column of the duplicate table at present." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column c_array ARRAY AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', [6,7,8], 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + }, errorMessage) + + + //Test the AGGREGATE model by adding a value column with STRUCT + errorMessage = "errCode = 2, detailMessage = Struct can only be used in the non-key column of the duplicate table at present." + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName1} add column s_info STRUCT AFTER username """ + insertSql = " insert into ${tbName1} values(923456689, 'Alice', [6,7,8], 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 600 + }, insertSql, false, "${tbName1}") + }, errorMessage) + + +} From c1dda5305390253fbb70902b40fb6e176cd9574f Mon Sep 17 00:00:00 2001 From: zfr95 <87513668+zfr9527@users.noreply.github.com> Date: Mon, 12 Aug 2024 11:15:02 +0800 Subject: [PATCH 54/94] [test](mtmv)Add the cases of upgrade and downgrade of mtmv (#38868) ## Proposed changes [test](mtmv)Add the cases of upgrade and downgrade of mtmv --- .../nereids_rules_p0/mv_up_down/load.groovy | 128 ++++++++++++++++ .../mv_up_down/test_mtmv_job_and_hit.groovy | 141 ++++++++++++++++++ 2 files changed, 269 insertions(+) create mode 100644 regression-test/suites/nereids_rules_p0/mv_up_down/load.groovy create mode 100644 regression-test/suites/nereids_rules_p0/mv_up_down/test_mtmv_job_and_hit.groovy diff --git a/regression-test/suites/nereids_rules_p0/mv_up_down/load.groovy b/regression-test/suites/nereids_rules_p0/mv_up_down/load.groovy new file mode 100644 index 000000000000000..9a514c652912d4f --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv_up_down/load.groovy @@ -0,0 +1,128 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_upgrade_downgrade_prepare_mtmv","p0,mtmv,restart_fe") { + + String db = context.config.getDbNameByFile(context.file) + String orders_tb = "up_down_mtmv_orders" + String lineitem_tb = "up_down_mtmv_lineitem" + String mtmv_name = "up_down_mtmv_test_mv" + sql "use ${db}" + + + sql """ + drop table if exists ${orders_tb} + """ + + sql """CREATE TABLE `${orders_tb}` ( + `o_orderkey` BIGINT NULL, + `o_custkey` INT NULL, + `o_orderstatus` VARCHAR(1) NULL, + `o_totalprice` DECIMAL(15, 2) NULL, + `o_orderpriority` VARCHAR(15) NULL, + `o_clerk` VARCHAR(15) NULL, + `o_shippriority` INT NULL, + `o_comment` VARCHAR(79) NULL, + `o_orderdate` DATE not NULL + ) ENGINE=OLAP + DUPLICATE KEY(`o_orderkey`, `o_custkey`) + COMMENT 'OLAP' + auto partition by range (date_trunc(`o_orderdate`, 'day')) () + DISTRIBUTED BY HASH(`o_orderkey`) BUCKETS 96 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + );""" + + sql """ + drop table if exists ${lineitem_tb} + """ + + sql """CREATE TABLE `${lineitem_tb}` ( + `l_orderkey` BIGINT NULL, + `l_linenumber` INT NULL, + `l_partkey` INT NULL, + `l_suppkey` INT NULL, + `l_quantity` DECIMAL(15, 2) NULL, + `l_extendedprice` DECIMAL(15, 2) NULL, + `l_discount` DECIMAL(15, 2) NULL, + `l_tax` DECIMAL(15, 2) NULL, + `l_returnflag` VARCHAR(1) NULL, + `l_linestatus` VARCHAR(1) NULL, + `l_commitdate` DATE NULL, + `l_receiptdate` DATE NULL, + `l_shipinstruct` VARCHAR(25) NULL, + `l_shipmode` VARCHAR(10) NULL, + `l_comment` VARCHAR(44) NULL, + `l_shipdate` DATE not NULL + ) ENGINE=OLAP + DUPLICATE KEY(l_orderkey, l_linenumber, l_partkey, l_suppkey ) + COMMENT 'OLAP' + auto partition by range (date_trunc(`l_shipdate`, 'day')) () + DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + );""" + + sql """ + insert into ${orders_tb} values + (null, 1, 'k', 99.5, 'a', 'b', 1, 'yy', '2023-10-17'), + (1, null, 'o', 109.2, 'c','d',2, 'mm', '2023-10-17'), + (3, 3, null, 99.5, 'a', 'b', 1, 'yy', '2023-10-19'), + (1, 2, 'o', null, 'a', 'b', 1, 'yy', '2023-10-20'), + (2, 3, 'k', 109.2, null,'d',2, 'mm', '2023-10-21'), + (3, 1, 'k', 99.5, 'a', null, 1, 'yy', '2023-10-22'), + (1, 3, 'o', 99.5, 'a', 'b', null, 'yy', '2023-10-19'), + (2, 1, 'o', 109.2, 'c','d',2, null, '2023-10-18'), + (3, 2, 'k', 99.5, 'a', 'b', 1, 'yy', '2023-10-17'), + (4, 5, 'k', 99.5, 'a', 'b', 1, 'yy', '2023-10-19'); + """ + + sql """ + insert into ${lineitem_tb} values + (null, 1, 2, 3, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy', '2023-10-17'), + (1, null, 3, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-10-17'), + (3, 3, null, 2, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-10-19', '2023-10-19', 'c', 'd', 'xxxxxxxxx', '2023-10-19'), + (1, 2, 3, null, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy', '2023-10-17'), + (2, 3, 2, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', null, '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-10-18'), + (3, 1, 1, 2, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-10-19', null, 'c', 'd', 'xxxxxxxxx', '2023-10-19'), + (1, 3, 2, 2, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy', '2023-10-17'); + """ + + sql """analyze table ${orders_tb} with sync;""" + sql """analyze table ${lineitem_tb} with sync;""" + + String mtmv_sql = """select l_Shipdate, o_Orderdate, l_partkey, l_suppkey + from ${lineitem_tb} + left join ${orders_tb} + on ${lineitem_tb}.l_orderkey = ${orders_tb}.o_orderkey""" + + sql """DROP 
MATERIALIZED VIEW IF EXISTS ${mtmv_name};""" + sql """DROP TABLE IF EXISTS ${mtmv_name}""" + sql""" + CREATE MATERIALIZED VIEW ${mtmv_name} + BUILD IMMEDIATE REFRESH AUTO ON MANUAL + partition by(l_shipdate) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS + ${mtmv_sql} + """ + + def job_name = getJobName(db, mtmv_name) + waitingMTMVTaskFinishedByMvName(mtmv_name) + +} diff --git a/regression-test/suites/nereids_rules_p0/mv_up_down/test_mtmv_job_and_hit.groovy b/regression-test/suites/nereids_rules_p0/mv_up_down/test_mtmv_job_and_hit.groovy new file mode 100644 index 000000000000000..31869fa21419ba0 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv_up_down/test_mtmv_job_and_hit.groovy @@ -0,0 +1,141 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_upgrade_downgrade_compatibility_mtmv","p0,mtmv,restart_fe") { + + String db = context.config.getDbNameByFile(context.file) + String orders_tb = "up_down_mtmv_orders" + String lineitem_tb = "up_down_mtmv_lineitem" + String mtmv_name = "up_down_mtmv_test_mv" + + def compare_res = { def stmt -> + sql "SET enable_materialized_view_rewrite=false" + def origin_res = sql stmt + logger.info("origin_res: " + origin_res) + sql "SET enable_materialized_view_rewrite=true" + def mv_origin_res = sql stmt + logger.info("mv_origin_res: " + mv_origin_res) + assertTrue((mv_origin_res == [] && origin_res == []) || (mv_origin_res.size() == origin_res.size())) + for (int row = 0; row < mv_origin_res.size(); row++) { + assertTrue(mv_origin_res[row].size() == origin_res[row].size()) + for (int col = 0; col < mv_origin_res[row].size(); col++) { + assertTrue(mv_origin_res[row][col] == origin_res[row][col]) + } + } + } + + String mtmv_sql = """select l_Shipdate, o_Orderdate, l_partkey, l_suppkey + from ${lineitem_tb} + left join ${orders_tb} + on ${lineitem_tb}.l_orderkey = ${orders_tb}.o_orderkey""" + + def select_count1 = sql """select count(*) from ${mtmv_name}""" + logger.info("select_count1: " + select_count1) + + explain { + sql("${mtmv_sql}") + contains "${mtmv_name}(${mtmv_name})" + } + compare_res(mtmv_sql + " order by 1,2,3,4") + + sql """ + insert into ${orders_tb} values + (null, 1, 'k', 99.5, 'a', 'b', 1, 'yy', '2023-10-17'), + (6, null, 'o', 109.2, 'c','d',2, 'mm', '2023-10-17'), + (6, 3, null, 99.5, 'a', 'b', 1, 'yy', '2023-10-19'), + (6, 2, 'o', null, 'a', 'b', 1, 'yy', '2023-10-20'); + """ + + sql """ + insert into ${lineitem_tb} values + (null, 1, 2, 3, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy', '2023-10-17'), + (6, null, 3, 1, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy', '2023-10-17'), + (6, 3, null, 2, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-10-19', '2023-10-19', 'c', 'd', 
'xxxxxxxxx', '2023-10-19'), + (6, 2, 3, null, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy', '2023-10-17'); + """ + + sql """refresh MATERIALIZED VIEW ${mtmv_name} auto;""" + + // insert and refresh mtmv + def job_name = getJobName(db, mtmv_name) + waitingMTMVTaskFinishedByMvName(mtmv_name) + def select_count2 = sql """select count(*) from ${mtmv_name}""" + logger.info("select_count2: " + select_count2) + assertTrue(select_count2[0][0] != select_count1[0][0]) + + explain { + sql("${mtmv_sql}") + contains "${mtmv_name}(${mtmv_name})" + } + compare_res(mtmv_sql + " order by 1,2,3,4") + + // pause + def job_status = sql """select * from jobs("type"="mv") where Name="${job_name}";""" + assertTrue(job_status[0][8] == "RUNNING") + sql """PAUSE MATERIALIZED VIEW JOB ON ${mtmv_name};""" + job_status = sql """select * from jobs("type"="mv") where Name="${job_name}";""" + assertTrue(job_status[0][8] == "PAUSED") + + explain { + sql("${mtmv_sql}") + contains "${mtmv_name}(${mtmv_name})" + } + compare_res(mtmv_sql + " order by 1,2,3,4") + + // resume + sql """RESUME MATERIALIZED VIEW JOB ON ${mtmv_name};""" + job_status = sql """select * from jobs("type"="mv") where Name="${job_name}";""" + assertTrue(job_status[0][8] == "RUNNING") + + explain { + sql("${mtmv_sql}") + contains "${mtmv_name}(${mtmv_name})" + } + compare_res(mtmv_sql + " order by 1,2,3,4") + + // drop + sql """DROP MATERIALIZED VIEW IF EXISTS ${mtmv_name};""" + sql """DROP TABLE IF EXISTS ${mtmv_name}""" + test { + sql """select count(*) from ${mtmv_name}""" + exception "does not exist" + } + + // create + sql""" + CREATE MATERIALIZED VIEW ${mtmv_name} + BUILD IMMEDIATE REFRESH AUTO ON MANUAL + partition by(l_shipdate) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS + ${mtmv_sql} + """ + + job_name = getJobName(db, mtmv_name) + waitingMTMVTaskFinishedByMvName(mtmv_name) + + def select_count3 = sql """select count(*) from ${mtmv_name}""" + logger.info("select_count3: " + select_count3) + assertTrue(select_count3[0][0] == select_count2[0][0]) + + explain { + sql("${mtmv_sql}") + contains "${mtmv_name}(${mtmv_name})" + } + compare_res(mtmv_sql + " order by 1,2,3,4") +} From 86246f78bd0973943fd7a2ff1159d4cd730a5d24 Mon Sep 17 00:00:00 2001 From: hui lai <1353307710@qq.com> Date: Mon, 12 Aug 2024 11:17:43 +0800 Subject: [PATCH 55/94] [opt](bytebuffer) allocate ByteBuffer memory by Allocator and make it exception safe (#38960) At present, the memory allocation of `ByteBuffer` is done through `new char[capacity_]`. Now, it is uniformly allocated by `Allocator` for the following purposes: 1. Better memory statistics 2. 
Better support for memory limit check --- be/src/http/action/http_stream.cpp | 51 +++++++++++-------- be/src/http/action/stream_load.cpp | 32 ++++++++---- be/src/io/fs/stream_load_pipe.cpp | 7 ++- be/src/runtime/exec_env.h | 4 ++ be/src/runtime/exec_env_init.cpp | 2 + .../runtime/stream_load/stream_load_context.h | 8 ++- .../stream_load/stream_load_executor.cpp | 4 ++ be/src/util/byte_buffer.h | 19 +++++-- 8 files changed, 87 insertions(+), 40 deletions(-) diff --git a/be/src/http/action/http_stream.cpp b/be/src/http/action/http_stream.cpp index e7bfa83911141af..afeb251ca415557 100644 --- a/be/src/http/action/http_stream.cpp +++ b/be/src/http/action/http_stream.cpp @@ -234,31 +234,40 @@ void HttpStreamAction::on_chunk_data(HttpRequest* req) { struct evhttp_request* ev_req = req->get_evhttp_request(); auto evbuf = evhttp_request_get_input_buffer(ev_req); + SCOPED_ATTACH_TASK(ExecEnv::GetInstance()->stream_load_pipe_tracker()); + int64_t start_read_data_time = MonotonicNanos(); while (evbuffer_get_length(evbuf) > 0) { - auto bb = ByteBuffer::allocate(128 * 1024); - auto remove_bytes = evbuffer_remove(evbuf, bb->ptr, bb->capacity); - bb->pos = remove_bytes; - bb->flip(); - auto st = ctx->body_sink->append(bb); - // schema_buffer stores 1M of data for parsing column information - // need to determine whether to cache for the first time - if (ctx->is_read_schema) { - if (ctx->schema_buffer->pos + remove_bytes < config::stream_tvf_buffer_size) { - ctx->schema_buffer->put_bytes(bb->ptr, remove_bytes); - } else { - LOG(INFO) << "use a portion of data to request fe to obtain column information"; - ctx->is_read_schema = false; - ctx->status = process_put(req, ctx); + try { + auto bb = ByteBuffer::allocate(128 * 1024); + auto remove_bytes = evbuffer_remove(evbuf, bb->ptr, bb->capacity); + bb->pos = remove_bytes; + bb->flip(); + auto st = ctx->body_sink->append(bb); + // schema_buffer stores 1M of data for parsing column information + // need to determine whether to cache for the first time + if (ctx->is_read_schema) { + if (ctx->schema_buffer->pos + remove_bytes < config::stream_tvf_buffer_size) { + ctx->schema_buffer->put_bytes(bb->ptr, remove_bytes); + } else { + LOG(INFO) << "use a portion of data to request fe to obtain column information"; + ctx->is_read_schema = false; + ctx->status = process_put(req, ctx); + } } + if (!st.ok() && !ctx->status.ok()) { + LOG(WARNING) << "append body content failed. errmsg=" << st << ", " << ctx->brief(); + ctx->status = st; + return; + } + ctx->receive_bytes += remove_bytes; + } catch (const doris::Exception& e) { + if (e.code() == doris::ErrorCode::MEM_ALLOC_FAILED) { + ctx->status = Status::MemoryLimitExceeded( + fmt::format("PreCatch error code:{}, {}, ", e.code(), e.to_string())); + } + ctx->status = Status::Error(e.code(), e.to_string()); } - - if (!st.ok() && !ctx->status.ok()) { - LOG(WARNING) << "append body content failed. 
errmsg=" << st << ", " << ctx->brief(); - ctx->status = st; - return; - } - ctx->receive_bytes += remove_bytes; } // after all the data has been read and it has not reached 1M, it will execute here if (ctx->is_read_schema) { diff --git a/be/src/http/action/stream_load.cpp b/be/src/http/action/stream_load.cpp index 75d6943d3c63821..d0c5dff2075c6fb 100644 --- a/be/src/http/action/stream_load.cpp +++ b/be/src/http/action/stream_load.cpp @@ -340,19 +340,29 @@ void StreamLoadAction::on_chunk_data(HttpRequest* req) { struct evhttp_request* ev_req = req->get_evhttp_request(); auto evbuf = evhttp_request_get_input_buffer(ev_req); + SCOPED_ATTACH_TASK(ExecEnv::GetInstance()->stream_load_pipe_tracker()); + int64_t start_read_data_time = MonotonicNanos(); while (evbuffer_get_length(evbuf) > 0) { - auto bb = ByteBuffer::allocate(128 * 1024); - auto remove_bytes = evbuffer_remove(evbuf, bb->ptr, bb->capacity); - bb->pos = remove_bytes; - bb->flip(); - auto st = ctx->body_sink->append(bb); - if (!st.ok()) { - LOG(WARNING) << "append body content failed. errmsg=" << st << ", " << ctx->brief(); - ctx->status = st; - return; - } - ctx->receive_bytes += remove_bytes; + try { + auto bb = ByteBuffer::allocate(128 * 1024); + auto remove_bytes = evbuffer_remove(evbuf, bb->ptr, bb->capacity); + bb->pos = remove_bytes; + bb->flip(); + auto st = ctx->body_sink->append(bb); + if (!st.ok()) { + LOG(WARNING) << "append body content failed. errmsg=" << st << ", " << ctx->brief(); + ctx->status = st; + return; + } + ctx->receive_bytes += remove_bytes; + } catch (const doris::Exception& e) { + if (e.code() == doris::ErrorCode::MEM_ALLOC_FAILED) { + ctx->status = Status::MemoryLimitExceeded( + fmt::format("PreCatch error code:{}, {}, ", e.code(), e.to_string())); + } + ctx->status = Status::Error(e.code(), e.to_string()); + } } int64_t read_data_time = MonotonicNanos() - start_read_data_time; int64_t last_receive_and_read_data_cost_nanos = ctx->receive_and_read_data_cost_nanos; diff --git a/be/src/io/fs/stream_load_pipe.cpp b/be/src/io/fs/stream_load_pipe.cpp index 21c3856a8156906..ce91a2e839113ca 100644 --- a/be/src/io/fs/stream_load_pipe.cpp +++ b/be/src/io/fs/stream_load_pipe.cpp @@ -111,7 +111,9 @@ Status StreamLoadPipe::read_one_message(std::unique_ptr* data, size_t } Status StreamLoadPipe::append_and_flush(const char* data, size_t size, size_t proto_byte_size) { - ByteBufferPtr buf = ByteBuffer::allocate(BitUtil::RoundUpToPowerOfTwo(size + 1)); + SCOPED_ATTACH_TASK(ExecEnv::GetInstance()->stream_load_pipe_tracker()); + ByteBufferPtr buf; + RETURN_IF_ERROR_OR_CATCH_EXCEPTION(ByteBuffer::create_and_allocate(buf, 128 * 1024)); buf->put_bytes(data, size); buf->flip(); return _append(buf, proto_byte_size); @@ -145,7 +147,8 @@ Status StreamLoadPipe::append(const char* data, size_t size) { // need to allocate a new chunk, min chunk is 64k size_t chunk_size = std::max(_min_chunk_size, size - pos); chunk_size = BitUtil::RoundUpToPowerOfTwo(chunk_size); - _write_buf = ByteBuffer::allocate(chunk_size); + SCOPED_ATTACH_TASK(ExecEnv::GetInstance()->stream_load_pipe_tracker()); + RETURN_IF_ERROR_OR_CATCH_EXCEPTION(ByteBuffer::create_and_allocate(_write_buf, chunk_size)); _write_buf->put_bytes(data + pos, size - pos); return Status::OK(); } diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index 04bd5aa672acd1c..e686df2dfd6a8f7 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -180,6 +180,9 @@ class ExecEnv { std::shared_ptr segcompaction_mem_tracker() { return _segcompaction_mem_tracker; 
} + std::shared_ptr stream_load_pipe_tracker() { + return _stream_load_pipe_tracker; + } std::shared_ptr point_query_executor_mem_tracker() { return _point_query_executor_mem_tracker; } @@ -358,6 +361,7 @@ class ExecEnv { std::shared_ptr _brpc_iobuf_block_memory_tracker; // Count the memory consumption of segment compaction tasks. std::shared_ptr _segcompaction_mem_tracker; + std::shared_ptr _stream_load_pipe_tracker; // Tracking memory may be shared between multiple queries. std::shared_ptr _point_query_executor_mem_tracker; diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index 8c308f4b4d81fda..d160e9abdc2ff1f 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -616,6 +616,8 @@ void ExecEnv::init_mem_tracker() { MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, "SubcolumnsTree"); _s3_file_buffer_tracker = MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, "S3FileBuffer"); + _stream_load_pipe_tracker = + MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, "StreamLoadPipe"); } void ExecEnv::_register_metrics() { diff --git a/be/src/runtime/stream_load/stream_load_context.h b/be/src/runtime/stream_load/stream_load_context.h index 633c3af428b94ed..f7c4a0d474fa319 100644 --- a/be/src/runtime/stream_load/stream_load_context.h +++ b/be/src/runtime/stream_load/stream_load_context.h @@ -37,6 +37,7 @@ #include "common/utils.h" #include "runtime/exec_env.h" #include "runtime/stream_load/stream_load_executor.h" +#include "runtime/thread_context.h" #include "util/byte_buffer.h" #include "util/time.h" #include "util/uid_util.h" @@ -95,9 +96,14 @@ class StreamLoadContext { public: StreamLoadContext(ExecEnv* exec_env) : id(UniqueId::gen_uid()), _exec_env(exec_env) { start_millis = UnixMillis(); + SCOPED_ATTACH_TASK(ExecEnv::GetInstance()->stream_load_pipe_tracker()); + schema_buffer = ByteBuffer::allocate(config::stream_tvf_buffer_size); } ~StreamLoadContext() { + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER( + ExecEnv::GetInstance()->stream_load_pipe_tracker()); + schema_buffer.reset(); if (need_rollback) { _exec_env->stream_load_executor()->rollback_txn(this); need_rollback = false; @@ -184,7 +190,7 @@ class StreamLoadContext { std::shared_ptr body_sink; std::shared_ptr pipe; - ByteBufferPtr schema_buffer = ByteBuffer::allocate(config::stream_tvf_buffer_size); + ByteBufferPtr schema_buffer; TStreamLoadPutResult put_result; TStreamLoadMultiTablePutResult multi_table_put_result; diff --git a/be/src/runtime/stream_load/stream_load_executor.cpp b/be/src/runtime/stream_load/stream_load_executor.cpp index 28b0556aafdd2cc..4ddd29ac9c3e7c5 100644 --- a/be/src/runtime/stream_load/stream_load_executor.cpp +++ b/be/src/runtime/stream_load/stream_load_executor.cpp @@ -142,6 +142,10 @@ Status StreamLoadExecutor::execute_plan_fragment(std::shared_ptr; -struct ByteBuffer { +struct ByteBuffer : private Allocator { static ByteBufferPtr allocate(size_t size) { ByteBufferPtr ptr(new ByteBuffer(size)); return ptr; } - ~ByteBuffer() { delete[] ptr; } + static Status create_and_allocate(ByteBufferPtr& ptr, size_t size) { + ptr = ByteBufferPtr(new ByteBuffer(size)); + return Status::OK(); + } + + ~ByteBuffer() { Allocator::free(ptr, capacity); } void put_bytes(const char* data, size_t size) { memcpy(ptr + pos, data, size); @@ -56,14 +64,15 @@ struct ByteBuffer { size_t remaining() const { return limit - pos; } bool has_remaining() const { return limit > pos; } - char* const ptr; + char* ptr; size_t pos; size_t limit; 
size_t capacity; private: - ByteBuffer(size_t capacity_) - : ptr(new char[capacity_]), pos(0), limit(capacity_), capacity(capacity_) {} + ByteBuffer(size_t capacity_) : pos(0), limit(capacity_), capacity(capacity_) { + ptr = reinterpret_cast(Allocator::alloc(capacity_)); + } }; } // namespace doris From d91a7adb1422c2d067c5f4b0f170734f9eb22398 Mon Sep 17 00:00:00 2001 From: zhangdong <493738387@qq.com> Date: Mon, 12 Aug 2024 11:35:22 +0800 Subject: [PATCH 56/94] [enhance](auth)modify priv of refresh catalog/db/table (#39008) change priv of refresh catalog/db/table to `PrivPredicate.SHOW` --- .../org/apache/doris/analysis/RefreshCatalogStmt.java | 6 +++--- .../java/org/apache/doris/analysis/RefreshDbStmt.java | 11 +++-------- .../org/apache/doris/analysis/RefreshTableStmt.java | 11 +++-------- .../main/java/org/apache/doris/common/ErrorCode.java | 2 ++ .../java/org/apache/doris/catalog/RefreshDbTest.java | 8 ++------ .../org/apache/doris/catalog/RefreshTableTest.java | 6 +----- 6 files changed, 14 insertions(+), 30 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/RefreshCatalogStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/RefreshCatalogStmt.java index 060807b79199726..7f72a25b7b79aed 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/RefreshCatalogStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/RefreshCatalogStmt.java @@ -67,9 +67,9 @@ public void analyze(Analyzer analyzer) throws UserException { } if (!Env.getCurrentEnv().getAccessManager().checkCtlPriv( - ConnectContext.get(), catalogName, PrivPredicate.ALTER)) { - ErrorReport.reportAnalysisException(ErrorCode.ERR_CATALOG_ACCESS_DENIED, - analyzer.getQualifiedUser(), catalogName); + ConnectContext.get(), catalogName, PrivPredicate.SHOW)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_CATALOG_ACCESS_DENIED_ERROR, + PrivPredicate.SHOW.getPrivs().toString(), catalogName); } // Set to false only if user set the property "invalid_cache"="false" diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/RefreshDbStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/RefreshDbStmt.java index 091d55eafe04498..69b312455a03264 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/RefreshDbStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/RefreshDbStmt.java @@ -87,14 +87,9 @@ public void analyze(Analyzer analyzer) throws AnalysisException, UserException { } // check access if (!Env.getCurrentEnv().getAccessManager().checkDbPriv(ConnectContext.get(), catalogName, - dbName, PrivPredicate.DROP)) { - ErrorReport.reportAnalysisException(ErrorCode.ERR_DBACCESS_DENIED_ERROR, - ConnectContext.get().getQualifiedUser(), dbName); - } - if (!Env.getCurrentEnv().getAccessManager().checkDbPriv(ConnectContext.get(), catalogName, - dbName, PrivPredicate.CREATE)) { - ErrorReport.reportAnalysisException( - ErrorCode.ERR_DBACCESS_DENIED_ERROR, analyzer.getQualifiedUser(), dbName); + dbName, PrivPredicate.SHOW)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_DB_ACCESS_DENIED_ERROR, + PrivPredicate.SHOW.getPrivs().toString(), dbName); } String invalidConfig = properties == null ? null : properties.get(INVALID_CACHE); // Default is to invalid cache. 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/RefreshTableStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/RefreshTableStmt.java index 9755e91dadfb47c..7d872f70a945c0b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/RefreshTableStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/RefreshTableStmt.java @@ -60,14 +60,9 @@ public void analyze(Analyzer analyzer) throws UserException { // check access if (!Env.getCurrentEnv().getAccessManager().checkTblPriv(ConnectContext.get(), tableName.getCtl(), tableName.getDb(), - tableName.getTbl(), PrivPredicate.DROP)) { - ErrorReport.reportAnalysisException(ErrorCode.ERR_SPECIFIC_ACCESS_DENIED_ERROR, "DROP"); - } - - if (!Env.getCurrentEnv().getAccessManager().checkTblPriv(ConnectContext.get(), - tableName.getCtl(), tableName.getDb(), - tableName.getTbl(), PrivPredicate.CREATE)) { - ErrorReport.reportAnalysisException(ErrorCode.ERR_SPECIFIC_ACCESS_DENIED_ERROR, "CREATE"); + tableName.getTbl(), PrivPredicate.SHOW)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_TABLE_ACCESS_DENIED_ERROR, + PrivPredicate.SHOW.getPrivs().toString(), tableName.getTbl()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/ErrorCode.java b/fe/fe-core/src/main/java/org/apache/doris/common/ErrorCode.java index 6b12532b2e6d92e..c65116dcc8310be 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/ErrorCode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/ErrorCode.java @@ -75,6 +75,8 @@ public enum ErrorCode { + "(current value: %d)"), ERR_SPECIFIC_ACCESS_DENIED_ERROR(1227, new byte[]{'4', '2', '0', '0', '0'}, "Access denied; you need (at least " + "one of) the (%s) privilege(s) for this operation"), + ERR_CATALOG_ACCESS_DENIED_ERROR(1221, new byte[]{'4', '2', '0', '0', '0'}, "Access denied; you need (at least " + + "one of) the (%s) privilege(s) on catalog %s for this operation"), ERR_DB_ACCESS_DENIED_ERROR(1225, new byte[]{'4', '2', '0', '0', '0'}, "Access denied; you need (at least " + "one of) the (%s) privilege(s) on database %s for this operation"), diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/RefreshDbTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/RefreshDbTest.java index fd7a6b1a24bdab6..a18c41edc920dca 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/RefreshDbTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/RefreshDbTest.java @@ -120,23 +120,19 @@ public void testRefreshPriv() throws Exception { // create user1 auth.createUser((CreateUserStmt) parseAndAnalyzeStmt( "create user 'user1'@'%' identified by 'pwd1';", rootCtx)); - // grant only create_priv to user1 on test1.db1.tbl11 - GrantStmt grantStmt = (GrantStmt) parseAndAnalyzeStmt( - "grant create_priv on test1.db1.* to 'user1'@'%';", rootCtx); - auth.grant(grantStmt); // mock login user1 UserIdentity user1 = new UserIdentity("user1", "%"); user1.analyze(); ConnectContext user1Ctx = createCtx(user1, "127.0.0.1"); ExceptionChecker.expectThrowsWithMsg(AnalysisException.class, - "Access denied for user 'user1' to database 'db1'", + "Access denied", () -> parseAndAnalyzeStmt("refresh database test1.db1", user1Ctx)); ConnectContext.remove(); // add drop priv to user1 rootCtx.setThreadLocalInfo(); - grantStmt = (GrantStmt) parseAndAnalyzeStmt( + GrantStmt grantStmt = (GrantStmt) parseAndAnalyzeStmt( "grant drop_priv on test1.db1.* to 'user1'@'%';", rootCtx); auth.grant(grantStmt); ConnectContext.remove(); diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/catalog/RefreshTableTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/RefreshTableTest.java index aee15abd27884bd..45878b6001e8b54 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/catalog/RefreshTableTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/catalog/RefreshTableTest.java @@ -128,10 +128,6 @@ public void testRefreshPriv() throws Exception { // create user1 auth.createUser((CreateUserStmt) parseAndAnalyzeStmt( "create user 'user1'@'%' identified by 'pwd1';", rootCtx)); - // grant only create_priv to user1 on test1.db1.tbl11 - GrantStmt grantStmt = (GrantStmt) parseAndAnalyzeStmt( - "grant create_priv on test1.db1.tbl11 to 'user1'@'%';", rootCtx); - auth.grant(grantStmt); // mock login user1 UserIdentity user1 = new UserIdentity("user1", "%"); @@ -144,7 +140,7 @@ public void testRefreshPriv() throws Exception { // add drop priv to user1 rootCtx.setThreadLocalInfo(); - grantStmt = (GrantStmt) parseAndAnalyzeStmt( + GrantStmt grantStmt = (GrantStmt) parseAndAnalyzeStmt( "grant drop_priv on test1.db1.tbl11 to 'user1'@'%';", rootCtx); auth.grant(grantStmt); ConnectContext.remove(); From 191371df057bd56b0a7827b022fb091d12d750de Mon Sep 17 00:00:00 2001 From: Sun Chenyang Date: Mon, 12 Aug 2024 12:10:03 +0800 Subject: [PATCH 57/94] [fix] (compaction) fix time series compaction policy (#39170) ## Proposed changes Fix the issue introduced by #38220 ### BUG The permits requested for compaction cannot be 0, 0 indicates failure. --- be/src/olap/tablet.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index 76bd24ff7523ad3..1efb9afbf06f7f9 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -1723,7 +1723,8 @@ Status Tablet::prepare_compaction_and_calculate_permits( // Time series policy does not rely on permits, it uses goal size to control memory if (tablet->tablet_meta()->compaction_policy() == CUMULATIVE_TIME_SERIES_POLICY) { - permits = 0; + // permits = 0 means that prepare_compaction failed + permits = 1; } else { permits = compaction->get_compaction_permits(); } From 1810cba2364f72c10a26fac1097d4642c901977d Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Mon, 12 Aug 2024 15:20:57 +0800 Subject: [PATCH 58/94] [fix](variable) support all type functions (#39144) --- .../src/main/java/org/apache/doris/qe/ConnectContext.java | 4 ++-- regression-test/data/nereids_p0/test_user_var.out | 3 +++ regression-test/suites/nereids_p0/test_user_var.groovy | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java index b5f8c8d9be5ec7c..c531b1167d2c3cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java @@ -558,7 +558,7 @@ public void setUserVar(SetVar setVar) { } else if (literalExpr instanceof NullLiteral) { return Literal.of(null); } else { - return Literal.of(""); + return Literal.of(literalExpr.getStringValue()); } } else { // If there are no such user defined var, just return the NULL value. 
@@ -587,7 +587,7 @@ public void fillValueForUserDefinedVar(VariableExpr desc) { desc.setIsNull(); } else { desc.setType(Type.VARCHAR); - desc.setStringValue(""); + desc.setStringValue(literalExpr.getStringValue()); } } else { // If there are no such user defined var, just fill the NULL value. diff --git a/regression-test/data/nereids_p0/test_user_var.out b/regression-test/data/nereids_p0/test_user_var.out index 5d6338dd7ee6c04..a4508fd738dc5df 100644 --- a/regression-test/data/nereids_p0/test_user_var.out +++ b/regression-test/data/nereids_p0/test_user_var.out @@ -17,3 +17,6 @@ true false -- !function -- 4 +-- !datetime -- +2007-12-01 00:30:19 + diff --git a/regression-test/suites/nereids_p0/test_user_var.groovy b/regression-test/suites/nereids_p0/test_user_var.groovy index 35eb8d861d8e661..7c97387f4bbc2a7 100644 --- a/regression-test/suites/nereids_p0/test_user_var.groovy +++ b/regression-test/suites/nereids_p0/test_user_var.groovy @@ -23,6 +23,7 @@ suite("test_user_var") { sql "SET @c1='H', @c2=''" sql "SET @d1=true, @d2=false" sql "SET @f1=null" + sql "set @dt1 = from_unixtime(1196440219);" sql "set @func_1=(abs(1) + 1) * 2" sql "set disable_nereids_rules=PRUNE_EMPTY_PARTITION" @@ -33,6 +34,7 @@ suite("test_user_var") { qt_boolean 'select @d1, @d2;' qt_null_literal 'select @f1, @f2;' qt_function 'select @func_1' + qt_datetime 'select @dt1' multi_sql( """
From 433b84a1857acbcc18f9f19fd7b26a35b18f46e1 Mon Sep 17 00:00:00 2001
From: Qi Chen
Date: Mon, 12 Aug 2024 15:56:51 +0800
Subject: [PATCH 59/94] [Fix](parquet-reader) Fix and optimize parquet min-max filtering. (#38277)

## Proposed changes

Refer to trino's implementation.
- Historical versions of parquet-mr wrote corrupt statistics for some columns; `CorruptStatistics::should_ignore_statistics()` is used to detect and ignore them.
- Older parquet files only carry `min` and `max` stats; later versions add `min_value` and `max_value`. The `min`/`max` stats cannot be used for some types and in some cases, because their validity depends on how the values are compared and sorted.
- For double and float, special cases such as NaN, -0.0, and +0.0 must be handled.
- If a string column only has `min`/`max` stats but no `min_value`/`max_value`, `ParquetPredicate::_try_read_old_utf8_stats()` widens the bounds so min-max filtering can still be applied.
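For illustration, a minimal standalone sketch of the float/double handling described above (the helper name `normalize_fp_bounds` is hypothetical, not the reader's actual API): stats containing NaN are not used at all, a min of +0.0 is widened to -0.0, and a max of -0.0 is widened to +0.0, so rows equal to zero are never pruned.

```cpp
#include <cmath>
#include <optional>
#include <utility>

// Sketch only: widen a [min, max] statistics pair so min-max filtering never
// prunes rows it should keep. Returns std::nullopt when the stats are unusable.
std::optional<std::pair<double, double>> normalize_fp_bounds(double min_value, double max_value) {
    if (std::isnan(min_value) || std::isnan(max_value)) {
        return std::nullopt; // NaN bounds carry no ordering information
    }
    // -0.0 and +0.0 compare equal, but writers may have recorded either sign;
    // widen both ends so a predicate on 0.0 is never filtered out by mistake.
    if (!std::signbit(min_value) && min_value == 0.0) {
        min_value = -0.0;
    }
    if (std::signbit(max_value) && max_value == -0.0) {
        max_value = 0.0;
    }
    return std::make_pair(min_value, max_value);
}
```

With this widening, a row group whose statistics were recorded as `[0.0, 0.0]` still matches a predicate such as `col = -0.0` instead of being pruned because of the sign stored in the bounds.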
--- .../exec/format/parquet/parquet_common.cpp | 340 ++++++++++++++++++ .../vec/exec/format/parquet/parquet_common.h | 131 ++++++- .../exec/format/parquet/parquet_pred_cmp.h | 142 +++++++- .../format/parquet/vparquet_page_index.cpp | 4 +- .../exec/format/parquet/vparquet_reader.cpp | 103 +++++- .../vec/exec/format/parquet/vparquet_reader.h | 3 + .../parquet_corrupt_statistics_test.cpp | 134 +++++++ .../exec/parquet/parquet_statistics_test.cpp | 155 ++++++++ .../vec/exec/parquet/parquet_version_test.cpp | 221 ++++++++++++ 9 files changed, 1207 insertions(+), 26 deletions(-) create mode 100644 be/test/vec/exec/parquet/parquet_corrupt_statistics_test.cpp create mode 100644 be/test/vec/exec/parquet/parquet_statistics_test.cpp create mode 100644 be/test/vec/exec/parquet/parquet_version_test.cpp diff --git a/be/src/vec/exec/format/parquet/parquet_common.cpp b/be/src/vec/exec/format/parquet/parquet_common.cpp index 33e9f11242b46de..59e12fcc71a4634 100644 --- a/be/src/vec/exec/format/parquet/parquet_common.cpp +++ b/be/src/vec/exec/format/parquet/parquet_common.cpp @@ -162,4 +162,344 @@ bool ColumnSelectVector::can_filter_all(size_t remaining_num_values) { void ColumnSelectVector::skip(size_t num_values) { _filter_map_index += num_values; } + +ParsedVersion::ParsedVersion(std::string application, std::optional version, + std::optional app_build_hash) + : _application(std::move(application)), + _version(std::move(version)), + _app_build_hash(std::move(app_build_hash)) {} + +bool ParsedVersion::operator==(const ParsedVersion& other) const { + return _application == other._application && _version == other._version && + _app_build_hash == other._app_build_hash; +} + +bool ParsedVersion::operator!=(const ParsedVersion& other) const { + return !(*this == other); +} + +size_t ParsedVersion::hash() const { + std::hash hasher; + return hasher(_application) ^ (_version ? hasher(*_version) : 0) ^ + (_app_build_hash ? hasher(*_app_build_hash) : 0); +} + +std::string ParsedVersion::to_string() const { + return "ParsedVersion(application=" + _application + + ", semver=" + (_version ? *_version : "null") + + ", app_build_hash=" + (_app_build_hash ? *_app_build_hash : "null") + ")"; +} + +Status VersionParser::parse(const std::string& created_by, + std::unique_ptr* parsed_version) { + static const std::string FORMAT = + "(.*?)\\s+version\\s*(?:([^(]*?)\\s*(?:\\(\\s*build\\s*([^)]*?)\\s*\\))?)?"; + static const std::regex PATTERN(FORMAT); + + std::smatch matcher; + if (!std::regex_match(created_by, matcher, PATTERN)) { + return Status::InternalError(fmt::format("Could not parse created_by: {}, using format: {}", + created_by, FORMAT)); + } + + std::string application = matcher[1].str(); + if (application.empty()) { + return Status::InternalError("application cannot be null or empty"); + } + std::optional semver = + matcher[2].str().empty() ? std::nullopt : std::optional(matcher[2].str()); + std::optional app_build_hash = + matcher[3].str().empty() ? 
std::nullopt : std::optional(matcher[3].str()); + *parsed_version = std::make_unique(application, semver, app_build_hash); + return Status::OK(); +} + +SemanticVersion::SemanticVersion(int major, int minor, int patch) + : _major(major), + _minor(minor), + _patch(patch), + _prerelease(false), + _unknown(std::nullopt), + _pre(std::nullopt), + _build_info(std::nullopt) {} + +#ifdef BE_TEST +SemanticVersion::SemanticVersion(int major, int minor, int patch, bool has_unknown) + : _major(major), + _minor(minor), + _patch(patch), + _prerelease(has_unknown), + _unknown(std::nullopt), + _pre(std::nullopt), + _build_info(std::nullopt) {} +#endif + +SemanticVersion::SemanticVersion(int major, int minor, int patch, + std::optional unknown, std::optional pre, + std::optional build_info) + : _major(major), + _minor(minor), + _patch(patch), + _prerelease(unknown.has_value() && !unknown.value().empty()), + _unknown(std::move(unknown)), + _pre(pre.has_value() ? std::optional(Prerelease(std::move(pre.value()))) + : std::nullopt), + _build_info(std::move(build_info)) {} + +Status SemanticVersion::parse(const std::string& version, + std::unique_ptr* semantic_version) { + static const std::regex pattern(R"(^(\d+)\.(\d+)\.(\d+)([^-+]*)?(?:-([^+]*))?(?:\+(.*))?$)"); + std::smatch match; + + if (!std::regex_match(version, match, pattern)) { + return Status::InternalError(version + " does not match format"); + } + + int major = std::stoi(match[1].str()); + int minor = std::stoi(match[2].str()); + int patch = std::stoi(match[3].str()); + std::optional unknown = + match[4].str().empty() ? std::nullopt : std::optional(match[4].str()); + std::optional prerelease = + match[5].str().empty() ? std::nullopt : std::optional(match[5].str()); + std::optional build_info = + match[6].str().empty() ? std::nullopt : std::optional(match[6].str()); + if (major < 0 || minor < 0 || patch < 0) { + return Status::InternalError("major({}), minor({}), and patch({}) must all be >= 0", major, + minor, patch); + } + *semantic_version = + std::make_unique(major, minor, patch, unknown, prerelease, build_info); + return Status::OK(); +} + +int SemanticVersion::compare_to(const SemanticVersion& other) const { + if (int cmp = _compare_integers(_major, other._major); cmp != 0) { + return cmp; + } + if (int cmp = _compare_integers(_minor, other._minor); cmp != 0) { + return cmp; + } + if (int cmp = _compare_integers(_patch, other._patch); cmp != 0) { + return cmp; + } + if (int cmp = _compare_booleans(other._prerelease, _prerelease); cmp != 0) { + return cmp; + } + if (_pre.has_value()) { + if (other._pre.has_value()) { + return _pre.value().compare_to(other._pre.value()); + } else { + return -1; + } + } else if (other._pre.has_value()) { + return 1; + } + return 0; +} + +bool SemanticVersion::operator==(const SemanticVersion& other) const { + return compare_to(other) == 0; +} + +bool SemanticVersion::operator!=(const SemanticVersion& other) const { + return !(*this == other); +} + +std::string SemanticVersion::to_string() const { + std::string result = + std::to_string(_major) + "." + std::to_string(_minor) + "." 
+ std::to_string(_patch); + if (_prerelease && _unknown) result += _unknown.value(); + if (_pre) result += _pre.value().to_string(); + if (_build_info) result += _build_info.value(); + return result; +} + +SemanticVersion::NumberOrString::NumberOrString(const std::string& value_string) + : _original(value_string) { + const static std::regex NUMERIC("\\d+"); + _is_numeric = std::regex_match(_original, NUMERIC); + _number = -1; + if (_is_numeric) { + _number = std::stoi(_original); + } +} + +SemanticVersion::NumberOrString::NumberOrString(const NumberOrString& other) + : _original(other._original), _is_numeric(other._is_numeric), _number(other._number) {} + +int SemanticVersion::NumberOrString::compare_to(const SemanticVersion::NumberOrString& that) const { + if (this->_is_numeric != that._is_numeric) { + return this->_is_numeric ? -1 : 1; + } + + if (_is_numeric) { + return this->_number - that._number; + } + + return this->_original.compare(that._original); +} + +std::string SemanticVersion::NumberOrString::to_string() const { + return _original; +} + +bool SemanticVersion::NumberOrString::operator<(const SemanticVersion::NumberOrString& that) const { + return compare_to(that) < 0; +} + +bool SemanticVersion::NumberOrString::operator==( + const SemanticVersion::NumberOrString& that) const { + return compare_to(that) == 0; +} + +bool SemanticVersion::NumberOrString::operator!=( + const SemanticVersion::NumberOrString& that) const { + return !(*this == that); +} + +bool SemanticVersion::NumberOrString::operator>(const SemanticVersion::NumberOrString& that) const { + return compare_to(that) > 0; +} + +bool SemanticVersion::NumberOrString::operator<=( + const SemanticVersion::NumberOrString& that) const { + return !(*this > that); +} + +bool SemanticVersion::NumberOrString::operator>=( + const SemanticVersion::NumberOrString& that) const { + return !(*this < that); +} + +int SemanticVersion::_compare_integers(int x, int y) { + return (x < y) ? -1 : ((x == y) ? 0 : 1); +} + +int SemanticVersion::_compare_booleans(bool x, bool y) { + return (x == y) ? 0 : (x ? 
1 : -1); +} + +std::vector SemanticVersion::Prerelease::_split(const std::string& s, + const std::regex& delimiter) { + std::sregex_token_iterator iter(s.begin(), s.end(), delimiter, -1); + std::sregex_token_iterator end; + std::vector tokens(iter, end); + return tokens; +} + +SemanticVersion::Prerelease::Prerelease(std::string original) : _original(std::move(original)) { + static const std::regex DOT("\\."); + auto parts = _split(_original, DOT); + for (const auto& part : parts) { + NumberOrString number_or_string(part); + _identifiers.emplace_back(number_or_string); + } +} + +int SemanticVersion::Prerelease::compare_to(const Prerelease& that) const { + int size = std::min(this->_identifiers.size(), that._identifiers.size()); + for (int i = 0; i < size; ++i) { + int cmp = this->_identifiers[i].compare_to(that._identifiers[i]); + if (cmp != 0) { + return cmp; + } + } + return static_cast(this->_identifiers.size()) - static_cast(that._identifiers.size()); +} + +std::string SemanticVersion::Prerelease::to_string() const { + return _original; +} + +bool SemanticVersion::Prerelease::operator<(const Prerelease& that) const { + return compare_to(that) < 0; +} + +bool SemanticVersion::Prerelease::operator==(const Prerelease& that) const { + return compare_to(that) == 0; +} + +bool SemanticVersion::Prerelease::operator!=(const Prerelease& that) const { + return !(*this == that); +} + +bool SemanticVersion::Prerelease::operator>(const Prerelease& that) const { + return compare_to(that) > 0; +} + +bool SemanticVersion::Prerelease::operator<=(const Prerelease& that) const { + return !(*this > that); +} + +bool SemanticVersion::Prerelease::operator>=(const Prerelease& that) const { + return !(*this < that); +} + +const SemanticVersion CorruptStatistics::PARQUET_251_FIXED_VERSION(1, 8, 0); +const SemanticVersion CorruptStatistics::CDH_5_PARQUET_251_FIXED_START(1, 5, 0, std::nullopt, + "cdh5.5.0", std::nullopt); +const SemanticVersion CorruptStatistics::CDH_5_PARQUET_251_FIXED_END(1, 5, 0); + +bool CorruptStatistics::should_ignore_statistics(const std::string& created_by, + tparquet::Type::type physical_type) { + if (physical_type != tparquet::Type::BYTE_ARRAY && + physical_type != tparquet::Type::FIXED_LEN_BYTE_ARRAY) { + // The bug only applies to binary columns + return false; + } + + if (created_by.empty()) { + // created_by is not populated + VLOG_DEBUG + << "Ignoring statistics because created_by is null or empty! See PARQUET-251 and " + "PARQUET-297"; + return true; + } + + Status status; + std::unique_ptr parsed_version; + status = VersionParser::parse(created_by, &parsed_version); + if (!status.ok()) { + VLOG_DEBUG << "Ignoring statistics because created_by could not be parsed (see " + "PARQUET-251)." + " CreatedBy: " + << created_by << ", msg: " << status.msg(); + return true; + } + + if (parsed_version->application() != "parquet-mr") { + // Assume other applications don't have this bug + return false; + } + + if ((!parsed_version->version().has_value()) || parsed_version->version().value().empty()) { + VLOG_DEBUG << "Ignoring statistics because created_by did not contain a semver (see " + "PARQUET-251): " + << created_by; + return true; + } + + std::unique_ptr semantic_version; + status = SemanticVersion::parse(parsed_version->version().value(), &semantic_version); + if (!status.ok()) { + VLOG_DEBUG << "Ignoring statistics because created_by could not be parsed (see " + "PARQUET-251)." 
+ " CreatedBy: " + << created_by << ", msg: " << status.msg(); + return true; + } + if (semantic_version->compare_to(PARQUET_251_FIXED_VERSION) < 0 && + !(semantic_version->compare_to(CDH_5_PARQUET_251_FIXED_START) >= 0 && + semantic_version->compare_to(CDH_5_PARQUET_251_FIXED_END) < 0)) { + VLOG_DEBUG + << "Ignoring statistics because this file was created prior to the fixed version, " + "see PARQUET-251"; + return true; + } + + // This file was created after the fix + return false; +} + } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/parquet/parquet_common.h b/be/src/vec/exec/format/parquet/parquet_common.h index 2cf745882ee1392..da374d5fe793f89 100644 --- a/be/src/vec/exec/format/parquet/parquet_common.h +++ b/be/src/vec/exec/format/parquet/parquet_common.h @@ -17,10 +17,12 @@ #pragma once +#include #include #include #include +#include #include #include @@ -156,4 +158,131 @@ class ColumnSelectVector { size_t _num_filtered; size_t _read_index; }; -} // namespace doris::vectorized \ No newline at end of file + +enum class ColumnOrderName { UNDEFINED, TYPE_DEFINED_ORDER }; + +enum class SortOrder { SIGNED, UNSIGNED, UNKNOWN }; + +class ParsedVersion { +public: + ParsedVersion(std::string application, std::optional version, + std::optional app_build_hash); + + const std::string& application() const { return _application; } + + const std::optional& version() const { return _version; } + + const std::optional& app_build_hash() const { return _app_build_hash; } + + bool operator==(const ParsedVersion& other) const; + + bool operator!=(const ParsedVersion& other) const; + + size_t hash() const; + + std::string to_string() const; + +private: + std::string _application; + std::optional _version; + std::optional _app_build_hash; +}; + +class VersionParser { +public: + static Status parse(const std::string& created_by, + std::unique_ptr* parsed_version); +}; + +class SemanticVersion { +public: + SemanticVersion(int major, int minor, int patch); + +#ifdef BE_TEST + SemanticVersion(int major, int minor, int patch, bool has_unknown); +#endif + + SemanticVersion(int major, int minor, int patch, std::optional unknown, + std::optional pre, std::optional build_info); + + static Status parse(const std::string& version, + std::unique_ptr* semantic_version); + + int compare_to(const SemanticVersion& other) const; + + bool operator==(const SemanticVersion& other) const; + + bool operator!=(const SemanticVersion& other) const; + + std::string to_string() const; + +private: + class NumberOrString { + public: + explicit NumberOrString(const std::string& value_string); + + NumberOrString(const NumberOrString& other); + + int compare_to(const NumberOrString& that) const; + std::string to_string() const; + + bool operator<(const NumberOrString& that) const; + bool operator==(const NumberOrString& that) const; + bool operator!=(const NumberOrString& that) const; + bool operator>(const NumberOrString& that) const; + bool operator<=(const NumberOrString& that) const; + bool operator>=(const NumberOrString& that) const; + + private: + std::string _original; + bool _is_numeric; + int _number; + }; + + class Prerelease { + public: + explicit Prerelease(std::string original); + + int compare_to(const Prerelease& that) const; + std::string to_string() const; + + bool operator<(const Prerelease& that) const; + bool operator==(const Prerelease& that) const; + bool operator!=(const Prerelease& that) const; + bool operator>(const Prerelease& that) const; + bool operator<=(const Prerelease& that) const; + 
bool operator>=(const Prerelease& that) const; + + const std::string& original() const { return _original; } + + private: + static std::vector _split(const std::string& s, const std::regex& delimiter); + + std::string _original; + std::vector _identifiers; + }; + + static int _compare_integers(int x, int y); + static int _compare_booleans(bool x, bool y); + + int _major; + int _minor; + int _patch; + bool _prerelease; + std::optional _unknown; + std::optional _pre; + std::optional _build_info; +}; + +class CorruptStatistics { +public: + static bool should_ignore_statistics(const std::string& created_by, + tparquet::Type::type physical_type); + +private: + static const SemanticVersion PARQUET_251_FIXED_VERSION; + static const SemanticVersion CDH_5_PARQUET_251_FIXED_START; + static const SemanticVersion CDH_5_PARQUET_251_FIXED_END; +}; + +} // namespace doris::vectorized diff --git a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h index 916f3f64ee690e8..316cbc5d71685dd 100644 --- a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h +++ b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include @@ -38,9 +39,7 @@ class ParquetPredicate { M(TYPE_TINYINT, tparquet::Type::INT32) \ M(TYPE_SMALLINT, tparquet::Type::INT32) \ M(TYPE_INT, tparquet::Type::INT32) \ - M(TYPE_BIGINT, tparquet::Type::INT64) \ - M(TYPE_FLOAT, tparquet::Type::FLOAT) \ - M(TYPE_DOUBLE, tparquet::Type::DOUBLE) + M(TYPE_BIGINT, tparquet::Type::INT64) private: struct ScanPredicate { @@ -132,6 +131,8 @@ class ParquetPredicate { CppType min_value; CppType max_value; + std::unique_ptr encoded_min_copy; + std::unique_ptr encoded_max_copy; tparquet::Type::type physical_type = col_schema->physical_type; switch (col_val_range.type()) { #define DISPATCH(REINTERPRET_TYPE, PARQUET_TYPE) \ @@ -142,24 +143,69 @@ class ParquetPredicate { break; FOR_REINTERPRET_TYPES(DISPATCH) #undef DISPATCH + case TYPE_FLOAT: + if constexpr (std::is_same_v) { + if (col_schema->physical_type != tparquet::Type::FLOAT) { + return false; + } + min_value = *reinterpret_cast(encoded_min.data()); + max_value = *reinterpret_cast(encoded_max.data()); + if (std::isnan(min_value) || std::isnan(max_value)) { + return false; + } + // Updating min to -0.0 and max to +0.0 to ensure that no 0.0 values would be skipped + if (std::signbit(min_value) == 0 && min_value == 0.0F) { + min_value = -0.0F; + } + if (std::signbit(max_value) != 0 && max_value == -0.0F) { + max_value = 0.0F; + } + break; + } else { + return false; + } + case TYPE_DOUBLE: + if constexpr (std::is_same_v) { + if (col_schema->physical_type != tparquet::Type::DOUBLE) { + return false; + } + min_value = *reinterpret_cast(encoded_min.data()); + max_value = *reinterpret_cast(encoded_max.data()); + if (std::isnan(min_value) || std::isnan(max_value)) { + return false; + } + // Updating min to -0.0 and max to +0.0 to ensure that no 0.0 values would be skipped + if (std::signbit(min_value) == 0 && min_value == 0.0) { + min_value = -0.0; + } + if (std::signbit(max_value) != 0 && max_value == -0.0) { + max_value = 0.0; + } + break; + } else { + return false; + } case TYPE_VARCHAR: [[fallthrough]]; case TYPE_CHAR: [[fallthrough]]; case TYPE_STRING: - // TODO: In parquet, min and max statistics may not be able to handle UTF8 correctly. - // Current processing method is using min_value and max_value statistics introduced by PARQUET-1025 if they are used. - // If not, current processing method is temporarily ignored. 
A better way is try to read min and max statistics - // if it contains only ASCII characters. - if (!use_min_max_value) { - return false; - } if constexpr (std::is_same_v) { - min_value = StringRef(encoded_min); - max_value = StringRef(encoded_max); + if (!use_min_max_value) { + encoded_min_copy = std::make_unique(encoded_min); + encoded_max_copy = std::make_unique(encoded_max); + if (!_try_read_old_utf8_stats(*encoded_min_copy, *encoded_max_copy)) { + return false; + } + min_value = StringRef(*encoded_min_copy); + max_value = StringRef(*encoded_max_copy); + } else { + min_value = StringRef(encoded_min); + max_value = StringRef(encoded_max); + } } else { return false; - }; + } break; case TYPE_DECIMALV2: if constexpr (std::is_same_v) { @@ -397,9 +443,64 @@ class ParquetPredicate { return predicates; } + static inline bool _is_ascii(uint8_t byte) { return byte < 128; } + + static int _common_prefix(const std::string& encoding_min, const std::string& encoding_max) { + int min_length = std::min(encoding_min.size(), encoding_max.size()); + int common_length = 0; + while (common_length < min_length && + encoding_min[common_length] == encoding_max[common_length]) { + common_length++; + } + return common_length; + } + + static bool _try_read_old_utf8_stats(std::string& encoding_min, std::string& encoding_max) { + if (encoding_min == encoding_max) { + // If min = max, then there is a single value only + // No need to modify, just use min + encoding_max = encoding_min; + return true; + } else { + int common_prefix_length = _common_prefix(encoding_min, encoding_max); + + // For min we can retain all-ASCII, because this produces a strictly lower value. + int min_good_length = common_prefix_length; + while (min_good_length < encoding_min.size() && + _is_ascii(static_cast(encoding_min[min_good_length]))) { + min_good_length++; + } + + // For max we can be sure only of the part matching the min. When they differ, we can consider only one next, and only if both are ASCII + int max_good_length = common_prefix_length; + if (max_good_length < encoding_max.size() && max_good_length < encoding_min.size() && + _is_ascii(static_cast(encoding_min[max_good_length])) && + _is_ascii(static_cast(encoding_max[max_good_length]))) { + max_good_length++; + } + // Incrementing 127 would overflow. Incrementing within non-ASCII can have side-effects. + while (max_good_length > 0 && + (static_cast(encoding_max[max_good_length - 1]) == 127 || + !_is_ascii(static_cast(encoding_max[max_good_length - 1])))) { + max_good_length--; + } + if (max_good_length == 0) { + // We can return just min bound, but code downstream likely expects both are present or both are absent. 
+ return false; + } + + encoding_min.resize(min_good_length); + encoding_max.resize(max_good_length); + if (max_good_length > 0) { + encoding_max[max_good_length - 1]++; + } + return true; + } + } + public: static bool filter_by_stats(const ColumnValueRangeType& col_val_range, - const FieldSchema* col_schema, bool is_set_min_max, + const FieldSchema* col_schema, bool ignore_min_max_stats, const std::string& encoded_min, const std::string& encoded_max, bool is_all_null, const cctz::time_zone& ctz, bool use_min_max_value = false) { @@ -416,11 +517,14 @@ class ParquetPredicate { return; } } - for (auto& filter : filters) { - need_filter |= _filter_by_min_max(range, filter, col_schema, encoded_min, - encoded_max, ctz, use_min_max_value); - if (need_filter) { - break; + if (!ignore_min_max_stats) { + for (auto& filter : filters) { + need_filter |= + _filter_by_min_max(range, filter, col_schema, encoded_min, + encoded_max, ctz, use_min_max_value); + if (need_filter) { + break; + } } } }, diff --git a/be/src/vec/exec/format/parquet/vparquet_page_index.cpp b/be/src/vec/exec/format/parquet/vparquet_page_index.cpp index 35cf076318ea7e4..53fb1579c8ef368 100644 --- a/be/src/vec/exec/format/parquet/vparquet_page_index.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_page_index.cpp @@ -68,7 +68,7 @@ Status PageIndex::collect_skipped_page_range(tparquet::ColumnIndex* column_index const int num_of_pages = column_index->null_pages.size(); for (int page_id = 0; page_id < num_of_pages; page_id++) { bool is_all_null = column_index->null_pages[page_id]; - if (ParquetPredicate::filter_by_stats(col_val_range, col_schema, !is_all_null, + if (ParquetPredicate::filter_by_stats(col_val_range, col_schema, false, encoded_min_vals[page_id], encoded_max_vals[page_id], is_all_null, ctz)) { skipped_ranges.emplace_back(page_id); @@ -125,4 +125,4 @@ Status PageIndex::parse_offset_index(const tparquet::ColumnChunk& chunk, const u return Status::OK(); } -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_reader.cpp index 74f6001ab2a6814..d8ad51a0479ae84 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_reader.cpp @@ -938,15 +938,53 @@ Status ParquetReader::_process_column_stat_filter(const std::vectorphysical_type == tparquet::Type::INT96 || + col_schema->parquet_schema.logicalType.__isset.UNKNOWN + ? 
ColumnOrderName::UNDEFINED + : ColumnOrderName::TYPE_DEFINED_ORDER; + if ((statistic.min_value != statistic.max_value) && + (column_order != ColumnOrderName::TYPE_DEFINED_ORDER)) { + ignore_min_max_stats = true; + } *filter_group = ParquetPredicate::filter_by_stats( - slot_iter->second, col_schema, is_set_min_max, statistic.min_value, + slot_iter->second, col_schema, ignore_min_max_stats, statistic.min_value, statistic.max_value, is_all_null, *_ctz, true); } else { + if (statistic.__isset.min && statistic.__isset.max) { + bool max_equals_min = statistic.min == statistic.max; + + SortOrder sort_order = _determine_sort_order(col_schema->parquet_schema); + bool sort_orders_match = SortOrder::SIGNED == sort_order; + if (!sort_orders_match && !max_equals_min) { + ignore_min_max_stats = true; + } + bool should_ignore_corrupted_stats = false; + if (_ignored_stats.count(col_schema->physical_type) == 0) { + if (CorruptStatistics::should_ignore_statistics(_t_metadata->created_by, + col_schema->physical_type)) { + _ignored_stats[col_schema->physical_type] = true; + should_ignore_corrupted_stats = true; + } else { + _ignored_stats[col_schema->physical_type] = false; + } + } else if (_ignored_stats[col_schema->physical_type]) { + should_ignore_corrupted_stats = true; + } + if (should_ignore_corrupted_stats) { + ignore_min_max_stats = true; + } else if (!sort_orders_match && !max_equals_min) { + ignore_min_max_stats = true; + } + } else { + ignore_min_max_stats = true; + } *filter_group = ParquetPredicate::filter_by_stats( - slot_iter->second, col_schema, is_set_min_max, statistic.min, statistic.max, - is_all_null, *_ctz, false); + slot_iter->second, col_schema, ignore_min_max_stats, statistic.min, + statistic.max, is_all_null, *_ctz, false); } if (*filter_group) { break; @@ -1021,4 +1059,61 @@ void ParquetReader::_collect_profile_before_close() { _collect_profile(); } +SortOrder ParquetReader::_determine_sort_order(const tparquet::SchemaElement& parquet_schema) { + tparquet::Type::type physical_type = parquet_schema.type; + const tparquet::LogicalType& logical_type = parquet_schema.logicalType; + + // Assume string type is SortOrder::SIGNED, use ParquetPredicate::_try_read_old_utf8_stats() to handle it. 
+ if (logical_type.__isset.STRING && (physical_type == tparquet::Type::BYTE_ARRAY || + physical_type == tparquet::Type::FIXED_LEN_BYTE_ARRAY)) { + return SortOrder::SIGNED; + } + + if (logical_type.__isset.INTEGER) { + if (logical_type.INTEGER.isSigned) { + return SortOrder::SIGNED; + } else { + return SortOrder::UNSIGNED; + } + } else if (logical_type.__isset.DATE) { + return SortOrder::SIGNED; + } else if (logical_type.__isset.ENUM) { + return SortOrder::UNSIGNED; + } else if (logical_type.__isset.BSON) { + return SortOrder::UNSIGNED; + } else if (logical_type.__isset.JSON) { + return SortOrder::UNSIGNED; + } else if (logical_type.__isset.STRING) { + return SortOrder::UNSIGNED; + } else if (logical_type.__isset.DECIMAL) { + return SortOrder::UNKNOWN; + } else if (logical_type.__isset.MAP) { + return SortOrder::UNKNOWN; + } else if (logical_type.__isset.LIST) { + return SortOrder::UNKNOWN; + } else if (logical_type.__isset.TIME) { + return SortOrder::SIGNED; + } else if (logical_type.__isset.TIMESTAMP) { + return SortOrder::SIGNED; + } else if (logical_type.__isset.UNKNOWN) { + return SortOrder::UNKNOWN; + } else { + switch (physical_type) { + case tparquet::Type::BOOLEAN: + case tparquet::Type::INT32: + case tparquet::Type::INT64: + case tparquet::Type::FLOAT: + case tparquet::Type::DOUBLE: + return SortOrder::SIGNED; + case tparquet::Type::BYTE_ARRAY: + case tparquet::Type::FIXED_LEN_BYTE_ARRAY: + return SortOrder::UNSIGNED; + case tparquet::Type::INT96: + return SortOrder::UNKNOWN; + default: + return SortOrder::UNKNOWN; + } + } +} + } // namespace doris::vectorized diff --git a/be/src/vec/exec/format/parquet/vparquet_reader.h b/be/src/vec/exec/format/parquet/vparquet_reader.h index c79d9202c00d758..a00538e0ec3cdcb 100644 --- a/be/src/vec/exec/format/parquet/vparquet_reader.h +++ b/be/src/vec/exec/format/parquet/vparquet_reader.h @@ -220,6 +220,8 @@ class ParquetReader : public GenericReader { const RowGroupReader::RowGroupIndex& group, size_t* avg_io_size); void _collect_profile(); + static SortOrder _determine_sort_order(const tparquet::SchemaElement& parquet_schema); + private: RuntimeProfile* _profile = nullptr; const TFileScanRangeParams& _scan_params; @@ -283,5 +285,6 @@ class ParquetReader : public GenericReader { const VExprContextSPtrs* _not_single_slot_filter_conjuncts = nullptr; const std::unordered_map* _slot_id_to_filter_conjuncts = nullptr; bool _hive_use_column_names = false; + std::unordered_map _ignored_stats; }; } // namespace doris::vectorized diff --git a/be/test/vec/exec/parquet/parquet_corrupt_statistics_test.cpp b/be/test/vec/exec/parquet/parquet_corrupt_statistics_test.cpp new file mode 100644 index 000000000000000..bad95614f006f46 --- /dev/null +++ b/be/test/vec/exec/parquet/parquet_corrupt_statistics_test.cpp @@ -0,0 +1,134 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include + +#include "vec/exec/format/parquet/parquet_common.h" + +namespace doris { +namespace vectorized { +class ParquetCorruptStatisticsTest : public testing::Test { +public: + ParquetCorruptStatisticsTest() = default; +}; + +TEST_F(ParquetCorruptStatisticsTest, test_only_applies_to_binary) { + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics("parquet-mr version 1.6.0 (build abcd)", + tparquet::Type::BYTE_ARRAY)); + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics("parquet-mr version 1.6.0 (build abcd)", + tparquet::Type::FIXED_LEN_BYTE_ARRAY)); + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.6.0 (build abcd)", tparquet::Type::DOUBLE)); +} + +TEST_F(ParquetCorruptStatisticsTest, test_corrupt_statistics) { + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics("parquet-mr version 1.6.0 (build abcd)", + tparquet::Type::BYTE_ARRAY)); + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics("parquet-mr version 1.4.2 (build abcd)", + tparquet::Type::BYTE_ARRAY)); + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.6.100 (build abcd)", tparquet::Type::BYTE_ARRAY)); + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.7.999 (build abcd)", tparquet::Type::BYTE_ARRAY)); + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.6.22rc99 (build abcd)", tparquet::Type::BYTE_ARRAY)); + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.6.22rc99-SNAPSHOT (build abcd)", tparquet::Type::BYTE_ARRAY)); + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.6.1-SNAPSHOT (build abcd)", tparquet::Type::BYTE_ARRAY)); + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.6.0t-01-abcdefg (build abcd)", tparquet::Type::BYTE_ARRAY)); + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics("unparseable string", + tparquet::Type::BYTE_ARRAY)); + + // missing semver + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics("parquet-mr version (build abcd)", + tparquet::Type::BYTE_ARRAY)); + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics("parquet-mr version (build abcd)", + tparquet::Type::BYTE_ARRAY)); + + // missing build hash + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics("parquet-mr version 1.6.0 (build )", + tparquet::Type::BYTE_ARRAY)); + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics("parquet-mr version 1.6.0 (build)", + tparquet::Type::BYTE_ARRAY)); + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics("parquet-mr version (build)", + tparquet::Type::BYTE_ARRAY)); + + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics("imapla version 1.6.0 (build abcd)", + tparquet::Type::BYTE_ARRAY)); + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics("imapla version 1.10.0 (build abcd)", + tparquet::Type::BYTE_ARRAY)); + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.8.0 (build abcd)", tparquet::Type::BYTE_ARRAY)); + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.8.1 (build abcd)", tparquet::Type::BYTE_ARRAY)); + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.8.1rc3 (build abcd)", tparquet::Type::BYTE_ARRAY)); + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.8.1rc3-SNAPSHOT (build abcd)", 
tparquet::Type::BYTE_ARRAY)); + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.9.0 (build abcd)", tparquet::Type::BYTE_ARRAY)); + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 2.0.0 (build abcd)", tparquet::Type::BYTE_ARRAY)); + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.9.0t-01-abcdefg (build abcd)", tparquet::Type::BYTE_ARRAY)); + + // missing semver + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics("impala version (build abcd)", + tparquet::Type::BYTE_ARRAY)); + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics("impala version (build abcd)", + tparquet::Type::BYTE_ARRAY)); + + // missing build hash + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics("impala version 1.6.0 (build )", + tparquet::Type::BYTE_ARRAY)); + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics("impala version 1.6.0 (build)", + tparquet::Type::BYTE_ARRAY)); + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics("impala version (build)", + tparquet::Type::BYTE_ARRAY)); +} + +TEST_F(ParquetCorruptStatisticsTest, test_distribution_corrupt_statistics) { + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.5.0-cdh5.4.999 (build abcd)", tparquet::Type::BYTE_ARRAY)); + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.5.0-cdh5.5.0-SNAPSHOT (build " + "956ed6c14c611b4c4eaaa1d6e5b9a9c6d4dfa336)", + tparquet::Type::BYTE_ARRAY)); + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.5.0-cdh5.5.0 (build abcd)", tparquet::Type::BYTE_ARRAY)); + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.5.0-cdh5.5.1 (build abcd)", tparquet::Type::BYTE_ARRAY)); + EXPECT_FALSE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.5.0-cdh5.6.0 (build abcd)", tparquet::Type::BYTE_ARRAY)); + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics( + "parquet-mr version 1.4.10 (build abcd)", tparquet::Type::BYTE_ARRAY)); + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics("parquet-mr version 1.5.0 (build abcd)", + tparquet::Type::BYTE_ARRAY)); + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics("parquet-mr version 1.5.1 (build abcd)", + tparquet::Type::BYTE_ARRAY)); + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics("parquet-mr version 1.6.0 (build abcd)", + tparquet::Type::BYTE_ARRAY)); + EXPECT_TRUE(CorruptStatistics::should_ignore_statistics("parquet-mr version 1.7.0 (build abcd)", + tparquet::Type::BYTE_ARRAY)); +} + +} // namespace vectorized +} // namespace doris diff --git a/be/test/vec/exec/parquet/parquet_statistics_test.cpp b/be/test/vec/exec/parquet/parquet_statistics_test.cpp new file mode 100644 index 000000000000000..cd8d3068fe13128 --- /dev/null +++ b/be/test/vec/exec/parquet/parquet_statistics_test.cpp @@ -0,0 +1,155 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include + +#include "vec/exec/format/parquet/parquet_pred_cmp.h" + +namespace doris { +namespace vectorized { +class ParquetStatisticsTest : public testing::Test { +public: + ParquetStatisticsTest() = default; +}; + +TEST_F(ParquetStatisticsTest, test_try_read_old_utf8_stats) { + // [, bcé]: min is empty, max starts with ASCII + { + std::string encoding_min(""); + std::string encoding_max("bcé"); + EXPECT_FALSE(ParquetPredicate::_try_read_old_utf8_stats(encoding_min, encoding_max)); + ; + } + + // // [, ébc]: min is empty, max starts with non-ASCII + { + std::string encoding_min(""); + std::string encoding_max("ébc"); + EXPECT_FALSE(ParquetPredicate::_try_read_old_utf8_stats(encoding_min, encoding_max)); + ; + } + + // [aa, bé]: no common prefix, first different are both ASCII, min is all ASCII + { + std::string encoding_min("aa"); + std::string encoding_max("bé"); + EXPECT_TRUE(ParquetPredicate::_try_read_old_utf8_stats(encoding_min, encoding_max)); + ; + EXPECT_EQ(encoding_min, "aa"); + EXPECT_EQ(encoding_max, "c"); + } + + // [abcd, abcdN]: common prefix, not only ASCII, one prefix of the other, last common ASCII + { + std::string encoding_min("abcd"); + std::string encoding_max("abcdN"); + EXPECT_TRUE(ParquetPredicate::_try_read_old_utf8_stats(encoding_min, encoding_max)); + ; + EXPECT_EQ(encoding_min, "abcd"); + EXPECT_EQ(encoding_max, "abce"); + } + + // [abcé, abcéN]: common prefix, not only ASCII, one prefix of the other, last common non ASCII + { + std::string encoding_min("abcé"); + std::string encoding_max("abcéN"); + EXPECT_TRUE(ParquetPredicate::_try_read_old_utf8_stats(encoding_min, encoding_max)); + ; + EXPECT_EQ(encoding_min, "abcé"); + EXPECT_EQ(encoding_max, "abd"); + } + + // [abcéM, abcéN]: common prefix, not only ASCII, first different are both ASCII + { + std::string encoding_min("abcéM"); + std::string encoding_max("abcéN"); + EXPECT_TRUE(ParquetPredicate::_try_read_old_utf8_stats(encoding_min, encoding_max)); + ; + EXPECT_EQ(encoding_min, "abcéM"); + EXPECT_EQ(encoding_max, "abcéO"); + } + + // [abcéMab, abcéNxy]: common prefix, not only ASCII, first different are both ASCII, more characters afterwards + { + std::string encoding_min("abcéMab"); + std::string encoding_max("abcéNxy"); + EXPECT_TRUE(ParquetPredicate::_try_read_old_utf8_stats(encoding_min, encoding_max)); + ; + EXPECT_EQ(encoding_min, "abcéMab"); + EXPECT_EQ(encoding_max, "abcéO"); + } + + // [abcéM, abcé\u00f7]: common prefix, not only ASCII, first different are both ASCII, but need to be chopped off (127) + { + std::string encoding_min("abcéM"); + std::string encoding_max("abcé\u00f7"); + EXPECT_TRUE(ParquetPredicate::_try_read_old_utf8_stats(encoding_min, encoding_max)); + EXPECT_EQ(encoding_min, "abcéM"); + EXPECT_EQ(encoding_max, "abd"); + } + + // [abc\u007fé, bcd\u007fé]: no common prefix, first different are both ASCII + { + std::string encoding_min("abc\u007fé"); + std::string encoding_max("bcd\u007fé"); + EXPECT_TRUE(ParquetPredicate::_try_read_old_utf8_stats(encoding_min, encoding_max)); + ; + EXPECT_EQ(encoding_min, "abc\u007f"); + EXPECT_EQ(encoding_max, 
"c"); + } + + // [é, a]: no common prefix, first different are not both ASCII + { + std::string encoding_min("é"); + std::string encoding_max("a"); + EXPECT_FALSE(ParquetPredicate::_try_read_old_utf8_stats(encoding_min, encoding_max)); + ; + } + + // [é, ê]: no common prefix, first different are both not ASCII + { + std::string encoding_min("é"); + std::string encoding_max("ê"); + EXPECT_FALSE(ParquetPredicate::_try_read_old_utf8_stats(encoding_min, encoding_max)); + ; + } + + // [aé, aé]: min = max (common prefix, first different are both not ASCII) + { + std::string encoding_min("aé"); + std::string encoding_max("aé"); + EXPECT_TRUE(ParquetPredicate::_try_read_old_utf8_stats(encoding_min, encoding_max)); + ; + EXPECT_EQ(encoding_min, "aé"); + EXPECT_EQ(encoding_max, "aé"); + } + + // [aé, bé]: no common prefix, first different are both ASCII + { + std::string encoding_min("aé"); + std::string encoding_max("bé"); + EXPECT_TRUE(ParquetPredicate::_try_read_old_utf8_stats(encoding_min, encoding_max)); + ; + EXPECT_EQ(encoding_min, "a"); + EXPECT_EQ(encoding_max, "c"); + } +} + +} // namespace vectorized +} // namespace doris diff --git a/be/test/vec/exec/parquet/parquet_version_test.cpp b/be/test/vec/exec/parquet/parquet_version_test.cpp new file mode 100644 index 000000000000000..10d17e27790f047 --- /dev/null +++ b/be/test/vec/exec/parquet/parquet_version_test.cpp @@ -0,0 +1,221 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include + +#include + +#include "vec/exec/format/parquet/parquet_common.h" + +namespace doris { +namespace vectorized { +class ParquetVersionTest : public testing::Test { +public: + ParquetVersionTest() = default; +}; + +TEST_F(ParquetVersionTest, test_version_parser) { + std::unique_ptr parsed_version; + + Status status = VersionParser::parse("parquet-mr version 1.6.0 (build abcd)", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", "1.6.0", "abcd"), *parsed_version); + + status = VersionParser::parse("parquet-mr version 1.6.22rc99-SNAPSHOT (build abcd)", + &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", "1.6.22rc99-SNAPSHOT", "abcd"), *parsed_version); + + status = VersionParser::parse("unparseable string", &parsed_version); + EXPECT_FALSE(status.ok()); + + // missing semver + status = VersionParser::parse("parquet-mr version (build abcd)", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", std::nullopt, "abcd"), *parsed_version); + + status = VersionParser::parse("parquet-mr version (build abcd)", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", std::nullopt, "abcd"), *parsed_version); + + // missing build hash + status = VersionParser::parse("parquet-mr version 1.6.0 (build )", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", "1.6.0", std::nullopt), *parsed_version); + + status = VersionParser::parse("parquet-mr version 1.6.0 (build)", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", "1.6.0", std::nullopt), *parsed_version); + + status = VersionParser::parse("parquet-mr version (build)", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", std::nullopt, std::nullopt), *parsed_version); + + status = VersionParser::parse("parquet-mr version (build )", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", std::nullopt, std::nullopt), *parsed_version); + + // Missing entire build section + status = VersionParser::parse("parquet-mr version 1.6.0", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", "1.6.0", std::nullopt), *parsed_version); + + status = VersionParser::parse("parquet-mr version 1.8.0rc4", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", "1.8.0rc4", std::nullopt), *parsed_version); + + status = VersionParser::parse("parquet-mr version 1.8.0rc4-SNAPSHOT", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", "1.8.0rc4-SNAPSHOT", std::nullopt), *parsed_version); + + status = VersionParser::parse("parquet-mr version", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", std::nullopt, std::nullopt), *parsed_version); + + // Various spaces + status = VersionParser::parse("parquet-mr version 1.6.0", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", "1.6.0", std::nullopt), *parsed_version); + + status = VersionParser::parse("parquet-mr version 1.8.0rc4", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", "1.8.0rc4", std::nullopt), *parsed_version); + + status = + VersionParser::parse("parquet-mr version 1.8.0rc4-SNAPSHOT ", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", "1.8.0rc4-SNAPSHOT", std::nullopt), *parsed_version); + + status = 
VersionParser::parse("parquet-mr version", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", std::nullopt, std::nullopt), *parsed_version); + + status = VersionParser::parse("parquet-mr version 1.6.0 ( build )", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", "1.6.0", std::nullopt), *parsed_version); + + status = VersionParser::parse("parquet-mr version 1.6.0 ( build)", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", "1.6.0", std::nullopt), *parsed_version); + + status = VersionParser::parse("parquet-mr version ( build)", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", std::nullopt, std::nullopt), *parsed_version); + + status = VersionParser::parse("parquet-mr version (build )", &parsed_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(ParsedVersion("parquet-mr", std::nullopt, std::nullopt), *parsed_version); +} + +void assertLessThan(const std::string& a, const std::string& b) { + std::unique_ptr version_a; + Status status = SemanticVersion::parse(a, &version_a); + EXPECT_TRUE(status.ok()); + std::unique_ptr version_b; + status = SemanticVersion::parse(b, &version_b); + EXPECT_TRUE(status.ok()); + EXPECT_LT(version_a->compare_to(*version_b), 0) << a << " should be < " << b; + EXPECT_GT(version_b->compare_to(*version_a), 0) << b << " should be > " << a; +} + +void assertEqualTo(const std::string& a, const std::string& b) { + std::unique_ptr version_a; + Status status = SemanticVersion::parse(a, &version_a); + EXPECT_TRUE(status.ok()); + std::unique_ptr version_b; + status = SemanticVersion::parse(b, &version_b); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(version_a->compare_to(*version_b), 0) << a << " should equal " << b; +} + +TEST_F(ParquetVersionTest, test_compare) { + EXPECT_EQ(SemanticVersion(1, 8, 1).compare_to(SemanticVersion(1, 8, 1)), 0); + EXPECT_LT(SemanticVersion(1, 8, 0).compare_to(SemanticVersion(1, 8, 1)), 0); + EXPECT_GT(SemanticVersion(1, 8, 2).compare_to(SemanticVersion(1, 8, 1)), 0); + + EXPECT_EQ(SemanticVersion(1, 8, 1).compare_to(SemanticVersion(1, 8, 1)), 0); + EXPECT_LT(SemanticVersion(1, 8, 0).compare_to(SemanticVersion(1, 8, 1)), 0); + EXPECT_GT(SemanticVersion(1, 8, 2).compare_to(SemanticVersion(1, 8, 1)), 0); + + EXPECT_LT(SemanticVersion(1, 7, 0).compare_to(SemanticVersion(1, 8, 0)), 0); + EXPECT_GT(SemanticVersion(1, 9, 0).compare_to(SemanticVersion(1, 8, 0)), 0); + + EXPECT_LT(SemanticVersion(0, 0, 0).compare_to(SemanticVersion(1, 0, 0)), 0); + EXPECT_GT(SemanticVersion(2, 0, 0).compare_to(SemanticVersion(1, 0, 0)), 0); + + EXPECT_LT(SemanticVersion(1, 8, 100).compare_to(SemanticVersion(1, 9, 0)), 0); + + EXPECT_GT(SemanticVersion(1, 8, 0).compare_to(SemanticVersion(1, 8, 0, true)), 0); + EXPECT_EQ(SemanticVersion(1, 8, 0, true).compare_to(SemanticVersion(1, 8, 0, true)), 0); + EXPECT_LT(SemanticVersion(1, 8, 0, true).compare_to(SemanticVersion(1, 8, 0)), 0); +} + +TEST_F(ParquetVersionTest, test_semver_prerelease_examples) { + std::vector examples = {"1.0.0-alpha", "1.0.0-alpha.1", "1.0.0-alpha.beta", + "1.0.0-beta", "1.0.0-beta.2", "1.0.0-beta.11", + "1.0.0-rc.1", "1.0.0"}; + for (size_t i = 0; i < examples.size() - 1; ++i) { + assertLessThan(examples[i], examples[i + 1]); + assertEqualTo(examples[i], examples[i]); + } + assertEqualTo(examples.back(), examples.back()); +} + +TEST_F(ParquetVersionTest, test_semver_build_info_examples) { + assertEqualTo("1.0.0-alpha+001", "1.0.0-alpha+001"); + 
assertEqualTo("1.0.0-alpha", "1.0.0-alpha+001"); + assertEqualTo("1.0.0+20130313144700", "1.0.0+20130313144700"); + assertEqualTo("1.0.0", "1.0.0+20130313144700"); + assertEqualTo("1.0.0-beta+exp.sha.5114f85", "1.0.0-beta+exp.sha.5114f85"); + assertEqualTo("1.0.0-beta", "1.0.0-beta+exp.sha.5114f85"); +} + +TEST_F(ParquetVersionTest, test_unknown_comparisons) { + assertLessThan("1.0.0rc0-alpha+001", "1.0.0-alpha"); +} + +TEST_F(ParquetVersionTest, test_distribution_versions) { + assertEqualTo("1.5.0-cdh5.5.0", "1.5.0-cdh5.5.0"); + assertLessThan("1.5.0-cdh5.5.0", "1.5.0-cdh5.5.1"); + assertLessThan("1.5.0-cdh5.5.0", "1.5.0-cdh5.5.1-SNAPSHOT"); + assertLessThan("1.5.0-cdh5.5.0", "1.5.0-cdh5.6.0"); + assertLessThan("1.5.0-cdh5.5.0", "1.5.0-cdh6.0.0"); + assertLessThan("1.5.0-cdh5.5.0", "1.5.0"); + assertLessThan("1.5.0-cdh5.5.0", "1.5.0-cdh5.5.0-SNAPSHOT"); +} + +TEST_F(ParquetVersionTest, test_parse) { + std::unique_ptr semantic_version; + Status status = SemanticVersion::parse("1.8.0", &semantic_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(*semantic_version, SemanticVersion(1, 8, 0)); + status = SemanticVersion::parse("1.8.0rc3", &semantic_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(*semantic_version, SemanticVersion(1, 8, 0, true)); + status = SemanticVersion::parse("1.8.0rc3-SNAPSHOT", &semantic_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(*semantic_version, SemanticVersion(1, 8, 0, "rc3", "SNAPSHOT", std::nullopt)); + status = SemanticVersion::parse("1.8.0-SNAPSHOT", &semantic_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(*semantic_version, SemanticVersion(1, 8, 0, std::nullopt, "SNAPSHOT", std::nullopt)); + status = SemanticVersion::parse("1.5.0-cdh5.5.0", &semantic_version); + EXPECT_TRUE(status.ok()); + EXPECT_EQ(*semantic_version, SemanticVersion(1, 5, 0, std::nullopt, "cdh5.5.0", std::nullopt)); +} + +} // namespace vectorized +} // namespace doris From 333bebd7fbbb8f5a09dd3ae297005c2a1182da1c Mon Sep 17 00:00:00 2001 From: yagagagaga Date: Mon, 12 Aug 2024 15:58:29 +0800 Subject: [PATCH 60/94] [chore](cloud) Reflect the current working mode (cloud) in the `version_comment` session variable (#38269) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before: ``` mysql> show variables like '%version_comment%'; +-----------------+-------------------------------------------+-------------------------------------------+---------+ | Variable_name | Value | Default_Value | Changed | +-----------------+-------------------------------------------+-------------------------------------------+---------+ | version_comment | Doris version doris-0.0.0-rc09-afb2dcca62 | Doris version doris-0.0.0-rc09-afb2dcca62 | 0 | +-----------------+-------------------------------------------+-------------------------------------------+---------+ 1 row in set (0.04 sec) ``` After: ``` mysql> show variables like '%version_comment%'; +-----------------+--------------------------------------------------------+-------------------------------------------------------+---------+ | Variable_name | Value | Default_Value | Changed | +-----------------+--------------------------------------------------------+-------------------------------------------------------+---------+ | version_comment | Doris version doris-0.0.0-rc09-afb2dcca62 (Cloud mode) | Doris version doris-0.0.0-rc09-afb2dcca62 (Cloud Mode)| 0 | +-----------------+--------------------------------------------------------+-------------------------------------------------------+---------+ 1 row in set 
(0.04 sec) ``` --- .../src/main/java/org/apache/doris/qe/GlobalVariable.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/GlobalVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/GlobalVariable.java index a75a14c6d4c5bba..b449a4bc8e9b4e3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/GlobalVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/GlobalVariable.java @@ -17,6 +17,7 @@ package org.apache.doris.qe; +import org.apache.doris.common.Config; import org.apache.doris.common.Version; import org.apache.doris.common.util.TimeUtils; import org.apache.doris.mysql.MysqlHandshakePacket; @@ -71,7 +72,8 @@ public final class GlobalVariable { @VariableMgr.VarAttr(name = VERSION_COMMENT, flag = VariableMgr.READ_ONLY) public static String versionComment = "Doris version " - + Version.DORIS_BUILD_VERSION + "-" + Version.DORIS_BUILD_SHORT_HASH; + + Version.DORIS_BUILD_VERSION + "-" + Version.DORIS_BUILD_SHORT_HASH + + (Config.isCloudMode() ? " (Cloud Mode)" : ""); @VariableMgr.VarAttr(name = VERSION, flag = VariableMgr.READ_ONLY) public static String version = MysqlHandshakePacket.SERVER_VERSION;
From 4910a5aa72efcda7ec007fb5a9e3ae3331a3089b Mon Sep 17 00:00:00 2001
From: TengJianPing <18241664+jacktengg@users.noreply.github.com>
Date: Mon, 12 Aug 2024 16:09:42 +0800
Subject: [PATCH 61/94] [fix](window_funnel) fix wrong result of window_funnel (#38954)

## Proposed changes

Issue Number: close #xxx

The current logic of `window_funnel` is wrong: it cannot express the semantics of the function. This PR re-implements the logic.
--- be/src/agent/be_exec_version_manager.h | 2 +- be/src/util/simd/bits.h | 16 + .../aggregate_function_simple_factory.cpp | 2 + .../aggregate_function_window_funnel.cpp | 35 +- .../aggregate_function_window_funnel.h | 412 +++++++++++++++++- .../java/org/apache/doris/common/Config.java | 2 +- .../nereids_p0/aggregate/window_funnel.out | 99 +++++ .../window_functions/window_funnel.out | 5 + .../nereids_p0/aggregate/window_funnel.groovy | 361 +++++++++++++++ .../window_functions/window_funnel.sql | 71 ++- 10 files changed, 948 insertions(+), 57 deletions(-) diff --git a/be/src/agent/be_exec_version_manager.h b/be/src/agent/be_exec_version_manager.h index a55e26f7ba4493d..a7b4e2dee20e577 100644 --- a/be/src/agent/be_exec_version_manager.h +++ b/be/src/agent/be_exec_version_manager.h @@ -82,7 +82,7 @@ class BeExecVersionManager { * d. change some agg function nullable property: PR #37215 * e.
change variant serde to fix PR #38413 */ -constexpr inline int BeExecVersionManager::max_be_exec_version = 6; +constexpr inline int BeExecVersionManager::max_be_exec_version = 7; constexpr inline int BeExecVersionManager::min_be_exec_version = 0; /// functional diff --git a/be/src/util/simd/bits.h b/be/src/util/simd/bits.h index 45f82b23ac99000..a36a95b6eef5dbb 100644 --- a/be/src/util/simd/bits.h +++ b/be/src/util/simd/bits.h @@ -136,6 +136,18 @@ static size_t find_byte(const std::vector& vec, size_t start, T byte) { return (T*)p - vec.data(); } +template +static size_t find_byte(const T* data, size_t start, size_t end, T byte) { + if (start >= end) { + return start; + } + const void* p = std::memchr((const void*)(data + start), byte, end - start); + if (p == nullptr) { + return end; + } + return (T*)p - data; +} + template bool contain_byte(const T* __restrict data, const size_t length, const signed char byte) { return nullptr != std::memchr(reinterpret_cast(data), byte, length); @@ -145,6 +157,10 @@ inline size_t find_one(const std::vector& vec, size_t start) { return find_byte(vec, start, 1); } +inline size_t find_one(const uint8_t* data, size_t start, size_t end) { + return find_byte(data, start, end, 1); +} + inline size_t find_zero(const std::vector& vec, size_t start) { return find_byte(vec, start, 0); } diff --git a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp index 8f58cf06a414bb0..436691c6ef2aad5 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_simple_factory.cpp @@ -55,6 +55,7 @@ void register_aggregate_function_group_concat(AggregateFunctionSimpleFactory& fa void register_aggregate_function_percentile(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_percentile_old(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_window_funnel(AggregateFunctionSimpleFactory& factory); +void register_aggregate_function_window_funnel_old(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_retention(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_percentile_approx(AggregateFunctionSimpleFactory& factory); void register_aggregate_function_orthogonal_bitmap(AggregateFunctionSimpleFactory& factory); @@ -98,6 +99,7 @@ AggregateFunctionSimpleFactory& AggregateFunctionSimpleFactory::instance() { register_aggregate_function_percentile_old(instance); register_aggregate_function_percentile_approx(instance); register_aggregate_function_window_funnel(instance); + register_aggregate_function_window_funnel_old(instance); register_aggregate_function_retention(instance); register_aggregate_function_orthogonal_bitmap(instance); register_aggregate_function_collect_list(instance); diff --git a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.cpp b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.cpp index 6d9ca27fe10bfee..8bfdcc26f4310be 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.cpp +++ b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.cpp @@ -24,6 +24,7 @@ #include "common/logging.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" #include "vec/aggregate_functions/helpers.h" +#include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_nullable.h" @@ -38,11 +39,33 @@ AggregateFunctionPtr 
create_aggregate_function_window_funnel(const std::string& } if (WhichDataType(remove_nullable(argument_types[2])).is_date_time_v2()) { return creator_without_type::create< - AggregateFunctionWindowFunnel, UInt64>>( - argument_types, result_is_nullable); + AggregateFunctionWindowFunnel>(argument_types, + result_is_nullable); } else if (WhichDataType(remove_nullable(argument_types[2])).is_date_time()) { - return creator_without_type::create>( + return creator_without_type::create< + AggregateFunctionWindowFunnel>(argument_types, + result_is_nullable); + } else { + LOG(WARNING) << "Only support DateTime type as window argument!"; + return nullptr; + } +} + +AggregateFunctionPtr create_aggregate_function_window_funnel_old(const std::string& name, + const DataTypes& argument_types, + const bool result_is_nullable) { + if (argument_types.size() < 3) { + LOG(WARNING) << "window_funnel's argument less than 3."; + return nullptr; + } + if (WhichDataType(remove_nullable(argument_types[2])).is_date_time_v2()) { + return creator_without_type::create< + AggregateFunctionWindowFunnelOld, UInt64>>( argument_types, result_is_nullable); + } else if (WhichDataType(remove_nullable(argument_types[2])).is_date_time()) { + return creator_without_type::create< + AggregateFunctionWindowFunnelOld>(argument_types, + result_is_nullable); } else { LOG(WARNING) << "Only support DateTime type as window argument!"; return nullptr; @@ -52,4 +75,10 @@ AggregateFunctionPtr create_aggregate_function_window_funnel(const std::string& void register_aggregate_function_window_funnel(AggregateFunctionSimpleFactory& factory) { factory.register_function_both("window_funnel", create_aggregate_function_window_funnel); } +void register_aggregate_function_window_funnel_old(AggregateFunctionSimpleFactory& factory) { + factory.register_alternative_function("window_funnel", + create_aggregate_function_window_funnel_old, true); + factory.register_alternative_function("window_funnel", + create_aggregate_function_window_funnel_old, false); +} } // namespace doris::vectorized diff --git a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h index 9356cfd4b68a266..3751078910baddc 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h +++ b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h @@ -21,26 +21,29 @@ #pragma once -#include -#include +#include #include #include #include #include #include +#include #include -#include #include "agent/be_exec_version_manager.h" #include "common/compiler_util.h" +#include "common/exception.h" #include "util/binary_cast.hpp" +#include "util/simd/bits.h" #include "vec/aggregate_functions/aggregate_function.h" #include "vec/columns/column_string.h" -#include "vec/columns/column_vector.h" #include "vec/columns/columns_number.h" #include "vec/common/assert_cast.h" +#include "vec/core/sort_block.h" +#include "vec/core/sort_description.h" #include "vec/core/types.h" +#include "vec/data_types/data_type_factory.hpp" #include "vec/data_types/data_type_number.h" #include "vec/io/var_int.h" #include "vec/runtime/vdatetime_value.h" @@ -72,8 +75,384 @@ WindowFunnelMode string_to_window_funnel_mode(const String& string) { } } -template +template struct WindowFunnelState { + using DateValueType = std::conditional_t, VecDateTimeValue>; + int event_count = 0; + int64_t window; + bool enable_mode; + WindowFunnelMode window_funnel_mode; + mutable MutableColumnPtr timestamp_column; + mutable MutableColumns 
event_columns; + ColumnVector::Container* timestamp_column_data; + std::vector::Container*> event_columns_datas; + Block block; + SortDescription sort_description {1}; + bool sorted; + bool is_merge; + + WindowFunnelState() { + event_count = 0; + window = 0; + window_funnel_mode = WindowFunnelMode::INVALID; + + sort_description[0].column_number = 0; + sort_description[0].direction = 1; + sort_description[0].nulls_direction = -1; + sorted = false; + is_merge = false; + } + WindowFunnelState(int arg_event_count) : WindowFunnelState() { + timestamp_column = ColumnVector::create(); + timestamp_column_data = + &assert_cast&>(*timestamp_column).get_data(); + event_count = arg_event_count; + event_columns.resize(event_count); + for (int i = 0; i < event_count; i++) { + event_columns[i] = ColumnVector::create(); + event_columns_datas.emplace_back( + &assert_cast&>(*event_columns[i]).get_data()); + } + } + + void reset() { + window = 0; + timestamp_column->clear(); + for (auto& column : event_columns) { + column->clear(); + } + block.clear_column_data(); + sorted = false; + is_merge = false; + } + + void add(const IColumn** arg_columns, ssize_t row_num, int64_t win, WindowFunnelMode mode) { + window = win; + window_funnel_mode = enable_mode ? mode : WindowFunnelMode::DEFAULT; + + timestamp_column_data->push_back( + assert_cast&>(*arg_columns[2]).get_data()[row_num]); + for (int i = 0; i < event_count; i++) { + event_columns_datas[i]->push_back( + assert_cast&>(*arg_columns[3 + i]) + .get_data()[row_num]); + } + } + + void sort() { + if (sorted) { + return; + } + if (!is_merge) { + Block tmp_block; + tmp_block.insert({std::move(timestamp_column), + DataTypeFactory::instance().create_data_type(TYPE_INDEX), + "timestamp"}); + for (int i = 0; i < event_count; i++) { + tmp_block.insert({std::move(event_columns[i]), + DataTypeFactory::instance().create_data_type(TypeIndex::UInt8), + "event_" + std::to_string(i)}); + } + + block = tmp_block.clone_without_columns(); + sort_block(tmp_block, block, sort_description, 0); + } else { + auto tmp_block = block.clone_without_columns(); + sort_block(block, tmp_block, sort_description, 0); + block = std::move(tmp_block); + } + sorted = true; + } + + template + int _match_event_list(size_t& start_row, size_t row_count, + const NativeType* timestamp_data) const { + int matched_count = 0; + DateValueType start_timestamp; + DateValueType end_timestamp; + TimeInterval interval(SECOND, window, false); + + int column_idx = 1; + const auto& first_event_column = block.get_by_position(column_idx); + const auto& first_event_data = + assert_cast&>(*first_event_column.column).get_data(); + auto match_row = simd::find_one(first_event_data.data(), start_row, row_count); + start_row = match_row + 1; + if (match_row < row_count) { + auto prev_timestamp = binary_cast(timestamp_data[match_row]); + end_timestamp = prev_timestamp; + end_timestamp.template date_add_interval(interval); + + matched_count++; + + column_idx++; + auto last_match_row = match_row; + for (; column_idx < event_count + 1; column_idx++) { + const auto& event_column = block.get_by_position(column_idx); + const auto& event_data = + assert_cast&>(*event_column.column).get_data(); + if constexpr (WINDOW_FUNNEL_MODE == WindowFunnelMode::FIXED) { + ++match_row; + if (event_data[match_row] == 1) { + auto current_timestamp = + binary_cast(timestamp_data[match_row]); + if (current_timestamp <= end_timestamp) { + matched_count++; + continue; + } + } + break; + } + match_row = simd::find_one(event_data.data(), match_row + 1, 
row_count); + if (match_row < row_count) { + auto current_timestamp = + binary_cast(timestamp_data[match_row]); + bool is_matched = current_timestamp <= end_timestamp; + if (is_matched) { + if constexpr (WINDOW_FUNNEL_MODE == WindowFunnelMode::INCREASE) { + is_matched = current_timestamp > prev_timestamp; + } + } + if (!is_matched) { + break; + } + if constexpr (WINDOW_FUNNEL_MODE == WindowFunnelMode::INCREASE) { + prev_timestamp = + binary_cast(timestamp_data[match_row]); + } + if constexpr (WINDOW_FUNNEL_MODE == WindowFunnelMode::DEDUPLICATION) { + bool is_dup = false; + if (match_row != last_match_row + 1) { + for (int tmp_column_idx = 1; tmp_column_idx < column_idx; + tmp_column_idx++) { + const auto& tmp_event_column = + block.get_by_position(tmp_column_idx); + const auto& tmp_event_data = + assert_cast&>( + *tmp_event_column.column) + .get_data(); + auto dup_match_row = simd::find_one(tmp_event_data.data(), + last_match_row + 1, match_row); + if (dup_match_row < match_row) { + is_dup = true; + break; + } + } + } + if (is_dup) { + break; + } + last_match_row = match_row; + } + matched_count++; + } else { + break; + } + } + } + return matched_count; + } + + template + int _get_internal() const { + size_t start_row = 0; + int max_found_event_count = 0; + const auto& ts_column = block.get_by_position(0).column->get_ptr(); + const auto& timestamp_data = + assert_cast&>(*ts_column).get_data().data(); + + auto row_count = block.rows(); + while (start_row < row_count) { + auto found_event_count = + _match_event_list(start_row, row_count, timestamp_data); + if (found_event_count == event_count) { + return found_event_count; + } + max_found_event_count = std::max(max_found_event_count, found_event_count); + } + return max_found_event_count; + } + int get() const { + auto row_count = block.rows(); + if (event_count == 0 || row_count == 0) { + return 0; + } + switch (window_funnel_mode) { + case WindowFunnelMode::DEFAULT: + return _get_internal(); + case WindowFunnelMode::DEDUPLICATION: + return _get_internal(); + case WindowFunnelMode::FIXED: + return _get_internal(); + case WindowFunnelMode::INCREASE: + return _get_internal(); + default: + throw doris::Exception(ErrorCode::INTERNAL_ERROR, "Invalid window_funnel mode"); + return 0; + } + } + + void merge(const WindowFunnelState& other) { + is_merge = true; + MutableBlock mutable_block(&block); + if (!other.block.empty()) { + auto st = mutable_block.merge(other.block); + if (!st.ok()) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, st.to_string()); + return; + } + } + block = mutable_block.to_block(); + + event_count = event_count > 0 ? event_count : other.event_count; + window = window > 0 ? window : other.window; + if (enable_mode) { + window_funnel_mode = window_funnel_mode == WindowFunnelMode::INVALID + ? 
other.window_funnel_mode + : window_funnel_mode; + } else { + window_funnel_mode = WindowFunnelMode::DEFAULT; + } + } + + void write(BufferWritable& out) const { + write_var_int(event_count, out); + write_var_int(window, out); + if (enable_mode) { + write_var_int(static_cast>(window_funnel_mode), + out); + } + PBlock pblock; + size_t uncompressed_bytes = 0; + size_t compressed_bytes = 0; + Status status; + std::string buff; + if (is_merge) { + status = block.serialize( + BeExecVersionManager::get_newest_version(), &pblock, &uncompressed_bytes, + &compressed_bytes, + segment_v2::CompressionTypePB::ZSTD); // ZSTD for better compression ratio + } else { + Block tmp_block; + tmp_block.insert({std::move(timestamp_column), + DataTypeFactory::instance().create_data_type(TYPE_INDEX), + "timestamp"}); + for (int i = 0; i < event_count; i++) { + tmp_block.insert({std::move(event_columns[i]), + DataTypeFactory::instance().create_data_type(TypeIndex::UInt8), + "event_" + std::to_string(i)}); + } + status = tmp_block.serialize( + BeExecVersionManager::get_newest_version(), &pblock, &uncompressed_bytes, + &compressed_bytes, + segment_v2::CompressionTypePB::ZSTD); // ZSTD for better compression ratio + } + if (!status.ok()) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, status.to_string()); + return; + } + if (!pblock.SerializeToString(&buff)) { + throw doris::Exception(ErrorCode::SERIALIZE_PROTOBUF_ERROR, + "Serialize window_funnel data"); + return; + } + auto data_bytes = buff.size(); + write_var_uint(data_bytes, out); + out.write(buff.data(), data_bytes); + } + + void read(BufferReadable& in) { + int64_t event_level; + read_var_int(event_level, in); + event_count = (int)event_level; + read_var_int(window, in); + window_funnel_mode = WindowFunnelMode::DEFAULT; + if (enable_mode) { + int64_t mode; + read_var_int(mode, in); + window_funnel_mode = static_cast(mode); + } + size_t data_bytes = 0; + read_var_uint(data_bytes, in); + std::string buff; + buff.resize(data_bytes); + in.read(buff.data(), data_bytes); + + PBlock pblock; + if (!pblock.ParseFromArray(buff.data(), data_bytes)) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, + "Failed to parse window_funnel data to block"); + } + auto status = block.deserialize(pblock); + if (!status.ok()) { + throw doris::Exception(ErrorCode::INTERNAL_ERROR, status.to_string()); + } + } +}; + +template +class AggregateFunctionWindowFunnel + : public IAggregateFunctionDataHelper< + WindowFunnelState, + AggregateFunctionWindowFunnel> { +public: + AggregateFunctionWindowFunnel(const DataTypes& argument_types_) + : IAggregateFunctionDataHelper, + AggregateFunctionWindowFunnel>( + argument_types_) {} + + void create(AggregateDataPtr __restrict place) const override { + auto data = new (place) WindowFunnelState( + IAggregateFunction::get_argument_types().size() - 3); + /// support window funnel mode from 2.0. 
See `BeExecVersionManager::max_be_exec_version` + data->enable_mode = version >= 3; + } + + String get_name() const override { return "window_funnel"; } + + DataTypePtr get_return_type() const override { return std::make_shared(); } + + void reset(AggregateDataPtr __restrict place) const override { this->data(place).reset(); } + + void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, + Arena*) const override { + const auto& window = + assert_cast&>(*columns[0]).get_data()[row_num]; + StringRef mode = columns[1]->get_data_at(row_num); + this->data(place).add(columns, row_num, window, + string_to_window_funnel_mode(mode.to_string())); + } + + void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, + Arena*) const override { + this->data(place).merge(this->data(rhs)); + } + + void serialize(ConstAggregateDataPtr __restrict place, BufferWritable& buf) const override { + this->data(place).write(buf); + } + + void deserialize(AggregateDataPtr __restrict place, BufferReadable& buf, + Arena*) const override { + this->data(place).read(buf); + } + + void insert_result_into(ConstAggregateDataPtr __restrict place, IColumn& to) const override { + this->data(const_cast(place)).sort(); + assert_cast(to).get_data().push_back( + IAggregateFunctionDataHelper< + WindowFunnelState, + AggregateFunctionWindowFunnel>::data(place) + .get()); + } + +protected: + using IAggregateFunction::version; +}; + +template +struct WindowFunnelStateOld { std::vector> events; int max_event_level; bool sorted; @@ -81,7 +460,7 @@ struct WindowFunnelState { WindowFunnelMode window_funnel_mode; bool enable_mode; - WindowFunnelState() { + WindowFunnelStateOld() { sorted = true; max_event_level = 0; window = 0; @@ -185,7 +564,7 @@ struct WindowFunnelState { return 0; } - void merge(const WindowFunnelState& other) { + void merge(const WindowFunnelStateOld& other) { if (other.events.empty()) { return; } @@ -258,18 +637,19 @@ struct WindowFunnelState { }; template -class AggregateFunctionWindowFunnel +class AggregateFunctionWindowFunnelOld : public IAggregateFunctionDataHelper< - WindowFunnelState, - AggregateFunctionWindowFunnel> { + WindowFunnelStateOld, + AggregateFunctionWindowFunnelOld> { public: - AggregateFunctionWindowFunnel(const DataTypes& argument_types_) + AggregateFunctionWindowFunnelOld(const DataTypes& argument_types_) : IAggregateFunctionDataHelper< - WindowFunnelState, - AggregateFunctionWindowFunnel>(argument_types_) {} + WindowFunnelStateOld, + AggregateFunctionWindowFunnelOld>( + argument_types_) {} void create(AggregateDataPtr __restrict place) const override { - auto data = new (place) WindowFunnelState(); + auto data = new (place) WindowFunnelStateOld(); /// support window funnel mode from 2.0. 
See `BeExecVersionManager::max_be_exec_version` data->enable_mode = version >= 3; } @@ -318,8 +698,8 @@ class AggregateFunctionWindowFunnel this->data(const_cast(place)).sort(); assert_cast(to).get_data().push_back( IAggregateFunctionDataHelper< - WindowFunnelState, - AggregateFunctionWindowFunnel>::data(place) + WindowFunnelStateOld, + AggregateFunctionWindowFunnelOld>::data(place) .get()); } diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 3588a3a0bffddbd..8dcc2bbef95aff6 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1840,7 +1840,7 @@ public class Config extends ConfigBase { * Max data version of backends serialize block. */ @ConfField(mutable = false) - public static int max_be_exec_version = 6; + public static int max_be_exec_version = 7; /** * Min data version of backends serialize block. diff --git a/regression-test/data/nereids_p0/aggregate/window_funnel.out b/regression-test/data/nereids_p0/aggregate/window_funnel.out index 3396dd90e825860..f16f050c8e69d2a 100644 --- a/regression-test/data/nereids_p0/aggregate/window_funnel.out +++ b/regression-test/data/nereids_p0/aggregate/window_funnel.out @@ -26,3 +26,102 @@ -- !window_funnel_increase -- 2 +-- !window_funnel_neq -- +2 + +-- !window_funnel_default0 -- +100123 4 +100125 3 +100126 2 +100127 2 + +-- !window_funnel_default1 -- +100123 3 +100125 3 +100126 2 +100127 2 + +-- !window_funnel_default2 -- +100123 1 +100125 1 +100126 1 +100127 1 + +-- !window_funnel_default3 -- +100123 1 +100125 1 +100126 1 +100127 1 + +-- !window_funnel_default4 -- +100123 2 +100125 2 +100126 2 +100127 2 + +-- !window_funnel_default5 -- +100123 1 +100125 1 +100126 1 +100127 1 + +-- !window_funnel_default6 -- +100123 4 +100125 3 +100126 2 +100127 2 + +-- !window_funnel_default7 -- +100123 2 +100125 2 +100126 1 +100127 1 + +-- !window_funnel_default8 -- +100123 4 +100125 2 +100126 0 +100127 1 + +-- !window_funnel_default9 -- +100123 4 +100125 3 +100126 4 +100127 2 + +-- !window_funnel_deduplication0 -- +100123 3 +100125 3 +100126 2 +100127 2 + +-- !window_funnel_deduplication1 -- +100123 3 +100125 3 +100126 2 +100127 2 + +-- !window_funnel_deduplication2 -- +100123 3 +100125 3 +100126 2 +100127 2 + +-- !window_funnel_fixed0 -- +100123 2 +100125 3 +100126 2 +100127 2 + +-- !window_funnel_fixed1 -- +100123 2 +100125 3 +100126 2 +100127 2 + +-- !window_funnel_increase0 -- +100123 3 +100125 3 +100126 2 +100127 2 + diff --git a/regression-test/data/nereids_p0/sql_functions/window_functions/window_funnel.out b/regression-test/data/nereids_p0/sql_functions/window_functions/window_funnel.out index 7d7a2488245f506..43948dc795138e6 100644 --- a/regression-test/data/nereids_p0/sql_functions/window_functions/window_funnel.out +++ b/regression-test/data/nereids_p0/sql_functions/window_functions/window_funnel.out @@ -77,3 +77,8 @@ -- !window_funnel_25 -- 1 +-- !window_funnel_26 -- +users 13 +browser 10 +buy 1 + diff --git a/regression-test/suites/nereids_p0/aggregate/window_funnel.groovy b/regression-test/suites/nereids_p0/aggregate/window_funnel.groovy index 02562c49f48db81..5e4eeba7c14a72e 100644 --- a/regression-test/suites/nereids_p0/aggregate/window_funnel.groovy +++ b/regression-test/suites/nereids_p0/aggregate/window_funnel.groovy @@ -294,4 +294,365 @@ suite("window_funnel") { from ${tableName} t; """ sql """ DROP TABLE IF EXISTS ${tableName} """ + + sql """ + CREATE 
TABLE windowfunnel_test ( + `xwho` varchar(50) NULL COMMENT 'xwho', + `xwhen` datetime COMMENT 'xwhen', + `xwhat` int NULL COMMENT 'xwhat' + ) + DUPLICATE KEY(xwho) + DISTRIBUTED BY HASH(xwho) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ); + """ + sql """ + INSERT into windowfunnel_test (xwho, xwhen, xwhat) values ('1', '2022-03-12 10:41:00', 1), + ('1', '2022-03-12 13:28:02', 2), + ('1', '2022-03-12 16:15:01', 3), + ('1', '2022-03-12 19:05:04', 4); + """ + qt_window_funnel_neq """ + select window_funnel(3600 * 24, 'default', t.xwhen, t.xwhat = 1, t.xwhat != 2,t.xwhat=3 ) AS level from windowfunnel_test t; + """ + + sql """ DROP TABLE IF EXISTS windowfunnel_test """ + sql """ + CREATE TABLE windowfunnel_test( + user_id BIGINT, + event_name VARCHAR(64), + event_timestamp datetime, + phone_brand varchar(64), + tab_num int + ) distributed by hash(user_id) buckets 3 properties("replication_num"="1"); + """ + sql """ + INSERT INTO windowfunnel_test VALUES + (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1), + (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2), + (100123, '下单', '2022-05-14 10:04:00', "HONOR", 3), + (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4), + (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1), + (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2), + (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6), + (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1), + (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2), + (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1), + (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5); + """ + // test default mode + qt_window_funnel_default0 """ + SELECT + user_id, + window_funnel(3600 * 3, "default", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + // in 5 minutes + qt_window_funnel_default1 """ + SELECT + user_id, + window_funnel(300, "default", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + // in 30 seconds + qt_window_funnel_default2 """ + SELECT + user_id, + window_funnel(30, "default", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + qt_window_funnel_default3 """ + SELECT + user_id, + window_funnel(3600000000, "default", event_timestamp, event_name = '登录', event_name = '登录',event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + qt_window_funnel_default4 """ + SELECT + user_id, + window_funnel(3600000000, "default", event_timestamp, event_name = '登录', event_name = '访问',event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + qt_window_funnel_default5 """ + SELECT + user_id, + window_funnel(3600000000, "default", event_timestamp, event_name = '登录', event_name = '登录', event_name = '登录', event_name = '登录', event_name = '登录',event_name = '登录', event_name = '登录') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + // complicate expressions + qt_window_funnel_default6 """ + SELECT + user_id, + window_funnel(3600000000, "default", event_timestamp, event_name = '登录', event_name != '登陆', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY 
user_id; + """ + qt_window_funnel_default7 """ + SELECT + user_id, + window_funnel(3600000000, "default", event_timestamp, event_name = '登录', event_name != '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id; + """ + qt_window_funnel_default8 """ + SELECT + user_id, + window_funnel(3600000000, "default", event_timestamp, + event_name = '登录' AND phone_brand in ('HONOR', 'XIAOMI', 'VIVO') AND tab_num not in (4, 5), + event_name = '访问' AND tab_num not in (4, 5), + event_name = '下单' AND tab_num not in (6, 7), + event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id; + """ + + sql """ DROP TABLE IF EXISTS windowfunnel_test """ + sql """ + CREATE TABLE windowfunnel_test( + user_id BIGINT, + event_name VARCHAR(64), + event_timestamp datetime, + phone_brand varchar(64), + tab_num int + ) distributed by hash(user_id) buckets 3 properties("replication_num"="1"); + """ + // test multiple matched event list, output the longest match + sql """ + INSERT INTO windowfunnel_test VALUES + (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1), + (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2), + (100123, '下单', '2022-05-14 10:04:00', "HONOR", 3), + (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1), + (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2), + (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6), + (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1), + (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2), + (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1), + (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5), + (100123, '登录', '2022-05-14 13:01:00', 'HONOR', 1), + (100123, '访问', '2022-05-14 13:02:00', 'HONOR', 2), + (100123, '下单', '2022-05-14 13:04:00', "HONOR", 3), + (100123, '付款', '2022-05-14 13:10:00', 'HONOR', 4), + (100126, '登录', '2022-05-15 14:00:00', 'IPHONE', 1), + (100126, '访问', '2022-05-15 14:01:00', 'HONOR', 2), + (100126, '下单', '2022-05-15 14:02:00', 'HONOR', 3), + (100126, '付款', '2022-05-15 14:03:00', 'HONOR', 4); + """ + qt_window_funnel_default9 """ + SELECT + user_id, + window_funnel(3600, "default", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + + // test deduplication mode + sql """ DROP TABLE IF EXISTS windowfunnel_test """ + sql """ + CREATE TABLE windowfunnel_test( + user_id BIGINT, + event_name VARCHAR(64), + event_timestamp datetime, + phone_brand varchar(64), + tab_num int + ) distributed by hash(user_id) buckets 3 properties("replication_num"="1"); + """ + sql """ + INSERT INTO windowfunnel_test VALUES + (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1), + (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2), + (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4), + (100123, '登录', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录1', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录2', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录3', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录4', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录5', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4), + (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1), + (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2), + (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6), + (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1), + (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2), + (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1), + (100127, '访问', 
'2022-05-15 11:31:00', 'VIVO', 5); + """ + qt_window_funnel_deduplication0 """ + SELECT + user_id, + window_funnel(3600, "deduplication", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + sql """ truncate table windowfunnel_test; """ + sql """ + INSERT INTO windowfunnel_test VALUES + (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1), + (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2), + (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4), + (100123, '登录1', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录2', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录3', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录4', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录5', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '访问', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4), + (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1), + (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2), + (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6), + (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1), + (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2), + (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1), + (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5); + """ + qt_window_funnel_deduplication1 """ + SELECT + user_id, + window_funnel(3600, "deduplication", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + sql """ truncate table windowfunnel_test; """ + sql """ + INSERT INTO windowfunnel_test VALUES + (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1), + (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2), + (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4), + (100123, '登录1', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录2', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录3', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录4', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '登录5', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '下单', '2022-05-14 10:04:00', 'HONOR', 3), + (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4), + (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1), + (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2), + (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6), + (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1), + (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2), + (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1), + (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5); + """ + qt_window_funnel_deduplication2 """ + SELECT + user_id, + window_funnel(3600, "deduplication", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + + + // test fixed mode + sql """ truncate table windowfunnel_test; """ + sql """ + INSERT INTO windowfunnel_test VALUES + (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1), + (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2), + (100123, '登录', '2022-05-14 10:03:00', 'HONOR', 3), + (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4), + (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4), + (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1), + (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2), + (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6), + (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1), + (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2), + (100127, '登录', '2022-05-15 
11:30:00', 'VIVO', 1), + (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5); + """ + qt_window_funnel_fixed0 """ + SELECT + user_id, + window_funnel(3600, "fixed", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + sql """ DROP TABLE IF EXISTS windowfunnel_test """ + sql """ + CREATE TABLE windowfunnel_test( + user_id BIGINT, + event_name VARCHAR(64), + event_timestamp datetime, + phone_brand varchar(64), + tab_num int + ) distributed by hash(user_id) buckets 3 properties("replication_num"="1"); + """ + sql """ + INSERT INTO windowfunnel_test VALUES + (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1), + (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2), + (100123, '登录2', '2022-05-14 10:03:00', 'HONOR', 3), + (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4), + (100123, '付款', '2022-05-14 10:10:00', 'HONOR', 4), + (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1), + (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2), + (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6), + (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1), + (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2), + (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1), + (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5); + """ + qt_window_funnel_fixed1 """ + SELECT + user_id, + window_funnel(3600, "fixed", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + + // test increase mode + sql """ DROP TABLE IF EXISTS windowfunnel_test """ + sql """ + CREATE TABLE windowfunnel_test( + user_id BIGINT, + event_name VARCHAR(64), + event_timestamp datetime, + phone_brand varchar(64), + tab_num int + ) distributed by hash(user_id) buckets 3 properties("replication_num"="1"); + """ + sql """ + INSERT INTO windowfunnel_test VALUES + (100123, '登录', '2022-05-14 10:01:00', 'HONOR', 1), + (100123, '访问', '2022-05-14 10:02:00', 'HONOR', 2), + (100123, '下单', '2022-05-14 10:04:00', "HONOR", 4), + (100123, '付款', '2022-05-14 10:04:00', 'HONOR', 4), + (100125, '登录', '2022-05-15 11:00:00', 'XIAOMI', 1), + (100125, '访问', '2022-05-15 11:01:00', 'XIAOMI', 2), + (100125, '下单', '2022-05-15 11:02:00', 'XIAOMI', 6), + (100126, '登录', '2022-05-15 12:00:00', 'IPHONE', 1), + (100126, '访问', '2022-05-15 12:01:00', 'HONOR', 2), + (100127, '登录', '2022-05-15 11:30:00', 'VIVO', 1), + (100127, '访问', '2022-05-15 11:31:00', 'VIVO', 5); + """ + qt_window_funnel_increase0 """ + SELECT + user_id, + window_funnel(3600, "increase", event_timestamp, event_name = '登录', event_name = '访问', event_name = '下单', event_name = '付款') AS level + FROM windowfunnel_test + GROUP BY user_id + order BY user_id + """ + } diff --git a/regression-test/suites/nereids_p0/sql_functions/window_functions/window_funnel.sql b/regression-test/suites/nereids_p0/sql_functions/window_functions/window_funnel.sql index 850be3e4f4b8c01..00fc65c1cc848cb 100644 --- a/regression-test/suites/nereids_p0/sql_functions/window_functions/window_funnel.sql +++ b/regression-test/suites/nereids_p0/sql_functions/window_functions/window_funnel.sql @@ -53,41 +53,40 @@ insert into user_analysis values (1000012,'browse','2022-07-28 00:00:00'); insert into user_analysis values (1000013,'browse','2022-07-29 00:00:00'); insert into user_analysis values (1000014,'browse','2022-07-30 00:00:00'); insert into user_analysis values (1000015,'browse','2022-07-31 00:00:00'); ---- Nereids does't support window 
function ---- WITH ---- level_detail AS ( ---- SELECT ---- level ---- ,COUNT(1) AS count_user ---- FROM ( ---- SELECT ---- user_id ---- ,window_funnel( ---- 1800 ---- ,'default' ---- ,event_time ---- ,event_type = 'browse' ---- ,event_type = 'favorite' ---- ,event_type = 'shopping cart' ---- ,event_type = 'buy' ---- ) AS level ---- FROM user_analysis ---- WHERE event_time >= TIMESTAMP '2022-07-17 00:00:00' ---- AND event_time < TIMESTAMP '2022-07-31 00:00:00' ---- GROUP BY user_id ---- ) AS basic_table ---- GROUP BY level ---- ORDER BY level ASC ) ---- SELECT CASE level WHEN 0 THEN 'users' ---- WHEN 1 THEN 'browser' ---- WHEN 2 THEN 'favorite' ---- WHEN 3 THEN 'shopping cart' ---- WHEN 4 THEN 'buy' ---- END ---- ,SUM(count_user) over ( ORDER BY level DESC ) ---- FROM level_detail ---- GROUP BY level ---- ,count_user ---- ORDER BY level ASC; +WITH + level_detail AS ( + SELECT + level + ,COUNT(1) AS count_user + FROM ( + SELECT + user_id + ,window_funnel( + 1800 + ,'default' + ,event_time + ,event_type = 'browse' + ,event_type = 'favorite' + ,event_type = 'shopping cart' + ,event_type = 'buy' + ) AS level + FROM user_analysis + WHERE event_time >= TIMESTAMP '2022-07-17 00:00:00' + AND event_time < TIMESTAMP '2022-07-31 00:00:00' + GROUP BY user_id + ) AS basic_table + GROUP BY level + ORDER BY level ASC ) +SELECT CASE level WHEN 0 THEN 'users' + WHEN 1 THEN 'browser' + WHEN 2 THEN 'favorite' + WHEN 3 THEN 'shopping cart' + WHEN 4 THEN 'buy' + END + ,SUM(count_user) over ( ORDER BY level DESC ) +FROM level_detail +GROUP BY level + ,count_user +ORDER BY level ASC; From c47399cc0d0dbd0e97ed31810b8c777577a72f96 Mon Sep 17 00:00:00 2001 From: LiBinfeng <46676950+LiBinfeng-01@users.noreply.github.com> Date: Mon, 12 Aug 2024 16:32:44 +0800 Subject: [PATCH 62/94] [fix](Nereids) fix insert into table with null literal default value (#39122) Problem: when use insert with default value null, it can not be insert successfully Solved: when column is allow to be null, it can be null in create table with null default value --- .../doris/analysis/NativeInsertStmt.java | 8 +++- .../java/org/apache/doris/catalog/Column.java | 3 ++ .../nereids/rules/analysis/BindSink.java | 2 +- .../plans/commands/insert/InsertUtils.java | 4 +- .../insert/test_insert_default_value.out | 15 ++++-- .../insert/test_insert_default_value.groovy | 48 ++++++++++++++++++- 6 files changed, 71 insertions(+), 9 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java index ae3c2371e73c01b..64ab872ea8b9065 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java @@ -923,11 +923,15 @@ private void analyzeRow(Analyzer analyzer, List targetColumns, List getColumnToOutput( } else if (column.getDefaultValue() == null) { // throw exception if explicitly use Default value but no default value present // insert into table t values(DEFAULT) - if (columnToChildOutput.get(column) instanceof DefaultValueSlot) { + if (columnToChildOutput.get(column) instanceof DefaultValueSlot && !column.isAllowNull()) { throw new AnalysisException("Column has no default value," + " column=" + column.getName()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/InsertUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/InsertUtils.java index 
67374254c8a6f2e..03ca58e99d103a2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/InsertUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/InsertUtils.java @@ -427,7 +427,9 @@ private static NamedExpression generateDefaultExpression(Column column) { return new Alias(new NullLiteral(DataType.fromCatalogType(column.getType())), column.getName()); } if (column.getDefaultValue() == null) { - throw new AnalysisException("Column has no default value, column=" + column.getName()); + if (!column.isAllowNull()) { + throw new AnalysisException("Column has no default value, column=" + column.getName()); + } } if (column.getDefaultValueExpr() != null) { Expression defualtValueExpression = new NereidsParser().parseExpression( diff --git a/regression-test/data/load_p0/insert/test_insert_default_value.out b/regression-test/data/load_p0/insert/test_insert_default_value.out index 7fc34c82fda0ca9..c63e8496bfab368 100644 --- a/regression-test/data/load_p0/insert/test_insert_default_value.out +++ b/regression-test/data/load_p0/insert/test_insert_default_value.out @@ -1,8 +1,15 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !select1 -- -10 10000 10000000 92233720368547758 19223372036854775807 10.3 10.3 -10 10000 10000000 92233720368547758 19223372036854775807 10.3 10.3 +10 10000 10000000 92233720368547758 19223372036854775807 10.30 10.3 +10 10000 10000000 92233720368547758 19223372036854775807 10.30 10.3 -- !select2 -- -true 10 10000 10000000 92233720368547758 19223372036854775807 3.14159 hello world, today is 15/06/2023 2023-06-15 2023-06-15T16:10:15 10.3 -true 10 10000 10000000 92233720368547758 19223372036854775807 3.14159 hello world, today is 15/06/2023 2023-06-15 2023-06-15T16:10:15 10.3 +true 10 10000 10000000 92233720368547758 19223372036854775807 3.14159 hello world, today is 15/06/2023 2023-06-15 2023-06-15T16:10:15 10.30 +true 10 10000 10000000 92233720368547758 19223372036854775807 3.14159 hello world, today is 15/06/2023 2023-06-15 2023-06-15T16:10:15 10.30 + +-- !select3 -- +1 2 test 0 0 0 \N 0.0 0 0 0 \N \N + +-- !select4 -- +1 2 test 0 0 0 \N 0.0 0 0 0 \N \N + diff --git a/regression-test/suites/load_p0/insert/test_insert_default_value.groovy b/regression-test/suites/load_p0/insert/test_insert_default_value.groovy index 1e894196e2898e0..38b512370b4207a 100644 --- a/regression-test/suites/load_p0/insert/test_insert_default_value.groovy +++ b/regression-test/suites/load_p0/insert/test_insert_default_value.groovy @@ -82,4 +82,50 @@ suite("test_insert_default_value") { qt_select2 """ select k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11 from test_insert_dft_tbl """ sql "drop table test_insert_dft_tbl" -} \ No newline at end of file + + sql "drop table if exists test_insert_default_null" + sql """ + CREATE TABLE `test_insert_default_null` ( + `gz_organization_id` int(11) DEFAULT '1', + `company_id` int(11) NOT NULL, + `material_id` varchar(120) NOT NULL COMMENT '素材id', + `material_info_type` varchar(40) DEFAULT '', + `signature` varchar(260) DEFAULT '' COMMENT 'md5', + `size` int(11) DEFAULT '0' COMMENT '大小', + `width` int(11) DEFAULT '0' COMMENT '宽', + `height` int(11) DEFAULT '0' COMMENT '高', + `format` varchar(80) DEFAULT '' COMMENT '格式', + `upload_time` datetime DEFAULT NULL COMMENT '上传时间', + `filename` varchar(500) DEFAULT '' COMMENT '名字', + `duration` decimal(10,1) DEFAULT '0' COMMENT '视频时长', + `producer_name` varchar(200) DEFAULT '', + `producer_id` int(11) 
DEFAULT '0', + `producer_department_path` varchar(100) DEFAULT '', + `producer_special_id` int(11) DEFAULT '0', + `producer_node_id` int(11) DEFAULT '0', + `update_time` datetime DEFAULT null, + `create_time` datetime DEFAULT null, + INDEX idx_filename(filename) USING INVERTED PROPERTIES("parser" = "chinese"), + ) ENGINE=OLAP + UNIQUE KEY(`gz_organization_id`, `company_id`, `material_id`) + DISTRIBUTED BY HASH(`material_id`) BUCKETS 3 + PROPERTIES ( + "store_row_column" = "true", + "enable_unique_key_merge_on_write" = "true", + "replication_num" = "1" + ); + """ + + sql """ set enable_nereids_planner=true """ + sql """ set enable_nereids_dml=true """ + sql """ INSERT INTO `test_insert_default_null` (gz_organization_id, `company_id`, `material_id`, create_time) VALUES ('1', '2', 'test', DEFAULT); """ + qt_select3 """ select * from test_insert_default_null;""" + sql """ truncate table test_insert_default_null;""" + + sql """ set enable_nereids_planner=false """ + sql """ set enable_nereids_dml=false """ + sql """ INSERT INTO `test_insert_default_null` (gz_organization_id, `company_id`, `material_id`, create_time) VALUES ('1', '2', 'test', DEFAULT); """ + + qt_select4 """ select * from test_insert_default_null;""" + sql "drop table if exists test_insert_default_null" +} From 285f68a098b688ab088dd2954f8b7d53532532f9 Mon Sep 17 00:00:00 2001 From: walter Date: Mon, 12 Aug 2024 18:01:38 +0800 Subject: [PATCH 63/94] [chore](table) Add batch method to get visible version of the olap table (#38949) Since get visible version is a heavy operation in the cloud mode, this PR add a batch method, to obtain all visible versions via only one RPC. --- cloud/src/meta-service/meta_service.cpp | 11 +- .../org/apache/doris/catalog/OlapTable.java | 116 +++++++++++++----- .../doris/cloud/catalog/CloudPartition.java | 17 +-- .../apache/doris/cloud/rpc/VersionHelper.java | 38 +++++- 4 files changed, 132 insertions(+), 50 deletions(-) diff --git a/cloud/src/meta-service/meta_service.cpp b/cloud/src/meta-service/meta_service.cpp index ecf054b68b6e762..0b27d6d50f6198b 100644 --- a/cloud/src/meta-service/meta_service.cpp +++ b/cloud/src/meta-service/meta_service.cpp @@ -315,11 +315,14 @@ void MetaServiceImpl::batch_get_version(::google::protobuf::RpcController* contr return; } - size_t num_acquired = request->partition_ids_size(); + size_t num_acquired = + is_table_version ? 
request->table_ids_size() : request->partition_ids_size(); response->mutable_versions()->Reserve(num_acquired); response->mutable_db_ids()->CopyFrom(request->db_ids()); response->mutable_table_ids()->CopyFrom(request->table_ids()); - response->mutable_partition_ids()->CopyFrom(request->partition_ids()); + if (!is_table_version) { + response->mutable_partition_ids()->CopyFrom(request->partition_ids()); + } constexpr size_t BATCH_SIZE = 500; std::vector version_keys; @@ -327,7 +330,7 @@ void MetaServiceImpl::batch_get_version(::google::protobuf::RpcController* contr version_keys.reserve(BATCH_SIZE); version_values.reserve(BATCH_SIZE); while ((code == MetaServiceCode::OK || code == MetaServiceCode::KV_TXN_TOO_OLD) && - response->versions_size() < response->partition_ids_size()) { + response->versions_size() < num_acquired) { std::unique_ptr txn; TxnErrorCode err = txn_kv_->create_txn(&txn); if (err != TxnErrorCode::TXN_OK) { @@ -343,11 +346,11 @@ void MetaServiceImpl::batch_get_version(::google::protobuf::RpcController* contr for (size_t j = i; j < limit; j++) { int64_t db_id = request->db_ids(j); int64_t table_id = request->table_ids(j); - int64_t partition_id = request->partition_ids(j); std::string ver_key; if (is_table_version) { table_version_key({instance_id, db_id, table_id}, &ver_key); } else { + int64_t partition_id = request->partition_ids(j); partition_version_key({instance_id, db_id, table_id, partition_id}, &ver_key); } version_keys.push_back(std::move(ver_key)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index 884cd4f4054e692..d7ac361d577efd0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -47,9 +47,9 @@ import org.apache.doris.common.UserException; import org.apache.doris.common.io.DeepCopy; import org.apache.doris.common.io.Text; -import org.apache.doris.common.profile.SummaryProfile; import org.apache.doris.common.util.PropertyAnalyzer; import org.apache.doris.common.util.Util; +import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.mtmv.MTMVRelatedTableIf; import org.apache.doris.mtmv.MTMVSnapshotIf; import org.apache.doris.mtmv.MTMVVersionSnapshot; @@ -57,7 +57,6 @@ import org.apache.doris.persist.gson.GsonUtils; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.OriginStatement; -import org.apache.doris.qe.StmtExecutor; import org.apache.doris.resource.Tag; import org.apache.doris.rpc.RpcException; import org.apache.doris.statistics.AnalysisInfo; @@ -2225,7 +2224,6 @@ public int getBaseSchemaVersion() { return baseIndexMeta.getSchemaVersion(); } - public void setEnableSingleReplicaCompaction(boolean enableSingleReplicaCompaction) { if (tableProperty == null) { tableProperty = new TableProperty(new HashMap<>()); @@ -2849,6 +2847,7 @@ public long getVisibleVersion() { if (Config.isNotCloudMode()) { return tableAttributes.getVisibleVersion(); } + // get version rpc Cloud.GetVersionRequest request = Cloud.GetVersionRequest.newBuilder() .setDbId(this.getDatabase().getId()) @@ -2858,7 +2857,7 @@ public long getVisibleVersion() { .build(); try { - Cloud.GetVersionResponse resp = getVersionFromMeta(request); + Cloud.GetVersionResponse resp = VersionHelper.getVersionFromMeta(request); long version = -1; if (resp.getStatus().getCode() == Cloud.MetaServiceCode.OK) { version = resp.getVersion(); @@ -2874,7 +2873,90 @@ public long 
getVisibleVersion() { } return version; } catch (RpcException e) { - throw new RuntimeException("get version from meta service failed"); + throw new RuntimeException("get version from meta service failed", e); + } + } + + // Get the table versions in batch. + public static List getVisibleVersionByTableIds(Collection tableIds) { + List tables = new ArrayList<>(); + + InternalCatalog catalog = Env.getCurrentEnv().getInternalCatalog(); + for (long tableId : tableIds) { + Table table = catalog.getTableByTableId(tableId); + if (table == null) { + throw new RuntimeException("get table visible version failed, no such table " + tableId + " exists"); + } + if (table.getType() != TableType.OLAP) { + throw new RuntimeException( + "get table visible version failed, table " + tableId + " is not a OLAP table"); + } + tables.add((OlapTable) table); + } + + return getVisibleVersionInBatch(tables); + } + + // Get the table versions in batch. + public static List getVisibleVersionInBatch(Collection tables) { + if (tables.isEmpty()) { + return new ArrayList<>(); + } + + if (Config.isNotCloudMode()) { + return tables.stream() + .map(table -> table.tableAttributes.getVisibleVersion()) + .collect(Collectors.toList()); + } + + List dbIds = new ArrayList<>(); + List tableIds = new ArrayList<>(); + for (OlapTable table : tables) { + dbIds.add(table.getDatabase().getId()); + tableIds.add(table.getId()); + } + + return getVisibleVersionFromMeta(dbIds, tableIds); + } + + private static List getVisibleVersionFromMeta(List dbIds, List tableIds) { + // get version rpc + Cloud.GetVersionRequest request = Cloud.GetVersionRequest.newBuilder() + .setDbId(-1) + .setTableId(-1) + .setPartitionId(-1) + .addAllDbIds(dbIds) + .addAllTableIds(tableIds) + .setBatchMode(true) + .setIsTableVersion(true) + .build(); + + try { + Cloud.GetVersionResponse resp = VersionHelper.getVersionFromMeta(request); + if (resp.getStatus().getCode() != Cloud.MetaServiceCode.OK) { + throw new RpcException("get table visible version", "unexpected status " + resp.getStatus()); + } + + List versions = resp.getVersionsList(); + if (versions.size() != tableIds.size()) { + throw new RpcException("get table visible version", + "wrong number of versions, required " + tableIds.size() + ", but got " + versions.size()); + } + + if (LOG.isDebugEnabled()) { + LOG.debug("get table version from meta service, tables: {}, versions: {}", tableIds, versions); + } + + for (int i = 0; i < versions.size(); i++) { + // Set visible version to 1 if no such table version exists. 
+ if (versions.get(i) <= 0L) { + versions.set(i, 1L); + } + } + + return versions; + } catch (RpcException e) { + throw new RuntimeException("get table version from meta service failed", e); } } @@ -2921,19 +3003,6 @@ public MTMVSnapshotIf getTableSnapshot() { return new MTMVVersionSnapshot(visibleVersion); } - private static Cloud.GetVersionResponse getVersionFromMeta(Cloud.GetVersionRequest req) - throws RpcException { - long startAt = System.nanoTime(); - try { - return VersionHelper.getVisibleVersion(req); - } finally { - SummaryProfile profile = getSummaryProfile(); - if (profile != null) { - profile.addGetTableVersionTime(System.nanoTime() - startAt); - } - } - } - @Override public boolean needAutoRefresh() { return true; @@ -2944,17 +3013,6 @@ public boolean isPartitionColumnAllowNull() { return true; } - private static SummaryProfile getSummaryProfile() { - ConnectContext ctx = ConnectContext.get(); - if (ctx != null) { - StmtExecutor executor = ctx.getExecutor(); - if (executor != null) { - return executor.getSummaryProfile(); - } - } - return null; - } - public void setStatistics(Statistics statistics) { this.statistics = statistics; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudPartition.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudPartition.java index 5036a0e01c44645..b2a9751394f2d86 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudPartition.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudPartition.java @@ -125,7 +125,7 @@ public long getVisibleVersion() { .build(); try { - Cloud.GetVersionResponse resp = getVersionFromMeta(request); + Cloud.GetVersionResponse resp = VersionHelper.getVersionFromMeta(request); long version = -1; if (resp.getStatus().getCode() == MetaServiceCode.OK) { version = resp.getVersion(); @@ -238,7 +238,7 @@ public static List getSnapshotVisibleVersion(List dbIds, List if (LOG.isDebugEnabled()) { LOG.debug("getVisibleVersion use CloudPartition {}", partitionIds.toString()); } - Cloud.GetVersionResponse resp = getVersionFromMeta(req); + Cloud.GetVersionResponse resp = VersionHelper.getVersionFromMeta(req); if (resp.getStatus().getCode() != MetaServiceCode.OK) { throw new RpcException("get visible version", "unexpected status " + resp.getStatus()); } @@ -339,19 +339,6 @@ public boolean hasData() { return getVisibleVersion() > Partition.PARTITION_INIT_VERSION; } - private static Cloud.GetVersionResponse getVersionFromMeta(Cloud.GetVersionRequest req) - throws RpcException { - long startAt = System.nanoTime(); - try { - return VersionHelper.getVisibleVersion(req); - } finally { - SummaryProfile profile = getSummaryProfile(); - if (profile != null) { - profile.addGetPartitionVersionTime(System.nanoTime() - startAt); - } - } - } - private static boolean isEmptyPartitionPruneDisabled() { ConnectContext ctx = ConnectContext.get(); if (ctx != null && (ctx.getSessionVariable().getDisableNereidsRules().get(RuleType.valueOf( diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/VersionHelper.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/VersionHelper.java index 1192d42af892026..703f8d2675cac6f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/VersionHelper.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/VersionHelper.java @@ -19,6 +19,9 @@ import org.apache.doris.cloud.proto.Cloud; import org.apache.doris.common.Config; +import org.apache.doris.common.profile.SummaryProfile; +import 
org.apache.doris.qe.ConnectContext; +import org.apache.doris.qe.StmtExecutor; import org.apache.doris.rpc.RpcException; import org.apache.logging.log4j.LogManager; @@ -32,6 +35,26 @@ public class VersionHelper { private static final Logger LOG = LogManager.getLogger(VersionHelper.class); + // Call get_version() from meta service, and save the elapsed to summary profile. + public static Cloud.GetVersionResponse getVersionFromMeta(Cloud.GetVersionRequest req) + throws RpcException { + long startAt = System.nanoTime(); + boolean isTableVersion = req.getIsTableVersion(); + try { + return getVisibleVersion(req); + } finally { + SummaryProfile profile = getSummaryProfile(); + if (profile != null) { + long elapsed = System.nanoTime() - startAt; + if (isTableVersion) { + profile.addGetTableVersionTime(elapsed); + } else { + profile.addGetPartitionVersionTime(elapsed); + } + } + } + } + public static Cloud.GetVersionResponse getVisibleVersion(Cloud.GetVersionRequest request) throws RpcException { int tryTimes = 0; while (tryTimes++ < Config.metaServiceRpcRetryTimes()) { @@ -65,8 +88,7 @@ public static Cloud.GetVersionResponse getVisibleVersionInternal(Cloud.GetVersio long deadline = System.currentTimeMillis() + timeoutMs; Cloud.GetVersionResponse resp = null; try { - Future future = - MetaServiceProxy.getInstance().getVisibleVersionAsync(request); + Future future = MetaServiceProxy.getInstance().getVisibleVersionAsync(request); while (resp == null) { try { @@ -89,4 +111,16 @@ private static void sleepSeveralMs(int lowerMs, int upperMs) { LOG.warn("get snapshot from meta service: sleep get interrupted exception"); } } + + private static SummaryProfile getSummaryProfile() { + ConnectContext ctx = ConnectContext.get(); + if (ctx != null) { + StmtExecutor executor = ctx.getExecutor(); + if (executor != null) { + return executor.getSummaryProfile(); + } + } + return null; + } + } From 1ab9e9dcb6b0efdd699df98d43d1a5691e43f63d Mon Sep 17 00:00:00 2001 From: camby Date: Mon, 12 Aug 2024 19:46:41 +0800 Subject: [PATCH 64/94] [opt](log) avoid lots of json parse error logs (#39190) avoid large error logs like: ``` W0810 16:13:39.178457 674481 function_jsonb.cpp:258] json parse error: Empty document for value: W0810 16:13:39.178460 674481 function_jsonb.cpp:258] json parse error: Empty document for value: ``` --- be/src/vec/functions/function_jsonb.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/be/src/vec/functions/function_jsonb.cpp b/be/src/vec/functions/function_jsonb.cpp index d5310e73e2679ff..53ccec756fd109b 100644 --- a/be/src/vec/functions/function_jsonb.cpp +++ b/be/src/vec/functions/function_jsonb.cpp @@ -254,8 +254,6 @@ class FunctionJsonbParseBase : public IFunction { (size_t)parser.getWriter().getOutput()->getSize()); } else { error = parser.getErrorCode(); - LOG(WARNING) << "json parse error: " << JsonbErrMsg::getErrMsg(error) - << " for value: " << std::string_view(val.data, val.size); switch (parse_error_handle_mode) { case JsonbParseErrorMode::FAIL: From edbe6c659a5c5e7218817627b992a129bf53419f Mon Sep 17 00:00:00 2001 From: wangbo Date: Mon, 12 Aug 2024 20:11:54 +0800 Subject: [PATCH 65/94] [Fix]Fix wg test failed in cloud mode (#39034) --- .../suites/workload_manager_p0/test_curd_wlg.groovy | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/regression-test/suites/workload_manager_p0/test_curd_wlg.groovy b/regression-test/suites/workload_manager_p0/test_curd_wlg.groovy index 76721728bf202e9..41cc190a017afa6 100644 --- 
a/regression-test/suites/workload_manager_p0/test_curd_wlg.groovy +++ b/regression-test/suites/workload_manager_p0/test_curd_wlg.groovy @@ -688,6 +688,13 @@ suite("test_crud_wlg") { //4 test row filter sql "create user test_wg_priv_user2" sql "grant SELECT_PRIV on *.*.* to test_wg_priv_user2" + //cloud-mode + if (isCloudMode()) { + def clusters = sql " SHOW CLUSTERS; " + assertTrue(!clusters.isEmpty()) + def validCluster = clusters[0][0] + sql """GRANT USAGE_PRIV ON CLUSTER ${validCluster} TO test_wg_priv_user2"""; + } connect(user = 'test_wg_priv_user2', password = '', url = context.config.jdbcUrl) { qt_select_wgp_11 "select GRANTEE,WORKLOAD_GROUP_NAME,PRIVILEGE_TYPE,IS_GRANTABLE from information_schema.workload_group_privileges where grantee like '%test_wg_priv%' order by GRANTEE,WORKLOAD_GROUP_NAME,PRIVILEGE_TYPE,IS_GRANTABLE; " } From dfa0156606b0de21de645b918ed0cc2e1bdaca23 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Mon, 12 Aug 2024 20:33:02 +0800 Subject: [PATCH 66/94] [refactor](minor) Delete non-pipeline code (#39156) --- be/src/pipeline/exec/file_scan_operator.cpp | 6 +++--- be/src/vec/exec/scan/new_es_scanner.cpp | 1 - be/src/vec/exec/scan/new_es_scanner.h | 8 +------ be/src/vec/exec/scan/new_jdbc_scanner.h | 3 +-- be/src/vec/exec/scan/new_olap_scanner.cpp | 6 +----- be/src/vec/exec/scan/new_olap_scanner.h | 3 --- be/src/vec/exec/scan/scanner_context.cpp | 11 +++++----- be/src/vec/exec/scan/scanner_context.h | 2 -- be/src/vec/exec/scan/vfile_scanner.cpp | 23 ++++++++++----------- be/src/vec/exec/scan/vfile_scanner.h | 10 ++++----- be/src/vec/exec/scan/vmeta_scanner.cpp | 1 - be/src/vec/exec/scan/vmeta_scanner.h | 5 +---- be/src/vec/exec/scan/vscanner.h | 6 ++---- be/test/vec/exec/vwal_scanner_test.cpp | 10 ++++----- 14 files changed, 34 insertions(+), 61 deletions(-) diff --git a/be/src/pipeline/exec/file_scan_operator.cpp b/be/src/pipeline/exec/file_scan_operator.cpp index d73cfc405fd008d..686f8be30218e01 100644 --- a/be/src/pipeline/exec/file_scan_operator.cpp +++ b/be/src/pipeline/exec/file_scan_operator.cpp @@ -44,9 +44,9 @@ Status FileScanLocalState::_init_scanners(std::list* s _kv_cache.reset(new vectorized::ShardedKVCache(shard_num)); for (int i = 0; i < _max_scanners; ++i) { std::unique_ptr scanner = vectorized::VFileScanner::create_unique( - state(), this, p._limit, _split_source, _scanner_profile.get(), _kv_cache.get()); - RETURN_IF_ERROR( - scanner->prepare(_conjuncts, &_colname_to_value_range, &p._colname_to_slot_id)); + state(), this, p._limit, _split_source, _scanner_profile.get(), _kv_cache.get(), + &_colname_to_value_range, &p._colname_to_slot_id); + RETURN_IF_ERROR(scanner->prepare(state(), _conjuncts)); scanners->push_back(std::move(scanner)); } return Status::OK(); diff --git a/be/src/vec/exec/scan/new_es_scanner.cpp b/be/src/vec/exec/scan/new_es_scanner.cpp index 792391b3a54e97f..d59aebd98c73411 100644 --- a/be/src/vec/exec/scan/new_es_scanner.cpp +++ b/be/src/vec/exec/scan/new_es_scanner.cpp @@ -32,7 +32,6 @@ namespace doris::vectorized { class VExprContext; -class VScanNode; } // namespace doris::vectorized static const std::string NEW_SCANNER_TYPE = "NewEsScanner"; diff --git a/be/src/vec/exec/scan/new_es_scanner.h b/be/src/vec/exec/scan/new_es_scanner.h index 106955299c363ed..e1fe367cadeded3 100644 --- a/be/src/vec/exec/scan/new_es_scanner.h +++ b/be/src/vec/exec/scan/new_es_scanner.h @@ -45,8 +45,6 @@ class VExprContext; namespace doris::vectorized { -class NewEsScanNode; - class NewEsScanner : public VScanner { ENABLE_FACTORY_CREATOR(NewEsScanner); @@ 
-58,17 +56,13 @@ class NewEsScanner : public VScanner { Status open(RuntimeState* state) override; Status close(RuntimeState* state) override; - -public: - Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts); + Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts) override; protected: Status _get_block_impl(RuntimeState* state, Block* block, bool* eof) override; private: Status _get_next(std::vector& columns); - -private: bool _es_eof; const std::map& _properties; diff --git a/be/src/vec/exec/scan/new_jdbc_scanner.h b/be/src/vec/exec/scan/new_jdbc_scanner.h index c8a6e88604fe960..92188e43f37f8d0 100644 --- a/be/src/vec/exec/scan/new_jdbc_scanner.h +++ b/be/src/vec/exec/scan/new_jdbc_scanner.h @@ -37,7 +37,6 @@ class TupleDescriptor; namespace vectorized { class Block; -class NewJdbcScanNode; class VExprContext; class NewJdbcScanner : public VScanner { @@ -52,7 +51,7 @@ class NewJdbcScanner : public VScanner { Status open(RuntimeState* state) override; Status close(RuntimeState* state) override; - Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts); + Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts) override; protected: Status _get_block_impl(RuntimeState* state, Block* block, bool* eos) override; diff --git a/be/src/vec/exec/scan/new_olap_scanner.cpp b/be/src/vec/exec/scan/new_olap_scanner.cpp index 0b06b8e7dd4b0fe..a4733ad20da9a91 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.cpp +++ b/be/src/vec/exec/scan/new_olap_scanner.cpp @@ -125,10 +125,6 @@ static std::string read_columns_to_string(TabletSchemaSPtr tablet_schema, return read_columns_string; } -Status NewOlapScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts) { - return VScanner::prepare(state, conjuncts); -} - Status NewOlapScanner::init() { _is_init = true; auto* local_state = static_cast(_local_state); @@ -668,7 +664,7 @@ void NewOlapScanner::_collect_profile_before_close() { // Update counters from tablet reader's stats auto& stats = _tablet_reader->stats(); - pipeline::OlapScanLocalState* local_state = (pipeline::OlapScanLocalState*)_local_state; + auto* local_state = (pipeline::OlapScanLocalState*)_local_state; INCR_COUNTER(local_state); #undef INCR_COUNTER diff --git a/be/src/vec/exec/scan/new_olap_scanner.h b/be/src/vec/exec/scan/new_olap_scanner.h index 4eb296f6d357720..df6ff0411436062 100644 --- a/be/src/vec/exec/scan/new_olap_scanner.h +++ b/be/src/vec/exec/scan/new_olap_scanner.h @@ -50,7 +50,6 @@ struct FilterPredicates; namespace vectorized { -class NewOlapScanNode; class Block; class NewOlapScanner : public VScanner { @@ -76,8 +75,6 @@ class NewOlapScanner : public VScanner { Status close(RuntimeState* state) override; - Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts); - void set_compound_filters(const std::vector& compound_filters); doris::TabletStorageType get_storage_type() override; diff --git a/be/src/vec/exec/scan/scanner_context.cpp b/be/src/vec/exec/scan/scanner_context.cpp index 4b09260f69a056e..bab11616c77c061 100644 --- a/be/src/vec/exec/scan/scanner_context.cpp +++ b/be/src/vec/exec/scan/scanner_context.cpp @@ -73,16 +73,15 @@ ScannerContext::ScannerContext( limit = -1; } MAX_SCALE_UP_RATIO = _state->scanner_scale_up_ratio(); - _max_thread_num = _state->num_scanner_threads() > 0 - ? _state->num_scanner_threads() - : config::doris_scanner_thread_pool_thread_num / - (_local_state ? 
num_parallel_instances - : state->query_parallel_instance_num()); + _max_thread_num = + _state->num_scanner_threads() > 0 + ? _state->num_scanner_threads() + : config::doris_scanner_thread_pool_thread_num / num_parallel_instances; _max_thread_num = _max_thread_num == 0 ? 1 : _max_thread_num; _max_thread_num = std::min(_max_thread_num, (int32_t)scanners.size()); // 1. Calculate max concurrency // For select * from table limit 10; should just use one thread. - if (_local_state && _local_state->should_run_serial()) { + if (_local_state->should_run_serial()) { _max_thread_num = 1; } // when user not specify scan_thread_num, so we can try downgrade _max_thread_num. diff --git a/be/src/vec/exec/scan/scanner_context.h b/be/src/vec/exec/scan/scanner_context.h index df3624008f60dad..d97fc731fe5067d 100644 --- a/be/src/vec/exec/scan/scanner_context.h +++ b/be/src/vec/exec/scan/scanner_context.h @@ -50,7 +50,6 @@ namespace vectorized { class VScanner; class ScannerDelegate; -class VScanNode; class ScannerScheduler; class SimplifiedScanScheduler; @@ -188,7 +187,6 @@ class ScannerContext : public std::enable_shared_from_this, void _try_to_scale_up(); RuntimeState* _state = nullptr; - VScanNode* _parent = nullptr; pipeline::ScanLocalStateBase* _local_state = nullptr; // the comment of same fields in VScanNode diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index 95aea26e265cb0b..aa7835d6e258f41 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -88,16 +88,20 @@ class ShardedKVCache; namespace doris::vectorized { using namespace ErrorCode; -VFileScanner::VFileScanner(RuntimeState* state, pipeline::FileScanLocalState* local_state, - int64_t limit, - std::shared_ptr split_source, - RuntimeProfile* profile, ShardedKVCache* kv_cache) +VFileScanner::VFileScanner( + RuntimeState* state, pipeline::FileScanLocalState* local_state, int64_t limit, + std::shared_ptr split_source, RuntimeProfile* profile, + ShardedKVCache* kv_cache, + std::unordered_map* colname_to_value_range, + const std::unordered_map* colname_to_slot_id) : VScanner(state, local_state, limit, profile), _split_source(split_source), _cur_reader(nullptr), _cur_reader_eof(false), + _colname_to_value_range(colname_to_value_range), _kv_cache(kv_cache), - _strict_mode(false) { + _strict_mode(false), + _col_name_to_slot_id(colname_to_slot_id) { if (state->get_query_ctx() != nullptr && state->get_query_ctx()->file_scan_range_params_map.count(local_state->parent_id()) > 0) { _params = &(state->get_query_ctx()->file_scan_range_params_map[local_state->parent_id()]); @@ -116,13 +120,8 @@ VFileScanner::VFileScanner(RuntimeState* state, pipeline::FileScanLocalState* lo _is_load = (_input_tuple_desc != nullptr); } -Status VFileScanner::prepare( - const VExprContextSPtrs& conjuncts, - std::unordered_map* colname_to_value_range, - const std::unordered_map* colname_to_slot_id) { - RETURN_IF_ERROR(VScanner::prepare(_state, conjuncts)); - _colname_to_value_range = colname_to_value_range; - _col_name_to_slot_id = colname_to_slot_id; +Status VFileScanner::prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts) { + RETURN_IF_ERROR(VScanner::prepare(state, conjuncts)); _get_block_timer = ADD_TIMER(_local_state->scanner_profile(), "FileScannerGetBlockTime"); _open_reader_timer = ADD_TIMER(_local_state->scanner_profile(), "FileScannerOpenReaderTime"); _cast_to_input_block_timer = diff --git a/be/src/vec/exec/scan/vfile_scanner.h b/be/src/vec/exec/scan/vfile_scanner.h 
index fb61c5aa19e2676..82fecd9e67524e2 100644 --- a/be/src/vec/exec/scan/vfile_scanner.h +++ b/be/src/vec/exec/scan/vfile_scanner.h @@ -55,8 +55,6 @@ struct TypeDescriptor; namespace doris::vectorized { -class NewFileScanNode; - class VFileScanner : public VScanner { ENABLE_FACTORY_CREATOR(VFileScanner); @@ -65,7 +63,9 @@ class VFileScanner : public VScanner { VFileScanner(RuntimeState* state, pipeline::FileScanLocalState* parent, int64_t limit, std::shared_ptr split_source, - RuntimeProfile* profile, ShardedKVCache* kv_cache); + RuntimeProfile* profile, ShardedKVCache* kv_cache, + std::unordered_map* colname_to_value_range, + const std::unordered_map* colname_to_slot_id); Status open(RuntimeState* state) override; @@ -73,9 +73,7 @@ class VFileScanner : public VScanner { void try_stop() override; - Status prepare(const VExprContextSPtrs& conjuncts, - std::unordered_map* colname_to_value_range, - const std::unordered_map* colname_to_slot_id); + Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts) override; std::string get_name() override { return VFileScanner::NAME; } diff --git a/be/src/vec/exec/scan/vmeta_scanner.cpp b/be/src/vec/exec/scan/vmeta_scanner.cpp index 64819cfaaa21129..f5864924a389fa7 100644 --- a/be/src/vec/exec/scan/vmeta_scanner.cpp +++ b/be/src/vec/exec/scan/vmeta_scanner.cpp @@ -49,7 +49,6 @@ namespace doris { class RuntimeProfile; namespace vectorized { class VExprContext; -class VScanNode; } // namespace vectorized } // namespace doris diff --git a/be/src/vec/exec/scan/vmeta_scanner.h b/be/src/vec/exec/scan/vmeta_scanner.h index 5936130069ca0fe..8256dff9b910a25 100644 --- a/be/src/vec/exec/scan/vmeta_scanner.h +++ b/be/src/vec/exec/scan/vmeta_scanner.h @@ -41,7 +41,6 @@ class TupleDescriptor; namespace vectorized { class Block; class VExprContext; -class VMetaScanNode; } // namespace vectorized } // namespace doris @@ -57,7 +56,7 @@ class VMetaScanner : public VScanner { Status open(RuntimeState* state) override; Status close(RuntimeState* state) override; - Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts); + Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts) override; protected: Status _get_block_impl(RuntimeState* state, Block* block, bool* eos) override; @@ -73,8 +72,6 @@ class VMetaScanner : public VScanner { TFetchSchemaTableDataRequest* request); Status _build_frontends_disks_metadata_request(const TMetaScanRange& meta_scan_range, TFetchSchemaTableDataRequest* request); - Status _build_workload_groups_metadata_request(const TMetaScanRange& meta_scan_range, - TFetchSchemaTableDataRequest* request); Status _build_workload_sched_policy_metadata_request(const TMetaScanRange& meta_scan_range, TFetchSchemaTableDataRequest* request); Status _build_catalogs_metadata_request(const TMetaScanRange& meta_scan_range, diff --git a/be/src/vec/exec/scan/vscanner.h b/be/src/vec/exec/scan/vscanner.h index 19c37f6fc21e552..29ad37e926984ea 100644 --- a/be/src/vec/exec/scan/vscanner.h +++ b/be/src/vec/exec/scan/vscanner.h @@ -69,7 +69,8 @@ class VScanner { } virtual Status init() { return Status::OK(); } - + // Not virtual, all child will call this method explictly + virtual Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts); virtual Status open(RuntimeState* state) { return Status::OK(); } Status get_block(RuntimeState* state, Block* block, bool* eos); @@ -98,9 +99,6 @@ class VScanner { Status _do_projections(vectorized::Block* origin_block, vectorized::Block* output_block); - // Not virtual, all child 
will call this method explictly - Status prepare(RuntimeState* state, const VExprContextSPtrs& conjuncts); - public: int64_t get_time_cost_ns() const { return _per_scanner_timer; } diff --git a/be/test/vec/exec/vwal_scanner_test.cpp b/be/test/vec/exec/vwal_scanner_test.cpp index 0737de2b3dc0f44..f6a9d33885cfb31 100644 --- a/be/test/vec/exec/vwal_scanner_test.cpp +++ b/be/test/vec/exec/vwal_scanner_test.cpp @@ -306,16 +306,16 @@ void VWalScannerTest::init() { void VWalScannerTest::generate_scanner(std::shared_ptr& scanner) { auto split_source = std::make_shared(_scan_range); + std::unordered_map _colname_to_value_range; + std::unordered_map _colname_to_slot_id; scanner = std::make_shared( &_runtime_state, &(_runtime_state.get_local_state(0)->cast()), -1, - split_source, _profile, _kv_cache.get()); + split_source, _profile, _kv_cache.get(), &_colname_to_value_range, + &_colname_to_slot_id); scanner->_is_load = false; vectorized::VExprContextSPtrs _conjuncts; - std::unordered_map _colname_to_value_range; - std::unordered_map _colname_to_slot_id; - WARN_IF_ERROR(scanner->prepare(_conjuncts, &_colname_to_value_range, &_colname_to_slot_id), - "fail to prepare scanner"); + WARN_IF_ERROR(scanner->prepare(&_runtime_state, _conjuncts), "fail to prepare scanner"); } TEST_F(VWalScannerTest, normal) { From 3040b1aca9621ddf4b860e0fca13dd1f9883e1b0 Mon Sep 17 00:00:00 2001 From: HappenLee Date: Mon, 12 Aug 2024 21:30:05 +0800 Subject: [PATCH 67/94] [Refactor](config) remove unless config in doris (#39218) remove unless config in doris `doris_scanner_queue_size` --- be/src/common/config.cpp | 2 -- be/src/common/config.h | 2 -- 2 files changed, 4 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index bc2f6d3e025a275..6d23ce0eed693ba 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -268,8 +268,6 @@ DEFINE_mInt32(doris_scan_range_row_count, "524288"); DEFINE_mInt32(doris_scan_range_max_mb, "1024"); // max bytes number for single scan block, used in segmentv2 DEFINE_mInt32(doris_scan_block_max_mb, "67108864"); -// size of scanner queue between scanner thread and compute thread -DEFINE_mInt32(doris_scanner_queue_size, "1024"); // single read execute fragment row number DEFINE_mInt32(doris_scanner_row_num, "16384"); // single read execute fragment row bytes diff --git a/be/src/common/config.h b/be/src/common/config.h index 3c43ed66593e51a..3dc9b2deed788e7 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -319,8 +319,6 @@ DECLARE_mInt32(doris_scan_range_row_count); DECLARE_mInt32(doris_scan_range_max_mb); // max bytes number for single scan block, used in segmentv2 DECLARE_mInt32(doris_scan_block_max_mb); -// size of scanner queue between scanner thread and compute thread -DECLARE_mInt32(doris_scanner_queue_size); // single read execute fragment row number DECLARE_mInt32(doris_scanner_row_num); // single read execute fragment row bytes From ee4d0a7d87eac9e4cef58dcaa5487ec21a130872 Mon Sep 17 00:00:00 2001 From: lihangyu <15605149486@163.com> Date: Mon, 12 Aug 2024 22:05:22 +0800 Subject: [PATCH 68/94] [Fix](JsonReader) Return correct status when parse failed (#39206) When using `JsonFunctions::extract_from_object`, but input obj is not object type but other types like null, then Status should be `simdjson::INCORRECT_TYPE` to fill the default value in the later process. Example, json path is `$.city.name`, but input json is `{"city" : null}` then `Status::NotFound` should be returned, in the following, column will be filled with default values. 
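For illustration only (an assumption-laden sketch, not part of this change): a minimal standalone simdjson program showing the behavior the fix relies on. Chasing `$.city.name` through `{"city" : null}` fails with `INCORRECT_TYPE`, which `extract_from_object` now treats like a missing field, so the target column falls back to its default (NULL). The `main` function and variable names below are hypothetical.

```C++
// Hypothetical standalone sketch; not the Doris code path itself.
#include <simdjson.h>
#include <iostream>

int main() {
    using namespace simdjson;
    ondemand::parser parser;
    padded_string json = R"({"city": null})"_padded;
    ondemand::document doc = parser.iterate(json);

    ondemand::value name;
    // "city" exists but holds null, so descending into it is not an object lookup:
    // simdjson reports INCORRECT_TYPE rather than NO_SUCH_FIELD.
    auto err = doc["city"]["name"].get(name);
    std::cout << error_message(err) << std::endl;
    return 0;
}
```

Mapping `INCORRECT_TYPE` to `Status::NotFound` keeps this case consistent with the existing `NO_SUCH_FIELD` / `INDEX_OUT_OF_BOUNDS` handling, so a nested field under a null object degrades to the column default instead of failing the whole load.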
--- be/src/exprs/json_functions.cpp | 9 +++++-- .../test_json_extract_path_invalid_type.json | 13 +++++++++ .../load_p0/stream_load/test_json_load.out | 6 ++++- .../load_p0/stream_load/test_json_load.groovy | 27 +++++++++++++++++++ 4 files changed, 52 insertions(+), 3 deletions(-) create mode 100644 regression-test/data/load_p0/stream_load/test_json_extract_path_invalid_type.json diff --git a/be/src/exprs/json_functions.cpp b/be/src/exprs/json_functions.cpp index 5e3fb1369295955..7bbb5493d8127e7 100644 --- a/be/src/exprs/json_functions.cpp +++ b/be/src/exprs/json_functions.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include // IWYU pragma: keep #include @@ -259,13 +260,17 @@ Status JsonFunctions::extract_from_object(simdjson::ondemand::object& obj, const std::vector& jsonpath, simdjson::ondemand::value* value) noexcept { // Return DataQualityError when it's a malformed json. -// Otherwise the path was not found, due to array out of bound or not exist +// Otherwise the path was not found, due to +// 1. array out of bound +// 2. not exist such field in object +// 3. the input type is not object but could be null or other types and lead to simdjson::INCORRECT_TYPE #define HANDLE_SIMDJSON_ERROR(err, msg) \ do { \ const simdjson::error_code& _err = err; \ const std::string& _msg = msg; \ if (UNLIKELY(_err)) { \ - if (_err == simdjson::NO_SUCH_FIELD || _err == simdjson::INDEX_OUT_OF_BOUNDS) { \ + if (_err == simdjson::NO_SUCH_FIELD || _err == simdjson::INDEX_OUT_OF_BOUNDS || \ + _err == simdjson::INCORRECT_TYPE) { \ return Status::NotFound( \ fmt::format("Not found target filed, err: {}, msg: {}", \ simdjson::error_message(_err), _msg)); \ diff --git a/regression-test/data/load_p0/stream_load/test_json_extract_path_invalid_type.json b/regression-test/data/load_p0/stream_load/test_json_extract_path_invalid_type.json new file mode 100644 index 000000000000000..945b4143022892c --- /dev/null +++ b/regression-test/data/load_p0/stream_load/test_json_extract_path_invalid_type.json @@ -0,0 +1,13 @@ +[ + { + "id": 789, + "city": { + "name": "beijing", + "region": "haidian" + } + }, + { + "id": 1111, + "city": null + } +] \ No newline at end of file diff --git a/regression-test/data/load_p0/stream_load/test_json_load.out b/regression-test/data/load_p0/stream_load/test_json_load.out index 7df15b74b86f62c..1d6777bb21e7abd 100644 --- a/regression-test/data/load_p0/stream_load/test_json_load.out +++ b/regression-test/data/load_p0/stream_load/test_json_load.out @@ -255,4 +255,8 @@ test k2_value 12345 {"k1":12345,"k2":"11111","k3":111111,"k4":[11111]} {"k1":12345,"k2":"11111","k3":111111,"k4":[11111]} 111111 12346 {"k1":12346,"k2":"22222","k4":[22222]} {"k1":12346,"k2":"22222","k4":[22222]} \N 12347 {"k1":12347,"k3":"33333","k4":[22222]} {"k1":12347,"k3":"33333","k4":[22222]} 33333 -12348 {"k1":12348,"k3":"33333","k5":{"k51":1024,"xxxx":[11111]}} {"k1":12348,"k3":"33333","k5":{"k51":1024,"xxxx":[11111]}} 33333 \ No newline at end of file +12348 {"k1":12348,"k3":"33333","k5":{"k51":1024,"xxxx":[11111]}} {"k1":12348,"k3":"33333","k5":{"k51":1024,"xxxx":[11111]}} 33333 + +-- !select31 -- +789 beijing haidian +1111 \N \N \ No newline at end of file diff --git a/regression-test/suites/load_p0/stream_load/test_json_load.groovy b/regression-test/suites/load_p0/stream_load/test_json_load.groovy index 1cf2108d48ee779..1042b5a3d71cdd6 100644 --- a/regression-test/suites/load_p0/stream_load/test_json_load.groovy +++ b/regression-test/suites/load_p0/stream_load/test_json_load.groovy @@ -905,4 +905,31 @@ 
suite("test_json_load", "p0,nonConcurrent") { } finally { // try_sql("DROP TABLE IF EXISTS ${testTable}") } + + // test extract json path with invalid type(none object types like null) + try { + sql "DROP TABLE IF EXISTS ${testTable}" + sql """ + CREATE TABLE ${testTable} ( + `id` int NOT NULL, + `name` varchar(24) NULL, + `region` varchar(30) NULL + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT '' + DISTRIBUTED BY RANDOM BUCKETS AUTO + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + load_json_data.call("${testTable}", "${testTable}_case31", 'true', 'false', 'json', '', '[\"$.id\", \"$.city.name\", \"$.city.region\"]', + '', '', '', 'test_json_extract_path_invalid_type.json', false, 2) + + sql "sync" + qt_select31 "select * from ${testTable} order by id" + + } finally { + // try_sql("DROP TABLE IF EXISTS ${testTable}") + } } From fc469dc4414369416b5dea5c5bb85fc5bde55e17 Mon Sep 17 00:00:00 2001 From: Mryange <59914473+Mryange@users.noreply.github.com> Date: Tue, 13 Aug 2024 01:55:29 +0800 Subject: [PATCH 69/94] [fix](pipeline) LocalExchangeSource was closing without opening (#39237) ## Proposed changes which led to accessing uninitialized data. Moved the profile initialization into the init method. ``` /mnt/disk2/yanxuecheng/doris/be/src/pipeline/local_exchange/local_exchanger.cpp:100:9: runtime error: member call on null pointer of type 'doris::RuntimeProfile::Counter' #0 0x563f0fb8b6c9 in doris::pipeline::Exchanger>::_dequeue_data(doris::pipeline::LocalExchangeSourceLocalState&, std::shared_ptr&, bool*, doris::vectorized::Block*, int) /mnt/disk2/yanxuecheng/doris/be/src/pipeline/local_exchange/local_exchanger.cpp:100:9 #1 0x563f0fb78a45 in doris::pipeline::LocalMergeSortExchanger::finalize(doris::pipeline::LocalExchangeSourceLocalState&) /mnt/disk2/yanxuecheng/doris/be/src/pipeline/local_exchange/local_exchanger.cpp:325:16 #2 0x563f09610366 in doris::pipeline::LocalExchangeSharedState::sub_running_source_operators(doris::pipeline::LocalExchangeSourceLocalState&) /mnt/disk2/yanxuecheng/doris/be/src/pipeline/dependency.cpp:196:20 #3 0x563f0fb63709 in doris::pipeline::LocalExchangeSourceLocalState::close(doris::RuntimeState*) /mnt/disk2/yanxuecheng/doris/be/src/pipeline/local_exchange/local_exchange_source_operator.cpp:59:24 #4 0x563f09728e80 in doris::pipeline::OperatorXBase::close(doris::RuntimeState*) /mnt/disk2/yanxuecheng/doris/be/src/pipeline/exec/operator.cpp:245:28 #5 0x563f0fd3fb58 in doris::pipeline::PipelineTask::close(doris::Status) /mnt/disk2/yanxuecheng/doris/be/src/pipeline/pipeline_task.cpp:459:28 #6 0x563f0fdb5315 in doris::pipeline::_close_task(doris::pipeline::PipelineTask*, doris::Status) /mnt/disk2/yanxuecheng/doris/be/src/pipeline/task_scheduler.cpp:91:27 #7 0x563f0fdb6573 in doris::pipeline::TaskScheduler::_do_work(unsigned long) /mnt/disk2/yanxuecheng/doris/be/src/pipeline/task_scheduler.cpp:125:13 #8 0x563f0fdb9d6a in doris::pipeline::TaskScheduler::start()::$_0::operator()() const /mnt/disk2/yanxuecheng/doris/be/src/pipeline/task_scheduler.cpp:64:9 #9 0x563f0fdb9cee in void std::__invoke_impl(std::__invoke_other, doris::pipeline::TaskScheduler::start()::$_0&) /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:61:14 #10 0x563f0fdb9c4e in std::enable_if, void>::type std::__invoke_r(doris::pipeline::TaskScheduler::start()::$_0&) /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:111:2 #11 0x563f0fdb99d5 in 
std::_Function_handler::_M_invoke(std::_Any_data const&) /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:290:9 #12 0x563ebdddc8cf in std::function::operator()() const /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591:9 #13 0x563ec4bd6db4 in doris::FunctionRunnable::run() /mnt/disk2/yanxuecheng/doris/be/src/util/threadpool.cpp:48:27 #14 0x563ec4bbc1b5 in doris::ThreadPool::dispatch_thread() /mnt/disk2/yanxuecheng/doris/be/src/util/threadpool.cpp:543:24 #15 0x563ec4bf9a53 in void std::__invoke_impl(std::__invoke_memfun_deref, void (doris::ThreadPool::*&)(), doris::ThreadPool*&) /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:74:14 #16 0x563ec4bf9858 in std::__invoke_result::type std::__invoke(void (doris::ThreadPool::*&)(), doris::ThreadPool*&) /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:96:14 #17 0x563ec4bf9790 in void std::_Bind::__call(std::tuple<>&&, std::_Index_tuple<0ul>) /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/functional:506:11 #18 0x563ec4bf9585 in void std::_Bind::operator()() /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/functional:591:17 #19 0x563ec4bf947e in void std::__invoke_impl&>(std::__invoke_other, std::_Bind&) /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:61:14 #20 0x563ec4bf93be in std::enable_if&>, void>::type std::__invoke_r&>(std::_Bind&) /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:111:2 #21 0x563ec4bf8e55 in std::_Function_handler>::_M_invoke(std::_Any_data const&) /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:290:9 #22 0x563ebdddc8cf in std::function::operator()() const /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591:9 #23 0x563ec4b78d91 in doris::Thread::supervise_thread(void*) /mnt/disk2/yanxuecheng/doris/be/src/util/thread.cpp:498:5 #24 0x563ebdb2fe0a in asan_thread_start(void*) crtstuff.c #25 0x7feeac9e21c9 in start_thread (/lib64/libpthread.so.0+0x81c9) (BuildId: 823fccea3475e5870a4167dfe47df20e53222db0) #26 0x7feead3d1e72 in clone (/lib64/libc.so.6+0x39e72) (BuildId: ec3d7025354f1f1985831ff08ef0eb3b50aefbce) SUMMARY: UndefinedBehaviorSanitizer: undefined-behavior /mnt/disk2/yanxuecheng/doris/be/src/pipeline/local_exchange/local_exchanger.cpp:100:9 in *** Query id: ea174401bc134452-bd8a35522726a96a *** *** is nereids: 1 *** *** tablet id: 0 *** *** Aborted at 1723455006 (unix time) try "date -d @1723455006" if you are using GNU date *** *** Current BE git commitID: c47399cc0d *** *** SIGSEGV address not mapped to object (@0x0) received by PID 3055435 (TID 3060341 OR 0x7fe320acf700) from PID 0; stack trace: *** 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /mnt/disk2/yanxuecheng/doris/be/src/common/signal_handler.h:421 1# 0x00007FEEAD3E6B50 in /lib64/libc.so.6 2# doris::pipeline::Exchanger >::_dequeue_data(doris::pipeline::LocalExchangeSourceLocalState&, std::shared_ptr&, bool*, doris::vectorized::Block*, int) at 
/mnt/disk2/yanxuecheng/doris/be/src/pipeline/local_exchange/local_exchanger.cpp:100 3# doris::pipeline::LocalMergeSortExchanger::finalize(doris::pipeline::LocalExchangeSourceLocalState&) at /mnt/disk2/yanxuecheng/doris/be/src/pipeline/local_exchange/local_exchanger.cpp:325 4# doris::pipeline::LocalExchangeSharedState::sub_running_source_operators(doris::pipeline::LocalExchangeSourceLocalState&) at /mnt/disk2/yanxuecheng/doris/be/src/pipeline/dependency.cpp:196 5# doris::pipeline::LocalExchangeSourceLocalState::close(doris::RuntimeState*) in /mnt/disk2/yanxuecheng/doris/output/be/lib/doris_be 6# doris::pipeline::OperatorXBase::close(doris::RuntimeState*) at /mnt/disk2/yanxuecheng/doris/be/src/pipeline/exec/operator.cpp:245 7# doris::pipeline::PipelineTask::close(doris::Status) at /mnt/disk2/yanxuecheng/doris/be/src/pipeline/pipeline_task.cpp:459 8# doris::pipeline::_close_task(doris::pipeline::PipelineTask*, doris::Status) at /mnt/disk2/yanxuecheng/doris/be/src/pipeline/task_scheduler.cpp:91 9# doris::pipeline::TaskScheduler::_do_work(unsigned long) at /mnt/disk2/yanxuecheng/doris/be/src/pipeline/task_scheduler.cpp:125 10# doris::pipeline::TaskScheduler::start()::$_0::operator()() const at /mnt/disk2/yanxuecheng/doris/be/src/pipeline/task_scheduler.cpp:64 11# void std::__invoke_impl(std::__invoke_other, doris::pipeline::TaskScheduler::start()::$_0&) at /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:61 12# std::enable_if, void>::type std::__invoke_r(doris::pipeline::TaskScheduler::start()::$_0&) at /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:117 13# std::_Function_handler::_M_invoke(std::_Any_data const&) at /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:290 14# std::function::operator()() const at /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591 15# doris::FunctionRunnable::run() at /mnt/disk2/yanxuecheng/doris/be/src/util/threadpool.cpp:48 16# doris::ThreadPool::dispatch_thread() at /mnt/disk2/yanxuecheng/doris/be/src/util/threadpool.cpp:543 17# void std::__invoke_impl(std::__invoke_memfun_deref, void (doris::ThreadPool::*&)(), doris::ThreadPool*&) at /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:74 18# std::__invoke_result::type std::__invoke(void (doris::ThreadPool::*&)(), doris::ThreadPool*&) at /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:96 19# void std::_Bind::__call(std::tuple<>&&, std::_Index_tuple<0ul>) at /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/functional:506 20# void std::_Bind::operator()<, void>() at /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/functional:591 21# void std::__invoke_impl&>(std::__invoke_other, std::_Bind&) at /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:61 22# std::enable_if&>, void>::type std::__invoke_r&>(std::_Bind&) at /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/invoke.h:117 23# std::_Function_handler >::_M_invoke(std::_Any_data const&) at 
/mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:290 24# std::function::operator()() const at /mnt/disk2/yanxuecheng/ldb_toolchain/bin/../lib/gcc/x86_64-linux-gnu/13/../../../../include/c++/13/bits/std_function.h:591 25# doris::Thread::supervise_thread(void*) at /mnt/disk2/yanxuecheng/doris/be/src/util/thread.cpp:498 26# asan_thread_start(void*) in /mnt/disk2/yanxuecheng/doris/output/be/lib/doris_be 27# start_thread in /lib64/libpthread.so.0 28# __clone in /lib64/libc.so.6 ``` --- .../local_exchange/local_exchange_source_operator.cpp | 8 -------- .../local_exchange/local_exchange_source_operator.h | 1 - 2 files changed, 9 deletions(-) diff --git a/be/src/pipeline/local_exchange/local_exchange_source_operator.cpp b/be/src/pipeline/local_exchange/local_exchange_source_operator.cpp index 32e93fbc5b24327..0d88545b7e64106 100644 --- a/be/src/pipeline/local_exchange/local_exchange_source_operator.cpp +++ b/be/src/pipeline/local_exchange/local_exchange_source_operator.cpp @@ -27,14 +27,6 @@ Status LocalExchangeSourceLocalState::init(RuntimeState* state, LocalStateInfo& SCOPED_TIMER(_init_timer); _channel_id = info.task_idx; _shared_state->mem_trackers[_channel_id] = _mem_tracker.get(); - return Status::OK(); -} - -Status LocalExchangeSourceLocalState::open(RuntimeState* state) { - SCOPED_TIMER(exec_time_counter()); - SCOPED_TIMER(_open_timer); - RETURN_IF_ERROR(Base::open(state)); - _exchanger = _shared_state->exchanger.get(); DCHECK(_exchanger != nullptr); _get_block_failed_counter = diff --git a/be/src/pipeline/local_exchange/local_exchange_source_operator.h b/be/src/pipeline/local_exchange/local_exchange_source_operator.h index d2f68d4ebaca31c..f6c043d44e4e10b 100644 --- a/be/src/pipeline/local_exchange/local_exchange_source_operator.h +++ b/be/src/pipeline/local_exchange/local_exchange_source_operator.h @@ -36,7 +36,6 @@ class LocalExchangeSourceLocalState final : public PipelineXLocalState Date: Tue, 13 Aug 2024 10:16:55 +0800 Subject: [PATCH 70/94] [enhance](Hdfs) Add bvar latencyrecorder for HDFS operation in Recycler (#39236) This pr adds several bvar for HDFS operation in Recycler. 
The bvars are as follows: ```C++ bvar::LatencyRecorder hdfs_write_latency("hdfs_write"); bvar::LatencyRecorder hdfs_open_latency("hdfs_open"); bvar::LatencyRecorder hdfs_close_latency("hdfs_close"); bvar::LatencyRecorder hdfs_list_dir("hdfs_list_dir"); bvar::LatencyRecorder hdfs_exist_latency("hdfs_exist"); bvar::LatencyRecorder hdfs_delete_latency("hdfs_delete"); ``` --- cloud/src/recycler/azure_obj_client.cpp | 2 ++ cloud/src/recycler/hdfs_accessor.cpp | 33 ++++++++++++++++++++++--- cloud/src/recycler/s3_accessor.h | 9 +------ cloud/src/recycler/s3_obj_client.cpp | 2 ++ cloud/src/recycler/util.h | 6 +++++ 5 files changed, 41 insertions(+), 11 deletions(-) diff --git a/cloud/src/recycler/azure_obj_client.cpp b/cloud/src/recycler/azure_obj_client.cpp index 6a20bff09501823..b50874f1fd37af8 100644 --- a/cloud/src/recycler/azure_obj_client.cpp +++ b/cloud/src/recycler/azure_obj_client.cpp @@ -34,9 +34,11 @@ #include "common/config.h" #include "common/logging.h" +#include "common/stopwatch.h" #include "cpp/s3_rate_limiter.h" #include "cpp/sync_point.h" #include "recycler/s3_accessor.h" +#include "recycler/util.h" using namespace Azure::Storage::Blobs; diff --git a/cloud/src/recycler/hdfs_accessor.cpp b/cloud/src/recycler/hdfs_accessor.cpp index d97c0a37aabdd9f..e5038735f5735f6 100644 --- a/cloud/src/recycler/hdfs_accessor.cpp +++ b/cloud/src/recycler/hdfs_accessor.cpp @@ -17,7 +17,12 @@ #include "recycler/hdfs_accessor.h" +#include #include + +#include "common/stopwatch.h" +#include "recycler/util.h" + #ifdef USE_HADOOP_HDFS #include // IWYU pragma: export #else @@ -44,6 +49,12 @@ std::string hdfs_error() { return fmt::format("({}): {}", std::strerror(errno), err_msg ? err_msg : ""); } +bvar::LatencyRecorder hdfs_write_latency("hdfs_write"); +bvar::LatencyRecorder hdfs_open_latency("hdfs_open"); +bvar::LatencyRecorder hdfs_close_latency("hdfs_close"); +bvar::LatencyRecorder hdfs_list_dir("hdfs_list_dir"); +bvar::LatencyRecorder hdfs_exist_latency("hdfs_exist"); +bvar::LatencyRecorder hdfs_delete_latency("hdfs_delete"); } // namespace class HDFSBuilder { @@ -292,6 +303,7 @@ class HdfsListIterator final : public ListIterator { // Return null if error occured, return emtpy DirEntries if dir is empty or doesn't exist. 
std::optional list_directory(const char* dir_path) { int num_entries = 0; + SCOPED_BVAR_LATENCY(hdfs_list_dir); auto* file_infos = hdfsListDirectory(hdfs_.get(), dir_path, &num_entries); if (errno != 0 && errno != ENOENT) { LOG_WARNING("failed to list hdfs directory") @@ -430,6 +442,7 @@ int HdfsAccessor::delete_file(const std::string& relative_path) { // Path exists auto path = to_fs_path(relative_path); LOG_INFO("delete object").tag("uri", to_uri(relative_path)); // Audit log + SCOPED_BVAR_LATENCY(hdfs_delete_latency); ret = hdfsDelete(fs_.get(), path.c_str(), 0); if (ret != 0) { LOG_WARNING("failed to delete object") @@ -443,7 +456,11 @@ int HdfsAccessor::delete_file(const std::string& relative_path) { int HdfsAccessor::put_file(const std::string& relative_path, const std::string& content) { auto path = to_fs_path(relative_path); - auto* file = hdfsOpenFile(fs_.get(), path.c_str(), O_WRONLY, 0, 0, 0); + hdfsFile file; + { + SCOPED_BVAR_LATENCY(hdfs_open_latency); + file = hdfsOpenFile(fs_.get(), path.c_str(), O_WRONLY, 0, 0, 0); + } if (!file) { LOG_WARNING("failed to create file") .tag("uri", to_uri(relative_path)) @@ -453,11 +470,16 @@ int HdfsAccessor::put_file(const std::string& relative_path, const std::string& std::unique_ptr> defer((int*)0x01, [&](int*) { if (file) { + SCOPED_BVAR_LATENCY(hdfs_close_latency); hdfsCloseFile(fs_.get(), file); } }); - int64_t written_bytes = hdfsWrite(fs_.get(), file, content.data(), content.size()); + int64_t written_bytes = 0; + { + SCOPED_BVAR_LATENCY(hdfs_write_latency); + written_bytes = hdfsWrite(fs_.get(), file, content.data(), content.size()); + } if (written_bytes < content.size()) { LOG_WARNING("failed to write file") .tag("uri", to_uri(relative_path)) @@ -465,7 +487,11 @@ int HdfsAccessor::put_file(const std::string& relative_path, const std::string& return -1; } - int ret = hdfsCloseFile(fs_.get(), file); + int ret = 0; + { + SCOPED_BVAR_LATENCY(hdfs_close_latency); + ret = hdfsCloseFile(fs_.get(), file); + } file = nullptr; if (ret != 0) { LOG_WARNING("failed to close file") @@ -496,6 +522,7 @@ int HdfsAccessor::list_all(std::unique_ptr* res) { int HdfsAccessor::exists(const std::string& relative_path) { auto path = to_fs_path(relative_path); + SCOPED_BVAR_LATENCY(hdfs_exist_latency); int ret = hdfsExists(fs_.get(), path.c_str()); #ifdef USE_HADOOP_HDFS // when calling hdfsExists() and return non-zero code, diff --git a/cloud/src/recycler/s3_accessor.h b/cloud/src/recycler/s3_accessor.h index 41adc93f04f6e3d..6886ee5e7c56406 100644 --- a/cloud/src/recycler/s3_accessor.h +++ b/cloud/src/recycler/s3_accessor.h @@ -23,8 +23,7 @@ #include #include -#include "common/stopwatch.h" -#include "recycler/s3_obj_client.h" +#include "recycler/obj_storage_client.h" #include "recycler/storage_vault_accessor.h" namespace Aws::S3 { @@ -52,12 +51,6 @@ extern bvar::LatencyRecorder s3_get_bucket_version_latency; extern bvar::LatencyRecorder s3_copy_object_latency; }; // namespace s3_bvar -// The time unit is the same with BE: us -#define SCOPED_BVAR_LATENCY(bvar_item) \ - StopWatch sw; \ - std::unique_ptr> defer( \ - (int*)0x01, [&](int*) { bvar_item << sw.elapsed_us(); }); - struct AccessorRateLimiter { public: ~AccessorRateLimiter() = default; diff --git a/cloud/src/recycler/s3_obj_client.cpp b/cloud/src/recycler/s3_obj_client.cpp index fc0c7e9e9016f63..53fa821c7e55036 100644 --- a/cloud/src/recycler/s3_obj_client.cpp +++ b/cloud/src/recycler/s3_obj_client.cpp @@ -30,9 +30,11 @@ #include "common/config.h" #include "common/logging.h" +#include 
"common/stopwatch.h" #include "cpp/s3_rate_limiter.h" #include "cpp/sync_point.h" #include "recycler/s3_accessor.h" +#include "recycler/util.h" namespace doris::cloud { diff --git a/cloud/src/recycler/util.h b/cloud/src/recycler/util.h index b63090062bf67b0..b6d4d3299b5e0fc 100644 --- a/cloud/src/recycler/util.h +++ b/cloud/src/recycler/util.h @@ -25,6 +25,12 @@ namespace doris::cloud { +// The time unit is the same with BE: us +#define SCOPED_BVAR_LATENCY(bvar_item) \ + StopWatch sw; \ + std::unique_ptr> defer( \ + (int*)0x01, [&](int*) { bvar_item << sw.elapsed_us(); }); + class TxnKv; /** From 49cd4c4887bfd391dd56d0e854d0d8e562f13e5e Mon Sep 17 00:00:00 2001 From: AlexYue Date: Tue, 13 Aug 2024 10:18:20 +0800 Subject: [PATCH 71/94] [feature](FE) Add COS's list and head object implementation for FE (#38962) --- .../apache/doris/cloud/storage/CosRemote.java | 90 +++++++++++++++++-- 1 file changed, 84 insertions(+), 6 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/storage/CosRemote.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/storage/CosRemote.java index e541014e78efd61..6fbe282fc54e3c5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/storage/CosRemote.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/storage/CosRemote.java @@ -20,12 +20,19 @@ import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; +import com.google.common.collect.Lists; import com.qcloud.cos.COSClient; import com.qcloud.cos.ClientConfig; import com.qcloud.cos.auth.BasicCOSCredentials; import com.qcloud.cos.auth.COSCredentials; +import com.qcloud.cos.exception.CosClientException; +import com.qcloud.cos.exception.CosServiceException; import com.qcloud.cos.http.HttpMethodName; import com.qcloud.cos.http.HttpProtocol; +import com.qcloud.cos.model.COSObjectSummary; +import com.qcloud.cos.model.ListObjectsRequest; +import com.qcloud.cos.model.ObjectListing; +import com.qcloud.cos.model.ObjectMetadata; import com.qcloud.cos.region.Region; import com.tencentcloudapi.common.Credential; import com.tencentcloudapi.common.profile.ClientProfile; @@ -34,28 +41,38 @@ import com.tencentcloudapi.sts.v20180813.models.AssumeRoleRequest; import com.tencentcloudapi.sts.v20180813.models.AssumeRoleResponse; import com.tencentcloudapi.sts.v20180813.models.Credentials; +import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Triple; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import java.net.URL; +import java.util.ArrayList; import java.util.Date; import java.util.HashMap; +import java.util.List; public class CosRemote extends DefaultRemote { private static final Logger LOG = LogManager.getLogger(CosRemote.class); + private COSClient cosClient; public CosRemote(ObjectInfo obj) { super(obj); } + private void initClient() { + if (cosClient == null) { + COSCredentials cred = new BasicCOSCredentials(obj.getAk(), obj.getSk()); + ClientConfig clientConfig = new ClientConfig(); + clientConfig.setRegion(new Region(obj.getRegion())); + clientConfig.setHttpProtocol(HttpProtocol.http); + cosClient = new COSClient(cred, clientConfig); + } + } + @Override public String getPresignedUrl(String fileName) { - COSCredentials cred = new BasicCOSCredentials(obj.getAk(), obj.getSk()); - ClientConfig clientConfig = new ClientConfig(); - clientConfig.setRegion(new Region(obj.getRegion())); - clientConfig.setHttpProtocol(HttpProtocol.https); - COSClient cosClient = new COSClient(cred, clientConfig); + initClient(); 
Date expirationDate = new Date(System.currentTimeMillis() + SESSION_EXPIRE_SECOND); URL url = cosClient.generatePresignedUrl(obj.getBucket(), normalizePrefix(fileName), expirationDate, HttpMethodName.PUT, @@ -89,11 +106,72 @@ public Triple getStsToken() throws DdlException { return Triple.of(credentials.getTmpSecretId(), credentials.getTmpSecretKey(), credentials.getToken()); } catch (Exception e) { - LOG.warn("Failed get oss sts token", e); + LOG.warn("Failed get cos sts token", e); throw new DdlException(e.getMessage()); } } + @Override + public ListObjectsResult listObjects(String continuationToken) throws DdlException { + return listObjectsInner(normalizePrefix(), continuationToken); + } + + @Override + public ListObjectsResult listObjects(String subPrefix, String continuationToken) throws DdlException { + return listObjectsInner(normalizePrefix(subPrefix), continuationToken); + } + + @Override + public ListObjectsResult headObject(String subKey) throws DdlException { + initClient(); + try { + String key = normalizePrefix(subKey); + ObjectMetadata metadata = cosClient.getObjectMetadata(obj.getBucket(), key); + ObjectFile objectFile = new ObjectFile(key, getRelativePath(key), formatEtag(metadata.getETag()), + metadata.getContentLength()); + return new ListObjectsResult(Lists.newArrayList(objectFile), false, null); + } catch (CosServiceException e) { + if (e.getStatusCode() == 404) { + LOG.warn("NoSuchKey when head object for COS, subKey={}", subKey); + return new ListObjectsResult(Lists.newArrayList(), false, null); + } + LOG.warn("Failed to head object for COS, subKey={}", subKey, e); + throw new DdlException( + "Failed to head object for COS, subKey=" + subKey + ", Error code=" + e.getErrorCode() + + ", Error message=" + e.getCause().getMessage()); + } catch (CosClientException e) { + LOG.warn("Failed to head object for COS, subKey={}", subKey, e); + throw new DdlException( + "Failed to head object for COS, subKey=" + subKey + ", Error code=" + e.getErrorCode() + + ", Error message=" + e.getCause().getMessage()); + } + } + + private ListObjectsResult listObjectsInner(String prefix, String continuationToken) throws DdlException { + initClient(); + try { + ListObjectsRequest listObjectsRequest = new ListObjectsRequest(); + listObjectsRequest.setBucketName(obj.getBucket()); + listObjectsRequest.setPrefix(prefix); + listObjectsRequest.setMaxKeys(1000); + if (!StringUtils.isEmpty(continuationToken)) { + listObjectsRequest.setMarker(continuationToken); + } + ObjectListing objectListing = cosClient.listObjects(listObjectsRequest); + List cosObjectSummaries = objectListing.getObjectSummaries(); + List objectFiles = new ArrayList<>(); + for (COSObjectSummary s : cosObjectSummaries) { + objectFiles.add( + new ObjectFile(s.getKey(), getRelativePath(s.getKey()), formatEtag(s.getETag()), s.getSize())); + } + return new ListObjectsResult(objectFiles, objectListing.isTruncated(), objectListing.getMarker()); + } catch (CosClientException e) { + LOG.warn("Failed to list objects for COS", e); + throw new DdlException("Failed to list objects for COS, Error code=" + e.getErrorCode() + ", Error message=" + + e.getCause().getMessage()); + } + } + @Override public String toString() { return "CosRemote{obj=" + obj + '}'; From 2b46aaa5496ea280a3db628ba0a038b746e9d42c Mon Sep 17 00:00:00 2001 From: qiye Date: Tue, 13 Aug 2024 10:29:26 +0800 Subject: [PATCH 72/94] [fix](ES Catalog)Check isArray before parse json to array (#39104) ## Proposed changes Elasticsearch does not have an explicit array type, but one of 
its fields can contain [0 or more values](https://www.elastic.co/guide/en/elasticsearch/reference/current/array.html). When the field has one value and we map it as array type in Doris, it will run into segment fault while parsing it. So we add a check before we parse json to array. Issue Number: close #39102 --- be/src/exec/es/es_scroll_parser.cpp | 25 ++++---- .../data/composite_type_array_bulk.json | 10 ++++ .../elasticsearch/scripts/es_init.sh | 58 +++++++++++++++++++ .../array_meta_composite_type_array.json | 9 +++ .../index/es6_composite_type_array.json | 14 +++++ .../index/es7_composite_type_array.json | 12 ++++ .../external_table_p0/es/test_es_query.groovy | 25 ++++++++ 7 files changed, 141 insertions(+), 12 deletions(-) create mode 100755 docker/thirdparties/docker-compose/elasticsearch/scripts/data/composite_type_array_bulk.json create mode 100644 docker/thirdparties/docker-compose/elasticsearch/scripts/index/array_meta_composite_type_array.json create mode 100755 docker/thirdparties/docker-compose/elasticsearch/scripts/index/es6_composite_type_array.json create mode 100644 docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_composite_type_array.json diff --git a/be/src/exec/es/es_scroll_parser.cpp b/be/src/exec/es/es_scroll_parser.cpp index f3c8dc57ebac4b1..3d02e335787d8c7 100644 --- a/be/src/exec/es/es_scroll_parser.cpp +++ b/be/src/exec/es/es_scroll_parser.cpp @@ -100,9 +100,9 @@ static const std::string ERROR_COL_DATA_IS_ARRAY = static const std::string INVALID_NULL_VALUE = "Invalid null value occurs: Non-null column `$0` contains NULL"; -#define RETURN_ERROR_IF_COL_IS_ARRAY(col, type) \ +#define RETURN_ERROR_IF_COL_IS_ARRAY(col, type, is_array) \ do { \ - if (col.IsArray()) { \ + if (col.IsArray() == is_array) { \ std::stringstream ss; \ ss << "Expected value of type: " << type_to_string(type) \ << "; but found type: " << json_type_to_string(col.GetType()) \ @@ -167,7 +167,7 @@ Status get_int_value(const rapidjson::Value& col, PrimitiveType type, void* slot return Status::OK(); } - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); StringParser::ParseResult result; @@ -294,7 +294,7 @@ Status get_date_int(const rapidjson::Value& col, PrimitiveType type, bool pure_d return get_date_value_int(col[0], type, false, slot, time_zone); } else { // this would happened just only when `enable_docvalue_scan = false`, and field has string format date from _source - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); return get_date_value_int(col, type, true, slot, time_zone); } @@ -322,7 +322,7 @@ Status get_float_value(const rapidjson::Value& col, PrimitiveType type, void* sl return Status::OK(); } - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); StringParser::ParseResult result; @@ -351,7 +351,7 @@ Status insert_float_value(const rapidjson::Value& col, PrimitiveType type, return Status::OK(); } - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); StringParser::ParseResult result; @@ -390,7 +390,7 @@ Status insert_int_value(const rapidjson::Value& col, PrimitiveType type, return Status::OK(); } - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); RETURN_ERROR_IF_COL_IS_NOT_STRING(col, type); 
StringParser::ParseResult result; @@ -543,7 +543,7 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, val = col[0].GetString(); } } else { - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); if (!col.IsString()) { val = json_value_to_string(col); } else { @@ -623,7 +623,7 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, const rapidjson::Value& str_col = is_nested_str ? col[0] : col; - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); const std::string& val = str_col.GetString(); size_t val_size = str_col.GetStringLength(); @@ -649,7 +649,7 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, val = col[0].GetString(); } } else { - RETURN_ERROR_IF_COL_IS_ARRAY(col, type); + RETURN_ERROR_IF_COL_IS_ARRAY(col, type, true); if (!col.IsString()) { val = json_value_to_string(col); } else { @@ -679,13 +679,14 @@ Status ScrollParser::fill_columns(const TupleDescriptor* tuple_desc, case TYPE_ARRAY: { vectorized::Array array; const auto& sub_type = tuple_desc->slots()[i]->type().children[0].type; - for (auto& sub_col : col.GetArray()) { + RETURN_ERROR_IF_COL_IS_ARRAY(col, type, false); + for (const auto& sub_col : col.GetArray()) { switch (sub_type) { case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_STRING: { std::string val; - RETURN_ERROR_IF_COL_IS_ARRAY(sub_col, sub_type); + RETURN_ERROR_IF_COL_IS_ARRAY(sub_col, sub_type, true); if (!sub_col.IsString()) { val = json_value_to_string(sub_col); } else { diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/data/composite_type_array_bulk.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/composite_type_array_bulk.json new file mode 100755 index 000000000000000..8654ab25e6f0763 --- /dev/null +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/data/composite_type_array_bulk.json @@ -0,0 +1,10 @@ +{"name": "Andy", "sports": "soccer"} +{"name": "Betty", "sports": "pingpong ball"} +{"name": "Cindy", "sports": "武术"} +{"name": "David", "sports": ["volleyball"]} +{"name": "Emily", "sports": ["baseball", "golf", "hockey"]} +{"name": "Frank", "sports": ["rugby", "cricket", "boxing"]} +{"name": "Grace", "sports": ["table tennis", "badminton", "athletics"]} +{"name": "Henry", "sports": ["archery", "fencing", "weightlifting"]} +{"name": "Ivy", "sports": ["judo", "karate", "taekwondo"]} +{"name": "Jack", "sports": ["wrestling", "gymnastics", "surfing"]} \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh b/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh index 575118d8db90b69..258c6c83ef5a374 100755 --- a/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/es_init.sh @@ -16,6 +16,30 @@ # specific language governing permissions and limitations # under the License. 
+generate_bulk_request() { + local index_name=$1 + local type_value=$2 + local id_prefix=$3 + local data_file=$4 + local output_file=$5 + + // clear output file + echo "" > "$output_file" + + local id=1 + while IFS= read -r line; do + if [ -n "$type_value" ]; then + echo "{\"index\": {\"_index\": \"$index_name\", \"_type\": \"$type_value\", \"_id\": \"${id_prefix}${id}\"}}" >> "$output_file" + else + echo "{\"index\": {\"_index\": \"$index_name\", \"_id\": \"${id_prefix}${id}\"}}" >> "$output_file" + fi + echo "$line" >> "$output_file" + id=$((id + 1)) + done < "$data_file" +} + +array_data_file="/mnt/scripts/data/composite_type_array_bulk.json" + # es 5 # create index test1 # shellcheck disable=SC2154 @@ -43,6 +67,14 @@ curl "http://${ES_5_HOST}:9200/test2_20220808/doc/_mapping" -H "Content-Type:app curl "http://${ES_5_HOST}:9200/test2_20220809/doc/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json" # create index .hide curl "http://${ES_5_HOST}:9200/.hide" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es6_hide.json" +# create index composite_type_array +curl "http://${ES_5_HOST}:9200/composite_type_array" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es6_composite_type_array.json" +# put data with bulk for composite_type_array +bulk_request_file="/mnt/scripts/data/bulk_request_es5.json" +generate_bulk_request "composite_type_array" "doc" "item_" "$array_data_file" "$bulk_request_file" +curl -X POST "http://${ES_5_HOST}:9200/_bulk" --data-binary "@$bulk_request_file" -H "Content-Type: application/json" +# put _meta for composite_type_array +curl "http://${ES_5_HOST}:9200/composite_type_array/doc/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta_composite_type_array.json" # es 6 # create index test1 @@ -70,6 +102,14 @@ curl "http://${ES_6_HOST}:9200/test2_20220808/doc/_mapping" -H "Content-Type:app curl "http://${ES_6_HOST}:9200/test2_20220809/doc/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json" # create index .hide curl "http://${ES_6_HOST}:9200/.hide" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es6_hide.json" +# create index composite_type_array +curl "http://${ES_6_HOST}:9200/composite_type_array" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es6_composite_type_array.json" +# put data with bulk for composite_type_array +bulk_request_file="/mnt/scripts/data/bulk_request_es6.json" +generate_bulk_request "composite_type_array" "doc" "item_" "$array_data_file" "$bulk_request_file" +curl -X POST "http://${ES_6_HOST}:9200/_bulk" --data-binary "@$bulk_request_file" -H "Content-Type: application/json" +# put _meta for composite_type_array +curl "http://${ES_6_HOST}:9200/composite_type_array/doc/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta_composite_type_array.json" # es7 # create index test1 @@ -106,6 +146,15 @@ curl "http://${ES_7_HOST}:9200/test2_20220809/_mapping" -H "Content-Type:applica # create index .hide curl "http://${ES_7_HOST}:9200/.hide" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es7_hide.json" +# create index composite_type_array +curl "http://${ES_7_HOST}:9200/composite_type_array" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es7_composite_type_array.json" +# put data with bulk for composite_type_array +bulk_request_file="/mnt/scripts/data/bulk_request_es7.json" +generate_bulk_request 
"composite_type_array" "_doc" "item_" "$array_data_file" "$bulk_request_file" +curl -X POST "http://${ES_7_HOST}:9200/_bulk" --data-binary "@$bulk_request_file" -H "Content-Type: application/json" +# put _meta for composite_type_array +curl "http://${ES_7_HOST}:9200/composite_type_array/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta_composite_type_array.json" + # es8 # create index test1 curl "http://${ES_8_HOST}:9200/test1" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es7_test1.json" @@ -138,3 +187,12 @@ curl "http://${ES_8_HOST}:9200/test3_20231005/_doc/1" -H "Content-Type:applicati curl "http://${ES_8_HOST}:9200/test1/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json" curl "http://${ES_8_HOST}:9200/test2_20220808/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json" curl "http://${ES_8_HOST}:9200/test2_20220809/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta.json" + +# create index composite_type_array +curl "http://${ES_8_HOST}:9200/composite_type_array" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/es7_composite_type_array.json" +# put data with bulk for composite_type_array +bulk_request_file="/mnt/scripts/data/bulk_request_es8.json" +generate_bulk_request "composite_type_array" "" "item_" "$array_data_file" "$bulk_request_file" +curl -X POST "http://${ES_8_HOST}:9200/_bulk" --data-binary "@$bulk_request_file" -H "Content-Type: application/json" +# put _meta for composite_type_array +curl "http://${ES_8_HOST}:9200/composite_type_array/_mapping" -H "Content-Type:application/json" -X PUT -d "@/mnt/scripts/index/array_meta_composite_type_array.json" \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/index/array_meta_composite_type_array.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/array_meta_composite_type_array.json new file mode 100644 index 000000000000000..9c55390149d67de --- /dev/null +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/array_meta_composite_type_array.json @@ -0,0 +1,9 @@ +{ + "_meta": { + "doris":{ + "array_fields":[ + "sports" + ] + } + } +} diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es6_composite_type_array.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es6_composite_type_array.json new file mode 100755 index 000000000000000..2921628cf380aee --- /dev/null +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es6_composite_type_array.json @@ -0,0 +1,14 @@ +{ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0 + }, + "mappings": { + "doc": { + "properties": { + "name": { "type": "keyword" }, + "sports": { "type": "keyword", "doc_values": false} + } + } + } +} diff --git a/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_composite_type_array.json b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_composite_type_array.json new file mode 100644 index 000000000000000..8fdd6c88ce765b7 --- /dev/null +++ b/docker/thirdparties/docker-compose/elasticsearch/scripts/index/es7_composite_type_array.json @@ -0,0 +1,12 @@ +{ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0 + }, + "mappings": { + "properties": { + "name": { "type": "keyword" }, + "sports": { "type": "keyword", "doc_values": false} + } + } +} diff --git 
a/regression-test/suites/external_table_p0/es/test_es_query.groovy b/regression-test/suites/external_table_p0/es/test_es_query.groovy index 0f9839aea215d01..54b1c331de58d93 100644 --- a/regression-test/suites/external_table_p0/es/test_es_query.groovy +++ b/regression-test/suites/external_table_p0/es/test_es_query.groovy @@ -214,6 +214,13 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") { order_qt_sql_5_18 """select message from test1 where not_null_or_empty(message)""" order_qt_sql_5_19 """select * from test1 where esquery(c_unsigned_long, '{"match":{"c_unsigned_long":0}}')""" order_qt_sql_5_20 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test1;""" + try { + sql """select * from composite_type_array;""" + fail("Should not reach here") + } catch (Exception e) { + logger.error(e.getMessage()) + assertTrue(e.getMessage().contains("Expected value of type: ARRAY; but found type: Varchar/Char; Document slice is : \"soccer\"")) + } sql """switch test_es_query_es6""" // order_qt_sql_6_01 """show tables""" @@ -236,6 +243,12 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") { order_qt_sql_6_18 """select message from test1 where not_null_or_empty(message)""" order_qt_sql_6_19 """select * from test1 where esquery(c_person, '{"match":{"c_person.name":"Andy"}}')""" order_qt_sql_6_20 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test1;""" + try { + sql """select * from composite_type_array;""" + fail("Should not reach here") + } catch (Exception e) { + assertTrue(e.getMessage().contains("Expected value of type: ARRAY; but found type: Varchar/Char; Document slice is : \"soccer\"")) + } List> tables6N = sql """show tables""" boolean notContainHide = true @@ -282,6 +295,12 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") { order_qt_sql_7_23 """select * from test1 where level = 'debug'""" order_qt_sql_7_24 """select * from test1 where esquery(c_float, '{"match":{"c_float":1.1}}')""" order_qt_sql_7_25 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test1;""" + try { + sql """select * from composite_type_array;""" + fail("Should not reach here") + } catch (Exception e) { + assertTrue(e.getMessage().contains("Expected value of type: ARRAY; but found type: Varchar/Char; Document slice is : \"soccer\"")) + } List> tables7N = sql """show tables""" boolean notContainHide7 = true @@ -328,6 +347,12 @@ suite("test_es_query", "p0,external,es,external_docker,external_docker_es") { order_qt_sql_8_21 """select * from test1 where level = 'debug'""" order_qt_sql_8_22 """select * from test1 where esquery(c_ip, '{"match":{"c_ip":"192.168.0.1"}}')""" order_qt_sql_8_23 """select c_person, c_user, json_extract(c_person, '\$.[0].name'), json_extract(c_user, '\$.[1].last') from test1;""" + try { + sql """select * from composite_type_array;""" + fail("Should not reach here") + } catch (Exception e) { + assertTrue(e.getMessage().contains("Expected value of type: ARRAY; but found type: Varchar/Char; Document slice is : \"soccer\"")) + } } From dc713c309cc13b94ee03729fa5c2476259e1e888 Mon Sep 17 00:00:00 2001 From: walter Date: Tue, 13 Aug 2024 10:33:21 +0800 Subject: [PATCH 73/94] [fix](suites) fix backup cancelled cases (#39204) --- .../suites/backup_restore/test_backup_cancelled.groovy | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/regression-test/suites/backup_restore/test_backup_cancelled.groovy b/regression-test/suites/backup_restore/test_backup_cancelled.groovy
index 8a472ca9e26555e..3f419a16a69fbe6 100644
--- a/regression-test/suites/backup_restore/test_backup_cancelled.groovy
+++ b/regression-test/suites/backup_restore/test_backup_cancelled.groovy
@@ -17,7 +17,7 @@
suite("test_backup_cancelled", "backup_cancelled") {
String suiteName = "test_backup_cancelled"
- String repoName = "${suiteName}_repo"
+ String repoName = "repo_" + UUID.randomUUID().toString().replace("-", "")
String dbName = "${suiteName}_db"
String tableName = "${suiteName}_table"
String snapshotName = "${suiteName}_snapshot"
@@ -125,7 +125,7 @@ suite("test_backup_cooldown_cancelled", "backup_cooldown_cancelled") {
String dbName = "${suiteName}_db"
String tableName = "${suiteName}_table"
String snapshotName = "${suiteName}_snapshot"
- String repoName = "${suiteName}_repo"
+ String repoName = "repo_" + UUID.randomUUID().toString().replace("-", "")
def syncer = getSyncer()
syncer.createS3Repository(repoName)

From b592f557c74214aa06736a78d16900efb2e7bb07 Mon Sep 17 00:00:00 2001
From: minghong
Date: Tue, 13 Aug 2024 10:44:10 +0800
Subject: [PATCH 74/94] [opt](nereids) Join related cost calculation formula works well at least 3be num (#39118)

## Proposed changes
In the previous #37823, we adjusted the shuffle and join cost. However, the new cost model did not work well on a single BE, so it now counts the BE number as at least 3.
Issue Number: close #xxx
---
.../org/apache/doris/nereids/cost/CostModelV1.java | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
index 5c1451e0139674e..ac283c525c1fe42 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/cost/CostModelV1.java
@@ -271,14 +271,15 @@ public Cost visitPhysicalDistribute(
Statistics childStatistics = context.getChildStatistics(0);
double intputRowCount = childStatistics.getRowCount();
DistributionSpec spec = distribute.getDistributionSpec();
-
+ // cost model is trained by clusters with more than 3 BE.
+ int beNumForDist = Math.max(3, beNumber); // shuffle if (spec instanceof DistributionSpecHash) { return CostV1.of(context.getSessionVariable(), - intputRowCount / beNumber, + intputRowCount / beNumForDist, 0, intputRowCount * childStatistics.dataSizeFactor( - distribute.child().getOutput()) / beNumber + distribute.child().getOutput()) / beNumForDist ); } @@ -301,7 +302,7 @@ public Cost visitPhysicalDistribute( 0, 0, intputRowCount * childStatistics.dataSizeFactor( - distribute.child().getOutput()) / beNumber); + distribute.child().getOutput()) / beNumForDist); } // any @@ -310,7 +311,7 @@ public Cost visitPhysicalDistribute( 0, 0, intputRowCount * childStatistics.dataSizeFactor(distribute.child().getOutput()) - * RANDOM_SHUFFLE_TO_HASH_SHUFFLE_FACTOR / beNumber); + * RANDOM_SHUFFLE_TO_HASH_SHUFFLE_FACTOR / beNumForDist); } private double expressionTreeCost(List expressions) { @@ -422,7 +423,7 @@ public Cost visitPhysicalHashJoin( // on the output rows, taken on outputRowCount() double probeSideFactor = 1.0; double buildSideFactor = context.getSessionVariable().getBroadcastRightTableScaleFactor(); - int totalInstanceNumber = parallelInstance * beNumber; + int totalInstanceNumber = parallelInstance * Math.max(3, beNumber); if (buildSideFactor <= 1.0) { if (buildStats.computeSize(physicalHashJoin.right().getOutput()) < 1024 * 1024) { // no penalty to broadcast if build side is small From dac6bab63da4605a46b525df4d3d06f89890bec1 Mon Sep 17 00:00:00 2001 From: qiye Date: Tue, 13 Aug 2024 10:45:16 +0800 Subject: [PATCH 75/94] [fix](inverted index)Add exception check when write bkd index (#39248) We are not catching the exception when add values in `bkd_writer`, if error throws, BE will run into segment fault. So we add the exception check here to avoid coredump. --- .../segment_v2/inverted_index_writer.cpp | 40 ++++++++------- ...st_index_bkd_writer_fault_injection.groovy | 51 +++++++++++++++++++ 2 files changed, 72 insertions(+), 19 deletions(-) create mode 100644 regression-test/suites/fault_injection_p0/test_index_bkd_writer_fault_injection.groovy diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp index f40bcb38c134694..f676466927a280f 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp @@ -362,7 +362,7 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { _rid++; } } else if constexpr (field_is_numeric_type(field_type)) { - add_numeric_values(values, count); + RETURN_IF_ERROR(add_numeric_values(values, count)); } return Status::OK(); } @@ -455,11 +455,7 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { continue; } const CppType* p = &reinterpret_cast(value_ptr)[j]; - std::string new_value; - size_t value_length = sizeof(CppType); - - _value_key_coder->full_encode_ascending(p, &new_value); - _bkd_writer->add((const uint8_t*)new_value.c_str(), value_length, _rid); + RETURN_IF_ERROR(add_value(*p)); } start_off += array_elem_size; _row_ids_seen_for_bkd++; @@ -504,11 +500,7 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { if (values->is_null_at(j)) { // bkd do not index null values, so we do nothing here. 
} else { - std::string new_value; - size_t value_length = sizeof(CppType); - - _value_key_coder->full_encode_ascending(p, &new_value); - _bkd_writer->add((const uint8_t*)new_value.c_str(), value_length, _rid); + RETURN_IF_ERROR(add_value(*p)); } item_data_ptr = (uint8_t*)item_data_ptr + field_size; } @@ -520,23 +512,33 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter { return Status::OK(); } - void add_numeric_values(const void* values, size_t count) { + Status add_numeric_values(const void* values, size_t count) { auto p = reinterpret_cast(values); for (size_t i = 0; i < count; ++i) { - add_value(*p); + RETURN_IF_ERROR(add_value(*p)); + _rid++; p++; _row_ids_seen_for_bkd++; } + return Status::OK(); } - void add_value(const CppType& value) { - std::string new_value; - size_t value_length = sizeof(CppType); + Status add_value(const CppType& value) { + try { + std::string new_value; + size_t value_length = sizeof(CppType); - _value_key_coder->full_encode_ascending(&value, &new_value); - _bkd_writer->add((const uint8_t*)new_value.c_str(), value_length, _rid); + DBUG_EXECUTE_IF("InvertedIndexColumnWriterImpl::add_value_bkd_writer_add_throw_error", { + _CLTHROWA(CL_ERR_IllegalArgument, ("packedValue should be length=xxx")); + }); - _rid++; + _value_key_coder->full_encode_ascending(&value, &new_value); + _bkd_writer->add((const uint8_t*)new_value.c_str(), value_length, _rid); + } catch (const CLuceneError& e) { + return Status::Error( + "CLuceneError add_value: {}", e.what()); + } + return Status::OK(); } int64_t size() const override { diff --git a/regression-test/suites/fault_injection_p0/test_index_bkd_writer_fault_injection.groovy b/regression-test/suites/fault_injection_p0/test_index_bkd_writer_fault_injection.groovy new file mode 100644 index 000000000000000..7df72ebeaf1b389 --- /dev/null +++ b/regression-test/suites/fault_injection_p0/test_index_bkd_writer_fault_injection.groovy @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import org.codehaus.groovy.runtime.IOGroovyMethods + +suite("test_index_bkd_writer_fault_injection", "nonConcurrent") { + def isCloudMode = isCloudMode() + def tableName = "test_index_bkd_writer_fault_injection" + + sql """ DROP TABLE IF EXISTS ${tableName}; """ + sql """ + CREATE TABLE ${tableName} ( + `id` int(11) NULL, + `name` varchar(255) NULL, + `hobbies` text NULL, + `score` int(11) NULL, + index index_name (name) using inverted, + index index_hobbies (hobbies) using inverted properties("parser"="english"), + index index_score (score) using inverted + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( "replication_num" = "1", "disable_auto_compaction" = "true", "inverted_index_storage_format" = "V1"); + """ + + try { + GetDebugPoint().enableDebugPointForAllBEs("InvertedIndexColumnWriterImpl::add_value_bkd_writer_add_throw_error") + logger.info("trigger_full_compaction_on_tablets with fault injection: InvertedIndexColumnWriterImpl::add_value_bkd_writer_add_throw_error") + sql """ INSERT INTO ${tableName} VALUES (1, "andy", "andy love apple", 100); """ + } catch (Exception e) { + logger.info("error message: ${e.getMessage()}") + assert e.getMessage().contains("packedValue should be length=xxx") + } finally { + GetDebugPoint().disableDebugPointForAllBEs("InvertedIndexColumnWriterImpl::add_value_bkd_writer_add_throw_error") + } +} From 5e20101e6753f3fa8dd2cbf23076fd9e9529772b Mon Sep 17 00:00:00 2001 From: walter Date: Tue, 13 Aug 2024 10:48:38 +0800 Subject: [PATCH 76/94] [chore](cloud) Add show_meta_ranges API (#39208) This interface can roughly estimate the storage space occupied by different types of keys by analyzing the FDB partition key. The usage: ``` curl localhost:5000/MetaService/http/show_meta_ranges?token=greedisgood9999 ``` An example: ``` $ curl localhost:5000/MetaService/http/show_meta_ranges?token=greedisgood9999 total partitions: 5 recycle|default_instance_id|txn: 1 meta|default_instance_id|tablet_index: 1 meta|default_instance_id|rowset_tmp: 1 ``` --- cloud/CMakeLists.txt | 4 +- cloud/src/meta-service/meta_service_http.cpp | 71 ++++++++++++++++++++ cloud/src/meta-service/txn_kv.cpp | 69 +++++++++++++++++++ cloud/src/meta-service/txn_kv.h | 11 +++ 4 files changed, 153 insertions(+), 2 deletions(-) diff --git a/cloud/CMakeLists.txt b/cloud/CMakeLists.txt index ddcf8aab5f97f00..bc3f56642821b03 100644 --- a/cloud/CMakeLists.txt +++ b/cloud/CMakeLists.txt @@ -457,6 +457,8 @@ if (NOT EXISTS ${THIRDPARTY_DIR}/include/foundationdb) execute_process(COMMAND "tar" "xf" "${THIRDPARTY_SRC}/${FDB_LIB}" "-C" "${THIRDPARTY_DIR}/") endif () +set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lfdb_c -L${THIRDPARTY_DIR}/lib") + add_subdirectory(${SRC_DIR}/common) add_subdirectory(${SRC_DIR}/gen-cpp) add_subdirectory(${SRC_DIR}/meta-service) @@ -471,8 +473,6 @@ endif () add_subdirectory(${COMMON_SRC_DIR}/cpp ${BUILD_DIR}/src/common_cpp) if (${MAKE_TEST} STREQUAL "OFF") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lfdb_c -L${THIRDPARTY_DIR}/lib") - add_executable(doris_cloud src/main.cpp) # This permits libraries loaded by dlopen to link to the symbols in the program. 
diff --git a/cloud/src/meta-service/meta_service_http.cpp b/cloud/src/meta-service/meta_service_http.cpp index ad56ffd9ca217ec..d19ef224c840e10 100644 --- a/cloud/src/meta-service/meta_service_http.cpp +++ b/cloud/src/meta-service/meta_service_http.cpp @@ -30,12 +30,17 @@ #include #include +#include #include #include +#include #include #include "common/config.h" #include "common/logging.h" +#include "meta-service/keys.h" +#include "meta-service/txn_kv.h" +#include "meta-service/txn_kv_error.h" #include "meta_service.h" namespace doris::cloud { @@ -350,6 +355,70 @@ static HttpResponse process_get_value(MetaServiceImpl* service, brpc::Controller return process_http_get_value(service->txn_kv().get(), ctrl->http_request().uri()); } +// show all key ranges and their count. +static HttpResponse process_show_meta_ranges(MetaServiceImpl* service, brpc::Controller* ctrl) { + auto txn_kv = std::dynamic_pointer_cast(service->txn_kv()); + if (!txn_kv) { + return http_json_reply(MetaServiceCode::INVALID_ARGUMENT, + "this method only support fdb txn kv"); + } + + std::vector partition_boundaries; + TxnErrorCode code = txn_kv->get_partition_boundaries(&partition_boundaries); + if (code != TxnErrorCode::TXN_OK) { + auto msg = fmt::format("failed to get boundaries, code={}", code); + return http_json_reply(MetaServiceCode::UNDEFINED_ERR, msg); + } + + std::unordered_map partition_count; + size_t prefix_size = FdbTxnKv::fdb_partition_key_prefix().size(); + for (auto&& boundary : partition_boundaries) { + if (boundary.size() < prefix_size + 1 || boundary[prefix_size] != CLOUD_USER_KEY_SPACE01) { + continue; + } + + std::string_view user_key(boundary); + user_key.remove_prefix(prefix_size + 1); // Skip the KEY_SPACE prefix. + std::vector, int, int>> out; + decode_key(&user_key, &out); // ignore any error, since the boundary key might be truncated. 
+ + auto visitor = [](auto&& arg) -> std::string { + using T = std::decay_t; + if constexpr (std::is_same_v) { + return arg; + } else { + return std::to_string(arg); + } + }; + + if (!out.empty()) { + std::string key; + for (size_t i = 0; i < 3 && i < out.size(); ++i) { + key += std::visit(visitor, std::get<0>(out[i])); + key += '|'; + } + key.pop_back(); // omit the last '|' + partition_count[key]++; + } + } + + // sort ranges by count + std::vector> meta_ranges; + meta_ranges.reserve(partition_count.size()); + for (auto&& [key, count] : partition_count) { + meta_ranges.emplace_back(key, count); + } + + std::sort(meta_ranges.begin(), meta_ranges.end(), + [](const auto& lhs, const auto& rhs) { return lhs.second > rhs.second; }); + + std::string body = fmt::format("total partitions: {}\n", partition_boundaries.size()); + for (auto&& [key, count] : meta_ranges) { + body += fmt::format("{}: {}\n", key, count); + } + return http_text_reply(MetaServiceCode::OK, "", body); +} + static HttpResponse process_get_instance_info(MetaServiceImpl* service, brpc::Controller* ctrl) { auto& uri = ctrl->http_request().uri(); std::string_view instance_id = http_query(uri, "instance_id"); @@ -475,9 +544,11 @@ void MetaServiceImpl::http(::google::protobuf::RpcController* controller, {"decode_key", process_decode_key}, {"encode_key", process_encode_key}, {"get_value", process_get_value}, + {"show_meta_ranges", process_show_meta_ranges}, {"v1/decode_key", process_decode_key}, {"v1/encode_key", process_encode_key}, {"v1/get_value", process_get_value}, + {"v1/show_meta_ranges", process_show_meta_ranges}, // for get {"get_instance", process_get_instance_info}, {"get_obj_store_info", process_get_obj_store_info}, diff --git a/cloud/src/meta-service/txn_kv.cpp b/cloud/src/meta-service/txn_kv.cpp index 31185c1d3b38585..8cb2e906ba0b52f 100644 --- a/cloud/src/meta-service/txn_kv.cpp +++ b/cloud/src/meta-service/txn_kv.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -78,12 +79,67 @@ TxnErrorCode FdbTxnKv::create_txn(std::unique_ptr* txn) { return ret; } +TxnErrorCode FdbTxnKv::create_txn_with_system_access(std::unique_ptr* txn) { + auto t = std::make_unique(database_); + TxnErrorCode code = t->init(); + if (code == TxnErrorCode::TXN_OK) { + code = t->enable_access_system_keys(); + } + if (code != TxnErrorCode::TXN_OK) { + LOG(WARNING) << "failed to init txn, ret=" << code; + return code; + } + + *txn = std::move(t); + return TxnErrorCode::TXN_OK; +} + std::unique_ptr FdbTxnKv::full_range_get(std::string begin, std::string end, FullRangeGetIteratorOptions opts) { return std::make_unique(std::move(begin), std::move(end), std::move(opts)); } +TxnErrorCode FdbTxnKv::get_partition_boundaries(std::vector* boundaries) { + boundaries->clear(); + + std::unique_ptr txn; + TxnErrorCode code = create_txn_with_system_access(&txn); + if (code != TxnErrorCode::TXN_OK) { + return code; + } + + std::string begin_key(fdb_partition_key_prefix()); + std::string end_key(fdb_partition_key_end()); + + std::unique_ptr iter; + do { + code = txn->get(begin_key, end_key, &iter, true); + if (code != TxnErrorCode::TXN_OK) { + if (code == TxnErrorCode::TXN_TOO_OLD) { + code = create_txn_with_system_access(&txn); + if (code == TxnErrorCode::TXN_OK) { + continue; + } + } + LOG_WARNING("failed to get fdb boundaries") + .tag("code", code) + .tag("begin_key", hex(begin_key)) + .tag("end_key", hex(end_key)); + return code; + } + + while (iter->has_next()) { + auto&& [key, value] = iter->next(); + 
boundaries->emplace_back(key); + } + + begin_key = iter->next_begin_key(); + } while (iter->more()); + + return TxnErrorCode::TXN_OK; +} + } // namespace doris::cloud namespace doris::cloud::fdb { @@ -258,6 +314,19 @@ TxnErrorCode Transaction::init() { return TxnErrorCode::TXN_OK; } +TxnErrorCode Transaction::enable_access_system_keys() { + fdb_error_t err = fdb_transaction_set_option( + txn_, FDBTransactionOption::FDB_TR_OPTION_ACCESS_SYSTEM_KEYS, nullptr, 0); + if (err) { + LOG_WARNING("fdb_transaction_set_option error: ") + .tag("option", "FDB_TR_OPTION_ACCESS_SYSTEM_KEYS") + .tag("code", err) + .tag("msg", fdb_get_error(err)); + return cast_as_txn_code(err); + } + return TxnErrorCode::TXN_OK; +} + void Transaction::put(std::string_view key, std::string_view val) { StopWatch sw; fdb_transaction_set(txn_, (uint8_t*)key.data(), key.size(), (uint8_t*)val.data(), val.size()); diff --git a/cloud/src/meta-service/txn_kv.h b/cloud/src/meta-service/txn_kv.h index 99b187e0f637ebd..5da5d4e1d63d690 100644 --- a/cloud/src/meta-service/txn_kv.h +++ b/cloud/src/meta-service/txn_kv.h @@ -323,12 +323,22 @@ class FdbTxnKv : public TxnKv { ~FdbTxnKv() override = default; TxnErrorCode create_txn(std::unique_ptr* txn) override; + TxnErrorCode create_txn_with_system_access(std::unique_ptr* txn); int init() override; std::unique_ptr full_range_get(std::string begin, std::string end, FullRangeGetIteratorOptions opts) override; + // Return the partition boundaries of the database. + TxnErrorCode get_partition_boundaries(std::vector* boundaries); + + static std::string_view fdb_partition_key_prefix() { return "\xff/keyServers/"; } + static std::string_view fdb_partition_key_end() { + // '0' is the next byte after '/' in the ASCII table + return "\xff/keyServers0"; + } + private: std::shared_ptr network_; std::shared_ptr database_; @@ -478,6 +488,7 @@ class Transaction : public cloud::Transaction { * @return TxnErrorCode for success otherwise false */ TxnErrorCode init(); + TxnErrorCode enable_access_system_keys(); void put(std::string_view key, std::string_view val) override; From 86fb35af57de77de92bfcc81852361d61623b91b Mon Sep 17 00:00:00 2001 From: AlexYue Date: Tue, 13 Aug 2024 11:08:45 +0800 Subject: [PATCH 77/94] [refactor](S3) Refactor S3 file writer's duplicate code into common function (#38771) There were some duplicate code in S3 File writer, which might be error-prone, this pr tries to reduce code. --- be/src/io/fs/s3_file_writer.cpp | 116 ++++++++++++++------------------ be/src/io/fs/s3_file_writer.h | 2 + 2 files changed, 53 insertions(+), 65 deletions(-) diff --git a/be/src/io/fs/s3_file_writer.cpp b/be/src/io/fs/s3_file_writer.cpp index a9fb1c96fda180e..24b72a4b6c902c2 100644 --- a/be/src/io/fs/s3_file_writer.cpp +++ b/be/src/io/fs/s3_file_writer.cpp @@ -157,6 +157,50 @@ Status S3FileWriter::close(bool non_block) { return _st; } +bool S3FileWriter::_complete_part_task_callback(Status s) { + bool ret = false; + if (!s.ok()) [[unlikely]] { + VLOG_NOTICE << "failed at key: " << _obj_storage_path_opts.key + << ", status: " << s.to_string(); + std::unique_lock _lck {_completed_lock}; + _failed = true; + ret = true; + _st = std::move(s); + } + // After the signal, there is a scenario where the previous invocation of _wait_until_finish + // returns to the caller, and subsequently, the S3 file writer is destructed. + // This means that accessing _failed afterwards would result in a heap use after free vulnerability. 
+ _countdown_event.signal(); + return ret; +} + +Status S3FileWriter::_build_upload_buffer() { + auto builder = FileBufferBuilder(); + builder.set_type(BufferType::UPLOAD) + .set_upload_callback([part_num = _cur_part_num, this](UploadFileBuffer& buf) { + _upload_one_part(part_num, buf); + }) + .set_file_offset(_bytes_appended) + .set_sync_after_complete_task([this](auto&& PH1) { + return _complete_part_task_callback(std::forward(PH1)); + }) + .set_is_cancelled([this]() { return _failed.load(); }); + if (_cache_builder != nullptr) { + // We would load the data into file cache asynchronously which indicates + // that this instance of S3FileWriter might have been destructed when we + // try to do writing into file cache, so we make the lambda capture the variable + // we need by value to extend their lifetime + builder.set_allocate_file_blocks_holder( + [builder = *_cache_builder, offset = _bytes_appended]() -> FileBlocksHolderPtr { + return builder.allocate_cache_holder(offset, config::s3_write_buffer_size); + }); + } + RETURN_IF_ERROR(builder.build(&_pending_buf)); + auto* buf = dynamic_cast(_pending_buf.get()); + DCHECK(buf != nullptr); + return Status::OK(); +} + Status S3FileWriter::_close_impl() { VLOG_DEBUG << "S3FileWriter::close, path: " << _obj_storage_path_opts.path.native(); @@ -165,35 +209,13 @@ Status S3FileWriter::_close_impl() { } if (_bytes_appended == 0) { + DCHECK(_cur_part_num == 1); // No data written, but need to create an empty file - auto builder = FileBufferBuilder(); - builder.set_type(BufferType::UPLOAD) - .set_upload_callback([this](UploadFileBuffer& buf) { _put_object(buf); }) - .set_sync_after_complete_task([this](Status s) { - bool ret = false; - if (!s.ok()) [[unlikely]] { - VLOG_NOTICE << "failed at key: " << _obj_storage_path_opts.key - << ", status: " << s.to_string(); - std::unique_lock _lck {_completed_lock}; - _failed = true; - ret = true; - this->_st = std::move(s); - } - // After the signal, there is a scenario where the previous invocation of _wait_until_finish - // returns to the caller, and subsequently, the S3 file writer is destructed. - // This means that accessing _failed afterwards would result in a heap use after free vulnerability. 
- _countdown_event.signal(); - return ret; - }) - .set_is_cancelled([this]() { return _failed.load(); }); - RETURN_IF_ERROR(builder.build(&_pending_buf)); - auto* buf = dynamic_cast(_pending_buf.get()); - DCHECK(buf != nullptr); - if (_used_by_s3_committer) { - buf->set_upload_to_remote([part_num = _cur_part_num, this](UploadFileBuffer& buf) { - _upload_one_part(part_num, buf); - }); - DCHECK(_cur_part_num == 1); + RETURN_IF_ERROR(_build_upload_buffer()); + if (!_used_by_s3_committer) { + auto* pending_buf = dynamic_cast(_pending_buf.get()); + pending_buf->set_upload_to_remote([this](UploadFileBuffer& buf) { _put_object(buf); }); + } else { RETURN_IF_ERROR(_create_multi_upload_request()); } } @@ -225,43 +247,7 @@ Status S3FileWriter::appendv(const Slice* data, size_t data_cnt) { return _st; } if (!_pending_buf) { - auto builder = FileBufferBuilder(); - builder.set_type(BufferType::UPLOAD) - .set_upload_callback( - [part_num = _cur_part_num, this](UploadFileBuffer& buf) { - _upload_one_part(part_num, buf); - }) - .set_file_offset(_bytes_appended) - .set_sync_after_complete_task([this, part_num = _cur_part_num](Status s) { - bool ret = false; - if (!s.ok()) [[unlikely]] { - VLOG_NOTICE << "failed at key: " << _obj_storage_path_opts.key - << ", load part " << part_num << ", st " << s; - std::unique_lock _lck {_completed_lock}; - _failed = true; - ret = true; - this->_st = std::move(s); - } - // After the signal, there is a scenario where the previous invocation of _wait_until_finish - // returns to the caller, and subsequently, the S3 file writer is destructed. - // This means that accessing _failed afterwards would result in a heap use after free vulnerability. - _countdown_event.signal(); - return ret; - }) - .set_is_cancelled([this]() { return _failed.load(); }); - if (_cache_builder != nullptr) { - // We would load the data into file cache asynchronously which indicates - // that this instance of S3FileWriter might have been destructed when we - // try to do writing into file cache, so we make the lambda capture the variable - // we need by value to extend their lifetime - builder.set_allocate_file_blocks_holder( - [builder = *_cache_builder, - offset = _bytes_appended]() -> FileBlocksHolderPtr { - return builder.allocate_cache_holder(offset, - config::s3_write_buffer_size); - }); - } - RETURN_IF_ERROR(builder.build(&_pending_buf)); + RETURN_IF_ERROR(_build_upload_buffer()); } // we need to make sure all parts except the last one to be 5MB or more // and shouldn't be larger than buf diff --git a/be/src/io/fs/s3_file_writer.h b/be/src/io/fs/s3_file_writer.h index c67c79ce5366b03..95ad52ddb670812 100644 --- a/be/src/io/fs/s3_file_writer.h +++ b/be/src/io/fs/s3_file_writer.h @@ -84,6 +84,8 @@ class S3FileWriter final : public FileWriter { Status _set_upload_to_remote_less_than_buffer_size(); void _put_object(UploadFileBuffer& buf); void _upload_one_part(int64_t part_num, UploadFileBuffer& buf); + bool _complete_part_task_callback(Status s); + Status _build_upload_buffer(); ObjectStoragePathOptions _obj_storage_path_opts; From 7a3f3b659c0f0af96d9601ec73cd74699b2a2965 Mon Sep 17 00:00:00 2001 From: Dongyang Li Date: Tue, 13 Aug 2024 11:10:44 +0800 Subject: [PATCH 78/94] [chore](ci) add branch-3.0 required checks (#39250) add required checks to accelerate the process of pipeline-related problem fixing --- .asf.yaml | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/.asf.yaml b/.asf.yaml index 821947fa1a0451b..4049ca5f764e5db 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ 
-72,6 +72,33 @@ github:
dismiss_stale_reviews: true
require_code_owner_reviews: true
required_approving_review_count: 1
+
+ branch-3.0:
+ required_status_checks:
+ # if strict is true, means "Require branches to be up to date before merging".
+ strict: false
+ contexts:
+ - License Check
+ - Clang Formatter
+ - CheckStyle
+ - Build Broker
+ - ShellCheck
+ - Build Third Party Libraries (Linux)
+ - Build Third Party Libraries (macOS)
+ - Build Third Party Libraries (macOS-arm64)
+ - FE UT (Doris FE UT)
+ - BE UT (Doris BE UT)
+ - Cloud UT (Doris Cloud UT)
+ - COMPILE (DORIS_COMPILE)
+ - P0 Regression (Doris Regression)
+ - P1 Regression (Doris Regression)
+ - External Regression (Doris External Regression)
+ - cloud_p1 (Doris Cloud Regression)
+ - cloud_p0 (Doris Cloud Regression)
+ required_pull_request_reviews:
+ dismiss_stale_reviews: true
+ required_approving_review_count: 1
+
branch-1.1-lts:
required_status_checks:
# if strict is true, means "Require branches to be up to date before merging".

From aa2929e7e6db55c7a7a047e272782816e9626a45 Mon Sep 17 00:00:00 2001
From: zhiqiang
Date: Tue, 13 Aug 2024 11:49:31 +0800
Subject: [PATCH 79/94] [opt](assert_cast) Make assert cast do type check in release build by default (#39030)

* Problem to solve

We encountered many issues that ultimately proved to be caused by memory unsafety. These issues are hard to solve, and the final crash log may not be related to the root problem.

* Fix

We make `assert_cast` do the type check in release builds by default, and the template arg `TypeCheckOnRelease::DISABLE` can be used to disable the check in release builds. `TypeCheckOnRelease::DISABLE` should be used when the caller accepts that the function will be called a huge number of times (e.g. the add method of AggregatedData, which is called for every row) or when type safety has already been guaranteed (e.g. `assert_cast(this)`).
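For illustration, a minimal usage sketch of the two call forms (the concrete column type, the helper function names, and the includes below are illustrative assumptions, not code taken from this patch):

```cpp
#include "vec/columns/columns_number.h"
#include "vec/common/assert_cast.h"

using namespace doris::vectorized;

// Cold path: keep the default, which now also performs the type check in release builds.
const ColumnInt64& checked_column(const IColumn& col) {
    return assert_cast<const ColumnInt64&>(col);
}

// Hot path (e.g. invoked once per input row) where the type has already been guaranteed:
// opt out of the release-build check explicitly.
const ColumnInt64& unchecked_column(const IColumn& col) {
    return assert_cast<const ColumnInt64&, TypeCheckOnRelease::DISABLE>(col);
}
```

The default keeps the check on everywhere; opting out is a per-call-site decision via the template argument rather than a build-wide flag.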
--- be/src/olap/base_tablet.cpp | 4 +- be/src/olap/bloom_filter_predicate.h | 1 + .../aggregate_functions/aggregate_function.h | 109 +++++++++------ ...aggregate_function_approx_count_distinct.h | 7 +- .../aggregate_function_avg.h | 3 +- .../aggregate_function_avg_weighted.h | 6 +- .../aggregate_function_bit.h | 4 +- .../aggregate_function_bitmap.h | 19 ++- .../aggregate_function_bitmap_agg.h | 9 +- .../aggregate_function_collect.h | 42 ++++-- .../aggregate_function_count.h | 4 +- .../aggregate_function_covar.h | 9 +- .../aggregate_function_distinct.h | 4 +- .../aggregate_function_foreach.h | 9 +- ...aggregate_function_group_array_intersect.h | 23 +-- .../aggregate_function_group_concat.h | 10 +- .../aggregate_function_histogram.h | 7 +- .../aggregate_function_hll_union_agg.h | 2 +- .../aggregate_function_map.h | 22 +-- .../aggregate_function_min_max.h | 45 ++++-- .../aggregate_function_min_max_by.h | 3 +- .../aggregate_function_null.h | 7 +- .../aggregate_function_orthogonal_bitmap.h | 21 ++- .../aggregate_function_percentile.h | 131 ++++++++++++------ .../aggregate_function_percentile_approx.h | 20 ++- .../aggregate_function_product.h | 3 +- .../aggregate_function_quantile_state.h | 10 +- .../aggregate_function_reader_first_last.h | 25 ++-- .../aggregate_function_retention.h | 4 +- .../aggregate_function_sequence_match.h | 14 +- .../aggregate_function_stddev.h | 3 +- .../aggregate_function_sum.h | 3 +- .../aggregate_function_topn.h | 57 +++++--- .../aggregate_function_uniq.h | 7 +- .../aggregate_function_window.h | 6 +- .../aggregate_function_window_funnel.h | 11 +- be/src/vec/columns/column_array.cpp | 34 +++-- be/src/vec/columns/column_array.h | 4 +- be/src/vec/columns/column_complex.h | 4 +- be/src/vec/columns/column_const.h | 2 +- be/src/vec/columns/column_decimal.cpp | 2 +- be/src/vec/columns/column_decimal.h | 4 +- be/src/vec/columns/column_map.cpp | 2 +- be/src/vec/columns/column_map.h | 5 +- be/src/vec/columns/column_nullable.cpp | 10 +- be/src/vec/columns/column_nullable.h | 22 ++- be/src/vec/columns/column_object.cpp | 6 +- be/src/vec/columns/column_string.cpp | 4 +- be/src/vec/columns/column_struct.cpp | 10 +- be/src/vec/columns/column_vector.cpp | 3 +- be/src/vec/columns/column_vector.h | 9 +- be/src/vec/common/assert_cast.h | 35 +++-- .../vec/data_types/data_type_number_base.cpp | 29 ++-- .../serde/data_type_datetimev2_serde.cpp | 6 +- .../serde/data_type_struct_serde.cpp | 10 +- .../functions/array/function_array_element.h | 1 + .../functions/comparison_equal_for_null.cpp | 8 +- .../functions/function_binary_arithmetic.h | 24 +++- be/src/vec/functions/function_case.h | 8 +- be/src/vec/functions/function_cast.h | 17 ++- be/src/vec/functions/function_coalesce.cpp | 4 +- be/src/vec/functions/function_helpers.cpp | 2 +- be/src/vec/functions/function_helpers.h | 2 +- be/src/vec/functions/function_ip.h | 4 +- be/src/vec/functions/function_string.cpp | 1 + be/src/vec/functions/function_string.h | 17 ++- .../functions/function_variant_element.cpp | 4 +- 67 files changed, 632 insertions(+), 325 deletions(-) diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index 0fb12dd074f8b06..db1e0283854a399 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -36,6 +36,7 @@ #include "util/crc32c.h" #include "util/debug_points.h" #include "util/doris_metrics.h" +#include "vec/common/assert_cast.h" #include "vec/common/schema_util.h" #include "vec/data_types/data_type_factory.hpp" #include "vec/jsonb/serialize.h" @@ -1030,7 +1031,8 @@ Status 
BaseTablet::generate_new_block_for_partial_update( if (rs_column.has_default_value()) { mutable_column->insert_from(*mutable_default_value_columns[i].get(), 0); } else if (rs_column.is_nullable()) { - assert_cast(mutable_column.get()) + assert_cast( + mutable_column.get()) ->insert_null_elements(1); } else { mutable_column->insert_default(); diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h index 9cc95d7152aa321..260c08fbbb77c31 100644 --- a/be/src/olap/bloom_filter_predicate.h +++ b/be/src/olap/bloom_filter_predicate.h @@ -25,6 +25,7 @@ #include "vec/columns/column_nullable.h" #include "vec/columns/column_vector.h" #include "vec/columns/predicate_column.h" +#include "vec/common/assert_cast.h" #include "vec/exprs/vruntimefilter_wrapper.h" namespace doris { diff --git a/be/src/vec/aggregate_functions/aggregate_function.h b/be/src/vec/aggregate_functions/aggregate_function.h index e9d7ff37dbc6e8f..12d629b42c89f8e 100644 --- a/be/src/vec/aggregate_functions/aggregate_function.h +++ b/be/src/vec/aggregate_functions/aggregate_function.h @@ -237,13 +237,16 @@ class IAggregateFunctionHelper : public IAggregateFunction { void destroy_vec(AggregateDataPtr __restrict place, const size_t num_rows) const noexcept override { const size_t size_of_data_ = size_of_data(); + const Derived* derived = assert_cast(this); for (size_t i = 0; i != num_rows; ++i) { - assert_cast(this)->destroy(place + size_of_data_ * i); + derived->destroy(place + size_of_data_ * i); } } void add_batch(size_t batch_size, AggregateDataPtr* places, size_t place_offset, const IColumn** columns, Arena* arena, bool agg_many) const override { + const Derived* derived = assert_cast(this); + if constexpr (std::is_same_v> || std::is_same_v> || std::is_same_v(this)->add_many(iter->first, columns, iter->second, - arena); + derived->add_many(iter->first, columns, iter->second, arena); iter++; } return; @@ -271,23 +273,25 @@ class IAggregateFunctionHelper : public IAggregateFunction { } for (size_t i = 0; i < batch_size; ++i) { - assert_cast(this)->add(places[i] + place_offset, columns, i, arena); + derived->add(places[i] + place_offset, columns, i, arena); } } void add_batch_selected(size_t batch_size, AggregateDataPtr* places, size_t place_offset, const IColumn** columns, Arena* arena) const override { + const Derived* derived = assert_cast(this); for (size_t i = 0; i < batch_size; ++i) { if (places[i]) { - assert_cast(this)->add(places[i] + place_offset, columns, i, arena); + derived->add(places[i] + place_offset, columns, i, arena); } } } void add_batch_single_place(size_t batch_size, AggregateDataPtr place, const IColumn** columns, Arena* arena) const override { + const Derived* derived = assert_cast(this); for (size_t i = 0; i < batch_size; ++i) { - assert_cast(this)->add(place, columns, i, arena); + derived->add(place, columns, i, arena); } } //now this is use for sum/count/avg/min/max win function, other win function should override this function in class @@ -295,31 +299,35 @@ class IAggregateFunctionHelper : public IAggregateFunction { void add_range_single_place(int64_t partition_start, int64_t partition_end, int64_t frame_start, int64_t frame_end, AggregateDataPtr place, const IColumn** columns, Arena* arena) const override { + const Derived* derived = assert_cast(this); frame_start = std::max(frame_start, partition_start); frame_end = std::min(frame_end, partition_end); for (int64_t i = frame_start; i < frame_end; ++i) { - assert_cast(this)->add(place, columns, i, arena); + 
derived->add(place, columns, i, arena); } } void add_batch_range(size_t batch_begin, size_t batch_end, AggregateDataPtr place, const IColumn** columns, Arena* arena, bool has_null) override { + const Derived* derived = assert_cast(this); for (size_t i = batch_begin; i <= batch_end; ++i) { - assert_cast(this)->add(place, columns, i, arena); + derived->add(place, columns, i, arena); } } void insert_result_into_vec(const std::vector& places, const size_t offset, IColumn& to, const size_t num_rows) const override { + const Derived* derived = assert_cast(this); for (size_t i = 0; i != num_rows; ++i) { - assert_cast(this)->insert_result_into(places[i] + offset, to); + derived->insert_result_into(places[i] + offset, to); } } void serialize_vec(const std::vector& places, size_t offset, BufferWritable& buf, const size_t num_rows) const override { + const Derived* derived = assert_cast(this); for (size_t i = 0; i != num_rows; ++i) { - assert_cast(this)->serialize(places[i] + offset, buf); + derived->serialize(places[i] + offset, buf); buf.commit(); } } @@ -333,11 +341,12 @@ class IAggregateFunctionHelper : public IAggregateFunction { void streaming_agg_serialize(const IColumn** columns, BufferWritable& buf, const size_t num_rows, Arena* arena) const override { std::vector place(size_of_data()); + const Derived* derived = assert_cast(this); for (size_t i = 0; i != num_rows; ++i) { - assert_cast(this)->create(place.data()); - DEFER({ assert_cast(this)->destroy(place.data()); }); - assert_cast(this)->add(place.data(), columns, i, arena); - assert_cast(this)->serialize(place.data(), buf); + derived->create(place.data()); + DEFER({ derived->destroy(place.data()); }); + derived->add(place.data(), columns, i, arena); + derived->serialize(place.data(), buf); buf.commit(); } } @@ -357,17 +366,18 @@ class IAggregateFunctionHelper : public IAggregateFunction { void deserialize_vec(AggregateDataPtr places, const ColumnString* column, Arena* arena, size_t num_rows) const override { - const auto size_of_data = assert_cast(this)->size_of_data(); + const Derived* derived = assert_cast(this); + const auto size_of_data = derived->size_of_data(); for (size_t i = 0; i != num_rows; ++i) { try { auto place = places + size_of_data * i; VectorBufferReader buffer_reader(column->get_data_at(i)); - assert_cast(this)->create(place); - assert_cast(this)->deserialize(place, buffer_reader, arena); + derived->create(place); + derived->deserialize(place, buffer_reader, arena); } catch (...) { for (int j = 0; j < i; ++j) { auto place = places + size_of_data * j; - assert_cast(this)->destroy(place); + derived->destroy(place); } throw; } @@ -377,49 +387,52 @@ class IAggregateFunctionHelper : public IAggregateFunction { void deserialize_and_merge_vec(const AggregateDataPtr* places, size_t offset, AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { - const auto size_of_data = assert_cast(this)->size_of_data(); + const Derived* derived = assert_cast(this); + const auto size_of_data = derived->size_of_data(); const auto* column_string = assert_cast(column); + for (size_t i = 0; i != num_rows; ++i) { try { auto rhs_place = rhs + size_of_data * i; VectorBufferReader buffer_reader(column_string->get_data_at(i)); - assert_cast(this)->create(rhs_place); - assert_cast(this)->deserialize_and_merge( - places[i] + offset, rhs_place, buffer_reader, arena); + derived->create(rhs_place); + derived->deserialize_and_merge(places[i] + offset, rhs_place, buffer_reader, arena); } catch (...) 
{ for (int j = 0; j < i; ++j) { auto place = rhs + size_of_data * j; - assert_cast(this)->destroy(place); + derived->destroy(place); } throw; } } - assert_cast(this)->destroy_vec(rhs, num_rows); + + derived->destroy_vec(rhs, num_rows); } void deserialize_and_merge_vec_selected(const AggregateDataPtr* places, size_t offset, AggregateDataPtr rhs, const IColumn* column, Arena* arena, const size_t num_rows) const override { - const auto size_of_data = assert_cast(this)->size_of_data(); + const auto* derived = assert_cast(this); + const auto size_of_data = derived->size_of_data(); const auto* column_string = assert_cast(column); for (size_t i = 0; i != num_rows; ++i) { try { auto rhs_place = rhs + size_of_data * i; VectorBufferReader buffer_reader(column_string->get_data_at(i)); - assert_cast(this)->create(rhs_place); + derived->create(rhs_place); if (places[i]) { - assert_cast(this)->deserialize_and_merge( - places[i] + offset, rhs_place, buffer_reader, arena); + derived->deserialize_and_merge(places[i] + offset, rhs_place, buffer_reader, + arena); } } catch (...) { for (int j = 0; j < i; ++j) { auto place = rhs + size_of_data * j; - assert_cast(this)->destroy(place); + derived->destroy(place); } throw; } } - assert_cast(this)->destroy_vec(rhs, num_rows); + derived->destroy_vec(rhs, num_rows); } void deserialize_from_column(AggregateDataPtr places, const IColumn& column, Arena* arena, @@ -429,21 +442,21 @@ class IAggregateFunctionHelper : public IAggregateFunction { void merge_vec(const AggregateDataPtr* places, size_t offset, ConstAggregateDataPtr rhs, Arena* arena, const size_t num_rows) const override { - const auto size_of_data = assert_cast(this)->size_of_data(); + const auto* derived = assert_cast(this); + const auto size_of_data = derived->size_of_data(); for (size_t i = 0; i != num_rows; ++i) { - assert_cast(this)->merge(places[i] + offset, rhs + size_of_data * i, - arena); + derived->merge(places[i] + offset, rhs + size_of_data * i, arena); } } void merge_vec_selected(const AggregateDataPtr* places, size_t offset, ConstAggregateDataPtr rhs, Arena* arena, const size_t num_rows) const override { - const auto size_of_data = assert_cast(this)->size_of_data(); + const auto* derived = assert_cast(this); + const auto size_of_data = derived->size_of_data(); for (size_t i = 0; i != num_rows; ++i) { if (places[i]) { - assert_cast(this)->merge(places[i] + offset, rhs + size_of_data * i, - arena); + derived->merge(places[i] + offset, rhs + size_of_data * i, arena); } } } @@ -455,13 +468,15 @@ class IAggregateFunctionHelper : public IAggregateFunction { << ", begin:" << begin << ", end:" << end << ", column.size():" << column.size(); std::vector deserialized_data(size_of_data()); auto* deserialized_place = (AggregateDataPtr)deserialized_data.data(); + const ColumnString& column_string = assert_cast(column); + const Derived* derived = assert_cast(this); for (size_t i = begin; i <= end; ++i) { - VectorBufferReader buffer_reader( - (assert_cast(column)).get_data_at(i)); - assert_cast(this)->create(deserialized_place); - DEFER({ assert_cast(this)->destroy(deserialized_place); }); - assert_cast(this)->deserialize_and_merge(place, deserialized_place, - buffer_reader, arena); + VectorBufferReader buffer_reader(column_string.get_data_at(i)); + derived->create(deserialized_place); + + DEFER({ derived->destroy(deserialized_place); }); + + derived->deserialize_and_merge(place, deserialized_place, buffer_reader, arena); } } @@ -475,8 +490,9 @@ class IAggregateFunctionHelper : public IAggregateFunction { void 
deserialize_and_merge(AggregateDataPtr __restrict place, AggregateDataPtr __restrict rhs, BufferReadable& buf, Arena* arena) const override { - assert_cast(this)->deserialize(rhs, buf, arena); - assert_cast(this)->merge(place, rhs, arena); + assert_cast(this)->deserialize(rhs, buf, + arena); + assert_cast(this)->merge(place, rhs, arena); } }; @@ -513,8 +529,9 @@ class IAggregateFunctionDataHelper : public IAggregateFunctionHelper { void deserialize_and_merge(AggregateDataPtr __restrict place, AggregateDataPtr __restrict rhs, BufferReadable& buf, Arena* arena) const override { - assert_cast(this)->deserialize(rhs, buf, arena); - assert_cast(this)->merge(place, rhs, arena); + assert_cast(this)->deserialize(rhs, buf, + arena); + assert_cast(this)->merge(place, rhs, arena); } }; diff --git a/be/src/vec/aggregate_functions/aggregate_function_approx_count_distinct.h b/be/src/vec/aggregate_functions/aggregate_function_approx_count_distinct.h index d0f5bce81a02bee..d267499e059818d 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_approx_count_distinct.h +++ b/be/src/vec/aggregate_functions/aggregate_function_approx_count_distinct.h @@ -31,6 +31,7 @@ #include "vec/aggregate_functions/aggregate_function.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" #include "vec/columns/columns_number.h" +#include "vec/common/assert_cast.h" #include "vec/common/string_ref.h" #include "vec/core/types.h" #include "vec/data_types/data_type_number.h" @@ -98,12 +99,14 @@ class AggregateFunctionApproxCountDistinct final void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { if constexpr (IsFixLenColumnType::value) { - auto column = assert_cast(columns[0]); + auto column = + assert_cast(columns[0]); auto value = column->get_element(row_num); this->data(place).add( HashUtil::murmur_hash64A((char*)&value, sizeof(value), HashUtil::MURMUR_SEED)); } else { - auto value = assert_cast(columns[0])->get_data_at(row_num); + auto value = assert_cast(columns[0]) + ->get_data_at(row_num); uint64_t hash_value = HashUtil::murmur_hash64A(value.data, value.size, HashUtil::MURMUR_SEED); this->data(place).add(hash_value); diff --git a/be/src/vec/aggregate_functions/aggregate_function_avg.h b/be/src/vec/aggregate_functions/aggregate_function_avg.h index 6827c6db373667c..8a18a88839b4db4 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_avg.h +++ b/be/src/vec/aggregate_functions/aggregate_function_avg.h @@ -145,7 +145,8 @@ class AggregateFunctionAvg final #ifdef __clang__ #pragma clang fp reassociate(on) #endif - const auto& column = assert_cast(*columns[0]); + const auto& column = + assert_cast(*columns[0]); if constexpr (IsDecimalNumber) { this->data(place).sum += column.get_data()[row_num].value; } else { diff --git a/be/src/vec/aggregate_functions/aggregate_function_avg_weighted.h b/be/src/vec/aggregate_functions/aggregate_function_avg_weighted.h index af3b2c98cd8b4c6..b59a3dccf0cea82 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_avg_weighted.h +++ b/be/src/vec/aggregate_functions/aggregate_function_avg_weighted.h @@ -108,8 +108,10 @@ class AggregateFunctionAvgWeight final void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { - const auto& column = assert_cast(*columns[0]); - const auto& weight = assert_cast(*columns[1]); + const auto& column = + assert_cast(*columns[0]); + const auto& weight = + assert_cast(*columns[1]); 
this->data(place).add(column.get_data()[row_num], weight.get_element(row_num)); } diff --git a/be/src/vec/aggregate_functions/aggregate_function_bit.h b/be/src/vec/aggregate_functions/aggregate_function_bit.h index c0b2df85ba25d21..1ab01b03ceea38a 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bit.h +++ b/be/src/vec/aggregate_functions/aggregate_function_bit.h @@ -25,6 +25,7 @@ #include #include "vec/aggregate_functions/aggregate_function.h" +#include "vec/common/assert_cast.h" #include "vec/core/types.h" #include "vec/io/io_helper.h" @@ -114,7 +115,8 @@ class AggregateFunctionBitwise final void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { - const auto& column = assert_cast&>(*columns[0]); + const auto& column = + assert_cast&, TypeCheckOnRelease::DISABLE>(*columns[0]); this->data(place).add(column.get_data()[row_num]); } diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap.h b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h index dd7af71de06ae03..6c504b91bf4abd1 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bitmap.h +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap.h @@ -166,9 +166,12 @@ class AggregateFunctionBitmapSerializationHelper col.resize(num_rows); auto* data = col.get_data().data(); for (size_t i = 0; i != num_rows; ++i) { - assert_cast(this)->create(place); - DEFER({ assert_cast(this)->destroy(place); }); - assert_cast(this)->add(place, columns, i, arena); + assert_cast(this)->create(place); + DEFER({ + assert_cast(this)->destroy(place); + }); + assert_cast(this)->add(place, columns, + i, arena); data[i] = std::move(this->data(place).value); } } else { @@ -304,7 +307,8 @@ class AggregateFunctionBitmapOp final void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { - const auto& column = assert_cast(*columns[0]); + const auto& column = + assert_cast(*columns[0]); this->data(place).add(column.get_data()[row_num]); } @@ -367,12 +371,13 @@ class AggregateFunctionBitmapCount final if constexpr (arg_is_nullable) { auto& nullable_column = assert_cast(*columns[0]); if (!nullable_column.is_null_at(row_num)) { - const auto& column = - assert_cast(nullable_column.get_nested_column()); + const auto& column = assert_cast( + nullable_column.get_nested_column()); this->data(place).add(column.get_data()[row_num]); } } else { - const auto& column = assert_cast(*columns[0]); + const auto& column = + assert_cast(*columns[0]); this->data(place).add(column.get_data()[row_num]); } } diff --git a/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h index ce80b38d0913ba8..19352e022fa7a27 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h +++ b/be/src/vec/aggregate_functions/aggregate_function_bitmap_agg.h @@ -27,6 +27,7 @@ #include "util/bitmap_value.h" #include "vec/aggregate_functions/aggregate_function.h" +#include "vec/common/assert_cast.h" #include "vec/data_types/data_type_bitmap.h" namespace doris { @@ -74,14 +75,16 @@ class AggregateFunctionBitmapAgg final Arena* arena) const override { DCHECK_LT(row_num, columns[0]->size()); if constexpr (arg_nullable) { - auto& nullable_col = assert_cast(*columns[0]); + auto& nullable_col = + assert_cast(*columns[0]); auto& nullable_map = nullable_col.get_null_map_data(); if (!nullable_map[row_num]) { - auto& col = assert_cast(nullable_col.get_nested_column()); + auto& col 
= assert_cast( + nullable_col.get_nested_column()); this->data(place).add(col.get_data()[row_num]); } } else { - auto& col = assert_cast(*columns[0]); + auto& col = assert_cast(*columns[0]); this->data(place).add(col.get_data()[row_num]); } } diff --git a/be/src/vec/aggregate_functions/aggregate_function_collect.h b/be/src/vec/aggregate_functions/aggregate_function_collect.h index 4da6e023eb39496..b99ecd959245e3f 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_collect.h +++ b/be/src/vec/aggregate_functions/aggregate_function_collect.h @@ -69,7 +69,8 @@ struct AggregateFunctionCollectSetData { size_t size() const { return data_set.size(); } void add(const IColumn& column, size_t row_num) { - data_set.insert(assert_cast(column).get_data()[row_num]); + data_set.insert(assert_cast(column) + .get_data()[row_num]); } void merge(const SelfType& rhs) { @@ -191,7 +192,8 @@ struct AggregateFunctionCollectListData { size_t size() const { return data.size(); } void add(const IColumn& column, size_t row_num) { - const auto& vec = assert_cast(column).get_data(); + const auto& vec = + assert_cast(column).get_data(); data.push_back(vec[row_num]); } @@ -256,8 +258,10 @@ struct AggregateFunctionCollectListData { } max_size = rhs.max_size; - data->insert_range_from(*rhs.data, 0, - std::min(assert_cast(max_size - size()), rhs.size())); + data->insert_range_from( + *rhs.data, 0, + std::min(assert_cast(max_size - size()), + rhs.size())); } else { data->insert_range_from(*rhs.data, 0, rhs.size()); } @@ -326,8 +330,10 @@ struct AggregateFunctionArrayAggData { } void add(const IColumn& column, size_t row_num) { - const auto& col = assert_cast(column); - const auto& vec = assert_cast(col.get_nested_column()).get_data(); + const auto& col = assert_cast(column); + const auto& vec = + assert_cast(col.get_nested_column()) + .get_data(); null_map->push_back(col.get_null_map_data()[row_num]); nested_column->get_data().push_back(vec[row_num]); DCHECK(null_map->size() == nested_column->size()); @@ -426,8 +432,9 @@ struct AggregateFunctionArrayAggData { } void add(const IColumn& column, size_t row_num) { - const auto& col = assert_cast(column); - const auto& vec = assert_cast(col.get_nested_column()); + const auto& col = assert_cast(column); + const auto& vec = assert_cast( + col.get_nested_column()); null_map->push_back(col.get_null_map_data()[row_num]); nested_column->insert_from(vec, row_num); DCHECK(null_map->size() == nested_column->size()); @@ -561,7 +568,9 @@ class AggregateFunctionCollect if constexpr (HasLimit::value) { if (data.max_size == -1) { data.max_size = - (UInt64)assert_cast(columns[1])->get_element(row_num); + (UInt64)assert_cast( + columns[1]) + ->get_element(row_num); } if (data.size() >= data.max_size) { return; @@ -711,15 +720,20 @@ class AggregateFunctionCollect for (size_t i = 0; i < num_rows; ++i) { col_null->get_null_map_data().push_back(col_src.get_null_map_data()[i]); if constexpr (std::is_same_v) { - auto& vec = assert_cast(col_null->get_nested_column()); + auto& vec = assert_cast( + col_null->get_nested_column()); const auto& vec_src = - assert_cast(col_src.get_nested_column()); + assert_cast( + col_src.get_nested_column()); vec.insert_from(vec_src, i); } else { using ColVecType = ColumnVectorOrDecimal; - auto& vec = assert_cast(col_null->get_nested_column()).get_data(); - auto& vec_src = - assert_cast(col_src.get_nested_column()).get_data(); + auto& vec = assert_cast( + col_null->get_nested_column()) + .get_data(); + auto& vec_src = assert_cast( + col_src.get_nested_column()) + 
.get_data(); vec.push_back(vec_src[i]); } to_arr.get_offsets().push_back(to_nested_col.size()); diff --git a/be/src/vec/aggregate_functions/aggregate_function_count.h b/be/src/vec/aggregate_functions/aggregate_function_count.h index 7449c949cb90471..62aa869771c0a53 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_count.h +++ b/be/src/vec/aggregate_functions/aggregate_function_count.h @@ -196,7 +196,9 @@ class AggregateFunctionCountNotNullUnary final void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { - data(place).count += !assert_cast(*columns[0]).is_null_at(row_num); + data(place).count += + !assert_cast(*columns[0]) + .is_null_at(row_num); } void reset(AggregateDataPtr place) const override { data(place).count = 0; } diff --git a/be/src/vec/aggregate_functions/aggregate_function_covar.h b/be/src/vec/aggregate_functions/aggregate_function_covar.h index 9dc2d2d5b381c66..609b5f6d89c0cb3 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_covar.h +++ b/be/src/vec/aggregate_functions/aggregate_function_covar.h @@ -107,9 +107,11 @@ struct BaseData { } void add(const IColumn* column_x, const IColumn* column_y, size_t row_num) { - const auto& sources_x = assert_cast&>(*column_x); + const auto& sources_x = + assert_cast&, TypeCheckOnRelease::DISABLE>(*column_x); double source_data_x = sources_x.get_data()[row_num]; - const auto& sources_y = assert_cast&>(*column_y); + const auto& sources_y = + assert_cast&, TypeCheckOnRelease::DISABLE>(*column_y); double source_data_y = sources_y.get_data()[row_num]; sum_x += source_data_x; @@ -186,7 +188,8 @@ struct BaseDatadecimal { } DecimalV2Value get_source_data(const IColumn* column, size_t row_num) { - const auto& sources = assert_cast&>(*column); + const auto& sources = + assert_cast&, TypeCheckOnRelease::DISABLE>(*column); Field field = sources[row_num]; auto decimal_field = field.template get>(); int128_t value; diff --git a/be/src/vec/aggregate_functions/aggregate_function_distinct.h b/be/src/vec/aggregate_functions/aggregate_function_distinct.h index 4f42e8509f2acc3..6193b28a131e9f2 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_distinct.h +++ b/be/src/vec/aggregate_functions/aggregate_function_distinct.h @@ -65,7 +65,9 @@ struct AggregateFunctionDistinctSingleNumericData { Container data; void add(const IColumn** columns, size_t /* columns_num */, size_t row_num, Arena*) { - const auto& vec = assert_cast&>(*columns[0]).get_data(); + const auto& vec = + assert_cast&, TypeCheckOnRelease::DISABLE>(*columns[0]) + .get_data(); if constexpr (stable) { data.emplace(vec[row_num], data.size()); } else { diff --git a/be/src/vec/aggregate_functions/aggregate_function_foreach.h b/be/src/vec/aggregate_functions/aggregate_function_foreach.h index d5c1f7d09e47b5e..4261ef24343b95f 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_foreach.h +++ b/be/src/vec/aggregate_functions/aggregate_function_foreach.h @@ -228,10 +228,12 @@ class AggregateFunctionForEach : public IAggregateFunctionDataHelper nested(num_arguments); for (size_t i = 0; i < num_arguments; ++i) { - nested[i] = &assert_cast(*columns[i]).get_data(); + nested[i] = &assert_cast(*columns[i]) + .get_data(); } - const auto& first_array_column = assert_cast(*columns[0]); + const auto& first_array_column = + assert_cast(*columns[0]); const auto& offsets = first_array_column.get_offsets(); size_t begin = offsets[row_num - 1]; @@ -239,7 +241,8 @@ class AggregateFunctionForEach : public 
IAggregateFunctionDataHelper(*columns[i]); + const auto& ith_column = + assert_cast(*columns[i]); const auto& ith_offsets = ith_column.get_offsets(); if (ith_offsets[row_num] != end || diff --git a/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h b/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h index 5d627782f25e4d7..d8cf91865f1ed2c 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h +++ b/be/src/vec/aggregate_functions/aggregate_function_group_array_intersect.h @@ -102,7 +102,8 @@ struct AggregateFunctionGroupArrayIntersectData { if (is_column_data_nullable) { auto* const_col_data = const_cast(&column_data); col_null = static_cast(const_col_data); - nested_column_data = &assert_cast(col_null->get_nested_column()); + nested_column_data = &assert_cast( + col_null->get_nested_column()); } else { nested_column_data = &static_cast(column_data); } @@ -172,10 +173,12 @@ class AggregateFunctionGroupArrayIntersect const bool col_is_nullable = (*columns[0]).is_nullable(); const ColumnArray& column = - col_is_nullable ? assert_cast( - assert_cast(*columns[0]) - .get_nested_column()) - : assert_cast(*columns[0]); + col_is_nullable + ? assert_cast( + assert_cast( + *columns[0]) + .get_nested_column()) + : assert_cast(*columns[0]); const auto& offsets = column.get_offsets(); const auto offset = offsets[row_num - 1]; @@ -364,10 +367,12 @@ class AggregateFunctionGroupArrayIntersectGeneric const bool col_is_nullable = (*columns[0]).is_nullable(); const ColumnArray& column = - col_is_nullable ? assert_cast( - assert_cast(*columns[0]) - .get_nested_column()) - : assert_cast(*columns[0]); + col_is_nullable + ? assert_cast( + assert_cast( + *columns[0]) + .get_nested_column()) + : assert_cast(*columns[0]); const auto nested_column_data = column.get_data_ptr(); const auto& offsets = column.get_offsets(); diff --git a/be/src/vec/aggregate_functions/aggregate_function_group_concat.h b/be/src/vec/aggregate_functions/aggregate_function_group_concat.h index 87ed907377ea364..a62ffb8da619f99 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_group_concat.h +++ b/be/src/vec/aggregate_functions/aggregate_function_group_concat.h @@ -25,6 +25,7 @@ #include "vec/aggregate_functions/aggregate_function.h" #include "vec/aggregate_functions/aggregate_function_simple_factory.h" #include "vec/columns/column_string.h" +#include "vec/common/assert_cast.h" #include "vec/common/string_ref.h" #include "vec/core/types.h" #include "vec/data_types/data_type_string.h" @@ -98,7 +99,8 @@ struct AggregateFunctionGroupConcatImplStr { static const std::string separator; static void add(AggregateFunctionGroupConcatData& __restrict place, const IColumn** columns, size_t row_num) { - place.add(assert_cast(*columns[0]).get_data_at(row_num), + place.add(assert_cast(*columns[0]) + .get_data_at(row_num), StringRef(separator.data(), separator.length())); } }; @@ -106,8 +108,10 @@ struct AggregateFunctionGroupConcatImplStr { struct AggregateFunctionGroupConcatImplStrStr { static void add(AggregateFunctionGroupConcatData& __restrict place, const IColumn** columns, size_t row_num) { - place.add(assert_cast(*columns[0]).get_data_at(row_num), - assert_cast(*columns[1]).get_data_at(row_num)); + place.add(assert_cast(*columns[0]) + .get_data_at(row_num), + assert_cast(*columns[1]) + .get_data_at(row_num)); } }; diff --git a/be/src/vec/aggregate_functions/aggregate_function_histogram.h b/be/src/vec/aggregate_functions/aggregate_function_histogram.h index 
8fcd133b055bd30..25fc6957321586e 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_histogram.h +++ b/be/src/vec/aggregate_functions/aggregate_function_histogram.h @@ -208,9 +208,12 @@ class AggregateFunctionHistogram final if constexpr (std::is_same_v) { this->data(place).add( - assert_cast(*columns[0]).get_data_at(row_num)); + assert_cast(*columns[0]) + .get_data_at(row_num)); } else { - this->data(place).add(assert_cast(*columns[0]).get_data()[row_num]); + this->data(place).add( + assert_cast(*columns[0]) + .get_data()[row_num]); } } diff --git a/be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.h b/be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.h index f976e959f8558da..1cf6dc7f2a29a9a 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.h +++ b/be/src/vec/aggregate_functions/aggregate_function_hll_union_agg.h @@ -76,7 +76,7 @@ struct AggregateFunctionHLLData { void reset() { dst_hll.clear(); } void add(const IColumn* column, size_t row_num) { - const auto& sources = assert_cast(*column); + const auto& sources = assert_cast(*column); dst_hll.merge(sources.get_element(row_num)); } }; diff --git a/be/src/vec/aggregate_functions/aggregate_function_map.h b/be/src/vec/aggregate_functions/aggregate_function_map.h index 0f1a298aed10f3a..ca962bd32076c94 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_map.h +++ b/be/src/vec/aggregate_functions/aggregate_function_map.h @@ -146,7 +146,9 @@ struct AggregateFunctionMapAggData { const size_t size = _key_column->size(); write_binary(size, buf); for (size_t i = 0; i < size; i++) { - write_binary(assert_cast(*_key_column).get_data_at(i), buf); + write_binary(assert_cast(*_key_column) + .get_data_at(i), + buf); } for (size_t i = 0; i < size; i++) { write_binary(_value_column->get_data_at(i), buf); @@ -163,7 +165,8 @@ struct AggregateFunctionMapAggData { continue; } key.data = _arena.insert(key.data, key.size); - assert_cast(*_key_column).insert_data(key.data, key.size); + assert_cast(*_key_column) + .insert_data(key.data, key.size); } StringRef val; for (size_t i = 0; i < size; i++) { @@ -205,22 +208,25 @@ class AggregateFunctionMapAgg final void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena* arena) const override { if (columns[0]->is_nullable()) { - auto& nullable_col = assert_cast(*columns[0]); + auto& nullable_col = + assert_cast(*columns[0]); auto& nullable_map = nullable_col.get_null_map_data(); if (nullable_map[row_num]) { return; } Field value; columns[1]->get(row_num, value); - this->data(place).add( - assert_cast(nullable_col.get_nested_column()) - .get_data_at(row_num), - value); + this->data(place).add(assert_cast( + nullable_col.get_nested_column()) + .get_data_at(row_num), + value); } else { Field value; columns[1]->get(row_num, value); this->data(place).add( - assert_cast(*columns[0]).get_data_at(row_num), value); + assert_cast(*columns[0]) + .get_data_at(row_num), + value); } } diff --git a/be/src/vec/aggregate_functions/aggregate_function_min_max.h b/be/src/vec/aggregate_functions/aggregate_function_min_max.h index 7fe6e2923e17039..1281e7ca4c48419 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_min_max.h +++ b/be/src/vec/aggregate_functions/aggregate_function_min_max.h @@ -73,9 +73,13 @@ struct SingleValueDataFixed { void change_if(const IColumn& column, size_t row_num, bool less) { has_value = true; - value = less ? std::min(assert_cast&>(column).get_data()[row_num], + value = less ? 
std::min(assert_cast&, TypeCheckOnRelease::DISABLE>( + column) + .get_data()[row_num], value) - : std::max(assert_cast&>(column).get_data()[row_num], + : std::max(assert_cast&, TypeCheckOnRelease::DISABLE>( + column) + .get_data()[row_num], value); } @@ -109,7 +113,8 @@ struct SingleValueDataFixed { void change(const IColumn& column, size_t row_num, Arena*) { has_value = true; - value = assert_cast&>(column).get_data()[row_num]; + value = assert_cast&, TypeCheckOnRelease::DISABLE>(column) + .get_data()[row_num]; } /// Assuming to.has() @@ -119,7 +124,8 @@ struct SingleValueDataFixed { } bool change_if_less(const IColumn& column, size_t row_num, Arena* arena) { - if (!has() || assert_cast&>(column).get_data()[row_num] < value) { + if (!has() || assert_cast&, TypeCheckOnRelease::DISABLE>(column) + .get_data()[row_num] < value) { change(column, row_num, arena); return true; } else { @@ -137,7 +143,8 @@ struct SingleValueDataFixed { } bool change_if_greater(const IColumn& column, size_t row_num, Arena* arena) { - if (!has() || assert_cast&>(column).get_data()[row_num] > value) { + if (!has() || assert_cast&, TypeCheckOnRelease::DISABLE>(column) + .get_data()[row_num] > value) { change(column, row_num, arena); return true; } else { @@ -188,9 +195,13 @@ struct SingleValueDataDecimal { void change_if(const IColumn& column, size_t row_num, bool less) { has_value = true; - value = less ? std::min(assert_cast&>(column).get_data()[row_num], + value = less ? std::min(assert_cast&, TypeCheckOnRelease::DISABLE>( + column) + .get_data()[row_num], value) - : std::max(assert_cast&>(column).get_data()[row_num], + : std::max(assert_cast&, TypeCheckOnRelease::DISABLE>( + column) + .get_data()[row_num], value); } @@ -224,7 +235,8 @@ struct SingleValueDataDecimal { void change(const IColumn& column, size_t row_num, Arena*) { has_value = true; - value = assert_cast&>(column).get_data()[row_num]; + value = assert_cast&, TypeCheckOnRelease::DISABLE>(column) + .get_data()[row_num]; } /// Assuming to.has() @@ -234,7 +246,8 @@ struct SingleValueDataDecimal { } bool change_if_less(const IColumn& column, size_t row_num, Arena* arena) { - if (!has() || assert_cast&>(column).get_data()[row_num] < value) { + if (!has() || assert_cast&, TypeCheckOnRelease::DISABLE>(column) + .get_data()[row_num] < value) { change(column, row_num, arena); return true; } else { @@ -252,7 +265,8 @@ struct SingleValueDataDecimal { } bool change_if_greater(const IColumn& column, size_t row_num, Arena* arena) { - if (!has() || assert_cast&>(column).get_data()[row_num] > value) { + if (!has() || assert_cast&, TypeCheckOnRelease::DISABLE>(column) + .get_data()[row_num] > value) { change(column, row_num, arena); return true; } else { @@ -389,14 +403,18 @@ struct SingleValueDataString { } void change(const IColumn& column, size_t row_num, Arena* arena) { - change_impl(assert_cast(column).get_data_at(row_num), arena); + change_impl( + assert_cast(column).get_data_at( + row_num), + arena); } void change(const Self& to, Arena* arena) { change_impl(to.get_string_ref(), arena); } bool change_if_less(const IColumn& column, size_t row_num, Arena* arena) { if (!has() || - assert_cast(column).get_data_at(row_num) < get_string_ref()) { + assert_cast(column).get_data_at( + row_num) < get_string_ref()) { change(column, row_num, arena); return true; } else { @@ -406,7 +424,8 @@ struct SingleValueDataString { bool change_if_greater(const IColumn& column, size_t row_num, Arena* arena) { if (!has() || - assert_cast(column).get_data_at(row_num) > get_string_ref()) { 
+ assert_cast(column).get_data_at( + row_num) > get_string_ref()) { change(column, row_num, arena); return true; } else { diff --git a/be/src/vec/aggregate_functions/aggregate_function_min_max_by.h b/be/src/vec/aggregate_functions/aggregate_function_min_max_by.h index 634dc171f5960c6..e46931151202c21 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_min_max_by.h +++ b/be/src/vec/aggregate_functions/aggregate_function_min_max_by.h @@ -73,7 +73,8 @@ struct BitmapValueData { void change(const IColumn& column, size_t row_num, Arena*) { has_value = true; - value = assert_cast(column).get_data()[row_num]; + value = assert_cast(column) + .get_data()[row_num]; } void change(const Self& to, Arena*) { diff --git a/be/src/vec/aggregate_functions/aggregate_function_null.h b/be/src/vec/aggregate_functions/aggregate_function_null.h index d99d1aae3c39a32..382fb8f7a5310ee 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_null.h +++ b/be/src/vec/aggregate_functions/aggregate_function_null.h @@ -200,7 +200,8 @@ class AggregateFunctionNullUnaryInline final void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena* arena) const override { - const ColumnNullable* column = assert_cast(columns[0]); + const ColumnNullable* column = + assert_cast(columns[0]); if (!column->is_null_at(row_num)) { this->set_flag(place); const IColumn* nested_column = &column->get_nested_column(); @@ -308,7 +309,9 @@ class AggregateFunctionNullVariadicInline final for (size_t i = 0; i < number_of_arguments; ++i) { if (is_nullable[i]) { - const auto& nullable_col = assert_cast(*columns[i]); + const auto& nullable_col = + assert_cast( + *columns[i]); if (nullable_col.is_null_at(row_num)) { /// If at least one column has a null value in the current row, /// we don't process this row. 
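The hunks above all repeat one pattern: `assert_cast` calls that run once per input row inside the aggregate functions' `add()` paths gain a second template argument, `TypeCheckOnRelease::DISABLE`, so the typeid check is compiled out of release builds on these hot loops while debug builds keep it. A minimal sketch of the call-site shape follows; the target column type (`ColumnVector<Int64>`), the helper function, and its signature are illustrative assumptions, not code taken from the patch:

```cpp
#include "vec/columns/column.h"
#include "vec/columns/column_vector.h"
#include "vec/common/assert_cast.h"
#include "vec/core/types.h"

namespace doris::vectorized {

// Per-row hot path: the cast executes once per input row, so the release-build
// type check is disabled here and only debug builds still verify the typeid.
// (Illustrative helper, not part of the patch.)
inline void add_one_row(const IColumn& column, size_t row_num, Int64& sum) {
    const auto& col =
            assert_cast<const ColumnVector<Int64>&, TypeCheckOnRelease::DISABLE>(column);
    sum += col.get_data()[row_num];
}

} // namespace doris::vectorized
```

The same two-argument form recurs in the column hunks further down (`ColumnArray`, `ColumnNullable`, `ColumnVector`, and so on), wherever the cast sits on a per-row or per-element path.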
diff --git a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h index d7de66681ed110b..deb53241abb92f8 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h +++ b/be/src/vec/aggregate_functions/aggregate_function_orthogonal_bitmap.h @@ -53,8 +53,10 @@ struct AggOrthBitmapBaseData { using ColVecData = std::conditional_t, ColumnVector, ColumnString>; void add(const IColumn** columns, size_t row_num) { - const auto& bitmap_col = assert_cast(*columns[0]); - const auto& data_col = assert_cast(*columns[1]); + const auto& bitmap_col = + assert_cast(*columns[0]); + const auto& data_col = + assert_cast(*columns[1]); const auto& bitmap_value = bitmap_col.get_element(row_num); if constexpr (IsNumber) { @@ -71,7 +73,8 @@ struct AggOrthBitmapBaseData { if (first_init) { DCHECK(argument_size > 1); for (int idx = 2; idx < argument_size; ++idx) { - const auto& col = assert_cast(*columns[idx]); + const auto& col = + assert_cast(*columns[idx]); if constexpr (IsNumber) { bitmap.add_key(col.get_element(row_num)); } @@ -202,8 +205,10 @@ struct AggOrthBitmapExprCalBaseData { using ColVecData = std::conditional_t, ColumnVector, ColumnString>; void add(const IColumn** columns, size_t row_num) { - const auto& bitmap_col = assert_cast(*columns[0]); - const auto& data_col = assert_cast(*columns[1]); + const auto& bitmap_col = + assert_cast(*columns[0]); + const auto& data_col = + assert_cast(*columns[1]); const auto& bitmap_value = bitmap_col.get_element(row_num); std::string update_key = data_col.get_data_at(row_num).to_string(); bitmap_expr_cal.update(update_key, bitmap_value); @@ -212,7 +217,8 @@ struct AggOrthBitmapExprCalBaseData { void init_add_key(const IColumn** columns, size_t row_num, int argument_size) { if (first_init) { DCHECK(argument_size > 1); - const auto& col = assert_cast(*columns[2]); + const auto& col = + assert_cast(*columns[2]); std::string expr = col.get_data_at(row_num).to_string(); bitmap_expr_cal.bitmap_calculation_init(expr); first_init = false; @@ -306,7 +312,8 @@ struct OrthBitmapUnionCountData { void init_add_key(const IColumn** columns, size_t row_num, int argument_size) {} void add(const IColumn** columns, size_t row_num) { - const auto& column = assert_cast(*columns[0]); + const auto& column = + assert_cast(*columns[0]); value |= column.get_data()[row_num]; } void merge(const OrthBitmapUnionCountData& rhs) { result += rhs.result; } diff --git a/be/src/vec/aggregate_functions/aggregate_function_percentile.h b/be/src/vec/aggregate_functions/aggregate_function_percentile.h index 3f83744f13e468c..fe3e18861877a6b 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_percentile.h +++ b/be/src/vec/aggregate_functions/aggregate_function_percentile.h @@ -222,12 +222,15 @@ class AggregateFunctionPercentileApproxTwoParams_OLDER : public AggregateFunctio for (int i = 0; i < 2; ++i) { const auto* nullable_column = check_and_get_column(columns[i]); if (nullable_column == nullptr) { //Not Nullable column - const auto& column = assert_cast(*columns[i]); + const auto& column = + assert_cast( + *columns[i]); column_data[i] = column.get_element(row_num); } else if (!nullable_column->is_null_at( row_num)) { // Nullable column && Not null data const auto& column = - assert_cast(nullable_column->get_nested_column()); + assert_cast( + nullable_column->get_nested_column()); column_data[i] = column.get_element(row_num); } else { // Nullable column && null data if (i == 0) { @@ 
-240,8 +243,10 @@ class AggregateFunctionPercentileApproxTwoParams_OLDER : public AggregateFunctio this->data(place).add(column_data[0], column_data[1]); } else { - const auto& sources = assert_cast(*columns[0]); - const auto& quantile = assert_cast(*columns[1]); + const auto& sources = + assert_cast(*columns[0]); + const auto& quantile = + assert_cast(*columns[1]); this->data(place).init(); this->data(place).add(sources.get_element(row_num), quantile.get_element(row_num)); @@ -255,8 +260,10 @@ class AggregateFunctionPercentileApproxTwoParams : public AggregateFunctionPerce : AggregateFunctionPercentileApprox(argument_types_) {} void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { - const auto& sources = assert_cast(*columns[0]); - const auto& quantile = assert_cast(*columns[1]); + const auto& sources = + assert_cast(*columns[0]); + const auto& quantile = + assert_cast(*columns[1]); this->data(place).init(); this->data(place).add(sources.get_element(row_num), quantile.get_element(row_num)); } @@ -276,12 +283,15 @@ class AggregateFunctionPercentileApproxThreeParams_OLDER for (int i = 0; i < 3; ++i) { const auto* nullable_column = check_and_get_column(columns[i]); if (nullable_column == nullptr) { //Not Nullable column - const auto& column = assert_cast(*columns[i]); + const auto& column = + assert_cast( + *columns[i]); column_data[i] = column.get_element(row_num); } else if (!nullable_column->is_null_at( row_num)) { // Nullable column && Not null data const auto& column = - assert_cast(nullable_column->get_nested_column()); + assert_cast( + nullable_column->get_nested_column()); column_data[i] = column.get_element(row_num); } else { // Nullable column && null data if (i == 0) { @@ -294,9 +304,12 @@ class AggregateFunctionPercentileApproxThreeParams_OLDER this->data(place).add(column_data[0], column_data[1]); } else { - const auto& sources = assert_cast(*columns[0]); - const auto& quantile = assert_cast(*columns[1]); - const auto& compression = assert_cast(*columns[2]); + const auto& sources = + assert_cast(*columns[0]); + const auto& quantile = + assert_cast(*columns[1]); + const auto& compression = + assert_cast(*columns[2]); this->data(place).init(compression.get_element(row_num)); this->data(place).add(sources.get_element(row_num), quantile.get_element(row_num)); @@ -310,9 +323,12 @@ class AggregateFunctionPercentileApproxThreeParams : public AggregateFunctionPer : AggregateFunctionPercentileApprox(argument_types_) {} void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { - const auto& sources = assert_cast(*columns[0]); - const auto& quantile = assert_cast(*columns[1]); - const auto& compression = assert_cast(*columns[2]); + const auto& sources = + assert_cast(*columns[0]); + const auto& quantile = + assert_cast(*columns[1]); + const auto& compression = + assert_cast(*columns[2]); this->data(place).init(compression.get_element(row_num)); this->data(place).add(sources.get_element(row_num), quantile.get_element(row_num)); @@ -334,12 +350,15 @@ class AggregateFunctionPercentileApproxWeightedThreeParams_OLDER for (int i = 0; i < 3; ++i) { const auto* nullable_column = check_and_get_column(columns[i]); if (nullable_column == nullptr) { //Not Nullable column - const auto& column = assert_cast&>(*columns[i]); + const auto& column = + assert_cast&, TypeCheckOnRelease::DISABLE>( + *columns[i]); column_data[i] = column.get_element(row_num); } else if (!nullable_column->is_null_at( 
row_num)) { // Nullable column && Not null data - const auto& column = assert_cast&>( - nullable_column->get_nested_column()); + const auto& column = + assert_cast&, TypeCheckOnRelease::DISABLE>( + nullable_column->get_nested_column()); column_data[i] = column.get_element(row_num); } else { // Nullable column && null data if (i == 0) { @@ -351,9 +370,15 @@ class AggregateFunctionPercentileApproxWeightedThreeParams_OLDER this->data(place).add_with_weight(column_data[0], column_data[1], column_data[2]); } else { - const auto& sources = assert_cast&>(*columns[0]); - const auto& weight = assert_cast&>(*columns[1]); - const auto& quantile = assert_cast&>(*columns[2]); + const auto& sources = + assert_cast&, TypeCheckOnRelease::DISABLE>( + *columns[0]); + const auto& weight = + assert_cast&, TypeCheckOnRelease::DISABLE>( + *columns[1]); + const auto& quantile = + assert_cast&, TypeCheckOnRelease::DISABLE>( + *columns[2]); this->data(place).init(); this->data(place).add_with_weight(sources.get_element(row_num), @@ -371,9 +396,12 @@ class AggregateFunctionPercentileApproxWeightedThreeParams void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { - const auto& sources = assert_cast&>(*columns[0]); - const auto& weight = assert_cast&>(*columns[1]); - const auto& quantile = assert_cast&>(*columns[2]); + const auto& sources = + assert_cast&, TypeCheckOnRelease::DISABLE>(*columns[0]); + const auto& weight = + assert_cast&, TypeCheckOnRelease::DISABLE>(*columns[1]); + const auto& quantile = + assert_cast&, TypeCheckOnRelease::DISABLE>(*columns[2]); this->data(place).init(); this->data(place).add_with_weight(sources.get_element(row_num), weight.get_element(row_num), @@ -395,12 +423,15 @@ class AggregateFunctionPercentileApproxWeightedFourParams_OLDER for (int i = 0; i < 4; ++i) { const auto* nullable_column = check_and_get_column(columns[i]); if (nullable_column == nullptr) { //Not Nullable column - const auto& column = assert_cast&>(*columns[i]); + const auto& column = + assert_cast&, TypeCheckOnRelease::DISABLE>( + *columns[i]); column_data[i] = column.get_element(row_num); } else if (!nullable_column->is_null_at( row_num)) { // Nullable column && Not null data - const auto& column = assert_cast&>( - nullable_column->get_nested_column()); + const auto& column = + assert_cast&, TypeCheckOnRelease::DISABLE>( + nullable_column->get_nested_column()); column_data[i] = column.get_element(row_num); } else { // Nullable column && null data if (i == 0) { @@ -413,10 +444,18 @@ class AggregateFunctionPercentileApproxWeightedFourParams_OLDER this->data(place).add_with_weight(column_data[0], column_data[1], column_data[2]); } else { - const auto& sources = assert_cast&>(*columns[0]); - const auto& weight = assert_cast&>(*columns[1]); - const auto& quantile = assert_cast&>(*columns[2]); - const auto& compression = assert_cast&>(*columns[3]); + const auto& sources = + assert_cast&, TypeCheckOnRelease::DISABLE>( + *columns[0]); + const auto& weight = + assert_cast&, TypeCheckOnRelease::DISABLE>( + *columns[1]); + const auto& quantile = + assert_cast&, TypeCheckOnRelease::DISABLE>( + *columns[2]); + const auto& compression = + assert_cast&, TypeCheckOnRelease::DISABLE>( + *columns[3]); this->data(place).init(compression.get_element(row_num)); this->data(place).add_with_weight(sources.get_element(row_num), @@ -433,10 +472,14 @@ class AggregateFunctionPercentileApproxWeightedFourParams : AggregateFunctionPercentileApprox(argument_types_) {} void add(AggregateDataPtr 
__restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { - const auto& sources = assert_cast&>(*columns[0]); - const auto& weight = assert_cast&>(*columns[1]); - const auto& quantile = assert_cast&>(*columns[2]); - const auto& compression = assert_cast&>(*columns[3]); + const auto& sources = + assert_cast&, TypeCheckOnRelease::DISABLE>(*columns[0]); + const auto& weight = + assert_cast&, TypeCheckOnRelease::DISABLE>(*columns[1]); + const auto& quantile = + assert_cast&, TypeCheckOnRelease::DISABLE>(*columns[2]); + const auto& compression = + assert_cast&, TypeCheckOnRelease::DISABLE>(*columns[3]); this->data(place).init(compression.get_element(row_num)); this->data(place).add_with_weight(sources.get_element(row_num), weight.get_element(row_num), @@ -542,8 +585,10 @@ class AggregateFunctionPercentile final void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { - const auto& sources = assert_cast(*columns[0]); - const auto& quantile = assert_cast(*columns[1]); + const auto& sources = + assert_cast(*columns[0]); + const auto& quantile = + assert_cast(*columns[1]); AggregateFunctionPercentile::data(place).add(sources.get_data()[row_num], quantile.get_data(), 1); } @@ -590,12 +635,16 @@ class AggregateFunctionPercentileArray final void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { - const auto& sources = assert_cast(*columns[0]); - const auto& quantile_array = assert_cast(*columns[1]); + const auto& sources = + assert_cast(*columns[0]); + const auto& quantile_array = + assert_cast(*columns[1]); const auto& offset_column_data = quantile_array.get_offsets(); - const auto& nested_column = - assert_cast(quantile_array.get_data()).get_nested_column(); - const auto& nested_column_data = assert_cast(nested_column); + const auto& nested_column = assert_cast( + quantile_array.get_data()) + .get_nested_column(); + const auto& nested_column_data = + assert_cast(nested_column); AggregateFunctionPercentileArray::data(place).add( sources.get_int(row_num), nested_column_data.get_data(), diff --git a/be/src/vec/aggregate_functions/aggregate_function_percentile_approx.h b/be/src/vec/aggregate_functions/aggregate_function_percentile_approx.h index e73b71e9c9b4c5e..8698355897d2cb3 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_percentile_approx.h +++ b/be/src/vec/aggregate_functions/aggregate_function_percentile_approx.h @@ -156,8 +156,10 @@ class AggregateFunctionPercentileOld final void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { - const auto& sources = assert_cast&>(*columns[0]); - const auto& quantile = assert_cast(*columns[1]); + const auto& sources = + assert_cast&, TypeCheckOnRelease::DISABLE>(*columns[0]); + const auto& quantile = + assert_cast(*columns[1]); AggregateFunctionPercentileOld::data(place).add(sources.get_int(row_num), quantile.get_data(), 1); } @@ -203,12 +205,16 @@ class AggregateFunctionPercentileArrayOld final void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { - const auto& sources = assert_cast&>(*columns[0]); - const auto& quantile_array = assert_cast(*columns[1]); + const auto& sources = + assert_cast&, TypeCheckOnRelease::DISABLE>(*columns[0]); + const auto& quantile_array = + assert_cast(*columns[1]); const auto& offset_column_data = quantile_array.get_offsets(); - const auto& nested_column = - 
assert_cast(quantile_array.get_data()).get_nested_column(); - const auto& nested_column_data = assert_cast(nested_column); + const auto& nested_column = assert_cast( + quantile_array.get_data()) + .get_nested_column(); + const auto& nested_column_data = + assert_cast(nested_column); AggregateFunctionPercentileArrayOld::data(place).add( sources.get_int(row_num), nested_column_data.get_data(), diff --git a/be/src/vec/aggregate_functions/aggregate_function_product.h b/be/src/vec/aggregate_functions/aggregate_function_product.h index 22a217263b22744..1ec9a2711cef72f 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_product.h +++ b/be/src/vec/aggregate_functions/aggregate_function_product.h @@ -133,7 +133,8 @@ class AggregateFunctionProduct final void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { - const auto& column = assert_cast(*columns[0]); + const auto& column = + assert_cast(*columns[0]); this->data(place).add(TResult(column.get_data()[row_num]), multiplier); } diff --git a/be/src/vec/aggregate_functions/aggregate_function_quantile_state.h b/be/src/vec/aggregate_functions/aggregate_function_quantile_state.h index 14250087d2bd748..5954120553e17e9 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_quantile_state.h +++ b/be/src/vec/aggregate_functions/aggregate_function_quantile_state.h @@ -114,14 +114,16 @@ class AggregateFunctionQuantileStateOp final void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { if constexpr (arg_is_nullable) { - auto& nullable_column = assert_cast(*columns[0]); + auto& nullable_column = + assert_cast(*columns[0]); if (!nullable_column.is_null_at(row_num)) { - const auto& column = - assert_cast(nullable_column.get_nested_column()); + const auto& column = assert_cast( + nullable_column.get_nested_column()); this->data(place).add(column.get_data()[row_num]); } } else { - const auto& column = assert_cast(*columns[0]); + const auto& column = + assert_cast(*columns[0]); this->data(place).add(column.get_data()[row_num]); } } diff --git a/be/src/vec/aggregate_functions/aggregate_function_reader_first_last.h b/be/src/vec/aggregate_functions/aggregate_function_reader_first_last.h index f807f67a14b9a3f..b9d2545e0c00dd2 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_reader_first_last.h +++ b/be/src/vec/aggregate_functions/aggregate_function_reader_first_last.h @@ -43,17 +43,19 @@ struct Value { return true; } if constexpr (arg_is_nullable) { - return assert_cast(_ptr)->is_null_at(_offset); + return assert_cast(_ptr) + ->is_null_at(_offset); } return false; } void insert_into(IColumn& to) const { if constexpr (arg_is_nullable) { - auto* col = assert_cast(_ptr); - assert_cast(to).insert_from(col->get_nested_column(), _offset); + auto* col = assert_cast(_ptr); + assert_cast(to).insert_from( + col->get_nested_column(), _offset); } else { - assert_cast(to).insert_from(*_ptr, _offset); + assert_cast(to).insert_from(*_ptr, _offset); } } @@ -75,7 +77,9 @@ struct Value { template struct CopiedValue : public Value { public: - void insert_into(IColumn& to) const { assert_cast(to).insert(_copied_value); } + void insert_into(IColumn& to) const { + assert_cast(to).insert(_copied_value); + } bool is_null() const { return this->_ptr == nullptr; } @@ -85,12 +89,13 @@ struct CopiedValue : public Value { // because the address have meaningless, only need it to check is nullptr this->_ptr = (IColumn*)0x00000001; if constexpr 
(arg_is_nullable) { - auto* col = assert_cast(column); + auto* col = assert_cast(column); if (col->is_null_at(row)) { this->reset(); return; } else { - auto& nested_col = assert_cast(col->get_nested_column()); + auto& nested_col = assert_cast( + col->get_nested_column()); nested_col.get(row, _copied_value); } } else { @@ -162,7 +167,8 @@ struct ReaderFunctionFirstNonNullData : Data { return; } if constexpr (Data::nullable) { - const auto* nullable_column = assert_cast(columns[0]); + const auto* nullable_column = + assert_cast(columns[0]); if (nullable_column->is_null_at(row)) { return; } @@ -182,7 +188,8 @@ template struct ReaderFunctionLastNonNullData : Data { void add(int64_t row, const IColumn** columns) { if constexpr (Data::nullable) { - const auto* nullable_column = assert_cast(columns[0]); + const auto* nullable_column = + assert_cast(columns[0]); if (nullable_column->is_null_at(row)) { return; } diff --git a/be/src/vec/aggregate_functions/aggregate_function_retention.h b/be/src/vec/aggregate_functions/aggregate_function_retention.h index f38f1cf45a00d1b..f6bf03282e57113 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_retention.h +++ b/be/src/vec/aggregate_functions/aggregate_function_retention.h @@ -127,7 +127,9 @@ class AggregateFunctionRetention void add(AggregateDataPtr __restrict place, const IColumn** columns, const ssize_t row_num, Arena*) const override { for (int i = 0; i < get_argument_types().size(); i++) { - auto event = assert_cast*>(columns[i])->get_data()[row_num]; + auto event = + assert_cast*, TypeCheckOnRelease::DISABLE>(columns[i]) + ->get_data()[row_num]; if (event) { this->data(place).set(i); } diff --git a/be/src/vec/aggregate_functions/aggregate_function_sequence_match.h b/be/src/vec/aggregate_functions/aggregate_function_sequence_match.h index 101c2c16fd00c8f..586003043311c48 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_sequence_match.h +++ b/be/src/vec/aggregate_functions/aggregate_function_sequence_match.h @@ -201,7 +201,7 @@ struct AggregateFunctionSequenceMatchData final { using PatternActions = PODArrayWithStackMemory; - Derived& derived() { return assert_cast(*this); } + Derived& derived() { return assert_cast(*this); } void parse_pattern() { actions.clear(); @@ -602,16 +602,22 @@ class AggregateFunctionSequenceBase void add(AggregateDataPtr __restrict place, const IColumn** columns, const ssize_t row_num, Arena*) const override { std::string pattern = - assert_cast(columns[0])->get_data_at(0).to_string(); + assert_cast(columns[0]) + ->get_data_at(0) + .to_string(); this->data(place).init(pattern, arg_count); const auto& timestamp = - assert_cast&>(*columns[1]).get_data()[row_num]; + assert_cast&, TypeCheckOnRelease::DISABLE>( + *columns[1]) + .get_data()[row_num]; typename AggregateFunctionSequenceMatchData::Events events; for (auto i = 2; i < arg_count; i++) { - const auto event = assert_cast(columns[i])->get_data()[row_num]; + const auto event = + assert_cast(columns[i]) + ->get_data()[row_num]; events.set(i - 2, event); } diff --git a/be/src/vec/aggregate_functions/aggregate_function_stddev.h b/be/src/vec/aggregate_functions/aggregate_function_stddev.h index 496212bc35c81b8..2822334b6d70813 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_stddev.h +++ b/be/src/vec/aggregate_functions/aggregate_function_stddev.h @@ -105,7 +105,8 @@ struct BaseData { } void add(const IColumn* column, size_t row_num) { - const auto& sources = assert_cast&>(*column); + const auto& sources = + assert_cast&, 
TypeCheckOnRelease::DISABLE>(*column); double source_data = sources.get_data()[row_num]; double delta = source_data - mean; diff --git a/be/src/vec/aggregate_functions/aggregate_function_sum.h b/be/src/vec/aggregate_functions/aggregate_function_sum.h index 376b6ece4aafea0..cc05435a95030bf 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_sum.h +++ b/be/src/vec/aggregate_functions/aggregate_function_sum.h @@ -100,7 +100,8 @@ class AggregateFunctionSum final void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { - const auto& column = assert_cast(*columns[0]); + const auto& column = + assert_cast(*columns[0]); this->data(place).add(TResult(column.get_data()[row_num])); } diff --git a/be/src/vec/aggregate_functions/aggregate_function_topn.h b/be/src/vec/aggregate_functions/aggregate_function_topn.h index 6c7502c99a38fa5..1c0fba5099fb174 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_topn.h +++ b/be/src/vec/aggregate_functions/aggregate_function_topn.h @@ -195,10 +195,11 @@ struct AggregateFunctionTopNData { for (int i = 0; i < std::min((int)counter_vector.size(), top_num); i++) { const auto& element = counter_vector[i]; if constexpr (std::is_same_v) { - assert_cast(to).insert_data(element.second.c_str(), - element.second.length()); + assert_cast(to).insert_data( + element.second.c_str(), element.second.length()); } else { - assert_cast(to).get_data().push_back(element.second); + assert_cast(to).get_data().push_back( + element.second); } } } @@ -213,16 +214,22 @@ struct AggregateFunctionTopNData { struct AggregateFunctionTopNImplInt { static void add(AggregateFunctionTopNData& __restrict place, const IColumn** columns, size_t row_num) { - place.set_paramenters(assert_cast(columns[1])->get_element(row_num)); - place.add(assert_cast(*columns[0]).get_data_at(row_num)); + place.set_paramenters( + assert_cast(columns[1]) + ->get_element(row_num)); + place.add(assert_cast(*columns[0]) + .get_data_at(row_num)); } }; struct AggregateFunctionTopNImplIntInt { static void add(AggregateFunctionTopNData& __restrict place, const IColumn** columns, size_t row_num) { - place.set_paramenters(assert_cast(columns[1])->get_element(row_num), - assert_cast(columns[2])->get_element(row_num)); + place.set_paramenters( + assert_cast(columns[1]) + ->get_element(row_num), + assert_cast(columns[2]) + ->get_element(row_num)); place.add(assert_cast(*columns[0]).get_data_at(row_num)); } }; @@ -235,17 +242,22 @@ struct AggregateFunctionTopNImplArray { size_t row_num) { if constexpr (has_default_param) { place.set_paramenters( - assert_cast(columns[1])->get_element(row_num), - assert_cast(columns[2])->get_element(row_num)); + assert_cast(columns[1]) + ->get_element(row_num), + assert_cast(columns[2]) + ->get_element(row_num)); } else { place.set_paramenters( - assert_cast(columns[1])->get_element(row_num)); + assert_cast(columns[1]) + ->get_element(row_num)); } if constexpr (std::is_same_v) { - place.add(assert_cast(*columns[0]).get_data_at(row_num)); + place.add(assert_cast(*columns[0]) + .get_data_at(row_num)); } else { - T val = assert_cast(*columns[0]).get_data()[row_num]; + T val = assert_cast(*columns[0]) + .get_data()[row_num]; place.add(val); } } @@ -259,19 +271,28 @@ struct AggregateFunctionTopNImplWeight { size_t row_num) { if constexpr (has_default_param) { place.set_paramenters( - assert_cast(columns[2])->get_element(row_num), - assert_cast(columns[3])->get_element(row_num)); + assert_cast(columns[2]) + ->get_element(row_num), + 
assert_cast(columns[3]) + ->get_element(row_num)); } else { place.set_paramenters( assert_cast(columns[2])->get_element(row_num)); } if constexpr (std::is_same_v) { - auto weight = assert_cast&>(*columns[1]).get_data()[row_num]; - place.add(assert_cast(*columns[0]).get_data_at(row_num), weight); + auto weight = assert_cast&, TypeCheckOnRelease::DISABLE>( + *columns[1]) + .get_data()[row_num]; + place.add(assert_cast(*columns[0]) + .get_data_at(row_num), + weight); } else { - T val = assert_cast(*columns[0]).get_data()[row_num]; - auto weight = assert_cast&>(*columns[1]).get_data()[row_num]; + T val = assert_cast(*columns[0]) + .get_data()[row_num]; + auto weight = assert_cast&, TypeCheckOnRelease::DISABLE>( + *columns[1]) + .get_data()[row_num]; place.add(val, weight); } } diff --git a/be/src/vec/aggregate_functions/aggregate_function_uniq.h b/be/src/vec/aggregate_functions/aggregate_function_uniq.h index 58abd3842c21b27..356e0ead2d3d56c 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_uniq.h +++ b/be/src/vec/aggregate_functions/aggregate_function_uniq.h @@ -90,9 +90,12 @@ struct OneAdder { StringRef value = column.get_data_at(row_num); data.set.insert(Data::get_key(value)); } else if constexpr (IsDecimalNumber) { - data.set.insert(assert_cast&>(column).get_data()[row_num]); + data.set.insert( + assert_cast&, TypeCheckOnRelease::DISABLE>(column) + .get_data()[row_num]); } else { - data.set.insert(assert_cast&>(column).get_data()[row_num]); + data.set.insert(assert_cast&, TypeCheckOnRelease::DISABLE>(column) + .get_data()[row_num]); } } }; diff --git a/be/src/vec/aggregate_functions/aggregate_function_window.h b/be/src/vec/aggregate_functions/aggregate_function_window.h index 24ff2ad7ead1655..ec1aab99e6a5fea 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window.h +++ b/be/src/vec/aggregate_functions/aggregate_function_window.h @@ -387,7 +387,8 @@ struct LeadLagData { void set_value(const IColumn** columns, size_t pos) { if constexpr (arg_is_nullable) { - if (assert_cast(columns[0])->is_null_at(pos)) { + if (assert_cast(columns[0]) + ->is_null_at(pos)) { // ptr == nullptr means nullable _data_value.reset(); return; @@ -400,7 +401,8 @@ struct LeadLagData { void check_default(const IColumn* column) { if (!_is_inited) { if (is_column_nullable(*column)) { - const auto* nullable_column = assert_cast(column); + const auto* nullable_column = + assert_cast(column); if (nullable_column->is_null_at(0)) { _default_value.reset(); } else { diff --git a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h index 3751078910baddc..00ca7c58ed701af 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h +++ b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h @@ -663,14 +663,19 @@ class AggregateFunctionWindowFunnelOld void add(AggregateDataPtr __restrict place, const IColumn** columns, ssize_t row_num, Arena*) const override { const auto& window = - assert_cast&>(*columns[0]).get_data()[row_num]; + assert_cast&, TypeCheckOnRelease::DISABLE>(*columns[0]) + .get_data()[row_num]; StringRef mode = columns[1]->get_data_at(row_num); const auto& timestamp = - assert_cast&>(*columns[2]).get_data()[row_num]; + assert_cast&, TypeCheckOnRelease::DISABLE>( + *columns[2]) + .get_data()[row_num]; const int NON_EVENT_NUM = 3; for (int i = NON_EVENT_NUM; i < IAggregateFunction::get_argument_types().size(); i++) { const auto& is_set = - 
assert_cast&>(*columns[i]).get_data()[row_num]; + assert_cast&, TypeCheckOnRelease::DISABLE>( + *columns[i]) + .get_data()[row_num]; if (is_set) { this->data(place).add( binary_cast(timestamp), i - NON_EVENT_NUM, diff --git a/be/src/vec/columns/column_array.cpp b/be/src/vec/columns/column_array.cpp index 450ed7d5eb68912..2889e7ff3817096 100644 --- a/be/src/vec/columns/column_array.cpp +++ b/be/src/vec/columns/column_array.cpp @@ -233,7 +233,7 @@ StringRef ColumnArray::serialize_value_into_arena(size_t n, Arena& arena, int ColumnArray::compare_at(size_t n, size_t m, const IColumn& rhs_, int nan_direction_hint) const { // since column type is complex, we can't use this function - const auto& rhs = assert_cast(rhs_); + const auto& rhs = assert_cast(rhs_); size_t lhs_size = size_at(n); size_t rhs_size = rhs.size_at(m); @@ -583,15 +583,17 @@ ColumnPtr ColumnArray::filter_number(const Filter& filt, ssize_t result_size_hin auto& res_elems = assert_cast&>(res->get_data()).get_data(); auto& res_offsets = res->get_offsets(); - filter_arrays_impl(assert_cast&>(*data).get_data(), - get_offsets(), res_elems, res_offsets, filt, result_size_hint); + filter_arrays_impl( + assert_cast&, TypeCheckOnRelease::DISABLE>(*data).get_data(), + get_offsets(), res_elems, res_offsets, filt, result_size_hint); return res; } template size_t ColumnArray::filter_number(const Filter& filter) { - return filter_arrays_impl(assert_cast&>(*data).get_data(), - get_offsets(), filter); + return filter_arrays_impl( + assert_cast&, TypeCheckOnRelease::DISABLE>(*data).get_data(), + get_offsets(), filter); } ColumnPtr ColumnArray::filter_string(const Filter& filt, ssize_t result_size_hint) const { @@ -794,7 +796,8 @@ size_t ColumnArray::filter_generic(const Filter& filter) { ColumnPtr ColumnArray::filter_nullable(const Filter& filt, ssize_t result_size_hint) const { if (get_offsets().empty()) return ColumnArray::create(data); - const ColumnNullable& nullable_elems = assert_cast(*data); + const ColumnNullable& nullable_elems = + assert_cast(*data); auto array_of_nested = ColumnArray::create(nullable_elems.get_nested_column_ptr(), offsets); auto filtered_array_of_nested_owner = array_of_nested->filter(filt, result_size_hint); @@ -817,7 +820,8 @@ size_t ColumnArray::filter_nullable(const Filter& filter) { return 0; } - ColumnNullable& nullable_elems = assert_cast(*data); + ColumnNullable& nullable_elems = + assert_cast(*data); const auto result_size = filter_arrays_impl_only_data(nullable_elems.get_null_map_data(), get_offsets(), filter); @@ -916,7 +920,7 @@ ColumnPtr ColumnArray::replicate_string(const IColumn::Offsets& replicate_offset if (0 == col_size) return res; - ColumnArray& res_arr = assert_cast(*res); + ColumnArray& res_arr = assert_cast(*res); const ColumnString& src_string = typeid_cast(*data); const ColumnString::Chars& src_chars = src_string.get_chars(); @@ -1019,7 +1023,7 @@ ColumnPtr ColumnArray::replicate_generic(const IColumn::Offsets& replicate_offse column_match_offsets_size(col_size, replicate_offsets.size()); MutableColumnPtr res = clone_empty(); - ColumnArray& res_concrete = assert_cast(*res); + ColumnArray& res_concrete = assert_cast(*res); if (0 == col_size) return res; @@ -1037,7 +1041,8 @@ ColumnPtr ColumnArray::replicate_generic(const IColumn::Offsets& replicate_offse } ColumnPtr ColumnArray::replicate_nullable(const IColumn::Offsets& replicate_offsets) const { - const ColumnNullable& nullable = assert_cast(*data); + const ColumnNullable& nullable = + assert_cast(*data); /// Make temporary arrays for each 
components of Nullable. Then replicate them independently and collect back to result. /// NOTE Offsets are calculated twice and it is redundant. @@ -1051,9 +1056,12 @@ ColumnPtr ColumnArray::replicate_nullable(const IColumn::Offsets& replicate_offs return ColumnArray::create( ColumnNullable::create( - assert_cast(*array_of_nested).get_data_ptr(), - assert_cast(*array_of_null_map).get_data_ptr()), - assert_cast(*array_of_nested).get_offsets_ptr()); + assert_cast(*array_of_nested) + .get_data_ptr(), + assert_cast(*array_of_null_map) + .get_data_ptr()), + assert_cast(*array_of_nested) + .get_offsets_ptr()); } ColumnPtr ColumnArray::permute(const Permutation& perm, size_t limit) const { diff --git a/be/src/vec/columns/column_array.h b/be/src/vec/columns/column_array.h index 7839503faa46ba1..e2226ab01e1719d 100644 --- a/be/src/vec/columns/column_array.h +++ b/be/src/vec/columns/column_array.h @@ -171,11 +171,11 @@ class ColumnArray final : public COWHelper { const IColumn& get_offsets_column() const { return *offsets; } Offsets64& ALWAYS_INLINE get_offsets() { - return assert_cast(*offsets).get_data(); + return assert_cast(*offsets).get_data(); } const Offsets64& ALWAYS_INLINE get_offsets() const { - return assert_cast(*offsets).get_data(); + return assert_cast(*offsets).get_data(); } bool has_equal_offsets(const ColumnArray& other) const; diff --git a/be/src/vec/columns/column_complex.h b/be/src/vec/columns/column_complex.h index c380ee1d0dd13a8..feeb8f71b9d5937 100644 --- a/be/src/vec/columns/column_complex.h +++ b/be/src/vec/columns/column_complex.h @@ -58,7 +58,7 @@ class ColumnComplexType final : public COWHelper> } void insert_from(const IColumn& src, size_t n) override { - data.push_back(assert_cast(src).get_data()[n]); + data.push_back(assert_cast(src).get_data()[n]); } void insert_data(const char* pos, size_t /*length*/) override { @@ -236,7 +236,7 @@ class ColumnComplexType final : public COWHelper> void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override { DCHECK(size() > self_row); - data[self_row] = assert_cast(rhs).data[row]; + data[self_row] = assert_cast(rhs).data[row]; } private: diff --git a/be/src/vec/columns/column_const.h b/be/src/vec/columns/column_const.h index 2dad8cc69457836..0d1b16161eb2e37 100644 --- a/be/src/vec/columns/column_const.h +++ b/be/src/vec/columns/column_const.h @@ -228,7 +228,7 @@ class ColumnConst final : public COWHelper { size_t allocated_bytes() const override { return data->allocated_bytes() + sizeof(s); } int compare_at(size_t, size_t, const IColumn& rhs, int nan_direction_hint) const override { - auto rhs_const_column = assert_cast(rhs); + auto rhs_const_column = assert_cast(rhs); const auto* this_nullable = check_and_get_column(data.get()); const auto* rhs_nullable = diff --git a/be/src/vec/columns/column_decimal.cpp b/be/src/vec/columns/column_decimal.cpp index f11e183a54f1fcc..65e8c9d79ac57f3 100644 --- a/be/src/vec/columns/column_decimal.cpp +++ b/be/src/vec/columns/column_decimal.cpp @@ -46,7 +46,7 @@ namespace doris::vectorized { template int ColumnDecimal::compare_at(size_t n, size_t m, const IColumn& rhs_, int) const { - auto& other = assert_cast(rhs_); + auto& other = assert_cast(rhs_); const T& a = data[n]; const T& b = other.data[m]; diff --git a/be/src/vec/columns/column_decimal.h b/be/src/vec/columns/column_decimal.h index cc1661312a8dc2a..0927cb88e15abc2 100644 --- a/be/src/vec/columns/column_decimal.h +++ b/be/src/vec/columns/column_decimal.h @@ -116,7 +116,7 @@ class ColumnDecimal final : public 
COWHelper> { void resize(size_t n) override { data.resize(n); } void insert_from(const IColumn& src, size_t n) override { - data.push_back(assert_cast(src).get_data()[n]); + data.push_back(assert_cast(src).get_data()[n]); } void insert_indices_from(const IColumn& src, const uint32_t* indices_begin, @@ -241,7 +241,7 @@ class ColumnDecimal final : public COWHelper> { void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override { DCHECK(size() > self_row); - data[self_row] = assert_cast(rhs).data[row]; + data[self_row] = assert_cast(rhs).data[row]; } void replace_column_null_data(const uint8_t* __restrict null_map) override; diff --git a/be/src/vec/columns/column_map.cpp b/be/src/vec/columns/column_map.cpp index a8fa5b2b51645d2..b83ff6709dd16d2 100644 --- a/be/src/vec/columns/column_map.cpp +++ b/be/src/vec/columns/column_map.cpp @@ -242,7 +242,7 @@ const char* ColumnMap::deserialize_and_insert_from_arena(const char* pos) { } int ColumnMap::compare_at(size_t n, size_t m, const IColumn& rhs_, int nan_direction_hint) const { - const auto& rhs = assert_cast(rhs_); + const auto& rhs = assert_cast(rhs_); size_t lhs_size = size_at(n); size_t rhs_size = rhs.size_at(m); diff --git a/be/src/vec/columns/column_map.h b/be/src/vec/columns/column_map.h index 9f2862da09ba4fe..88705a514aeb665 100644 --- a/be/src/vec/columns/column_map.h +++ b/be/src/vec/columns/column_map.h @@ -141,10 +141,11 @@ class ColumnMap final : public COWHelper { } ColumnArray::Offsets64& ALWAYS_INLINE get_offsets() { - return assert_cast(*offsets_column).get_data(); + return assert_cast(*offsets_column).get_data(); } const ColumnArray::Offsets64& ALWAYS_INLINE get_offsets() const { - return assert_cast(*offsets_column).get_data(); + return assert_cast(*offsets_column) + .get_data(); } IColumn& get_offsets_column() { return *offsets_column; } const IColumn& get_offsets_column() const { return *offsets_column; } diff --git a/be/src/vec/columns/column_nullable.cpp b/be/src/vec/columns/column_nullable.cpp index 7424fa270e7a848..483ed5ca6cd59fa 100644 --- a/be/src/vec/columns/column_nullable.cpp +++ b/be/src/vec/columns/column_nullable.cpp @@ -626,14 +626,18 @@ ColumnPtr make_nullable(const ColumnPtr& column, bool is_nullable) { ColumnPtr remove_nullable(const ColumnPtr& column) { if (is_column_nullable(*column)) { - return reinterpret_cast(column.get())->get_nested_column_ptr(); + return assert_cast(column.get()) + ->get_nested_column_ptr(); } if (is_column_const(*column)) { - const auto& column_nested = assert_cast(*column).get_data_column_ptr(); + const auto& column_nested = + assert_cast(*column) + .get_data_column_ptr(); if (is_column_nullable(*column_nested)) { return ColumnConst::create( - assert_cast(*column_nested).get_nested_column_ptr(), + assert_cast(*column_nested) + .get_nested_column_ptr(), column->size()); } } diff --git a/be/src/vec/columns/column_nullable.h b/be/src/vec/columns/column_nullable.h index 718d5e9e6cf1612..5425242aad7142c 100644 --- a/be/src/vec/columns/column_nullable.h +++ b/be/src/vec/columns/column_nullable.h @@ -88,9 +88,12 @@ class ColumnNullable final : public COWHelper { const char* get_family_name() const override { return "Nullable"; } std::string get_name() const override { return "Nullable(" + nested_column->get_name() + ")"; } MutableColumnPtr clone_resized(size_t size) const override; - size_t size() const override { return assert_cast(*null_map).size(); } + size_t size() const override { + return assert_cast(*null_map).size(); + } PURE bool is_null_at(size_t n) const 
override { - return assert_cast(*null_map).get_data()[n] != 0; + return assert_cast(*null_map) + .get_data()[n] != 0; } Field operator[](size_t n) const override; void get(size_t n, Field& res) const override; @@ -100,7 +103,9 @@ class ColumnNullable final : public COWHelper { // column must be nullable(uint8) bool get_bool_inline(size_t n) const { return is_null_at(n) ? false - : assert_cast(nested_column.get())->get_bool(n); + : assert_cast( + nested_column.get()) + ->get_bool(n); } StringRef get_data_at(size_t n) const override; @@ -306,10 +311,10 @@ class ColumnNullable final : public COWHelper { ColumnUInt8& get_null_map_column() { _need_update_has_null = true; - return assert_cast(*null_map); + return assert_cast(*null_map); } const ColumnUInt8& get_null_map_column() const { - return assert_cast(*null_map); + return assert_cast(*null_map); } void clear() override { @@ -345,7 +350,8 @@ class ColumnNullable final : public COWHelper { void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override { DCHECK(size() > self_row); - const auto& nullable_rhs = assert_cast(rhs); + const auto& nullable_rhs = + assert_cast(rhs); null_map->replace_column_data(*nullable_rhs.null_map, row, self_row); if (!nullable_rhs.is_null_at(row)) { @@ -408,7 +414,9 @@ class ColumnNullable final : public COWHelper { private: // the two functions will not update `_need_update_has_null` - ColumnUInt8& _get_null_map_column() { return assert_cast(*null_map); } + ColumnUInt8& _get_null_map_column() { + return assert_cast(*null_map); + } NullMap& _get_null_map_data() { return _get_null_map_column().get_data(); } WrappedPtr nested_column; diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index a00d18f0ce147a1..b5ec5e8cfd7a39d 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -768,7 +768,8 @@ void ColumnObject::insert_from(const IColumn& src, size_t n) { if (src_v != nullptr && src_v->is_scalar_variant() && is_scalar_variant() && src_v->get_root_type()->equals(*get_root_type()) && src_v->is_finalized() && is_finalized()) { - assert_cast(*get_root()).insert_from(*src_v->get_root(), n); + assert_cast(*get_root()) + .insert_from(*src_v->get_root(), n); ++num_rows; return; } @@ -1332,7 +1333,8 @@ Status ColumnObject::merge_sparse_to_root_column() { Arena mem_pool; for (const auto& subcolumn : sparse_columns) { auto& column = subcolumn->data.get_finalized_column_ptr(); - if (assert_cast(*column).is_null_at(i)) { + if (assert_cast(*column).is_null_at( + i)) { ++null_count; continue; } diff --git a/be/src/vec/columns/column_string.cpp b/be/src/vec/columns/column_string.cpp index 952a1a979150467..c3cf6dadf0a5faa 100644 --- a/be/src/vec/columns/column_string.cpp +++ b/be/src/vec/columns/column_string.cpp @@ -571,7 +571,9 @@ void ColumnStr::compare_internal(size_t rhs_row_id, const IColumn& rhs, int n uint8* __restrict filter) const { auto sz = offsets.size(); DCHECK(cmp_res.size() == sz); - const auto& cmp_base = assert_cast&>(rhs).get_data_at(rhs_row_id); + const auto& cmp_base = + assert_cast&, TypeCheckOnRelease::DISABLE>(rhs).get_data_at( + rhs_row_id); size_t begin = simd::find_zero(cmp_res, 0); while (begin < sz) { size_t end = simd::find_one(cmp_res, begin + 1); diff --git a/be/src/vec/columns/column_struct.cpp b/be/src/vec/columns/column_struct.cpp index 78250bc952d075a..c08d12560c98632 100644 --- a/be/src/vec/columns/column_struct.cpp +++ b/be/src/vec/columns/column_struct.cpp @@ -187,7 +187,7 @@ const char* 
ColumnStruct::deserialize_and_insert_from_arena(const char* pos) { int ColumnStruct::compare_at(size_t n, size_t m, const IColumn& rhs_, int nan_direction_hint) const { - const ColumnStruct& rhs = assert_cast(rhs_); + const ColumnStruct& rhs = assert_cast(rhs_); const size_t lhs_tuple_size = columns.size(); const size_t rhs_tuple_size = rhs.tuple_size(); @@ -246,8 +246,9 @@ void ColumnStruct::insert_indices_from(const IColumn& src, const uint32_t* indic void ColumnStruct::insert_range_from(const IColumn& src, size_t start, size_t length) { const size_t tuple_size = columns.size(); for (size_t i = 0; i < tuple_size; ++i) { - columns[i]->insert_range_from(*assert_cast(src).columns[i], start, - length); + columns[i]->insert_range_from( + *assert_cast(src).columns[i], + start, length); } } @@ -256,7 +257,8 @@ void ColumnStruct::insert_range_from_ignore_overflow(const IColumn& src, size_t const size_t tuple_size = columns.size(); for (size_t i = 0; i < tuple_size; ++i) { columns[i]->insert_range_from_ignore_overflow( - *assert_cast(src).columns[i], start, length); + *assert_cast(src).columns[i], + start, length); } } diff --git a/be/src/vec/columns/column_vector.cpp b/be/src/vec/columns/column_vector.cpp index f8d05c3d492a6ff..590e2047cab7c65 100644 --- a/be/src/vec/columns/column_vector.cpp +++ b/be/src/vec/columns/column_vector.cpp @@ -162,7 +162,8 @@ void ColumnVector::compare_internal(size_t rhs_row_id, const IColumn& rhs, uint8* __restrict filter) const { const auto sz = data.size(); DCHECK(cmp_res.size() == sz); - const auto& cmp_base = assert_cast&>(rhs).get_data()[rhs_row_id]; + const auto& cmp_base = assert_cast&, TypeCheckOnRelease::DISABLE>(rhs) + .get_data()[rhs_row_id]; size_t begin = simd::find_zero(cmp_res, 0); while (begin < sz) { size_t end = simd::find_one(cmp_res, begin + 1); diff --git a/be/src/vec/columns/column_vector.h b/be/src/vec/columns/column_vector.h index d9e59b0fa86c3ee..60b1845c4ee8185 100644 --- a/be/src/vec/columns/column_vector.h +++ b/be/src/vec/columns/column_vector.h @@ -161,7 +161,7 @@ class ColumnVector final : public COWHelper> { } void insert_from(const IColumn& src, size_t n) override { - data.push_back(assert_cast(src).get_data()[n]); + data.push_back(assert_cast(src).get_data()[n]); } void insert_data(const char* pos, size_t /*length*/) override { @@ -324,8 +324,9 @@ class ColumnVector final : public COWHelper> { /// This method implemented in header because it could be possibly devirtualized. int compare_at(size_t n, size_t m, const IColumn& rhs_, int nan_direction_hint) const override { - return CompareHelper::compare(data[n], assert_cast(rhs_).data[m], - nan_direction_hint); + return CompareHelper::compare( + data[n], assert_cast(rhs_).data[m], + nan_direction_hint); } void get_permutation(bool reverse, size_t limit, int nan_direction_hint, @@ -401,7 +402,7 @@ class ColumnVector final : public COWHelper> { void replace_column_data(const IColumn& rhs, size_t row, size_t self_row = 0) override { DCHECK(size() > self_row); - data[self_row] = assert_cast(rhs).data[row]; + data[self_row] = assert_cast(rhs).data[row]; } void replace_column_null_data(const uint8_t* __restrict null_map) override; diff --git a/be/src/vec/common/assert_cast.h b/be/src/vec/common/assert_cast.h index 6d8765befa203fe..02dce99e967bdbc 100644 --- a/be/src/vec/common/assert_cast.h +++ b/be/src/vec/common/assert_cast.h @@ -26,14 +26,17 @@ #include "common/logging.h" #include "vec/common/demangle.h" -/** Perform static_cast in release build. 
- * Checks type by comparing typeid and throw an exception in debug build. +enum class TypeCheckOnRelease : bool { ENABLE = true, DISABLE = false }; + +/** Perform static_cast in release build when TypeCheckOnRelease is set to DISABLE. + * Checks type by comparing typeid and throw an exception in all the other situations. * The exact match of the type is checked. That is, cast to the ancestor will be unsuccessful. */ -template +template PURE To assert_cast(From&& from) { -#ifndef NDEBUG - try { + // https://godbolt.org/z/nrsx7nYhs + // perform_cast will not be compiled to asm in release build with TypeCheckOnRelease::DISABLE + auto perform_cast = [](auto&& from) -> To { if constexpr (std::is_pointer_v) { if (typeid(*from) == typeid(std::remove_pointer_t)) { return static_cast(from); @@ -51,14 +54,28 @@ PURE To assert_cast(From&& from) { return static_cast(from); } } + LOG(FATAL) << fmt::format("Bad cast from type:{} to {}", demangle(typeid(from).name()), + demangle(typeid(To).name())); + __builtin_unreachable(); + }; + +#ifndef NDEBUG + try { + return perform_cast(std::forward(from)); } catch (const std::exception& e) { LOG(FATAL) << "assert cast err:" << e.what(); } - - LOG(FATAL) << fmt::format("Bad cast from type:{} to {}", demangle(typeid(from).name()), - demangle(typeid(To).name())); __builtin_unreachable(); #else - return static_cast(from); + if constexpr (check == TypeCheckOnRelease::ENABLE) { + try { + return perform_cast(std::forward(from)); + } catch (const std::exception& e) { + LOG(FATAL) << "assert cast err:" << e.what(); + } + __builtin_unreachable(); + } else { + return static_cast(from); + } #endif } diff --git a/be/src/vec/data_types/data_type_number_base.cpp b/be/src/vec/data_types/data_type_number_base.cpp index 78d6e81fb5841c5..c5441256cdf6223 100644 --- a/be/src/vec/data_types/data_type_number_base.cpp +++ b/be/src/vec/data_types/data_type_number_base.cpp @@ -51,17 +51,22 @@ void DataTypeNumberBase::to_string(const IColumn& column, size_t row_num, row_num = result.second; if constexpr (std::is_same::value) { - std::string hex = - int128_to_string(assert_cast&>(*ptr).get_element(row_num)); + std::string hex = int128_to_string( + assert_cast&, TypeCheckOnRelease::DISABLE>(*ptr).get_element( + row_num)); ostr.write(hex.data(), hex.size()); } else if constexpr (std::is_same_v) { // fmt::format_to maybe get inaccurate results at float type, so we use gutil implement. 
char buf[MAX_FLOAT_STR_LENGTH + 2]; - int len = FloatToBuffer(assert_cast&>(*ptr).get_element(row_num), - MAX_FLOAT_STR_LENGTH + 2, buf); + int len = FloatToBuffer( + assert_cast&, TypeCheckOnRelease::DISABLE>(*ptr).get_element( + row_num), + MAX_FLOAT_STR_LENGTH + 2, buf); ostr.write(buf, len); } else if constexpr (std::is_integral::value || std::numeric_limits::is_iec559) { - ostr.write_number(assert_cast&>(*ptr).get_element(row_num)); + ostr.write_number( + assert_cast&, TypeCheckOnRelease::DISABLE>(*ptr).get_element( + row_num)); } } @@ -162,13 +167,19 @@ std::string DataTypeNumberBase::to_string(const IColumn& column, size_t row_n if constexpr (std::is_same::value || std::is_same::value || std::is_same::value) { - return int128_to_string(assert_cast&>(*ptr).get_element(row_num)); + return int128_to_string( + assert_cast&, TypeCheckOnRelease::DISABLE>(*ptr).get_element( + row_num)); } else if constexpr (std::is_integral::value) { - return std::to_string(assert_cast&>(*ptr).get_element(row_num)); + return std::to_string( + assert_cast&, TypeCheckOnRelease::DISABLE>(*ptr).get_element( + row_num)); } else if constexpr (std::numeric_limits::is_iec559) { fmt::memory_buffer buffer; // only use in size-predictable type. - fmt::format_to(buffer, "{}", - assert_cast&>(*ptr).get_element(row_num)); + fmt::format_to( + buffer, "{}", + assert_cast&, TypeCheckOnRelease::DISABLE>(*ptr).get_element( + row_num)); return std::string(buffer.data(), buffer.size()); } } diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp index 02787f7deec8238..e57af914d43e040 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp @@ -49,7 +49,9 @@ Status DataTypeDateTimeV2SerDe::serialize_one_cell_to_json(const IColumn& column ColumnPtr ptr = result.first; row_num = result.second; - UInt64 int_val = assert_cast(*ptr).get_element(row_num); + UInt64 int_val = + assert_cast(*ptr).get_element( + row_num); DateV2Value val = binary_cast>(int_val); @@ -76,7 +78,7 @@ Status DataTypeDateTimeV2SerDe::deserialize_column_from_json_vector( } Status DataTypeDateTimeV2SerDe::deserialize_one_cell_from_json(IColumn& column, Slice& slice, const FormatOptions& options) const { - auto& column_data = assert_cast(column); + auto& column_data = assert_cast(column); UInt64 val = 0; if (options.date_olap_format) { DateV2Value datetimev2_value; diff --git a/be/src/vec/data_types/serde/data_type_struct_serde.cpp b/be/src/vec/data_types/serde/data_type_struct_serde.cpp index 14d894e256d2ed1..9b975be7ec92e20 100644 --- a/be/src/vec/data_types/serde/data_type_struct_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_struct_serde.cpp @@ -53,7 +53,8 @@ Status DataTypeStructSerDe::serialize_one_cell_to_json(const IColumn& column, in ColumnPtr ptr = result.first; row_num = result.second; - const ColumnStruct& struct_column = assert_cast(*ptr); + const ColumnStruct& struct_column = + assert_cast(*ptr); bw.write('{'); for (int i = 0; i < struct_column.get_columns().size(); i++) { if (i != 0) { @@ -73,7 +74,7 @@ Status DataTypeStructSerDe::deserialize_one_cell_from_json(IColumn& column, Slic if (slice.empty()) { return Status::InvalidArgument("slice is empty!"); } - auto& struct_column = assert_cast(column); + auto& struct_column = assert_cast(column); if (slice[0] != '{') { std::stringstream ss; @@ -279,7 +280,8 @@ void DataTypeStructSerDe::serialize_one_cell_to_hive_text( ColumnPtr ptr = 
result.first; row_num = result.second; - const ColumnStruct& struct_column = assert_cast(*ptr); + const ColumnStruct& struct_column = + assert_cast(*ptr); char collection_delimiter = options.get_collection_delimiter(hive_text_complex_type_delimiter_level); @@ -335,7 +337,7 @@ Status DataTypeStructSerDe::_write_column_to_mysql(const IColumn& column, MysqlRowBuffer& result, int row_idx, bool col_const, const FormatOptions& options) const { - auto& col = assert_cast(column); + auto& col = assert_cast(column); const auto col_index = index_check_const(row_idx, col_const); result.open_dynamic_mode(); if (0 != result.push_string("{", 1)) { diff --git a/be/src/vec/functions/array/function_array_element.h b/be/src/vec/functions/array/function_array_element.h index 49fdc4176ce55af..4a9bffdbb3cc1d1 100644 --- a/be/src/vec/functions/array/function_array_element.h +++ b/be/src/vec/functions/array/function_array_element.h @@ -38,6 +38,7 @@ #include "vec/columns/column_struct.h" #include "vec/columns/column_vector.h" #include "vec/columns/columns_number.h" +#include "vec/common/assert_cast.h" #include "vec/core/block.h" #include "vec/core/column_numbers.h" #include "vec/core/column_with_type_and_name.h" diff --git a/be/src/vec/functions/comparison_equal_for_null.cpp b/be/src/vec/functions/comparison_equal_for_null.cpp index cca941840e8565b..49db471f8d1c9c1 100644 --- a/be/src/vec/functions/comparison_equal_for_null.cpp +++ b/be/src/vec/functions/comparison_equal_for_null.cpp @@ -136,14 +136,18 @@ class FunctionEqForNull : public IFunction { if (left_const) { left_column = check_and_get_column( - assert_cast(col_left.column.get())->get_data_column_ptr()); + assert_cast( + col_left.column.get()) + ->get_data_column_ptr()); } else { left_column = check_and_get_column(col_left.column); } if (right_const) { right_column = check_and_get_column( - assert_cast(col_right.column.get())->get_data_column_ptr()); + assert_cast( + col_right.column.get()) + ->get_data_column_ptr()); } else { right_column = check_and_get_column(col_right.column); } diff --git a/be/src/vec/functions/function_binary_arithmetic.h b/be/src/vec/functions/function_binary_arithmetic.h index d69b00043a15ec5..9f2af326f719015 100644 --- a/be/src/vec/functions/function_binary_arithmetic.h +++ b/be/src/vec/functions/function_binary_arithmetic.h @@ -413,9 +413,13 @@ struct DecimalBinaryOperation { const ResultType& max_result_number, const ResultType& scale_diff_multiplier, DataTypePtr res_data_type) { - auto type_result = assert_cast&>(*res_data_type); + auto type_result = + assert_cast&, TypeCheckOnRelease::DISABLE>( + *res_data_type); auto column_result = ColumnDecimal::create( - 1, assert_cast&>(*res_data_type).get_scale()); + 1, assert_cast&, TypeCheckOnRelease::DISABLE>( + *res_data_type) + .get_scale()); if constexpr (check_overflow && !is_to_null_type && ((!OpTraits::is_multiply && !OpTraits::is_plus_minus))) { @@ -441,11 +445,15 @@ struct DecimalBinaryOperation { const ResultType& max_result_number, const ResultType& scale_diff_multiplier, DataTypePtr res_data_type) { - auto type_result = assert_cast&>(*res_data_type); + auto type_result = + assert_cast&, TypeCheckOnRelease::DISABLE>( + *res_data_type); auto column_left_ptr = check_and_get_column(column_left); auto column_result = ColumnDecimal::create( column_left->size(), - assert_cast&>(*res_data_type).get_scale()); + assert_cast&, TypeCheckOnRelease::DISABLE>( + *res_data_type) + .get_scale()); DCHECK(column_left_ptr != nullptr); if constexpr (check_overflow && !is_to_null_type && @@ 
-472,11 +480,15 @@ struct DecimalBinaryOperation { const ResultType& max_result_number, const ResultType& scale_diff_multiplier, DataTypePtr res_data_type) { - auto type_result = assert_cast&>(*res_data_type); + auto type_result = + assert_cast&, TypeCheckOnRelease::DISABLE>( + *res_data_type); auto column_right_ptr = check_and_get_column(column_right); auto column_result = ColumnDecimal::create( column_right->size(), - assert_cast&>(*res_data_type).get_scale()); + assert_cast&, TypeCheckOnRelease::DISABLE>( + *res_data_type) + .get_scale()); DCHECK(column_right_ptr != nullptr); if constexpr (check_overflow && !is_to_null_type && diff --git a/be/src/vec/functions/function_case.h b/be/src/vec/functions/function_case.h index f320ab046c762eb..f02b85aed456bf5 100644 --- a/be/src/vec/functions/function_case.h +++ b/be/src/vec/functions/function_case.h @@ -302,11 +302,11 @@ class FunctionCase : public IFunction { } size_t target = is_consts[then_idx[row_idx]] ? 0 : row_idx; if constexpr (then_null) { - assert_cast(result_column_ptr.get()) + assert_cast(result_column_ptr.get()) ->insert_from_with_type(*raw_columns[then_idx[row_idx]], target); } else { - assert_cast(result_column_ptr.get()) + assert_cast(result_column_ptr.get()) ->insert_from(*raw_columns[then_idx[row_idx]], target); } } @@ -323,7 +323,9 @@ class FunctionCase : public IFunction { size_t rows_count = column_holder.rows_count; result_column_ptr->resize(rows_count); auto* __restrict result_raw_data = - assert_cast(result_column_ptr.get())->get_data().data(); + assert_cast(result_column_ptr.get()) + ->get_data() + .data(); // set default value for (int i = 0; i < rows_count; i++) { diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h index f83cd63918d62cf..17c7b4b76103cf0 100644 --- a/be/src/vec/functions/function_cast.h +++ b/be/src/vec/functions/function_cast.h @@ -720,8 +720,8 @@ struct ConvertImplGenericFromJsonb { // add string to string column if (context->jsonb_string_as_string() && is_dst_string && value->isString()) { const auto* blob = static_cast(value); - assert_cast(*col_to).insert_data(blob->getBlob(), - blob->getBlobLen()); + assert_cast(*col_to).insert_data( + blob->getBlob(), blob->getBlobLen()); (*vec_null_map_to)[i] = 0; continue; } @@ -1525,7 +1525,15 @@ struct StringParsing { const ColumnString::Chars* chars = &col_from_string->get_chars(); const IColumn::Offsets* offsets = &col_from_string->get_offsets(); + [[maybe_unused]] UInt32 scale = 0; + if constexpr (IsDataTypeDateTimeV2) { + const auto* type = assert_cast( + block.get_by_position(result).type.get()); + scale = type->get_scale(); + } + size_t current_offset = 0; + for (size_t i = 0; i < row; ++i) { size_t next_offset = (*offsets)[i]; size_t string_size = next_offset - current_offset; @@ -1541,10 +1549,7 @@ struct StringParsing { res == StringParser::PARSE_OVERFLOW || res == StringParser::PARSE_UNDERFLOW); } else if constexpr (IsDataTypeDateTimeV2) { - const auto* type = assert_cast( - block.get_by_position(result).type.get()); - parsed = try_parse_impl(vec_to[i], read_buffer, context, - type->get_scale()); + parsed = try_parse_impl(vec_to[i], read_buffer, context, scale); } else { parsed = try_parse_impl(vec_to[i], read_buffer, context); diff --git a/be/src/vec/functions/function_coalesce.cpp b/be/src/vec/functions/function_coalesce.cpp index 63217c6d6bfd57b..dbe75cf140845d5 100644 --- a/be/src/vec/functions/function_coalesce.cpp +++ b/be/src/vec/functions/function_coalesce.cpp @@ -163,7 +163,9 @@ class FunctionCoalesce 
: public IFunction { auto res_column = (*temporary_block.get_by_position(1).column->convert_to_full_column_if_const()) .mutate(); - auto& res_map = assert_cast*>(res_column.get())->get_data(); + auto& res_map = + assert_cast*, TypeCheckOnRelease::DISABLE>(res_column.get()) + ->get_data(); auto* __restrict res = res_map.data(); // Here it's SIMD thought the compiler automatically diff --git a/be/src/vec/functions/function_helpers.cpp b/be/src/vec/functions/function_helpers.cpp index 22dbd9073d17975..ea3d98511b0624d 100644 --- a/be/src/vec/functions/function_helpers.cpp +++ b/be/src/vec/functions/function_helpers.cpp @@ -148,7 +148,7 @@ void validate_argument_type(const IFunction& func, const DataTypes& arguments, const ColumnConst* check_and_get_column_const_string_or_fixedstring(const IColumn* column) { if (!is_column_const(*column)) return {}; - const ColumnConst* res = assert_cast(column); + const ColumnConst* res = assert_cast(column); if (check_column(&res->get_data_column())) return res; diff --git a/be/src/vec/functions/function_helpers.h b/be/src/vec/functions/function_helpers.h index f5d343f3678d812..28f79a8d0fb1930 100644 --- a/be/src/vec/functions/function_helpers.h +++ b/be/src/vec/functions/function_helpers.h @@ -55,7 +55,7 @@ template const ColumnConst* check_and_get_column_const(const IColumn* column) { if (!column || !is_column_const(*column)) return {}; - const ColumnConst* res = assert_cast(column); + const ColumnConst* res = assert_cast(column); if (!check_column(&res->get_data_column())) return {}; diff --git a/be/src/vec/functions/function_ip.h b/be/src/vec/functions/function_ip.h index 3b02d779246a031..f019fa32797a7d0 100644 --- a/be/src/vec/functions/function_ip.h +++ b/be/src/vec/functions/function_ip.h @@ -398,8 +398,10 @@ ColumnPtr convert_to_ipv6(const StringColumnType& string_column, std::string string_buffer; int offset_inc = 1; + ColumnString* column_string = nullptr; if constexpr (std::is_same_v) { offset_inc = IPV6_BINARY_LENGTH; + column_string = assert_cast(col_res.get()); } for (size_t out_offset = 0, i = 0; i < column_size; out_offset += offset_inc, ++i) { @@ -429,7 +431,7 @@ ColumnPtr convert_to_ipv6(const StringColumnType& string_column, (*vec_null_map_to)[i] = true; } if constexpr (std::is_same_v) { - auto* column_string = assert_cast(col_res.get()); + DCHECK(column_string != nullptr); column_string->get_offsets().push_back((i + 1) * IPV6_BINARY_LENGTH); } src_offset = src_next_offset; diff --git a/be/src/vec/functions/function_string.cpp b/be/src/vec/functions/function_string.cpp index 30384413d1d4cda..d0e12bb498430ba 100644 --- a/be/src/vec/functions/function_string.cpp +++ b/be/src/vec/functions/function_string.cpp @@ -27,6 +27,7 @@ #include "common/status.h" #include "runtime/string_search.hpp" #include "util/url_coding.h" +#include "vec/columns/column.h" #include "vec/columns/column_string.h" #include "vec/common/pod_array_fwd.h" #include "vec/common/string_ref.h" diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 1c843acb6972480..bb4dbc67c5d5cdb 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -3144,20 +3144,23 @@ class FunctionReplace : public IFunction { Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const override { + // We need a local variable to hold a reference to the converted column. + // So that the converted column will not be released before we use it. 
auto col_origin = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const(); + auto col_origin_str = assert_cast(col_origin.get()); auto col_old = block.get_by_position(arguments[1]).column->convert_to_full_column_if_const(); + auto col_old_str = assert_cast(col_old.get()); auto col_new = block.get_by_position(arguments[2]).column->convert_to_full_column_if_const(); + auto col_new_str = assert_cast(col_new.get()); ColumnString::MutablePtr col_res = ColumnString::create(); - for (int i = 0; i < input_rows_count; ++i) { - StringRef origin_str = - assert_cast(col_origin.get())->get_data_at(i); - StringRef old_str = assert_cast(col_old.get())->get_data_at(i); - StringRef new_str = assert_cast(col_new.get())->get_data_at(i); + StringRef origin_str = col_origin_str->get_data_at(i); + StringRef old_str = col_old_str->get_data_at(i); + StringRef new_str = col_new_str->get_data_at(i); std::string result = replace(origin_str.to_string(), old_str.to_string_view(), new_str.to_string_view()); @@ -3694,8 +3697,8 @@ class FunctionIntToChar : public IFunction { continue; } if (auto const_column = check_and_get_column(*str_columns[j])) { - auto str_column = - assert_cast(&(const_column->get_data_column())); + auto str_column = assert_cast( + &(const_column->get_data_column())); auto data_item = str_column->get_data_at(0); memcpy_small_allow_read_write_overflow15( &res_data[res_offset[i - 1]] + current_length, data_item.data, diff --git a/be/src/vec/functions/function_variant_element.cpp b/be/src/vec/functions/function_variant_element.cpp index 22bf45beb5ef97a..76076a498d07c08 100644 --- a/be/src/vec/functions/function_variant_element.cpp +++ b/be/src/vec/functions/function_variant_element.cpp @@ -123,9 +123,9 @@ class FunctionVariantElement : public IFunction { field_name = "$." 
+ field_name; } JsonFunctions::parse_json_paths(field_name, &parsed_paths); + ColumnString* col_str = assert_cast(result_column.get()); for (size_t i = 0; i < docs.size(); ++i) { - if (!extract_from_document(parser, docs.get_data_at(i), parsed_paths, - assert_cast(result_column.get()))) { + if (!extract_from_document(parser, docs.get_data_at(i), parsed_paths, col_str)) { VLOG_DEBUG << "failed to parse " << docs.get_data_at(i) << ", field " << field_name; result_column->insert_default(); From f3dd685645bb5f6ea6ae54b8ae15c6d6497154ef Mon Sep 17 00:00:00 2001 From: Pxl Date: Tue, 13 Aug 2024 14:22:56 +0800 Subject: [PATCH 80/94] [Improvement](runtime-filter) do not use bloom to replace in_or_bloom when rf need merge (#39147) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Proposed changes do not use bloom to replace in_or_bloom when rf need merge Because in some cases, this will lead to poor performance 图片 图片 --- .../post/RuntimeFilterPushDownVisitor.java | 6 ---- .../shape/query24.out | 4 +-- .../shape/query24.out | 4 +-- .../hint_tpcds/shape/query64.out | 28 +++++++++---------- .../tpcds_sf100/shape/query24.out | 4 +-- .../tpcds_sf1000/shape/query24.out | 4 +-- 6 files changed, 22 insertions(+), 28 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterPushDownVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterPushDownVisitor.java index 6ae5532ea8117e9..f1b488c6e68ae86 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterPushDownVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/processor/post/RuntimeFilterPushDownVisitor.java @@ -203,12 +203,6 @@ public Boolean visitPhysicalRelation(PhysicalRelation scan, PushDownContext ctx) } TRuntimeFilterType type = ctx.type; - if (type == TRuntimeFilterType.IN_OR_BLOOM - && RuntimeFilterGenerator.hasRemoteTarget(ctx.builderNode, scan) - && !ctx.builderNode.isBroadCastJoin()) { - type = TRuntimeFilterType.BLOOM; - } - RuntimeFilter filter = ctx.rfContext.getRuntimeFilterBySrcAndType(ctx.srcExpr, type, ctx.builderNode); if (filter != null) { if (!filter.hasTargetScan(scan)) { diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query24.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query24.out index 6d93096d40594fc..b1e5bf298283944 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query24.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query24.out @@ -7,7 +7,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalDistribute[DistributionSpecHash] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF5 sr_ticket_number->[ss_ticket_number];RF6 sr_item_sk->[ss_item_sk];RF7 sr_item_sk->[i_item_sk] +--------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF5 sr_ticket_number->[ss_ticket_number];RF6 sr_item_sk->[i_item_sk,ss_item_sk] ----------------PhysicalProject ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[ss_item_sk] 
--------------------PhysicalProject @@ -26,7 +26,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------------PhysicalProject ------------------------------PhysicalOlapScan[customer] --------------------PhysicalProject -----------------------PhysicalOlapScan[item] apply RFs: RF7 +----------------------PhysicalOlapScan[item] apply RFs: RF6 ----------------PhysicalProject ------------------PhysicalOlapScan[store_returns] --PhysicalResultSink diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out index a13884492fae43f..14dbf92e98e5fd4 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query24.out @@ -7,7 +7,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalDistribute[DistributionSpecHash] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF5 sr_ticket_number->[ss_ticket_number];RF6 sr_item_sk->[ss_item_sk];RF7 sr_item_sk->[i_item_sk] +--------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF5 sr_ticket_number->[ss_ticket_number];RF6 sr_item_sk->[i_item_sk,ss_item_sk] ----------------PhysicalProject ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[ss_item_sk] --------------------PhysicalProject @@ -26,7 +26,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------------PhysicalProject ------------------------------PhysicalOlapScan[customer] --------------------PhysicalProject -----------------------PhysicalOlapScan[item] apply RFs: RF7 +----------------------PhysicalOlapScan[item] apply RFs: RF6 ----------------PhysicalProject ------------------PhysicalOlapScan[store_returns] --PhysicalResultSink diff --git a/regression-test/data/new_shapes_p0/hint_tpcds/shape/query64.out b/regression-test/data/new_shapes_p0/hint_tpcds/shape/query64.out index 9c731c9fba36dfc..b33dcb2a77ae65e 100644 --- a/regression-test/data/new_shapes_p0/hint_tpcds/shape/query64.out +++ b/regression-test/data/new_shapes_p0/hint_tpcds/shape/query64.out @@ -7,23 +7,23 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) --------PhysicalDistribute[DistributionSpecHash] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF20 cs_item_sk->[i_item_sk,sr_item_sk,ss_item_sk] +--------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = cs_ui.cs_item_sk)) otherCondition=() build RFs:RF19 cs_item_sk->[i_item_sk,sr_item_sk,ss_item_sk] ----------------PhysicalProject -------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF19 p_promo_sk->[ss_promo_sk] +------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF18 p_promo_sk->[ss_promo_sk] --------------------PhysicalProject -----------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF18 s_store_sk->[ss_store_sk] +----------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF17 s_store_sk->[ss_store_sk] ------------------------PhysicalProject ---------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF17 d_date_sk->[ss_sold_date_sk] +--------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = d1.d_date_sk)) otherCondition=() build RFs:RF16 d_date_sk->[ss_sold_date_sk] ----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF16 ib_income_band_sk->[hd_income_band_sk] +------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd2.hd_income_band_sk = ib2.ib_income_band_sk)) otherCondition=() build RFs:RF15 ib_income_band_sk->[hd_income_band_sk] --------------------------------PhysicalProject -----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() build RFs:RF15 ca_address_sk->[c_current_addr_sk] +----------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_addr_sk = ad2.ca_address_sk)) otherCondition=() build RFs:RF14 ca_address_sk->[c_current_addr_sk] ------------------------------------PhysicalProject ---------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() build RFs:RF14 hd_demo_sk->[c_current_hdemo_sk] +--------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_hdemo_sk = hd2.hd_demo_sk)) otherCondition=() build RFs:RF13 hd_demo_sk->[c_current_hdemo_sk] ----------------------------------------PhysicalProject -------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF13 ca_address_sk->[ss_addr_sk] +------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_addr_sk = ad1.ca_address_sk)) otherCondition=() build RFs:RF12 ca_address_sk->[ss_addr_sk] --------------------------------------------PhysicalProject -----------------------------------------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF10 sr_item_sk->[ss_item_sk];RF11 sr_item_sk->[i_item_sk];RF12 sr_ticket_number->[ss_ticket_number] +----------------------------------------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF10 sr_item_sk->[i_item_sk,ss_item_sk];RF11 sr_ticket_number->[ss_ticket_number] ------------------------------------------------PhysicalProject --------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_hdemo_sk = hd1.hd_demo_sk)) otherCondition=() build RFs:RF9 hd_demo_sk->[ss_hdemo_sk] ----------------------------------------------------hashJoin[INNER_JOIN broadcast] 
hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF8 i_item_sk->[ss_item_sk] @@ -34,13 +34,13 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) --------------------------------------------------------------PhysicalProject ----------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF5 c_customer_sk->[ss_customer_sk] ------------------------------------------------------------------PhysicalProject ---------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 RF6 RF8 RF9 RF10 RF12 RF13 RF17 RF18 RF19 RF20 +--------------------------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 RF6 RF8 RF9 RF10 RF11 RF12 RF16 RF17 RF18 RF19 ------------------------------------------------------------------PhysicalProject --------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_current_cdemo_sk = cd2.cd_demo_sk)) otherCondition=() build RFs:RF4 cd_demo_sk->[c_current_cdemo_sk] ----------------------------------------------------------------------PhysicalProject ------------------------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((customer.c_first_sales_date_sk = d2.d_date_sk)) otherCondition=() build RFs:RF3 d_date_sk->[c_first_sales_date_sk] --------------------------------------------------------------------------PhysicalProject -----------------------------------------------------------------------------PhysicalOlapScan[customer] apply RFs: RF3 RF4 RF7 RF14 RF15 +----------------------------------------------------------------------------PhysicalOlapScan[customer] apply RFs: RF3 RF4 RF7 RF13 RF14 --------------------------------------------------------------------------PhysicalProject ----------------------------------------------------------------------------PhysicalOlapScan[date_dim] ----------------------------------------------------------------------PhysicalProject @@ -51,7 +51,7 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) ------------------------------------------------------------PhysicalOlapScan[date_dim] ------------------------------------------------------PhysicalProject --------------------------------------------------------filter((item.i_current_price <= 58.00) and (item.i_current_price >= 49.00) and i_color IN ('blush', 'lace', 'lawn', 'misty', 'orange', 'pink')) -----------------------------------------------------------PhysicalOlapScan[item] apply RFs: RF11 RF20 +----------------------------------------------------------PhysicalOlapScan[item] apply RFs: RF10 RF19 ----------------------------------------------------PhysicalProject ------------------------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((hd1.hd_income_band_sk = ib1.ib_income_band_sk)) otherCondition=() build RFs:RF2 ib_income_band_sk->[hd_income_band_sk] --------------------------------------------------------PhysicalProject @@ -59,11 +59,11 @@ PhysicalCteAnchor ( cteId=CTEId#1 ) --------------------------------------------------------PhysicalProject ----------------------------------------------------------PhysicalOlapScan[income_band] ------------------------------------------------PhysicalProject ---------------------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF20 
+--------------------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF19 --------------------------------------------PhysicalProject ----------------------------------------------PhysicalOlapScan[customer_address] ----------------------------------------PhysicalProject -------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF16 +------------------------------------------PhysicalOlapScan[household_demographics] apply RFs: RF15 ------------------------------------PhysicalProject --------------------------------------PhysicalOlapScan[customer_address] --------------------------------PhysicalProject diff --git a/regression-test/data/new_shapes_p0/tpcds_sf100/shape/query24.out b/regression-test/data/new_shapes_p0/tpcds_sf100/shape/query24.out index cb8e44d35b32a89..0dc2c851744de21 100644 --- a/regression-test/data/new_shapes_p0/tpcds_sf100/shape/query24.out +++ b/regression-test/data/new_shapes_p0/tpcds_sf100/shape/query24.out @@ -7,7 +7,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalDistribute[DistributionSpecHash] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF5 sr_ticket_number->[ss_ticket_number];RF6 sr_item_sk->[ss_item_sk];RF7 sr_item_sk->[i_item_sk] +--------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF5 sr_ticket_number->[ss_ticket_number];RF6 sr_item_sk->[i_item_sk,ss_item_sk] ----------------PhysicalProject ------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[ss_item_sk] --------------------PhysicalProject @@ -26,7 +26,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------------PhysicalProject ------------------------------PhysicalOlapScan[customer_address] --------------------PhysicalProject -----------------------PhysicalOlapScan[item] apply RFs: RF7 +----------------------PhysicalOlapScan[item] apply RFs: RF6 ----------------PhysicalProject ------------------PhysicalOlapScan[store_returns] --PhysicalResultSink diff --git a/regression-test/data/new_shapes_p0/tpcds_sf1000/shape/query24.out b/regression-test/data/new_shapes_p0/tpcds_sf1000/shape/query24.out index a64d7f776a15469..d93b73ade16ecbf 100644 --- a/regression-test/data/new_shapes_p0/tpcds_sf1000/shape/query24.out +++ b/regression-test/data/new_shapes_p0/tpcds_sf1000/shape/query24.out @@ -7,7 +7,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) --------PhysicalDistribute[DistributionSpecHash] ----------hashAgg[LOCAL] ------------PhysicalProject ---------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF5 sr_ticket_number->[ss_ticket_number];RF6 sr_item_sk->[ss_item_sk];RF7 sr_item_sk->[i_item_sk] +--------------hashJoin[INNER_JOIN colocated] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF5 sr_ticket_number->[ss_ticket_number];RF6 sr_item_sk->[i_item_sk,ss_item_sk] ----------------PhysicalProject 
------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF4 i_item_sk->[ss_item_sk] --------------------PhysicalProject @@ -26,7 +26,7 @@ PhysicalCteAnchor ( cteId=CTEId#0 ) ----------------------------PhysicalProject ------------------------------PhysicalOlapScan[customer_address] --------------------PhysicalProject -----------------------PhysicalOlapScan[item] apply RFs: RF7 +----------------------PhysicalOlapScan[item] apply RFs: RF6 ----------------PhysicalProject ------------------PhysicalOlapScan[store_returns] --PhysicalResultSink From 1f87ecf8e00261fbac5231a63987bcf9120be939 Mon Sep 17 00:00:00 2001 From: zclllhhjj Date: Tue, 13 Aug 2024 15:01:51 +0800 Subject: [PATCH 81/94] [Fix](function) fix coredump because short of check on randoms arguments (#39255) ## Proposed changes Issue Number: close #xxx before: crash or ```sql mysql [test]>select random(1,array_size(split_by_string(fcst_emp,','))) from test_random; +---------------------------------------------------------+ | random(1, array_size(split_by_string(`fcst_emp`, ','))) | +---------------------------------------------------------+ | 7471044383762196303 | +---------------------------------------------------------+ 1 row in set (0.05 sec) ``` now: both for nereids and legacy planner: ```sql mysql [test]>select random(1,array_size(split_by_string(fcst_emp,','))) from test_random; ERROR 1105 (HY000): errCode = 2, detailMessage = (10.16.10.8)[INVALID_ARGUMENT]The param of rand function must be literal ``` doc pr: https://github.com/apache/doris-website/pull/992 --- be/src/vec/functions/random.cpp | 11 +++++++--- .../expressions/functions/scalar/Random.java | 11 ++++++++-- .../nereids_p0/system/test_query_sys.groovy | 21 +++++++++++++++++++ 3 files changed, 38 insertions(+), 5 deletions(-) diff --git a/be/src/vec/functions/random.cpp b/be/src/vec/functions/random.cpp index f073491e37220ce..1b8dea935d19a17 100644 --- a/be/src/vec/functions/random.cpp +++ b/be/src/vec/functions/random.cpp @@ -69,15 +69,19 @@ class Random : public IFunction { if (context->get_num_args() == 1) { // This is a call to RandSeed, initialize the seed if (!context->is_col_constant(0)) { - return Status::InvalidArgument("Seed argument to rand() must be constant."); + return Status::InvalidArgument("The param of rand function must be literal"); } uint32_t seed = 0; if (!context->get_constant_col(0)->column_ptr->is_null_at(0)) { seed = (*context->get_constant_col(0)->column_ptr)[0].get(); } generator->seed(seed); - } else { - // 0 or 2 args + } else if (context->get_num_args() == 2) { + if (!context->is_col_constant(0) || !context->is_col_constant(1)) { + return Status::InvalidArgument("The param of rand function must be literal"); + } + generator->seed(std::random_device()()); + } else { // zero args generator->seed(std::random_device()()); } } @@ -108,6 +112,7 @@ class Random : public IFunction { context->get_function_state(FunctionContext::THREAD_LOCAL)); DCHECK(generator != nullptr); + // checked in open() Int64 min = assert_cast( assert_cast( block.get_by_position(arguments[0]).column.get()) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Random.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Random.java index a7f3a360a6a7520..5045d85c9194213 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Random.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Random.java @@ -65,10 +65,17 @@ public Random(Expression arg) { */ public Random(Expression lchild, Expression rchild) { super("random", lchild, rchild); + } + + @Override + public void checkLegalityBeforeTypeCoercion() { // align with original planner behavior, refer to: // org/apache/doris/analysis/Expr.getBuiltinFunction() - Preconditions.checkState(lchild instanceof Literal && rchild instanceof Literal, - "The param of rand function must be literal"); + for (Expression child : children()) { + if (!child.isLiteral()) { + throw new AnalysisException("The param of rand function must be literal "); + } + } } /** diff --git a/regression-test/suites/nereids_p0/system/test_query_sys.groovy b/regression-test/suites/nereids_p0/system/test_query_sys.groovy index 85d612b9c17b594..e0e68f909fd94ff 100644 --- a/regression-test/suites/nereids_p0/system/test_query_sys.groovy +++ b/regression-test/suites/nereids_p0/system/test_query_sys.groovy @@ -48,4 +48,25 @@ suite("test_query_sys", "query,p0") { sql "set enable_nereids_planner=true" def v2 = sql "select version()" assertEquals(v1, v2) + + test { + sql "select random(random());" + exception "The param of rand function must be literal" + } + + sql "set enable_nereids_planner=false" + sql """ + CREATE TABLE IF NOT EXISTS `test_random` ( + fcst_emp varchar(128) NOT NULL + ) ENGINE=OLAP + DISTRIBUTED BY HASH(`fcst_emp`) + PROPERTIES( + "replication_num" = "1", + "compression" = "LZ4" ); + """ + sql """ insert into test_random values('123,1233,4123,3131'); """ + test { + sql "select random(1,array_size(split_by_string(fcst_emp,','))) from test_random;" + exception "The param of rand function must be literal" + } } From ea189e9517c5f9087daf7503fb6e75cb9b44b167 Mon Sep 17 00:00:00 2001 From: Dongyang Li Date: Tue, 13 Aug 2024 15:30:25 +0800 Subject: [PATCH 82/94] Revert "[chore](ci) add branch-3.0 required checks" (#39294) Reverts apache/doris#39250 --- .asf.yaml | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/.asf.yaml b/.asf.yaml index 4049ca5f764e5db..821947fa1a0451b 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -72,33 +72,6 @@ github: dismiss_stale_reviews: true require_code_owner_reviews: true required_approving_review_count: 1 - - branch-3.0: - required_status_checks: - # if strict is true, means "Require branches to be up to date before merging". - strict: false - contexts: - - License Check - - Clang Formatter - - CheckStyle - - Build Broker - - ShellCheck - - Build Third Party Libraries (Linux) - - Build Third Party Libraries (macOS) - - Build Third Party Libraries (macOS-arm64) - - FE UT (Doris FE UT) - - BE UT (Doris BE UT) - - Cloud UT (Doris Cloud UT) - - COMPILE (DORIS_COMPILE) - - P0 Regression (Doris Regression) - - P1 Regression (Doris Regression) - - External Regression (Doris External Regression) - - cloud_p1 (Doris Cloud Regression) - - cloud_p0 (Doris Cloud Regression) - required_pull_request_reviews: - dismiss_stale_reviews: true - required_approving_review_count: 1 - branch-1.1-lts: required_status_checks: # if strict is true, means "Require branches to be up to date before merging". From 8cded12044a82d78738c29ce57283714a8a32669 Mon Sep 17 00:00:00 2001 From: Chester Date: Tue, 13 Aug 2024 16:04:36 +0800 Subject: [PATCH 83/94] [refactor](opt) optimize BE code of array_distinct function (#38560) ## Proposed changes refactor part of BE code of `array_distinct` function by: 1. optimize the head files 2. 
use type deduction(auto) 3. use macro definition to simplify if-else code 4. add error log for unsupported element type of array --- .../functions/array/function_array_distinct.h | 106 +++++++----------- 1 file changed, 40 insertions(+), 66 deletions(-) diff --git a/be/src/vec/functions/array/function_array_distinct.h b/be/src/vec/functions/array/function_array_distinct.h index 936db4354474d73..a3b70aa60d73859 100644 --- a/be/src/vec/functions/array/function_array_distinct.h +++ b/be/src/vec/functions/array/function_array_distinct.h @@ -21,12 +21,10 @@ #include #include -#include -#include +#include #include #include -#include #include #include "common/status.h" @@ -103,7 +101,7 @@ class FunctionArrayDistinct : public IFunction { const NullMapType* src_null_map = nullptr; if (src_nested_column->is_nullable()) { - const ColumnNullable* src_nested_nullable_col = + const auto* src_nested_nullable_col = check_and_get_column(*src_nested_column); src_nested_column = src_nested_nullable_col->get_nested_column_ptr(); src_null_map = &src_nested_nullable_col->get_null_map_column().get_data(); @@ -111,8 +109,7 @@ class FunctionArrayDistinct : public IFunction { NullMapType* dest_null_map = nullptr; if (dest_nested_column->is_nullable()) { - ColumnNullable* dest_nested_nullable_col = - reinterpret_cast(dest_nested_column); + auto* dest_nested_nullable_col = reinterpret_cast(dest_nested_column); dest_nested_column = dest_nested_nullable_col->get_nested_column_ptr(); dest_null_map = &dest_nested_nullable_col->get_null_map_column().get_data(); } @@ -140,13 +137,13 @@ class FunctionArrayDistinct : public IFunction { using NestType = typename ColumnType::value_type; using ElementNativeType = typename NativeType::Type; - const ColumnType* src_data_concrete = reinterpret_cast(&src_column); + const auto* src_data_concrete = reinterpret_cast(&src_column); if (!src_data_concrete) { return false; } const PaddedPODArray& src_datas = src_data_concrete->get_data(); - ColumnType& dest_data_concrete = reinterpret_cast(dest_column); + auto& dest_data_concrete = reinterpret_cast(dest_column); PaddedPODArray& dest_datas = dest_data_concrete.get_data(); using Set = HashSetWithStackMemory, @@ -194,12 +191,12 @@ class FunctionArrayDistinct : public IFunction { bool _execute_string(const IColumn& src_column, const ColumnArray::Offsets64& src_offsets, IColumn& dest_column, ColumnArray::Offsets64& dest_offsets, const NullMapType* src_null_map, NullMapType* dest_null_map) const { - const ColumnString* src_data_concrete = reinterpret_cast(&src_column); + const auto* src_data_concrete = reinterpret_cast(&src_column); if (!src_data_concrete) { return false; } - ColumnString& dest_column_string = reinterpret_cast(dest_column); + auto& dest_column_string = reinterpret_cast(dest_column); ColumnString::Chars& column_string_chars = dest_column_string.get_chars(); ColumnString::Offsets& column_string_offsets = dest_column_string.get_offsets(); column_string_chars.reserve(src_column.size()); @@ -257,64 +254,41 @@ class FunctionArrayDistinct : public IFunction { IColumn& dest_column, ColumnArray::Offsets64& dest_offsets, const NullMapType* src_null_map, NullMapType* dest_null_map, DataTypePtr& nested_type) const { - bool res = false; +#define EXECUTE_NUMBER(TYPE, NAME) \ + if (which.is_##NAME()) { \ + return _execute_number(src_column, src_offsets, dest_column, dest_offsets, \ + src_null_map, dest_null_map); \ + } + WhichDataType which(remove_nullable(nested_type)); - if (which.is_uint8()) { - res = _execute_number(src_column, 
src_offsets, dest_column, dest_offsets, - src_null_map, dest_null_map); - } else if (which.is_int8()) { - res = _execute_number(src_column, src_offsets, dest_column, dest_offsets, - src_null_map, dest_null_map); - } else if (which.is_int16()) { - res = _execute_number(src_column, src_offsets, dest_column, dest_offsets, - src_null_map, dest_null_map); - } else if (which.is_int32()) { - res = _execute_number(src_column, src_offsets, dest_column, dest_offsets, - src_null_map, dest_null_map); - } else if (which.is_int64()) { - res = _execute_number(src_column, src_offsets, dest_column, dest_offsets, - src_null_map, dest_null_map); - } else if (which.is_int128()) { - res = _execute_number(src_column, src_offsets, dest_column, dest_offsets, - src_null_map, dest_null_map); - } else if (which.is_float32()) { - res = _execute_number(src_column, src_offsets, dest_column, dest_offsets, - src_null_map, dest_null_map); - } else if (which.is_float64()) { - res = _execute_number(src_column, src_offsets, dest_column, dest_offsets, - src_null_map, dest_null_map); - } else if (which.is_date()) { - res = _execute_number(src_column, src_offsets, dest_column, dest_offsets, - src_null_map, dest_null_map); - } else if (which.is_date_time()) { - res = _execute_number(src_column, src_offsets, dest_column, - dest_offsets, src_null_map, dest_null_map); - } else if (which.is_date_v2()) { - res = _execute_number(src_column, src_offsets, dest_column, dest_offsets, - src_null_map, dest_null_map); - } else if (which.is_date_time_v2()) { - res = _execute_number(src_column, src_offsets, dest_column, - dest_offsets, src_null_map, dest_null_map); - } else if (which.is_decimal32()) { - res = _execute_number(src_column, src_offsets, dest_column, - dest_offsets, src_null_map, dest_null_map); - } else if (which.is_decimal64()) { - res = _execute_number(src_column, src_offsets, dest_column, - dest_offsets, src_null_map, dest_null_map); - } else if (which.is_decimal128v3()) { - res = _execute_number(src_column, src_offsets, dest_column, - dest_offsets, src_null_map, dest_null_map); - } else if (which.is_decimal256()) { - res = _execute_number(src_column, src_offsets, dest_column, - dest_offsets, src_null_map, dest_null_map); - } else if (which.is_decimal128v2()) { - res = _execute_number(src_column, src_offsets, dest_column, - dest_offsets, src_null_map, dest_null_map); - } else if (which.is_string()) { - res = _execute_string(src_column, src_offsets, dest_column, dest_offsets, src_null_map, - dest_null_map); + EXECUTE_NUMBER(ColumnUInt8, uint8); + EXECUTE_NUMBER(ColumnInt8, int8); + EXECUTE_NUMBER(ColumnInt16, int16); + EXECUTE_NUMBER(ColumnInt32, int32); + EXECUTE_NUMBER(ColumnInt64, int64); + EXECUTE_NUMBER(ColumnInt128, int128); + EXECUTE_NUMBER(ColumnFloat32, float32); + EXECUTE_NUMBER(ColumnFloat64, float64); + EXECUTE_NUMBER(ColumnDate, date); + EXECUTE_NUMBER(ColumnDateTime, date_time); + EXECUTE_NUMBER(ColumnDateV2, date_v2); + EXECUTE_NUMBER(ColumnDateTimeV2, date_time_v2); + EXECUTE_NUMBER(ColumnDecimal32, decimal32); + EXECUTE_NUMBER(ColumnDecimal64, decimal64); + EXECUTE_NUMBER(ColumnDecimal128V3, decimal128v3); + EXECUTE_NUMBER(ColumnDecimal256, decimal256); + EXECUTE_NUMBER(ColumnDecimal128V2, decimal128v2); + if (which.is_string()) { + return _execute_string(src_column, src_offsets, dest_column, dest_offsets, src_null_map, + dest_null_map); + } else { + LOG(ERROR) << "Unsupported array's element type: " + << remove_nullable(nested_type)->get_name() << " for function " + << this->get_name(); + return false; } - 
return res; + +#undef EXECUTE_NUMBER } }; From 139faad38a4ef33fc05ed5fe7df8fa6b7771a99f Mon Sep 17 00:00:00 2001 From: xzj7019 <131111794+xzj7019@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:22:22 +0800 Subject: [PATCH 84/94] [fix](nereids) fix partitionTopN choosing under multi winexprs (#39233) intro by #38393 Fix the cases whose window function both contains row_number and other types but only the other types contains pushing down filter. --- .../nereids/trees/plans/logical/LogicalWindow.java | 2 +- .../push_down_multi_filter_through_window.groovy | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalWindow.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalWindow.java index 560be6e8420f1c6..6cd8389349a0074 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalWindow.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalWindow.java @@ -285,8 +285,8 @@ && child(0).child(0) instanceof LogicalPartitionTopN)) { if (curPartitionLimit < chosenRowNumberPartitionLimit) { chosenRowNumberPartitionLimit = curPartitionLimit; chosenWindowFunc = windowFunc; + hasRowNumber = true; } - hasRowNumber = true; } else if (!hasRowNumber) { // if no row_number, choose the one with minimal limit value if (curPartitionLimit < chosenPartitionLimit) { diff --git a/regression-test/suites/nereids_rules_p0/push_down_filter_through_window/push_down_multi_filter_through_window.groovy b/regression-test/suites/nereids_rules_p0/push_down_filter_through_window/push_down_multi_filter_through_window.groovy index 39b70e76f52b3ac..d808d30f8ebdfe2 100644 --- a/regression-test/suites/nereids_rules_p0/push_down_filter_through_window/push_down_multi_filter_through_window.groovy +++ b/regression-test/suites/nereids_rules_p0/push_down_filter_through_window/push_down_multi_filter_through_window.groovy @@ -62,6 +62,20 @@ suite("push_down_multi_filter_through_window") { contains "partition limit: 10" } + explain { + sql ("select * from (select rank() over(partition by c1 order by c3) as rk, row_number() over(partition by c1, c2 order by c3) as rn from push_down_multi_predicate_through_window_t) t where rk <= 1;") + contains "VPartitionTopN" + contains "functions: rank" + contains "partition limit: 1" + } + + explain { + sql ("select * from (select rank() over(partition by c1 order by c3) as rk, row_number() over(partition by c1, c2 order by c3) as rn from push_down_multi_predicate_through_window_t) t where rn <= 10;") + contains "VPartitionTopN" + contains "functions: row_number" + contains "partition limit: 10" + } + explain { sql ("select * from (select rank() over(partition by c1 order by c3) as rk, rank() over(partition by c1, c2 order by c3) as rn from push_down_multi_predicate_through_window_t) t where rn <= 1 and rk <= 10;") contains "VPartitionTopN" From b0519d29efa43801ae5dd5dfa1e3849632bf731a Mon Sep 17 00:00:00 2001 From: zzzxl <33418555+zzzxl1993@users.noreply.github.com> Date: Tue, 13 Aug 2024 16:38:55 +0800 Subject: [PATCH 85/94] [fix](inverted index) Writing to the inverted index also writes to the file cache. (#39076) 1. When write_file_cache is true, writing to the inverted index also writes to the file cache. 
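To make the data flow concrete, here is a minimal, self-contained sketch of the pattern this change introduces (not the real Doris classes): the rowset writer context computes one set of file-writer options, and every inverted index writer receives that value through `set_file_writer_opts()` before it creates any output file. Only the field names and the expiration rule are taken from the diff below (`get_file_writer_options()` in `rowset_writer_context.h`); all types are simplified stand-ins.

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Simplified stand-ins for the real io::FileWriterOptions / RowsetWriterContext.
struct FileWriterOptions {
    bool write_file_cache = false;
    bool is_cold_data = false;
    int64_t file_cache_expiration = 0; // absolute expiration timestamp, 0 = no TTL
};

struct RowsetWriterContext {
    bool write_file_cache = true;
    bool is_hot_data = false;
    int64_t file_cache_ttl_sec = 3600;
    int64_t newest_write_timestamp = 1723500000;

    // Mirrors get_file_writer_options(): the TTL is applied only when both the
    // TTL and the newest write timestamp are known. (The is_hot_data ->
    // is_cold_data mapping follows the diff as written.)
    FileWriterOptions get_file_writer_options() const {
        FileWriterOptions opts;
        opts.write_file_cache = write_file_cache;
        opts.is_cold_data = is_hot_data;
        opts.file_cache_expiration =
                (file_cache_ttl_sec > 0 && newest_write_timestamp > 0)
                        ? newest_write_timestamp + file_cache_ttl_sec
                        : 0;
        return opts;
    }
};

struct InvertedIndexFileWriter {
    FileWriterOptions opts;
    // Mirrors set_file_writer_opts(): the writer stores the options and later
    // forwards them when it opens its own output files.
    void set_file_writer_opts(const FileWriterOptions& o) { opts = o; }
};

int main() {
    RowsetWriterContext ctx;
    std::vector<InvertedIndexFileWriter> writers(2);
    // Same shape as the compaction change: derive the options once from the
    // context, then hand the same value to every index writer.
    for (auto& w : writers) {
        w.set_file_writer_opts(ctx.get_file_writer_options());
    }
    std::cout << writers[0].opts.file_cache_expiration << std::endl;
    return 0;
}
```

Deriving the options once in the context keeps the TTL/expiration rule in a single place, so compaction and the segment writers pass the same value to every index writer instead of re-deriving it.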
--- be/src/olap/compaction.cpp | 3 + be/src/olap/rowset/beta_rowset_writer.cpp | 8 +- be/src/olap/rowset/rowset_writer_context.h | 10 ++ .../segment_v2/inverted_index_file_writer.cpp | 3 + .../segment_v2/inverted_index_file_writer.h | 3 + .../inverted_index_fs_directory.cpp | 18 ++- .../segment_v2/inverted_index_fs_directory.h | 9 +- .../olap/rowset/segment_v2/segment_writer.cpp | 2 + .../segment_v2/vertical_segment_writer.cpp | 2 + .../rowset/vertical_beta_rowset_writer.cpp | 9 +- .../test_index_writer_file_cache.groovy | 116 ++++++++++++++++++ 11 files changed, 162 insertions(+), 21 deletions(-) create mode 100644 regression-test/suites/fault_injection_p0/test_index_writer_file_cache.groovy diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 8c109eec1c1c4d4..9ed27bad382a566 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -686,6 +686,9 @@ Status Compaction::do_inverted_index_compaction() { << st; return st; } + for (const auto& writer : inverted_index_file_writers) { + writer->set_file_writer_opts(ctx.get_file_writer_options()); + } } // use tmp file dir to store index files diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index f3a0ade24f39b9f..ec1bba7621b45e8 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -846,13 +846,7 @@ Status BaseBetaRowsetWriter::_build_tmp(RowsetSharedPtr& rowset_ptr) { Status BaseBetaRowsetWriter::_create_file_writer(const std::string& path, io::FileWriterPtr& file_writer) { - io::FileWriterOptions opts { - .write_file_cache = _context.write_file_cache, - .is_cold_data = _context.is_hot_data, - .file_cache_expiration = - _context.file_cache_ttl_sec > 0 && _context.newest_write_timestamp > 0 - ? _context.newest_write_timestamp + _context.file_cache_ttl_sec - : 0}; + io::FileWriterOptions opts = _context.get_file_writer_options(); Status st = _context.fs()->create_file(path, &file_writer, &opts); if (!st.ok()) { LOG(WARNING) << "failed to create writable file. path=" << path << ", err: " << st; diff --git a/be/src/olap/rowset/rowset_writer_context.h b/be/src/olap/rowset/rowset_writer_context.h index 0130916bfb48118..e13f7efe6e94fa4 100644 --- a/be/src/olap/rowset/rowset_writer_context.h +++ b/be/src/olap/rowset/rowset_writer_context.h @@ -140,6 +140,16 @@ struct RowsetWriterContext { return *storage_resource->fs; } } + + io::FileWriterOptions get_file_writer_options() const { + io::FileWriterOptions opts { + .write_file_cache = write_file_cache, + .is_cold_data = is_hot_data, + .file_cache_expiration = file_cache_ttl_sec > 0 && newest_write_timestamp > 0 + ? 
newest_write_timestamp + file_cache_ttl_sec + : 0}; + return opts; + } }; } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp index f2ac0e922650c6d..6eb54878924a0ff 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp @@ -283,6 +283,7 @@ size_t InvertedIndexFileWriter::write_v1() { ram_dir.close(); auto* out_dir = DorisFSDirectoryFactory::getDirectory(_fs, idx_path.c_str()); + out_dir->set_file_writer_opts(_opts); auto* out = out_dir->createOutput(idx_name.c_str()); if (out == nullptr) { @@ -348,6 +349,8 @@ size_t InvertedIndexFileWriter::write_v2() { io::Path index_path {InvertedIndexDescriptor::get_index_file_path_v2(_index_path_prefix)}; auto* out_dir = DorisFSDirectoryFactory::getDirectory(_fs, index_path.parent_path().c_str()); + out_dir->set_file_writer_opts(_opts); + std::unique_ptr compound_file_output; // idx v2 writer != nullptr means memtable on sink node now if (_idx_v2_writer != nullptr) { diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h index b9f9b983e44c3f5..024c1dec9861ec0 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_file_writer.h @@ -71,6 +71,8 @@ class InvertedIndexFileWriter { lucene::store::IndexOutput* output, uint8_t* buffer, int64_t bufferLength); InvertedIndexStorageFormatPB get_storage_format() const { return _storage_format; } + void set_file_writer_opts(const io::FileWriterOptions& opts) { _opts = opts; } + private: InvertedIndexDirectoryMap _indices_dirs; const io::FileSystemSPtr _fs; @@ -81,6 +83,7 @@ class InvertedIndexFileWriter { size_t _file_size = 0; // write to disk or stream io::FileWriterPtr _idx_v2_writer; + io::FileWriterOptions _opts; }; } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp index 0443bf345ba1d67..27e03b43da2a508 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp @@ -84,9 +84,6 @@ namespace doris::segment_v2 { const char* const DorisFSDirectory::WRITE_LOCK_FILE = "write.lock"; class DorisFSDirectory::FSIndexOutput : public lucene::store::BufferedIndexOutput { -private: - io::FileWriterPtr _writer; - protected: void flushBuffer(const uint8_t* b, const int32_t size) override; @@ -96,6 +93,12 @@ class DorisFSDirectory::FSIndexOutput : public lucene::store::BufferedIndexOutpu ~FSIndexOutput() override; void close() override; int64_t length() const override; + + void set_file_writer_opts(const io::FileWriterOptions& opts) { _opts = opts; } + +private: + io::FileWriterPtr _writer; + io::FileWriterOptions _opts; }; class DorisFSDirectory::FSIndexOutputV2 : public lucene::store::BufferedIndexOutput { @@ -242,7 +245,13 @@ void DorisFSDirectory::FSIndexInput::readInternal(uint8_t* b, const int32_t len) } void DorisFSDirectory::FSIndexOutput::init(const io::FileSystemSPtr& fs, const char* path) { - Status status = fs->create_file(path, &_writer); + DBUG_EXECUTE_IF("DorisFSDirectory::FSIndexOutput::init.file_cache", { + if (fs->type() == io::FileSystemType::S3 && _opts.write_file_cache == false) { + _CLTHROWA(CL_ERR_IO, "Inverted index failed to enter file cache"); + } + }); + 
+ Status status = fs->create_file(path, &_writer, &_opts); DBUG_EXECUTE_IF( "DorisFSDirectory::FSIndexOutput._throw_clucene_error_in_fsindexoutput_" "init", @@ -579,6 +588,7 @@ lucene::store::IndexOutput* DorisFSDirectory::createOutput(const char* name) { assert(!exists); } auto* ret = _CLNEW FSIndexOutput(); + ret->set_file_writer_opts(_opts); try { ret->init(_fs, fl); } catch (CLuceneError& err) { diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h index b3e0352d7adf91b..357ac65c6782fa4 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h @@ -29,6 +29,7 @@ #include "CLucene/SharedHeader.h" #include "io/fs/file_reader_writer_fwd.h" #include "io/fs/file_system.h" +#include "io/fs/file_writer.h" #include "io/io_common.h" class CLuceneError; @@ -46,8 +47,6 @@ class CLUCENE_EXPORT DorisFSDirectory : public lucene::store::Directory { public: static const char* const WRITE_LOCK_FILE; static const int64_t MAX_HEADER_DATA_SIZE = 1024 * 128; // 128k -private: - int filemode; protected: mutable std::mutex _this_lock; @@ -91,6 +90,12 @@ class CLUCENE_EXPORT DorisFSDirectory : public lucene::store::Directory { virtual void init(const io::FileSystemSPtr& fs, const char* path, lucene::store::LockFactory* lock_factory = nullptr); + + void set_file_writer_opts(const io::FileWriterOptions& opts) { _opts = opts; } + +private: + int32_t filemode; + io::FileWriterOptions _opts; }; class CLUCENE_EXPORT DorisRAMFSDirectory : public DorisFSDirectory { diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 36b200fe8e3a8d4..f20af3df80a9ae2 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -139,6 +139,8 @@ SegmentWriter::SegmentWriter(io::FileWriter* file_writer, uint32_t segment_id, _opts.rowset_ctx->rowset_id.to_string(), segment_id, _tablet_schema->get_inverted_index_storage_format(), std::move(inverted_file_writer)); + _inverted_index_file_writer->set_file_writer_opts( + _opts.rowset_ctx->get_file_writer_options()); } } diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index aa9376a8d789ac0..3e23b1fda520f07 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -120,6 +120,8 @@ VerticalSegmentWriter::VerticalSegmentWriter(io::FileWriter* file_writer, uint32 _opts.rowset_ctx->rowset_id.to_string(), segment_id, _tablet_schema->get_inverted_index_storage_format(), std::move(inverted_file_writer)); + _inverted_index_file_writer->set_file_writer_opts( + _opts.rowset_ctx->get_file_writer_options()); } } diff --git a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp index 1db74843697a764..ee687d18edc8074 100644 --- a/be/src/olap/rowset/vertical_beta_rowset_writer.cpp +++ b/be/src/olap/rowset/vertical_beta_rowset_writer.cpp @@ -165,14 +165,7 @@ Status VerticalBetaRowsetWriter::_create_segment_writer( int seg_id = this->_num_segment.fetch_add(1, std::memory_order_relaxed); io::FileWriterPtr file_writer; - io::FileWriterOptions opts { - .write_file_cache = this->_context.write_file_cache, - .is_cold_data = this->_context.is_hot_data, - .file_cache_expiration = this->_context.file_cache_ttl_sec > 0 && - 
this->_context.newest_write_timestamp > 0 - ? this->_context.newest_write_timestamp + - this->_context.file_cache_ttl_sec - : 0}; + io::FileWriterOptions opts = this->_context.get_file_writer_options(); auto path = context.segment_path(seg_id); auto& fs = context.fs_ref(); diff --git a/regression-test/suites/fault_injection_p0/test_index_writer_file_cache.groovy b/regression-test/suites/fault_injection_p0/test_index_writer_file_cache.groovy new file mode 100644 index 000000000000000..b26794e36714afa --- /dev/null +++ b/regression-test/suites/fault_injection_p0/test_index_writer_file_cache.groovy @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +suite("test_index_writer_file_cache_fault_injection", "nonConcurrent") { + if (!isCloudMode()) { + return; + } + + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); + + def testTable1 = "test_index_writer_file_cache_fault_injection_1" + def testTable2 = "test_index_writer_file_cache_fault_injection_2" + + sql "DROP TABLE IF EXISTS ${testTable1}" + sql """ + CREATE TABLE ${testTable1} ( + `@timestamp` int(11) NULL COMMENT "", + `clientip` string NULL COMMENT "", + `request` string NULL COMMENT "", + `status` int(11) NULL COMMENT "", + `size` int(11) NULL COMMENT "", + INDEX clientip_idx (`clientip`) USING INVERTED COMMENT '', + INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "unicode", "support_phrase" = "true") COMMENT '', + INDEX status_idx (`status`) USING INVERTED COMMENT '', + INDEX size_idx (`size`) USING INVERTED COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`@timestamp`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`@timestamp`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true" + ); + """ + + sql "DROP TABLE IF EXISTS ${testTable2}" + sql """ + CREATE TABLE ${testTable2} ( + `@timestamp` int(11) NULL COMMENT "", + `clientip` string NULL COMMENT "", + `request` string NULL COMMENT "", + `status` int(11) NULL COMMENT "", + `size` int(11) NULL COMMENT "", + INDEX clientip_idx (`clientip`) USING INVERTED COMMENT '', + INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "unicode", "support_phrase" = "true") COMMENT '', + INDEX status_idx (`status`) USING INVERTED COMMENT '', + INDEX size_idx (`size`) USING INVERTED COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`@timestamp`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`@timestamp`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "disable_auto_compaction" = "true" + ); + """ + + def insert_and_compaction = { tableName -> + sql """ INSERT INTO ${tableName} VALUES (893964617, '40.135.0.0', 'GET /images/hm_bg.jpg 
HTTP/1.0', 200, 24736); """ + sql """ INSERT INTO ${tableName} VALUES (893964653, '232.0.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 200, 3781); """ + sql """ INSERT INTO ${tableName} VALUES (893964672, '26.1.0.0', 'GET /images/hm_bg.jpg HTTP/1.0', 304, 0); """ + + def tablets = sql_return_maparray """ show tablets from ${tableName}; """ + + for (def tablet in tablets) { + String tablet_id = tablet.TabletId + String backend_id = tablet.BackendId + def (code, out, err) = be_run_full_compaction(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactJson = parseJson(out.trim()) + assertEquals("success", compactJson.status.toLowerCase()) + } + + for (def tablet in tablets) { + boolean running = true + do { + Thread.sleep(1000) + String tablet_id = tablet.TabletId + String backend_id = tablet.BackendId + def (code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id) + logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def compactionStatus = parseJson(out.trim()) + assertEquals("success", compactionStatus.status.toLowerCase()) + running = compactionStatus.run_status + } while (running) + } + } + + try { + GetDebugPoint().enableDebugPointForAllBEs("DorisFSDirectory::FSIndexOutput::init.file_cache") + + insert_and_compaction.call(testTable1); + insert_and_compaction.call(testTable2); + } finally { + GetDebugPoint().disableDebugPointForAllBEs("DorisFSDirectory::FSIndexOutput::init.file_cache") + } +} \ No newline at end of file From ee356e53ae23ab2dfa6c4dbe8378ab9a8f81a8b9 Mon Sep 17 00:00:00 2001 From: Pxl Date: Tue, 13 Aug 2024 16:44:02 +0800 Subject: [PATCH 86/94] [Bug](brpc) fix sync_filter_size/apply_filterv2 has wrong closure (#39155) ## Proposed changes 1. fix sync_filter_size/apply_filterv2 has wrong closure 2. 
avoid using thread local storage of iobuf
--- be/src/runtime/runtime_filter_mgr.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/be/src/runtime/runtime_filter_mgr.cpp b/be/src/runtime/runtime_filter_mgr.cpp
index 8eb3ab5eebf8d73..b9bd01095f34e86 100644
--- a/be/src/runtime/runtime_filter_mgr.cpp
+++ b/be/src/runtime/runtime_filter_mgr.cpp
@@ -343,7 +343,7 @@ Status RuntimeFilterMergeControllerEntity::send_filter_size(const PSendFilterSiz closure->request_->set_filter_size(cnt_val->global_size); stub->sync_filter_size(closure->cntl_.get(), closure->request_.get(), - closure->response_.get(), brpc::DoNothing()); + closure->response_.get(), closure.get()); closure.release(); } }
@@ -425,7 +425,11 @@ Status RuntimeFilterMergeControllerEntity::merge(const PMergeFilterRequest* requ } if (data != nullptr && len > 0) { - request_attachment.append(data, len); + void* allocated = malloc(len); + memcpy(allocated, data, len); + // control the memory by doris self to avoid using brpc's thread local storage + // because the memory of tls will not be released + request_attachment.append_user_data(allocated, len, [](void* ptr) { free(ptr); }); has_attachment = true; }
@@ -459,7 +463,7 @@ Status RuntimeFilterMergeControllerEntity::merge(const PMergeFilterRequest* requ continue; } stub->apply_filterv2(closure->cntl_.get(), closure->request_.get(), - closure->response_.get(), brpc::DoNothing()); + closure->response_.get(), closure.get()); closure.release(); } }
From 99a984c2e23e3efb88c48d475265d70977842155 Mon Sep 17 00:00:00 2001
From: minghong
Date: Tue, 13 Aug 2024 17:00:22 +0800
Subject: [PATCH 87/94] [feat](nereids) enable_stats supports external table (#39079)
## Proposed changes
The session variable enable_stats now supports external tables.
Issue Number: close #xxx
--- .../java/org/apache/doris/nereids/stats/StatsCalculator.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index ab06a8c7fb8f4f2..7d3a7f339b967ba 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -1045,8 +1045,6 @@ private ColumnStatistic getColumnStatistic(TableIf table, String colName, long i */ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { StatisticsBuilder builder = new StatisticsBuilder(); - double tableRowCount = catalogRelation.getTable().getRowCount(); - // for FeUt, use ColumnStatistic.UNKNOWN if (!FeConstants.enableInternalSchemaDb || ConnectContext.get() == null
@@ -1067,7 +1065,7 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { } } Set slotSet = slotSetBuilder.build(); - + double tableRowCount = catalogRelation.getTable().getRowCount(); if (tableRowCount <= 0) { // try to get row count from col stats for (SlotReference slot : slotSet) { tableRowCount = Math.max(cache.count, tableRowCount); } } - for (SlotReference slot : slotSet) { ColumnStatistic cache; if (ConnectContext.get() != null && !
ConnectContext.get().getSessionVariable().enableStats) {
From b24cbe294aef3875862b312cbed1c3e06a2d4b3f Mon Sep 17 00:00:00 2001
From: zy-kkk
Date: Tue, 13 Aug 2024 17:14:44 +0800
Subject: [PATCH 88/94] [fix](expr) Enhance SQL Expression Handling by Introducing printSqlInParens to CompoundPredicate (#39064)
This PR enhances SQL expression handling by introducing a printSqlInParens flag to the CompoundPredicate class and setting it to true. This ensures that expressions in a CompoundPredicate are always enclosed in parentheses, similar to how it is handled in a BinaryPredicate. By removing the redundant handling of CompoundPredicate, the logic for SQL generation in JdbcScanNode has been simplified, as the updated toSql method now handles this uniformly across expressions.
--- .../doris/analysis/CompoundPredicate.java | 2 + .../datasource/jdbc/source/JdbcScanNode.java | 34 +-------- .../doris/analysis/CancelExportStmtTest.java | 4 +- .../doris/analysis/CancelLoadStmtTest.java | 4 +- .../apache/doris/analysis/SelectStmtTest.java | 28 ++++---- .../analysis/ShowBuildIndexStmtTest.java | 8 +-- .../apache/doris/analysis/SqlModeTest.java | 2 +- .../apache/doris/planner/QueryPlanTest.java | 23 +++--- .../org/apache/doris/policy/PolicyTest.java | 10 +-- .../apache/doris/qe/OlapQueryCacheTest.java | 18 ++--- .../ExtractCommonFactorsRuleFunctionTest.java | 6 +- .../get_assignment_compatible_type.out | 2 +- .../jdbc/test_clickhouse_jdbc_catalog.groovy | 2 +- .../test_compoundpredicate_explain.groovy | 72 +++++++++++++++++++ 14 files changed, 128 insertions(+), 87 deletions(-) create mode 100644 regression-test/suites/query_p0/explain/test_compoundpredicate_explain.groovy
diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CompoundPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CompoundPredicate.java
index 67af35e0870f576..dcd67598dd41220 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CompoundPredicate.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CompoundPredicate.java
@@ -67,11 +67,13 @@ public CompoundPredicate(Operator op, Expr e1, Expr e2) { if (e2 != null) { children.add(e2); } + printSqlInParens = true; } protected CompoundPredicate(CompoundPredicate other) { super(other); op = other.op; + printSqlInParens = true; } @Override
diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java
index ab3f9f809fb6900..6b63c7c7b6e9347 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java
@@ -20,8 +20,6 @@ import org.apache.doris.analysis.Analyzer; import org.apache.doris.analysis.BinaryPredicate; import org.apache.doris.analysis.BoolLiteral; -import org.apache.doris.analysis.CompoundPredicate; -import org.apache.doris.analysis.CompoundPredicate.Operator; import org.apache.doris.analysis.DateLiteral; import org.apache.doris.analysis.Expr; import org.apache.doris.analysis.ExprSubstitutionMap; @@ -329,36 +327,6 @@ private static boolean containsFunctionCallExpr(Expr expr) { } public static String conjunctExprToString(TOdbcTableType tableType, Expr expr, TableIf tbl) { - if (expr instanceof CompoundPredicate) { - StringBuilder result = new StringBuilder(); - CompoundPredicate compoundPredicate = (CompoundPredicate) expr; - - // If the operator is 'NOT', prepend 'NOT' to the start of the
string - if (compoundPredicate.getOp() == Operator.NOT) { - result.append("NOT "); - } - - // Iterate through all children of the CompoundPredicate - for (Expr child : compoundPredicate.getChildren()) { - // Recursively call conjunctExprToString for each child and append to the result - result.append(conjunctExprToString(tableType, child, tbl)); - - // If the operator is not 'NOT', append the operator after each child expression - if (!(compoundPredicate.getOp() == Operator.NOT)) { - result.append(" ").append(compoundPredicate.getOp().toString()).append(" "); - } - } - - // For operators other than 'NOT', remove the extra appended operator at the end - // This is necessary for operators like 'AND' or 'OR' that appear between child expressions - if (!(compoundPredicate.getOp() == Operator.NOT)) { - result.setLength(result.length() - compoundPredicate.getOp().toString().length() - 2); - } - - // Return the processed string trimmed of any extra spaces - return result.toString().trim(); - } - if (expr.contains(DateLiteral.class) && expr instanceof BinaryPredicate) { ArrayList children = expr.getChildren(); String filter = children.get(0).toExternalSql(TableType.JDBC_EXTERNAL_TABLE, tbl); @@ -375,7 +343,7 @@ public static String conjunctExprToString(TOdbcTableType tableType, Expr expr, T return filter; } - // only for old planner + // Only for old planner if (expr.contains(BoolLiteral.class) && "1".equals(expr.getStringValue()) && expr.getChildren().isEmpty()) { return "1 = 1"; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/CancelExportStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/CancelExportStmtTest.java index 8808597ac3e295c..2d188230d8b5a88 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/CancelExportStmtTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/CancelExportStmtTest.java @@ -93,7 +93,7 @@ public void testNormal() throws UserException { stmt = new CancelExportStmt(null, compoundAndPredicate); stmt.analyze(analyzer); Assertions.assertEquals( - "CANCEL EXPORT FROM testDb WHERE (`label` = 'doris_test_label') AND (`state` = 'PENDING')", + "CANCEL EXPORT FROM testDb WHERE ((`label` = 'doris_test_label') AND (`state` = 'PENDING'))", stmt.toString()); CompoundPredicate compoundOrPredicate = new CompoundPredicate(Operator.OR, labelBinaryPredicate, @@ -101,7 +101,7 @@ public void testNormal() throws UserException { stmt = new CancelExportStmt(null, compoundOrPredicate); stmt.analyze(analyzer); Assertions.assertEquals( - "CANCEL EXPORT FROM testDb WHERE (`label` = 'doris_test_label') OR (`state` = 'PENDING')", + "CANCEL EXPORT FROM testDb WHERE ((`label` = 'doris_test_label') OR (`state` = 'PENDING'))", stmt.toString()); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/CancelLoadStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/CancelLoadStmtTest.java index f51ac74c3429561..6e8bea509f62f62 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/CancelLoadStmtTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/CancelLoadStmtTest.java @@ -94,7 +94,7 @@ public void testNormal() throws UserException { stmt = new CancelLoadStmt(null, compoundAndPredicate); stmt.analyze(analyzer); Assertions.assertEquals( - "CANCEL LOAD FROM testDb WHERE (`label` = 'doris_test_label') AND (`state` = 'LOADING')", + "CANCEL LOAD FROM testDb WHERE ((`label` = 'doris_test_label') AND (`state` = 'LOADING'))", stmt.toString()); CompoundPredicate compoundOrPredicate = new 
CompoundPredicate(Operator.OR, labelBinaryPredicate, @@ -102,7 +102,7 @@ public void testNormal() throws UserException { stmt = new CancelLoadStmt(null, compoundOrPredicate); stmt.analyze(analyzer); Assertions.assertEquals( - "CANCEL LOAD FROM testDb WHERE (`label` = 'doris_test_label') OR (`state` = 'LOADING')", + "CANCEL LOAD FROM testDb WHERE ((`label` = 'doris_test_label') OR (`state` = 'LOADING'))", stmt.toString()); // test match diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java index 16273abfd094f29..b4299f0f62f60cf 100755 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/SelectStmtTest.java @@ -300,9 +300,9 @@ public void testDeduplicateOrs() throws Exception { String commonExpr2 = "`t3`.`k3` = `t1`.`k3`"; String commonExpr3 = "`t1`.`k1` = `t5`.`k1`"; String commonExpr4 = "t5`.`k2` = 'United States'"; - String betweenExpanded1 = "(CAST(CAST(`t1`.`k4` AS decimalv3(12,2)) AS int) >= 100) AND (CAST(CAST(`t1`.`k4` AS decimalv3(12,2)) AS int) <= 150)"; - String betweenExpanded2 = "(CAST(CAST(`t1`.`k4` AS decimalv3(12,2)) AS int) >= 50) AND (CAST(CAST(`t1`.`k4` AS decimalv3(12,2)) AS int) <= 100)"; - String betweenExpanded3 = "(`t1`.`k4` >= 50) AND (`t1`.`k4` <= 250)"; + String betweenExpanded1 = "(CAST(CAST(`t1`.`k4` AS decimalv3(12,2)) AS int) >= 100)) AND (CAST(CAST(`t1`.`k4` AS decimalv3(12,2)) AS int) <= 150))"; + String betweenExpanded2 = "(CAST(CAST(`t1`.`k4` AS decimalv3(12,2)) AS int) >= 50)) AND (CAST(CAST(`t1`.`k4` AS decimalv3(12,2)) AS int) <= 100))"; + String betweenExpanded3 = "(`t1`.`k4` >= 50)) AND (`t1`.`k4` <= 250)"; String rewrittenSql = stmt.toSql(); Assert.assertTrue(rewrittenSql.contains(commonExpr1)); @@ -346,17 +346,17 @@ public void testDeduplicateOrs() throws Exception { SelectStmt stmt2 = (SelectStmt) UtFrameUtils.parseAndAnalyzeStmt(sql2, ctx); stmt2.rewriteExprs(new Analyzer(ctx.getEnv(), ctx).getExprRewriter()); String fragment3 = - "((((`t1`.`k4` >= 50) AND (`t1`.`k4` <= 300)) AND `t2`.`k2` IN ('United States', 'United States1') " + "(((((`t1`.`k4` >= 50) AND (`t1`.`k4` <= 300)) AND `t2`.`k2` IN ('United States', 'United States1')) " + "AND `t2`.`k3` IN ('CO', 'IL', 'MN', 'OH', 'MT', 'NM', 'TX', 'MO', 'MI')) " - + "AND (`t1`.`k1` = `t2`.`k3`) AND (`t2`.`k2` = 'United States') " - + "AND `t2`.`k3` IN ('CO', 'IL', 'MN') AND (`t1`.`k4` >= 100) AND (`t1`.`k4` <= 200) " + + "AND (((((((`t1`.`k1` = `t2`.`k3`) AND (`t2`.`k2` = 'United States')) " + + "AND `t2`.`k3` IN ('CO', 'IL', 'MN')) AND (`t1`.`k4` >= 100)) AND (`t1`.`k4` <= 200)) " + "OR " - + "(`t1`.`k1` = `t2`.`k1`) AND (`t2`.`k2` = 'United States1') " - + "AND `t2`.`k3` IN ('OH', 'MT', 'NM') AND (`t1`.`k4` >= 150) AND (`t1`.`k4` <= 300) " + + "(((((`t1`.`k1` = `t2`.`k1`) AND (`t2`.`k2` = 'United States1')) " + + "AND `t2`.`k3` IN ('OH', 'MT', 'NM')) AND (`t1`.`k4` >= 150)) AND (`t1`.`k4` <= 300))) " + "OR " - + "(`t1`.`k1` = `t2`.`k1`) AND (`t2`.`k2` = 'United States') " - + "AND `t2`.`k3` IN ('TX', 'MO', 'MI') " - + "AND (`t1`.`k4` >= 50) AND (`t1`.`k4` <= 250))"; + + "(((((`t1`.`k1` = `t2`.`k1`) AND (`t2`.`k2` = 'United States')) " + + "AND `t2`.`k3` IN ('TX', 'MO', 'MI')) " + + "AND (`t1`.`k4` >= 50)) AND (`t1`.`k4` <= 250))))"; Assert.assertTrue(stmt2.toSql().contains(fragment3)); String sql3 = "select\n" @@ -416,7 +416,7 @@ public void testDeduplicateOrs() throws Exception { SelectStmt stmt7 = (SelectStmt) 
UtFrameUtils.parseAndAnalyzeStmt(sql7, ctx); stmt7.rewriteExprs(new Analyzer(ctx.getEnv(), ctx).getExprRewriter()); Assert.assertTrue(stmt7.toSql() - .contains("`t2`.`k1` IS NOT NULL OR `t1`.`k1` IS NOT NULL AND `t1`.`k2` IS NOT NULL")); + .contains("`t2`.`k1` IS NOT NULL OR (`t1`.`k1` IS NOT NULL AND `t1`.`k2` IS NOT NULL)")); String sql8 = "select\n" + " avg(t1.k4)\n" @@ -428,13 +428,13 @@ public void testDeduplicateOrs() throws Exception { SelectStmt stmt8 = (SelectStmt) UtFrameUtils.parseAndAnalyzeStmt(sql8, ctx); stmt8.rewriteExprs(new Analyzer(ctx.getEnv(), ctx).getExprRewriter()); Assert.assertTrue(stmt8.toSql() - .contains("`t2`.`k1` IS NOT NULL AND `t1`.`k1` IS NOT NULL AND `t1`.`k1` IS NOT NULL")); + .contains("(`t2`.`k1` IS NOT NULL AND `t1`.`k1` IS NOT NULL) AND `t1`.`k1` IS NOT NULL")); String sql9 = "select * from db1.tbl1 where (k1='shutdown' and k4<1) or (k1='switchOff' and k4>=1)"; SelectStmt stmt9 = (SelectStmt) UtFrameUtils.parseAndAnalyzeStmt(sql9, ctx); stmt9.rewriteExprs(new Analyzer(ctx.getEnv(), ctx).getExprRewriter()); Assert.assertTrue( - stmt9.toSql().contains("(`k1` = 'shutdown') AND (`k4` < 1) OR (`k1` = 'switchOff') AND (`k4` >= 1)")); + stmt9.toSql().contains("((`k1` = 'shutdown') AND (`k4` < 1)) OR ((`k1` = 'switchOff') AND (`k4` >= 1))")); } @Test diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/ShowBuildIndexStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/ShowBuildIndexStmtTest.java index 61ea17c374bbc37..b4c48af64a883c2 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/ShowBuildIndexStmtTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/ShowBuildIndexStmtTest.java @@ -98,14 +98,14 @@ public ProcNodeInterface open(String path) throws AnalysisException { List orderBy = Arrays.asList( new OrderByElement(new SlotRef(tableName, "TableName"), false, false)); ShowBuildIndexStmt stmt1 = new ShowBuildIndexStmt(null, where, orderBy, new LimitElement(1, 100)); - Assertions.assertEquals(stmt1.toSql(), "SHOW BUILD INDEX WHERE (`a`.`b`.`c`.`createtime` > '%.b.%') " - + "AND (`a`.`b`.`c`.`tablename` = '%.b.%') ORDER BY `a`.`b`.`c`.`TableName` DESC NULLS LAST " + Assertions.assertEquals(stmt1.toSql(), "SHOW BUILD INDEX WHERE ((`a`.`b`.`c`.`createtime` > '%.b.%') " + + "AND (`a`.`b`.`c`.`tablename` = '%.b.%')) ORDER BY `a`.`b`.`c`.`TableName` DESC NULLS LAST " + "LIMIT 1, 100"); stmt1.analyze(analyzer); Assertions.assertEquals(stmt1.toSql(), "SHOW BUILD INDEX FROM `testDb` WHERE " - + "(`a`.`b`.`c`.`createtime` > CAST('%.b.%' AS datetimev2(0))) " - + "AND (`a`.`b`.`c`.`tablename` = '%.b.%') " + + "((`a`.`b`.`c`.`createtime` > CAST('%.b.%' AS datetimev2(0))) " + + "AND (`a`.`b`.`c`.`tablename` = '%.b.%')) " + "ORDER BY `a`.`b`.`c`.`TableName` DESC NULLS LAST LIMIT 1, 100"); Assertions.assertEquals(stmt1.getFilterMap().size(), 2); diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/SqlModeTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/SqlModeTest.java index c27743a951ac062..fe3c1b44f6ec6cc 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/SqlModeTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/SqlModeTest.java @@ -80,7 +80,7 @@ public void testPipesAsConcatMode() { if (!(expr instanceof CompoundPredicate)) { Assert.fail(); } - Assert.assertEquals("'a' OR 'b' OR 'c'", expr.toSql()); + Assert.assertEquals("(('a' OR 'b') OR 'c')", expr.toSql()); } @Test diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java 
b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java index f0880e59f709a62..36194494ee1da23 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/QueryPlanTest.java @@ -2235,15 +2235,15 @@ public void testRewriteOrToIn() throws Exception { sql = "SELECT /*+ SET_VAR(enable_nereids_planner=false) */ * from test1 where (query_time = 1 or query_time = 2) and query_time in (3, 4)"; explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); - Assert.assertTrue(explainString.contains("PREDICATES: `query_time` IN (1, 2) AND `query_time` IN (3, 4)\n")); + Assert.assertTrue(explainString.contains("PREDICATES: (`query_time` IN (1, 2) AND `query_time` IN (3, 4))\n")); sql = "SELECT /*+ SET_VAR(enable_nereids_planner=false) */ * from test1 where (query_time = 1 or query_time = 2 or scan_bytes = 2) and scan_bytes in (2, 3)"; explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); - Assert.assertTrue(explainString.contains("PREDICATES: `query_time` IN (1, 2) OR (`scan_bytes` = 2) AND `scan_bytes` IN (2, 3)\n")); + Assert.assertTrue(explainString.contains("PREDICATES: ((`query_time` IN (1, 2) OR (`scan_bytes` = 2)) AND `scan_bytes` IN (2, 3))\n")); sql = "SELECT /*+ SET_VAR(enable_nereids_planner=false) */ * from test1 where (query_time = 1 or query_time = 2) and (scan_bytes = 2 or scan_bytes = 3)"; explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); - Assert.assertTrue(explainString.contains("PREDICATES: `query_time` IN (1, 2) AND `scan_bytes` IN (2, 3)\n")); + Assert.assertTrue(explainString.contains("PREDICATES: (`query_time` IN (1, 2) AND `scan_bytes` IN (2, 3))\n")); sql = "SELECT /*+ SET_VAR(enable_nereids_planner=false) */ * from test1 where query_time = 1 or query_time = 2 or query_time = 3 or query_time = 1"; explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); @@ -2256,22 +2256,22 @@ public void testRewriteOrToIn() throws Exception { connectContext.getSessionVariable().setRewriteOrToInPredicateThreshold(100); sql = "SELECT /*+ SET_VAR(enable_nereids_planner=false) */ * from test1 where query_time = 1 or query_time = 2 or query_time in (3, 4)"; explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); - Assert.assertTrue(explainString.contains("PREDICATES: (`query_time` = 1) OR (`query_time` = 2) OR `query_time` IN (3, 4)\n")); + Assert.assertTrue(explainString.contains("PREDICATES: (((`query_time` = 1) OR (`query_time` = 2)) OR `query_time` IN (3, 4))\n")); connectContext.getSessionVariable().setRewriteOrToInPredicateThreshold(2); sql = "SELECT /*+ SET_VAR(enable_nereids_planner=false) */ * from test1 where (query_time = 1 or query_time = 2) and query_time in (3, 4)"; explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); - Assert.assertTrue(explainString.contains("PREDICATES: `query_time` IN (1, 2) AND `query_time` IN (3, 4)\n")); + Assert.assertTrue(explainString.contains("PREDICATES: (`query_time` IN (1, 2) AND `query_time` IN (3, 4))\n")); //test we can handle `!=` and `not in` sql = "select /*+ SET_VAR(enable_nereids_planner=false) */ * from test1 where (query_time = 1 or query_time = 2 or query_time!= 3 or query_time not in (5, 6))"; explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); - Assert.assertTrue(explainString.contains("PREDICATES: `query_time` IN (1, 2) OR (`query_time` != 
3) OR `query_time` NOT IN (5, 6)\n")); + Assert.assertTrue(explainString.contains("PREDICATES: (`query_time` IN (1, 2) OR ((`query_time` != 3) OR `query_time` NOT IN (5, 6)))\n")); //test we can handle merge 2 or more columns sql = "select /*+ SET_VAR(enable_nereids_planner=false) */ * from test1 where (query_time = 1 or query_time = 2 or scan_rows = 3 or scan_rows = 4)"; explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); - Assert.assertTrue(explainString.contains("PREDICATES: `query_time` IN (1, 2) OR `scan_rows` IN (3, 4)")); + Assert.assertTrue(explainString.contains("PREDICATES: (`query_time` IN (1, 2) OR `scan_rows` IN (3, 4))")); //merge in-pred or in-pred sql = "select /*+ SET_VAR(enable_nereids_planner=false) */ * from test1 where (query_time = 1 or query_time = 2 or query_time = 3 or query_time = 4)"; @@ -2286,16 +2286,15 @@ public void testRewriteOrToIn() throws Exception { + " or (db not in ('x', 'y')) "; explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); Assert.assertTrue(explainString.contains( - "PREDICATES: (`query_id` = `client_ip`) " - + "AND (`stmt_id` IN (1, 2, 3) OR (`user` = 'abc') AND `state` IN ('a', 'b', 'c', 'd')) " - + "OR (`db` NOT IN ('x', 'y'))\n")); + "PREDICATES: (((`query_id` = `client_ip`) AND (`stmt_id` IN (1, 2, 3) OR ((`user` = 'abc') " + + "AND `state` IN ('a', 'b', 'c', 'd')))) OR (`db` NOT IN ('x', 'y')))\n")); //ExtractCommonFactorsRule may generate more expr, test the rewriteOrToIn applied on generated exprs sql = "select /*+ SET_VAR(enable_nereids_planner=false) */ * from test1 where (stmt_id=1 and state='a') or (stmt_id=2 and state='b')"; explainString = UtFrameUtils.getSQLPlanOrErrorMsg(connectContext, "EXPLAIN " + sql); Assert.assertTrue(explainString.contains( - "PREDICATES: `state` IN ('a', 'b') AND `stmt_id` IN (1, 2) AND" - + " (`stmt_id` = 1) AND (`state` = 'a') OR (`stmt_id` = 2) AND (`state` = 'b')\n" + "PREDICATES: ((`state` IN ('a', 'b') AND `stmt_id` IN (1, 2)) AND (((`stmt_id` = 1) AND " + + "(`state` = 'a')) OR ((`stmt_id` = 2) AND (`state` = 'b'))))\n" )); } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/policy/PolicyTest.java b/fe/fe-core/src/test/java/org/apache/doris/policy/PolicyTest.java index 8d6a2a48ae36a21..aa04c14bbe9c6ba 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/policy/PolicyTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/policy/PolicyTest.java @@ -270,7 +270,7 @@ public void testMergeFilter() throws Exception { createPolicy("CREATE ROW POLICY test_row_policy4 ON test.table1 AS PERMISSIVE TO test_policy USING (k2 = 1)"); String queryStr = "EXPLAIN select /*+ SET_VAR(enable_nereids_planner=false) */ * from test.table1"; String explainString = getSQLPlanOrErrorMsg(queryStr); - Assertions.assertTrue(explainString.contains("(`k1` = 1) AND (`k2` = 1) AND (`k2` = 2) OR (`k2` = 1)")); + Assertions.assertTrue(explainString.contains("(((`k1` = 1) AND (`k2` = 1)) AND ((`k2` = 2) OR (`k2` = 1)))")); dropPolicy("DROP ROW POLICY test_row_policy1 ON test.table1"); dropPolicy("DROP ROW POLICY test_row_policy2 ON test.table1"); dropPolicy("DROP ROW POLICY test_row_policy3 ON test.table1"); @@ -318,13 +318,13 @@ public void testComplexSqlNereidsPlanner() throws Exception { createPolicy("CREATE ROW POLICY test_row_policy1 ON test.table1 AS RESTRICTIVE TO test_policy USING (k1 = 1)"); createPolicy("CREATE ROW POLICY test_row_policy2 ON test.table1 AS RESTRICTIVE TO test_policy USING (k2 = 1)"); String joinSql = "select * from table1 join 
table2 on table1.k1=table2.k1"; - Assertions.assertTrue(getSQLPlanOrErrorMsg(joinSql).contains("PREDICATES: (k1 = 1) AND (k2 = 1)")); + Assertions.assertTrue(getSQLPlanOrErrorMsg(joinSql).contains("PREDICATES: ((k1 = 1) AND (k2 = 1))")); String unionSql = "select * from table1 union select * from table2"; - Assertions.assertTrue(getSQLPlanOrErrorMsg(unionSql).contains("PREDICATES: (k1 = 1) AND (k2 = 1)")); + Assertions.assertTrue(getSQLPlanOrErrorMsg(unionSql).contains("PREDICATES: ((k1 = 1) AND (k2 = 1))")); String subQuerySql = "select * from table2 where k1 in (select k1 from table1)"; - Assertions.assertTrue(getSQLPlanOrErrorMsg(subQuerySql).contains("PREDICATES: (k1 = 1) AND (k2 = 1)")); + Assertions.assertTrue(getSQLPlanOrErrorMsg(subQuerySql).contains("PREDICATES: ((k1 = 1) AND (k2 = 1))")); String aliasSql = "select * from table1 t1 join table2 t2 on t1.k1=t2.k1"; - Assertions.assertTrue(getSQLPlanOrErrorMsg(aliasSql).contains("PREDICATES: (k1 = 1) AND (k2 = 1)")); + Assertions.assertTrue(getSQLPlanOrErrorMsg(aliasSql).contains("PREDICATES: ((k1 = 1) AND (k2 = 1))")); dropPolicy("DROP ROW POLICY test_row_policy1 ON test.table1"); dropPolicy("DROP ROW POLICY test_row_policy2 ON test.table1"); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/qe/OlapQueryCacheTest.java b/fe/fe-core/src/test/java/org/apache/doris/qe/OlapQueryCacheTest.java index cbc12f4ecce8060..3c793cfc72090dd 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/qe/OlapQueryCacheTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/qe/OlapQueryCacheTest.java @@ -640,8 +640,8 @@ public void testSqlCacheKey() { SqlCache sqlCache = (SqlCache) ca.getCache(); String cacheKey = sqlCache.getSqlWithViewStmt(); Assert.assertEquals(cacheKey, "SELECT `eventdate` AS `eventdate`, count(`userid`) " - + "AS `count(``userid``)` FROM `testDb`.`appevent` WHERE (`eventdate` >= '2020-01-12') " - + "AND (`eventdate` <= '2020-01-14') GROUP BY `eventdate`|"); + + "AS `count(``userid``)` FROM `testDb`.`appevent` WHERE ((`eventdate` >= '2020-01-12') " + + "AND (`eventdate` <= '2020-01-14')) GROUP BY `eventdate`|"); Assert.assertEquals(selectedPartitionIds.size(), sqlCache.getSumOfPartitionNum()); } @@ -679,8 +679,8 @@ public void testSqlCacheKeyWithView() { Assert.assertEquals(cacheKey, "SELECT `testDb`.`view1`.`eventdate` AS `eventdate`, " + "`testDb`.`view1`.`__count_1` AS `__count_1` FROM `testDb`.`view1`|" + "SELECT `eventdate` AS `eventdate`, count(`userid`) AS `__count_1` FROM " - + "`testDb`.`appevent` WHERE (`eventdate` >= '2020-01-12') AND " - + "(`eventdate` <= '2020-01-14') GROUP BY `eventdate`"); + + "`testDb`.`appevent` WHERE ((`eventdate` >= '2020-01-12') AND " + + "(`eventdate` <= '2020-01-14')) GROUP BY `eventdate`"); Assert.assertEquals(selectedPartitionIds.size(), sqlCache.getSumOfPartitionNum()); } @@ -698,7 +698,7 @@ public void testSqlCacheKeyWithViewForNereids() { String cacheKey = sqlCache.getSqlWithViewStmt(); Assert.assertEquals(cacheKey, "SELECT * from testDb.view1|SELECT `eventdate` AS `eventdate`, " + "count(`userid`) AS `__count_1` FROM `testDb`.`appevent` " - + "WHERE (`eventdate` >= '2020-01-12') AND (`eventdate` <= '2020-01-14') GROUP BY `eventdate`"); + + "WHERE ((`eventdate` >= '2020-01-12') AND (`eventdate` <= '2020-01-14')) GROUP BY `eventdate`"); Assert.assertEquals(selectedPartitionIds.size(), sqlCache.getSumOfPartitionNum()); } @@ -724,7 +724,7 @@ public void testSqlCacheKeyWithSubSelectView() { Assert.assertEquals(cacheKey, "SELECT `origin`.`eventdate` AS `eventdate`, " + 
"`origin`.`userid` AS `userid` FROM (SELECT `view2`.`eventdate` `eventdate`, " + "`view2`.`userid` `userid` FROM `testDb`.`view2` view2 " - + "WHERE (`view2`.`eventdate` >= '2020-01-12') AND (`view2`.`eventdate` <= '2020-01-14')) origin|" + + "WHERE ((`view2`.`eventdate` >= '2020-01-12') AND (`view2`.`eventdate` <= '2020-01-14'))) origin|" + "SELECT `eventdate` AS `eventdate`, `userid` AS `userid` FROM `testDb`.`appevent`"); Assert.assertEquals(selectedPartitionIds.size(), sqlCache.getSumOfPartitionNum()); } @@ -773,7 +773,7 @@ public void testSqlCacheKeyWithNestedView() { Assert.assertEquals(cacheKey, "SELECT `testDb`.`view4`.`eventdate` AS `eventdate`, " + "`testDb`.`view4`.`__count_1` AS `__count_1` FROM `testDb`.`view4`|" + "SELECT `eventdate` AS `eventdate`, count(`userid`) AS `__count_1` FROM `testDb`.`view2` " - + "WHERE (`eventdate` >= '2020-01-12') AND (`eventdate` <= '2020-01-14') GROUP BY `eventdate`|" + + "WHERE ((`eventdate` >= '2020-01-12') AND (`eventdate` <= '2020-01-14')) GROUP BY `eventdate`|" + "SELECT `eventdate` AS `eventdate`, `userid` AS `userid` FROM `testDb`.`appevent`"); Assert.assertEquals(selectedPartitionIds.size(), sqlCache.getSumOfPartitionNum()); } @@ -791,8 +791,8 @@ public void testSqlCacheKeyWithNestedViewForNereids() { SqlCache sqlCache = (SqlCache) ca.getCache(); String cacheKey = sqlCache.getSqlWithViewStmt(); Assert.assertEquals(cacheKey, "SELECT * from testDb.view4|SELECT `eventdate` AS `eventdate`, " - + "count(`userid`) AS `__count_1` FROM `testDb`.`view2` WHERE (`eventdate` >= '2020-01-12') " - + "AND (`eventdate` <= '2020-01-14') GROUP BY `eventdate`|SELECT `eventdate` AS `eventdate`, " + + "count(`userid`) AS `__count_1` FROM `testDb`.`view2` WHERE ((`eventdate` >= '2020-01-12') " + + "AND (`eventdate` <= '2020-01-14')) GROUP BY `eventdate`|SELECT `eventdate` AS `eventdate`, " + "`userid` AS `userid` FROM `testDb`.`appevent`"); Assert.assertEquals(selectedPartitionIds.size(), sqlCache.getSumOfPartitionNum()); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/rewrite/ExtractCommonFactorsRuleFunctionTest.java b/fe/fe-core/src/test/java/org/apache/doris/rewrite/ExtractCommonFactorsRuleFunctionTest.java index d7841313d714829..915dce122d8dce6 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/rewrite/ExtractCommonFactorsRuleFunctionTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/rewrite/ExtractCommonFactorsRuleFunctionTest.java @@ -100,7 +100,7 @@ public void testCommonFactors() throws Exception { public void testWideCommonFactorsWithOrPredicate() throws Exception { String query = "select /*+ SET_VAR(enable_nereids_planner=false) */ * from tb1 where tb1.k1 > 1000 or tb1.k1 < 200 or tb1.k1 = 300"; String planString = dorisAssert.query(query).explainQuery(); - Assert.assertTrue(planString.contains("(`tb1`.`k1` = 300) OR (`tb1`.`k1` > 1000) OR (`tb1`.`k1` < 200)")); + Assert.assertTrue(planString.contains("((`tb1`.`k1` = 300) OR ((`tb1`.`k1` > 1000) OR (`tb1`.`k1` < 200)))")); } @Test @@ -259,8 +259,8 @@ public void testComplexQuery() throws Exception { Assert.assertTrue(planString.contains("`l_partkey` = `p_partkey`")); Assert.assertTrue(planString.contains("`l_shipmode` IN ('AIR', 'AIR REG')")); Assert.assertTrue(planString.contains("`l_shipinstruct` = 'DELIVER IN PERSON'")); - Assert.assertTrue(planString.contains("(`l_quantity` >= 9.00) AND (`l_quantity` <= 19.00) " - + "OR (`l_quantity` >= 20.00) AND (`l_quantity` <= 36.00)")); + Assert.assertTrue(planString.contains("(((`l_quantity` >= 9.00) AND (`l_quantity` <= 19.00)) " + + 
"OR ((`l_quantity` >= 20.00) AND (`l_quantity` <= 36.00)))")); Assert.assertTrue(planString.contains("`p_size` >= 1")); Assert.assertTrue(planString.contains("`p_brand` IN ('Brand#11', 'Brand#21', 'Brand#32')")); Assert.assertTrue(planString.contains("`p_size` <= 15")); diff --git a/regression-test/data/datatype_p0/scalar_types/get_assignment_compatible_type.out b/regression-test/data/datatype_p0/scalar_types/get_assignment_compatible_type.out index 1875bb026596791..030a9b1286c2141 100644 --- a/regression-test/data/datatype_p0/scalar_types/get_assignment_compatible_type.out +++ b/regression-test/data/datatype_p0/scalar_types/get_assignment_compatible_type.out @@ -1,6 +1,6 @@ -- This file is automatically generated. You should know what you did if you want to edit this -- !test_sql -- -test_decimal_boolean_view CREATE VIEW `test_decimal_boolean_view` AS SELECT `id` AS `id`, `c1` AS `c1`, `c2` AS `c2` FROM `regression_test_datatype_p0_scalar_types`.`test_decimal_boolean` WHERE (0.0 = CAST(`c1` AS decimalv3(2,1))) AND (CAST(`c2` AS decimalv3(6,1)) = 1.0); utf8mb4 utf8mb4_0900_bin +test_decimal_boolean_view CREATE VIEW `test_decimal_boolean_view` AS SELECT `id` AS `id`, `c1` AS `c1`, `c2` AS `c2` FROM `regression_test_datatype_p0_scalar_types`.`test_decimal_boolean` WHERE ((0.0 = CAST(`c1` AS decimalv3(2,1))) AND (CAST(`c2` AS decimalv3(6,1)) = 1.0)); utf8mb4 utf8mb4_0900_bin -- !test_union -- 0.0 diff --git a/regression-test/suites/external_table_p0/jdbc/test_clickhouse_jdbc_catalog.groovy b/regression-test/suites/external_table_p0/jdbc/test_clickhouse_jdbc_catalog.groovy index e83cf6a38f251b7..f92663660af480f 100644 --- a/regression-test/suites/external_table_p0/jdbc/test_clickhouse_jdbc_catalog.groovy +++ b/regression-test/suites/external_table_p0/jdbc/test_clickhouse_jdbc_catalog.groovy @@ -84,7 +84,7 @@ suite("test_clickhouse_jdbc_catalog", "p0,external,clickhouse,external_docker,ex contains """QUERY: SELECT "id", "ts" FROM "doris_test"."ts" WHERE ((FROM_UNIXTIME("ts", '%Y%m%d') >= '2022-01-01'))""" } explain { - sql("select * from ts where nvl(ts,null) >= '2022-01-01';") + sql("select * from ts where nvl(ts,null) >= '1';") contains """QUERY: SELECT "id", "ts" FROM "doris_test"."ts""" } order_qt_func_push2 """select * from ts where ts <= unix_timestamp(from_unixtime(ts,'yyyyMMdd'));""" diff --git a/regression-test/suites/query_p0/explain/test_compoundpredicate_explain.groovy b/regression-test/suites/query_p0/explain/test_compoundpredicate_explain.groovy new file mode 100644 index 000000000000000..fccdd1b2e2fefa9 --- /dev/null +++ b/regression-test/suites/query_p0/explain/test_compoundpredicate_explain.groovy @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_compoundpredicate_explain") { + sql "drop table if exists test_compoundpredicate_explain" + sql """create table test_compoundpredicate_explain + (k1 int, k2 int) + distributed by hash(k1) buckets 3 properties('replication_num' = '1');""" + + sql """INSERT INTO test_compoundpredicate_explain (k1, k2) VALUES (500, 450), (1100, 400), (300, 600), (700, 650), (800, 800), (1500, 300);""" + + def testQueries = [ + "select * from test_compoundpredicate_explain where k1 > 500 and k2 < 700 or k1 < 3000", + "select * from test_compoundpredicate_explain where k1 > 500 or k2 < 700 and k1 < 3000", + "select * from test_compoundpredicate_explain where not (k1 > 500 and k2 < 700) or k1 < 3000", + "select * from test_compoundpredicate_explain where k1 > 500 and (k2 < 700 or k1 < 3000)", + "select * from test_compoundpredicate_explain where not (k1 > 500 or k2 < 700) and k1 < 3000", + "select * from test_compoundpredicate_explain where (k1 > 500 and not k2 < 700) or k1 < 3000", + "select * from test_compoundpredicate_explain where (k1 > 500 and k2 < 700) and (k1 < 3000 or k2 > 400)", + "select * from test_compoundpredicate_explain where not (k1 > 500 or (k2 < 700 and k1 < 3000))", + "select * from test_compoundpredicate_explain where k1 > 500 or not (k2 < 700 and k1 < 3000)", + "select * from test_compoundpredicate_explain where k1 < 1000 and (k2 < 700 or k1 > 500) and not (k2 > 300)", + "select * from test_compoundpredicate_explain where not ((k1 > 500 and k2 < 700) or k1 < 3000)", + "select * from test_compoundpredicate_explain where k1 > 500 and not (k2 < 700 or k1 < 3000)", + "select * from test_compoundpredicate_explain where (k1 > 500 or k2 < 700) and (k1 < 3000 and k2 > 200)", + "select * from test_compoundpredicate_explain where (k1 > 500 and k2 < 700) or not (k1 < 3000 and k2 > 200)" + ] + + testQueries.each { query -> + def explainResult1 = sql "explain all plan ${query}" + def explainResult2 = sql "explain ${query}" + + def predicates2Line = explainResult2.find { line -> + line[0].toString().trim().startsWith("PREDICATES:") + } + + if (predicates2Line != null) { + def predicates2 = predicates2Line[0].split("PREDICATES:").last().trim() + + predicates2 = predicates2?.replaceAll(/\[\#(\d+)\]/) { match, group1 -> "#" + group1 } + + def isMatch = explainResult1.any { line -> + line.toString().contains(predicates2) + } + + log.info("Testing query: " + query) + log.info("Standardized Predicates from PREDICATES: " + predicates2) + log.info("Match found in OPTIMIZED PLAN: " + isMatch) + + assert isMatch : "Predicates are not equal for query: ${query}" + } else { + logger.error("PREDICATES: not found in explain result for query: ${query}") + assert false : "PREDICATES: not found in explain result" + } + } + + sql "drop table if exists test_compoundpredicate_explain" +} \ No newline at end of file From a0cf8b97be31bdfe7ae226ea6d1b25a82b395b26 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Tue, 13 Aug 2024 17:54:02 +0800 Subject: [PATCH 89/94] [Fix](local merge) Fix local exchange dependencies acquired by local merge (#39238) --- be/src/pipeline/dependency.h | 17 +++++++++++------ be/src/pipeline/exec/operator.cpp | 7 ++----- .../local_exchange_source_operator.cpp | 17 +++++++++++++---- be/src/pipeline/pipeline_fragment_context.cpp | 2 +- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/be/src/pipeline/dependency.h b/be/src/pipeline/dependency.h index 36f06b91095b8d3..957a6ca8bd3efeb 100644 --- a/be/src/pipeline/dependency.h +++ b/be/src/pipeline/dependency.h @@ -817,9 +817,9 @@ 
struct LocalExchangeSharedState : public BasicSharedState { std::atomic mem_usage = 0; // We need to make sure to add mem_usage first and then enqueue, otherwise sub mem_usage may cause negative mem_usage during concurrent dequeue. std::mutex le_lock; - virtual void create_dependencies(int operator_id, int node_id) { + virtual void create_dependencies(int local_exchange_id) { for (auto& source_dep : source_deps) { - source_dep = std::make_shared(operator_id, node_id, + source_dep = std::make_shared(local_exchange_id, local_exchange_id, "LOCAL_EXCHANGE_OPERATOR_DEPENDENCY"); source_dep->set_shared_state(this); } @@ -874,6 +874,7 @@ struct LocalExchangeSharedState : public BasicSharedState { }; struct LocalMergeExchangeSharedState : public LocalExchangeSharedState { + ENABLE_FACTORY_CREATOR(LocalMergeExchangeSharedState); LocalMergeExchangeSharedState(int num_instances) : LocalExchangeSharedState(num_instances), _queues_mem_usage(num_instances), @@ -883,14 +884,18 @@ struct LocalMergeExchangeSharedState : public LocalExchangeSharedState { } } - void create_dependencies(int operator_id, int node_id) override { + void create_dependencies(int local_exchange_id) override { sink_deps.resize(source_deps.size()); + std::vector new_deps(sink_deps.size(), nullptr); + source_deps.swap(new_deps); for (size_t i = 0; i < source_deps.size(); i++) { - source_deps[i] = std::make_shared(operator_id, node_id, - "LOCAL_EXCHANGE_OPERATOR_DEPENDENCY"); + source_deps[i] = + std::make_shared(local_exchange_id, local_exchange_id, + "LOCAL_MERGE_EXCHANGE_OPERATOR_DEPENDENCY"); source_deps[i]->set_shared_state(this); sink_deps[i] = std::make_shared( - operator_id, node_id, "LOCAL_EXCHANGE_OPERATOR_SINK_DEPENDENCY", true); + local_exchange_id, local_exchange_id, + "LOCAL_MERGE_EXCHANGE_OPERATOR_SINK_DEPENDENCY", true); sink_deps[i]->set_shared_state(this); } } diff --git a/be/src/pipeline/exec/operator.cpp b/be/src/pipeline/exec/operator.cpp index 1e00b9fcbcbc86c..5f9a904abf23cff 100644 --- a/be/src/pipeline/exec/operator.cpp +++ b/be/src/pipeline/exec/operator.cpp @@ -452,10 +452,7 @@ Status PipelineXLocalState::init(RuntimeState* state, LocalState DCHECK(info.le_state_map.find(_parent->operator_id()) != info.le_state_map.end()); _shared_state = info.le_state_map.at(_parent->operator_id()).first.get(); - auto deps = _shared_state->get_dep_by_channel_id(info.task_idx); - if (deps.size() == 1) { - _dependency = deps.front().get(); - } + _dependency = _shared_state->get_dep_by_channel_id(info.task_idx).front().get(); _wait_for_dependency_timer = ADD_TIMER_WITH_LEVEL( _runtime_profile, "WaitForDependency[" + _dependency->name() + "]Time", 1); } else if (info.shared_state) { @@ -541,7 +538,7 @@ Status PipelineXSinkLocalState::init(RuntimeState* state, LocalSink if constexpr (std::is_same_v) { DCHECK(info.le_state_map.find(_parent->dests_id().front()) != info.le_state_map.end()); _dependency = info.le_state_map.at(_parent->dests_id().front()).second.get(); - _shared_state = (SharedState*)_dependency->shared_state(); + _shared_state = _dependency->shared_state()->template cast(); } else { _shared_state = info.shared_state->template cast(); _dependency = _shared_state->create_sink_dependency( diff --git a/be/src/pipeline/local_exchange/local_exchange_source_operator.cpp b/be/src/pipeline/local_exchange/local_exchange_source_operator.cpp index 0d88545b7e64106..0cffe125a1fdb92 100644 --- a/be/src/pipeline/local_exchange/local_exchange_source_operator.cpp +++ b/be/src/pipeline/local_exchange/local_exchange_source_operator.cpp 
@@ -55,15 +55,24 @@ Status LocalExchangeSourceLocalState::close(RuntimeState* state) { } std::vector LocalExchangeSourceLocalState::dependencies() const { - auto deps = Base::dependencies(); - auto le_deps = _shared_state->get_dep_by_channel_id(_channel_id); - if (le_deps.size() > 1) { + if (_exchanger->get_type() == ExchangeType::LOCAL_MERGE_SORT && _channel_id == 0) { + // If this is a local merge exchange, source operator is runnable only if all sink operators + // set dependencies ready + std::vector deps; + auto le_deps = _shared_state->get_dep_by_channel_id(_channel_id); + DCHECK_GT(le_deps.size(), 1); // If this is a local merge exchange, we should use all dependencies here. for (auto& dep : le_deps) { deps.push_back(dep.get()); } + return deps; + } else if (_exchanger->get_type() == ExchangeType::LOCAL_MERGE_SORT && _channel_id != 0) { + // If this is a local merge exchange and is not the first task, source operators always + // return empty result so no dependencies here. + return {}; + } else { + return Base::dependencies(); } - return deps; } std::string LocalExchangeSourceLocalState::debug_string(int indentation_level) const { diff --git a/be/src/pipeline/pipeline_fragment_context.cpp b/be/src/pipeline/pipeline_fragment_context.cpp index 6f7a59c0f9818c0..928191562b1bc15 100644 --- a/be/src/pipeline/pipeline_fragment_context.cpp +++ b/be/src/pipeline/pipeline_fragment_context.cpp @@ -800,7 +800,7 @@ Status PipelineFragmentContext::_add_local_exchange_impl( } operator_xs.insert(operator_xs.begin(), source_op); - shared_state->create_dependencies(source_op->operator_id(), source_op->node_id()); + shared_state->create_dependencies(local_exchange_id); // 5. Set children for two pipelines separately. std::vector> new_children; From a3f4a92856f08f8d6d99a2da99e09210944f235c Mon Sep 17 00:00:00 2001 From: Gabriel Date: Tue, 13 Aug 2024 17:55:08 +0800 Subject: [PATCH 90/94] [refactor](minor) Init counter in prepare phase (#39287) --- be/src/pipeline/exec/exchange_sink_operator.cpp | 8 +++----- .../exec/partitioned_aggregation_sink_operator.cpp | 2 ++ be/src/vec/common/sort/sorter.cpp | 2 ++ 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/be/src/pipeline/exec/exchange_sink_operator.cpp b/be/src/pipeline/exec/exchange_sink_operator.cpp index 2a8aa56dc62b2f5..f1f6c2d0c5d1ccc 100644 --- a/be/src/pipeline/exec/exchange_sink_operator.cpp +++ b/be/src/pipeline/exec/exchange_sink_operator.cpp @@ -99,7 +99,10 @@ Status ExchangeSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo& inf // Make sure brpc stub is ready before execution. 
for (int i = 0; i < channels.size(); ++i) { RETURN_IF_ERROR(channels[i]->init_stub(state)); + _wait_channel_timer.push_back(_profile->add_nonzero_counter( + fmt::format("WaitForLocalExchangeBuffer{}", i), TUnit ::TIME_NS, timer_name, 1)); } + _wait_broadcast_buffer_timer = ADD_CHILD_TIMER(_profile, "WaitForBroadcastBuffer", timer_name); return Status::OK(); } @@ -142,8 +145,6 @@ Status ExchangeSinkLocalState::open(RuntimeState* state) { _sink_buffer->set_broadcast_dependency(_broadcast_dependency); _broadcast_pb_mem_limiter = vectorized::BroadcastPBlockHolderMemLimiter::create_shared(_broadcast_dependency); - _wait_broadcast_buffer_timer = - ADD_CHILD_TIMER(_profile, "WaitForBroadcastBuffer", timer_name); } else if (local_size > 0) { size_t dep_id = 0; for (auto* channel : channels) { @@ -151,9 +152,6 @@ Status ExchangeSinkLocalState::open(RuntimeState* state) { if (auto dep = channel->get_local_channel_dependency()) { _local_channels_dependency.push_back(dep); DCHECK(_local_channels_dependency[dep_id] != nullptr); - _wait_channel_timer.push_back(_profile->add_nonzero_counter( - fmt::format("WaitForLocalExchangeBuffer{}", dep_id), TUnit ::TIME_NS, - timer_name, 1)); dep_id++; } else { LOG(WARNING) << "local recvr is null: query id = " diff --git a/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp b/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp index a70718e7763275b..980217fe6958911 100644 --- a/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp +++ b/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp @@ -34,6 +34,7 @@ PartitionedAggSinkLocalState::PartitionedAggSinkLocalState(DataSinkOperatorXBase std::make_shared(parent->operator_id(), parent->node_id(), parent->get_name() + "_SPILL_DEPENDENCY", true); } + Status PartitionedAggSinkLocalState::init(doris::RuntimeState* state, doris::pipeline::LocalSinkStateInfo& info) { RETURN_IF_ERROR(Base::init(state, info)); @@ -66,6 +67,7 @@ Status PartitionedAggSinkLocalState::open(RuntimeState* state) { SCOPED_TIMER(Base::_open_timer); return Base::open(state); } + Status PartitionedAggSinkLocalState::close(RuntimeState* state, Status exec_status) { SCOPED_TIMER(Base::exec_time_counter()); SCOPED_TIMER(Base::_close_timer); diff --git a/be/src/vec/common/sort/sorter.cpp b/be/src/vec/common/sort/sorter.cpp index cfbd3cb41c85d17..eca7e15626b2eb0 100644 --- a/be/src/vec/common/sort/sorter.cpp +++ b/be/src/vec/common/sort/sorter.cpp @@ -66,6 +66,7 @@ void MergeSorterState::reset() { unsorted_block_ = Block::create_unique(unsorted_block_->clone_empty()); in_mem_sorted_bocks_size_ = 0; } + Status MergeSorterState::add_sorted_block(Block& block) { auto rows = block.rows(); if (0 == rows) { @@ -279,6 +280,7 @@ Status FullSorter::_do_sort() { } return Status::OK(); } + size_t FullSorter::data_size() const { return _state->data_size(); } From 2d7683f3bf94586e167d34e4563e144ac0bbf9b0 Mon Sep 17 00:00:00 2001 From: hui lai <1353307710@qq.com> Date: Tue, 13 Aug 2024 18:07:16 +0800 Subject: [PATCH 91/94] [fix](routine load) add read lock to fix some concurrent bugs (#39242) --- .../load/routineload/RoutineLoadJob.java | 53 +++++++++++-------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java index a25cd99985892e1..9ecd0b78787a8f7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java
@@ -1635,24 +1635,28 @@ public List getShowInfo() {
     public List> getTasksShowInfo() throws AnalysisException {
         List> rows = Lists.newArrayList();
-        if (null == routineLoadTaskInfoList || routineLoadTaskInfoList.isEmpty()) {
+        readLock();
+        try {
+            if (null == routineLoadTaskInfoList || routineLoadTaskInfoList.isEmpty()) {
+                return rows;
+            }
+            routineLoadTaskInfoList.forEach(entity -> {
+                long txnId = entity.getTxnId();
+                if (RoutineLoadTaskInfo.INIT_TXN_ID == txnId) {
+                    rows.add(entity.getTaskShowInfo());
+                    return;
+                }
+                TransactionState transactionState = Env.getCurrentGlobalTransactionMgr()
+                        .getTransactionState(dbId, entity.getTxnId());
+                if (null != transactionState && null != transactionState.getTransactionStatus()) {
+                    entity.setTxnStatus(transactionState.getTransactionStatus());
+                }
+                rows.add(entity.getTaskShowInfo());
+            });
             return rows;
+        } finally {
+            readUnlock();
         }
-
-        routineLoadTaskInfoList.forEach(entity -> {
-            long txnId = entity.getTxnId();
-            if (RoutineLoadTaskInfo.INIT_TXN_ID == txnId) {
-                rows.add(entity.getTaskShowInfo());
-                return;
-            }
-            TransactionState transactionState = Env.getCurrentGlobalTransactionMgr()
-                    .getTransactionState(dbId, entity.getTxnId());
-            if (null != transactionState && null != transactionState.getTransactionStatus()) {
-                entity.setTxnStatus(transactionState.getTransactionStatus());
-            }
-            rows.add(entity.getTaskShowInfo());
-        });
-        return rows;
     }
 
     public String getShowCreateInfo() {
@@ -1768,12 +1772,17 @@ public List getShowStatistic() {
     private String getTaskStatistic() {
         Map result = Maps.newHashMap();
-        result.put("running_task",
-                String.valueOf(routineLoadTaskInfoList.stream().filter(entity -> entity.isRunning()).count()));
-        result.put("waiting_task",
-                String.valueOf(routineLoadTaskInfoList.stream().filter(entity -> !entity.isRunning()).count()));
-        Gson gson = new GsonBuilder().disableHtmlEscaping().create();
-        return gson.toJson(result);
+        readLock();
+        try {
+            result.put("running_task",
+                    String.valueOf(routineLoadTaskInfoList.stream().filter(entity -> entity.isRunning()).count()));
+            result.put("waiting_task",
+                    String.valueOf(routineLoadTaskInfoList.stream().filter(entity -> !entity.isRunning()).count()));
+            Gson gson = new GsonBuilder().disableHtmlEscaping().create();
+            return gson.toJson(result);
+        } finally {
+            readUnlock();
+        }
     }
 
     private String jobPropertiesToJsonString() {

From 9d912080868117efeed8ceea4ad7f4164a5505e8 Mon Sep 17 00:00:00 2001
From: morrySnow <101034200+morrySnow@users.noreply.github.com>
Date: Tue, 13 Aug 2024 19:29:13 +0800
Subject: [PATCH 92/94] [opt](set operation) INTERSECT should evaluated before others (#39095)

This is a behaviour change PR. The set operation INTERSECT should be evaluated
before the other set operators. Historically, all set operators in Doris shared
the same priority. This PR changes Nereids so that its precedence matches MySQL's.
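A hedged illustration of the change, reusing the t1..t4 tables from the regression test updated below; the commented rewrites are paraphrased groupings, not actual EXPLAIN output:

```sql
-- Before this patch all set operators shared one priority and grouped left to right:
--   ((t1 UNION t2) EXCEPT t3) INTERSECT t4
-- After this patch INTERSECT binds tighter, matching MySQL:
--   (t1 UNION t2) EXCEPT (t3 INTERSECT t4)
select * from t1
union
select * from t2
except
select * from t3
intersect
select * from t4;
```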
--- .../src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 | 3 ++- .../infer_set_operator_distinct.groovy | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index 659355c6597201d..63931a92392536b 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -385,7 +385,8 @@ query queryTerm : queryPrimary #queryTermDefault - | left=queryTerm operator=(UNION | EXCEPT | MINUS | INTERSECT) + | left=queryTerm operator=INTERSECT setQuantifier? right=queryTerm #setOperation + | left=queryTerm operator=(UNION | EXCEPT | MINUS) setQuantifier? right=queryTerm #setOperation ; diff --git a/regression-test/suites/nereids_rules_p0/infer_set_operator_distinct/infer_set_operator_distinct.groovy b/regression-test/suites/nereids_rules_p0/infer_set_operator_distinct/infer_set_operator_distinct.groovy index 169b0c35199cee8..9c33f5a9f453123 100644 --- a/regression-test/suites/nereids_rules_p0/infer_set_operator_distinct/infer_set_operator_distinct.groovy +++ b/regression-test/suites/nereids_rules_p0/infer_set_operator_distinct/infer_set_operator_distinct.groovy @@ -110,7 +110,7 @@ suite("infer_set_operator_distinct") { """ qt_mixed_set_operators """ - explain shape plan select * from t1 union select * from t2 except select * from t3 intersect select * from t4; + explain shape plan (select * from t1 union select * from t2 except select * from t3) intersect select * from t4; """ qt_join_with_union """ @@ -202,7 +202,7 @@ suite("infer_set_operator_distinct") { """ qt_with_hint_mixed_set_operators """ - explain shape plan select /*+ USE_CBO_RULE(INFER_SET_OPERATOR_DISTINCT) */ * from t1 union select * from t2 except select * from t3 intersect select * from t4; + explain shape plan (select /*+ USE_CBO_RULE(INFER_SET_OPERATOR_DISTINCT) */ * from t1 union select * from t2 except select * from t3) intersect select * from t4; """ qt_with_hint_join_with_union """ @@ -294,7 +294,7 @@ suite("infer_set_operator_distinct") { """ qt_with_hint_no_mixed_set_operators """ - explain shape plan select /*+ USE_CBO_RULE(NO_INFER_SET_OPERATOR_DISTINCT) */ * from t1 union select * from t2 except select * from t3 intersect select * from t4; + explain shape plan (select /*+ USE_CBO_RULE(NO_INFER_SET_OPERATOR_DISTINCT) */ * from t1 union select * from t2 except select * from t3) intersect select * from t4; """ qt_with_hint_no_join_with_union """ From af35f40fae84db6aac099f9726607ccbb310694a Mon Sep 17 00:00:00 2001 From: zhangdong <493738387@qq.com> Date: Tue, 13 Aug 2024 19:34:12 +0800 Subject: [PATCH 93/94] [enhance](mtmv) mtmv query sql expand star (#36543) before: SELECT * from user now: SELECT `internal`.`zd`.`user`.`k1`, `internal`.`zd`.`user`.`k2` from `internal`.`zd`.`user` Therefore, there is no need for EnvInfo to store catalog and db information when creating materialized views --- .../apache/doris/analysis/CreateMTMVStmt.java | 9 +- .../java/org/apache/doris/catalog/MTMV.java | 15 ---- .../doris/catalog/OlapTableFactory.java | 13 +-- .../doris/job/extensions/mtmv/MTMVTask.java | 12 +-- .../java/org/apache/doris/mtmv/EnvInfo.java | 51 ------------ .../org/apache/doris/mtmv/MTMVPlanUtil.java | 35 -------- .../plans/commands/info/AlterViewInfo.java | 2 +- .../plans/commands/info/BaseViewInfo.java | 17 +++- .../plans/commands/info/CreateMTMVInfo.java | 34 ++++++-- 
.../plans/commands/info/CreateViewInfo.java | 2 +- .../tablefunction/MetadataGenerator.java | 1 - .../MvInfosTableValuedFunction.java | 1 - .../java/org/apache/doris/mtmv/MTMVTest.java | 2 - .../data/mtmv_p0/test_expand_star_mtmv.out | 7 ++ .../mtmv_p0/test_env_db_dropped_mtmv.groovy | 2 +- .../mtmv_p0/test_expand_star_mtmv.groovy | 82 +++++++++++++++++++ .../mtmv_p0/test_show_create_mtmv.groovy | 2 +- 17 files changed, 139 insertions(+), 148 deletions(-) delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/mtmv/EnvInfo.java create mode 100644 regression-test/data/mtmv_p0/test_expand_star_mtmv.out create mode 100644 regression-test/suites/mtmv_p0/test_expand_star_mtmv.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMTMVStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMTMVStmt.java index 9421bb047c4ece5..d586535572bfc4a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMTMVStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateMTMVStmt.java @@ -19,7 +19,6 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Index; -import org.apache.doris.mtmv.EnvInfo; import org.apache.doris.mtmv.MTMVPartitionInfo; import org.apache.doris.mtmv.MTMVRefreshInfo; import org.apache.doris.mtmv.MTMVRelation; @@ -31,7 +30,6 @@ public class CreateMTMVStmt extends CreateTableStmt { private final MTMVRefreshInfo refreshInfo; private final String querySql; - private final EnvInfo envInfo; private Map mvProperties; private MTMVPartitionInfo mvPartitionInfo; private MTMVRelation relation; @@ -39,12 +37,11 @@ public class CreateMTMVStmt extends CreateTableStmt { public CreateMTMVStmt(boolean ifNotExists, TableName mvName, List columns, MTMVRefreshInfo refreshInfo, KeysDesc keyDesc, DistributionDesc distributionDesc, Map properties, Map mvProperties, String querySql, String comment, - EnvInfo envInfo, PartitionDesc partitionDesc, MTMVPartitionInfo mvPartitionInfo, MTMVRelation relation) { + PartitionDesc partitionDesc, MTMVPartitionInfo mvPartitionInfo, MTMVRelation relation) { super(ifNotExists, false, mvName, columns, new ArrayList(), DEFAULT_ENGINE_NAME, keyDesc, partitionDesc, distributionDesc, properties, null, comment, null, null); this.refreshInfo = refreshInfo; this.querySql = querySql; - this.envInfo = envInfo; this.mvProperties = mvProperties; this.mvPartitionInfo = mvPartitionInfo; this.relation = relation; @@ -58,10 +55,6 @@ public String getQuerySql() { return querySql; } - public EnvInfo getEnvInfo() { - return envInfo; - } - public Map getMvProperties() { return mvProperties; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java index 5097f83e2491e78..cd7583193e8c2b9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java @@ -27,7 +27,6 @@ import org.apache.doris.common.util.PropertyAnalyzer; import org.apache.doris.job.common.TaskStatus; import org.apache.doris.job.extensions.mtmv.MTMVTask; -import org.apache.doris.mtmv.EnvInfo; import org.apache.doris.mtmv.MTMVCache; import org.apache.doris.mtmv.MTMVJobInfo; import org.apache.doris.mtmv.MTMVJobManager; @@ -75,8 +74,6 @@ public class MTMV extends OlapTable { private String querySql; @SerializedName("s") private MTMVStatus status; - @SerializedName("ei") - private EnvInfo envInfo; @SerializedName("ji") private MTMVJobInfo jobInfo; @SerializedName("mp") @@ -108,7 +105,6 @@ public 
MTMV() { this.type = TableType.MATERIALIZED_VIEW; this.querySql = params.querySql; this.refreshInfo = params.refreshInfo; - this.envInfo = params.envInfo; this.status = new MTMVStatus(); this.jobInfo = new MTMVJobInfo(MTMVJobManager.MTMV_JOB_PREFIX + params.tableId); this.mvProperties = params.mvProperties; @@ -140,10 +136,6 @@ public MTMVStatus getStatus() { } } - public EnvInfo getEnvInfo() { - return envInfo; - } - public MTMVJobInfo getJobInfo() { readMvLock(); try { @@ -407,11 +399,6 @@ public void setStatus(MTMVStatus status) { this.status = status; } - // for test - public void setEnvInfo(EnvInfo envInfo) { - this.envInfo = envInfo; - } - // for test public void setJobInfo(MTMVJobInfo jobInfo) { this.jobInfo = jobInfo; @@ -467,7 +454,6 @@ public void readFields(DataInput in) throws IOException { refreshInfo = materializedView.refreshInfo; querySql = materializedView.querySql; status = materializedView.status; - envInfo = materializedView.envInfo; jobInfo = materializedView.jobInfo; mvProperties = materializedView.mvProperties; relation = materializedView.relation; @@ -485,7 +471,6 @@ public String toInfoString() { sb.append("refreshInfo=").append(refreshInfo); sb.append(", querySql='").append(querySql).append('\''); sb.append(", status=").append(status); - sb.append(", envInfo=").append(envInfo); if (jobInfo != null) { sb.append(", jobInfo=").append(jobInfo.toInfoString()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTableFactory.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTableFactory.java index 7d11ed7bdd9c1fa..cc86535c8b1345f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTableFactory.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTableFactory.java @@ -21,7 +21,6 @@ import org.apache.doris.analysis.CreateTableStmt; import org.apache.doris.analysis.DdlStmt; import org.apache.doris.catalog.TableIf.TableType; -import org.apache.doris.mtmv.EnvInfo; import org.apache.doris.mtmv.MTMVPartitionInfo; import org.apache.doris.mtmv.MTMVRefreshInfo; import org.apache.doris.mtmv.MTMVRelation; @@ -48,7 +47,6 @@ public static class OlapTableParams extends BuildParams { public static class MTMVParams extends BuildParams { public MTMVRefreshInfo refreshInfo; - public EnvInfo envInfo; public String querySql; public Map mvProperties; public MTMVPartitionInfo mvPartitionInfo; @@ -154,14 +152,6 @@ private OlapTableFactory withRefreshInfo(MTMVRefreshInfo refreshInfo) { return this; } - private OlapTableFactory withEnvInfo(EnvInfo envInfo) { - Preconditions.checkState(params instanceof MTMVParams, "Invalid argument for " - + params.getClass().getSimpleName()); - MTMVParams mtmvParams = (MTMVParams) params; - mtmvParams.envInfo = envInfo; - return this; - } - private OlapTableFactory withMvPartitionInfo(MTMVPartitionInfo mvPartitionInfo) { Preconditions.checkState(params instanceof MTMVParams, "Invalid argument for " + params.getClass().getSimpleName()); @@ -189,8 +179,7 @@ public OlapTableFactory withExtraParams(DdlStmt stmt) { .withQuerySql(createMTMVStmt.getQuerySql()) .withMvProperties(createMTMVStmt.getMvProperties()) .withMvPartitionInfo(createMTMVStmt.getMvPartitionInfo()) - .withMvRelation(createMTMVStmt.getRelation()) - .withEnvInfo(createMTMVStmt.getEnvInfo()); + .withMvRelation(createMTMVStmt.getRelation()); } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/job/extensions/mtmv/MTMVTask.java b/fe/fe-core/src/main/java/org/apache/doris/job/extensions/mtmv/MTMVTask.java index 
0207301c2d2f642..5c48649bf36b0a0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/job/extensions/mtmv/MTMVTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/job/extensions/mtmv/MTMVTask.java @@ -25,7 +25,6 @@ import org.apache.doris.common.AnalysisException; import org.apache.doris.common.DdlException; import org.apache.doris.common.FeConstants; -import org.apache.doris.common.Pair; import org.apache.doris.common.UserException; import org.apache.doris.common.util.DebugUtil; import org.apache.doris.common.util.TimeUtils; @@ -204,15 +203,8 @@ public void run() throws JobException { } } catch (Throwable e) { if (getStatus() == TaskStatus.RUNNING) { - StringBuilder errMsg = new StringBuilder(); - // when env ctl/db not exist, need give client tips - Pair pair = MTMVPlanUtil.checkEnvInfo(mtmv.getEnvInfo(), ctx); - if (!pair.first) { - errMsg.append(pair.second); - } - errMsg.append(e.getMessage()); - LOG.warn("run task failed: ", errMsg.toString()); - throw new JobException(errMsg.toString(), e); + LOG.warn("run task failed: ", e.getMessage()); + throw new JobException(e.getMessage(), e); } else { // if status is not `RUNNING`,maybe the task was canceled, therefore, it is a normal situation LOG.info("task [{}] interruption running, because status is [{}]", getTaskId(), getStatus()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/EnvInfo.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/EnvInfo.java deleted file mode 100644 index 97ad491cb317b75..000000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/EnvInfo.java +++ /dev/null @@ -1,51 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.mtmv; - -import com.google.gson.annotations.SerializedName; - -/** - * EnvInfo - */ -public class EnvInfo { - @SerializedName("ci") - private long ctlId; - @SerializedName("di") - private long dbId; - - public EnvInfo(long ctlId, long dbId) { - this.ctlId = ctlId; - this.dbId = dbId; - } - - public long getCtlId() { - return ctlId; - } - - public long getDbId() { - return dbId; - } - - @Override - public String toString() { - return "EnvInfo{" - + "ctlId='" + ctlId + '\'' - + ", dbId='" + dbId + '\'' - + '}'; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java index cf80e58eb7e5635..27fe6b8ff6badb6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java @@ -19,13 +19,10 @@ import org.apache.doris.analysis.StatementBase; import org.apache.doris.analysis.UserIdentity; -import org.apache.doris.catalog.DatabaseIf; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.MTMV; import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.TableIf.TableType; -import org.apache.doris.common.Pair; -import org.apache.doris.datasource.CatalogIf; import org.apache.doris.mysql.privilege.Auth; import org.apache.doris.nereids.NereidsPlanner; import org.apache.doris.nereids.exceptions.ParseException; @@ -63,41 +60,9 @@ public static ConnectContext createMTMVContext(MTMV mtmv) { if (workloadGroup.isPresent()) { ctx.getSessionVariable().setWorkloadGroup(workloadGroup.get()); } - // switch catalog; - CatalogIf catalog = Env.getCurrentEnv().getCatalogMgr().getCatalog(mtmv.getEnvInfo().getCtlId()); - // if catalog not exist, it may not have any impact, so there is no error and it will be returned directly - if (catalog == null) { - return ctx; - } - ctx.changeDefaultCatalog(catalog.getName()); - // use db - Optional> databaseIf = catalog.getDb(mtmv.getEnvInfo().getDbId()); - // if db not exist, it may not have any impact, so there is no error and it will be returned directly - if (!databaseIf.isPresent()) { - return ctx; - } - ctx.setDatabase(databaseIf.get().getFullName()); return ctx; } - public static Pair checkEnvInfo(EnvInfo envInfo, ConnectContext ctx) { - if (envInfo.getCtlId() != ctx.getCurrentCatalog().getId()) { - return Pair.of(false, String.format( - "The catalog selected when creating the materialized view was %s, " - + "but now this catalog has been deleted. " - + "Please recreate the materialized view.", - envInfo.getCtlId())); - } - if (envInfo.getDbId() != ctx.getCurrentDbId()) { - return Pair.of(false, String.format( - "The database selected when creating the materialized view was %s, " - + "but now this database has been deleted. 
" - + "Please recreate the materialized view.", - envInfo.getDbId())); - } - return Pair.of(true, ""); - } - public static MTMVRelation generateMTMVRelation(MTMV mtmv, ConnectContext ctx) { // Should not make table without data to empty relation when analyze the related table, // so add disable rules diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/AlterViewInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/AlterViewInfo.java index 73231ab461c82a5..c4b20acaf014eb2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/AlterViewInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/AlterViewInfo.java @@ -104,7 +104,7 @@ public AlterViewStmt translateToLegacyStmt(ConnectContext ctx) { AlterViewStmt alterViewStmt = new AlterViewStmt(viewName.transferToTableName(), cols, null); // expand star(*) in project list and replace table name with qualifier - String rewrittenSql = rewriteSql(ctx.getStatementContext().getIndexInSqlToString()); + String rewrittenSql = rewriteSql(ctx.getStatementContext().getIndexInSqlToString(), querySql); // rewrite project alias rewrittenSql = rewriteProjectsToUserDefineAlias(rewrittenSql); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java index 79a4521309ae615..591c14a71ce7c11 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/BaseViewInfo.java @@ -107,7 +107,15 @@ protected void analyzeAndFillRewriteSqlMap(String sql, ConnectContext ctx) throw analyzedPlan.accept(PlanSlotFinder.INSTANCE, ctx.getStatementContext()); } - protected String rewriteSql(TreeMap, String> indexStringSqlMap) { + /** + * Add the full path to the field + * + * @param indexStringSqlMap key is the start and end position of the sql substring that needs to be replaced, + * and value is the new string used for replacement. 
+ * @param querySql origin query sql + * @return sql rewritten sql + */ + public static String rewriteSql(TreeMap, String> indexStringSqlMap, String querySql) { StringBuilder builder = new StringBuilder(); int beg = 0; for (Map.Entry, String> entry : indexStringSqlMap.entrySet()) { @@ -245,8 +253,11 @@ private static List buildAnalyzeViewJobsForStar() { } } - private static class PlanSlotFinder extends DefaultPlanVisitor { - private static PlanSlotFinder INSTANCE = new PlanSlotFinder(); + /** + * PlanSlotFinder + */ + public static class PlanSlotFinder extends DefaultPlanVisitor { + public static PlanSlotFinder INSTANCE = new PlanSlotFinder(); @Override public Void visitLogicalView(LogicalView alias, StatementContext context) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java index 2e8774f4f8a9343..91940efecb1c812 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java @@ -37,7 +37,6 @@ import org.apache.doris.common.FeNameFormat; import org.apache.doris.common.util.DynamicPartitionUtil; import org.apache.doris.common.util.PropertyAnalyzer; -import org.apache.doris.mtmv.EnvInfo; import org.apache.doris.mtmv.MTMVPartitionInfo; import org.apache.doris.mtmv.MTMVPartitionInfo.MTMVPartitionType; import org.apache.doris.mtmv.MTMVPartitionUtil; @@ -48,10 +47,12 @@ import org.apache.doris.mtmv.MTMVRelation; import org.apache.doris.mtmv.MTMVUtil; import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.nereids.CascadesContext; import org.apache.doris.nereids.NereidsPlanner; import org.apache.doris.nereids.StatementContext; import org.apache.doris.nereids.analyzer.UnboundResultSink; import org.apache.doris.nereids.exceptions.AnalysisException; +import org.apache.doris.nereids.parser.NereidsParser; import org.apache.doris.nereids.properties.PhysicalProperties; import org.apache.doris.nereids.rules.exploration.mv.MaterializedViewUtils; import org.apache.doris.nereids.trees.expressions.Expression; @@ -59,6 +60,8 @@ import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.algebra.OneRowRelation; import org.apache.doris.nereids.trees.plans.commands.ExplainCommand.ExplainLevel; +import org.apache.doris.nereids.trees.plans.commands.info.BaseViewInfo.AnalyzerForCreateView; +import org.apache.doris.nereids.trees.plans.commands.info.BaseViewInfo.PlanSlotFinder; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalSink; import org.apache.doris.nereids.trees.plans.logical.LogicalSubQueryAlias; @@ -101,11 +104,10 @@ public class CreateMTMVInfo { private Map mvProperties = Maps.newHashMap(); private final LogicalPlan logicalQuery; - private final String querySql; + private String querySql; private final MTMVRefreshInfo refreshInfo; private final List columns = Lists.newArrayList(); private final List simpleColumnDefinitions; - private final EnvInfo envInfo; private final MTMVPartitionDefinition mvPartitionDefinition; private PartitionDesc partitionDesc; private MTMVRelation relation; @@ -132,8 +134,6 @@ public CreateMTMVInfo(boolean ifNotExists, TableNameInfo mvName, this.refreshInfo = Objects.requireNonNull(refreshInfo, "require refreshInfo object"); 
this.simpleColumnDefinitions = Objects .requireNonNull(simpleColumnDefinitions, "require simpleColumnDefinitions object"); - this.envInfo = new EnvInfo(ConnectContext.get().getCurrentCatalog().getId(), - ConnectContext.get().getCurrentDbId()); this.mvPartitionDefinition = Objects .requireNonNull(mvPartitionDefinition, "require mtmvPartitionInfo object"); } @@ -182,6 +182,28 @@ public void analyze(ConnectContext ctx) throws Exception { refreshInfo.validate(); analyzeProperties(); + rewriteQuerySql(ctx); + } + + private void rewriteQuerySql(ConnectContext ctx) { + analyzeAndFillRewriteSqlMap(querySql, ctx); + querySql = BaseViewInfo.rewriteSql(ctx.getStatementContext().getIndexInSqlToString(), querySql); + } + + private void analyzeAndFillRewriteSqlMap(String sql, ConnectContext ctx) { + StatementContext stmtCtx = ctx.getStatementContext(); + LogicalPlan parsedViewPlan = new NereidsParser().parseForCreateView(sql); + if (parsedViewPlan instanceof UnboundResultSink) { + parsedViewPlan = (LogicalPlan) ((UnboundResultSink) parsedViewPlan).child(); + } + CascadesContext viewContextForStar = CascadesContext.initContext( + stmtCtx, parsedViewPlan, PhysicalProperties.ANY); + AnalyzerForCreateView analyzerForStar = new AnalyzerForCreateView(viewContextForStar); + analyzerForStar.analyze(); + Plan analyzedPlan = viewContextForStar.getRewritePlan(); + // Traverse all slots in the plan, and add the slot's location information + // and the fully qualified replacement string to the indexInSqlToString of the StatementContext. + analyzedPlan.accept(PlanSlotFinder.INSTANCE, ctx.getStatementContext()); } private void analyzeProperties() { @@ -414,7 +436,7 @@ public CreateMTMVStmt translateToLegacyStmt() { .map(ColumnDefinition::translateToCatalogStyle) .collect(Collectors.toList()); return new CreateMTMVStmt(ifNotExists, tableName, catalogColumns, refreshInfo, keysDesc, - distribution.translateToCatalogStyle(), properties, mvProperties, querySql, comment, envInfo, + distribution.translateToCatalogStyle(), properties, mvProperties, querySql, comment, partitionDesc, mvPartitionInfo, relation); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateViewInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateViewInfo.java index 65eb3453ec304ad..a881be046e7dd1f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateViewInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateViewInfo.java @@ -97,7 +97,7 @@ public CreateViewStmt translateToLegacyStmt(ConnectContext ctx) { CreateViewStmt createViewStmt = new CreateViewStmt(ifNotExists, viewName.transferToTableName(), cols, comment, null); // expand star(*) in project list and replace table name with qualifier - String rewrittenSql = rewriteSql(ctx.getStatementContext().getIndexInSqlToString()); + String rewrittenSql = rewriteSql(ctx.getStatementContext().getIndexInSqlToString(), querySql); // rewrite project alias rewrittenSql = rewriteProjectsToUserDefineAlias(rewrittenSql); diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java index b446cd4210a7641..c318aea0b1ccb89 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MetadataGenerator.java @@ -798,7 +798,6 @@ private static 
TFetchSchemaTableDataResult mtmvMetadataResult(TMetadataTableRequ trow.addToColumnValue(new TCell().setStringVal(mv.getStatus().getRefreshState().name())); trow.addToColumnValue(new TCell().setStringVal(mv.getRefreshInfo().toString())); trow.addToColumnValue(new TCell().setStringVal(mv.getQuerySql())); - trow.addToColumnValue(new TCell().setStringVal(mv.getEnvInfo().toString())); trow.addToColumnValue(new TCell().setStringVal(mv.getMvProperties().toString())); trow.addToColumnValue(new TCell().setStringVal(mv.getMvPartitionInfo().toNameString())); trow.addToColumnValue(new TCell().setBoolVal(MTMVPartitionUtil.isMTMVSync(mv))); diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MvInfosTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MvInfosTableValuedFunction.java index 4135173c34ac473..02002033bbeec7d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MvInfosTableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/MvInfosTableValuedFunction.java @@ -57,7 +57,6 @@ public class MvInfosTableValuedFunction extends MetadataTableValuedFunction { new Column("RefreshState", ScalarType.createStringType()), new Column("RefreshInfo", ScalarType.createStringType()), new Column("QuerySql", ScalarType.createStringType()), - new Column("EnvInfo", ScalarType.createStringType()), new Column("MvProperties", ScalarType.createStringType()), new Column("MvPartitionInfo", ScalarType.createStringType()), new Column("SyncWithBaseTables", ScalarType.createType(PrimitiveType.BOOLEAN))); diff --git a/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVTest.java b/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVTest.java index 64f0958beb8c1b0..ca226218cfc4e3a 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVTest.java @@ -52,7 +52,6 @@ public void testToInfoString() { = "MTMV{refreshInfo=BUILD IMMEDIATE REFRESH COMPLETE ON SCHEDULE EVERY 2 SECOND STARTS ss, " + "querySql='select * from xxx;', " + "status=MTMVStatus{state=INIT, schemaChangeDetail='null', refreshState=INIT}, " - + "envInfo=EnvInfo{ctlId='1', dbId='2'}, " + "jobInfo=MTMVJobInfo{jobName='job1', " + "historyTasks=[MTMVTask{dbId=0, mtmvId=0, taskContext=null, " + "needRefreshPartitions=null, completedPartitions=null, refreshMode=null} " @@ -70,7 +69,6 @@ public void testToInfoString() { mtmv.setRefreshInfo(buildMTMVRefreshInfo(mtmv)); mtmv.setQuerySql("select * from xxx;"); mtmv.setStatus(new MTMVStatus()); - mtmv.setEnvInfo(new EnvInfo(1L, 2L)); mtmv.setJobInfo(buildMTMVJobInfo(mtmv)); mtmv.setMvProperties(new HashMap<>()); mtmv.setRelation(new MTMVRelation(Sets.newHashSet(), Sets.newHashSet(), Sets.newHashSet())); diff --git a/regression-test/data/mtmv_p0/test_expand_star_mtmv.out b/regression-test/data/mtmv_p0/test_expand_star_mtmv.out new file mode 100644 index 000000000000000..dbe6642fb42e926 --- /dev/null +++ b/regression-test/data/mtmv_p0/test_expand_star_mtmv.out @@ -0,0 +1,7 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !query -- +SELECT `internal`.`regression_test_mtmv_p0`.`test_expand_star_mtmv_table`.`k2`, `internal`.`regression_test_mtmv_p0`.`test_expand_star_mtmv_table`.`k3` from `internal`.`regression_test_mtmv_p0`.`test_expand_star_mtmv_table` + +-- !udf -- +SELECT `regression_test_mtmv_p0`.`test_expand_star_mtmv_function` ('2011-01-01','2011-01-03') as `k1` from `internal`.`regression_test_mtmv_p0`.`test_expand_star_mtmv_table` + diff --git a/regression-test/suites/mtmv_p0/test_env_db_dropped_mtmv.groovy b/regression-test/suites/mtmv_p0/test_env_db_dropped_mtmv.groovy index 11063ba0e5bd31f..516f5a648c76760 100644 --- a/regression-test/suites/mtmv_p0/test_env_db_dropped_mtmv.groovy +++ b/regression-test/suites/mtmv_p0/test_env_db_dropped_mtmv.groovy @@ -89,5 +89,5 @@ suite("test_env_db_dropped_mtmv") { waitingMTMVTaskFinishedNotNeedSuccess(jobName) def msg = sql """select ErrorMsg from tasks('type'='mv') where JobName = '${jobName}' order by CreateTime DESC limit 1""" logger.info(msg.toString()) - assertTrue(msg.toString().contains("has been deleted")) + assertTrue(msg.toString().contains("does not exist")) } diff --git a/regression-test/suites/mtmv_p0/test_expand_star_mtmv.groovy b/regression-test/suites/mtmv_p0/test_expand_star_mtmv.groovy new file mode 100644 index 000000000000000..f550dc78c3db332 --- /dev/null +++ b/regression-test/suites/mtmv_p0/test_expand_star_mtmv.groovy @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import org.junit.Assert; + +suite("test_expand_star_mtmv","mtmv") { + String suiteName = "test_expand_star_mtmv" + String tableName = "${suiteName}_table" + String mvName = "${suiteName}_mv" + String functionName = "${suiteName}_function" + String dbName = context.config.getDbNameByFile(context.file) + + sql """drop table if exists `${tableName}`""" + sql """drop materialized view if exists ${mvName};""" + + sql """ + CREATE TABLE ${tableName} + ( + k2 INT, + k3 varchar(32) + ) + DISTRIBUTED BY HASH(k2) BUCKETS 2 + PROPERTIES ( + "replication_num" = "1" + ); + """ + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ( + 'replication_num' = '1', + 'version_info'='3' + ) + AS + SELECT * from ${tableName}; + """ + + order_qt_query "SELECT QuerySql FROM mv_infos('database'='${dbName}') where Name = '${mvName}'" + + sql """drop materialized view if exists ${mvName};""" + + def jarPath = """${context.config.suitePath}/javaudf_p0/jars/java-udf-case-jar-with-dependencies.jar""" + scp_udf_file_to_all_be(jarPath) + log.info("jarPath:${jarPath}") + + sql "drop function if exists ${functionName}(date, date)" + sql """ CREATE FUNCTION ${functionName}(date, date) RETURNS boolean PROPERTIES ( + "file"="file://${jarPath}", + "symbol"="org.apache.doris.udf.DateTest1", + "type"="JAVA_UDF" + ); """ + + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH AUTO ON MANUAL + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ( + 'replication_num' = '1', + 'version_info'='3' + ) + AS + SELECT ${functionName} ('2011-01-01','2011-01-03') as k1 from ${tableName}; + """ + order_qt_udf "SELECT QuerySql FROM mv_infos('database'='${dbName}') where Name = '${mvName}'" + sql "drop function if exists ${functionName}(date, date)" + sql """drop materialized view if exists ${mvName};""" +} diff --git a/regression-test/suites/mtmv_p0/test_show_create_mtmv.groovy b/regression-test/suites/mtmv_p0/test_show_create_mtmv.groovy index 34cda2f6ffd78b6..88f9de70865189a 100644 --- a/regression-test/suites/mtmv_p0/test_show_create_mtmv.groovy +++ b/regression-test/suites/mtmv_p0/test_show_create_mtmv.groovy @@ -60,7 +60,7 @@ suite("test_show_create_mtmv","mtmv") { assertTrue(showCreateMTMVResult.toString().contains("DUPLICATE KEY(`k1`, `k2`)")) assertTrue(showCreateMTMVResult.toString().contains("PARTITION BY (date_trunc(`k2`, 'month'))")) assertTrue(showCreateMTMVResult.toString().contains("DISTRIBUTED BY RANDOM BUCKETS 2")) - assertTrue(showCreateMTMVResult.toString().contains("SELECT * FROM")) + assertTrue(showCreateMTMVResult.toString().contains("SELECT")) assertTrue(showCreateMTMVResult.toString().contains("grace_period")) sql """drop materialized view if exists ${mvName};""" From 66f624376eb9d692c26ecd7841e7135762c50fda Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Tue, 13 Aug 2024 19:42:46 +0800 Subject: [PATCH 94/94] [fix](Nereids) polish function signature search algorithm (#38497) use array for array - array_avg - array_cum_sum - array_difference - array_product use array for array - bitmap_from_array use double first - fmod - pmod let high order function throw friendly exception - array_filter - array_first - array_last - array_reverse_split - array_sort_by - array_split let return type same as parameter's type - array_push_back - array_push_front - array_with_constant - if - nullf let greatest / least work same as mysql's greatest --- .../rules/analysis/ExpressionAnalyzer.java | 4 +- 
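Two hedged illustrations of the intended behaviour (the queries and literal values are made up for this note; the quoted error text is the message string visible in the diff below):

```sql
-- greatest/least with mixed argument types now resolve their common type the
-- way MySQL's greatest does
SELECT greatest(1, 2.5, '3');

-- calling a higher-order function without a lambda now fails with a friendly
-- message ("The 1st arg of array_filter must be lambda but is ...") instead of
-- an internal error
SELECT array_filter(array(1, 2, 3));
```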
.../functions/scalar/ArrayAvg.java | 4 +- .../functions/scalar/ArrayCumSum.java | 4 +- .../functions/scalar/ArrayDifference.java | 2 +- .../functions/scalar/ArrayEnumerateUniq.java | 3 +- .../functions/scalar/ArrayFilter.java | 2 +- .../functions/scalar/ArrayFirst.java | 5 -- .../functions/scalar/ArrayLast.java | 5 -- .../functions/scalar/ArrayProduct.java | 4 +- .../functions/scalar/ArrayPushBack.java | 4 +- .../functions/scalar/ArrayPushFront.java | 4 +- .../functions/scalar/ArrayReverseSplit.java | 14 +++--- .../functions/scalar/ArraySortBy.java | 2 +- .../functions/scalar/ArraySplit.java | 6 +-- .../functions/scalar/ArrayWithConstant.java | 4 +- .../functions/scalar/BitmapFromArray.java | 6 +-- .../functions/scalar/Coalesce.java | 6 +-- .../expressions/functions/scalar/Fmod.java | 4 +- .../functions/scalar/Greatest.java | 47 +++++++++++++------ .../expressions/functions/scalar/If.java | 11 ++++- .../expressions/functions/scalar/LastDay.java | 6 +-- .../expressions/functions/scalar/Least.java | 47 +++++++++++++------ .../functions/scalar/MinutesDiff.java | 2 +- .../expressions/functions/scalar/NullIf.java | 39 ++------------- .../expressions/functions/scalar/Pmod.java | 42 +++++++++++++++-- .../doris/nereids/util/TypeCoercionUtils.java | 32 ++++++++++++- .../nereids_function_p0/type_coercion.out | 34 ++++++++++++++ .../nereids_function_p0/type_coercion.groovy | 30 ++++++++++++ 28 files changed, 253 insertions(+), 120 deletions(-) create mode 100644 regression-test/data/nereids_function_p0/type_coercion.out create mode 100644 regression-test/suites/nereids_function_p0/type_coercion.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ExpressionAnalyzer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ExpressionAnalyzer.java index afec568545bc0fc..91b5bf671ca9158 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ExpressionAnalyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/ExpressionAnalyzer.java @@ -811,9 +811,7 @@ protected void couldNotFoundColumn(UnboundSlot unboundSlot, String tableName) { Lambda lambdaClosure = lambda.withLambdaFunctionArguments(lambdaFunction, arrayItemReferences); // We don't add the ArrayExpression in high order function at all - return unboundFunction.withChildren(ImmutableList.builder() - .add(lambdaClosure) - .build()); + return unboundFunction.withChildren(ImmutableList.of(lambdaClosure)); } private boolean shouldBindSlotBy(int namePartSize, Slot boundSlot) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayAvg.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayAvg.java index eac07935a38447a..e07351f2a5c6e30 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayAvg.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayAvg.java @@ -48,6 +48,7 @@ public class ArrayAvg extends ScalarFunction implements ExplicitlyCastableSignat ComputePrecisionForArrayItemAgg, UnaryExpression, AlwaysNullable { public static final List SIGNATURES = ImmutableList.of( + FunctionSignature.ret(DoubleType.INSTANCE).args(ArrayType.of(DoubleType.INSTANCE)), FunctionSignature.ret(DoubleType.INSTANCE).args(ArrayType.of(BooleanType.INSTANCE)), FunctionSignature.ret(DoubleType.INSTANCE).args(ArrayType.of(TinyIntType.INSTANCE)), 
FunctionSignature.ret(DoubleType.INSTANCE).args(ArrayType.of(SmallIntType.INSTANCE)), @@ -56,8 +57,7 @@ public class ArrayAvg extends ScalarFunction implements ExplicitlyCastableSignat FunctionSignature.ret(DoubleType.INSTANCE).args(ArrayType.of(LargeIntType.INSTANCE)), FunctionSignature.ret(DecimalV3Type.WILDCARD).args(ArrayType.of(DecimalV3Type.WILDCARD)), FunctionSignature.ret(DecimalV2Type.SYSTEM_DEFAULT).args(ArrayType.of(DecimalV2Type.SYSTEM_DEFAULT)), - FunctionSignature.ret(DoubleType.INSTANCE).args(ArrayType.of(FloatType.INSTANCE)), - FunctionSignature.ret(DoubleType.INSTANCE).args(ArrayType.of(DoubleType.INSTANCE)) + FunctionSignature.ret(DoubleType.INSTANCE).args(ArrayType.of(FloatType.INSTANCE)) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayCumSum.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayCumSum.java index f3f0e54e5c2f3cc..4b469b8cbdb7fb0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayCumSum.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayCumSum.java @@ -47,6 +47,7 @@ public class ArrayCumSum extends ScalarFunction implements ExplicitlyCastableSignature, ComputePrecisionForArrayItemAgg, UnaryExpression, PropagateNullable { public static final List SIGNATURES = ImmutableList.of( + FunctionSignature.ret(ArrayType.of(DoubleType.INSTANCE)).args(ArrayType.of(DoubleType.INSTANCE)), FunctionSignature.ret(ArrayType.of(BigIntType.INSTANCE)).args(ArrayType.of(TinyIntType.INSTANCE)), FunctionSignature.ret(ArrayType.of(BigIntType.INSTANCE)).args(ArrayType.of(SmallIntType.INSTANCE)), FunctionSignature.ret(ArrayType.of(BigIntType.INSTANCE)).args(ArrayType.of(IntegerType.INSTANCE)), @@ -55,8 +56,7 @@ public class ArrayCumSum extends ScalarFunction FunctionSignature.ret(ArrayType.of(DecimalV3Type.WILDCARD)).args(ArrayType.of(DecimalV3Type.WILDCARD)), FunctionSignature.ret(ArrayType.of(DecimalV2Type.SYSTEM_DEFAULT)) .args(ArrayType.of(DecimalV2Type.SYSTEM_DEFAULT)), - FunctionSignature.ret(ArrayType.of(DoubleType.INSTANCE)).args(ArrayType.of(FloatType.INSTANCE)), - FunctionSignature.ret(ArrayType.of(DoubleType.INSTANCE)).args(ArrayType.of(DoubleType.INSTANCE)) + FunctionSignature.ret(ArrayType.of(DoubleType.INSTANCE)).args(ArrayType.of(FloatType.INSTANCE)) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayDifference.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayDifference.java index ce1aa70b0f16487..21245315e3230c3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayDifference.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayDifference.java @@ -46,13 +46,13 @@ public class ArrayDifference extends ScalarFunction implements UnaryExpression, ExplicitlyCastableSignature, PropagateNullable { public static final List SIGNATURES = ImmutableList.of( + FunctionSignature.ret(ArrayType.of(DoubleType.INSTANCE)).args(ArrayType.of(DoubleType.INSTANCE)), FunctionSignature.ret(ArrayType.of(SmallIntType.INSTANCE)).args(ArrayType.of(TinyIntType.INSTANCE)), FunctionSignature.ret(ArrayType.of(IntegerType.INSTANCE)).args(ArrayType.of(SmallIntType.INSTANCE)), FunctionSignature.ret(ArrayType.of(BigIntType.INSTANCE)).args(ArrayType.of(IntegerType.INSTANCE)), 
FunctionSignature.ret(ArrayType.of(LargeIntType.INSTANCE)).args(ArrayType.of(BigIntType.INSTANCE)), FunctionSignature.ret(ArrayType.of(LargeIntType.INSTANCE)).args(ArrayType.of(LargeIntType.INSTANCE)), FunctionSignature.ret(ArrayType.of(DoubleType.INSTANCE)).args(ArrayType.of(FloatType.INSTANCE)), - FunctionSignature.ret(ArrayType.of(DoubleType.INSTANCE)).args(ArrayType.of(DoubleType.INSTANCE)), FunctionSignature.retArgType(0).args(ArrayType.of(DecimalV2Type.SYSTEM_DEFAULT)), FunctionSignature.retArgType(0).args(ArrayType.of(DecimalV3Type.WILDCARD)) ); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayEnumerateUniq.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayEnumerateUniq.java index 4743cf15516b5f0..99eba19e5dc5863 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayEnumerateUniq.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayEnumerateUniq.java @@ -41,7 +41,8 @@ public class ArrayEnumerateUniq extends ScalarFunction implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(ArrayType.of(BigIntType.INSTANCE)).varArgs(ArrayType.of(new AnyDataType(0))) + FunctionSignature.ret(ArrayType.of(BigIntType.INSTANCE)) + .varArgs(ArrayType.of(AnyDataType.INSTANCE_WITHOUT_INDEX)) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayFilter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayFilter.java index 386449116c0e3cb..a71ddbb4d4f2c52 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayFilter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayFilter.java @@ -49,7 +49,7 @@ private ArrayFilter(List expressions) { * array_filter(lambda, a1, ...) = array_filter(a1, array_map(lambda, a1, ...)) */ public ArrayFilter(Expression arg) { - super("array_filter", arg.child(1).child(0), new ArrayMap(arg)); + super("array_filter", arg instanceof Lambda ? 
arg.child(1).child(0) : arg, new ArrayMap(arg)); if (!(arg instanceof Lambda)) { throw new AnalysisException( String.format("The 1st arg of %s must be lambda but is %s", getName(), arg)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayFirst.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayFirst.java index 8563a3e455f248d..da5324f301df8bb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayFirst.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayFirst.java @@ -18,7 +18,6 @@ package org.apache.doris.nereids.trees.expressions.functions.scalar; import org.apache.doris.catalog.FunctionSignature; -import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; @@ -36,10 +35,6 @@ public class ArrayFirst extends ElementAt */ public ArrayFirst(Expression arg) { super(new ArrayFilter(arg), new BigIntLiteral(1)); - if (!(arg instanceof Lambda)) { - throw new AnalysisException( - String.format("The 1st arg of %s must be lambda but is %s", getName(), arg)); - } } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayLast.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayLast.java index 71466614e3e0975..d90dd6b30e6034c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayLast.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayLast.java @@ -18,7 +18,6 @@ package org.apache.doris.nereids.trees.expressions.functions.scalar; import org.apache.doris.catalog.FunctionSignature; -import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.literal.BigIntLiteral; @@ -36,10 +35,6 @@ public class ArrayLast extends ElementAt */ public ArrayLast(Expression arg) { super(new ArrayFilter(arg), new BigIntLiteral(-1)); - if (!(arg instanceof Lambda)) { - throw new AnalysisException( - String.format("The 1st arg of %s must be lambda but is %s", getName(), arg)); - } } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayProduct.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayProduct.java index 16aaf27c14872ad..ecc4eaac5ebfe4c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayProduct.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayProduct.java @@ -48,6 +48,7 @@ public class ArrayProduct extends ScalarFunction implements ExplicitlyCastableSi ComputePrecisionForArrayItemAgg, UnaryExpression, AlwaysNullable { public static final List SIGNATURES = ImmutableList.of( + FunctionSignature.ret(DoubleType.INSTANCE).args(ArrayType.of(DoubleType.INSTANCE)), FunctionSignature.ret(DoubleType.INSTANCE).args(ArrayType.of(BooleanType.INSTANCE)), FunctionSignature.ret(DoubleType.INSTANCE).args(ArrayType.of(TinyIntType.INSTANCE)), FunctionSignature.ret(DoubleType.INSTANCE).args(ArrayType.of(SmallIntType.INSTANCE)), @@ -56,8 +57,7 @@ public class ArrayProduct extends ScalarFunction 
implements ExplicitlyCastableSi FunctionSignature.ret(DoubleType.INSTANCE).args(ArrayType.of(LargeIntType.INSTANCE)), FunctionSignature.ret(DecimalV3Type.WILDCARD).args(ArrayType.of(DecimalV3Type.WILDCARD)), FunctionSignature.ret(DecimalV2Type.SYSTEM_DEFAULT).args(ArrayType.of(DecimalV2Type.SYSTEM_DEFAULT)), - FunctionSignature.ret(DoubleType.INSTANCE).args(ArrayType.of(FloatType.INSTANCE)), - FunctionSignature.ret(DoubleType.INSTANCE).args(ArrayType.of(DoubleType.INSTANCE)) + FunctionSignature.ret(DoubleType.INSTANCE).args(ArrayType.of(FloatType.INSTANCE)) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayPushBack.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayPushBack.java index c1791a66f076680..932c24461329030 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayPushBack.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayPushBack.java @@ -25,6 +25,7 @@ import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.ArrayType; import org.apache.doris.nereids.types.coercion.AnyDataType; +import org.apache.doris.nereids.types.coercion.FollowToAnyDataType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -38,7 +39,8 @@ public class ArrayPushBack extends ScalarFunction implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.retArgType(0).args(ArrayType.of(new AnyDataType(0)), new AnyDataType(0)) + FunctionSignature.retArgType(0) + .args(ArrayType.of(new AnyDataType(0)), new FollowToAnyDataType(0)) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayPushFront.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayPushFront.java index 458e062ba4664f8..26e1cdd91e3d21c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayPushFront.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayPushFront.java @@ -25,6 +25,7 @@ import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.ArrayType; import org.apache.doris.nereids.types.coercion.AnyDataType; +import org.apache.doris.nereids.types.coercion.FollowToAnyDataType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -38,7 +39,8 @@ public class ArrayPushFront extends ScalarFunction implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.retArgType(0).args(ArrayType.of(new AnyDataType(0)), new AnyDataType(0)) + FunctionSignature.retArgType(0) + .args(ArrayType.of(new AnyDataType(0)), new FollowToAnyDataType(0)) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayReverseSplit.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayReverseSplit.java index 4b7cea0f23dbcad..cb76a723091d279 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayReverseSplit.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayReverseSplit.java @@ -27,6 +27,7 @@ import org.apache.doris.nereids.types.coercion.AnyDataType; import org.apache.doris.nereids.types.coercion.FollowToArgumentType; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import java.util.List; @@ -41,10 +42,6 @@ public class ArrayReverseSplit extends ScalarFunction implements PropagateNullab ArrayType.of(AnyDataType.INSTANCE_WITHOUT_INDEX), ArrayType.of(BooleanType.INSTANCE))); - private ArrayReverseSplit(List expressions) { - super("array_reverse_split", expressions); - } - /** * constructor with arguments. */ @@ -57,7 +54,7 @@ public ArrayReverseSplit(Expression arg0, Expression arg1) { * array_split(lambda, a1, ...) = array_split(a1, array_map(lambda, a1, ...)) */ public ArrayReverseSplit(Expression arg) { - super("array_reverse_split", arg.child(1).child(0), new ArrayMap(arg)); + super("array_reverse_split", arg instanceof Lambda ? arg.child(1).child(0) : arg, new ArrayMap(arg)); if (!(arg instanceof Lambda)) { throw new AnalysisException( String.format("The 1st arg of %s must be lambda but is %s", getName(), arg)); @@ -66,7 +63,12 @@ public ArrayReverseSplit(Expression arg) { @Override public ArrayReverseSplit withChildren(List children) { - return new ArrayReverseSplit(children.get(0), children.get(1)); + Preconditions.checkArgument(children.size() == 1 || children.size() == 2); + if (children.size() == 1) { + return new ArrayReverseSplit(children.get(0)); + } else { + return new ArrayReverseSplit(children.get(0), children.get(1)); + } } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArraySortBy.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArraySortBy.java index c2c90717cdcc59e..5271662ea958ad8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArraySortBy.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArraySortBy.java @@ -48,7 +48,7 @@ private ArraySortBy(List expressions) { * array_sortby(lambda, a1, ...) = array_sortby(a1, array_map(lambda, a1, ...)) */ public ArraySortBy(Expression arg) { - super("array_sortby", arg.child(1).child(0), new ArrayMap(arg)); + super("array_sortby", arg instanceof Lambda ? arg.child(1).child(0) : arg, new ArrayMap(arg)); if (!(arg instanceof Lambda)) { throw new AnalysisException( String.format("The 1st arg of %s must be lambda but is %s", getName(), arg)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArraySplit.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArraySplit.java index 07ca4aafe65baf4..4ecd989df30d482 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArraySplit.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArraySplit.java @@ -41,10 +41,6 @@ public class ArraySplit extends ScalarFunction implements PropagateNullable, Hig ArrayType.of(AnyDataType.INSTANCE_WITHOUT_INDEX), ArrayType.of(BooleanType.INSTANCE))); - private ArraySplit(List expressions) { - super("array_split", expressions); - } - /** * constructor with arguments. */ @@ -57,7 +53,7 @@ public ArraySplit(Expression arg0, Expression arg1) { * array_split(lambda, a1, ...) 
= array_split(a1, array_map(lambda, a1, ...)) */ public ArraySplit(Expression arg) { - super("array_split", arg.child(1).child(0), new ArrayMap(arg)); + super("array_split", arg instanceof Lambda ? arg.child(1).child(0) : arg, new ArrayMap(arg)); if (!(arg instanceof Lambda)) { throw new AnalysisException( String.format("The 1st arg of %s must be lambda but is %s", getName(), arg)); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayWithConstant.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayWithConstant.java index 49485e7a0facfc7..26d76ac9aad8a6f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayWithConstant.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ArrayWithConstant.java @@ -26,6 +26,7 @@ import org.apache.doris.nereids.types.ArrayType; import org.apache.doris.nereids.types.BigIntType; import org.apache.doris.nereids.types.coercion.AnyDataType; +import org.apache.doris.nereids.types.coercion.FollowToAnyDataType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -39,7 +40,8 @@ public class ArrayWithConstant extends ScalarFunction implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNotNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(ArrayType.of(new AnyDataType(0))).args(BigIntType.INSTANCE, new AnyDataType(0)) + FunctionSignature.ret(ArrayType.of(new FollowToAnyDataType(0))) + .args(BigIntType.INSTANCE, new AnyDataType(0)) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/BitmapFromArray.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/BitmapFromArray.java index 00f950d589bab44..8304e0684a52413 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/BitmapFromArray.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/BitmapFromArray.java @@ -42,10 +42,10 @@ public class BitmapFromArray extends ScalarFunction implements UnaryExpression, ExplicitlyCastableSignature, AlwaysNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(BitmapType.INSTANCE).args(ArrayType.of(TinyIntType.INSTANCE)), - FunctionSignature.ret(BitmapType.INSTANCE).args(ArrayType.of(SmallIntType.INSTANCE)), + FunctionSignature.ret(BitmapType.INSTANCE).args(ArrayType.of(BigIntType.INSTANCE)), FunctionSignature.ret(BitmapType.INSTANCE).args(ArrayType.of(IntegerType.INSTANCE)), - FunctionSignature.ret(BitmapType.INSTANCE).args(ArrayType.of(BigIntType.INSTANCE)) + FunctionSignature.ret(BitmapType.INSTANCE).args(ArrayType.of(SmallIntType.INSTANCE)), + FunctionSignature.ret(BitmapType.INSTANCE).args(ArrayType.of(TinyIntType.INSTANCE)) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Coalesce.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Coalesce.java index 7dca8a7f2ea14e8..f1d122d0179d4f1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Coalesce.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Coalesce.java @@ -58,12 +58,12 @@ public class Coalesce extends ScalarFunction 
FunctionSignature.ret(IntegerType.INSTANCE).varArgs(IntegerType.INSTANCE), FunctionSignature.ret(BigIntType.INSTANCE).varArgs(BigIntType.INSTANCE), FunctionSignature.ret(LargeIntType.INSTANCE).varArgs(LargeIntType.INSTANCE), - FunctionSignature.ret(FloatType.INSTANCE).varArgs(FloatType.INSTANCE), FunctionSignature.ret(DoubleType.INSTANCE).varArgs(DoubleType.INSTANCE), - FunctionSignature.ret(DateTimeType.INSTANCE).varArgs(DateTimeType.INSTANCE), - FunctionSignature.ret(DateType.INSTANCE).varArgs(DateType.INSTANCE), + FunctionSignature.ret(FloatType.INSTANCE).varArgs(FloatType.INSTANCE), FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).varArgs(DateTimeV2Type.SYSTEM_DEFAULT), + FunctionSignature.ret(DateTimeType.INSTANCE).varArgs(DateTimeType.INSTANCE), FunctionSignature.ret(DateV2Type.INSTANCE).varArgs(DateV2Type.INSTANCE), + FunctionSignature.ret(DateType.INSTANCE).varArgs(DateType.INSTANCE), FunctionSignature.ret(DecimalV3Type.WILDCARD).varArgs(DecimalV3Type.WILDCARD), FunctionSignature.ret(DecimalV2Type.SYSTEM_DEFAULT).varArgs(DecimalV2Type.SYSTEM_DEFAULT), FunctionSignature.ret(BitmapType.INSTANCE).varArgs(BitmapType.INSTANCE), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Fmod.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Fmod.java index 5173903147e88d4..08ee2af55f8dc2e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Fmod.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Fmod.java @@ -38,8 +38,8 @@ public class Fmod extends ScalarFunction implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(FloatType.INSTANCE).args(FloatType.INSTANCE, FloatType.INSTANCE), - FunctionSignature.ret(DoubleType.INSTANCE).args(DoubleType.INSTANCE, DoubleType.INSTANCE) + FunctionSignature.ret(DoubleType.INSTANCE).args(DoubleType.INSTANCE, DoubleType.INSTANCE), + FunctionSignature.ret(FloatType.INSTANCE).args(FloatType.INSTANCE, FloatType.INSTANCE) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Greatest.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Greatest.java index 0cb415c287eefa0..92028076701b427 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Greatest.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Greatest.java @@ -23,6 +23,7 @@ import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; +import org.apache.doris.nereids.types.DataType; import org.apache.doris.nereids.types.DateTimeType; import org.apache.doris.nereids.types.DateTimeV2Type; import org.apache.doris.nereids.types.DateType; @@ -37,6 +38,7 @@ import org.apache.doris.nereids.types.StringType; import org.apache.doris.nereids.types.TinyIntType; import org.apache.doris.nereids.types.VarcharType; +import org.apache.doris.nereids.types.coercion.CharacterType; import org.apache.doris.nereids.util.ExpressionUtils; import com.google.common.base.Preconditions; @@ -51,21 +53,21 @@ public class Greatest extends ScalarFunction implements ExplicitlyCastableSignature, PropagateNullable { public static final List 
SIGNATURES = ImmutableList.of( - FunctionSignature.ret(TinyIntType.INSTANCE).varArgs(TinyIntType.INSTANCE), - FunctionSignature.ret(SmallIntType.INSTANCE).varArgs(SmallIntType.INSTANCE), - FunctionSignature.ret(IntegerType.INSTANCE).varArgs(IntegerType.INSTANCE), - FunctionSignature.ret(BigIntType.INSTANCE).varArgs(BigIntType.INSTANCE), - FunctionSignature.ret(LargeIntType.INSTANCE).varArgs(LargeIntType.INSTANCE), - FunctionSignature.ret(FloatType.INSTANCE).varArgs(FloatType.INSTANCE), + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).varArgs(VarcharType.SYSTEM_DEFAULT), + FunctionSignature.ret(StringType.INSTANCE).varArgs(StringType.INSTANCE), + FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).varArgs(DateTimeV2Type.SYSTEM_DEFAULT), + FunctionSignature.ret(DateTimeType.INSTANCE).varArgs(DateTimeType.INSTANCE), + FunctionSignature.ret(DateV2Type.INSTANCE).varArgs(DateV2Type.INSTANCE), + FunctionSignature.ret(DateType.INSTANCE).varArgs(DateType.INSTANCE), FunctionSignature.ret(DoubleType.INSTANCE).varArgs(DoubleType.INSTANCE), - FunctionSignature.ret(DecimalV2Type.SYSTEM_DEFAULT).varArgs(DecimalV2Type.SYSTEM_DEFAULT), + FunctionSignature.ret(FloatType.INSTANCE).varArgs(FloatType.INSTANCE), FunctionSignature.ret(DecimalV3Type.WILDCARD).varArgs(DecimalV3Type.WILDCARD), - FunctionSignature.ret(DateType.INSTANCE).varArgs(DateType.INSTANCE), - FunctionSignature.ret(DateV2Type.INSTANCE).varArgs(DateV2Type.INSTANCE), - FunctionSignature.ret(DateTimeType.INSTANCE).varArgs(DateTimeType.INSTANCE), - FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).varArgs(DateTimeV2Type.SYSTEM_DEFAULT), - FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).varArgs(VarcharType.SYSTEM_DEFAULT), - FunctionSignature.ret(StringType.INSTANCE).varArgs(StringType.INSTANCE) + FunctionSignature.ret(DecimalV2Type.SYSTEM_DEFAULT).varArgs(DecimalV2Type.SYSTEM_DEFAULT), + FunctionSignature.ret(LargeIntType.INSTANCE).varArgs(LargeIntType.INSTANCE), + FunctionSignature.ret(BigIntType.INSTANCE).varArgs(BigIntType.INSTANCE), + FunctionSignature.ret(IntegerType.INSTANCE).varArgs(IntegerType.INSTANCE), + FunctionSignature.ret(SmallIntType.INSTANCE).varArgs(SmallIntType.INSTANCE), + FunctionSignature.ret(TinyIntType.INSTANCE).varArgs(TinyIntType.INSTANCE) ); /** @@ -80,11 +82,28 @@ public Greatest(Expression arg, Expression... 
varArgs) { */ @Override public Greatest withChildren(List children) { - Preconditions.checkArgument(children.size() >= 1); + Preconditions.checkArgument(!children.isEmpty()); return new Greatest(children.get(0), children.subList(1, children.size()).toArray(new Expression[0])); } + @Override + public FunctionSignature searchSignature(List signatures) { + List argTypes = getArgumentsTypes(); + if (argTypes.stream().anyMatch(CharacterType.class::isInstance)) { + return FunctionSignature.ret(StringType.INSTANCE).varArgs(StringType.INSTANCE); + } else if (argTypes.stream().anyMatch(DateTimeV2Type.class::isInstance)) { + return FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).varArgs(DateTimeV2Type.SYSTEM_DEFAULT); + } else if (argTypes.stream().anyMatch(DateTimeType.class::isInstance)) { + return FunctionSignature.ret(DateTimeType.INSTANCE).varArgs(DateTimeType.INSTANCE); + } else if (argTypes.stream().anyMatch(DateV2Type.class::isInstance)) { + return FunctionSignature.ret(DateV2Type.INSTANCE).varArgs(DateV2Type.INSTANCE); + } else if (argTypes.stream().anyMatch(DateType.class::isInstance)) { + return FunctionSignature.ret(DateType.INSTANCE).varArgs(DateType.INSTANCE); + } + return ExplicitlyCastableSignature.super.searchSignature(signatures); + } + @Override public List getSignatures() { return SIGNATURES; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/If.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/If.java index e7ffcebdfe5005d..4b6f62f6bc8bc62 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/If.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/If.java @@ -18,6 +18,7 @@ package org.apache.doris.nereids.trees.expressions.functions.scalar; import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.analyzer.Unbound; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.shape.TernaryExpression; @@ -45,6 +46,7 @@ import org.apache.doris.nereids.types.TinyIntType; import org.apache.doris.nereids.types.VarcharType; import org.apache.doris.nereids.types.coercion.AnyDataType; +import org.apache.doris.nereids.util.TypeCoercionUtils; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -111,7 +113,8 @@ public class If extends ScalarFunction * constructor with 3 arguments. */ public If(Expression arg0, Expression arg1, Expression arg2) { - super("if", arg0, arg1, arg2); + super("if", arg0 instanceof Unbound ? 
arg0 : TypeCoercionUtils.castIfNotSameType(arg0, BooleanType.INSTANCE), + arg1, arg2); } /** @@ -145,4 +148,10 @@ public R accept(ExpressionVisitor visitor, C context) { public List getSignatures() { return SIGNATURES; } + + @Override + public FunctionSignature searchSignature(List signatures) { + + return ExplicitlyCastableSignature.super.searchSignature(signatures); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/LastDay.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/LastDay.java index d372868a34331a1..bd3783abd9a9631 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/LastDay.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/LastDay.java @@ -40,10 +40,10 @@ public class LastDay extends ScalarFunction implements UnaryExpression, ExplicitlyCastableSignature, PropagateNullableOnDateLikeV2Args { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(DateType.INSTANCE).args(DateTimeType.INSTANCE), - FunctionSignature.ret(DateType.INSTANCE).args(DateType.INSTANCE), + FunctionSignature.ret(DateV2Type.INSTANCE).args(DateV2Type.INSTANCE), FunctionSignature.ret(DateV2Type.INSTANCE).args(DateTimeV2Type.SYSTEM_DEFAULT), - FunctionSignature.ret(DateV2Type.INSTANCE).args(DateV2Type.INSTANCE) + FunctionSignature.ret(DateType.INSTANCE).args(DateType.INSTANCE), + FunctionSignature.ret(DateType.INSTANCE).args(DateTimeType.INSTANCE) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Least.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Least.java index 39af66269089912..e22f9ea115f0166 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Least.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Least.java @@ -23,6 +23,7 @@ import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; +import org.apache.doris.nereids.types.DataType; import org.apache.doris.nereids.types.DateTimeType; import org.apache.doris.nereids.types.DateTimeV2Type; import org.apache.doris.nereids.types.DateType; @@ -37,6 +38,7 @@ import org.apache.doris.nereids.types.StringType; import org.apache.doris.nereids.types.TinyIntType; import org.apache.doris.nereids.types.VarcharType; +import org.apache.doris.nereids.types.coercion.CharacterType; import org.apache.doris.nereids.util.ExpressionUtils; import com.google.common.base.Preconditions; @@ -51,21 +53,21 @@ public class Least extends ScalarFunction implements ExplicitlyCastableSignature, PropagateNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(TinyIntType.INSTANCE).varArgs(TinyIntType.INSTANCE), - FunctionSignature.ret(SmallIntType.INSTANCE).varArgs(SmallIntType.INSTANCE), - FunctionSignature.ret(IntegerType.INSTANCE).varArgs(IntegerType.INSTANCE), - FunctionSignature.ret(BigIntType.INSTANCE).varArgs(BigIntType.INSTANCE), - FunctionSignature.ret(LargeIntType.INSTANCE).varArgs(LargeIntType.INSTANCE), - FunctionSignature.ret(FloatType.INSTANCE).varArgs(FloatType.INSTANCE), - FunctionSignature.ret(DoubleType.INSTANCE).varArgs(DoubleType.INSTANCE), - 
FunctionSignature.ret(DateType.INSTANCE).varArgs(DateType.INSTANCE), - FunctionSignature.ret(DateV2Type.INSTANCE).varArgs(DateV2Type.INSTANCE), - FunctionSignature.ret(DateTimeType.INSTANCE).varArgs(DateTimeType.INSTANCE), + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).varArgs(VarcharType.SYSTEM_DEFAULT), + FunctionSignature.ret(StringType.INSTANCE).varArgs(StringType.INSTANCE), FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).varArgs(DateTimeV2Type.SYSTEM_DEFAULT), - FunctionSignature.ret(DecimalV2Type.SYSTEM_DEFAULT).varArgs(DecimalV2Type.SYSTEM_DEFAULT), + FunctionSignature.ret(DateTimeType.INSTANCE).varArgs(DateTimeType.INSTANCE), + FunctionSignature.ret(DateV2Type.INSTANCE).varArgs(DateV2Type.INSTANCE), + FunctionSignature.ret(DateType.INSTANCE).varArgs(DateType.INSTANCE), + FunctionSignature.ret(DoubleType.INSTANCE).varArgs(DoubleType.INSTANCE), + FunctionSignature.ret(FloatType.INSTANCE).varArgs(FloatType.INSTANCE), FunctionSignature.ret(DecimalV3Type.WILDCARD).varArgs(DecimalV3Type.WILDCARD), - FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).varArgs(VarcharType.SYSTEM_DEFAULT), - FunctionSignature.ret(StringType.INSTANCE).varArgs(StringType.INSTANCE) + FunctionSignature.ret(DecimalV2Type.SYSTEM_DEFAULT).varArgs(DecimalV2Type.SYSTEM_DEFAULT), + FunctionSignature.ret(LargeIntType.INSTANCE).varArgs(LargeIntType.INSTANCE), + FunctionSignature.ret(BigIntType.INSTANCE).varArgs(BigIntType.INSTANCE), + FunctionSignature.ret(IntegerType.INSTANCE).varArgs(IntegerType.INSTANCE), + FunctionSignature.ret(SmallIntType.INSTANCE).varArgs(SmallIntType.INSTANCE), + FunctionSignature.ret(TinyIntType.INSTANCE).varArgs(TinyIntType.INSTANCE) ); /** @@ -80,11 +82,28 @@ public Least(Expression arg, Expression... varArgs) { */ @Override public Least withChildren(List children) { - Preconditions.checkArgument(children.size() >= 1); + Preconditions.checkArgument(!children.isEmpty()); return new Least(children.get(0), children.subList(1, children.size()).toArray(new Expression[0])); } + @Override + public FunctionSignature searchSignature(List signatures) { + List argTypes = getArgumentsTypes(); + if (argTypes.stream().anyMatch(CharacterType.class::isInstance)) { + return FunctionSignature.ret(StringType.INSTANCE).varArgs(StringType.INSTANCE); + } else if (argTypes.stream().anyMatch(DateTimeV2Type.class::isInstance)) { + return FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT).varArgs(DateTimeV2Type.SYSTEM_DEFAULT); + } else if (argTypes.stream().anyMatch(DateTimeType.class::isInstance)) { + return FunctionSignature.ret(DateTimeType.INSTANCE).varArgs(DateTimeType.INSTANCE); + } else if (argTypes.stream().anyMatch(DateV2Type.class::isInstance)) { + return FunctionSignature.ret(DateV2Type.INSTANCE).varArgs(DateV2Type.INSTANCE); + } else if (argTypes.stream().anyMatch(DateType.class::isInstance)) { + return FunctionSignature.ret(DateType.INSTANCE).varArgs(DateType.INSTANCE); + } + return ExplicitlyCastableSignature.super.searchSignature(signatures); + } + @Override public List getSignatures() { return SIGNATURES; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinutesDiff.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinutesDiff.java index 178b6a49331e91f..4d011116334bf47 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinutesDiff.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/MinutesDiff.java @@ -42,9 
+42,9 @@ public class MinutesDiff extends ScalarFunction private static final List SIGNATURES = ImmutableList.of( FunctionSignature.ret(BigIntType.INSTANCE) .args(DateTimeV2Type.SYSTEM_DEFAULT, DateTimeV2Type.SYSTEM_DEFAULT), - FunctionSignature.ret(BigIntType.INSTANCE).args(DateTimeType.INSTANCE, DateTimeType.INSTANCE), FunctionSignature.ret(BigIntType.INSTANCE).args(DateV2Type.INSTANCE, DateTimeV2Type.SYSTEM_DEFAULT), FunctionSignature.ret(BigIntType.INSTANCE).args(DateTimeV2Type.SYSTEM_DEFAULT, DateV2Type.INSTANCE), + FunctionSignature.ret(BigIntType.INSTANCE).args(DateTimeType.INSTANCE, DateTimeType.INSTANCE), FunctionSignature.ret(BigIntType.INSTANCE).args(DateV2Type.INSTANCE, DateV2Type.INSTANCE) ); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/NullIf.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/NullIf.java index 447e60a752fc646..d8985519a1bd0cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/NullIf.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/NullIf.java @@ -23,22 +23,8 @@ import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; -import org.apache.doris.nereids.types.BigIntType; -import org.apache.doris.nereids.types.BooleanType; -import org.apache.doris.nereids.types.DateTimeType; -import org.apache.doris.nereids.types.DateTimeV2Type; -import org.apache.doris.nereids.types.DateType; -import org.apache.doris.nereids.types.DateV2Type; -import org.apache.doris.nereids.types.DecimalV2Type; -import org.apache.doris.nereids.types.DecimalV3Type; -import org.apache.doris.nereids.types.DoubleType; -import org.apache.doris.nereids.types.FloatType; -import org.apache.doris.nereids.types.IntegerType; -import org.apache.doris.nereids.types.LargeIntType; -import org.apache.doris.nereids.types.SmallIntType; -import org.apache.doris.nereids.types.StringType; -import org.apache.doris.nereids.types.TinyIntType; -import org.apache.doris.nereids.types.VarcharType; +import org.apache.doris.nereids.types.coercion.AnyDataType; +import org.apache.doris.nereids.types.coercion.FollowToAnyDataType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -52,26 +38,7 @@ public class NullIf extends ScalarFunction implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(BooleanType.INSTANCE).args(BooleanType.INSTANCE, BooleanType.INSTANCE), - FunctionSignature.ret(TinyIntType.INSTANCE).args(TinyIntType.INSTANCE, TinyIntType.INSTANCE), - FunctionSignature.ret(SmallIntType.INSTANCE).args(SmallIntType.INSTANCE, SmallIntType.INSTANCE), - FunctionSignature.ret(IntegerType.INSTANCE).args(IntegerType.INSTANCE, IntegerType.INSTANCE), - FunctionSignature.ret(BigIntType.INSTANCE).args(BigIntType.INSTANCE, BigIntType.INSTANCE), - FunctionSignature.ret(LargeIntType.INSTANCE).args(LargeIntType.INSTANCE, LargeIntType.INSTANCE), - FunctionSignature.ret(FloatType.INSTANCE).args(FloatType.INSTANCE, FloatType.INSTANCE), - FunctionSignature.ret(DoubleType.INSTANCE).args(DoubleType.INSTANCE, DoubleType.INSTANCE), - FunctionSignature.ret(DateTimeType.INSTANCE).args(DateTimeType.INSTANCE, DateTimeType.INSTANCE), - 
FunctionSignature.ret(DateType.INSTANCE).args(DateType.INSTANCE, DateType.INSTANCE), - FunctionSignature.ret(DateTimeV2Type.SYSTEM_DEFAULT) - .args(DateTimeV2Type.SYSTEM_DEFAULT, DateTimeV2Type.SYSTEM_DEFAULT), - FunctionSignature.ret(DateV2Type.INSTANCE).args(DateV2Type.INSTANCE, DateV2Type.INSTANCE), - FunctionSignature.ret(DecimalV2Type.SYSTEM_DEFAULT) - .args(DecimalV2Type.SYSTEM_DEFAULT, DecimalV2Type.SYSTEM_DEFAULT), - FunctionSignature.ret(DecimalV3Type.WILDCARD) - .args(DecimalV3Type.WILDCARD, DecimalV3Type.WILDCARD), - FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT) - .args(VarcharType.SYSTEM_DEFAULT, VarcharType.SYSTEM_DEFAULT), - FunctionSignature.ret(StringType.INSTANCE).args(StringType.INSTANCE, StringType.INSTANCE) + FunctionSignature.retArgType(0).args(new AnyDataType(0), new FollowToAnyDataType(0)) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Pmod.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Pmod.java index 56ba753f1ff9eba..808e84ea02600e4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Pmod.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/Pmod.java @@ -21,10 +21,12 @@ import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable; import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.literal.StringLikeLiteral; import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.BigIntType; import org.apache.doris.nereids.types.DoubleType; +import org.apache.doris.nereids.util.TypeCoercionUtils; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -37,10 +39,10 @@ public class Pmod extends ScalarFunction implements BinaryExpression, ExplicitlyCastableSignature, AlwaysNullable { - public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(BigIntType.INSTANCE).args(BigIntType.INSTANCE, BigIntType.INSTANCE), - FunctionSignature.ret(DoubleType.INSTANCE).args(DoubleType.INSTANCE, DoubleType.INSTANCE) - ); + public static final FunctionSignature BIGINT_SIGNATURE = FunctionSignature.ret(BigIntType.INSTANCE) + .args(BigIntType.INSTANCE, BigIntType.INSTANCE); + public static final FunctionSignature DOUBLE_SIGNATURE = FunctionSignature.ret(DoubleType.INSTANCE) + .args(DoubleType.INSTANCE, DoubleType.INSTANCE); /** * constructor with 2 arguments. @@ -58,9 +60,39 @@ public Pmod withChildren(List children) { return new Pmod(children.get(0), children.get(1)); } + /** + * already override searchSignature and computeSignature, so getSignatures is useless anymore. 
+ * + * @return empty list + */ @Override public List getSignatures() { - return SIGNATURES; + return ImmutableList.of(); + } + + @Override + public FunctionSignature computeSignature(FunctionSignature signature) { + return signature; + } + + @Override + public FunctionSignature searchSignature(List signatures) { + boolean leftCouldBeBigInt = false; + boolean rightCouldBeBigInt = false; + if (getArgument(0) instanceof StringLikeLiteral) { + leftCouldBeBigInt = TypeCoercionUtils.characterLiteralTypeCoercion( + ((StringLikeLiteral) getArgument(0)).getValue(), BigIntType.INSTANCE).isPresent(); + } + if (getArgument(1) instanceof StringLikeLiteral) { + rightCouldBeBigInt = TypeCoercionUtils.characterLiteralTypeCoercion( + ((StringLikeLiteral) getArgument(1)).getValue(), BigIntType.INSTANCE).isPresent(); + } + if ((getArgument(0).getDataType().isIntegerLikeType() || leftCouldBeBigInt) + && (getArgument(1).getDataType().isIntegerLikeType() || rightCouldBeBigInt)) { + return BIGINT_SIGNATURE; + } else { + return DOUBLE_SIGNATURE; + } } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java index 96cbcca642d4d46..651e018c20e9824 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java @@ -534,7 +534,7 @@ public static Optional characterLiteralTypeCoercion(String value, Da if ("false".equalsIgnoreCase(value)) { ret = BooleanLiteral.FALSE; } - } else if (dataType instanceof IntegralType) { + } else if (dataType instanceof TinyIntType) { BigInteger bigInt = new BigInteger(value); if (BigInteger.valueOf(bigInt.byteValue()).equals(bigInt)) { ret = new TinyIntLiteral(bigInt.byteValue()); @@ -547,6 +547,36 @@ public static Optional characterLiteralTypeCoercion(String value, Da } else { ret = new LargeIntLiteral(bigInt); } + } else if (dataType instanceof SmallIntType) { + BigInteger bigInt = new BigInteger(value); + if (BigInteger.valueOf(bigInt.shortValue()).equals(bigInt)) { + ret = new SmallIntLiteral(bigInt.shortValue()); + } else if (BigInteger.valueOf(bigInt.intValue()).equals(bigInt)) { + ret = new IntegerLiteral(bigInt.intValue()); + } else if (BigInteger.valueOf(bigInt.longValue()).equals(bigInt)) { + ret = new BigIntLiteral(bigInt.longValueExact()); + } else { + ret = new LargeIntLiteral(bigInt); + } + } else if (dataType instanceof IntegerType) { + BigInteger bigInt = new BigInteger(value); + if (BigInteger.valueOf(bigInt.intValue()).equals(bigInt)) { + ret = new IntegerLiteral(bigInt.intValue()); + } else if (BigInteger.valueOf(bigInt.longValue()).equals(bigInt)) { + ret = new BigIntLiteral(bigInt.longValueExact()); + } else { + ret = new LargeIntLiteral(bigInt); + } + } else if (dataType instanceof BigIntType) { + BigInteger bigInt = new BigInteger(value); + if (BigInteger.valueOf(bigInt.longValue()).equals(bigInt)) { + ret = new BigIntLiteral(bigInt.longValueExact()); + } else { + ret = new LargeIntLiteral(bigInt); + } + } else if (dataType instanceof LargeIntType) { + BigInteger bigInt = new BigInteger(value); + ret = new LargeIntLiteral(bigInt); } else if (dataType instanceof FloatType) { ret = new FloatLiteral(Float.parseFloat(value)); } else if (dataType instanceof DoubleType) { diff --git a/regression-test/data/nereids_function_p0/type_coercion.out b/regression-test/data/nereids_function_p0/type_coercion.out new file mode 100644 index 
000000000000000..54fddd1e5b682ad
--- /dev/null
+++ b/regression-test/data/nereids_function_p0/type_coercion.out
@@ -0,0 +1,34 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !greatest --
+333
+
+-- !least --
+2000000
+
+-- !if --
+2222
+
+-- !array_product --
+6000.0
+
+-- !array_avg --
+1001.0
+
+-- !array_pushfront --
+[4444, 1, 2, 3, 555555]
+
+-- !array_pushback --
+[1, 2, 3, 555555, 4444]
+
+-- !array_difference --
+[0, 1, 198]
+
+-- !array_enumerate_uniq --
+[1, 2, 1]
+
+-- !array_cum_sum --
+[1, 3, 3003]
+
+-- !pmod --
+0.0
+
diff --git a/regression-test/suites/nereids_function_p0/type_coercion.groovy b/regression-test/suites/nereids_function_p0/type_coercion.groovy
new file mode 100644
index 000000000000000..8c5dcb8134e49ac
--- /dev/null
+++ b/regression-test/suites/nereids_function_p0/type_coercion.groovy
@@ -0,0 +1,30 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+suite("function_type_coercion") {
+    sql """set enable_fold_constant_by_be=false""" // remove this if array BE return result be fixed.
+    qt_greatest """select greatest(1, 2222, '333')"""
+    qt_least """select least(5,2000000,'3.0023')"""
+    qt_if """select if (1, 2222, 33)"""
+    qt_array_product """select array_product(array(1, 2, '3000'))"""
+    qt_array_avg """select array_avg(array(1, 2, '3000'))"""
+    qt_array_pushfront """select array_pushfront(array(1,2,3,555555), '4444')"""
+    qt_array_pushback """select array_pushback(array(1,2,3,555555), '4444')"""
+    qt_array_difference """select array_difference(array(1,2,'200'))"""
+    qt_array_enumerate_uniq """select array_enumerate_uniq([1,1,1],['1','1','1.0'])"""
+    qt_array_cum_sum """select array_cum_sum(array('1', '2', '3000'))"""
+    qt_pmod """select pmod(2, '1.0')"""
+}
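
For readers skimming the series, a minimal sketch of the coercion behavior the new suite pins down. The queries and expected values are taken from type_coercion.out above; the inline comments are an interpretation of the signature-search changes in this patch, not part of the commit itself.

```sql
-- greatest/least: a string argument now selects the STRING varargs signature,
-- so mixed arguments are compared lexicographically rather than numerically.
select greatest(1, 2222, '333');            -- 333
select least(5, 2000000, '3.0023');         -- 2000000

-- if: the first argument is cast to BOOLEAN up front, so if(1, ...) takes the then-branch.
select if (1, 2222, 33);                    -- 2222

-- pmod: '1.0' cannot be coerced to BIGINT, so the DOUBLE signature is chosen.
select pmod(2, '1.0');                      -- 0.0

-- array_product: a string element pushes the call onto the ARRAY<DOUBLE> signature.
select array_product(array(1, 2, '3000'));  -- 6000.0
```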