From 653d375f2adb1f483fe6bb11a14dbc6d33c44fc7 Mon Sep 17 00:00:00 2001
From: bobhan1
Date: Tue, 12 Sep 2023 11:10:52 +0800
Subject: [PATCH 01/31] [Fix](status)Fix leaky abstraction and shield the status code `END_OF_FILE` from upper layers (#24165)

---
 be/src/olap/delete_bitmap_calculator.cpp             | 1 +
 be/src/olap/rowset/segment_v2/binary_prefix_page.cpp | 8 +++++++-
 be/src/olap/rowset/segment_v2/segment.cpp            | 7 +++++--
 be/src/olap/tablet.cpp                               | 2 +-
 be/test/olap/primary_key_index_test.cpp              | 2 +-
 5 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/be/src/olap/delete_bitmap_calculator.cpp b/be/src/olap/delete_bitmap_calculator.cpp
index bfdb506c066e6b..a370f0c06bf8e7 100644
--- a/be/src/olap/delete_bitmap_calculator.cpp
+++ b/be/src/olap/delete_bitmap_calculator.cpp
@@ -17,6 +17,7 @@

 #include "olap/delete_bitmap_calculator.h"

+#include "common/status.h"
 #include "olap/primary_key_index.h"
 #include "vec/data_types/data_type_factory.hpp"

diff --git a/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp b/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp
index ab9056def1b9b3..183e7bb0853440 100644
--- a/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp
+++ b/be/src/olap/rowset/segment_v2/binary_prefix_page.cpp
@@ -202,7 +202,13 @@ Status BinaryPrefixPageDecoder::seek_at_or_after_value(const void* value, bool*
             return Status::OK();
         }
         _cur_pos++;
-        RETURN_IF_ERROR(_read_next_value());
+        auto st = _read_next_value();
+        if (st.is<ErrorCode::END_OF_FILE>()) {
+            return Status::Error<ErrorCode::ENTRY_NOT_FOUND>("all values are smaller than the target value");
+        }
+        if (!st.ok()) {
+            return st;
+        }
     }
 }

diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp
index 195b40fa838438..0e2d6a6e3e30f8 100644
--- a/be/src/olap/rowset/segment_v2/segment.cpp
+++ b/be/src/olap/rowset/segment_v2/segment.cpp
@@ -373,8 +373,11 @@ Status Segment::lookup_row_key(const Slice& key, bool with_seq_col, RowLocation*
     bool exact_match = false;
     std::unique_ptr<segment_v2::IndexedColumnIterator> index_iterator;
     RETURN_IF_ERROR(_pk_index_reader->new_iterator(&index_iterator));
-    RETURN_IF_ERROR(index_iterator->seek_at_or_after(&key_without_seq, &exact_match));
-    if (!has_seq_col && !exact_match) {
+    auto st = index_iterator->seek_at_or_after(&key_without_seq, &exact_match);
+    if (!st.ok() && !st.is<ErrorCode::ENTRY_NOT_FOUND>()) {
+        return st;
+    }
+    if (st.is<ErrorCode::ENTRY_NOT_FOUND>() || (!has_seq_col && !exact_match)) {
         return Status::Error<ErrorCode::KEY_NOT_FOUND>("Can't find key in the segment");
     }
     row_location->row_id = index_iterator->get_current_ordinal();
diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp
index 71da59808e6e8f..c732b200f77eb1 100644
--- a/be/src/olap/tablet.cpp
+++ b/be/src/olap/tablet.cpp
@@ -2832,7 +2832,7 @@ Status Tablet::lookup_row_key(const Slice& encoded_key, bool with_seq_col,

     for (auto id : picked_segments) {
         Status s = segments[id]->lookup_row_key(encoded_key, with_seq_col, &loc);
-        if (s.is<ErrorCode::KEY_NOT_FOUND>() || s.is<ErrorCode::END_OF_FILE>()) {
+        if (s.is<ErrorCode::KEY_NOT_FOUND>()) {
             continue;
         }
         if (!s.ok() && !s.is<ErrorCode::KEY_ALREADY_EXISTS>()) {
diff --git a/be/test/olap/primary_key_index_test.cpp b/be/test/olap/primary_key_index_test.cpp
index 37189f22ec9095..d643ab501e8e3c 100644
--- a/be/test/olap/primary_key_index_test.cpp
+++ b/be/test/olap/primary_key_index_test.cpp
@@ -128,7 +128,7 @@ TEST_F(PrimaryKeyIndexTest, builder) {
         EXPECT_FALSE(exists);
         auto status = index_iterator->seek_at_or_after(&slice, &exact_match);
         EXPECT_FALSE(exact_match);
-        EXPECT_TRUE(status.is<ErrorCode::END_OF_FILE>());
+        EXPECT_TRUE(status.is<ErrorCode::ENTRY_NOT_FOUND>());
     }

     // read all key

From 40d35bd04dda473f4fe13ecea127f943e24b8875 Mon Sep 17 00:00:00 2001
From: Kang
Date: Tue, 12 Sep 2023 21:27:51 +0800
Subject: [PATCH 02/31] change version to 2.0.2-rc02

---
 gensrc/script/gen_build_version.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gensrc/script/gen_build_version.sh b/gensrc/script/gen_build_version.sh
index 3d522a508c038c..89efd13dc70464 100755
--- a/gensrc/script/gen_build_version.sh
+++ b/gensrc/script/gen_build_version.sh
@@ -31,7 +31,7 @@ build_version_prefix="doris"
 build_version_major=2
 build_version_minor=0
 build_version_patch=2
-build_version_rc_version="rc01"
+build_version_rc_version="rc02"

 build_version="${build_version_prefix}-${build_version_major}.${build_version_minor}.${build_version_patch}-${build_version_rc_version}"

From 8137ce536e9847bdf9fe6a5f776d5c8308a267f8 Mon Sep 17 00:00:00 2001
From: TengJianPing <18241664+jacktengg@users.noreply.github.com>
Date: Mon, 11 Sep 2023 10:33:38 +0800
Subject: [PATCH 03/31] [fix](local exchange) fix bug of accessing released counter of local data stream receiver (#24160)

---
 be/src/vec/runtime/vdata_stream_recvr.cpp | 12 ++++++++----
 be/src/vec/runtime/vdata_stream_recvr.h   | 12 ++++++++++--
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/be/src/vec/runtime/vdata_stream_recvr.cpp b/be/src/vec/runtime/vdata_stream_recvr.cpp
index 2e0ffaf9a37bb3..fcd7d014c6fe6b 100644
--- a/be/src/vec/runtime/vdata_stream_recvr.cpp
+++ b/be/src/vec/runtime/vdata_stream_recvr.cpp
@@ -92,7 +92,7 @@ Status VDataStreamRecvr::SenderQueue::_inner_get_batch_without_lock(Block* block

     DCHECK(!_block_queue.empty());
     auto [next_block, block_byte_size] = std::move(_block_queue.front());
-    _recvr->_blocks_memory_usage->add(-block_byte_size);
+    _recvr->update_blocks_memory_usage(-block_byte_size);
     _block_queue.pop_front();

     if (!_pending_closures.empty()) {
@@ -168,7 +168,7 @@ void VDataStreamRecvr::SenderQueue::add_block(const PBlock& pblock, int be_numbe
         _pending_closures.emplace_back(*done, monotonicStopWatch);
         *done = nullptr;
     }
-    _recvr->_blocks_memory_usage->add(block_byte_size);
+    _recvr->update_blocks_memory_usage(block_byte_size);

     _data_arrival_cv.notify_one();
 }
@@ -208,7 +208,12 @@ void VDataStreamRecvr::SenderQueue::add_block(Block* block, bool use_move) {

         _block_queue.emplace_back(std::move(nblock), block_mem_size);
         _data_arrival_cv.notify_one();

-    if (_recvr->exceeds_limit(block_mem_size)) {
+    // Careful: Accessing members of _recvr that are allocated by Object pool
+    // should be done before the following logic, because the _lock will be released
+    // by `iter->second->wait(l)`; after `iter->second->wait(l)` returns, _recvr may
+    // have been closed and the resources in _recvr released.
+    _recvr->update_blocks_memory_usage(block_mem_size);
+    if (_recvr->exceeds_limit(0)) {
         // yiguolei
         // It is too tricky here, if the running thread is bthread then the tid may be wrong.
         std::thread::id tid = std::this_thread::get_id();
             iter->second->wait(l);
         }

-        _recvr->_blocks_memory_usage->add(block_mem_size);
 }

diff --git a/be/src/vec/runtime/vdata_stream_recvr.h b/be/src/vec/runtime/vdata_stream_recvr.h
index 03bf6f9db28bc5..0059c8ddf0e017 100644
--- a/be/src/vec/runtime/vdata_stream_recvr.h
+++ b/be/src/vec/runtime/vdata_stream_recvr.h
@@ -102,14 +102,21 @@ class VDataStreamRecvr {

     void close();

+    // Careful: the stream sender will call this function for a local receiver;
+    // accessing members of the receiver that are allocated by the Object pool
+    // in this function is not safe.
     bool exceeds_limit(int batch_size) {
-        return _blocks_memory_usage->current_value() + batch_size >
+        return _blocks_memory_usage_current_value + batch_size >
                config::exchg_node_buffer_size_bytes;
     }

     bool is_closed() const { return _is_closed; }

 private:
+    void update_blocks_memory_usage(int64_t size) {
+        _blocks_memory_usage->add(size);
+        _blocks_memory_usage_current_value = _blocks_memory_usage->current_value();
+    }
     class SenderQueue;
     class PipSenderQueue;

     RuntimeProfile::Counter* _decompress_bytes;
     RuntimeProfile::Counter* _memory_usage_counter;
     RuntimeProfile::HighWaterMarkCounter* _blocks_memory_usage;
+    std::atomic<int64_t> _blocks_memory_usage_current_value = 0;
     RuntimeProfile::Counter* _peak_memory_usage_counter;

     // Number of rows received
@@ -266,7 +274,7 @@ class VDataStreamRecvr::PipSenderQueue : public SenderQueue {
         }
         _block_queue.emplace_back(std::move(nblock), block_mem_size);
         COUNTER_UPDATE(_recvr->_local_bytes_received_counter, block_mem_size);
-        _recvr->_blocks_memory_usage->add(block_mem_size);
+        _recvr->update_blocks_memory_usage(block_mem_size);
         _data_arrival_cv.notify_one();
     }
 }

From a48ca238d351b34cfed8e6dbc39b87cc555e5cc2 Mon Sep 17 00:00:00 2001
From: yiguolei <676222867@qq.com>
Date: Mon, 11 Sep 2023 12:20:07 +0800
Subject: [PATCH 04/31] [improvement](shutdown) not print thread pool error stack trace when shutdown (#24155)

* [improvement](shutdown) do not print the thread pool error stack trace on shutdown

When a thread pool is shut down, it should not print an error stack trace; that is very confusing. The arrow flight server should not call shutdown if it is not enabled, because that would also print an error stack. Remove SERVICE_UNAVAILABLE from thrift because it is no longer used.

Part of this PR needs to be picked to the 2.0 branch.

Co-authored-by: yiguolei
---
 be/src/common/status.h                              | 3 +--
 be/src/runtime/stream_load/stream_load_executor.cpp | 2 +-
 be/src/util/threadpool.cpp                          | 8 +++++---
 gensrc/thrift/Status.thrift                         | 2 +-
 4 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/be/src/common/status.h b/be/src/common/status.h
index 2b6b639b043462..1d73bee2d1d7ae 100644
--- a/be/src/common/status.h
+++ b/be/src/common/status.h
@@ -51,7 +51,6 @@ TStatusError(MEM_LIMIT_EXCEEDED);
 TStatusError(THRIFT_RPC_ERROR);
 TStatusError(TIMEOUT);
 TStatusError(TOO_MANY_TASKS);
-TStatusError(SERVICE_UNAVAILABLE);
 TStatusError(UNINITIALIZED);
 TStatusError(ABORTED);
 TStatusError(DATA_QUALITY_ERROR);
 E(NOT_INITIALIZED, -236);
 E(ALREADY_CANCELLED, -237);
 E(TOO_MANY_SEGMENTS, -238);
 E(ALREADY_CLOSED, -239);
+E(SERVICE_UNAVAILABLE, -240);
 E(CE_CMD_PARAMS_ERROR, -300);
 E(CE_BUFFER_TOO_SMALL, -301);
 E(CE_CMD_NOT_VALID, -302);
     ERROR_CTOR(RpcError, THRIFT_RPC_ERROR)
     ERROR_CTOR(TimedOut, TIMEOUT)
     ERROR_CTOR(TooManyTasks, TOO_MANY_TASKS)
-    ERROR_CTOR(ServiceUnavailable, SERVICE_UNAVAILABLE)
     ERROR_CTOR(Uninitialized, UNINITIALIZED)
     ERROR_CTOR(Aborted, ABORTED)
     ERROR_CTOR(DataQualityError, DATA_QUALITY_ERROR)
diff --git a/be/src/runtime/stream_load/stream_load_executor.cpp b/be/src/runtime/stream_load/stream_load_executor.cpp
index e1e8e2ff25d90f..1ecba95748aeec 100644
--- a/be/src/runtime/stream_load/stream_load_executor.cpp
+++ b/be/src/runtime/stream_load/stream_load_executor.cpp
@@ -248,7 +248,7 @@ Status StreamLoadExecutor::begin_txn(StreamLoadContext* ctx) {
     int64_t duration_ns = 0;
     TNetworkAddress master_addr = _exec_env->master_info()->network_address;
     if (master_addr.hostname.empty() || master_addr.port == 0) {
-        status = Status::ServiceUnavailable("Have not get FE Master heartbeat yet");
+        status = Status::Error<ErrorCode::SERVICE_UNAVAILABLE>("Have not get FE Master heartbeat yet");
     } else {
         SCOPED_RAW_TIMER(&duration_ns);
 #ifndef BE_TEST
diff --git a/be/src/util/threadpool.cpp b/be/src/util/threadpool.cpp
index 93c14f4d61b8da..6ac02e5cbd74bd 100644
--- a/be/src/util/threadpool.cpp
+++ b/be/src/util/threadpool.cpp
@@ -274,7 +274,9 @@ void ThreadPool::shutdown() {
     // capacity, so clients can't tell them apart. This isn't really a practical
     // concern though because shutting down a pool typically requires clients to
     // be quiesced first, so there's no danger of a client getting confused.
-    _pool_status = Status::ServiceUnavailable("The thread pool {} has been shut down.", _name);
+    // Do not print stack trace here
+    _pool_status = Status::Error<ErrorCode::SERVICE_UNAVAILABLE, false>(
+            "The thread pool {} has been shut down.", _name);

     // Clear the various queues under the lock, but defer the releasing
     // of the tasks outside the lock, in case there are concurrent threads
@@ -356,14 +358,14 @@ Status ThreadPool::do_submit(std::shared_ptr<Runnable> r, ThreadPoolToken* token
     }

     if (PREDICT_FALSE(!token->may_submit_new_tasks())) {
-        return Status::ServiceUnavailable("Thread pool({}) token was shut down", _name);
+        return Status::Error<ErrorCode::SERVICE_UNAVAILABLE>("Thread pool({}) token was shut down", _name);
     }

     // Size limit check.
     int64_t capacity_remaining = static_cast<int64_t>(_max_threads) - _active_threads +
                                  static_cast<int64_t>(_max_queue_size) - _total_queued_tasks;
     if (capacity_remaining < 1) {
-        return Status::ServiceUnavailable(
+        return Status::Error<ErrorCode::SERVICE_UNAVAILABLE>(
                 "Thread pool {} is at capacity ({}/{} tasks running, {}/{} tasks queued)",
                 _name, _num_threads + _num_threads_pending_start, _max_threads,
                 _total_queued_tasks, _max_queue_size);
diff --git a/gensrc/thrift/Status.thrift b/gensrc/thrift/Status.thrift
index 7b12d3b0603d23..06083b9a93ccc8 100644
--- a/gensrc/thrift/Status.thrift
+++ b/gensrc/thrift/Status.thrift
@@ -69,7 +69,7 @@ enum TStatusCode {
     NOT_AUTHORIZED = 38,
     ABORTED = 39,
     REMOTE_ERROR = 40,
-    SERVICE_UNAVAILABLE = 41,
+    //SERVICE_UNAVAILABLE = 41, // Not used any more
     UNINITIALIZED = 42,
     CONFIGURATION_ERROR = 43,
     INCOMPLETE = 44,

From 4793c97fde2fb7fcc5ff2357fd2077af27bb0348 Mon Sep 17 00:00:00 2001
From: realize096 <102856702+realize096@users.noreply.github.com>
Date: Sat, 9 Sep 2023 00:37:07 +0800
Subject: [PATCH 05/31] [fix](sec)upgrade org.yaml:snakeyaml to 2.0 #24057

---
 fe/pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fe/pom.xml b/fe/pom.xml
index 1d43f1041e8bee..794c4736bf368d 100644
--- a/fe/pom.xml
+++ b/fe/pom.xml
@@ -253,7 +253,7 @@ under the License.
         4.7.2
         4.9.3
         3.4.0
-        <snakeyaml.version>1.33</snakeyaml.version>
+        <snakeyaml.version>2.0</snakeyaml.version>
         1.1.0.Final
         0.2.3
         3.4.0

From 72054d8796be7ed3df7e3bb70ba581cfdd9cf3a2 Mon Sep 17 00:00:00 2001
From: realize096 <102856702+realize096@users.noreply.github.com>
Date: Sat, 9 Sep 2023 00:37:39 +0800
Subject: [PATCH 06/31] update gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b to 3.0.0 (#24056)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is 1 security vulnerability found in gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b: CVE-2022-28948

What did I do?
Upgraded gopkg.in/yaml.v3 from v3.0.0-20210107192922-496545a6307b to 3.0.0 to fix the vulnerability.

What did you expect to happen?
Ideally, no insecure libs should be used.

How can we automate the detection of these types of issues?
By using the GitHub Actions configurations provided by murphysec, we can conduct automatic code security checks in our CI pipeline.
The specification of the pull request PR Specification from OSCS --- extension/beats/go.mod | 346 ++++++++++++++++++++--------------------- extension/beats/go.sum | 2 + 2 files changed, 175 insertions(+), 173 deletions(-) diff --git a/extension/beats/go.mod b/extension/beats/go.mod index 76bcc2ec0c0063..2935cb33cf3f37 100644 --- a/extension/beats/go.mod +++ b/extension/beats/go.mod @@ -3,183 +3,183 @@ module github.com/apache/doris/extension/beats go 1.20 require ( - github.com/elastic/beats/v7 v7.17.5 - gotest.tools v2.2.0+incompatible + github.com/elastic/beats/v7 v7.17.5 + gotest.tools v2.2.0+incompatible ) require ( - github.com/BurntSushi/toml v0.3.1 // indirect - github.com/Microsoft/go-winio v0.5.1 // indirect - github.com/PaesslerAG/gval v1.0.0 // indirect - github.com/PaesslerAG/jsonpath v0.1.1 // indirect - github.com/Shopify/sarama v0.0.0-00010101000000-000000000000 // indirect - github.com/StackExchange/wmi v0.0.0-20170221213301-9f32b5905fd6 // indirect - github.com/aerospike/aerospike-client-go v1.27.1-0.20170612174108-0f3b54da6bdc // indirect - github.com/armon/go-radix v1.0.0 // indirect - github.com/cespare/xxhash/v2 v2.1.1 // indirect - github.com/containerd/containerd v1.5.7 // indirect - github.com/coreos/go-systemd/v22 v22.3.2 // indirect - github.com/davecgh/go-spew v1.1.1 // indirect - github.com/davecgh/go-xdr v0.0.0-20161123171359-e6a2ba005892 // indirect - github.com/digitalocean/go-libvirt v0.0.0-20180301200012-6075ea3c39a1 // indirect - github.com/dlclark/regexp2 v1.1.7-0.20171009020623-7632a260cbaf // indirect - github.com/docker/distribution v2.8.0+incompatible // indirect - github.com/docker/docker v1.4.2-0.20190924003213-a8608b5b67c7 // indirect - github.com/docker/go-connections v0.4.0 // indirect - github.com/docker/go-units v0.4.0 // indirect - github.com/dop251/goja v0.0.0-20200831102558-9af81ddcf0e1 // indirect - github.com/dop251/goja_nodejs v0.0.0-20171011081505-adff31b136e6 // indirect - github.com/dustin/go-humanize v1.0.0 // indirect - github.com/eapache/go-resiliency v1.2.0 // indirect - github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21 // indirect - github.com/eapache/queue v1.1.0 // indirect - github.com/eclipse/paho.mqtt.golang v1.2.1-0.20200121105743-0d940dd29fd2 // indirect - github.com/elastic/ecs v1.12.0 // indirect - github.com/elastic/elastic-agent-client/v7 v7.0.0-20210727140539-f0905d9377f6 // indirect - github.com/elastic/go-concert v0.2.0 // indirect - github.com/elastic/go-libaudit/v2 v2.2.0 // indirect - github.com/elastic/go-lumber v0.1.0 // indirect - github.com/elastic/go-seccomp-bpf v1.2.0 // indirect - github.com/elastic/go-structform v0.0.9 // indirect - github.com/elastic/go-sysinfo v1.7.1 // indirect - github.com/elastic/go-txfile v0.0.7 // indirect - github.com/elastic/go-ucfg v0.8.3 // indirect - github.com/elastic/go-windows v1.0.1 // indirect - github.com/elastic/gosigar v0.14.2 // indirect - github.com/fatih/color v1.9.0 // indirect - github.com/fsnotify/fsevents v0.1.1 // indirect - github.com/fsnotify/fsnotify v1.5.1 // indirect - github.com/go-logr/logr v0.4.0 // indirect - github.com/go-ole/go-ole v1.2.5-0.20190920104607-14974a1cf647 // indirect - github.com/go-sourcemap/sourcemap v2.1.2+incompatible // indirect - github.com/go-sql-driver/mysql v1.5.0 // indirect - github.com/gocarina/gocsv v0.0.0-20170324095351-ffef3ffc77be // indirect - github.com/godbus/dbus v0.0.0-20190422162347-ade71ed3457e // indirect - github.com/godbus/dbus/v5 v5.0.5 // indirect - github.com/gofrs/flock 
v0.7.2-0.20190320160742-5135e617513b // indirect - github.com/gofrs/uuid v3.3.0+incompatible // indirect - github.com/gogo/protobuf v1.3.2 // indirect - github.com/golang/protobuf v1.5.2 // indirect - github.com/golang/snappy v0.0.4 // indirect - github.com/gomodule/redigo v1.8.3 // indirect - github.com/google/flatbuffers v1.12.1 // indirect - github.com/google/go-cmp v0.5.6 // indirect - github.com/google/gofuzz v1.1.0 // indirect - github.com/googleapis/gnostic v0.4.1 // indirect - github.com/gorhill/cronexpr v0.0.0-20180427100037-88b0669f7d75 // indirect - github.com/gorilla/websocket v1.4.2 // indirect - github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect - github.com/h2non/filetype v1.1.1 // indirect - github.com/hashicorp/errwrap v1.0.0 // indirect - github.com/hashicorp/go-multierror v1.1.0 // indirect - github.com/hashicorp/go-uuid v1.0.2 // indirect - github.com/hashicorp/golang-lru v0.5.4 // indirect - github.com/imdario/mergo v0.3.12 // indirect - github.com/inconshreveable/mousetrap v1.0.0 // indirect - github.com/insomniacslk/dhcp v0.0.0-20180716145214-633285ba52b2 // indirect - github.com/jcmturner/aescts/v2 v2.0.0 // indirect - github.com/jcmturner/dnsutils/v2 v2.0.0 // indirect - github.com/jcmturner/gofork v1.0.0 // indirect - github.com/jcmturner/gokrb5/v8 v8.4.2 // indirect - github.com/jcmturner/rpc/v2 v2.0.3 // indirect - github.com/joeshaw/multierror v0.0.0-20140124173710-69b34d4ec901 // indirect - github.com/jonboulle/clockwork v0.2.2 // indirect - github.com/json-iterator/go v1.1.11 // indirect - github.com/klauspost/compress v1.13.6 // indirect - github.com/lib/pq v1.1.2-0.20190507191818-2ff3cb3adc01 // indirect - github.com/magefile/mage v1.11.0 // indirect - github.com/mattn/go-colorable v0.1.6 // indirect - github.com/mattn/go-isatty v0.0.12 // indirect - github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect - github.com/miekg/dns v1.1.25 // indirect - github.com/mitchellh/hashstructure v0.0.0-20170116052023-ab25296c0f51 // indirect - github.com/mitchellh/mapstructure v1.3.3 // indirect - github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect - github.com/modern-go/reflect2 v1.0.1 // indirect - github.com/opencontainers/go-digest v1.0.0 // indirect - github.com/opencontainers/image-spec v1.0.2-0.20190823105129-775207bd45b6 // indirect - github.com/pierrec/lz4 v2.6.0+incompatible // indirect - github.com/pkg/errors v0.9.1 // indirect - github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/prometheus/client_model v0.2.0 // indirect - github.com/prometheus/common v0.10.0 // indirect - github.com/prometheus/procfs v0.6.0 // indirect - github.com/prometheus/prometheus v2.5.0+incompatible // indirect - github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 // indirect - github.com/samuel/go-parser v0.0.0-20130731160455-ca8abbf65d0e // indirect - github.com/samuel/go-thrift v0.0.0-20140522043831-2187045faa54 // indirect - github.com/santhosh-tekuri/jsonschema v1.2.4 // indirect - github.com/shirou/gopsutil v3.20.12+incompatible // indirect - github.com/sirupsen/logrus v1.8.1 // indirect - github.com/spf13/cobra v1.0.0 // indirect - github.com/spf13/pflag v1.0.5 // indirect - github.com/stretchr/testify v1.7.0 // indirect - github.com/tsg/gopacket v0.0.0-20200626092518-2ab8e397a786 // indirect - github.com/urso/diag v0.0.0-20200210123136-21b3cc8eb797 // indirect - github.com/urso/go-bin v0.0.0-20180220135811-781c575c9f0e // indirect - github.com/urso/magetools 
v0.0.0-20190919040553-290c89e0c230 // indirect - github.com/urso/sderr v0.0.0-20210525210834-52b04e8f5c71 // indirect - github.com/vmware/govmomi v0.0.0-20170802214208-2cad15190b41 // indirect - github.com/xdg/scram v1.0.3 // indirect - github.com/xdg/stringprep v1.0.3 // indirect - github.com/yuin/gopher-lua v0.0.0-20170403160031-b402f3114ec7 // indirect - go.elastic.co/apm v1.11.0 // indirect - go.elastic.co/apm/module/apmelasticsearch v1.7.2 // indirect - go.elastic.co/apm/module/apmhttp v1.7.2 // indirect - go.elastic.co/ecszap v0.3.0 // indirect - go.elastic.co/fastjson v1.1.0 // indirect - go.etcd.io/bbolt v1.3.6 // indirect - go.uber.org/atomic v1.5.0 // indirect - go.uber.org/multierr v1.3.0 // indirect - go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee // indirect - go.uber.org/zap v1.14.0 // indirect - golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e // indirect - golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect - golang.org/x/mod v0.4.2 // indirect - golang.org/x/net v0.0.0-20211020060615-d418f374d309 // indirect - golang.org/x/oauth2 v0.0.0-20211005180243-6b3c2da341f1 // indirect - golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect - golang.org/x/sys v0.0.0-20211102192858-4dd72447c267 // indirect - golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d // indirect - golang.org/x/text v0.3.7 // indirect - golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac // indirect - golang.org/x/tools v0.1.7 // indirect - golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect - google.golang.org/appengine v1.6.7 // indirect - google.golang.org/genproto v0.0.0-20211021150943-2b146023228c // indirect - google.golang.org/grpc v1.41.0 // indirect - google.golang.org/protobuf v1.27.1 // indirect - gopkg.in/inf.v0 v0.9.1 // indirect - gopkg.in/jcmturner/aescts.v1 v1.0.1 // indirect - gopkg.in/jcmturner/dnsutils.v1 v1.0.1 // indirect - gopkg.in/jcmturner/goidentity.v3 v3.0.0 // indirect - gopkg.in/jcmturner/gokrb5.v7 v7.5.0 // indirect - gopkg.in/jcmturner/rpc.v1 v1.1.0 // indirect - gopkg.in/mgo.v2 v2.0.0-20160818020120-3f83fa500528 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect - gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect - honnef.co/go/tools v0.0.1-2020.1.4 // indirect - howett.net/plist v0.0.0-20181124034731-591f970eefbb // indirect - k8s.io/api v0.21.1 // indirect - k8s.io/apimachinery v0.21.1 // indirect - k8s.io/client-go v0.21.1 // indirect - k8s.io/klog/v2 v2.8.0 // indirect - k8s.io/utils v0.0.0-20201110183641-67b214c5f920 // indirect - kernel.org/pub/linux/libs/security/libcap/cap v1.2.57 // indirect - kernel.org/pub/linux/libs/security/libcap/psx v1.2.57 // indirect - sigs.k8s.io/structured-merge-diff/v4 v4.1.0 // indirect - sigs.k8s.io/yaml v1.2.0 // indirect + github.com/BurntSushi/toml v0.3.1 // indirect + github.com/Microsoft/go-winio v0.5.1 // indirect + github.com/PaesslerAG/gval v1.0.0 // indirect + github.com/PaesslerAG/jsonpath v0.1.1 // indirect + github.com/Shopify/sarama v0.0.0-00010101000000-000000000000 // indirect + github.com/StackExchange/wmi v0.0.0-20170221213301-9f32b5905fd6 // indirect + github.com/aerospike/aerospike-client-go v1.27.1-0.20170612174108-0f3b54da6bdc // indirect + github.com/armon/go-radix v1.0.0 // indirect + github.com/cespare/xxhash/v2 v2.1.1 // indirect + github.com/containerd/containerd v1.5.7 // indirect + github.com/coreos/go-systemd/v22 v22.3.2 // indirect + github.com/davecgh/go-spew v1.1.1 // indirect + github.com/davecgh/go-xdr v0.0.0-20161123171359-e6a2ba005892 // 
indirect + github.com/digitalocean/go-libvirt v0.0.0-20180301200012-6075ea3c39a1 // indirect + github.com/dlclark/regexp2 v1.1.7-0.20171009020623-7632a260cbaf // indirect + github.com/docker/distribution v2.8.0+incompatible // indirect + github.com/docker/docker v1.4.2-0.20190924003213-a8608b5b67c7 // indirect + github.com/docker/go-connections v0.4.0 // indirect + github.com/docker/go-units v0.4.0 // indirect + github.com/dop251/goja v0.0.0-20200831102558-9af81ddcf0e1 // indirect + github.com/dop251/goja_nodejs v0.0.0-20171011081505-adff31b136e6 // indirect + github.com/dustin/go-humanize v1.0.0 // indirect + github.com/eapache/go-resiliency v1.2.0 // indirect + github.com/eapache/go-xerial-snappy v0.0.0-20180814174437-776d5712da21 // indirect + github.com/eapache/queue v1.1.0 // indirect + github.com/eclipse/paho.mqtt.golang v1.2.1-0.20200121105743-0d940dd29fd2 // indirect + github.com/elastic/ecs v1.12.0 // indirect + github.com/elastic/elastic-agent-client/v7 v7.0.0-20210727140539-f0905d9377f6 // indirect + github.com/elastic/go-concert v0.2.0 // indirect + github.com/elastic/go-libaudit/v2 v2.2.0 // indirect + github.com/elastic/go-lumber v0.1.0 // indirect + github.com/elastic/go-seccomp-bpf v1.2.0 // indirect + github.com/elastic/go-structform v0.0.9 // indirect + github.com/elastic/go-sysinfo v1.7.1 // indirect + github.com/elastic/go-txfile v0.0.7 // indirect + github.com/elastic/go-ucfg v0.8.3 // indirect + github.com/elastic/go-windows v1.0.1 // indirect + github.com/elastic/gosigar v0.14.2 // indirect + github.com/fatih/color v1.9.0 // indirect + github.com/fsnotify/fsevents v0.1.1 // indirect + github.com/fsnotify/fsnotify v1.5.1 // indirect + github.com/go-logr/logr v0.4.0 // indirect + github.com/go-ole/go-ole v1.2.5-0.20190920104607-14974a1cf647 // indirect + github.com/go-sourcemap/sourcemap v2.1.2+incompatible // indirect + github.com/go-sql-driver/mysql v1.5.0 // indirect + github.com/gocarina/gocsv v0.0.0-20170324095351-ffef3ffc77be // indirect + github.com/godbus/dbus v0.0.0-20190422162347-ade71ed3457e // indirect + github.com/godbus/dbus/v5 v5.0.5 // indirect + github.com/gofrs/flock v0.7.2-0.20190320160742-5135e617513b // indirect + github.com/gofrs/uuid v3.3.0+incompatible // indirect + github.com/gogo/protobuf v1.3.2 // indirect + github.com/golang/protobuf v1.5.2 // indirect + github.com/golang/snappy v0.0.4 // indirect + github.com/gomodule/redigo v1.8.3 // indirect + github.com/google/flatbuffers v1.12.1 // indirect + github.com/google/go-cmp v0.5.6 // indirect + github.com/google/gofuzz v1.1.0 // indirect + github.com/googleapis/gnostic v0.4.1 // indirect + github.com/gorhill/cronexpr v0.0.0-20180427100037-88b0669f7d75 // indirect + github.com/gorilla/websocket v1.4.2 // indirect + github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect + github.com/h2non/filetype v1.1.1 // indirect + github.com/hashicorp/errwrap v1.0.0 // indirect + github.com/hashicorp/go-multierror v1.1.0 // indirect + github.com/hashicorp/go-uuid v1.0.2 // indirect + github.com/hashicorp/golang-lru v0.5.4 // indirect + github.com/imdario/mergo v0.3.12 // indirect + github.com/inconshreveable/mousetrap v1.0.0 // indirect + github.com/insomniacslk/dhcp v0.0.0-20180716145214-633285ba52b2 // indirect + github.com/jcmturner/aescts/v2 v2.0.0 // indirect + github.com/jcmturner/dnsutils/v2 v2.0.0 // indirect + github.com/jcmturner/gofork v1.0.0 // indirect + github.com/jcmturner/gokrb5/v8 v8.4.2 // indirect + github.com/jcmturner/rpc/v2 v2.0.3 // indirect + github.com/joeshaw/multierror 
v0.0.0-20140124173710-69b34d4ec901 // indirect + github.com/jonboulle/clockwork v0.2.2 // indirect + github.com/json-iterator/go v1.1.11 // indirect + github.com/klauspost/compress v1.13.6 // indirect + github.com/lib/pq v1.1.2-0.20190507191818-2ff3cb3adc01 // indirect + github.com/magefile/mage v1.11.0 // indirect + github.com/mattn/go-colorable v0.1.6 // indirect + github.com/mattn/go-isatty v0.0.12 // indirect + github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect + github.com/miekg/dns v1.1.25 // indirect + github.com/mitchellh/hashstructure v0.0.0-20170116052023-ab25296c0f51 // indirect + github.com/mitchellh/mapstructure v1.3.3 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.1 // indirect + github.com/opencontainers/go-digest v1.0.0 // indirect + github.com/opencontainers/image-spec v1.0.2-0.20190823105129-775207bd45b6 // indirect + github.com/pierrec/lz4 v2.6.0+incompatible // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/pmezard/go-difflib v1.0.0 // indirect + github.com/prometheus/client_model v0.2.0 // indirect + github.com/prometheus/common v0.10.0 // indirect + github.com/prometheus/procfs v0.6.0 // indirect + github.com/prometheus/prometheus v2.5.0+incompatible // indirect + github.com/rcrowley/go-metrics v0.0.0-20201227073835-cf1acfcdf475 // indirect + github.com/samuel/go-parser v0.0.0-20130731160455-ca8abbf65d0e // indirect + github.com/samuel/go-thrift v0.0.0-20140522043831-2187045faa54 // indirect + github.com/santhosh-tekuri/jsonschema v1.2.4 // indirect + github.com/shirou/gopsutil v3.20.12+incompatible // indirect + github.com/sirupsen/logrus v1.8.1 // indirect + github.com/spf13/cobra v1.0.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect + github.com/stretchr/testify v1.7.0 // indirect + github.com/tsg/gopacket v0.0.0-20200626092518-2ab8e397a786 // indirect + github.com/urso/diag v0.0.0-20200210123136-21b3cc8eb797 // indirect + github.com/urso/go-bin v0.0.0-20180220135811-781c575c9f0e // indirect + github.com/urso/magetools v0.0.0-20190919040553-290c89e0c230 // indirect + github.com/urso/sderr v0.0.0-20210525210834-52b04e8f5c71 // indirect + github.com/vmware/govmomi v0.0.0-20170802214208-2cad15190b41 // indirect + github.com/xdg/scram v1.0.3 // indirect + github.com/xdg/stringprep v1.0.3 // indirect + github.com/yuin/gopher-lua v0.0.0-20170403160031-b402f3114ec7 // indirect + go.elastic.co/apm v1.11.0 // indirect + go.elastic.co/apm/module/apmelasticsearch v1.7.2 // indirect + go.elastic.co/apm/module/apmhttp v1.7.2 // indirect + go.elastic.co/ecszap v0.3.0 // indirect + go.elastic.co/fastjson v1.1.0 // indirect + go.etcd.io/bbolt v1.3.6 // indirect + go.uber.org/atomic v1.5.0 // indirect + go.uber.org/multierr v1.3.0 // indirect + go.uber.org/tools v0.0.0-20190618225709-2cfd321de3ee // indirect + go.uber.org/zap v1.14.0 // indirect + golang.org/x/crypto v0.0.0-20210616213533-5ff15b29337e // indirect + golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect + golang.org/x/mod v0.4.2 // indirect + golang.org/x/net v0.0.0-20211020060615-d418f374d309 // indirect + golang.org/x/oauth2 v0.0.0-20211005180243-6b3c2da341f1 // indirect + golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect + golang.org/x/sys v0.0.0-20211102192858-4dd72447c267 // indirect + golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d // indirect + golang.org/x/text v0.3.7 // indirect + golang.org/x/time 
v0.0.0-20210723032227-1f47c861a9ac // indirect + golang.org/x/tools v0.1.7 // indirect + golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect + google.golang.org/appengine v1.6.7 // indirect + google.golang.org/genproto v0.0.0-20211021150943-2b146023228c // indirect + google.golang.org/grpc v1.41.0 // indirect + google.golang.org/protobuf v1.27.1 // indirect + gopkg.in/inf.v0 v0.9.1 // indirect + gopkg.in/jcmturner/aescts.v1 v1.0.1 // indirect + gopkg.in/jcmturner/dnsutils.v1 v1.0.1 // indirect + gopkg.in/jcmturner/goidentity.v3 v3.0.0 // indirect + gopkg.in/jcmturner/gokrb5.v7 v7.5.0 // indirect + gopkg.in/jcmturner/rpc.v1 v1.1.0 // indirect + gopkg.in/mgo.v2 v2.0.0-20160818020120-3f83fa500528 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v3 v3.0.0 // indirect + honnef.co/go/tools v0.0.1-2020.1.4 // indirect + howett.net/plist v0.0.0-20181124034731-591f970eefbb // indirect + k8s.io/api v0.21.1 // indirect + k8s.io/apimachinery v0.21.1 // indirect + k8s.io/client-go v0.21.1 // indirect + k8s.io/klog/v2 v2.8.0 // indirect + k8s.io/utils v0.0.0-20201110183641-67b214c5f920 // indirect + kernel.org/pub/linux/libs/security/libcap/cap v1.2.57 // indirect + kernel.org/pub/linux/libs/security/libcap/psx v1.2.57 // indirect + sigs.k8s.io/structured-merge-diff/v4 v4.1.0 // indirect + sigs.k8s.io/yaml v1.2.0 // indirect ) replace ( - github.com/Microsoft/go-winio => github.com/bi-zone/go-winio v0.4.15 - github.com/Shopify/sarama => github.com/elastic/sarama v1.19.1-0.20210823122811-11c3ef800752 - github.com/docker/docker => github.com/docker/engine v0.0.0-20191113042239-ea84732a7725 - github.com/docker/go-plugins-helpers => github.com/elastic/go-plugins-helpers v0.0.0-20200207104224-bdf17607b79f - github.com/dop251/goja => github.com/andrewkroh/goja v0.0.0-20190128172624-dd2ac4456e20 - github.com/dop251/goja_nodejs => github.com/dop251/goja_nodejs v0.0.0-20171011081505-adff31b136e6 - github.com/fsnotify/fsevents => github.com/elastic/fsevents v0.0.0-20181029231046-e1d381a4d270 - github.com/fsnotify/fsnotify => github.com/adriansr/fsnotify v1.4.8-0.20211018144411-a81f2b630e7c - github.com/google/gopacket => github.com/adriansr/gopacket v1.1.18-0.20200327165309-dd62abfa8a41 - github.com/insomniacslk/dhcp => github.com/elastic/dhcp v0.0.0-20200227161230-57ec251c7eb3 // indirect + github.com/Microsoft/go-winio => github.com/bi-zone/go-winio v0.4.15 + github.com/Shopify/sarama => github.com/elastic/sarama v1.19.1-0.20210823122811-11c3ef800752 + github.com/docker/docker => github.com/docker/engine v0.0.0-20191113042239-ea84732a7725 + github.com/docker/go-plugins-helpers => github.com/elastic/go-plugins-helpers v0.0.0-20200207104224-bdf17607b79f + github.com/dop251/goja => github.com/andrewkroh/goja v0.0.0-20190128172624-dd2ac4456e20 + github.com/dop251/goja_nodejs => github.com/dop251/goja_nodejs v0.0.0-20171011081505-adff31b136e6 + github.com/fsnotify/fsevents => github.com/elastic/fsevents v0.0.0-20181029231046-e1d381a4d270 + github.com/fsnotify/fsnotify => github.com/adriansr/fsnotify v1.4.8-0.20211018144411-a81f2b630e7c + github.com/google/gopacket => github.com/adriansr/gopacket v1.1.18-0.20200327165309-dd62abfa8a41 + github.com/insomniacslk/dhcp => github.com/elastic/dhcp v0.0.0-20200227161230-57ec251c7eb3 // indirect ) diff --git a/extension/beats/go.sum b/extension/beats/go.sum index 547110c371d577..0c17d71a6c6ec4 100644 --- a/extension/beats/go.sum +++ b/extension/beats/go.sum @@ -1297,6 +1297,8 @@ gopkg.in/yaml.v2 v2.4.0/go.mod 
h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo= gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gopkg.in/yaml.v3 v3.0.0 h1:hjy8E9ON/egN1tAYqKb61G10WtihqetD4sz2H+8nIeA= +gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo= gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw= gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk= From 4f42ee436aa528e6291565b39c820cfcaa515151 Mon Sep 17 00:00:00 2001 From: airborne12 Date: Sat, 9 Sep 2023 15:31:40 +0800 Subject: [PATCH 07/31] [Fix](clucene) fix clucene build error in arm (#24130) --- be/src/clucene | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/clucene b/be/src/clucene index fd453665055c65..2761b1afe48cb1 160000 --- a/be/src/clucene +++ b/be/src/clucene @@ -1 +1 @@ -Subproject commit fd453665055c65b94892d13a93ac47180afd72bb +Subproject commit 2761b1afe48cb1bd272ce8959e3aa3049b6e63ac From 1a51937fdf67bda2af0683a286bcdd932817beeb Mon Sep 17 00:00:00 2001 From: ZhenchaoXu <49646212+ixzc@users.noreply.github.com> Date: Sun, 10 Sep 2023 12:11:43 +0800 Subject: [PATCH 08/31] [regression-test](fix)add test_ifnull. (#23956) --- .../conditional_functions/test_ifnull.out | 4 ++ .../conditional_functions/test_ifnull.groovy | 40 +++++++++++++++++++ 2 files changed, 44 insertions(+) create mode 100644 regression-test/data/query_p0/sql_functions/conditional_functions/test_ifnull.out create mode 100644 regression-test/suites/query_p0/sql_functions/conditional_functions/test_ifnull.groovy diff --git a/regression-test/data/query_p0/sql_functions/conditional_functions/test_ifnull.out b/regression-test/data/query_p0/sql_functions/conditional_functions/test_ifnull.out new file mode 100644 index 00000000000000..b55eb54faf3bdd --- /dev/null +++ b/regression-test/data/query_p0/sql_functions/conditional_functions/test_ifnull.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +1 11111.111110000 2222.22222 11111.11111 2222.22222 + diff --git a/regression-test/suites/query_p0/sql_functions/conditional_functions/test_ifnull.groovy b/regression-test/suites/query_p0/sql_functions/conditional_functions/test_ifnull.groovy new file mode 100644 index 00000000000000..32ccc70ebc4619 --- /dev/null +++ b/regression-test/suites/query_p0/sql_functions/conditional_functions/test_ifnull.groovy @@ -0,0 +1,40 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_ifnull") {
+    def tbName = "test_ifnull"
+    sql "DROP TABLE IF EXISTS ${tbName};"
+    sql"""
+        CREATE TABLE IF NOT EXISTS ${tbName} (
+            id int(11) NULL,
+            t_decimal DECIMALV3(26, 9) NULL,
+            test_double double NULL
+        ) ENGINE = OLAP
+        DUPLICATE KEY(id)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES (
+        "replication_allocation" = "tag.location.default: 1");
+        """
+    sql"""
+        INSERT INTO test_ifnull VALUES(1,11111.11111,2222.22222);
+        """
+
+    qt_sql "select id,t_decimal,test_double,ifnull(t_decimal,test_double) as if_dou,ifnull(test_double,t_decimal) as if_dei from test_ifnull;"
+
+    sql "DROP TABLE ${tbName};"
+}
+

From 6ea7e918a60a5086aed40f857a0a5fe353ece954 Mon Sep 17 00:00:00 2001
From: HHoflittlefish777 <77738092+HHoflittlefish777@users.noreply.github.com>
Date: Sun, 10 Sep 2023 16:32:17 +0800
Subject: [PATCH 09/31] [Improvement](errorcode) use error code when disk exceed capacity limit (#24136)

---
 be/src/olap/rowset/segment_v2/segment_writer.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp
index dae5c77f3486df..e1bbe03f1a413d 100644
--- a/be/src/olap/rowset/segment_v2/segment_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp
@@ -913,7 +913,8 @@ Status SegmentWriter::finalize(uint64_t* segment_file_size, uint64_t* index_size
     timer.start();
     // check disk capacity
     if (_data_dir != nullptr && _data_dir->reach_capacity_limit((int64_t)estimate_segment_size())) {
-        return Status::InternalError("disk {} exceed capacity limit.", _data_dir->path_hash());
+        return Status::Error<ErrorCode::DISK_REACH_CAPACITY_LIMIT>("disk {} exceed capacity limit.",
+                                                                   _data_dir->path_hash());
     }
     // write data
     RETURN_IF_ERROR(finalize_columns_data());

From add3b95417a016522871f7a8528a7bbe32a787eb Mon Sep 17 00:00:00 2001
From: amory
Date: Mon, 11 Sep 2023 08:28:11 +0800
Subject: [PATCH 10/31] [Improve](regresstests)add boundary regress tests for map & array #24133

---
 ..._types_insert_into_with_duplicat_table.out |  16 +
 .../nested_types/test_scalar_types_100.csv    | 100 +++
 ...pes_insert_into_with_duplicat_table.groovy | 647 ++++++++++++++++++
 3 files changed, 763 insertions(+)
 create mode 100644 regression-test/data/datatype_p0/nested_types/test_nested_types_insert_into_with_duplicat_table.out
 create mode 100644 regression-test/data/datatype_p0/nested_types/test_scalar_types_100.csv
 create mode 100644 regression-test/suites/datatype_p0/nested_types/test_nested_types_insert_into_with_duplicat_table.groovy

diff --git a/regression-test/data/datatype_p0/nested_types/test_nested_types_insert_into_with_duplicat_table.out b/regression-test/data/datatype_p0/nested_types/test_nested_types_insert_into_with_duplicat_table.out
new file mode 100644
index 00000000000000..c37c52400542df
--- /dev/null
+++ b/regression-test/data/datatype_p0/nested_types/test_nested_types_insert_into_with_duplicat_table.out
@@ -0,0 +1,16 @@
+-- This file is automatically generated.
You should know what you did if you want to edit this +-- !sql_nested_table_dup_c -- +306 + +-- !sql_nested_table_dup2_c -- +306 + +-- !sql_nested_table_map_dup_c -- +306 + +-- !sql_nested_table_array_map_dup_c -- +306 + +-- !sql_nested_table_map_array_dup_c -- +306 + diff --git a/regression-test/data/datatype_p0/nested_types/test_scalar_types_100.csv b/regression-test/data/datatype_p0/nested_types/test_scalar_types_100.csv new file mode 100644 index 00000000000000..4e275b43d0423f --- /dev/null +++ b/regression-test/data/datatype_p0/nested_types/test_scalar_types_100.csv @@ -0,0 +1,100 @@ +-2147475406 1 45 23794 -11023 915989078 2115356192 15927.068 1392557423.3915009 45951348783208518.81 8340516346665031.310 2022-01-26 2022-04-13 11:13:48 2022-01-31 2022-02-16 06:07:21 130.50.6.0 DeniseMatthews@Yozio.mil Londonderry Alley 61 +-2147413967 1 -75 30533 -5435 -727385447 32929830 9577.5645 1334766997.510087 39973144022098028.8 5886463393340733.108 2022-06-23 2022-05-10 19:13:50 2022-01-17 2022-11-26 22:49:36 157.38.90.25 JoshuaLewis@Jayo.mil Loeprich Crossing 43 +-2147380173 1 -79 -5785 9752 1851350218 1121852298 25652.402 -1618061059.5135579 95821873014545736.897 38923569966532828.626 2022-10-30 2022-05-02 17:06:33 2022-08-11 2022-02-08 10:19:47 217.198.98.239 JoseBoyd@Voonder.info Lawn Lane 78 +-2147369329 0 -121 -22859 4733 -378861997 385323541 -22969.846 1483825622.420542 50940877800950041.95 87108729227937387.294 2022-06-05 2022-08-18 05:39:56 2022-08-21 2022-12-12 08:43:59 16.27.107.167 PhyllisSmith@Zoombox.org Village Green Terrace 55 +-2147367329 1 84 21471 -29331 1823545950 1200800855 -13832.219 801505090.724918 45495296019797580.477 45196001436348967.557 2022-02-17 2022-05-23 01:44:31 2022-08-01 2022-08-16 10:32:36 84.110.209.128 vLane@Dablist.edu Packers Street 34 +-2147339287 1 62 28989 -32018 650184880 -365849435 -21644.414 -78648426.469503 92593387160450273.87 39588697152489527.185 2022-07-23 2023-01-03 11:54:35 2022-08-02 2022-05-19 18:35:36 30.194.6.115 veniam@Thoughtstorm.mil Basil Street 79 +-2147306138 0 82 -32442 -6144 714240734 -1231183014 -13242.392 384775850.618081 95346047516039276.423 74114490752253685.481 2022-05-26 2022-05-24 08:29:39 2022-02-05 2022-04-08 02:48:34 169.31.82.131 illum_et@Dynabox.mil Warrior Parkway 77 +-2147303679 0 -96 15999 9232 -1599308504 -734311941 -5495.8071 -1399215099.7846489 17443816097166764.281 84525658185172942.967 2022-04-11 2022-01-19 15:51:51 2022-01-24 2022-12-02 04:39:45 8.212.247.122 xGonzales@Yamia.edu Thierer Way 38 +-2147262763 1 127 -1700 1542 130731024 335031107 19994.604 -1825872281.30349 55231262378035099.197 69682235501133830.355 2022-12-23 2022-10-21 07:09:28 2022-10-20 2022-06-30 08:03:59 244.225.174.1 zAllen@Edgetag.mil Arrowood Place 93 +-2147241282 1 17 -16304 -23590 421313153 447840244 -7585.0444 1840220725.287715 47206868481841239.822 72929785481453927.981 2022-05-04 2022-08-17 16:04:19 2022-02-27 2022-04-13 16:08:20 231.227.99.116 fugit@Tambee.com Little Fleur Avenue 96 +-2147234672 0 -48 10083 12902 293756986 -1365816427 3501.8916 -501012774.008986 85564175718642344.976 29822328119067807.175 2022-05-15 2022-03-10 22:02:49 2022-11-03 2022-04-24 05:23:32 186.147.184.146 BonnieSimmons@Oyope.net Lakewood Alley 37 +-2147220564 1 -3 -10961 -32113 -2086483017 1854884810 -5685.6948 -1648274138.040339 89701570207561342.341 52446198336966115.232 2022-03-31 2022-09-19 16:22:10 2022-12-02 2022-04-17 01:07:47 208.57.124.182 lPatterson@Meezzy.net Cascade Park 83 +-2147220530 0 -68 27003 -7963 -1749726923 1016429745 10021.474 
-2045081975.098917 49223430141903125.79 58145291373473490.545 2022-08-08 2022-12-09 12:58:47 2022-07-25 2022-03-13 19:37:07 167.227.79.184 id@Jaxbean.org Florence Park 92 +-2147170182 0 -33 -15657 -12309 1888343611 168341736 10429.622 1433026351.1586189 38273503307442458.737 23092430621668026.713 2022-03-13 2022-06-13 20:49:14 2022-08-02 2022-08-16 19:42:30 76.13.7.252 earum@Shufflebeat.info Novick Alley 72 +-2147167855 1 59 -2102 11613 429729049 -1810964223 5669.7588 383834690.999246 47784385840870915.241 58526954263785775.470 2022-07-23 2022-09-27 21:37:17 2022-10-19 2022-02-28 09:13:06 116.166.202.138 NicoleJackson@Teklist.biz Steensland Circle 87 +-2147071037 0 60 -21638 17612 -2068486682 -351300386 -13496.469 -1954522093.3062749 81355805409001937.98 64611677704695220.731 2022-12-08 2022-11-14 15:26:56 2022-09-26 2022-07-30 02:47:43 72.26.237.241 JamesLopez@Nlounge.net Jenifer Parkway 34 +-2147038421 0 63 23955 27295 2098427263 551774470 30370.965 1873261498.9024529 93626359283633324.242 1440851989809227.726 2022-04-03 2022-10-05 06:46:26 2022-06-02 2022-05-06 14:36:09 112.95.128.98 AnnieBell@Meejo.net South Terrace 47 +-2146999016 0 -67 12209 10490 1666601359 -191558046 22327.137 -324191025.475507 99929623297985537.658 8706682236142820.237 2022-04-07 2022-01-23 04:59:06 2022-09-14 2022-08-24 00:28:28 57.73.12.124 nMartin@Divavu.name Crescent Oaks Avenue 86 +-2146973921 0 -85 -685 -2405 919669201 -2083543032 -30503.852 1616588624.9597061 24903212416358700.45 68627810911452355.200 2022-05-13 2022-06-04 20:38:39 2022-09-26 2022-01-19 18:58:51 189.26.210.28 MarilynWashington@Vinder.info Pawling Terrace 93 +-2146861169 1 81 -2824 10178 1616671669 203020347 25702.715 519120345.825994 9102614354938345.928 58971606284028208.940 2022-08-07 2022-07-10 06:21:48 2022-07-21 2022-05-26 20:01:58 14.181.121.211 RussellMason@Eazzy.gov 2nd Park 6 +-2146822202 0 -36 -23619 -26047 1582371149 596601012 8630.7041 -1115665955.4916 2476388505097042.944 77236300365480308.680 2022-12-15 2022-12-14 14:44:29 2022-10-16 2022-12-14 15:02:23 202.23.160.52 rem_ad_sit@Linklinks.mil Randy Way 97 +-2146803403 0 74 960 -7477 -2051390646 1540228673 -31983.715 812047625.72862 21838472783035225.643 88867383151247743.250 2022-03-01 2022-11-01 06:36:23 2022-11-01 2022-06-03 03:16:59 4.147.25.93 pTorres@Camimbo.mil Oak Valley Parkway 80 +-2146582299 0 85 -27853 1324 -1650959019 -1472434191 -18603.736 -499861526.004434 70283908283434864.739 42321764198164632.913 2022-12-22 2022-11-01 07:24:51 2022-12-16 2022-09-22 06:52:38 116.192.166.250 DonnaHawkins@Pixonyx.edu Grover Trail 43 +-2146564990 1 53 25839 8185 1077130648 807012435 -14279.709 6560114.936127 21548000979076644.209 47056284607153068.861 2022-04-07 2022-10-29 10:38:56 2023-01-10 2022-08-13 12:33:16 28.121.28.3 JenniferLopez@Devpulse.net Harbort Road 79 +-2146517980 0 119 -11719 -12530 -812156962 -122568571 28956.367 -1752583714.386843 43916212562252030.704 26620187467667027.195 2022-06-11 2022-04-28 14:16:05 2022-07-20 2022-12-04 03:25:28 80.213.202.166 PatriciaBishop@Youtags.gov Redwing Trail 31 +-2146495716 0 109 21484 23048 1777853186 1471267102 25702.85 148300441.94391 51074710968485572.32 13002292166538322.926 2022-08-09 2022-06-03 07:07:53 2022-04-27 2022-06-24 18:00:19 99.24.211.208 EvelynRice@Minyx.net Maple Street 51 +-2146477239 1 -71 -10895 -3231 1544014139 1313937231 29074.635 -108208031.479772 3800768464606510.269 90532685597830794.926 2022-09-11 2022-03-13 11:51:35 2022-10-24 2022-05-23 04:38:46 218.219.46.247 TimothyBowman@Leexo.info Trailsway Point 97 
+-2146449031 1 3 31881 12622 -835781221 -1439511933 10810.164 -1041749657.675557 47620162910458136.975 70738076271602363.141 2022-10-26 2022-01-17 10:52:56 2022-08-16 2022-07-01 19:38:44 49.222.69.178 4Fields@Flipbug.org Marquette Junction 3 +-2146400130 0 -78 -16371 -32117 1360465595 -219994576 -11442.081 -1234009689.778173 26021135988565174.2 88370525725068482.505 2022-08-30 2022-06-19 18:54:13 2022-08-11 2022-08-30 19:41:40 252.136.194.236 kHawkins@Linkbuzz.edu Buhler Lane 34 +-2146367811 0 112 25407 -16877 1290695679 46285736 -16979.607 -616399770.11097 95798148476984480.956 64938168551880736.250 2023-01-09 2022-09-02 16:43:05 2022-06-09 2022-11-15 09:37:40 59.207.196.102 yDiaz@JumpXS.info Dottie Hill 79 +-2146259170 0 -48 -9147 4321 -1275173801 -679287518 -782.96674 -1989177332.756572 7113020583870576.802 81184822389678743.948 2022-04-14 2022-06-24 03:04:22 2022-09-09 2022-12-12 22:32:41 96.198.83.221 eaque_voluptas@Skaboo.gov Bobwhite Park 95 +-2146227671 1 38 3199 6154 156878176 142376745 -5616.5903 1999769776.495497 95338145891386545.822 44635105305534225.122 2022-10-15 2022-11-25 00:46:47 2022-03-25 2022-09-11 23:58:44 214.15.97.228 bOwens@Oozz.name Atwood Drive 94 +-2146189866 0 -125 10363 15234 -1242115105 -2008134886 28431.93 2016316456.8750169 26808579354082227.593 85575675066951751.206 2022-06-26 2022-12-12 08:00:54 2022-02-22 2022-07-09 05:06:14 225.8.171.247 qui_culpa_corporis@Jazzy.mil Starling Way 22 +-2146182665 1 -126 -9842 -15949 -705548821 -1268773397 -12721.22 311896719.505819 45371634693300005.877 57518353894747451.330 2022-08-27 2022-03-08 14:16:57 2022-09-14 2022-11-19 20:41:09 196.116.99.255 corrupti_recusandae_distinctio@Mydeo.name Tomscot Parkway 80 +-2146163661 1 -9 -6191 3725 1118274531 -1491226994 20852.549 1219586426.7228949 20063506131529737.201 86263100386121098.503 2022-11-26 2022-07-06 20:36:00 2022-08-07 2022-06-25 03:45:59 166.106.100.67 fHudson@Oyoyo.info Anhalt Junction 69 +-2146145479 1 115 -31626 24835 468792617 1776909375 30019.051 -1679301024.0002871 85375396702905307.775 48368955491907175.294 2022-06-25 2022-07-20 12:21:13 2022-10-20 2022-02-20 23:49:54 174.90.113.38 lCunningham@Aimbu.org Milwaukee Junction 23 +-2146062572 1 85 10733 31870 305720846 -1141536191 -13008.365 1719287295.649487 93088835134891116.722 45414904693614422.501 2022-07-18 2022-05-12 17:20:08 2022-05-06 2022-12-28 20:04:32 182.228.154.114 KellyPerry@Eamia.net Harbort Lane 11 +-2146044782 0 45 28866 -29323 -1668074083 272624707 22338.168 357368915.263751 38293538835351491.805 75157465420643291.660 2022-06-07 2022-07-18 22:12:21 2022-02-04 2022-02-15 12:25:12 186.98.36.253 MargaretCox@Npath.org Linden Trail 42 +-2145970543 0 11 -29565 1158 -1527730334 399180986 19722.717 1251554135.3197169 40658391723993420.706 44185253771848385.156 2022-01-28 2022-07-10 00:59:19 2022-09-23 2022-11-15 12:58:27 122.171.24.45 TheresaSanchez@Vipe.com Morningstar Court 0 +-2145929604 1 -86 -1617 14607 -1076595694 -1727723754 -18170.061 -1816106544.7038341 21908195993588334.463 82349972554547635.107 2022-04-17 2022-10-14 16:54:55 2022-05-29 2022-11-19 16:05:00 96.133.98.227 rBishop@Mudo.edu Fordem Road 94 +-2145832027 0 -81 22321 -16757 -848281144 783508735 -13826.459 520550717.214763 86994123933991820.457 17218767804756892.590 2022-08-25 2022-09-05 06:19:31 2022-07-15 2022-08-25 16:01:12 196.155.19.229 cSmith@Avamba.com Parkside Way 29 +-2145739104 1 10 -22603 6132 -984517723 138439036 8683.9043 1681202635.040786 49683339998558535.395 38251259739648714.297 2022-04-26 2022-09-12 00:32:18 
2022-11-20 2023-01-09 16:19:06 180.215.212.86 KathyRoberts@Talane.info Darwin Center 26 +-2145722565 0 -25 31882 24465 -1761136832 -295515357 -9526.9219 -12925414.621229 22608011502065630.802 71368184597386700.659 2022-08-16 2022-01-12 00:15:54 2022-08-05 2022-11-20 00:03:29 248.111.226.86 ShirleyWashington@Photojam.biz Mariners Cove Street 72 +-2145665591 0 -116 -26163 21122 623227152 1468000005 -30261.648 1694091260.025321 35716264325571392.235 71693879700537512.756 2022-08-27 2022-03-26 10:05:15 2022-01-29 2022-12-27 01:05:27 217.164.24.148 zParker@Topicstorm.net Bashford Center 41 +-2145633486 0 -7 -29261 -14663 1446719542 -1643728420 -15327.756 -257716890.997028 70421564778970287.808 76282863984144460.436 2022-08-05 2022-10-28 00:21:26 2022-05-12 2022-08-26 07:31:12 170.155.83.47 mTurner@Tagpad.net Green Lane 18 +-2145632811 0 68 -16237 -31873 -492698917 350757927 12625.895 1400661023.750335 33383841474886390.439 91709334227041002.853 2022-04-14 2022-08-19 18:08:51 2022-02-07 2022-02-19 16:27:39 55.44.200.103 est_commodi@Skinder.com Fuller Pass 10 +-2145601425 0 -42 -21289 16303 1715251077 -768157880 -16677.92 858580400.165352 84889989042179242.388 83277010238707085.868 2022-11-03 2022-04-24 14:14:38 2022-04-08 2022-10-05 12:07:32 250.212.97.21 excepturi_accusantium@Pixoboo.mil Graceland Center 31 +-2145561361 1 97 -27380 -30393 933246548 -1910617227 28721.449 -1277642339.7692859 62328918632405846.629 20238751781349550.514 2022-11-14 2022-12-14 13:34:30 2022-09-09 2022-12-19 20:04:35 23.58.157.18 rerum_occaecati@Oodoo.gov Golf Course Terrace 68 +-2145460379 0 -95 5722 -16508 -124831759 -587191090 8284.35 -242024146.630121 80881311321913929.245 42618318053361217.455 2022-02-25 2022-04-28 12:42:53 2022-12-09 2022-02-10 11:15:17 200.138.143.177 IreneAnderson@Babblestorm.net Fordem Road 11 +-2145425147 0 124 -7791 -30996 595721721 -1102059603 -32708.266 1665604964.101907 55864976045339865.119 94874173505018262.845 2022-09-02 2022-07-10 02:43:06 2022-07-19 2022-12-25 13:11:31 193.38.117.73 sGarza@Layo.com Delaware Pass 19 +-2145396106 1 -83 -20898 31609 -307605479 -2094510426 30493.371 878466307.117619 83136457059671165.732 41114370644431941.515 2022-11-15 2022-09-20 08:30:47 2022-05-17 2022-01-15 08:30:33 222.141.128.219 eWillis@Rhyloo.edu Rowland Lane 63 +-2145360501 0 -110 -15175 -18712 515185152 1239978996 -2291.9849 2116240058.1139021 23841421408124599.728 45350604150789138.167 2022-05-01 2022-11-09 02:59:53 2022-10-24 2022-03-14 12:38:10 76.217.234.230 SamuelSimmons@Yotz.info Sutteridge Avenue 56 +-2145299388 1 -101 12056 13671 268136166 -618219111 -20649.189 1608059836.4739521 81743649494764280.259 32070149770666675.420 2022-07-22 2022-07-27 16:30:27 2022-11-01 2022-05-06 18:41:55 217.114.13.223 ea@Tazzy.name Reinke Crossing 93 +-2145291376 1 -105 8701 -6812 -1011912910 -1066656918 2380.9619 -2108710402.7121351 99444820994343516.701 15100141674083750.907 2022-08-22 2022-11-08 21:42:18 2022-11-06 2022-11-08 00:21:08 110.161.17.198 iusto_vero_minus@Flashdog.info Sunbrook Drive 13 +-2145194985 1 83 5864 27521 -1337479564 -580923735 -27982.447 -1586058801.6278429 85274571766729620.277 43007736970908035.636 2022-10-05 2022-02-24 15:00:09 2022-09-26 2022-01-28 22:17:26 181.67.114.151 tFowler@Babbleset.org Park Meadow Plaza 1 +-2145117133 1 43 -22233 -15578 -1374063749 2102972527 15984.653 286225644.695247 15704479415194073.33 42338288870486002.517 2022-12-12 2022-11-17 02:15:20 2022-08-05 2022-10-04 14:21:17 15.253.144.136 VirginiaLittle@Zava.edu Northview Pass 25 +-2145053697 1 63 24678 
30780 1386254260 1727111403 -3694.6951 -1739481366.5108931 88618778931917380.493 45701286296992048.290 2022-03-05 2022-03-26 21:05:09 2022-05-24 2022-05-26 02:55:13 70.0.191.108 uRichards@Riffwire.info Londonderry Plaza 16 +-2144972147 1 88 17229 -17732 1593680044 34341236 26159.938 36622745.943803 88358697061566919.847 88334931490026854.464 2022-04-20 2022-04-12 23:10:09 2022-02-10 2022-09-05 07:07:34 58.194.131.41 PeterGreene@Jaxnation.gov Forster Park 81 +-2144966593 1 -27 9653 -6739 1025051282 -633745072 28409.896 396529484.351023 85763035757670570.458 72529050683624762.160 2022-10-13 2022-06-06 23:45:32 2023-01-05 2022-12-24 16:52:24 132.36.250.18 ClarenceYoung@Edgepulse.biz Starling Center 38 +-2144966479 0 91 -18098 -24991 -1375439663 -454292026 -13798.354 -604479896.160994 71636202162968825.364 83294712839041576.770 2022-09-04 2022-12-17 16:55:34 2022-07-05 2022-02-03 18:28:27 253.32.48.110 GeraldFlores@Jayo.mil Memorial Plaza 71 +-2144959740 0 -67 -20993 -28898 -1055516969 1952651271 -26628.711 843239279.204007 79598054421552684.652 6846269631516872.934 2022-08-28 2022-07-25 03:21:54 2022-10-14 2022-10-09 19:01:57 74.60.33.163 HarryGreene@Twimm.info Novick Street 15 +-2144940625 1 59 14325 1293 -1454272015 1636582867 7828.9136 714184629.522527 93505841441868380.288 56380447721200594.233 2022-09-10 2022-10-19 05:03:04 2022-04-15 2022-07-10 22:10:42 10.78.99.245 RobinHenderson@Yakidoo.com Oak Place 30 +-2144845416 1 -81 -9152 7489 589678882 947357197 29273.635 1985251329.6431971 93869097107040064.747 75248225651730060.818 2022-04-12 2022-03-11 05:03:49 2022-10-12 2022-03-15 11:07:19 44.205.195.141 rMatthews@Demimbu.org Warrior Crossing 48 +-2144820475 1 -73 4950 27213 -1327633395 701390842 3292.8323 -69381031.0928 37074572129520413.797 35027726190684940.250 2022-08-09 2022-03-02 09:04:59 2022-12-14 2022-04-09 02:16:41 198.244.37.154 numquam@Yakijo.name Troy Drive 51 +-2144607746 0 122 19340 2026 -1579100047 -552500733 -29696.631 -1577550324.2880819 38927322641940550.63 70685944866009829.257 2022-09-06 2022-11-24 13:04:59 2022-02-05 2022-05-29 08:13:07 177.51.72.145 vSanchez@Brainsphere.gov Hayes Pass 50 +-2144563514 1 115 -13085 -29492 869753958 -150789058 19564.104 -781697798.634192 90520240448499160.11 25258872303793553.630 2022-11-19 2022-05-08 04:26:54 2022-09-26 2022-09-06 12:47:21 5.185.80.234 mRice@Meeveo.net Golf Point 88 +-2144376400 0 -18 20201 4388 -153955260 -1996968380 29197.625 -248142639.286893 90264002765485637.496 81449853952508666.733 2022-05-23 2022-05-03 21:41:39 2023-01-01 2022-08-04 14:24:34 123.164.170.233 quo@Innojam.net Paget Road 54 +-2144354593 1 -90 8919 -20662 1293340207 -362462464 -215.71274 -985675366.569135 34964025047403383.39 59692983529863851.202 2022-12-12 2022-09-05 15:27:21 2022-10-20 2022-05-15 00:43:56 47.250.130.192 eum_nam@Skyndu.org Namekagon Alley 52 +-2144334048 0 16 437 -1380 678833141 -1288685536 5194.0576 1844253162.901345 46137574862531306.999 54398595490959023.539 2022-06-05 2022-04-23 20:33:39 2022-06-16 2022-02-07 14:04:31 211.39.177.254 impedit_sit_atque@Skippad.mil Mallard Place 64 +-2144227801 1 58 -159 11215 1801132046 1696811958 -22308.641 -683719894.026245 16411995391647226.2 37580485653448904.580 2022-10-24 2022-07-31 00:29:25 2022-10-27 2022-03-17 19:22:50 36.138.55.238 LarryGreene@Mita.net Grim Junction 44 +-2144030103 1 -103 1605 -16717 1260682672 896689788 -6821.1284 715213723.010525 62079712857619101.783 38666447509787594.275 2022-12-18 2022-09-17 20:53:18 2022-04-04 2022-04-29 14:15:21 75.200.208.117 
xKnight@Feedbug.info Sunbrook Park 8 +-2144013233 0 -119 1628 -28916 2038471071 1582504793 -11128.479 973453486.240564 84269555801451263.771 51980887713899057.696 2022-11-12 2022-03-29 19:45:20 2022-09-24 2022-05-16 10:14:18 30.144.183.87 facilis_aut_porro@Plajo.org Hollow Ridge Park 51 +-2143966117 0 -114 20692 2027 523580033 -1718467818 18184.66 -1354794501.3727529 86966527964375628.304 62487768983009646.601 2023-01-02 2022-05-03 03:41:27 2022-09-25 2022-09-24 21:19:08 42.8.63.24 JoeMontgomery@Yombu.org Kropf Pass 39 +-2143901919 0 43 -8993 -26732 1662967743 -1253916370 15632.667 -820574886.744517 6826439255534778.762 76341859464221972.596 2022-06-08 2022-08-30 14:02:41 2022-09-06 2022-03-06 06:26:46 213.150.37.193 eligendi@Trupe.gov Talmadge Crossing 22 +-2143820973 1 97 15802 -28578 -1996071321 -515217738 8389.3857 -1952411457.3189189 24155835405288626.3 34158493154201691.294 2022-10-29 2022-10-20 16:48:56 2022-06-19 2022-12-02 09:54:37 244.148.103.52 KarenGordon@Rhynoodle.gov Kipling Trail 22 +-2143802626 0 79 -29704 -23502 -130664066 727522946 1451.0667 520730371.13745 69847230671266302.43 56676269132838737.700 2022-02-20 2022-02-26 17:10:26 2022-04-04 2022-09-16 20:43:07 60.236.148.112 KathyHudson@Eidel.info Shasta Junction 7 +-2143782943 0 -22 20515 19610 -1319791794 -314420353 24465.053 1399304794.205512 68642611277069521.79 58684870966649713.973 2022-07-08 2022-07-23 08:50:18 2022-07-12 2022-06-09 07:00:52 90.69.59.165 xBell@Centidel.edu Tennyson Junction 67 +-2143721630 1 -103 -14534 30268 -391530620 -780530250 5588.2588 1705886332.835176 29752392497500497.358 72211476586480280.100 2022-10-27 2022-03-22 11:50:10 2022-04-27 2022-03-20 14:22:29 205.33.84.38 fWelch@Youfeed.com Linden Circle 48 +-2143709080 0 78 15558 -28566 1567367765 1711244548 -21312.086 -1676765755.015152 7483050290894384.235 14348426565176102.933 2022-02-05 2022-12-03 09:48:10 2022-06-01 2022-10-19 06:09:53 153.192.186.165 aperiam_officia_consequuntur@Skipfire.mil Golden Leaf Plaza 85 +-2143700018 0 36 30822 -12823 1194565379 -372629928 -12461.867 1111583246.475472 97844642199172550.685 45195433257693102.150 2022-09-16 2022-05-15 16:18:55 2022-04-10 2022-03-26 07:59:08 80.84.134.21 JaniceAdams@Oyoyo.biz Manley Road 16 +-2143665278 0 125 21712 -11361 1627015782 648335755 -6500.37 -1657840634.8634241 36489931479000188.87 53618549111404470.123 2022-06-24 2022-06-28 06:09:52 2022-08-16 2022-09-08 02:39:38 148.179.124.136 molestiae_aliquam_qui@Voolia.info Twin Pines Trail 2 +-2143553210 1 29 23596 -29085 1570476845 1400264311 5292.9404 985511588.312963 26691279718212282.618 52893811507411508.857 2022-08-14 2022-02-25 09:14:36 2022-09-24 2022-04-16 17:37:26 249.206.88.52 jFord@Edgepulse.com Kipling Terrace 14 +-2143466167 0 -48 20078 1500 -300859071 2094503425 7954.3755 -644064655.863321 44300022488505140.73 81346391493252121.766 2022-09-06 2022-11-03 09:45:24 2022-01-26 2022-07-14 21:49:44 210.221.225.180 MildredDean@Realpoint.edu Commercial Lane 81 +-2143338029 0 65 28583 -12447 -1738928304 1832257131 -9549.6064 -857526893.665941 9823233138475839.903 72520719681561428.918 2022-08-09 2022-08-26 21:34:09 2022-06-20 2022-02-18 04:31:46 218.100.49.17 pHowell@Viva.com Hagan Place 18 +-2143322102 1 80 -901 24978 -1298382804 -1594723068 -14798.968 -137237087.719268 10803354863834510.135 18681686412229393.419 2022-02-14 2022-12-01 05:49:36 2022-04-07 2023-01-04 16:45:56 250.165.224.250 eMccoy@Mita.com Cordelia Trail 34 +-2143291574 1 -21 10019 -17173 231513576 -2004769659 16172.086 941985713.954182 91415382278425718.329 
11336194132112334.303 2022-03-10 2022-03-16 06:57:55 2022-12-04 2022-05-06 19:19:30 243.62.206.178 harum_omnis_tempora@Devcast.org Harper Circle 42 +-2143272783 0 36 -12429 20858 -1257516971 -1498805260 65.542076 705608237.236062 9831884881415128.78 37233508339616652.703 2022-02-07 2022-11-03 00:55:55 2022-03-11 2022-12-23 08:43:05 213.183.23.50 MartinAllen@Jetwire.name Cambridge Plaza 7 +-2143261604 0 107 -3152 2516 -827318298 1035637204 -31264.121 559726212.134293 133438066875759.909 91097319984287417.518 2022-07-27 2022-08-23 18:29:57 2022-06-01 2022-07-19 17:21:08 142.0.9.61 RubyGriffin@Yakitri.name Saint Paul Parkway 41 +-2143259207 0 6 8805 -26659 14206254 -923396935 2646.405 -233846503.166964 88387101518090024.893 32265882411468156.718 2022-10-23 2022-04-04 23:57:20 2022-04-26 2022-05-26 03:24:14 86.209.184.30 nulla_non_ducimus@Ntags.com Oak Valley Crossing 89 +-2143230815 0 -113 -11392 29571 -1728061216 1383605572 25424.891 1005644817.523787 80134094266764750.693 99692542460226750.956 2022-07-06 2022-02-02 11:40:11 2022-05-22 2022-02-04 08:26:05 32.200.80.169 tFox@Miboo.net Eagan Circle 52 +-2143194971 1 23 -5296 15175 2092789134 -132397296 -28776.523 -411055470.183943 3163530179594883.677 50152629379873430.922 2022-04-04 2022-04-03 01:03:54 2022-04-16 2022-06-15 19:17:08 128.67.32.225 dolorem_et@Einti.info Quincy Alley 73 +-2143184292 1 20 10547 -28168 -505889254 1981328355 19401.881 -385243397.236419 68821910027022343.629 18929155308543822.448 2022-01-15 2022-08-15 15:32:30 2022-08-07 2022-08-10 00:18:34 1.104.231.244 lRichards@Fanoodle.org Meadow Valley Way 72 +-2143169173 1 -114 29031 30776 1800482968 471248239 8144.2861 -1596954472.0818269 80438784671001111.616 47314925684213662.805 2022-12-13 2022-05-26 14:41:06 2022-09-18 2022-03-09 15:07:18 118.29.166.43 aut@Yakidoo.name Sutherland Plaza 47 +-2143151901 1 -5 25343 964 -1020210711 1745956682 -32360.256 -757727807.13815 34692749902950064.502 50366352212333617.332 2022-06-14 2022-07-09 14:04:22 2022-02-15 2022-12-22 22:22:53 17.211.38.238 oGarza@Blogpad.mil Onsgard Parkway 44 +-2143151578 0 121 -12031 7633 -792900177 -1163990602 32633.73 186557928.72995 59121988715094317.4 38985748215319285.270 2022-01-20 2022-05-12 12:39:44 2022-08-17 2022-08-29 00:04:52 148.154.162.223 eArnold@Layo.biz Hintze Lane 1 +-2143123905 0 -118 10642 -4563 -1341245502 1017224516 -8458.0029 -106193291.507292 925500132528675.116 7232999535645207.825 2022-05-24 2022-06-11 22:11:38 2022-08-29 2022-04-27 04:50:45 2.181.202.112 RogerJohnson@Trunyx.net Bowman Terrace 71 +-2143091699 0 71 -32492 -6263 -848327842 1461488007 7373.8604 746453563.054372 28995936497349734.606 27760760952367061.114 2022-01-18 2022-10-04 03:54:57 2022-06-01 2022-04-05 15:35:13 175.85.211.161 molestiae_consequuntur@Dabtype.com Eastlawn Park 94 +-2143062391 0 125 -30878 -6319 -1080576697 718283844 27974.408 -1042341679.9821891 67474748942299945.603 33437893206525638.770 2022-07-15 2022-02-22 11:33:13 2022-12-27 2022-03-04 02:36:29 201.58.156.107 JamesBurke@Plajo.gov Wayridge Drive 32 +-2143012777 1 33 -471 28391 838988547 -1029454439 12903.569 -1881843821.4741449 6725471621396771.937 1686758121140217.213 2022-04-15 2022-06-08 07:49:46 2022-06-15 2022-03-21 15:32:50 104.214.28.29 BrandonRay@Yakitri.info Blue Bill Park Way 89 +-2142919005 1 -5 -1177 -1958 1865952544 1073544445 -3874.7803 73792992.960694 37839748091455037.19 24958302880075589.133 2022-07-18 2022-06-11 06:21:46 2022-07-23 2022-09-09 20:15:06 42.208.163.52 PhilipReid@Yakijo.com Columbus Lane 50 diff --git 
a/regression-test/suites/datatype_p0/nested_types/test_nested_types_insert_into_with_duplicat_table.groovy b/regression-test/suites/datatype_p0/nested_types/test_nested_types_insert_into_with_duplicat_table.groovy new file mode 100644 index 00000000000000..e0bc822bdc42c5 --- /dev/null +++ b/regression-test/suites/datatype_p0/nested_types/test_nested_types_insert_into_with_duplicat_table.groovy @@ -0,0 +1,647 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.codehaus.groovy.runtime.IOGroovyMethods + +suite("test_nested_types_insert_into_with_duplicat_table", "p0") { + sql """ADMIN SET FRONTEND CONFIG ('disable_nested_complex_type' = 'false')""" + def dataFile = """test_scalar_types_100.csv""" + + // define dup key table1 with scala types + def scala_table_dup = "tbl_scalar_types_dup" + sql "DROP TABLE IF EXISTS ${scala_table_dup}" + sql """ + CREATE TABLE IF NOT EXISTS ${scala_table_dup} ( + `k1` bigint(11) NULL, + `c_bool` boolean NULL, + `c_tinyint` tinyint(4) NULL, + `c_smallint` smallint(6) NULL, + `c_int` int(11) NULL, + `c_bigint` bigint(20) NULL, + `c_largeint` largeint(40) NULL, + `c_float` float NULL, + `c_double` double NULL, + `c_decimal` decimal(20, 3) NULL, + `c_decimalv3` decimalv3(20, 3) NULL, + `c_date` date NULL, + `c_datetime` datetime NULL, + `c_datev2` datev2 NULL, + `c_datetimev2` datetimev2(0) NULL, + `c_char` char(15) NULL, + `c_varchar` varchar(100) NULL, + `c_string` text NULL + ) ENGINE=OLAP + DUPLICATE KEY(`k1`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`k1`) BUCKETS 10 + PROPERTIES("replication_num" = "1"); + """ + + // load data + streamLoad { + table scala_table_dup + file dataFile + time 60000 + + check { result, exception, startTime, endTime -> + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + assertEquals(100, json.NumberTotalRows) + assertEquals(100, json.NumberLoadedRows) + } + } + + // insert two NULL rows + sql """INSERT INTO ${scala_table_dup} VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)""" + sql """INSERT INTO ${scala_table_dup} VALUES (NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)""" + + + // define dup key table with nested table types with one nested scala + def nested_table_dup = "tbl_array_nested_types_dup" + sql "DROP TABLE IF EXISTS ${nested_table_dup}" + sql """ + CREATE TABLE IF NOT EXISTS ${nested_table_dup} ( + `k1` bigint(11) NULL, + `c_bool` array NULL, + `c_tinyint` array NULL, + `c_smallint` array NULL, + `c_int` array NULL, + `c_bigint` array NULL, + `c_largeint` array NULL, + `c_float` array NULL, + `c_double` array NULL, + `c_decimal` array NULL, + 
`c_decimalv3` array NULL, + `c_date` array NULL, + `c_datetime` array NULL, + `c_datev2` array NULL, + `c_datetimev2` array NULL, + `c_char` array NULL, + `c_varchar` array NULL, + `c_string` array NULL + ) ENGINE=OLAP + DUPLICATE KEY(`k1`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`k1`) BUCKETS 10 + PROPERTIES("replication_num" = "1"); + """ + + // define dup key table with nested table types with two nested scala + def nested_table_dup2 = "tbl_array_nested_types_dup2" + sql "DROP TABLE IF EXISTS ${nested_table_dup2}" + sql """ + CREATE TABLE IF NOT EXISTS ${nested_table_dup2} ( + `k1` bigint(11) NULL, + `c_bool` array> NULL, + `c_tinyint` array> NULL, + `c_smallint` array> NULL, + `c_int` array> NULL, + `c_bigint` array> NULL, + `c_largeint` array> NULL, + `c_float` array> NULL, + `c_double` array> NULL, + `c_decimal` array> NULL, + `c_decimalv3` array> NULL, + `c_date` array> NULL, + `c_datetime` array> NULL, + `c_datev2` array> NULL, + `c_datetimev2` array> NULL, + `c_char` array> NULL, + `c_varchar` array> NULL, + `c_string` array> NULL + ) ENGINE=OLAP + DUPLICATE KEY(`k1`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`k1`) BUCKETS 10 + PROPERTIES("replication_num" = "1"); + """ + + // define dup key table with map types with one nested scala + def nested_table_map_dup = "tbl_map_types_dup" + sql "DROP TABLE IF EXISTS ${nested_table_map_dup}" + sql """ + CREATE TABLE IF NOT EXISTS ${nested_table_map_dup} ( + `k1` bigint(11) NULL, + `c_bool` map NULL, + `c_tinyint` map NULL, + `c_smallint` map NULL, + `c_int` map NULL, + `c_bigint` map NULL, + `c_largeint` map NULL, + `c_float` map NULL, + `c_double` map NULL, + `c_decimal` map NULL, + `c_decimalv3` map NULL, + `c_date` map NULL, + `c_datetime` map NULL, + `c_datev2` map NULL, + `c_datetimev2` map NULL, + `c_char` map NULL, + `c_varchar` map NULL, + `c_string` map NULL + ) ENGINE=OLAP + DUPLICATE KEY(`k1`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`k1`) BUCKETS 10 + PROPERTIES("replication_num" = "1"); + """ + + + // define dup key table with array nested map table types with one nested scala + def nested_table_array_map_dup = "tbl_array_map_types_dup" + sql "DROP TABLE IF EXISTS ${nested_table_array_map_dup}" + sql """ + CREATE TABLE IF NOT EXISTS ${nested_table_array_map_dup} ( + `k1` bigint(11) NULL, + `c_bool` array> NULL, + `c_tinyint` array> NULL, + `c_smallint` array> NULL, + `c_int` array> NULL, + `c_bigint` array> NULL, + `c_largeint` array> NULL, + `c_float` array> NULL, + `c_double` array> NULL, + `c_decimal` array> NULL, + `c_decimalv3` array> NULL, + `c_date` array> NULL, + `c_datetime` array> NULL, + `c_datev2` array> NULL, + `c_datetimev2` array> NULL, + `c_char` array> NULL, + `c_varchar` array> NULL, + `c_string` array> NULL + ) ENGINE=OLAP + DUPLICATE KEY(`k1`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`k1`) BUCKETS 10 + PROPERTIES("replication_num" = "1"); + """ + + // define dup key table with map nested value array table types with one nested scala + def nested_table_map_array_dup = "tbl_map_array_types_dup" + sql "DROP TABLE IF EXISTS ${nested_table_map_array_dup}" + sql """ + CREATE TABLE IF NOT EXISTS ${nested_table_map_array_dup} ( + `k1` bigint(11) NULL, + `c_bool` map> NULL, + `c_tinyint` map> NULL, + `c_smallint` map> NULL, + `c_int` map> NULL, + `c_bigint` map> NULL, + `c_largeint` map> NULL, + `c_float` map> NULL, + `c_double` map> NULL, + `c_decimal` map> NULL, + `c_decimalv3` map> NULL, + `c_date` map> NULL, + `c_datetime` map> NULL, + `c_datev2` map> NULL, + `c_datetimev2` map> NULL, + `c_char` map> NULL, + 
`c_varchar` map> NULL, + `c_string` map> NULL + ) ENGINE=OLAP + DUPLICATE KEY(`k1`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`k1`) BUCKETS 10 + PROPERTIES("replication_num" = "1"); + """ + + // test action for scala to array with scala type + // current we support char family to insert nested type + test { + sql "insert into ${nested_table_dup} (c_bool) select c_bool from ${scala_table_dup}" + exception "java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type BOOLEAN to target type=ARRAY" + } + + test { + sql "insert into ${nested_table_dup} (c_tinyint) select c_tinyint from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type TINYINT to target type=ARRAY") + } + + test { + sql "insert into ${nested_table_dup} (c_smallint) select c_smallint from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type SMALLINT to target type=ARRAY") + } + + test { + sql "insert into ${nested_table_dup} (c_int) select c_int from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type INT to target type=ARRAY") + } + + test { + sql "insert into ${nested_table_dup} (c_largeint) select c_largeint from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type LARGEINT to target type=ARRAY") + } + + test { + sql "insert into ${nested_table_dup} (c_float) select c_float from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type FLOAT to target type=ARRAY") + } + + test { + sql "insert into ${nested_table_dup} (c_double) select c_double from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DOUBLE to target type=ARRAY") + } + + test { + sql "insert into ${nested_table_dup} (c_decimal) select c_decimal from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DECIMALV3(20, 3) to target type=ARRAY") + } + + test { + sql "insert into ${nested_table_dup} (c_decimalv3) select c_decimalv3 from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DECIMALV3(20, 3) to target type=ARRAY") + } + + test { + sql "insert into ${nested_table_dup} (c_date) select c_date from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATEV2 to target type=ARRAY") + } + + test { + sql "insert into ${nested_table_dup} (c_datetime) select c_datetime from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATETIMEV2(0) to target type=ARRAY") + } + + test { + sql "insert into ${nested_table_dup} (c_datev2) select c_datev2 from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATEV2 to target type=ARRAY") + } + + test { + sql "insert into ${nested_table_dup} (c_datetimev2) select c_datetimev2 from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATETIMEV2(0) to target type=ARRAY") + } + + test { + sql "insert into ${nested_table_dup} (c_char) select c_char from ${scala_table_dup}" + exception null + } + + test { + sql "insert into ${nested_table_dup} (c_varchar) select c_varchar from 
${scala_table_dup}" + exception null + } + + test { + sql "insert into ${nested_table_dup} (c_string) select c_string from ${scala_table_dup}" + exception null + } + + qt_sql_nested_table_dup_c """select count() from ${nested_table_dup};""" + + // test action for scala to array with array-scala type + test { + sql "insert into ${nested_table_dup2} (c_bool) select c_bool from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type BOOLEAN to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_dup2} (c_tinyint) select c_tinyint from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type TINYINT to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_dup2} (c_smallint) select c_smallint from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type SMALLINT to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_dup2} (c_int) select c_int from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type INT to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_dup2} (c_largeint) select c_largeint from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type LARGEINT to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_dup2} (c_float) select c_float from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type FLOAT to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_dup2} (c_double) select c_double from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DOUBLE to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_dup2} (c_decimal) select c_decimal from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DECIMALV3(20, 3) to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_dup2} (c_decimalv3) select c_decimalv3 from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DECIMALV3(20, 3) to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_dup2} (c_date) select c_date from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATEV2 to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_dup2} (c_datetime) select c_datetime from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATETIMEV2(0) to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_dup2} (c_datev2) select c_datev2 from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATEV2 to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_dup2} (c_datetimev2) select c_datetimev2 from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATETIMEV2(0) to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_dup2} (c_char) select c_char from ${scala_table_dup}" + exception null + } + + test { + sql "insert into ${nested_table_dup2} 
(c_varchar) select c_varchar from ${scala_table_dup}" + exception null + } + + test { + sql "insert into ${nested_table_dup2} (c_string) select c_string from ${scala_table_dup}" + exception null + } + + qt_sql_nested_table_dup2_c """select count() from ${nested_table_dup2};""" + + + // test action for scala to map with map-scala-scala type + test { + sql "insert into ${nested_table_map_dup} (c_bool) select c_bool from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type BOOLEAN to target type=MAP") + } + + test { + sql "insert into ${nested_table_map_dup} (c_tinyint) select c_tinyint from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type TINYINT to target type=MAP") + } + + test { + sql "insert into ${nested_table_map_dup} (c_smallint) select c_smallint from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type SMALLINT to target type=MAP") + } + + test { + sql "insert into ${nested_table_map_dup} (c_int) select c_int from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type INT to target type=MAP") + } + + test { + sql "insert into ${nested_table_map_dup} (c_largeint) select c_largeint from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type LARGEINT to target type=MAP") + } + + test { + sql "insert into ${nested_table_map_dup} (c_float) select c_float from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type FLOAT to target type=MAP") + } + + test { + sql "insert into ${nested_table_map_dup} (c_double) select c_double from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DOUBLE to target type=MAP") + } + + test { + sql "insert into ${nested_table_map_dup} (c_decimal) select c_decimal from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DECIMALV3(20, 3) to target type=MAP") + } + + test { + sql "insert into ${nested_table_map_dup} (c_decimalv3) select c_decimalv3 from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DECIMALV3(20, 3) to target type=MAP") + } + + test { + sql "insert into ${nested_table_map_dup} (c_date) select c_date from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATEV2 to target type=MAP") + } + + test { + sql "insert into ${nested_table_map_dup} (c_datetime) select c_datetime from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATETIMEV2(0) to target type=MAP") + } + + test { + sql "insert into ${nested_table_map_dup} (c_datev2) select c_datev2 from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATEV2 to target type=MAP") + } + + test { + sql "insert into ${nested_table_map_dup} (c_datetimev2) select c_datetimev2 from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATETIMEV2(0) to target type=MAP") + } + + test { + sql "insert into ${nested_table_map_dup} (c_char) select c_char from ${scala_table_dup}" + exception null + } + + test { + 
sql "insert into ${nested_table_map_dup} (c_varchar) select c_varchar from ${scala_table_dup}" + exception null + } + + test { + sql "insert into ${nested_table_map_dup} (c_string) select c_string from ${scala_table_dup}" + exception null + } + + qt_sql_nested_table_map_dup_c """select count() from ${nested_table_map_dup};""" + + // test action for scala to array with map-scala-scala type + test { + sql "insert into ${nested_table_array_map_dup} (c_bool) select c_bool from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type BOOLEAN to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_array_map_dup} (c_tinyint) select c_tinyint from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type TINYINT to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_array_map_dup} (c_smallint) select c_smallint from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type SMALLINT to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_array_map_dup} (c_int) select c_int from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type INT to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_array_map_dup} (c_largeint) select c_largeint from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type LARGEINT to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_array_map_dup} (c_float) select c_float from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type FLOAT to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_array_map_dup} (c_double) select c_double from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DOUBLE to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_array_map_dup} (c_decimal) select c_decimal from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DECIMALV3(20, 3) to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_array_map_dup} (c_decimalv3) select c_decimalv3 from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DECIMALV3(20, 3) to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_array_map_dup} (c_date) select c_date from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATEV2 to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_array_map_dup} (c_datetime) select c_datetime from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATETIMEV2(0) to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_array_map_dup} (c_datev2) select c_datev2 from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATEV2 to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_array_map_dup} (c_datetimev2) select c_datetimev2 from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type 
DATETIMEV2(0) to target type=ARRAY>") + } + + test { + sql "insert into ${nested_table_array_map_dup} (c_char) select c_char from ${scala_table_dup}" + exception null + } + + test { + sql "insert into ${nested_table_array_map_dup} (c_varchar) select c_varchar from ${scala_table_dup}" + exception null + } + + test { + sql "insert into ${nested_table_array_map_dup} (c_string) select c_string from ${scala_table_dup}" + exception null + } + + qt_sql_nested_table_array_map_dup_c """select count() from ${nested_table_array_map_dup};""" + + // test action for map with scala array-scala + // test action for scala to array with array-scala type + test { + sql "insert into ${nested_table_map_array_dup} (c_bool) select c_bool from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type BOOLEAN to target type=MAP>") + } + + test { + sql "insert into ${nested_table_map_array_dup} (c_tinyint) select c_tinyint from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type TINYINT to target type=MAP>") + } + + test { + sql "insert into ${nested_table_map_array_dup} (c_smallint) select c_smallint from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type SMALLINT to target type=MAP>") + } + + test { + sql "insert into ${nested_table_map_array_dup} (c_int) select c_int from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type INT to target type=MAP>") + } + + test { + sql "insert into ${nested_table_map_array_dup} (c_largeint) select c_largeint from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type LARGEINT to target type=MAP>") + } + + test { + sql "insert into ${nested_table_map_array_dup} (c_float) select c_float from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type FLOAT to target type=MAP>") + } + + test { + sql "insert into ${nested_table_map_array_dup} (c_double) select c_double from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DOUBLE to target type=MAP>") + } + + test { + sql "insert into ${nested_table_map_array_dup} (c_decimal) select c_decimal from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DECIMALV3(20, 3) to target type=MAP>") + } + + test { + sql "insert into ${nested_table_map_array_dup} (c_decimalv3) select c_decimalv3 from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DECIMALV3(20, 3) to target type=MAP>") + } + + test { + sql "insert into ${nested_table_map_array_dup} (c_date) select c_date from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATEV2 to target type=MAP>") + } + + test { + sql "insert into ${nested_table_map_array_dup} (c_datetime) select c_datetime from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATETIMEV2(0) to target type=MAP>") + } + + test { + sql "insert into ${nested_table_map_array_dup} (c_datev2) select c_datev2 from ${scala_table_dup}" + exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATEV2 to target type=MAP>") + 
}

+    test {
+        sql "insert into ${nested_table_map_array_dup} (c_datetimev2) select c_datetimev2 from ${scala_table_dup}"
+        exception("java.sql.SQLException: errCode = 2, detailMessage = can not cast from origin type DATETIMEV2(0) to target type=MAP>")
+    }
+
+    test {
+        sql "insert into ${nested_table_map_array_dup} (c_char) select c_char from ${scala_table_dup}"
+        exception null
+    }
+
+    test {
+        sql "insert into ${nested_table_map_array_dup} (c_varchar) select c_varchar from ${scala_table_dup}"
+        exception null
+    }
+
+    test {
+        sql "insert into ${nested_table_map_array_dup} (c_string) select c_string from ${scala_table_dup}"
+        exception null
+    }
+
+    qt_sql_nested_table_map_array_dup_c """select count() from ${nested_table_map_array_dup};"""
+
+}

From 4e874586e9349406d1264b96372c429fb2ecd132 Mon Sep 17 00:00:00 2001
From: zhangstar333 <87313068+zhangstar333@users.noreply.github.com>
Date: Mon, 11 Sep 2023 10:04:00 +0800
Subject: [PATCH 11/31] [BUG](view) fix can't create view with lambda function (#23942)

Previously, the lambda function Expr did not implement the toSqlImpl()
function, so it fell back to the parent implementation, which is not
suitable for lambda functions and caused an error when creating a view.
---
 .../functions/array/function_array_filter.cpp | 133 ++++++++++++++++++
 .../array/function_array_register.cpp         |   2 +
 .../array-functions/array_filter.md           |  20 ++-
 .../array-functions/array_filter.md           |  22 ++-
 .../java/org/apache/doris/analysis/Expr.java  |   5 +
 .../analysis/LambdaFunctionCallExpr.java      |  33 +++++
 .../doris/analysis/LambdaFunctionExpr.java    |  15 +-
 .../data/ddl_p0/test_create_view.out          |  26 ++++
 .../suites/ddl_p0/test_create_view.groovy     |  42 ++++++
 9 files changed, 292 insertions(+), 6 deletions(-)
 create mode 100644 be/src/vec/functions/array/function_array_filter.cpp
 create mode 100644 regression-test/data/ddl_p0/test_create_view.out

diff --git a/be/src/vec/functions/array/function_array_filter.cpp b/be/src/vec/functions/array/function_array_filter.cpp
new file mode 100644
index 00000000000000..d6395ed37b8e64
--- /dev/null
+++ b/be/src/vec/functions/array/function_array_filter.cpp
@@ -0,0 +1,133 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
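+
+// array_filter(arr, filter_column) keeps the elements of `arr` whose
+// corresponding entry in the boolean filter array is non-null and non-zero:
+// both arrays' offset columns are walked in lock step, and the per-row count
+// of surviving elements rebuilds the result offsets.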
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include "common/status.h"
+#include "vec/aggregate_functions/aggregate_function.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_vector.h"
+#include "vec/columns/columns_number.h"
+#include "vec/core/block.h"
+#include "vec/core/column_numbers.h"
+#include "vec/core/column_with_type_and_name.h"
+#include "vec/core/types.h"
+#include "vec/data_types/data_type.h"
+#include "vec/functions/array/function_array_utils.h"
+#include "vec/functions/function.h"
+#include "vec/functions/simple_function_factory.h"
+
+namespace doris {
+class FunctionContext;
+} // namespace doris
+
+namespace doris::vectorized {
+
+class FunctionArrayFilter : public IFunction {
+public:
+    static constexpr auto name = "array_filter";
+    static FunctionPtr create() { return std::make_shared<FunctionArrayFilter>(); }
+
+    /// Get function name.
+    String get_name() const override { return name; }
+
+    bool is_variadic() const override { return false; }
+
+    size_t get_number_of_arguments() const override { return 2; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+        DCHECK(is_array(arguments[0]))
+                << "First argument for function: " << name
+                << " should be DataTypeArray but it has type " << arguments[0]->get_name() << ".";
+        return arguments[0];
+    }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        //TODO: maybe need optimize not convert
+        auto first_column =
+                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+        auto second_column =
+                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
+
+        const ColumnArray& first_col_array = assert_cast<const ColumnArray&>(*first_column);
+        const auto& first_off_data =
+                assert_cast<const ColumnArray::ColumnOffsets&>(first_col_array.get_offsets_column())
+                        .get_data();
+        const auto& first_nested_nullable_column =
+                assert_cast<const ColumnNullable&>(*first_col_array.get_data_ptr());
+
+        const ColumnArray& second_col_array = assert_cast<const ColumnArray&>(*second_column);
+        const auto& second_off_data = assert_cast<const ColumnArray::ColumnOffsets&>(
+                                              second_col_array.get_offsets_column())
+                                              .get_data();
+        const auto& second_nested_null_map_data =
+                assert_cast<const ColumnNullable&>(*second_col_array.get_data_ptr())
+                        .get_null_map_column()
+                        .get_data();
+        const auto& second_nested_column =
+                assert_cast<const ColumnNullable&>(*second_col_array.get_data_ptr())
+                        .get_nested_column();
+        const auto& second_nested_data =
+                assert_cast<const ColumnUInt8&>(second_nested_column).get_data();
+
+        auto result_data_column = first_nested_nullable_column.clone_empty();
+        auto result_offset_column = ColumnArray::ColumnOffsets::create();
+        auto& result_offset_data = result_offset_column->get_data();
+        vectorized::IColumn::Selector selector;
+        selector.reserve(first_off_data.size());
+        result_offset_data.reserve(input_rows_count);
+
+        for (size_t row = 0; row < input_rows_count; ++row) {
+            unsigned long count = 0;
+            auto first_offset_start = first_off_data[row - 1];
+            auto first_offset_end = first_off_data[row];
+            auto second_offset_start = second_off_data[row - 1];
+            auto second_offset_end = second_off_data[row];
+            auto move_off = second_offset_start;
+            for (auto off = first_offset_start;
+                 off < first_offset_end && move_off < second_offset_end; // not out range
+                 ++off) {
+                if (second_nested_null_map_data[move_off] == 0 && // not null
+                    second_nested_data[move_off] == 1) {          // not 0
+                    count++;
+                    selector.push_back(off);
+                }
+                move_off++;
+            }
+            result_offset_data.push_back(count + result_offset_data.back());
+        }
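+        // Each row's array occupies the half-open range
+        // [off_data[row - 1], off_data[row]) of its flattened nested column;
+        // offset columns are padded, so the off_data[-1] read for row 0 is
+        // safe and yields 0. `selector` now holds the flattened indices of
+        // the first array whose filter value is non-null and non-zero; copy
+        // just those elements into the result data column below.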
first_nested_nullable_column.append_data_by_selector(result_data_column, selector); + + auto res_column = + ColumnArray::create(std::move(result_data_column), std::move(result_offset_column)); + block.replace_by_position(result, std::move(res_column)); + return Status::OK(); + } +}; + +void register_function_array_filter_function(SimpleFunctionFactory& factory) { + factory.register_function(); +} + +} // namespace doris::vectorized diff --git a/be/src/vec/functions/array/function_array_register.cpp b/be/src/vec/functions/array/function_array_register.cpp index 02ad9cfe051ea9..3fc640e8e2be24 100644 --- a/be/src/vec/functions/array/function_array_register.cpp +++ b/be/src/vec/functions/array/function_array_register.cpp @@ -53,6 +53,7 @@ void register_function_array_pushback(SimpleFunctionFactory& factory); void register_function_array_first_or_last_index(SimpleFunctionFactory& factory); void register_function_array_cum_sum(SimpleFunctionFactory& factory); void register_function_array_count(SimpleFunctionFactory&); +void register_function_array_filter_function(SimpleFunctionFactory&); void register_function_array(SimpleFunctionFactory& factory) { register_function_array_shuffle(factory); @@ -86,6 +87,7 @@ void register_function_array(SimpleFunctionFactory& factory) { register_function_array_first_or_last_index(factory); register_function_array_cum_sum(factory); register_function_array_count(factory); + register_function_array_filter_function(factory); } } // namespace doris::vectorized diff --git a/docs/en/docs/sql-manual/sql-functions/array-functions/array_filter.md b/docs/en/docs/sql-manual/sql-functions/array-functions/array_filter.md index eae138daca9657..54691de25b7915 100644 --- a/docs/en/docs/sql-manual/sql-functions/array-functions/array_filter.md +++ b/docs/en/docs/sql-manual/sql-functions/array-functions/array_filter.md @@ -32,12 +32,18 @@ array_filter(lambda,array) + + +array array_filter(array arr, array_bool filter_column) + + + ### description #### Syntax ```sql -ARRAY array_filter(lambda, ARRAY arr1, ARRAY arr2, ... ) -ARRAY array_filter(ARRAY arr) +ARRAY array_filter(lambda, ARRAY arr) +ARRAY array_filter(ARRAY arr, ARRAY filter_column) ``` Use the lambda expression as the input parameter to calculate and filter the data of the ARRAY column of the other input parameter. @@ -47,11 +53,21 @@ And filter out the values of 0 and NULL in the result. 
array_filter(x->x>0, array1); array_filter(x->(x+2)=10, array1); array_filter(x->(abs(x)-2)>0, array1); +array_filter(c_array,[0,1,0]); ``` ### example ```shell +mysql [test]>select c_array,array_filter(c_array,[0,1,0]) from array_test; ++-----------------+----------------------------------------------------+ +| c_array | array_filter(`c_array`, ARRAY(FALSE, TRUE, FALSE)) | ++-----------------+----------------------------------------------------+ +| [1, 2, 3, 4, 5] | [2] | +| [6, 7, 8] | [7] | +| [] | [] | +| NULL | NULL | ++-----------------+----------------------------------------------------+ mysql [test]>select array_filter(x->(x > 1),[1,2,3,0,null]); +----------------------------------------------------------------------------------------------+ diff --git a/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_filter.md b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_filter.md index 5a2bc7228365db..dec6e9f8f5fc32 100644 --- a/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_filter.md +++ b/docs/zh-CN/docs/sql-manual/sql-functions/array-functions/array_filter.md @@ -32,12 +32,18 @@ array_filter(lambda,array) + + +array array_filter(array arr, array_bool filter_column) + + + ### description #### Syntax ```sql -ARRAY array_filter(lambda, ARRAY arr1, ARRAY arr2, ... ) -ARRAY array_filter(ARRAY arr) +ARRAY array_filter(lambda, ARRAY arr) +ARRAY array_filter(ARRAY arr, ARRAY filter_column) ``` 使用lambda表达式作为输入参数,计算筛选另外的输入参数ARRAY列的数据。 @@ -47,12 +53,22 @@ ARRAY array_filter(ARRAY arr) array_filter(x->x>0, array1); array_filter(x->(x+2)=10, array1); array_filter(x->(abs(x)-2)>0, array1); - +array_filter(c_array,[0,1,0]); ``` ### example ```shell +mysql [test]>select c_array,array_filter(c_array,[0,1,0]) from array_test; ++-----------------+----------------------------------------------------+ +| c_array | array_filter(`c_array`, ARRAY(FALSE, TRUE, FALSE)) | ++-----------------+----------------------------------------------------+ +| [1, 2, 3, 4, 5] | [2] | +| [6, 7, 8] | [7] | +| [] | [] | +| NULL | NULL | ++-----------------+----------------------------------------------------+ + mysql [test]>select array_filter(x->(x > 1),[1,2,3,0,null]); +----------------------------------------------------------------------------------------------+ | array_filter(ARRAY(1, 2, 3, 0, NULL), array_map([x] -> (x(0) > 1), ARRAY(1, 2, 3, 0, NULL))) | diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java index 4ce82186067a36..1b583425e11bad 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Expr.java @@ -1845,6 +1845,11 @@ private boolean findSlotRefByName(String colName) { if (slot.getColumnName() != null && slot.getColumnName().equals(colName)) { return true; } + } else if (this instanceof ColumnRefExpr) { + ColumnRefExpr slot = (ColumnRefExpr) this; + if (slot.getName() != null && slot.getName().equals(colName)) { + return true; + } } return false; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java index 33a66570e52769..4c9455d4e0c8d6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionCallExpr.java @@ -263,4 +263,37 @@ protected void toThrift(TExprNode msg) { msg.node_type = 
TExprNodeType.LAMBDA_FUNCTION_CALL_EXPR; } } + + @Override + public String toSqlImpl() { + StringBuilder sb = new StringBuilder(); + sb.append(getFnName().getFunction()); + sb.append("("); + int childSize = children.size(); + Expr lastExpr = getChild(childSize - 1); + // eg: select array_map(x->x>10, k1) from table, + // but we need analyze each param, so change the function like this in parser + // array_map(x->x>10, k1) ---> array_map(k1, x->x>10), + // so maybe the lambda expr is the end position. and need this check. + boolean lastIsLambdaExpr = (lastExpr instanceof LambdaFunctionExpr); + if (lastIsLambdaExpr) { + sb.append(lastExpr.toSql()); + sb.append(", "); + } + for (int i = 0; i < childSize - 1; ++i) { + sb.append(getChild(i).toSql()); + if (i != childSize - 2) { + sb.append(", "); + } + } + // and some functions is only implement as a normal array function; + // but also want use as lambda function, select array_sortby(x->x,['b','a','c']); + // so we convert to: array_sortby(array('b', 'a', 'c'), array_map(x -> `x`, array('b', 'a', 'c'))) + if (lastIsLambdaExpr == false) { + sb.append(", "); + sb.append(lastExpr.toSql()); + } + sb.append(")"); + return sb.toString(); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionExpr.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionExpr.java index 15c37ad85052ed..e2e7b90bfb2077 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionExpr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LambdaFunctionExpr.java @@ -115,7 +115,20 @@ protected void analyzeImpl(Analyzer analyzer) throws AnalysisException { @Override protected String toSqlImpl() { - return String.format("%s -> %s", names.toString(), getChild(0).toSql()); + String nameStr = ""; + Expr lambdaExpr = slotExpr.get(0); + int exprSize = names.size(); + for (int i = 0; i < exprSize; ++i) { + nameStr = nameStr + names.get(i); + if (i != exprSize - 1) { + nameStr = nameStr + ","; + } + } + if (exprSize > 1) { + nameStr = "(" + nameStr + ")"; + } + String res = String.format("%s -> %s", nameStr, lambdaExpr.toSql()); + return res; } @Override diff --git a/regression-test/data/ddl_p0/test_create_view.out b/regression-test/data/ddl_p0/test_create_view.out new file mode 100644 index 00000000000000..f55b7fa59819d5 --- /dev/null +++ b/regression-test/data/ddl_p0/test_create_view.out @@ -0,0 +1,26 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !test_view_1 -- +1 [1, 2, 3] +2 [10, -2, 8] +3 [-1, 20, 0] + +-- !test_view_2 -- +1 [1, 2, 3] [1, 1, 1] +2 [10, -2, 8] [1, 0, 1] +3 [-1, 20, 0] [0, 1, 0] + +-- !test_view_3 -- +1 [1, 2, 3] [1, 2, 3] [1, 2, 3] +2 [10, -2, 8] [10, 8] [10, 8] +3 [-1, 20, 0] [20] [20] + +-- !test_view_4 -- +1 [1, 2, 3] [1, 2, 3] [1, 2, 3] +2 [10, -2, 8] [10, 8] [10, 8] +3 [-1, 20, 0] [20] [20] + +-- !test_view_5 -- +1 [1, 2, 3] [1, 1, 1] +2 [10, -2, 8] [1, 0, 1] +3 [-1, 20, 0] [0, 1, 0] + diff --git a/regression-test/suites/ddl_p0/test_create_view.groovy b/regression-test/suites/ddl_p0/test_create_view.groovy index c209d42bd358d3..a2c7090e6dcb93 100644 --- a/regression-test/suites/ddl_p0/test_create_view.groovy +++ b/regression-test/suites/ddl_p0/test_create_view.groovy @@ -111,4 +111,46 @@ suite("test_create_view") { sql """DROP VIEW IF EXISTS my_view""" sql """DROP TABLE IF EXISTS t1""" sql """DROP TABLE IF EXISTS t2""" + + + sql """DROP TABLE IF EXISTS view_baseall""" + sql """DROP VIEW IF EXISTS test_view7""" + sql """DROP VIEW IF EXISTS test_view8""" + sql """ + CREATE TABLE `view_baseall` ( + `k1` int(11) NULL, + `k3` array NULL + ) ENGINE=OLAP + DUPLICATE KEY(`k1`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`k1`) BUCKETS 5 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "is_being_synced" = "false", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + ); + """ + sql """insert into view_baseall values(1,[1,2,3]);""" + sql """insert into view_baseall values(2,[10,-2,8]);""" + sql """insert into view_baseall values(3,[-1,20,0]);""" + + qt_test_view_1 """ select * from view_baseall order by k1; """ + qt_test_view_2 """ select *, array_map(x->x>0,k3) from view_baseall order by k1; """ + qt_test_view_3 """ select *, array_filter(x->x>0,k3),array_filter(`k3`, array_map(x -> x > 0, `k3`)) from view_baseall order by k1; """ + + + sql """ + create view IF NOT EXISTS test_view7 (k1,k2,k3,k4) as + select *, array_filter(x->x>0,k3),array_filter(`k3`, array_map(x -> x > 0, `k3`)) from view_baseall order by k1; + """ + qt_test_view_4 """ select * from test_view7 order by k1; """ + + sql """ + create view IF NOT EXISTS test_view8 (k1,k2,k3) as + select *, array_map(x->x>0,k3) from view_baseall order by k1; + """ + qt_test_view_5 """ select * from test_view8 order by k1; """ } From a19a53b44091e1be4e94f927cb7058458e8ab733 Mon Sep 17 00:00:00 2001 From: Liqf <109049295+LemonLiTree@users.noreply.github.com> Date: Mon, 11 Sep 2023 10:25:06 +0800 Subject: [PATCH 12/31] [typo](errmsg) Improve partition error message (#23968) --- .../org/apache/doris/common/util/DynamicPartitionUtil.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java index 274e1a3e15c9ab..fcd578d9bc754a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java @@ -674,7 +674,8 @@ public static void checkAlterAllowed(OlapTable olapTable) throws DdlException { && tableProperty.getDynamicPartitionProperty().isExist() && tableProperty.getDynamicPartitionProperty().getEnable()) { throw new DdlException("Cannot add/drop partition on a Dynamic Partition Table, " - + "Use command `ALTER TABLE 
tbl_name SET (\"dynamic_partition.enable\" = \"false\")` firstly."); + + "Use command `ALTER TABLE " + olapTable.getName() + + " SET (\"dynamic_partition.enable\" = \"false\")` firstly."); } } From 0525f41e9d5f4ef5acab71e63ec06a2cbf6ef79d Mon Sep 17 00:00:00 2001 From: zy-kkk Date: Mon, 11 Sep 2023 10:26:54 +0800 Subject: [PATCH 13/31] [improvement](jdbc catalog) optimize the JDBC Catalog connection error message (#23868) --- .../org/apache/doris/datasource/jdbc/client/JdbcClient.java | 5 +++-- .../src/main/java/org/apache/doris/qe/StmtExecutor.java | 5 +++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClient.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClient.java index 90070bdba71fe8..8ffc5df3cabcde 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClient.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClient.java @@ -162,8 +162,9 @@ public Connection getConnection() throws JdbcClientException { try { conn = dataSource.getConnection(); } catch (Exception e) { - throw new JdbcClientException("Can not connect to jdbc due to error: %s, catalog name: %s", e.getMessage(), - this.catalog, e); + String errorMessage = String.format("Can not connect to jdbc due to error: %s, Catalog name: %s", e, + this.getCatalog()); + throw new JdbcClientException(errorMessage, e); } return conn; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index 649f75c278e24e..fea250c064e0d2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -103,6 +103,7 @@ import org.apache.doris.common.util.SqlParserUtils; import org.apache.doris.common.util.TimeUtils; import org.apache.doris.common.util.Util; +import org.apache.doris.datasource.jdbc.client.JdbcClientException; import org.apache.doris.load.EtlJobType; import org.apache.doris.load.LoadJobRowResult; import org.apache.doris.load.loadv2.LoadManager; @@ -767,6 +768,10 @@ public void executeByLegacy(TUniqueId queryId) throws Exception { LOG.warn("execute Exception. {}", context.getQueryIdentifier(), e); context.getState().setError(e.getMysqlErrorCode(), e.getMessage()); context.getState().setErrType(QueryState.ErrType.ANALYSIS_ERR); + } catch (JdbcClientException e) { + LOG.warn("execute Exception. {}", context.getQueryIdentifier(), e); + context.getState().setError(ErrorCode.ERR_UNKNOWN_ERROR, + e.getMessage()); } catch (Exception e) { LOG.warn("execute Exception. 
{}", context.getQueryIdentifier(), e); context.getState().setError(ErrorCode.ERR_UNKNOWN_ERROR, From 0e984fbfbf68d650d1d09e11bcee827d15bdb0b6 Mon Sep 17 00:00:00 2001 From: zy-kkk Date: Mon, 11 Sep 2023 10:38:53 +0800 Subject: [PATCH 14/31] [improvement](jdbc catalog) Added create jdbc catalog properties validation (#23764) --- .../datasource/jdbc/JdbcExternalCatalog.java | 17 +++++ .../jdbc/JdbcExternalCatalogTest.java | 63 +++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 fe/fe-core/src/test/java/org/apache/doris/datasource/jdbc/JdbcExternalCatalogTest.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalCatalog.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalCatalog.java index 7617ad71801a09..9b2d9b00f8849a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalCatalog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/JdbcExternalCatalog.java @@ -177,5 +177,22 @@ public void setDefaultPropsWhenCreating(boolean isReplay) throws DdlException { properties.put(JdbcResource.CHECK_SUM, JdbcResource.computeObjectChecksum(properties.get(JdbcResource.DRIVER_URL))); } + String onlySpecifiedDatabase = getOnlySpecifiedDatabase(); + if (!onlySpecifiedDatabase.equalsIgnoreCase("true") && !onlySpecifiedDatabase.equalsIgnoreCase("false")) { + throw new DdlException("only_specified_database must be true or false"); + } + String lowerCaseTableNames = getLowerCaseTableNames(); + if (!lowerCaseTableNames.equalsIgnoreCase("true") && !lowerCaseTableNames.equalsIgnoreCase("false")) { + throw new DdlException("lower_case_table_names must be true or false"); + } + if (!onlySpecifiedDatabase.equalsIgnoreCase("true")) { + Map includeDatabaseList = getIncludeDatabaseMap(); + Map excludeDatabaseList = getExcludeDatabaseMap(); + if ((includeDatabaseList != null && !includeDatabaseList.isEmpty()) + || (excludeDatabaseList != null && !excludeDatabaseList.isEmpty())) { + throw new DdlException("include_database_list and exclude_database_list can not be set when " + + "only_specified_database is false"); + } + } } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/jdbc/JdbcExternalCatalogTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/jdbc/JdbcExternalCatalogTest.java new file mode 100644 index 00000000000000..0f2977a9886afb --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/jdbc/JdbcExternalCatalogTest.java @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
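+
+// Unit test for the validation added above: non-boolean values for
+// only_specified_database / lower_case_table_names are rejected, and
+// include_database_list / exclude_database_list may only be used together
+// with only_specified_database = true.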
+
+package org.apache.doris.datasource.jdbc;
+
+import org.apache.doris.catalog.JdbcResource;
+import org.apache.doris.common.DdlException;
+import org.apache.doris.common.FeConstants;
+
+import org.junit.Assert;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class JdbcExternalCatalogTest {
+    private JdbcExternalCatalog jdbcExternalCatalog;
+
+    @BeforeEach
+    public void setUp() throws DdlException {
+        FeConstants.runningUnitTest = true;
+        Map<String, String> properties = new HashMap<>();
+        properties.put(JdbcResource.DRIVER_URL, "ojdbc8.jar");
+        properties.put(JdbcResource.JDBC_URL, "jdbc:oracle:thin:@127.0.0.1:1521:XE");
+        properties.put(JdbcResource.DRIVER_CLASS, "oracle.jdbc.driver.OracleDriver");
+        jdbcExternalCatalog = new JdbcExternalCatalog(1L, "testCatalog", "testResource", properties, "testComment");
+    }
+
+    @Test
+    public void setDefaultPropsWhenCreatingTest() {
+        jdbcExternalCatalog.getCatalogProperty().addProperty(JdbcResource.ONLY_SPECIFIED_DATABASE, "1");
+        Exception exception1 = Assert.assertThrows(DdlException.class, () -> jdbcExternalCatalog.setDefaultPropsWhenCreating(false));
+        Assert.assertEquals("errCode = 2, detailMessage = only_specified_database must be true or false", exception1.getMessage());
+
+        jdbcExternalCatalog.getCatalogProperty().addProperty(JdbcResource.ONLY_SPECIFIED_DATABASE, "true");
+        jdbcExternalCatalog.getCatalogProperty().addProperty(JdbcResource.LOWER_CASE_TABLE_NAMES, "1");
+        Exception exception2 = Assert.assertThrows(DdlException.class, () -> jdbcExternalCatalog.setDefaultPropsWhenCreating(false));
+        Assert.assertEquals("errCode = 2, detailMessage = lower_case_table_names must be true or false", exception2.getMessage());
+
+        jdbcExternalCatalog.getCatalogProperty().addProperty(JdbcResource.ONLY_SPECIFIED_DATABASE, "false");
+        jdbcExternalCatalog.getCatalogProperty().addProperty(JdbcResource.LOWER_CASE_TABLE_NAMES, "false");
+        jdbcExternalCatalog.getCatalogProperty().addProperty(JdbcResource.INCLUDE_DATABASE_LIST, "db1,db2");
+        DdlException exceptione3 = Assert.assertThrows(DdlException.class, () -> jdbcExternalCatalog.setDefaultPropsWhenCreating(false));
+        Assert.assertEquals("errCode = 2, detailMessage = include_database_list and exclude_database_list can not be set when only_specified_database is false", exceptione3.getMessage());
+
+    }
+}
+
From 44c7c78fccfec4e2dc6765f5ec11a490ac42615d Mon Sep 17 00:00:00 2001
From: Jerry Hu
Date: Mon, 11 Sep 2023 11:54:44 +0800
Subject: [PATCH 15/31] [fix](join) avoid DCHECK failed in '_filter_data_and_build_output' (#24162)

avoid DCHECK failed in '_filter_data_and_build_output'
---
 be/src/vec/exec/join/vhash_join_node.cpp | 12 +++++++++---
 be/src/vec/exec/join/vhash_join_node.h   |  3 ++-
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp
index 284a1937798cc0..b59027fc144776 100644
--- a/be/src/vec/exec/join/vhash_join_node.cpp
+++ b/be/src/vec/exec/join/vhash_join_node.cpp
@@ -571,7 +571,11 @@ Status HashJoinNode::pull(doris::RuntimeState* state, vectorized::Block* output_
                     ->get_data()
                     .resize_fill(block_rows, 1);
         }
-        RETURN_IF_ERROR(_filter_data_and_build_output(state, output_block, eos, &temp_block));
+
+        /// No need to check the block size in `_filter_data_and_build_output` because it does not
+        /// increase the output rows count (just the same as `_probe_block`'s rows count).
+        RETURN_IF_ERROR(
+                _filter_data_and_build_output(state, output_block, eos, &temp_block, false));
         temp_block.clear();
         release_block_memory(_probe_block);
         return Status::OK();
     }
@@ -659,12 +663,14 @@ Status HashJoinNode::pull(doris::RuntimeState* state, vectorized::Block* output_
 
 Status HashJoinNode::_filter_data_and_build_output(RuntimeState* state,
                                                    vectorized::Block* output_block, bool* eos,
-                                                   Block* temp_block) {
+                                                   Block* temp_block, bool check_rows_count) {
     if (_is_outer_join) {
         _add_tuple_is_null_column(temp_block);
     }
     auto output_rows = temp_block->rows();
-    DCHECK(output_rows <= state->batch_size());
+    if (check_rows_count) { // If the join node does not increase the number of output rows, no need to check.
+        DCHECK(output_rows <= state->batch_size());
+    }
     {
         SCOPED_TIMER(_join_filter_timer);
         RETURN_IF_ERROR(VExprContext::filter_block(_conjuncts, temp_block, temp_block->columns()));

diff --git a/be/src/vec/exec/join/vhash_join_node.h b/be/src/vec/exec/join/vhash_join_node.h
index 639b5709347b61..b94d7214023bb1 100644
--- a/be/src/vec/exec/join/vhash_join_node.h
+++ b/be/src/vec/exec/join/vhash_join_node.h
@@ -404,7 +404,8 @@ class HashJoinNode final : public VJoinNodeBase {
     void _add_tuple_is_null_column(Block* block) override;
 
     Status _filter_data_and_build_output(RuntimeState* state, vectorized::Block* output_block,
-                                         bool* eos, Block* temp_block);
+                                         bool* eos, Block* temp_block,
+                                         bool check_rows_count = true);
 
     template
     friend struct ProcessHashTableBuild;

From 0a4784fffb8ad5856d4f29d9bad4fcc62e6952be Mon Sep 17 00:00:00 2001
From: Calvin Kirs
Date: Mon, 11 Sep 2023 14:15:06 +0800
Subject: [PATCH 16/31] [Improve](Routineload)Set the maximum timeout for obtaining partition to 60s (#24173)

---
 .../src/main/java/org/apache/doris/common/util/KafkaUtil.java | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/KafkaUtil.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/KafkaUtil.java
index 581a1ca48e63d0..40041502ca8087 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/common/util/KafkaUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/KafkaUtil.java
@@ -41,6 +41,7 @@ public class KafkaUtil {
     private static final Logger LOG = LogManager.getLogger(KafkaUtil.class);
+    private static final int MAX_KAFKA_PARTITION_TIMEOUT_SECOND = 60;
 
     public static List<Integer> getAllKafkaPartitions(String brokerList, String topic,
             Map<String, String> convertedCustomProperties) throws UserException {
@@ -70,7 +71,7 @@ public static List<Integer> getAllKafkaPartitions(String brokerList, String topi
             // get info
             Future<InternalService.PProxyResult> future = BackendServiceProxy.getInstance().getInfo(address, request);
-            InternalService.PProxyResult result = future.get(5, TimeUnit.SECONDS);
+            InternalService.PProxyResult result = future.get(MAX_KAFKA_PARTITION_TIMEOUT_SECOND, TimeUnit.SECONDS);
             TStatusCode code = TStatusCode.findByValue(result.getStatus().getStatusCode());
             if (code != TStatusCode.OK) {
                 throw new UserException("failed to get kafka partition info: " + result.getStatus().getErrorMsgsList());

From 4cd19c14c84514f3e5137f774e747e0cbccc974e Mon Sep 17 00:00:00 2001
From: AKIRA <33112463+Kikyou1997@users.noreply.github.com>
Date: Mon, 11 Sep 2023 15:27:11 +0900
Subject: [PATCH 17/31] [fix](optimizer) Fix sql block when new optimizer is enabled (#23804)

The check would be skipped because, at the time checkBlockPolicy was
invoked, the new optimizer had not produced a plan yet.
---
 .../trees/plans/commands/ExplainCommand.java  | 1 +
 .../commands/InsertIntoTableCommand.java      | 2 +-
.../org/apache/doris/qe/StmtExecutor.java | 46 +++++++----- .../test_sql_block_rule.groovy | 71 ++++++++++++++++++- 4 files changed, 99 insertions(+), 21 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ExplainCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ExplainCommand.java index bfb0e82a173ff7..9e02b8b4e9a14b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ExplainCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/ExplainCommand.java @@ -79,6 +79,7 @@ public void run(ConnectContext ctx, StmtExecutor executor) throws Exception { NereidsPlanner planner = new NereidsPlanner(ctx.getStatementContext()); planner.plan(logicalPlanAdapter, ctx.getSessionVariable().toThrift()); executor.setPlanner(planner); + executor.checkBlockRules(); executor.handleExplainStmt(planner.getExplainString(new ExplainOptions(level))); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/InsertIntoTableCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/InsertIntoTableCommand.java index 3155eab2e7c80e..f35b7c9dcebdea 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/InsertIntoTableCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/InsertIntoTableCommand.java @@ -100,7 +100,7 @@ public void run(ConnectContext ctx, StmtExecutor executor) throws Exception { LogicalPlanAdapter logicalPlanAdapter = new LogicalPlanAdapter(logicalQuery, ctx.getStatementContext()); planner = new NereidsPlanner(ctx.getStatementContext()); planner.plan(logicalPlanAdapter, ctx.getSessionVariable().toThrift()); - + executor.checkBlockRules(); if (ctx.getMysqlChannel() != null) { ctx.getMysqlChannel().reset(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index fea250c064e0d2..d7e597057adb43 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -465,24 +465,32 @@ public void execute(TUniqueId queryId) throws Exception { } } - private void checkBlockRules() throws AnalysisException { - if (originStmt != null) { - Env.getCurrentEnv().getSqlBlockRuleMgr().matchSql( - originStmt.originStmt, context.getSqlHash(), context.getQualifiedUser()); - } - - // limitations: partition_num, tablet_num, cardinality - if (planner != null) { - List scanNodeList = planner.getScanNodes(); - for (ScanNode scanNode : scanNodeList) { - if (scanNode instanceof OlapScanNode) { - OlapScanNode olapScanNode = (OlapScanNode) scanNode; - Env.getCurrentEnv().getSqlBlockRuleMgr().checkLimitations( - olapScanNode.getSelectedPartitionNum().longValue(), - olapScanNode.getSelectedTabletsNum(), - olapScanNode.getCardinality(), - context.getQualifiedUser()); - } + public void checkBlockRules() throws AnalysisException { + checkBlockRulesByRegex(originStmt); + checkBlockRulesByScan(planner); + } + + public void checkBlockRulesByRegex(OriginStatement originStmt) throws AnalysisException { + if (originStmt == null) { + return; + } + Env.getCurrentEnv().getSqlBlockRuleMgr().matchSql( + originStmt.originStmt, context.getSqlHash(), context.getQualifiedUser()); + } + + public void checkBlockRulesByScan(Planner planner) throws AnalysisException { + if (planner == null) { + return; + } + List scanNodeList = 
planner.getScanNodes(); + for (ScanNode scanNode : scanNodeList) { + if (scanNode instanceof OlapScanNode) { + OlapScanNode olapScanNode = (OlapScanNode) scanNode; + Env.getCurrentEnv().getSqlBlockRuleMgr().checkLimitations( + olapScanNode.getSelectedPartitionNum().longValue(), + olapScanNode.getSelectedTabletsNum(), + olapScanNode.getCardinality(), + context.getQualifiedUser()); } } } @@ -494,7 +502,6 @@ private void executeByNereids(TUniqueId queryId) throws Exception { profile.getSummaryProfile().setQueryBeginTime(); context.setStmtId(STMT_ID_GENERATOR.incrementAndGet()); - checkBlockRules(); parseByNereids(); Preconditions.checkState(parsedStmt instanceof LogicalPlanAdapter, "Nereids only process LogicalPlanAdapter, but parsedStmt is " + parsedStmt.getClass().getName()); @@ -545,6 +552,7 @@ private void executeByNereids(TUniqueId queryId) throws Exception { planner = new NereidsPlanner(statementContext); try { planner.plan(parsedStmt, context.getSessionVariable().toThrift()); + checkBlockRules(); } catch (Exception e) { LOG.debug("Nereids plan query failed:\n{}", originStmt.originStmt); throw new NereidsException(new AnalysisException("Unexpected exception: " + e.getMessage(), e)); diff --git a/regression-test/suites/sql_block_rule_p0/test_sql_block_rule.groovy b/regression-test/suites/sql_block_rule_p0/test_sql_block_rule.groovy index fa45be92cda3e6..582fa7c705d03b 100644 --- a/regression-test/suites/sql_block_rule_p0/test_sql_block_rule.groovy +++ b/regression-test/suites/sql_block_rule_p0/test_sql_block_rule.groovy @@ -16,6 +16,15 @@ // under the License. suite("test_sql_block_rule") { + + sql """ + DROP SQL_BLOCK_RULE if exists test_rule_partition + """ + + sql """ + DROP SQL_BLOCK_RULE if exists test_rule_tablet + """ + sql """ DROP SQL_BLOCK_RULE if exists test_rule_num """ @@ -27,7 +36,7 @@ suite("test_sql_block_rule") { `abcd` varchar(150) NULL COMMENT "", `create_time` datetime NULL COMMENT "" ) ENGINE=OLAP - UNIQUE KEY(`abcd`) + DUPLICATE KEY(`abcd`) COMMENT "OLAP" DISTRIBUTED BY HASH(`abcd`) BUCKETS 3 PROPERTIES ( @@ -47,6 +56,15 @@ suite("test_sql_block_rule") { sql("SELECT * FROM table_2", false) exception "sql match regex sql block rule: test_rule_sql" } + test { + sql("EXPLAIN SELECT * FROM table_2", false) + exception "sql match regex sql block rule: test_rule_sql" + } + + test { + sql("INSERT INTO table_2 SELECT * FROM table_2", false) + exception "sql match regex sql block rule: test_rule_sql" + } sql """ DROP SQL_BLOCK_RULE if exists test_rule_sql @@ -126,4 +144,55 @@ suite("test_sql_block_rule") { " PROPERTIES(\"sql\"=\"create\", \"global\"= \"true\", \"enable\"= \"true\")", false) exception "sql of SQL_BLOCK_RULE should not match its name" } + + sql """DROP TABLE IF EXISTS a_partitioned_table_for_sql_block_rule""" + + sql """ + CREATE TABLE a_partitioned_table_for_sql_block_rule ( + id BIGINT, + val BIGINT, + str VARCHAR(114) + ) DUPLICATE KEY(`id`) + PARTITION BY RANGE(`id`) + ( + PARTITION `p1` VALUES LESS THAN ('5'), + PARTITION `p2` VALUES LESS THAN ('10'), + PARTITION `p3` VALUES LESS THAN ('15') + ) + DISTRIBUTED BY HASH(`id`) BUCKETS 3 + PROPERTIES ( + "replication_num"="1" + ); + """ + + sql """ + INSERT INTO a_partitioned_table_for_sql_block_rule VALUES(1, 5, 11),(6,1,5),(11,8,5); + """ + + sql """ + CREATE SQL_BLOCK_RULE if not exists test_rule_partition PROPERTIES ( "partition_num" = "1", "global" = "true", + "enable"="true"); + """ + + test { + sql("""SELECT * FROM a_partitioned_table_for_sql_block_rule;""", false) + + exception """sql hits sql block 
rule""" + + } + + sql """ + CREATE SQL_BLOCK_RULE if not exists test_rule_tablet PROPERTIES ( "tablet_num" = "3", "global" = "true", + "enable"="true"); + """ + + test { + sql("""SELECT * FROM a_partitioned_table_for_sql_block_rule;""", false) + + exception """sql hits sql block rule""" + + } + + + } From bbb66b95fd420c3ff6e76580ad78c90a0e9ed144 Mon Sep 17 00:00:00 2001 From: LiBinfeng <46676950+LiBinfeng-01@users.noreply.github.com> Date: Mon, 11 Sep 2023 14:30:31 +0800 Subject: [PATCH 18/31] [Fix](Nereids) fix infer predicate lost cast of source expression (#23692) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem: When inferring predicate,we lost cast of source expressions and some datatype derivation. Example: a = b and cast(a as targetType) = constant (cast(a as targetType) = constant ) this expression is define as source expression. we expect getting cast(b as targetType) = constant instead of b = constant Reason: When inferring predicate, we will compare original type of a and b. if they can be cast without precision lost, a new predicate would be created. But created predicate forgot to cast to target type Solved: Add cast to target type, and open make other datatype valid also. --- .../rules/rewrite/PredicatePropagation.java | 39 +++++++++++-------- .../rules/rewrite/InferPredicatesTest.java | 30 ++++++++++++++ .../infer_predicate/infer_predicate.groovy | 18 +++++++++ 3 files changed, 71 insertions(+), 16 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java index cc45952817a845..71818966696958 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java @@ -59,12 +59,12 @@ public Set infer(Set predicates) { } /** - * Use the left or right child of `leftSlotEqualToRightSlot` to replace the left or right child of `expression` + * Use the left or right child of `equalExpr` to replace the left or right child of `expression` * Now only support infer `ComparisonPredicate`. 
* TODO: We should determine whether `expression` satisfies the condition for replacement * eg: Satisfy `expression` is non-deterministic */ - private Expression doInfer(Expression leftSlotEqualToRightSlot, Expression expression) { + private Expression doInfer(Expression equalExpr, Expression expression) { return expression.accept(new DefaultExpressionRewriter() { @Override @@ -76,36 +76,43 @@ public Expression visit(Expression expr, Void context) { public Expression visitComparisonPredicate(ComparisonPredicate cp, Void context) { // we need to get expression covered by cast, because we want to infer different datatype if (ExpressionUtils.isExpressionSlotCoveredByCast(cp.left()) && (cp.right().isConstant())) { - return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.left())); + return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.left()), equalExpr); } else if (ExpressionUtils.isExpressionSlotCoveredByCast(cp.right()) && cp.left().isConstant()) { - return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.right())); + return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.right()), equalExpr); } return super.visit(cp, context); } private boolean isDataTypeValid(DataType originDataType, Expression expr) { - if ((leftSlotEqualToRightSlot.child(0).getDataType() instanceof IntegralType) - && (leftSlotEqualToRightSlot.child(1).getDataType() instanceof IntegralType) + if ((expr.child(0).getDataType() instanceof IntegralType) + && (expr.child(1).getDataType() instanceof IntegralType) && (originDataType instanceof IntegralType)) { // infer filter can not be lower than original datatype, or dataset would be wrong if (!((IntegralType) originDataType).widerThan( - (IntegralType) leftSlotEqualToRightSlot.child(0).getDataType()) + (IntegralType) expr.child(0).getDataType()) && !((IntegralType) originDataType).widerThan( - (IntegralType) leftSlotEqualToRightSlot.child(1).getDataType())) { + (IntegralType) expr.child(1).getDataType())) { return true; } + } else if (expr.child(0).getDataType().equals(expr.child(1).getDataType())) { + return true; } return false; } - private Expression replaceSlot(Expression expr, DataType originDataType) { - return expr.rewriteUp(e -> { - if (isDataTypeValid(originDataType, leftSlotEqualToRightSlot)) { - if (ExpressionUtils.isTwoExpressionEqualWithCast(e, leftSlotEqualToRightSlot.child(0))) { - return leftSlotEqualToRightSlot.child(1); - } else if (ExpressionUtils.isTwoExpressionEqualWithCast(e, leftSlotEqualToRightSlot.child(1))) { - return leftSlotEqualToRightSlot.child(0); - } + private Expression replaceSlot(Expression sourcePredicate, DataType originDataType, Expression equal) { + if (!isDataTypeValid(originDataType, equal)) { + return sourcePredicate; + } + return sourcePredicate.rewriteUp(e -> { + // we can not replace Cast expression to slot because when rewrite up, we have replace child of cast + if (e instanceof Cast) { + return e; + } + if (ExpressionUtils.isTwoExpressionEqualWithCast(e, equal.child(0))) { + return equal.child(1); + } else if (ExpressionUtils.isTwoExpressionEqualWithCast(e, equal.child(1))) { + return equal.child(0); } return e; }); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java index adc67ca835f915..b7b235d2b43041 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java +++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java @@ -17,15 +17,33 @@ package org.apache.doris.nereids.rules.rewrite; +import org.apache.doris.nereids.trees.expressions.Cast; +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.plans.JoinType; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.types.BigIntType; import org.apache.doris.nereids.util.MemoPatternMatchSupported; import org.apache.doris.nereids.util.PlanChecker; +import org.apache.doris.nereids.util.PlanConstructor; import org.apache.doris.utframe.TestWithFeService; +import com.google.common.collect.Sets; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import java.util.Optional; +import java.util.Set; + public class InferPredicatesTest extends TestWithFeService implements MemoPatternMatchSupported { + private final LogicalOlapScan scan1 = PlanConstructor.newLogicalOlapScan(0, "t1", 0); + + private final LogicalOlapScan scan2 = PlanConstructor.newLogicalOlapScan(1, "t2", 0); + + private final PredicatePropagation propagation = new PredicatePropagation(); + @Override protected void runBeforeAll() throws Exception { createDatabase("test"); @@ -628,4 +646,16 @@ public void innerJoinShouldNotInferUnderLeftJoinOnClausePredicates() { ).when(join -> join.getJoinType() == JoinType.LEFT_OUTER_JOIN) ); } + + @Test + void testInfer() { + EqualTo equalTo = new EqualTo(new Cast(scan1.getOutput().get(0), BigIntType.INSTANCE), Literal.of(1)); + EqualTo equalTo2 = new EqualTo(scan2.getOutput().get(0), scan1.getOutput().get(0)); + Set predicates = Sets.newHashSet(); + predicates.add(equalTo2); + predicates.add(equalTo); + Set newPredicates = propagation.infer(predicates); + Optional newPredicate = newPredicates.stream().findFirst(); + Assertions.assertTrue(newPredicate.get().equals(new EqualTo(new Cast(scan2.getOutput().get(0), BigIntType.INSTANCE), Literal.of(1)))); + } } diff --git a/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy b/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy index a1621f1c239aa5..120c9a8f674458 100644 --- a/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy +++ b/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy @@ -22,6 +22,8 @@ suite("test_infer_predicate") { sql 'drop table if exists infer_tb1;' sql 'drop table if exists infer_tb2;' sql 'drop table if exists infer_tb3;' + sql 'drop table if exists infer_tb4;' + sql 'drop table if exists infer_tb5;' sql '''create table infer_tb1 (k1 int, k2 int) distributed by hash(k1) buckets 3 properties('replication_num' = '1');''' @@ -29,6 +31,10 @@ suite("test_infer_predicate") { sql '''create table infer_tb3 (k1 varchar(100), k2 int) distributed by hash(k1) buckets 3 properties('replication_num' = '1');''' + sql '''create table infer_tb4 (k1 varchar(100), k2 date) distributed by hash(k1) buckets 3 properties('replication_num' = '1');''' + + sql '''create table infer_tb5 (k1 varchar(100), k3 date) distributed by hash(k1) buckets 3 properties('replication_num' = '1');''' + explain { sql "select * from infer_tb1 inner join infer_tb2 where infer_tb2.k1 = infer_tb1.k2 and infer_tb2.k1 = 1;" contains "PREDICATES: k2" @@ -55,4 +61,16 @@ suite("test_infer_predicate") { contains "PREDICATES: k3" contains 
"PREDICATES: k2" } + + explain { + sql "select * from infer_tb4 left join infer_tb5 on infer_tb4.k2 = infer_tb5.k3 where infer_tb4.k2 = '20230901';" + contains "PREDICATES: k3" + contains "PREDICATES: k2" + } + + sql 'drop table if exists infer_tb1;' + sql 'drop table if exists infer_tb2;' + sql 'drop table if exists infer_tb3;' + sql 'drop table if exists infer_tb4;' + sql 'drop table if exists infer_tb5;' } From 832ee7bc8e1d97b2261259e19bda314ced85f8a8 Mon Sep 17 00:00:00 2001 From: Mingyu Chen Date: Mon, 11 Sep 2023 16:16:58 +0800 Subject: [PATCH 19/31] [minor](fe) optimize some log info and imports issue (#24138) --- fe/check/checkstyle/import-control.xml | 1 + fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java | 3 ++- .../src/main/java/org/apache/doris/qe/ConnectProcessor.java | 4 +++- .../java/org/apache/doris/analysis/S3TvfLoadStmtTest.java | 2 +- 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/fe/check/checkstyle/import-control.xml b/fe/check/checkstyle/import-control.xml index 9ff32f7d512eb0..16371ce8919206 100644 --- a/fe/check/checkstyle/import-control.xml +++ b/fe/check/checkstyle/import-control.xml @@ -25,6 +25,7 @@ under the License. + diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index 9fbed8b63d6087..3754ea416beb07 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -1132,7 +1132,8 @@ private void getClusterIdAndRole() throws IOException { } if (Config.cluster_id != -1 && clusterId != Config.cluster_id) { - throw new IOException("cluster id is not equal with config item cluster_id. will exit."); + throw new IOException("cluster id is not equal with config item cluster_id. will exit. 
" + + "If you are in recovery mode, please also modify the cluster_id in 'doris-meta/image/VERSION'"); } if (role.equals(FrontendNodeType.FOLLOWER)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java index 73c6debb698ed3..4c2549d22a5730 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java @@ -362,7 +362,9 @@ private void auditAfterExec(String origStmt, StatementBase parsedStmt, // Process COM_QUERY statement, // only throw an exception when there is a problem interacting with the requesting client private void handleQuery(MysqlCommand mysqlCommand) { - MetricRepo.COUNTER_REQUEST_ALL.increase(1L); + if (MetricRepo.isInit) { + MetricRepo.COUNTER_REQUEST_ALL.increase(1L); + } // convert statement to Java string byte[] bytes = packetBuf.array(); int ending = packetBuf.limit() - 1; diff --git a/fe/fe-core/src/test/java/org/apache/doris/analysis/S3TvfLoadStmtTest.java b/fe/fe-core/src/test/java/org/apache/doris/analysis/S3TvfLoadStmtTest.java index cc41e31e3c338c..6c847f28206a6f 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/analysis/S3TvfLoadStmtTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/analysis/S3TvfLoadStmtTest.java @@ -33,11 +33,11 @@ import org.apache.doris.load.loadv2.LoadTask.MergeType; import org.apache.doris.tablefunction.S3TableValuedFunction; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import mockit.Expectations; import mockit.Injectable; -import org.apache.hadoop.util.Lists; import org.junit.Assert; import org.junit.Before; import org.junit.Test; From 6e21e406b7421d18bf61cc181c3bfd1a0704b286 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Mon, 11 Sep 2023 18:03:46 +0800 Subject: [PATCH 20/31] [fix](block rule) throw npe when use Nereids explain or fallback (#24182) --- .../apache/doris/nereids/NereidsPlanner.java | 4 +-- .../org/apache/doris/qe/StmtExecutor.java | 1 + .../test_sql_block_rule.groovy | 30 +++++++++++-------- 3 files changed, 21 insertions(+), 14 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java index 0813f8e5f00e10..3810f42e10e5e8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java @@ -89,7 +89,7 @@ public class NereidsPlanner extends Planner { public static final Logger LOG = LogManager.getLogger(NereidsPlanner.class); private CascadesContext cascadesContext; private final StatementContext statementContext; - private List scanNodeList = null; + private final List scanNodeList = Lists.newArrayList(); private DescriptorTable descTable; private Plan parsedPlan; @@ -140,7 +140,7 @@ public void plan(StatementBase queryStmt, org.apache.doris.thrift.TQueryOptions } PlanFragment root = physicalPlanTranslator.translatePlan(physicalPlan); - scanNodeList = planTranslatorContext.getScanNodes(); + scanNodeList.addAll(planTranslatorContext.getScanNodes()); descTable = planTranslatorContext.getDescTable(); fragments = new ArrayList<>(planTranslatorContext.getPlanFragments()); for (int seq = 0; seq < fragments.size(); seq++) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java 
b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
index d7e597057adb43..fff24358b93759 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
@@ -2468,6 +2468,7 @@ public List<ResultRow> executeInternalQuery() {
         } catch (Exception e) {
             LOG.warn("fall back to legacy planner, because: {}", e.getMessage(), e);
             parsedStmt = null;
+            planner = null;
             context.getState().setNereids(false);
             analyzer = new Analyzer(context.getEnv(), context);
             analyze(context.getSessionVariable().toThrift());
diff --git a/regression-test/suites/sql_block_rule_p0/test_sql_block_rule.groovy b/regression-test/suites/sql_block_rule_p0/test_sql_block_rule.groovy
index 582fa7c705d03b..768cbcf029e7d7 100644
--- a/regression-test/suites/sql_block_rule_p0/test_sql_block_rule.groovy
+++ b/regression-test/suites/sql_block_rule_p0/test_sql_block_rule.groovy
@@ -174,25 +174,31 @@ suite("test_sql_block_rule") {
             "enable"="true");
     """
 
-    test {
-        sql("""SELECT * FROM a_partitioned_table_for_sql_block_rule;""", false)
-
-        exception """sql hits sql block rule"""
-
+    try {
+        test {
+            sql("""SELECT * FROM a_partitioned_table_for_sql_block_rule;""", false)
+            exception """sql hits sql block rule"""
+        }
+    } finally {
+        sql """
+            drop SQL_BLOCK_RULE if exists test_rule_partition;
+        """
     }
 
     sql """
         CREATE SQL_BLOCK_RULE if not exists test_rule_tablet PROPERTIES ( "tablet_num" = "3", "global" = "true",
         "enable"="true");
     """
-
-    test {
-        sql("""SELECT * FROM a_partitioned_table_for_sql_block_rule;""", false)
-
-        exception """sql hits sql block rule"""
-
+    try {
+        test {
+            sql("""SELECT * FROM a_partitioned_table_for_sql_block_rule;""", false)
+            exception """sql hits sql block rule"""
+        }
+    } finally {
+        sql """
+            drop SQL_BLOCK_RULE if exists test_rule_tablet;
+        """
     }
-
-
 }

From e841bb1138584f463cc14935eb97348d60896649 Mon Sep 17 00:00:00 2001
From: zhangdong <493738387@qq.com>
Date: Mon, 11 Sep 2023 18:32:31 +0800
Subject: [PATCH 21/31] [Enhance](ip) optimize priority_network matching logic for be (#23795)

Issue Number: close #xxx

If the user has configured a wrong priority_network, fail startup directly,
to avoid the user mistakenly assuming that the configuration is correct.
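For context, a hedged sketch of the setting this commit validates; the subnet value
below is a placeholder, not taken from this patch:

```
# be.conf sketch: restrict BE to addresses inside one CIDR. With this commit,
# a host with no address inside the CIDR fails startup instead of silently
# picking some other IP.
priority_networks = 192.168.0.0/24
```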
If the user has not configured priority_network, select only the first IP from
the IPv4 list, rather than from all IPs, to avoid problems on servers that do
not support IPv4.

extends #23784
---
 be/src/service/backend_options.cpp        | 83 +++++++++++++----------
 be/src/service/backend_options.h          |  6 +-
 be/src/vec/exec/scan/new_es_scan_node.cpp |  4 +-
 be/test/util/backend_options_test.cpp     | 78 +++++++++++++++++++++
 4 files changed, 134 insertions(+), 37 deletions(-)
 create mode 100644 be/test/util/backend_options_test.cpp

diff --git a/be/src/service/backend_options.cpp b/be/src/service/backend_options.cpp
index 20f4e68c7603cb..c8325733368e71 100644
--- a/be/src/service/backend_options.cpp
+++ b/be/src/service/backend_options.cpp
@@ -38,7 +38,7 @@ bool BackendOptions::_bind_ipv6 = false;
 const char* _service_bind_address = "0.0.0.0";
 
 bool BackendOptions::init() {
-    if (!analyze_priority_cidrs()) {
+    if (!analyze_priority_cidrs(config::priority_networks, &_s_priority_cidrs)) {
         return false;
     }
     std::vector<InetAddress> hosts;
@@ -53,37 +53,12 @@ bool BackendOptions::init() {
         LOG(FATAL) << "failed to get host";
         return false;
     }
-
-    std::string loopback;
-    std::vector<InetAddress>::iterator addr_it = hosts.begin();
-    for (; addr_it != hosts.end(); ++addr_it) {
-        VLOG_CRITICAL << "check ip=" << addr_it->get_host_address();
-        if (!_s_priority_cidrs.empty()) {
-            // Whether to use IPV4 or IPV6, it's configured by CIDR format.
-            // If both IPV4 and IPV6 are configured, the config order decides priority.
-            if (is_in_prior_network(addr_it->get_host_address())) {
-                _s_localhost = addr_it->get_host_address();
-                _bind_ipv6 = addr_it->is_ipv6();
-                break;
-            }
-            LOG(INFO) << "skip ip not belonged to priority networks: "
-                      << addr_it->get_host_address();
-        } else if ((*addr_it).is_loopback()) {
-            loopback = addr_it->get_host_address();
-            _bind_ipv6 = addr_it->is_ipv6();
-        } else {
-            _s_localhost = addr_it->get_host_address();
-            _bind_ipv6 = addr_it->is_ipv6();
-            break;
-        }
+    if (!analyze_localhost(_s_localhost, _bind_ipv6, &_s_priority_cidrs, &hosts)) {
+        return false;
     }
     if (_bind_ipv6) {
         _service_bind_address = "[::0]";
     }
-    if (_s_localhost.empty()) {
-        LOG(INFO) << "fail to find one valid non-loopback address, use loopback address.";
-        _s_localhost = loopback;
-    }
     LOG(INFO) << "local host ip=" << _s_localhost;
     return true;
 }
@@ -118,14 +93,14 @@ const char* BackendOptions::get_service_bind_address_without_bracket() {
     return _service_bind_address;
 }
 
-bool BackendOptions::analyze_priority_cidrs() {
-    if (config::priority_networks == "") {
+bool BackendOptions::analyze_priority_cidrs(const std::string& priority_networks,
+                                            std::vector<CIDR>* cidrs) {
+    if (priority_networks == "") {
         return true;
     }
-    LOG(INFO) << "priority cidrs in conf: " << config::priority_networks;
+    LOG(INFO) << "priority cidrs: " << priority_networks;
 
-    std::vector<std::string> cidr_strs =
-            strings::Split(config::priority_networks, PRIORITY_CIDR_SEPARATOR);
+    std::vector<std::string> cidr_strs = strings::Split(priority_networks, PRIORITY_CIDR_SEPARATOR);
 
     for (auto& cidr_str : cidr_strs) {
         CIDR cidr;
@@ -133,7 +108,47 @@ bool BackendOptions::analyze_priority_cidrs() {
            LOG(FATAL) << "wrong cidr format.
cidr_str=" << cidr_str; return false; } - _s_priority_cidrs.push_back(cidr); + cidrs->push_back(cidr); + } + return true; +} + +bool BackendOptions::analyze_localhost(std::string& localhost, bool& bind_ipv6, + std::vector* cidrs, std::vector* hosts) { + std::vector::iterator addr_it = hosts->begin(); + if (!cidrs->empty()) { + for (; addr_it != hosts->end(); ++addr_it) { + VLOG_CRITICAL << "check ip=" << addr_it->get_host_address(); + // Whether to use IPV4 or IPV6, it's configured by CIDR format. + // If both IPV4 and IPV6 are configured, the config order decides priority. + if (is_in_prior_network(addr_it->get_host_address())) { + localhost = addr_it->get_host_address(); + bind_ipv6 = addr_it->is_ipv6(); + break; + } + LOG(INFO) << "skip ip not belonged to priority networks: " + << addr_it->get_host_address(); + } + if (localhost.empty()) { + LOG(FATAL) << "fail to find one valid address, exit."; + return false; + } + } else { + std::string loopback; + for (; addr_it != hosts->end(); ++addr_it) { + if ((*addr_it).is_loopback()) { + loopback = addr_it->get_host_address(); + _bind_ipv6 = addr_it->is_ipv6(); + } else if (!addr_it->is_ipv6()) { + localhost = addr_it->get_host_address(); + _bind_ipv6 = addr_it->is_ipv6(); + break; + } + } + if (localhost.empty()) { + LOG(INFO) << "fail to find one valid non-loopback address, use loopback address."; + localhost = loopback; + } } return true; } diff --git a/be/src/service/backend_options.h b/be/src/service/backend_options.h index 3aff93403635db..72293373883471 100644 --- a/be/src/service/backend_options.h +++ b/be/src/service/backend_options.h @@ -23,6 +23,7 @@ #include #include "gen_cpp/Types_types.h" +#include "util/network_util.h" namespace doris { @@ -37,9 +38,12 @@ class BackendOptions { static bool is_bind_ipv6(); static const char* get_service_bind_address(); static const char* get_service_bind_address_without_bracket(); + static bool analyze_priority_cidrs(const std::string& priority_networks, + std::vector* cidrs); + static bool analyze_localhost(std::string& localhost, bool& bind_ipv6, std::vector* cidrs, + std::vector* hosts); private: - static bool analyze_priority_cidrs(); static bool is_in_prior_network(const std::string& ip); static std::string _s_localhost; diff --git a/be/src/vec/exec/scan/new_es_scan_node.cpp b/be/src/vec/exec/scan/new_es_scan_node.cpp index 088784b330b7bc..6a3ec3a73890b2 100644 --- a/be/src/vec/exec/scan/new_es_scan_node.cpp +++ b/be/src/vec/exec/scan/new_es_scan_node.cpp @@ -41,7 +41,7 @@ class VScanner; static const std::string NEW_SCAN_NODE_TYPE = "NewEsScanNode"; // Prefer to the local host -static std::string get_host_port(const std::vector& es_hosts) { +static std::string get_host_and_port(const std::vector& es_hosts) { std::string host_port; std::string localhost = doris::BackendOptions::get_localhost(); @@ -152,7 +152,7 @@ Status NewEsScanNode::_init_scanners(std::list* scanners) { } properties[ESScanReader::KEY_SHARD] = std::to_string(es_scan_range->shard_id); properties[ESScanReader::KEY_BATCH_SIZE] = std::to_string(_state->batch_size()); - properties[ESScanReader::KEY_HOST_PORT] = get_host_port(es_scan_range->es_hosts); + properties[ESScanReader::KEY_HOST_PORT] = get_host_and_port(es_scan_range->es_hosts); // push down limit to Elasticsearch // if predicate in _conjunct_ctxs can not be processed by Elasticsearch, we can not push down limit operator to Elasticsearch if (limit() != -1 && limit() <= _state->batch_size()) { diff --git a/be/test/util/backend_options_test.cpp 
b/be/test/util/backend_options_test.cpp new file mode 100644 index 00000000000000..a8d0ca41513ca5 --- /dev/null +++ b/be/test/util/backend_options_test.cpp @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "service/backend_options.h" + +#include +#include + +#include "gtest/gtest_pred_impl.h" +#include "util/cidr.h" + +namespace doris { + +class BackendOptionsTest : public testing::Test { +public: + BackendOptionsTest() {} + virtual ~BackendOptionsTest() {} +}; + +// only loopback +TEST_F(BackendOptionsTest, emptyCidr1) { + std::vector hosts; + hosts.emplace_back(std::string("127.0.0.1"), AF_INET, true); + + std::vector cidrs; + BackendOptions::analyze_priority_cidrs("", &cidrs); + std::string localhost; + bool bind_ipv6 = false; + BackendOptions::analyze_localhost(localhost, bind_ipv6, &cidrs, &hosts); + EXPECT_STREQ("127.0.0.1", localhost.c_str()); +} + +// priority not loopback +TEST_F(BackendOptionsTest, emptyCidr2) { + std::vector hosts; + hosts.emplace_back(std::string("127.0.0.1"), AF_INET, true); + hosts.emplace_back(std::string("10.10.10.10"), AF_INET, false); + hosts.emplace_back(std::string("10.10.10.11"), AF_INET, false); + + std::vector cidrs; + BackendOptions::analyze_priority_cidrs("", &cidrs); + std::string localhost; + bool bind_ipv6 = false; + BackendOptions::analyze_localhost(localhost, bind_ipv6, &cidrs, &hosts); + EXPECT_STREQ("10.10.10.10", localhost.c_str()); +} + +// not choose ipv6 +TEST_F(BackendOptionsTest, emptyCidr3) { + std::vector hosts; + hosts.emplace_back(std::string("127.0.0.1"), AF_INET, true); + hosts.emplace_back(std::string("fe80::5054:ff:fec9:dee0"), AF_INET6, false); + hosts.emplace_back(std::string("10.10.10.10"), AF_INET, false); + hosts.emplace_back(std::string("10.10.10.11"), AF_INET, false); + + std::vector cidrs; + BackendOptions::analyze_priority_cidrs("", &cidrs); + std::string localhost; + bool bind_ipv6 = false; + BackendOptions::analyze_localhost(localhost, bind_ipv6, &cidrs, &hosts); + EXPECT_STREQ("10.10.10.10", localhost.c_str()); +} + +} // namespace doris From 31577ad7bcdb774b564df85c2ea06b4e73ecd41d Mon Sep 17 00:00:00 2001 From: Ashin Gau Date: Mon, 11 Sep 2023 19:54:59 +0800 Subject: [PATCH 22/31] [fix](hudi) compatible with hudi spark configuration and support skip merge (#24067) Fix three bugs: 1. Hudi slice maybe has log files only, so `new Path(filePath)` will throw errors. 2. Hive column names are lowercase only, so match column names in ignore-case-mode. 3. Compatible with [Spark Datasource Configs](https://hudi.apache.org/docs/configurations/#Read-Options), so users can add `hoodie.datasource.merge.type=skip_merge` in catalog properties to skip merge logs files. 
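For illustration, a hedged SQL sketch of point 3, mirroring the regression test
added below; the catalog name and metastore host/port are placeholders:

```sql
-- Minimal sketch: a catalog that reads Hudi MOR tables while skipping log-file
-- merging, via the Spark-compatible property this patch starts honoring.
CREATE CATALOG hudi_skip_merge PROPERTIES (
    'type' = 'hms',
    'hive.metastore.uris' = 'thrift://127.0.0.1:9083',
    'hoodie.datasource.merge.type' = 'skip_merge'
);
```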
--- .../vec/exec/format/table/hudi_jni_reader.cpp | 7 ++- .../vec/exec/format/table/hudi_jni_reader.h | 1 + docs/en/docs/lakehouse/multi-catalog/hudi.md | 3 + .../docs/lakehouse/multi-catalog/hudi.md | 4 ++ .../org/apache/doris/hudi/HudiJniScanner.java | 16 +++-- .../apache/doris/hudi/BaseSplitReader.scala | 48 +++++++++------ .../doris/hudi/HoodieRecordIterator.scala | 5 +- .../planner/external/hudi/HudiScanNode.java | 19 +++--- .../external_table_p2/hive/test_hive_hudi.out | 25 ++++++++ .../hive/test_hive_hudi.groovy | 58 +++++++++++++++++++ 10 files changed, 151 insertions(+), 35 deletions(-) create mode 100644 regression-test/data/external_table_p2/hive/test_hive_hudi.out create mode 100644 regression-test/suites/external_table_p2/hive/test_hive_hudi.groovy diff --git a/be/src/vec/exec/format/table/hudi_jni_reader.cpp b/be/src/vec/exec/format/table/hudi_jni_reader.cpp index 029135ac670bd2..bd6b40f3f1a232 100644 --- a/be/src/vec/exec/format/table/hudi_jni_reader.cpp +++ b/be/src/vec/exec/format/table/hudi_jni_reader.cpp @@ -36,6 +36,7 @@ class Block; namespace doris::vectorized { +const std::string HudiJniReader::HOODIE_CONF_PREFIX = "hoodie."; const std::string HudiJniReader::HADOOP_CONF_PREFIX = "hadoop_conf."; HudiJniReader::HudiJniReader(const TFileScanRangeParams& scan_params, @@ -67,7 +68,11 @@ HudiJniReader::HudiJniReader(const TFileScanRangeParams& scan_params, // Use compatible hadoop client to read data for (auto& kv : _scan_params.properties) { - params[HADOOP_CONF_PREFIX + kv.first] = kv.second; + if (kv.first.starts_with(HOODIE_CONF_PREFIX)) { + params[kv.first] = kv.second; + } else { + params[HADOOP_CONF_PREFIX + kv.first] = kv.second; + } } _jni_connector = std::make_unique("org/apache/doris/hudi/HudiJniScanner", params, diff --git a/be/src/vec/exec/format/table/hudi_jni_reader.h b/be/src/vec/exec/format/table/hudi_jni_reader.h index bf2dab943d8d80..c0438e93289063 100644 --- a/be/src/vec/exec/format/table/hudi_jni_reader.h +++ b/be/src/vec/exec/format/table/hudi_jni_reader.h @@ -46,6 +46,7 @@ class HudiJniReader : public GenericReader { ENABLE_FACTORY_CREATOR(HudiJniReader); public: + static const std::string HOODIE_CONF_PREFIX; static const std::string HADOOP_CONF_PREFIX; HudiJniReader(const TFileScanRangeParams& scan_params, const THudiFileDesc& hudi_params, diff --git a/docs/en/docs/lakehouse/multi-catalog/hudi.md b/docs/en/docs/lakehouse/multi-catalog/hudi.md index 4c46ccb0e143f4..52892db2df2174 100644 --- a/docs/en/docs/lakehouse/multi-catalog/hudi.md +++ b/docs/en/docs/lakehouse/multi-catalog/hudi.md @@ -59,6 +59,9 @@ CREATE CATALOG hudi PROPERTIES ( Same as that in Hive Catalogs. See the relevant section in [Hive](./hive.md). +## Skip Merge +Spark will create the read optimize table with `_ro` suffix when generating hudi mor table. Doris will skip the log files when reading optimize table. Doris does not determine whether a table is read optimize by the `_ro` suffix instead of the hive inputformat. Users can observe whether the inputformat of the 'cow/mor/read optimize' table is the same through the `SHOW CREATE TABLE` command. In addition, Doris supports adding hoodie related configurations to catalog properties, which are compatible with [Spark Datasource Configs](https://hudi.apache.org/docs/configurations/#Read-Options), so users can add `hoodie.datasource.merge.type=skip_merge` in catalog properties to skip merge logs files. 
+ ## Query Optimization Doris uses the parquet native reader to read the data files of the COW table, and uses the Java SDK (By calling hudi-bundle through JNI) to read the data files of the MOR table. In `upsert` scenario, there may still remains base files that have not been updated in the MOR table, which can be read through the parquet native reader. Users can view the execution plan of hudi scan through the [explain](../../advanced/best-practice/query-analysis.md) command, where `hudiNativeReadSplits` indicates how many split files are read through the parquet native reader. ``` diff --git a/docs/zh-CN/docs/lakehouse/multi-catalog/hudi.md b/docs/zh-CN/docs/lakehouse/multi-catalog/hudi.md index 228be874203ad3..b619283cacf5bc 100644 --- a/docs/zh-CN/docs/lakehouse/multi-catalog/hudi.md +++ b/docs/zh-CN/docs/lakehouse/multi-catalog/hudi.md @@ -59,6 +59,10 @@ CREATE CATALOG hudi PROPERTIES ( 和 Hive Catalog 一致,可参阅 [Hive Catalog](./hive.md) 中 **列类型映射** 一节。 +## Skip Merge +Spark 在创建 hudi mor 表的时候,会创建 `_ro` 后缀的 read optimize 表,doris 读取 read optimize 表会跳过 log 文件的合并。doris 判定一个表是否为 read optimize 表并不是通过 `_ro` 后缀,而是通过 hive inputformat,用户可以通过 `SHOW CREATE TABLE` 命令观察 cow/mor/read optimize 表的 inputformat 是否相同。 +此外 doris 支持在 catalog properties 添加 hoodie 相关的配置,配置项兼容 [Spark Datasource Configs](https://hudi.apache.org/docs/configurations/#Read-Options)。所以用户可以在 catalog properties 中添加 `hoodie.datasource.merge.type=skip_merge` 跳过合并 log 文件。 + ## 查询优化 Doris 使用 parquet native reader 读取 COW 表的数据文件,使用 Java SDK(通过JNI调用hudi-bundle) 读取 MOR 表的数据文件。在 upsert 场景下,MOR 依然会有数据文件没有被更新,这部分文件可以通过 parquet native reader读取,用户可以通过 [explain](../../advanced/best-practice/query-analysis.md) 命令查看 hudi scan 的执行计划,`hudiNativeReadSplits` 表示有多少 split 文件通过 parquet native reader 读取。 diff --git a/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/HudiJniScanner.java b/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/HudiJniScanner.java index 417b338115c2da..64c4fd70e7b542 100644 --- a/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/HudiJniScanner.java +++ b/fe/be-java-extensions/hudi-scanner/src/main/java/org/apache/doris/hudi/HudiJniScanner.java @@ -86,6 +86,9 @@ public class HudiJniScanner extends JniScanner { static { int numThreads = Math.max(Runtime.getRuntime().availableProcessors() * 2 + 1, 4); + if (numThreads > 32) { + numThreads = Runtime.getRuntime().availableProcessors(); + } avroReadPool = Executors.newFixedThreadPool(numThreads, new ThreadFactoryBuilder().setNameFormat("avro-log-reader-%d").build()); LOG.info("Create " + numThreads + " daemon threads to load avro logs"); @@ -176,10 +179,15 @@ public void open() throws IOException { if (ugi != null) { recordIterator = ugi.doAs( (PrivilegedExceptionAction>) () -> new MORSnapshotSplitReader( - split).buildScanIterator(split.requiredFields(), new Filter[0])); + split).buildScanIterator(new Filter[0])); } else { recordIterator = new MORSnapshotSplitReader(split) - .buildScanIterator(split.requiredFields(), new Filter[0]); + .buildScanIterator(new Filter[0]); + } + if (AVRO_RESOLVER_CACHE != null && AVRO_RESOLVER_CACHE.get() != null) { + cachedResolvers.computeIfAbsent(Thread.currentThread().getId(), + threadId -> AVRO_RESOLVER_CACHE.get()); + AVRO_RESOLVER_CACHE.get().clear(); } } catch (Exception e) { LOG.error("Failed to open hudi scanner, split params:\n" + debugString, e); @@ -189,10 +197,6 @@ public void open() throws IOException { } isKilled.set(true); executorService.shutdownNow(); - if (AVRO_RESOLVER_CACHE 
!= null && AVRO_RESOLVER_CACHE.get() != null) { - cachedResolvers.computeIfAbsent(Thread.currentThread().getId(), - threadId -> AVRO_RESOLVER_CACHE.get()); - } getRecordReaderTimeNs += System.nanoTime() - startTime; }); try { diff --git a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala index 5ba16a5e164067..3c10f8a4cd7208 100644 --- a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala +++ b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/BaseSplitReader.scala @@ -153,6 +153,8 @@ case class HoodieTableInformation(sparkSession: SparkSession, metaClient: HoodieTableMetaClient, timeline: HoodieTimeline, tableConfig: HoodieTableConfig, + resolvedTargetFields: Array[String], + tableAvroSchema: Schema, internalSchemaOpt: Option[InternalSchema]) /** @@ -214,22 +216,7 @@ abstract class BaseSplitReader(val split: HoodieSplit) { * required to fetch table's Avro and Internal schemas */ protected lazy val (tableAvroSchema: Schema, internalSchemaOpt: Option[InternalSchema]) = { - val schemaResolver = new TableSchemaResolver(tableInformation.metaClient) - val (name, namespace) = AvroConversionUtils.getAvroRecordNameAndNamespace(tableName) - val avroSchema: Schema = tableInformation.internalSchemaOpt.map { is => - AvroInternalSchemaConverter.convert(is, namespace + "." + name) - } orElse { - specifiedQueryTimestamp.map(schemaResolver.getTableAvroSchema) - } orElse { - split.schemaSpec.map(s => convertToAvroSchema(s, tableName)) - } getOrElse { - Try(schemaResolver.getTableAvroSchema) match { - case Success(schema) => schema - case Failure(e) => - throw new HoodieSchemaException("Failed to fetch schema from the table", e) - } - } - (avroSchema, tableInformation.internalSchemaOpt) + (tableInformation.tableAvroSchema, tableInformation.internalSchemaOpt) } protected lazy val tableStructSchema: StructType = convertAvroSchemaToStructType(tableAvroSchema) @@ -280,13 +267,13 @@ abstract class BaseSplitReader(val split: HoodieSplit) { sqlContext.sparkSession.sessionState.conf.setConfString("spark.sql.parquet.enableVectorizedReader", "false") } - def buildScanIterator(requiredColumns: Array[String], filters: Array[Filter]): Iterator[InternalRow] = { + def buildScanIterator(filters: Array[Filter]): Iterator[InternalRow] = { // NOTE: PLEASE READ CAREFULLY BEFORE MAKING CHANGES // *Appending* additional columns to the ones requested by the caller is not a problem, as those // will be eliminated by the caller's projection; // (!) 
Please note, however, that it's critical to avoid _reordering_ of the requested columns as this // will break the upstream projection - val targetColumns: Array[String] = appendMandatoryColumns(requiredColumns) + val targetColumns: Array[String] = appendMandatoryColumns(tableInformation.resolvedTargetFields) // NOTE: We explicitly fallback to default table's Avro schema to make sure we avoid unnecessary Catalyst > Avro // schema conversion, which is lossy in nature (for ex, it doesn't preserve original Avro type-names) and // could have an effect on subsequent de-/serializing records in some exotic scenarios (when Avro unions @@ -663,11 +650,36 @@ object BaseSplitReader { None } } + val tableName = metaClient.getTableConfig.getTableName + val (name, namespace) = AvroConversionUtils.getAvroRecordNameAndNamespace(tableName) + val avroSchema: Schema = internalSchemaOpt.map { is => + AvroInternalSchemaConverter.convert(is, namespace + "." + name) + } orElse { + specifiedQueryTimestamp.map(schemaResolver.getTableAvroSchema) + } orElse { + split.schemaSpec.map(s => convertToAvroSchema(s, tableName)) + } getOrElse { + Try(schemaResolver.getTableAvroSchema) match { + case Success(schema) => schema + case Failure(e) => + throw new HoodieSchemaException("Failed to fetch schema from the table", e) + } + } + + // match column name in lower case + val colNames = internalSchemaOpt.map { internalSchema => + internalSchema.getAllColsFullName.asScala.map(f => f.toLowerCase -> f).toMap + } getOrElse { + avroSchema.getFields.asScala.map(f => f.name().toLowerCase -> f.name()).toMap + } + val resolvedTargetFields = split.requiredFields.map(field => colNames.getOrElse(field.toLowerCase, field)) HoodieTableInformation(sparkSession, metaClient, timeline, metaClient.getTableConfig, + resolvedTargetFields, + avroSchema, internalSchemaOpt) } } diff --git a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/HoodieRecordIterator.scala b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/HoodieRecordIterator.scala index c5645655355708..6e2b7b31e547bc 100644 --- a/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/HoodieRecordIterator.scala +++ b/fe/be-java-extensions/hudi-scanner/src/main/scala/org/apache/doris/hudi/HoodieRecordIterator.scala @@ -98,8 +98,9 @@ class HoodieMORRecordIterator(config: Configuration, case split => mergeType match { case DataSourceReadOptions.REALTIME_SKIP_MERGE_OPT_VAL => - val reader = fileReaders.requiredSchemaReaderSkipMerging - new SkipMergeIterator(split, reader, tableSchema, requiredSchema, tableState, config) + // val reader = fileReaders.requiredSchemaReaderSkipMerging + // new SkipMergeIterator(split, reader, tableSchema, requiredSchema, tableState, config) + throw new UnsupportedOperationException("Skip merge is optimized by native read") case DataSourceReadOptions.REALTIME_PAYLOAD_COMBINE_OPT_VAL => val reader = pickBaseFileReader() diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/hudi/HudiScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/hudi/HudiScanNode.java index c92c46659eee3e..328c7b0f19c7ef 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/hudi/HudiScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/hudi/HudiScanNode.java @@ -79,7 +79,7 @@ public class HudiScanNode extends HiveScanNode { private static final Logger LOG = LogManager.getLogger(HudiScanNode.class); - private final boolean isCowTable; + private final 
boolean isCowOrRoTable; private final AtomicLong noLogsSplitNum = new AtomicLong(0); @@ -91,9 +91,10 @@ public class HudiScanNode extends HiveScanNode { */ public HudiScanNode(PlanNodeId id, TupleDescriptor desc, boolean needCheckColumnPriv) { super(id, desc, "HUDI_SCAN_NODE", StatisticalType.HUDI_SCAN_NODE, needCheckColumnPriv); - isCowTable = hmsTable.isHoodieCowTable(); - if (isCowTable) { - LOG.debug("Hudi table {} can read as cow table", hmsTable.getName()); + isCowOrRoTable = hmsTable.isHoodieCowTable() || "skip_merge".equals( + hmsTable.getCatalogProperties().get("hoodie.datasource.merge.type")); + if (isCowOrRoTable) { + LOG.debug("Hudi table {} can read as cow/read optimize table", hmsTable.getName()); } else { LOG.debug("Hudi table {} is a mor table, and will use JNI to read data in BE", hmsTable.getName()); } @@ -101,7 +102,7 @@ public HudiScanNode(PlanNodeId id, TupleDescriptor desc, boolean needCheckColumn @Override public TFileFormatType getFileFormatType() throws UserException { - if (isCowTable) { + if (isCowOrRoTable) { return super.getFileFormatType(); } else { // Use jni to read hudi table in BE @@ -124,7 +125,7 @@ protected void doInitialize() throws UserException { @Override protected Map getLocationProperties() throws UserException { - if (isCowTable) { + if (isCowOrRoTable) { return super.getLocationProperties(); } else { // HudiJniScanner uses hadoop client to read data. @@ -291,7 +292,7 @@ public List getSplits() throws UserException { HoodieTableFileSystemView fileSystemView = new HoodieTableFileSystemView(hudiClient, timeline, statuses.toArray(new FileStatus[0])); - if (isCowTable) { + if (isCowOrRoTable) { fileSystemView.getLatestBaseFilesBeforeOrOn(partitionName, queryInstant).forEach(baseFile -> { noLogsSplitNum.incrementAndGet(); String filePath = baseFile.getPath(); @@ -312,7 +313,9 @@ public List getSplits() throws UserException { noLogsSplitNum.incrementAndGet(); } - HudiSplit split = new HudiSplit(new Path(filePath), 0, fileSize, fileSize, + // no base file, use log file to parse file type + String agencyPath = filePath.isEmpty() ? logs.get(0) : filePath; + HudiSplit split = new HudiSplit(new Path(agencyPath), 0, fileSize, fileSize, new String[0], partition.getPartitionValues()); split.setTableFormatType(TableFormatType.HUDI); split.setDataFilePath(filePath); diff --git a/regression-test/data/external_table_p2/hive/test_hive_hudi.out b/regression-test/data/external_table_p2/hive/test_hive_hudi.out new file mode 100644 index 00000000000000..a695d3cdb7d1f0 --- /dev/null +++ b/regression-test/data/external_table_p2/hive/test_hive_hudi.out @@ -0,0 +1,25 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !optimize_table -- +20230605145009209 20230605145009209_0_0 rowId:row_1 partitionId=2021-01-01/versionId=v_0 65ffc5d9-397a-456e-a735-30f3ad37466f-0_0-33-96_20230605145009209.parquet row_1 2021-01-01 0 bob v_0 toBeDel0 0 1000000 +20230605145403388 20230605145403388_2_0 rowId:row_1 partitionId=2011-11-11/versionId=v_1 dbff8acb-42bc-400c-be33-47d9e0bae9b7-0_2-83-222_20230605145403388.parquet row_1 2011-11-11 1 bob v_1 toBeDel1 0 1000001 +20230605145009209 20230605145009209_0_1 rowId:row_2 partitionId=2021-01-01/versionId=v_0 65ffc5d9-397a-456e-a735-30f3ad37466f-0_0-33-96_20230605145009209.parquet row_2 2021-01-01 0 john v_0 toBeDel0 0 1000000 +20230605145403388 20230605145403388_1_0 rowId:row_4 partitionId=2021-02-01/versionId=v_4 e33d645c-6e2f-41f3-b8d6-f658771bd460-0_1-83-220_20230605145403388.parquet row_4 2021-02-01 4 ashin v_4 toBeDel4 0 1000004 + +-- !merge_on_read -- +20230801201335031 20230801201335031_0_1 rowId:row_1 partitionId=2021-01-01/versionId=v_0 65ffc5d9-397a-456e-a735-30f3ad37466f-0 row_1 2021-01-01 0 bob v_0 toBeDel0 1 1000000 +20230801201335031 20230801201335031_1_1 rowId:row_1 partitionId=2011-11-11/versionId=v_1 dbff8acb-42bc-400c-be33-47d9e0bae9b7-0 row_1 2011-11-11 1 bob v_1 toBeDel1 1 1000001 +20230605145009209 20230605145009209_0_1 rowId:row_2 partitionId=2021-01-01/versionId=v_0 65ffc5d9-397a-456e-a735-30f3ad37466f-0_0-33-96_20230605145009209.parquet row_2 2021-01-01 0 john v_0 toBeDel0 0 1000000 +20230605145403388 20230605145403388_1_0 rowId:row_4 partitionId=2021-02-01/versionId=v_4 e33d645c-6e2f-41f3-b8d6-f658771bd460-0_1-83-220_20230605145403388.parquet row_4 2021-02-01 4 ashin v_4 toBeDel4 0 1000004 + +-- !lowercase_column -- +row_1 2021-01-01 0 v_0 +row_1 2011-11-11 1 v_1 +row_2 2021-01-01 0 v_0 +row_4 2021-02-01 4 v_4 + +-- !skip_merge -- +20230605145009209 20230605145009209_0_0 rowId:row_1 partitionId=2021-01-01/versionId=v_0 65ffc5d9-397a-456e-a735-30f3ad37466f-0_0-33-96_20230605145009209.parquet row_1 2021-01-01 0 bob v_0 toBeDel0 0 1000000 +20230605145403388 20230605145403388_2_0 rowId:row_1 partitionId=2011-11-11/versionId=v_1 dbff8acb-42bc-400c-be33-47d9e0bae9b7-0_2-83-222_20230605145403388.parquet row_1 2011-11-11 1 bob v_1 toBeDel1 0 1000001 +20230605145009209 20230605145009209_0_1 rowId:row_2 partitionId=2021-01-01/versionId=v_0 65ffc5d9-397a-456e-a735-30f3ad37466f-0_0-33-96_20230605145009209.parquet row_2 2021-01-01 0 john v_0 toBeDel0 0 1000000 +20230605145403388 20230605145403388_1_0 rowId:row_4 partitionId=2021-02-01/versionId=v_4 e33d645c-6e2f-41f3-b8d6-f658771bd460-0_1-83-220_20230605145403388.parquet row_4 2021-02-01 4 ashin v_4 toBeDel4 0 1000004 + diff --git a/regression-test/suites/external_table_p2/hive/test_hive_hudi.groovy b/regression-test/suites/external_table_p2/hive/test_hive_hudi.groovy new file mode 100644 index 00000000000000..abdd5b34dcbca5 --- /dev/null +++ b/regression-test/suites/external_table_p2/hive/test_hive_hudi.groovy @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_hive_hudi", "p2,external,hive,hudi") { + String enabled = context.config.otherConfigs.get("enableExternalHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost") + String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort") + String catalog_name = "test_hive_hudi" + + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'hadoop.username'='hadoop', + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}' + ); + """ + + sql """use ${catalog_name}.hudi_catalog""" + // read optimize table with partition + qt_optimize_table """select * from partitioned_mor_ro order by rowid, versionid""" + // copy on write table with update + qt_merge_on_read """select * from partitioned_mor_rt order by rowid, versionid""" + // match column name in lower case + qt_lowercase_column """select RoWiD, PaRtiTionID, PrEComB, VerSIonID from partitioned_mor_rt order by rowid, versionid""" + + + // skip logs + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'hadoop.username'='hadoop', + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}', + 'hoodie.datasource.merge.type'='skip_merge' + ); + """ + // copy on write table with update, skip merge logs, so the result is the same as partitioned_mor_ro + qt_skip_merge """select * from partitioned_mor_rt order by rowid, versionid""" + + sql """drop catalog if exists ${catalog_name};""" + } +}
From 6c16bb85ac22bb5e459eb877fba869687933e6eb Mon Sep 17 00:00:00 2001 From: zzzxl <33418555+zzzxl1993@users.noreply.github.com> Date: Mon, 11 Sep 2023 23:47:19 +0800 Subject: [PATCH 23/31] [fix](invert index) fix overloaded-virtual compiler warning (#24174) --- be/src/clucene | 2 +- be/src/olap/rowset/segment_v2/inverted_index_cache.h | 6 ------ 2 files changed, 1 insertion(+), 7 deletions(-)
diff --git a/be/src/clucene b/be/src/clucene index 2761b1afe48cb1..3b51f707d4c515 160000 --- a/be/src/clucene +++ b/be/src/clucene @@ -1 +1 @@ -Subproject commit 2761b1afe48cb1bd272ce8959e3aa3049b6e63ac +Subproject commit 3b51f707d4c51596d77b97f48b0baf49db0d2c30
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_cache.h b/be/src/olap/rowset/segment_v2/inverted_index_cache.h index 04fcddaf374245..00e8fa244f2ee0 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_cache.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_cache.h @@ -17,13 +17,7 @@ #pragma once -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Woverloaded-virtual" - #include // IWYU pragma: keep - -#pragma GCC diagnostic pop - #include #include #include
From 9a5bd48343fdcf7257f0bfdfc973e965afe23902 Mon Sep 17 00:00:00 2001 From: slothever <18522955+wsjz@users.noreply.github.com> Date: Tue, 12 Sep 2023 10:36:48 +0800 Subject: [PATCH 24/31] [fix](multi-catalog)support bit type and hidden mc secret key (#24124) support the MaxCompute bit type and mask the MC secret key: bool-typed columns will use the Arrow BitVector, and the secret key should be masked when catalog properties are printed. close #24019
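For reference, a minimal self-contained sketch of reading a boolean out of an Arrow `BitVector` (illustrative only; the class and variable names below are hypothetical and not the Doris scanner wiring):
```
// Hypothetical demo (not Doris code): MaxCompute BOOLEAN columns arrive as an
// Arrow BitVector, so casting the column to TinyIntVector (the old code) fails.
import org.apache.arrow.memory.RootAllocator;
import org.apache.arrow.vector.BitVector;

public class BitVectorDemo {
    public static void main(String[] args) {
        try (RootAllocator allocator = new RootAllocator();
             BitVector bits = new BitVector("flag", allocator)) {
            bits.allocateNew(2);
            bits.set(0, 1); // true
            bits.set(1, 0); // false
            bits.setValueCount(2);
            // same convention as the patched getBoolean(): non-zero means true
            System.out.println(bits.get(0) != 0); // prints true
            System.out.println(bits.get(1) != 0); // prints false
        }
    }
}
```
The diff below makes getBoolean() cast to BitVector accordingly.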
--- .../org/apache/doris/maxcompute/MaxComputeColumnValue.java | 5 +++-- .../main/java/org/apache/doris/common/util/PrintableMap.java | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-)
diff --git a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java index 5dfd5a0bcf697b..57b67bacf42c40 100644 --- a/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java +++ b/fe/be-java-extensions/max-compute-scanner/src/main/java/org/apache/doris/maxcompute/MaxComputeColumnValue.java @@ -20,6 +20,7 @@ import org.apache.doris.common.jni.vec.ColumnValue; import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.BitVector; import org.apache.arrow.vector.DateDayVector; import org.apache.arrow.vector.DateMilliVector; import org.apache.arrow.vector.DecimalVector; @@ -83,8 +84,8 @@ private void skippedIfNull() { @Override public boolean getBoolean() { skippedIfNull(); - TinyIntVector tinyIntCol = (TinyIntVector) column; - return tinyIntCol.get(idx++) > 0; + BitVector bitCol = (BitVector) column; + return bitCol.get(idx++) != 0; } @Override
diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/PrintableMap.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/PrintableMap.java index 3a5f5e0cd556e1..33e5624a517b14 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/PrintableMap.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/PrintableMap.java @@ -21,6 +21,7 @@ import org.apache.doris.datasource.property.constants.DLFProperties; import org.apache.doris.datasource.property.constants.GCSProperties; import org.apache.doris.datasource.property.constants.GlueProperties; +import org.apache.doris.datasource.property.constants.MCProperties; import org.apache.doris.datasource.property.constants.ObsProperties; import org.apache.doris.datasource.property.constants.OssProperties; import org.apache.doris.datasource.property.constants.S3Properties; @@ -55,7 +56,7 @@ public class PrintableMap { SENSITIVE_KEY.add("jdbc.password"); SENSITIVE_KEY.add("elasticsearch.password"); SENSITIVE_KEY.addAll(Arrays.asList(S3Properties.SECRET_KEY, ObsProperties.SECRET_KEY, OssProperties.SECRET_KEY, - GCSProperties.SECRET_KEY, CosProperties.SECRET_KEY, GlueProperties.SECRET_KEY, + GCSProperties.SECRET_KEY, CosProperties.SECRET_KEY, GlueProperties.SECRET_KEY, MCProperties.SECRET_KEY, DLFProperties.SECRET_KEY)); HIDDEN_KEY = Sets.newHashSet(); HIDDEN_KEY.addAll(S3Properties.Env.FS_KEYS);
From 666769796f9550c600757b67f481e2b3e5f86082 Mon Sep 17 00:00:00 2001 From: Kaijie Chen Date: Tue, 12 Sep 2023 11:04:10 +0800 Subject: [PATCH 25/31] [fix](load) check segment file size when finalize (#24196) --- be/src/olap/rowset/segment_v2/segment_writer.cpp | 13 ++++--------- be/src/olap/rowset/segment_v2/segment_writer.h | 1 - 2 files changed, 4 insertions(+), 10 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index e1bbe03f1a413d..8e998fb5615f74 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -900,11 +900,9 @@ Status SegmentWriter::finalize_footer(uint64_t* segment_file_size) { // finish RETURN_IF_ERROR(_file_writer->finalize()); *segment_file_size =
_file_writer->bytes_appended(); - return Status::OK(); -} - -Status SegmentWriter::finalize_footer() { - RETURN_IF_ERROR(_write_footer()); + if (*segment_file_size == 0) { + return Status::Corruption("Bad segment, file size = 0"); + } return Status::OK(); } @@ -921,10 +919,7 @@ Status SegmentWriter::finalize(uint64_t* segment_file_size, uint64_t* index_size // write index RETURN_IF_ERROR(finalize_columns_index(index_size)); // write footer - RETURN_IF_ERROR(finalize_footer()); - // finish - RETURN_IF_ERROR(_file_writer->finalize()); - *segment_file_size = _file_writer->bytes_appended(); + RETURN_IF_ERROR(finalize_footer(segment_file_size)); if (timer.elapsed_time() > 5000000000l) { LOG(INFO) << "segment flush consumes a lot time_ns " << timer.elapsed_time()
diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h index 0b17ed4faa08dc..276de1a430eb43 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.h +++ b/be/src/olap/rowset/segment_v2/segment_writer.h @@ -119,7 +119,6 @@ class SegmentWriter { Status finalize_columns_data(); Status finalize_columns_index(uint64_t* index_size); Status finalize_footer(uint64_t* segment_file_size); - Status finalize_footer(); void init_column_meta(ColumnMetaPB* meta, uint32_t column_id, const TabletColumn& column, TabletSchemaSPtr tablet_schema);
From 2a952472f6d7269392c4dea061faf58d9f35cee6 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Tue, 12 Sep 2023 16:05:55 +0800 Subject: [PATCH 26/31] [fix](Nereids) record wrong best plan properties #23973 (#24232) pick from master: PR: #23973 commit-id: b2ca28139553cacd0f5f2e492e30d3a37ce26bed When the output properties meet the required ordering but not the required distribution, we use a trick to do the enforcement: we replace the current output property with ANY. But when we do the enforcement later, we still use the old output property, so when we choose the best plan we cannot find the old output's plan, since it has already been replaced by ANY.
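To make the failure mode concrete, here is a hedged, self-contained sketch (a toy map standing in for the group's lowest-cost-plan bookkeeping; the class name and property strings are hypothetical, not the real memo code):
```
import java.util.HashMap;
import java.util.Map;

public class StalePropertyKeyDemo {
    public static void main(String[] args) {
        // toy stand-in for the group's bookkeeping: output property -> best plan
        Map<String, String> lowestCostPlans = new HashMap<>();
        String output = "NATURAL HASH(b, a) + ORDER(b asc, a asc)";
        lowestCostPlans.put(output, "PhysicalWindow");

        // enforceDistributionButMeetSort re-keys the best plan under ANY
        lowestCostPlans.put("ANY", lowestCostPlans.remove(output));

        // buggy: the later enforcement still looks up the stale `output` key
        System.out.println(lowestCostPlans.get(output)); // null -> "doesn't exist in root group"
        // fix: drive the later enforcement with ANY, the key that now exists
        System.out.println(lowestCostPlans.get("ANY"));  // PhysicalWindow
    }
}
```
The one-line fix in the diff below does exactly this: it passes PhysicalProperties.ANY instead of the stale output to enforceSortAndDistribution.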
For example, the following memo dump shows the problem:
```
lowest Plan(cost, properties, plan, childrenRequires)
18.0 ANY id:138#4 cost=0 [0/0/0/] estRows=4 children=[@0 ] (plan=PhysicalWindow[139]@4 ( windowFrameGroup=(Funcs=[row_number() WindowSpec(PARTITION BY b#1, a#0 ROWS BETWEEN UNBOUNDED_PRECEDING AND CURRENT_ROW) AS `r1`#2], PartitionKeys=[b#1, a#0], OrderKeys=[], WindowFrame=WindowFrame(ROWS, UNBOUNDED_PRECEDING, CURRENT_ROW)), requiredProperties=[DistributionSpecHash ( orderedShuffledColumns=[1, 0], shuffleType=REQUIRE, tableId=-1, selectedIndexId=-1, partitionIds=[], equivalenceExprIds=[[1], [0]], exprIdToEquivalenceSet={0=1, 1=0} ) Order: ([b#1 asc, a#0 asc])], stats=null )) [DistributionSpecHash ( orderedShuffledColumns=[0], shuffleType=NATURAL, tableId=3547296, selectedIndexId=3547297, partitionIds=[3547295], equivalenceExprIds=[[0]], exprIdToEquivalenceSet={0=0} ) Order: ([b#1 asc, a#0 asc])]
32.01171875 DistributionSpecHash ( orderedShuffledColumns=[1], shuffleType=REQUIRE, tableId=-1, selectedIndexId=-1, partitionIds=[], equivalenceExprIds=[[1]], exprIdToEquivalenceSet={1=0} ) Order: ([b#1 asc]) id:161#4 cost=14 [4/4/4/] estRows=4 children=[@4 ] (plan=PhysicalQuickSort[162]@4 ( orderKeys=[b#1 asc], phase=LOCAL_SORT, stats=null )) [DistributionSpecHash ( orderedShuffledColumns=[0], shuffleType=NATURAL, tableId=3547296, selectedIndexId=3547297, partitionIds=[3547295], equivalenceExprIds=[[0]], exprIdToEquivalenceSet={0=0} ) Order: ([b#1 asc, a#0 asc])]
32.01171875 DistributionSpecHash ( orderedShuffledColumns=[1], shuffleType=EXECUTION_BUCKETED, tableId=-1, selectedIndexId=-1, partitionIds=[], equivalenceExprIds=[[1]], exprIdToEquivalenceSet={1=0} ) Order: ([b#1 asc]) id:161#4 cost=14 [4/4/4/] estRows=4 children=[@4 ] (plan=PhysicalQuickSort[162]@4 ( orderKeys=[b#1 asc], phase=LOCAL_SORT, stats=null )) [DistributionSpecHash ( orderedShuffledColumns=[1], shuffleType=EXECUTION_BUCKETED, tableId=-1, selectedIndexId=-1, partitionIds=[], equivalenceExprIds=[[1]], exprIdToEquivalenceSet={1=0} ) Order: ([])]
18.01171875 DistributionSpecHash ( orderedShuffledColumns=[1], shuffleType=EXECUTION_BUCKETED, tableId=-1, selectedIndexId=-1, partitionIds=[], equivalenceExprIds=[[1]], exprIdToEquivalenceSet={1=0} ) Order: ([]) id:157#4 cost=0 [0/0/0/] estRows=4 children=[@4 ] (plan=PhysicalDistribute[158]@4 ( distributionSpec=DistributionSpecHash ( orderedShuffledColumns=[1], shuffleType=EXECUTION_BUCKETED, tableId=-1, selectedIndexId=-1, partitionIds=[], equivalenceExprIds=[[1]], exprIdToEquivalenceSet={1=0} ), stats=null )) [DistributionSpecHash ( orderedShuffledColumns=[0], shuffleType=NATURAL, tableId=3547296, selectedIndexId=3547297, partitionIds=[3547295], equivalenceExprIds=[[0]], exprIdToEquivalenceSet={0=0} ) Order: ([b#1 asc, a#0 asc])]
```
The last entry requires a NATURAL shuffle-type property from this group, but that property has already been removed by enforceDistributionButMeetSort, so the following exception is thrown:
```
Caused by: org.apache.doris.nereids.exceptions.AnalysisException: Failed to choose best plan
at org.apache.doris.nereids.NereidsPlanner.chooseBestPlan(NereidsPlanner.java:340) ~[classes/:?]
at org.apache.doris.nereids.NereidsPlanner.chooseBestPlan(NereidsPlanner.java:323) ~[classes/:?]
...
18 more Caused by: org.apache.doris.nereids.exceptions.AnalysisException: lowestCostPlans with physicalProperties(DistributionSpecHash ( orderedShuffledColumns=[0], shuffleType=NATURAL, tableId=3547296, selectedIndexId=3547297, partitionIds=[3547295], equivalenceExprIds=[[0]], exprIdToEquivalenceSet={0=0} ) Order: ([b#1 asc, a#0 asc])) doesn't exist in root group at org.apache.doris.nereids.NereidsPlanner.lambda$chooseBestPlan$1(NereidsPlanner.java:318) ~[classes/:?] at java.util.Optional.orElseThrow(Optional.java:408) ~[?:?] at org.apache.doris.nereids.NereidsPlanner.chooseBestPlan(NereidsPlanner.java:317) ~[classes/:?] at org.apache.doris.nereids.NereidsPlanner.chooseBestPlan(NereidsPlanner.java:323) ~[classes/:?] ... 18 more ``` --- .../nereids/properties/EnforceMissingPropertiesHelper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/EnforceMissingPropertiesHelper.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/EnforceMissingPropertiesHelper.java index 1c45db6d4c483e..d548e3254c47d8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/EnforceMissingPropertiesHelper.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/properties/EnforceMissingPropertiesHelper.java @@ -87,7 +87,7 @@ private PhysicalProperties enforceDistributionButMeetSort(PhysicalProperties out groupExpression.getOwnerGroup() .replaceBestPlanProperty( output, PhysicalProperties.ANY, groupExpression.getCostValueByProperties(output)); - return enforceSortAndDistribution(output, request); + return enforceSortAndDistribution(PhysicalProperties.ANY, request); } private PhysicalProperties enforceGlobalSort(PhysicalProperties oldOutputProperty, PhysicalProperties required) { From 91180530ffd144a48851b69c7a47c1e143229d50 Mon Sep 17 00:00:00 2001 From: HappenLee Date: Tue, 12 Sep 2023 17:40:40 +0800 Subject: [PATCH 27/31] [schedule](pipeline) Remove wait schedule time in pipeline query engine (#23994) (#24247) Co-authored-by: yiguolei <676222867@qq.com> --- be/src/pipeline/pipeline_task.cpp | 2 -- be/src/pipeline/pipeline_task.h | 4 ---- be/src/pipeline/task_scheduler.cpp | 31 ++++++++++++------------------ be/src/pipeline/task_scheduler.h | 1 - 4 files changed, 12 insertions(+), 26 deletions(-) diff --git a/be/src/pipeline/pipeline_task.cpp b/be/src/pipeline/pipeline_task.cpp index afcd876f8bcb22..2b428ac5f14c8b 100644 --- a/be/src/pipeline/pipeline_task.cpp +++ b/be/src/pipeline/pipeline_task.cpp @@ -68,7 +68,6 @@ void PipelineTask::_fresh_profile_counter() { COUNTER_SET(_schedule_counts, (int64_t)_schedule_time); COUNTER_SET(_wait_sink_timer, (int64_t)_wait_sink_watcher.elapsed_time()); COUNTER_SET(_wait_worker_timer, (int64_t)_wait_worker_watcher.elapsed_time()); - COUNTER_SET(_wait_schedule_timer, (int64_t)_wait_schedule_watcher.elapsed_time()); COUNTER_SET(_begin_execute_timer, _begin_execute_time); COUNTER_SET(_eos_timer, _eos_time); COUNTER_SET(_src_pending_finish_over_timer, _src_pending_finish_over_time); @@ -99,7 +98,6 @@ void PipelineTask::_init_profile() { _wait_bf_timer = ADD_TIMER(_task_profile, "WaitBfTime"); _wait_sink_timer = ADD_TIMER(_task_profile, "WaitSinkTime"); _wait_worker_timer = ADD_TIMER(_task_profile, "WaitWorkerTime"); - _wait_schedule_timer = ADD_TIMER(_task_profile, "WaitScheduleTime"); _block_counts = ADD_COUNTER(_task_profile, "NumBlockedTimes", TUnit::UNIT); _block_by_source_counts = ADD_COUNTER(_task_profile, "NumBlockedBySrcTimes", TUnit::UNIT); 
_block_by_sink_counts = ADD_COUNTER(_task_profile, "NumBlockedBySinkTimes", TUnit::UNIT); diff --git a/be/src/pipeline/pipeline_task.h b/be/src/pipeline/pipeline_task.h index 27ca3cdd424ac3..696b335f0e1102 100644 --- a/be/src/pipeline/pipeline_task.h +++ b/be/src/pipeline/pipeline_task.h @@ -130,8 +130,6 @@ class PipelineTask { _wait_worker_watcher.start(); } void pop_out_runnable_queue() { _wait_worker_watcher.stop(); } - void start_schedule_watcher() { _wait_schedule_watcher.start(); } - void stop_schedule_watcher() { _wait_schedule_watcher.stop(); } PipelineTaskState get_state() { return _cur_state; } void set_state(PipelineTaskState state); @@ -310,8 +308,6 @@ class PipelineTask { MonotonicStopWatch _wait_worker_watcher; RuntimeProfile::Counter* _wait_worker_timer; // TODO we should calculate the time between when really runnable and runnable - MonotonicStopWatch _wait_schedule_watcher; - RuntimeProfile::Counter* _wait_schedule_timer; RuntimeProfile::Counter* _yield_counts; RuntimeProfile::Counter* _core_change_times; diff --git a/be/src/pipeline/task_scheduler.cpp b/be/src/pipeline/task_scheduler.cpp index 1f5d5e58b62b16..9af60404c9f4c9 100644 --- a/be/src/pipeline/task_scheduler.cpp +++ b/be/src/pipeline/task_scheduler.cpp @@ -84,7 +84,6 @@ void BlockedTaskScheduler::_schedule() { _started.store(true); std::list local_blocked_tasks; int empty_times = 0; - std::vector ready_tasks; while (!_shutdown) { { @@ -104,6 +103,7 @@ void BlockedTaskScheduler::_schedule() { } } + auto origin_local_block_tasks_size = local_blocked_tasks.size(); auto iter = local_blocked_tasks.begin(); vectorized::VecDateTimeValue now = vectorized::VecDateTimeValue::local_time(); while (iter != local_blocked_tasks.end()) { @@ -114,15 +114,14 @@ void BlockedTaskScheduler::_schedule() { if (task->is_pending_finish()) { iter++; } else { - _make_task_run(local_blocked_tasks, iter, ready_tasks, - PipelineTaskState::PENDING_FINISH); + _make_task_run(local_blocked_tasks, iter, PipelineTaskState::PENDING_FINISH); } } else if (task->fragment_context()->is_canceled()) { if (task->is_pending_finish()) { task->set_state(PipelineTaskState::PENDING_FINISH); iter++; } else { - _make_task_run(local_blocked_tasks, iter, ready_tasks); + _make_task_run(local_blocked_tasks, iter); } } else if (task->query_context()->is_timeout(now)) { LOG(WARNING) << "Timeout, query_id=" << print_id(task->query_context()->query_id) @@ -135,47 +134,43 @@ void BlockedTaskScheduler::_schedule() { task->set_state(PipelineTaskState::PENDING_FINISH); iter++; } else { - _make_task_run(local_blocked_tasks, iter, ready_tasks); + _make_task_run(local_blocked_tasks, iter); } } else if (state == PipelineTaskState::BLOCKED_FOR_DEPENDENCY) { if (task->has_dependency()) { iter++; } else { - _make_task_run(local_blocked_tasks, iter, ready_tasks); + _make_task_run(local_blocked_tasks, iter); } } else if (state == PipelineTaskState::BLOCKED_FOR_SOURCE) { if (task->source_can_read()) { - _make_task_run(local_blocked_tasks, iter, ready_tasks); + _make_task_run(local_blocked_tasks, iter); } else { iter++; } } else if (state == PipelineTaskState::BLOCKED_FOR_RF) { if (task->runtime_filters_are_ready_or_timeout()) { - _make_task_run(local_blocked_tasks, iter, ready_tasks); + _make_task_run(local_blocked_tasks, iter); } else { iter++; } } else if (state == PipelineTaskState::BLOCKED_FOR_SINK) { if (task->sink_can_write()) { - _make_task_run(local_blocked_tasks, iter, ready_tasks); + _make_task_run(local_blocked_tasks, iter); } else { iter++; } } else { // TODO: DCHECK the 
state - _make_task_run(local_blocked_tasks, iter, ready_tasks); + _make_task_run(local_blocked_tasks, iter); } } - if (ready_tasks.empty()) { + if (origin_local_block_tasks_size == 0 || + local_blocked_tasks.size() == origin_local_block_tasks_size) { empty_times += 1; } else { empty_times = 0; - for (auto& task : ready_tasks) { - task->stop_schedule_watcher(); - _task_queue->push_back(task); - } - ready_tasks.clear(); } if (empty_times != 0 && (empty_times & (EMPTY_TIMES_TO_YIELD - 1)) == 0) { @@ -195,13 +190,11 @@ void BlockedTaskScheduler::_schedule() { void BlockedTaskScheduler::_make_task_run(std::list& local_tasks, std::list::iterator& task_itr, - std::vector& ready_tasks, PipelineTaskState t_state) { auto task = *task_itr; - task->start_schedule_watcher(); task->set_state(t_state); local_tasks.erase(task_itr++); - ready_tasks.emplace_back(task); + _task_queue->push_back(task); } TaskScheduler::~TaskScheduler() { diff --git a/be/src/pipeline/task_scheduler.h b/be/src/pipeline/task_scheduler.h index bcdbcf1a48b145..ad69e10d8b0f8a 100644 --- a/be/src/pipeline/task_scheduler.h +++ b/be/src/pipeline/task_scheduler.h @@ -71,7 +71,6 @@ class BlockedTaskScheduler { void _schedule(); void _make_task_run(std::list& local_tasks, std::list::iterator& task_itr, - std::vector& ready_tasks, PipelineTaskState state = PipelineTaskState::RUNNABLE); }; From 13652d6c6a0bdf6c76265509e989d90246fc5d28 Mon Sep 17 00:00:00 2001 From: Jerry Hu Date: Tue, 12 Sep 2023 18:16:20 +0800 Subject: [PATCH 28/31] [fix](join) incorrect result of mark join (#24112) (#24238) --- be/src/vec/exec/join/vhash_join_node.cpp | 37 ++++++++++++++++++- .../sub_query_correlated.out | 9 +++++ .../sub_query_correlated.groovy | 36 ++++++++++++++++++ 3 files changed, 80 insertions(+), 2 deletions(-) diff --git a/be/src/vec/exec/join/vhash_join_node.cpp b/be/src/vec/exec/join/vhash_join_node.cpp index b59027fc144776..8b0dc5786b4d35 100644 --- a/be/src/vec/exec/join/vhash_join_node.cpp +++ b/be/src/vec/exec/join/vhash_join_node.cpp @@ -519,7 +519,7 @@ Status HashJoinNode::close(RuntimeState* state) { bool HashJoinNode::need_more_input_data() const { return (_probe_block.rows() == 0 || _probe_index == _probe_block.rows()) && !_probe_eos && - !_short_circuit_for_probe; + (!_short_circuit_for_probe || _is_mark_join); } void HashJoinNode::prepare_for_next() { @@ -530,10 +530,43 @@ void HashJoinNode::prepare_for_next() { Status HashJoinNode::pull(doris::RuntimeState* state, vectorized::Block* output_block, bool* eos) { SCOPED_TIMER(_probe_timer); if (_short_circuit_for_probe) { + /// If `_short_circuit_for_probe` is true, this indicates no rows + /// match the join condition, and this is 'mark join', so we need to create a column as mark + /// with all rows set to 0. 
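+ /// Concretely, the branch below (1) copies the probe-side output columns selected by
+ /// `_left_output_slot_flags` into a temporary block, (2) appends a ColumnUInt8 mark
+ /// column filled with 0, (3) applies the remaining conjuncts as a filter, and
+ /// (4) builds the output block from the filtered result.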
+ if (_is_mark_join) { + auto block_rows = _probe_block.rows(); + if (block_rows == 0) { + *eos = _probe_eos; + return Status::OK(); + } + + Block temp_block; + //get probe side output column + for (int i = 0; i < _left_output_slot_flags.size(); ++i) { + if (_left_output_slot_flags[i]) { + temp_block.insert(_probe_block.get_by_position(i)); + } + } + auto mark_column = ColumnUInt8::create(block_rows, 0); + temp_block.insert({std::move(mark_column), std::make_shared(), ""}); + + { + SCOPED_TIMER(_join_filter_timer); + RETURN_IF_ERROR( + VExprContext::filter_block(_conjuncts, &temp_block, temp_block.columns())); + } + + RETURN_IF_ERROR(_build_output_block(&temp_block, output_block, false)); + temp_block.clear(); + release_block_memory(_probe_block); + reached_limit(output_block, eos); + return Status::OK(); + } // If we use a short-circuit strategy, should return empty block directly. *eos = true; return Status::OK(); } + //TODO: this short circuit maybe could refactor, no need to check at here. if (_short_circuit_for_probe_and_additional_data) { // when build table rows is 0 and not have other_join_conjunct and join type is one of LEFT_OUTER_JOIN/FULL_OUTER_JOIN/LEFT_ANTI_JOIN @@ -720,7 +753,7 @@ Status HashJoinNode::push(RuntimeState* /*state*/, vectorized::Block* input_bloc Status HashJoinNode::get_next(RuntimeState* state, Block* output_block, bool* eos) { SCOPED_TIMER(_runtime_profile->total_time_counter()); - if (_short_circuit_for_probe) { + if (_short_circuit_for_probe && !_is_mark_join) { // If we use a short-circuit strategy, should return empty block directly. *eos = true; return Status::OK(); diff --git a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out index 06cb3ca83941e6..15f45673658503 100644 --- a/regression-test/data/nereids_syntax_p0/sub_query_correlated.out +++ b/regression-test/data/nereids_syntax_p0/sub_query_correlated.out @@ -192,6 +192,15 @@ 24 4 3 3 +-- !in_subquery_mark_with_order -- +1 \N +1 2 +1 3 +2 4 +2 5 +3 3 +3 4 + -- !exists_subquery_with_order -- 1 2 1 3 diff --git a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy index 8a1df1834190b5..c7dcffb1bef572 100644 --- a/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy +++ b/regression-test/suites/nereids_syntax_p0/sub_query_correlated.groovy @@ -42,6 +42,14 @@ suite ("sub_query_correlated") { DROP TABLE IF EXISTS `sub_query_correlated_subquery5` """ + sql """ + DROP TABLE IF EXISTS `sub_query_correlated_subquery6` + """ + + sql """ + DROP TABLE IF EXISTS `sub_query_correlated_subquery7` + """ + sql """ create table if not exists sub_query_correlated_subquery1 (k1 bigint, k2 bigint) @@ -82,6 +90,21 @@ suite ("sub_query_correlated") { properties('replication_num' = '1') """ + sql """ + create table if not exists sub_query_correlated_subquery6 + (k1 bigint, k2 bigint) + duplicate key(k1) + distributed by hash(k2) buckets 1 + properties('replication_num' = '1') + """ + + sql """ + create table if not exists sub_query_correlated_subquery7 + (k1 int, k2 varchar(128), k3 bigint, v1 bigint, v2 bigint) + distributed by hash(k2) buckets 1 + properties('replication_num' = '1'); + """ + sql """ insert into sub_query_correlated_subquery1 values (1,2), (1,3), (2,4), (2,5), (3,3), (3,4), (20,2), (22,3), (24,4) """ @@ -103,6 +126,15 @@ suite ("sub_query_correlated") { insert into sub_query_correlated_subquery5 values (5,4), (5,2), (8,3), 
(5,4), (6,7), (8,9) """ + sql """ + insert into sub_query_correlated_subquery6 values (1,null),(null,1),(1,2), (null,2),(1,3), (2,4), (2,5), (3,3), (3,4), (20,2), (22,3), (24,4),(null,null); + """ + + sql """ + insert into sub_query_correlated_subquery7 values (1,"abc",2,3,4), (1,"abcd",3,3,4), (2,"xyz",2,4,2), + (2,"uvw",3,4,2), (2,"uvw",3,4,2), (3,"abc",4,5,3), (3,"abc",4,5,3), (null,null,null,null,null); + """ + sql "SET enable_fallback_to_original_planner=false" //------------------Correlated----------------- @@ -261,6 +293,10 @@ suite ("sub_query_correlated") { select * from sub_query_correlated_subquery1 where sub_query_correlated_subquery1.k1 not in (select sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 = sub_query_correlated_subquery1.k2 order by k2); """ + order_qt_in_subquery_mark_with_order """ + select * from sub_query_correlated_subquery6 where sub_query_correlated_subquery6.k1 not in (select sub_query_correlated_subquery7.k3 from sub_query_correlated_subquery7 ) or k1 < 10; + """ + order_qt_exists_subquery_with_order """ select * from sub_query_correlated_subquery1 where exists (select sub_query_correlated_subquery3.k3 from sub_query_correlated_subquery3 where sub_query_correlated_subquery3.v2 = sub_query_correlated_subquery1.k2 order by k2); """ From 6870802bfc534d66c8c8f8ed27037bce47ee3122 Mon Sep 17 00:00:00 2001 From: Kang Date: Tue, 12 Sep 2023 18:26:04 +0800 Subject: [PATCH 29/31] Revert "[Fix](Nereids) fix infer predicate lost cast of source expression (#23692)" This reverts commit 03f029ff8dd65dca30024591511bbf552dd53938. --- .../rules/rewrite/PredicatePropagation.java | 39 ++++++++----------- .../rules/rewrite/InferPredicatesTest.java | 30 -------------- .../infer_predicate/infer_predicate.groovy | 18 --------- 3 files changed, 16 insertions(+), 71 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java index 71818966696958..cc45952817a845 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java @@ -59,12 +59,12 @@ public Set infer(Set predicates) { } /** - * Use the left or right child of `equalExpr` to replace the left or right child of `expression` + * Use the left or right child of `leftSlotEqualToRightSlot` to replace the left or right child of `expression` * Now only support infer `ComparisonPredicate`. 
* TODO: We should determine whether `expression` satisfies the condition for replacement * eg: Satisfy `expression` is non-deterministic */ - private Expression doInfer(Expression equalExpr, Expression expression) { + private Expression doInfer(Expression leftSlotEqualToRightSlot, Expression expression) { return expression.accept(new DefaultExpressionRewriter() { @Override @@ -76,43 +76,36 @@ public Expression visit(Expression expr, Void context) { public Expression visitComparisonPredicate(ComparisonPredicate cp, Void context) { // we need to get expression covered by cast, because we want to infer different datatype if (ExpressionUtils.isExpressionSlotCoveredByCast(cp.left()) && (cp.right().isConstant())) { - return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.left()), equalExpr); + return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.left())); } else if (ExpressionUtils.isExpressionSlotCoveredByCast(cp.right()) && cp.left().isConstant()) { - return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.right()), equalExpr); + return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.right())); } return super.visit(cp, context); } private boolean isDataTypeValid(DataType originDataType, Expression expr) { - if ((expr.child(0).getDataType() instanceof IntegralType) - && (expr.child(1).getDataType() instanceof IntegralType) + if ((leftSlotEqualToRightSlot.child(0).getDataType() instanceof IntegralType) + && (leftSlotEqualToRightSlot.child(1).getDataType() instanceof IntegralType) && (originDataType instanceof IntegralType)) { // infer filter can not be lower than original datatype, or dataset would be wrong if (!((IntegralType) originDataType).widerThan( - (IntegralType) expr.child(0).getDataType()) + (IntegralType) leftSlotEqualToRightSlot.child(0).getDataType()) && !((IntegralType) originDataType).widerThan( - (IntegralType) expr.child(1).getDataType())) { + (IntegralType) leftSlotEqualToRightSlot.child(1).getDataType())) { return true; } - } else if (expr.child(0).getDataType().equals(expr.child(1).getDataType())) { - return true; } return false; } - private Expression replaceSlot(Expression sourcePredicate, DataType originDataType, Expression equal) { - if (!isDataTypeValid(originDataType, equal)) { - return sourcePredicate; - } - return sourcePredicate.rewriteUp(e -> { - // we can not replace Cast expression to slot because when rewrite up, we have replace child of cast - if (e instanceof Cast) { - return e; - } - if (ExpressionUtils.isTwoExpressionEqualWithCast(e, equal.child(0))) { - return equal.child(1); - } else if (ExpressionUtils.isTwoExpressionEqualWithCast(e, equal.child(1))) { - return equal.child(0); + private Expression replaceSlot(Expression expr, DataType originDataType) { + return expr.rewriteUp(e -> { + if (isDataTypeValid(originDataType, leftSlotEqualToRightSlot)) { + if (ExpressionUtils.isTwoExpressionEqualWithCast(e, leftSlotEqualToRightSlot.child(0))) { + return leftSlotEqualToRightSlot.child(1); + } else if (ExpressionUtils.isTwoExpressionEqualWithCast(e, leftSlotEqualToRightSlot.child(1))) { + return leftSlotEqualToRightSlot.child(0); + } } return e; }); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java index b7b235d2b43041..adc67ca835f915 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java +++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java @@ -17,33 +17,15 @@ package org.apache.doris.nereids.rules.rewrite; -import org.apache.doris.nereids.trees.expressions.Cast; -import org.apache.doris.nereids.trees.expressions.EqualTo; -import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.plans.JoinType; -import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; -import org.apache.doris.nereids.types.BigIntType; import org.apache.doris.nereids.util.MemoPatternMatchSupported; import org.apache.doris.nereids.util.PlanChecker; -import org.apache.doris.nereids.util.PlanConstructor; import org.apache.doris.utframe.TestWithFeService; -import com.google.common.collect.Sets; -import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; -import java.util.Optional; -import java.util.Set; - public class InferPredicatesTest extends TestWithFeService implements MemoPatternMatchSupported { - private final LogicalOlapScan scan1 = PlanConstructor.newLogicalOlapScan(0, "t1", 0); - - private final LogicalOlapScan scan2 = PlanConstructor.newLogicalOlapScan(1, "t2", 0); - - private final PredicatePropagation propagation = new PredicatePropagation(); - @Override protected void runBeforeAll() throws Exception { createDatabase("test"); @@ -646,16 +628,4 @@ public void innerJoinShouldNotInferUnderLeftJoinOnClausePredicates() { ).when(join -> join.getJoinType() == JoinType.LEFT_OUTER_JOIN) ); } - - @Test - void testInfer() { - EqualTo equalTo = new EqualTo(new Cast(scan1.getOutput().get(0), BigIntType.INSTANCE), Literal.of(1)); - EqualTo equalTo2 = new EqualTo(scan2.getOutput().get(0), scan1.getOutput().get(0)); - Set predicates = Sets.newHashSet(); - predicates.add(equalTo2); - predicates.add(equalTo); - Set newPredicates = propagation.infer(predicates); - Optional newPredicate = newPredicates.stream().findFirst(); - Assertions.assertTrue(newPredicate.get().equals(new EqualTo(new Cast(scan2.getOutput().get(0), BigIntType.INSTANCE), Literal.of(1)))); - } } diff --git a/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy b/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy index 120c9a8f674458..a1621f1c239aa5 100644 --- a/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy +++ b/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy @@ -22,8 +22,6 @@ suite("test_infer_predicate") { sql 'drop table if exists infer_tb1;' sql 'drop table if exists infer_tb2;' sql 'drop table if exists infer_tb3;' - sql 'drop table if exists infer_tb4;' - sql 'drop table if exists infer_tb5;' sql '''create table infer_tb1 (k1 int, k2 int) distributed by hash(k1) buckets 3 properties('replication_num' = '1');''' @@ -31,10 +29,6 @@ suite("test_infer_predicate") { sql '''create table infer_tb3 (k1 varchar(100), k2 int) distributed by hash(k1) buckets 3 properties('replication_num' = '1');''' - sql '''create table infer_tb4 (k1 varchar(100), k2 date) distributed by hash(k1) buckets 3 properties('replication_num' = '1');''' - - sql '''create table infer_tb5 (k1 varchar(100), k3 date) distributed by hash(k1) buckets 3 properties('replication_num' = '1');''' - explain { sql "select * from infer_tb1 inner join infer_tb2 where infer_tb2.k1 = infer_tb1.k2 and infer_tb2.k1 = 1;" contains "PREDICATES: k2" @@ -61,16 +55,4 @@ suite("test_infer_predicate") { contains "PREDICATES: k3" contains 
"PREDICATES: k2" } - - explain { - sql "select * from infer_tb4 left join infer_tb5 on infer_tb4.k2 = infer_tb5.k3 where infer_tb4.k2 = '20230901';" - contains "PREDICATES: k3" - contains "PREDICATES: k2" - } - - sql 'drop table if exists infer_tb1;' - sql 'drop table if exists infer_tb2;' - sql 'drop table if exists infer_tb3;' - sql 'drop table if exists infer_tb4;' - sql 'drop table if exists infer_tb5;' } From 108e91f2a37d08119e4a611a51fdd0341d9330b3 Mon Sep 17 00:00:00 2001 From: minghong Date: Wed, 13 Sep 2023 15:06:44 +0800 Subject: [PATCH 30/31] [refactor](nereids)forbid unknown stats for branch2.0 #24061 (#24243) --- .../translator/PhysicalPlanTranslator.java | 35 +++++++++++++ .../translator/PlanTranslatorContext.java | 31 ++++++++++++ .../rules/rewrite/PredicatePropagation.java | 39 +++++++++------ .../doris/nereids/stats/StatsCalculator.java | 49 +++++-------------- .../rules/rewrite/InferPredicatesTest.java | 30 ++++++++++++ .../infer_predicate/infer_predicate.groovy | 18 +++++++ 6 files changed, 148 insertions(+), 54 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 406dda54859714..359373ddbf0ce7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -126,6 +126,11 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalWindow; import org.apache.doris.nereids.trees.plans.physical.RuntimeFilter; import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor; +import org.apache.doris.nereids.types.ArrayType; +import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.types.JsonType; +import org.apache.doris.nereids.types.MapType; +import org.apache.doris.nereids.types.StructType; import org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.nereids.util.JoinUtils; import org.apache.doris.nereids.util.Utils; @@ -235,6 +240,14 @@ public PlanFragment translatePlan(PhysicalPlan physicalPlan) { Collections.reverse(context.getPlanFragments()); // TODO: maybe we need to trans nullable directly? and then we could remove call computeMemLayout context.getDescTable().computeMemLayout(); + if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().forbidUnknownColStats) { + Set scans = context.getScanNodeWithUnknownColumnStats(); + if (!scans.isEmpty()) { + StringBuilder builder = new StringBuilder(); + scans.forEach(scanNode -> builder.append(scanNode)); + throw new AnalysisException("tables with unknown column stats: " + builder); + } + } return rootFragment; } @@ -530,6 +543,15 @@ public PlanFragment visitPhysicalOlapScan(PhysicalOlapScan olapScan, PlanTransla // TODO: move all node set cardinality into one place if (olapScan.getStats() != null) { olapScanNode.setCardinality((long) olapScan.getStats().getRowCount()); + if (ConnectContext.get().getSessionVariable().forbidUnknownColStats) { + for (int i = 0; i < slots.size(); i++) { + Slot slot = slots.get(i); + if (olapScan.getStats().findColumnStatistics(slot).isUnKnown() + && !isComplexDataType(slot.getDataType())) { + context.addUnknownStatsColumn(olapScanNode, tupleDescriptor.getSlots().get(i).getId()); + } + } + } } // TODO: Do we really need tableName here? 
TableName tableName = new TableName(null, "", ""); @@ -1978,6 +2000,14 @@ private void updateScanSlotsMaterialization(ScanNode scanNode, scanNode.getTupleDesc().getSlots().add(smallest); } try { + if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().forbidUnknownColStats) { + for (SlotId slotId : requiredByProjectSlotIdSet) { + if (context.isColumnStatsUnknown(scanNode, slotId)) { + throw new AnalysisException("meet unknown column stats on table " + scanNode); + } + } + context.removeScanFromStatsUnknownColumnsMap(scanNode); + } scanNode.updateRequiredSlots(context, requiredByProjectSlotIdSet); } catch (UserException e) { Util.logAndThrowRuntimeException(LOG, @@ -2240,4 +2270,9 @@ private List translateToLegacyConjuncts(Set conjuncts) { } return outputExprs; } + + private boolean isComplexDataType(DataType dataType) { + return dataType instanceof ArrayType || dataType instanceof MapType || dataType instanceof JsonType + || dataType instanceof StructType; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java index 256b37d70572e4..e69b5ee8ef3a13 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java @@ -45,11 +45,13 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import java.util.IdentityHashMap; import java.util.List; import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nullable; @@ -90,6 +92,7 @@ public class PlanTranslatorContext { private final Map cteProducerMap = Maps.newHashMap(); private final Map tablePushAggOp = Maps.newHashMap(); + private final Map> statsUnknownColumnsMap = Maps.newHashMap(); public PlanTranslatorContext(CascadesContext ctx) { this.translator = new RuntimeFilterTranslator(ctx.getRuntimeFilterContext()); @@ -100,6 +103,34 @@ public PlanTranslatorContext() { translator = null; } + /** + * remember the unknown-stats column and its scan, used for forbid_unknown_col_stats check + */ + public void addUnknownStatsColumn(ScanNode scan, SlotId slotId) { + Set slots = statsUnknownColumnsMap.get(scan); + if (slots == null) { + statsUnknownColumnsMap.put(scan, Sets.newHashSet(slotId)); + } else { + statsUnknownColumnsMap.get(scan).add(slotId); + } + } + + public boolean isColumnStatsUnknown(ScanNode scan, SlotId slotId) { + Set unknownSlots = statsUnknownColumnsMap.get(scan); + if (unknownSlots == null) { + return false; + } + return unknownSlots.contains(slotId); + } + + public void removeScanFromStatsUnknownColumnsMap(ScanNode scan) { + statsUnknownColumnsMap.remove(scan); + } + + public Set getScanNodeWithUnknownColumnStats() { + return statsUnknownColumnsMap.keySet(); + } + public List getPlanFragments() { return planFragments; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java index cc45952817a845..71818966696958 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java @@ -59,12 +59,12 @@ public Set infer(Set predicates) { } /** - * Use the left or right child of `leftSlotEqualToRightSlot` to replace the left or right child of `expression` + * Use the left or right child of `equalExpr` to replace the left or right child of `expression` * Now only support infer `ComparisonPredicate`. * TODO: We should determine whether `expression` satisfies the condition for replacement * eg: Satisfy `expression` is non-deterministic */ - private Expression doInfer(Expression leftSlotEqualToRightSlot, Expression expression) { + private Expression doInfer(Expression equalExpr, Expression expression) { return expression.accept(new DefaultExpressionRewriter() { @Override @@ -76,36 +76,43 @@ public Expression visit(Expression expr, Void context) { public Expression visitComparisonPredicate(ComparisonPredicate cp, Void context) { // we need to get expression covered by cast, because we want to infer different datatype if (ExpressionUtils.isExpressionSlotCoveredByCast(cp.left()) && (cp.right().isConstant())) { - return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.left())); + return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.left()), equalExpr); } else if (ExpressionUtils.isExpressionSlotCoveredByCast(cp.right()) && cp.left().isConstant()) { - return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.right())); + return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.right()), equalExpr); } return super.visit(cp, context); } private boolean isDataTypeValid(DataType originDataType, Expression expr) { - if ((leftSlotEqualToRightSlot.child(0).getDataType() instanceof IntegralType) - && (leftSlotEqualToRightSlot.child(1).getDataType() instanceof IntegralType) + if ((expr.child(0).getDataType() instanceof IntegralType) + && (expr.child(1).getDataType() instanceof IntegralType) && (originDataType instanceof IntegralType)) { // infer filter can not be lower than original datatype, or dataset would be wrong if (!((IntegralType) originDataType).widerThan( - (IntegralType) leftSlotEqualToRightSlot.child(0).getDataType()) + (IntegralType) expr.child(0).getDataType()) && !((IntegralType) originDataType).widerThan( - (IntegralType) leftSlotEqualToRightSlot.child(1).getDataType())) { + (IntegralType) expr.child(1).getDataType())) { return true; } + } else if (expr.child(0).getDataType().equals(expr.child(1).getDataType())) { + return true; } return false; } - private Expression replaceSlot(Expression expr, DataType originDataType) { - return expr.rewriteUp(e -> { - if (isDataTypeValid(originDataType, leftSlotEqualToRightSlot)) { - if (ExpressionUtils.isTwoExpressionEqualWithCast(e, leftSlotEqualToRightSlot.child(0))) { - return leftSlotEqualToRightSlot.child(1); - } else if (ExpressionUtils.isTwoExpressionEqualWithCast(e, leftSlotEqualToRightSlot.child(1))) { - return leftSlotEqualToRightSlot.child(0); - } + private Expression replaceSlot(Expression sourcePredicate, DataType originDataType, Expression equal) { + if (!isDataTypeValid(originDataType, equal)) { + return sourcePredicate; + } + return sourcePredicate.rewriteUp(e -> { + // we can not replace Cast expression to slot because when rewrite up, we have replace child of cast + if (e instanceof Cast) { + return e; + } + if (ExpressionUtils.isTwoExpressionEqualWithCast(e, equal.child(0))) { + return equal.child(1); + } else if (ExpressionUtils.isTwoExpressionEqualWithCast(e, equal.child(1))) { + 
return equal.child(0); } return e; }); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 45aeae54fde483..24ec929e820b00 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -26,7 +26,6 @@ import org.apache.doris.common.FeConstants; import org.apache.doris.common.Pair; import org.apache.doris.nereids.CascadesContext; -import org.apache.doris.nereids.exceptions.AnalysisException; import org.apache.doris.nereids.memo.Group; import org.apache.doris.nereids.memo.GroupExpression; import org.apache.doris.nereids.trees.expressions.Alias; @@ -123,7 +122,6 @@ import org.apache.doris.statistics.StatisticRange; import org.apache.doris.statistics.Statistics; import org.apache.doris.statistics.StatisticsBuilder; -import org.apache.doris.statistics.util.StatisticsUtil; import com.google.common.base.Preconditions; import com.google.common.collect.Maps; @@ -623,46 +621,21 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) { .setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize()) .build(); } - if (cache.isUnKnown) { - if (forbidUnknownColStats && !shouldIgnoreThisCol) { - if (StatisticsUtil.statsTblAvailable()) { - throw new AnalysisException(String.format("Found unknown stats for column:%s.%s.\n" - + "It may caused by:\n" - + "\n" - + "1. This column never got analyzed\n" - + "2. This table is empty\n" - + "3. Stats load failed caused by unstable of backends," - + "and FE cached the unknown stats by default in this scenario\n" - + "4. There is a bug, please report it to Doris community\n" - + "\n" - + "If an unknown stats for this column is tolerable," - + "you could set session variable `forbid_unknown_col_stats` to false to make planner" - + " ignore this error and keep planning.", table.getName(), colName)); - } else { - throw new AnalysisException("BE is not available!"); + if (!cache.isUnKnown) { + rowCount = Math.max(rowCount, cache.count); + Histogram histogram = getColumnHistogram(table, colName); + if (histogram != null) { + ColumnStatisticBuilder columnStatisticBuilder = + new ColumnStatisticBuilder(cache).setHistogram(histogram); + cache = columnStatisticBuilder.build(); + if (ConnectContext.get().getSessionVariable().isEnableMinidump() + && !ConnectContext.get().getSessionVariable().isPlayNereidsDump()) { + totalColumnStatisticMap.put(table.getName() + ":" + colName, cache); + totalHistogramMap.put(table.getName() + colName, histogram); } } - columnStatisticMap.put(slotReference, cache); - continue; - } - rowCount = Math.max(rowCount, cache.count); - Histogram histogram = getColumnHistogram(table, colName); - if (histogram != null) { - ColumnStatisticBuilder columnStatisticBuilder = - new ColumnStatisticBuilder(cache).setHistogram(histogram); - columnStatisticMap.put(slotReference, columnStatisticBuilder.build()); - cache = columnStatisticBuilder.build(); - if (ConnectContext.get().getSessionVariable().isEnableMinidump() - && !ConnectContext.get().getSessionVariable().isPlayNereidsDump()) { - totalHistogramMap.put(table.getName() + ":" + colName, histogram); - } } columnStatisticMap.put(slotReference, cache); - if (ConnectContext.get().getSessionVariable().isEnableMinidump() - && !ConnectContext.get().getSessionVariable().isPlayNereidsDump()) { - totalColumnStatisticMap.put(table.getName() + ":" + colName, 
cache); - totalHistogramMap.put(table.getName() + colName, histogram); - } } return new Statistics(rowCount, columnStatisticMap); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java index adc67ca835f915..b7b235d2b43041 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java @@ -17,15 +17,33 @@ package org.apache.doris.nereids.rules.rewrite; +import org.apache.doris.nereids.trees.expressions.Cast; +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.plans.JoinType; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.types.BigIntType; import org.apache.doris.nereids.util.MemoPatternMatchSupported; import org.apache.doris.nereids.util.PlanChecker; +import org.apache.doris.nereids.util.PlanConstructor; import org.apache.doris.utframe.TestWithFeService; +import com.google.common.collect.Sets; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; +import java.util.Optional; +import java.util.Set; + public class InferPredicatesTest extends TestWithFeService implements MemoPatternMatchSupported { + private final LogicalOlapScan scan1 = PlanConstructor.newLogicalOlapScan(0, "t1", 0); + + private final LogicalOlapScan scan2 = PlanConstructor.newLogicalOlapScan(1, "t2", 0); + + private final PredicatePropagation propagation = new PredicatePropagation(); + @Override protected void runBeforeAll() throws Exception { createDatabase("test"); @@ -628,4 +646,16 @@ public void innerJoinShouldNotInferUnderLeftJoinOnClausePredicates() { ).when(join -> join.getJoinType() == JoinType.LEFT_OUTER_JOIN) ); } + + @Test + void testInfer() { + EqualTo equalTo = new EqualTo(new Cast(scan1.getOutput().get(0), BigIntType.INSTANCE), Literal.of(1)); + EqualTo equalTo2 = new EqualTo(scan2.getOutput().get(0), scan1.getOutput().get(0)); + Set predicates = Sets.newHashSet(); + predicates.add(equalTo2); + predicates.add(equalTo); + Set newPredicates = propagation.infer(predicates); + Optional newPredicate = newPredicates.stream().findFirst(); + Assertions.assertTrue(newPredicate.get().equals(new EqualTo(new Cast(scan2.getOutput().get(0), BigIntType.INSTANCE), Literal.of(1)))); + } } diff --git a/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy b/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy index a1621f1c239aa5..120c9a8f674458 100644 --- a/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy +++ b/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy @@ -22,6 +22,8 @@ suite("test_infer_predicate") { sql 'drop table if exists infer_tb1;' sql 'drop table if exists infer_tb2;' sql 'drop table if exists infer_tb3;' + sql 'drop table if exists infer_tb4;' + sql 'drop table if exists infer_tb5;' sql '''create table infer_tb1 (k1 int, k2 int) distributed by hash(k1) buckets 3 properties('replication_num' = '1');''' @@ -29,6 +31,10 @@ suite("test_infer_predicate") { sql '''create table infer_tb3 (k1 varchar(100), k2 int) distributed by hash(k1) buckets 3 properties('replication_num' = '1');''' + sql '''create table 
infer_tb4 (k1 varchar(100), k2 date) distributed by hash(k1) buckets 3 properties('replication_num' = '1');''' + + sql '''create table infer_tb5 (k1 varchar(100), k3 date) distributed by hash(k1) buckets 3 properties('replication_num' = '1');''' + explain { sql "select * from infer_tb1 inner join infer_tb2 where infer_tb2.k1 = infer_tb1.k2 and infer_tb2.k1 = 1;" contains "PREDICATES: k2" @@ -55,4 +61,16 @@ suite("test_infer_predicate") { contains "PREDICATES: k3" contains "PREDICATES: k2" } + + explain { + sql "select * from infer_tb4 left join infer_tb5 on infer_tb4.k2 = infer_tb5.k3 where infer_tb4.k2 = '20230901';" + contains "PREDICATES: k3" + contains "PREDICATES: k2" + } + + sql 'drop table if exists infer_tb1;' + sql 'drop table if exists infer_tb2;' + sql 'drop table if exists infer_tb3;' + sql 'drop table if exists infer_tb4;' + sql 'drop table if exists infer_tb5;' } From 687a91872a296bd60d38a2866a0ed21138a2e5d8 Mon Sep 17 00:00:00 2001 From: Kang Date: Wed, 13 Sep 2023 15:18:48 +0800 Subject: [PATCH 31/31] Revert "[refactor](nereids)forbid unknown stats for branch2.0 #24061 (#24243)" (#24294) This reverts commit 108e91f2a37d08119e4a611a51fdd0341d9330b3. --- .../translator/PhysicalPlanTranslator.java | 35 ------------- .../translator/PlanTranslatorContext.java | 31 ------------ .../rules/rewrite/PredicatePropagation.java | 39 ++++++--------- .../doris/nereids/stats/StatsCalculator.java | 49 ++++++++++++++----- .../rules/rewrite/InferPredicatesTest.java | 30 ------------ .../infer_predicate/infer_predicate.groovy | 18 ------- 6 files changed, 54 insertions(+), 148 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 359373ddbf0ce7..406dda54859714 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -126,11 +126,6 @@ import org.apache.doris.nereids.trees.plans.physical.PhysicalWindow; import org.apache.doris.nereids.trees.plans.physical.RuntimeFilter; import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanVisitor; -import org.apache.doris.nereids.types.ArrayType; -import org.apache.doris.nereids.types.DataType; -import org.apache.doris.nereids.types.JsonType; -import org.apache.doris.nereids.types.MapType; -import org.apache.doris.nereids.types.StructType; import org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.nereids.util.JoinUtils; import org.apache.doris.nereids.util.Utils; @@ -240,14 +235,6 @@ public PlanFragment translatePlan(PhysicalPlan physicalPlan) { Collections.reverse(context.getPlanFragments()); // TODO: maybe we need to trans nullable directly? 
and then we could remove call computeMemLayout context.getDescTable().computeMemLayout(); - if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().forbidUnknownColStats) { - Set scans = context.getScanNodeWithUnknownColumnStats(); - if (!scans.isEmpty()) { - StringBuilder builder = new StringBuilder(); - scans.forEach(scanNode -> builder.append(scanNode)); - throw new AnalysisException("tables with unknown column stats: " + builder); - } - } return rootFragment; } @@ -543,15 +530,6 @@ public PlanFragment visitPhysicalOlapScan(PhysicalOlapScan olapScan, PlanTransla // TODO: move all node set cardinality into one place if (olapScan.getStats() != null) { olapScanNode.setCardinality((long) olapScan.getStats().getRowCount()); - if (ConnectContext.get().getSessionVariable().forbidUnknownColStats) { - for (int i = 0; i < slots.size(); i++) { - Slot slot = slots.get(i); - if (olapScan.getStats().findColumnStatistics(slot).isUnKnown() - && !isComplexDataType(slot.getDataType())) { - context.addUnknownStatsColumn(olapScanNode, tupleDescriptor.getSlots().get(i).getId()); - } - } - } } // TODO: Do we really need tableName here? TableName tableName = new TableName(null, "", ""); @@ -2000,14 +1978,6 @@ private void updateScanSlotsMaterialization(ScanNode scanNode, scanNode.getTupleDesc().getSlots().add(smallest); } try { - if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().forbidUnknownColStats) { - for (SlotId slotId : requiredByProjectSlotIdSet) { - if (context.isColumnStatsUnknown(scanNode, slotId)) { - throw new AnalysisException("meet unknown column stats on table " + scanNode); - } - } - context.removeScanFromStatsUnknownColumnsMap(scanNode); - } scanNode.updateRequiredSlots(context, requiredByProjectSlotIdSet); } catch (UserException e) { Util.logAndThrowRuntimeException(LOG, @@ -2270,9 +2240,4 @@ private List translateToLegacyConjuncts(Set conjuncts) { } return outputExprs; } - - private boolean isComplexDataType(DataType dataType) { - return dataType instanceof ArrayType || dataType instanceof MapType || dataType instanceof JsonType - || dataType instanceof StructType; - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java index e69b5ee8ef3a13..256b37d70572e4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java @@ -45,13 +45,11 @@ import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import com.google.common.collect.Sets; import java.util.IdentityHashMap; import java.util.List; import java.util.Map; import java.util.Optional; -import java.util.Set; import java.util.stream.Collectors; import javax.annotation.Nullable; @@ -92,7 +90,6 @@ public class PlanTranslatorContext { private final Map cteProducerMap = Maps.newHashMap(); private final Map tablePushAggOp = Maps.newHashMap(); - private final Map> statsUnknownColumnsMap = Maps.newHashMap(); public PlanTranslatorContext(CascadesContext ctx) { this.translator = new RuntimeFilterTranslator(ctx.getRuntimeFilterContext()); @@ -103,34 +100,6 @@ public PlanTranslatorContext() { translator = null; } - /** - * remember the unknown-stats column and its scan, used for forbid_unknown_col_stats check - */ - public void 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
index e69b5ee8ef3a13..256b37d70572e4 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PlanTranslatorContext.java
@@ -45,13 +45,11 @@
 import com.google.common.annotations.VisibleForTesting;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
-import com.google.common.collect.Sets;
 
 import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Optional;
-import java.util.Set;
 import java.util.stream.Collectors;
 
 import javax.annotation.Nullable;
@@ -92,7 +90,6 @@ public class PlanTranslatorContext {
     private final Map cteProducerMap = Maps.newHashMap();
 
     private final Map tablePushAggOp = Maps.newHashMap();
-    private final Map<ScanNode, Set<SlotId>> statsUnknownColumnsMap = Maps.newHashMap();
 
     public PlanTranslatorContext(CascadesContext ctx) {
         this.translator = new RuntimeFilterTranslator(ctx.getRuntimeFilterContext());
@@ -103,34 +100,6 @@ public PlanTranslatorContext() {
         translator = null;
     }
 
-    /**
-     * remember the unknown-stats column and its scan, used for forbid_unknown_col_stats check
-     */
-    public void addUnknownStatsColumn(ScanNode scan, SlotId slotId) {
-        Set<SlotId> slots = statsUnknownColumnsMap.get(scan);
-        if (slots == null) {
-            statsUnknownColumnsMap.put(scan, Sets.newHashSet(slotId));
-        } else {
-            statsUnknownColumnsMap.get(scan).add(slotId);
-        }
-    }
-
-    public boolean isColumnStatsUnknown(ScanNode scan, SlotId slotId) {
-        Set<SlotId> unknownSlots = statsUnknownColumnsMap.get(scan);
-        if (unknownSlots == null) {
-            return false;
-        }
-        return unknownSlots.contains(slotId);
-    }
-
-    public void removeScanFromStatsUnknownColumnsMap(ScanNode scan) {
-        statsUnknownColumnsMap.remove(scan);
-    }
-
-    public Set<ScanNode> getScanNodeWithUnknownColumnStats() {
-        return statsUnknownColumnsMap.keySet();
-    }
-
     public List<PlanFragment> getPlanFragments() {
         return planFragments;
     }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java
index 71818966696958..cc45952817a845 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java
@@ -59,12 +59,12 @@ public Set<Expression> infer(Set<Expression> predicates) {
     }
 
     /**
-     * Use the left or right child of `equalExpr` to replace the left or right child of `expression`
+     * Use the left or right child of `leftSlotEqualToRightSlot` to replace the left or right child of `expression`
      * Now only support infer `ComparisonPredicate`.
      * TODO: We should determine whether `expression` satisfies the condition for replacement
      *       eg: Satisfy `expression` is non-deterministic
      */
-    private Expression doInfer(Expression equalExpr, Expression expression) {
+    private Expression doInfer(Expression leftSlotEqualToRightSlot, Expression expression) {
         return expression.accept(new DefaultExpressionRewriter<Void>() {
 
             @Override
@@ -76,43 +76,36 @@ public Expression visit(Expression expr, Void context) {
             public Expression visitComparisonPredicate(ComparisonPredicate cp, Void context) {
                 // we need to get expression covered by cast, because we want to infer different datatype
                 if (ExpressionUtils.isExpressionSlotCoveredByCast(cp.left()) && (cp.right().isConstant())) {
-                    return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.left()), equalExpr);
+                    return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.left()));
                 } else if (ExpressionUtils.isExpressionSlotCoveredByCast(cp.right()) && cp.left().isConstant()) {
-                    return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.right()), equalExpr);
+                    return replaceSlot(cp, ExpressionUtils.getDatatypeCoveredByCast(cp.right()));
                 }
                 return super.visit(cp, context);
             }
 
             private boolean isDataTypeValid(DataType originDataType, Expression expr) {
-                if ((expr.child(0).getDataType() instanceof IntegralType)
-                        && (expr.child(1).getDataType() instanceof IntegralType)
+                if ((leftSlotEqualToRightSlot.child(0).getDataType() instanceof IntegralType)
+                        && (leftSlotEqualToRightSlot.child(1).getDataType() instanceof IntegralType)
                         && (originDataType instanceof IntegralType)) {
                     // infer filter can not be lower than original datatype, or dataset would be wrong
                     if (!((IntegralType) originDataType).widerThan(
-                            (IntegralType) expr.child(0).getDataType())
+                            (IntegralType) leftSlotEqualToRightSlot.child(0).getDataType())
                             && !((IntegralType) originDataType).widerThan(
-                            (IntegralType) expr.child(1).getDataType())) {
+                            (IntegralType) leftSlotEqualToRightSlot.child(1).getDataType())) {
                         return true;
                     }
-                } else if (expr.child(0).getDataType().equals(expr.child(1).getDataType())) {
-                    return true;
                 }
                 return false;
             }
 
-            private Expression replaceSlot(Expression sourcePredicate, DataType originDataType, Expression equal) {
-                if (!isDataTypeValid(originDataType, equal)) {
-                    return sourcePredicate;
-                }
-                return sourcePredicate.rewriteUp(e -> {
-                    // we can not replace Cast expression to slot because when rewrite up, we have replace child of cast
-                    if (e instanceof Cast) {
-                        return e;
-                    }
-                    if (ExpressionUtils.isTwoExpressionEqualWithCast(e, equal.child(0))) {
-                        return equal.child(1);
-                    } else if (ExpressionUtils.isTwoExpressionEqualWithCast(e, equal.child(1))) {
-                        return equal.child(0);
+            private Expression replaceSlot(Expression expr, DataType originDataType) {
+                return expr.rewriteUp(e -> {
+                    if (isDataTypeValid(originDataType, leftSlotEqualToRightSlot)) {
+                        if (ExpressionUtils.isTwoExpressionEqualWithCast(e, leftSlotEqualToRightSlot.child(0))) {
+                            return leftSlotEqualToRightSlot.child(1);
+                        } else if (ExpressionUtils.isTwoExpressionEqualWithCast(e, leftSlotEqualToRightSlot.child(1))) {
+                            return leftSlotEqualToRightSlot.child(0);
+                        }
                     }
                     return e;
                 });
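The widerThan guard kept in isDataTypeValid exists because an inferred filter must not be re-derived at a type narrower than the original comparison: narrowing an integral constant can overflow and silently change the comparison result. A small standalone demonstration with plain Java integer types (illustrative only, not Doris code):

    public class WidenSketch {
        public static void main(String[] args) {
            long bigintColumn = 3_000_000_000L;   // a BIGINT value on the other join side
            long constant = 3_000_000_000L;       // the constant from the original filter

            // comparison kept at the wider (BIGINT) type: correct
            System.out.println(bigintColumn == constant);   // true

            // comparison re-derived at the narrower INT type: overflow flips the result
            int narrowed = (int) constant;                   // wraps to -1294967296
            System.out.println(bigintColumn == narrowed);    // false, rows silently lost
        }
    }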
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
index 24ec929e820b00..45aeae54fde483 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java
@@ -26,6 +26,7 @@
 import org.apache.doris.common.FeConstants;
 import org.apache.doris.common.Pair;
 import org.apache.doris.nereids.CascadesContext;
+import org.apache.doris.nereids.exceptions.AnalysisException;
 import org.apache.doris.nereids.memo.Group;
 import org.apache.doris.nereids.memo.GroupExpression;
 import org.apache.doris.nereids.trees.expressions.Alias;
@@ -122,6 +123,7 @@
 import org.apache.doris.statistics.StatisticRange;
 import org.apache.doris.statistics.Statistics;
 import org.apache.doris.statistics.StatisticsBuilder;
+import org.apache.doris.statistics.util.StatisticsUtil;
 
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Maps;
@@ -621,21 +623,46 @@ private Statistics computeCatalogRelation(CatalogRelation catalogRelation) {
                     .setAvgSizeByte(slotReference.getColumn().get().getType().getSlotSize())
                     .build();
         }
-        if (!cache.isUnKnown) {
-            rowCount = Math.max(rowCount, cache.count);
-            Histogram histogram = getColumnHistogram(table, colName);
-            if (histogram != null) {
-                ColumnStatisticBuilder columnStatisticBuilder =
-                        new ColumnStatisticBuilder(cache).setHistogram(histogram);
-                cache = columnStatisticBuilder.build();
-                if (ConnectContext.get().getSessionVariable().isEnableMinidump()
-                        && !ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
-                    totalColumnStatisticMap.put(table.getName() + ":" + colName, cache);
-                    totalHistogramMap.put(table.getName() + colName, histogram);
+        if (cache.isUnKnown) {
+            if (forbidUnknownColStats && !shouldIgnoreThisCol) {
+                if (StatisticsUtil.statsTblAvailable()) {
+                    throw new AnalysisException(String.format("Found unknown stats for column:%s.%s.\n"
+                            + "It may caused by:\n"
+                            + "\n"
+                            + "1. This column never got analyzed\n"
+                            + "2. This table is empty\n"
+                            + "3. Stats load failed caused by unstable of backends,"
+                            + "and FE cached the unknown stats by default in this scenario\n"
+                            + "4. There is a bug, please report it to Doris community\n"
+                            + "\n"
+                            + "If an unknown stats for this column is tolerable,"
+                            + "you could set session variable `forbid_unknown_col_stats` to false to make planner"
+                            + " ignore this error and keep planning.", table.getName(), colName));
+                } else {
+                    throw new AnalysisException("BE is not available!");
                 }
             }
+            columnStatisticMap.put(slotReference, cache);
+            continue;
+        }
+        rowCount = Math.max(rowCount, cache.count);
+        Histogram histogram = getColumnHistogram(table, colName);
+        if (histogram != null) {
+            ColumnStatisticBuilder columnStatisticBuilder =
+                    new ColumnStatisticBuilder(cache).setHistogram(histogram);
+            columnStatisticMap.put(slotReference, columnStatisticBuilder.build());
+            cache = columnStatisticBuilder.build();
+            if (ConnectContext.get().getSessionVariable().isEnableMinidump()
+                    && !ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
+                totalHistogramMap.put(table.getName() + ":" + colName, histogram);
+            }
+        }
         columnStatisticMap.put(slotReference, cache);
+        if (ConnectContext.get().getSessionVariable().isEnableMinidump()
+                && !ConnectContext.get().getSessionVariable().isPlayNereidsDump()) {
+            totalColumnStatisticMap.put(table.getName() + ":" + colName, cache);
+            totalHistogramMap.put(table.getName() + colName, histogram);
+        }
     }
     return new Statistics(rowCount, columnStatisticMap);
 }
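Reduced to its skeleton, the control flow restored in computeCatalogRelation is: unknown column stats either abort planning (when forbid_unknown_col_stats is set and the stats table is expected to be available) or are cached as-is and skipped, while only known stats contribute to the row count. A compact sketch of that shape (hypothetical types, not Doris code):

    import java.util.HashMap;
    import java.util.Map;

    public class StatsSketch {
        record ColStats(boolean unknown, double count) {}

        // unknown stats either abort planning or pass through untouched;
        // only known stats raise the estimated row count
        static double rowCount(Map<String, ColStats> byColumn, boolean forbidUnknown) {
            double rowCount = 1.0;
            Map<String, ColStats> resultStats = new HashMap<>();
            for (Map.Entry<String, ColStats> e : byColumn.entrySet()) {
                ColStats cache = e.getValue();
                if (cache.unknown()) {
                    if (forbidUnknown) {
                        throw new IllegalStateException("Found unknown stats for column:" + e.getKey());
                    }
                    resultStats.put(e.getKey(), cache); // cached as-is
                    continue;                           // never influences rowCount
                }
                rowCount = Math.max(rowCount, cache.count());
                resultStats.put(e.getKey(), cache);
            }
            return rowCount;
        }

        public static void main(String[] args) {
            Map<String, ColStats> stats = Map.of(
                    "k1", new ColStats(false, 100.0),
                    "k2", new ColStats(true, 0.0));
            System.out.println(rowCount(stats, false)); // 100.0
        }
    }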
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java
index b7b235d2b43041..adc67ca835f915 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicatesTest.java
@@ -17,33 +17,15 @@
 
 package org.apache.doris.nereids.rules.rewrite;
 
-import org.apache.doris.nereids.trees.expressions.Cast;
-import org.apache.doris.nereids.trees.expressions.EqualTo;
-import org.apache.doris.nereids.trees.expressions.Expression;
-import org.apache.doris.nereids.trees.expressions.literal.Literal;
 import org.apache.doris.nereids.trees.plans.JoinType;
-import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan;
-import org.apache.doris.nereids.types.BigIntType;
 import org.apache.doris.nereids.util.MemoPatternMatchSupported;
 import org.apache.doris.nereids.util.PlanChecker;
-import org.apache.doris.nereids.util.PlanConstructor;
 import org.apache.doris.utframe.TestWithFeService;
 
-import com.google.common.collect.Sets;
-import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 
-import java.util.Optional;
-import java.util.Set;
-
 public class InferPredicatesTest extends TestWithFeService implements MemoPatternMatchSupported {
 
-    private final LogicalOlapScan scan1 = PlanConstructor.newLogicalOlapScan(0, "t1", 0);
-
-    private final LogicalOlapScan scan2 = PlanConstructor.newLogicalOlapScan(1, "t2", 0);
-
-    private final PredicatePropagation propagation = new PredicatePropagation();
-
     @Override
     protected void runBeforeAll() throws Exception {
         createDatabase("test");
@@ -646,16 +628,4 @@ public void innerJoinShouldNotInferUnderLeftJoinOnClausePredicates() {
                 ).when(join -> join.getJoinType() == JoinType.LEFT_OUTER_JOIN)
         );
     }
-
-    @Test
-    void testInfer() {
-        EqualTo equalTo = new EqualTo(new Cast(scan1.getOutput().get(0), BigIntType.INSTANCE), Literal.of(1));
-        EqualTo equalTo2 = new EqualTo(scan2.getOutput().get(0), scan1.getOutput().get(0));
-        Set<Expression> predicates = Sets.newHashSet();
-        predicates.add(equalTo2);
-        predicates.add(equalTo);
-        Set<Expression> newPredicates = propagation.infer(predicates);
-        Optional<Expression> newPredicate = newPredicates.stream().findFirst();
-        Assertions.assertTrue(newPredicate.get().equals(new EqualTo(new Cast(scan2.getOutput().get(0), BigIntType.INSTANCE), Literal.of(1))));
-    }
 }
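The deleted testInfer pinned down one concrete derivation: from cast(scan1.col as bigint) = 1 and scan2.col = scan1.col, infer() must produce cast(scan2.col as bigint) = 1. The same substitution, mirrored with plain strings standing in for Nereids expression trees (illustrative only):

    public class TestInferSketch {
        public static void main(String[] args) {
            String equalTo = "CAST(scan1.col AS BIGINT) = 1";  // filter on scan1
            String join = "scan2.col = scan1.col";             // join equality

            // substitute the join's right side with its left side inside the filter
            String[] sides = join.split(" = ");
            String inferred = equalTo.replace(sides[1], sides[0]);

            System.out.println(inferred); // CAST(scan2.col AS BIGINT) = 1
            if (!inferred.equals("CAST(scan2.col AS BIGINT) = 1")) {
                throw new AssertionError("inference failed");
            }
        }
    }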
diff --git a/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy b/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy
index 120c9a8f674458..a1621f1c239aa5 100644
--- a/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy
+++ b/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy
@@ -22,8 +22,6 @@ suite("test_infer_predicate") {
     sql 'drop table if exists infer_tb1;'
     sql 'drop table if exists infer_tb2;'
     sql 'drop table if exists infer_tb3;'
-    sql 'drop table if exists infer_tb4;'
-    sql 'drop table if exists infer_tb5;'
 
     sql '''create table infer_tb1 (k1 int, k2 int) distributed by hash(k1) buckets 3 properties('replication_num' = '1');'''
 
@@ -31,10 +29,6 @@
 
     sql '''create table infer_tb3 (k1 varchar(100), k2 int) distributed by hash(k1) buckets 3 properties('replication_num' = '1');'''
 
-    sql '''create table infer_tb4 (k1 varchar(100), k2 date) distributed by hash(k1) buckets 3 properties('replication_num' = '1');'''
-
-    sql '''create table infer_tb5 (k1 varchar(100), k3 date) distributed by hash(k1) buckets 3 properties('replication_num' = '1');'''
-
     explain {
         sql "select * from infer_tb1 inner join infer_tb2 where infer_tb2.k1 = infer_tb1.k2 and infer_tb2.k1 = 1;"
         contains "PREDICATES: k2"
@@ -61,16 +55,4 @@
         contains "PREDICATES: k3"
         contains "PREDICATES: k2"
     }
-
-    explain {
-        sql "select * from infer_tb4 left join infer_tb5 on infer_tb4.k2 = infer_tb5.k3 where infer_tb4.k2 = '20230901';"
-        contains "PREDICATES: k3"
-        contains "PREDICATES: k2"
-    }
-
-    sql 'drop table if exists infer_tb1;'
-    sql 'drop table if exists infer_tb2;'
-    sql 'drop table if exists infer_tb3;'
-    sql 'drop table if exists infer_tb4;'
-    sql 'drop table if exists infer_tb5;'
 }