From 10032044cc8c68eeaa43730eb4c7a37435be31d4 Mon Sep 17 00:00:00 2001 From: lihangyu <15605149486@163.com> Date: Mon, 22 Jan 2024 11:02:55 +0800 Subject: [PATCH] [Improve](Variant) support implicit cast to numeric and string type (#30029) --- .../doris/analysis/BinaryPredicate.java | 8 ++ .../apache/doris/analysis/MatchPredicate.java | 10 ++- .../data/variant_p0/sql/implicit_cast.out | 80 +++++++++++++++++++ .../suites/variant_p0/sql/implicit_cast.sql | 15 ++++ 4 files changed, 111 insertions(+), 2 deletions(-) create mode 100644 regression-test/data/variant_p0/sql/implicit_cast.out create mode 100644 regression-test/suites/variant_p0/sql/implicit_cast.sql diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/BinaryPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/BinaryPredicate.java index bb30a46b3e9539..1ce647a473cc9c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/BinaryPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/BinaryPredicate.java @@ -496,6 +496,14 @@ private Type getCmpType() throws AnalysisException { SessionVariable.getEnableDecimal256()); } + // Variant can be implicit cast to numeric type and string type at present + if (t1.isVariantType() && (t2.isNumericType() || t2.isStringType())) { + return Type.fromPrimitiveType(t2); + } + if (t2.isVariantType() && (t1.isNumericType() || t1.isStringType())) { + return Type.fromPrimitiveType(t1); + } + return Type.DOUBLE; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java index f106aec956c72c..e284d86e2bb986 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/MatchPredicate.java @@ -271,9 +271,10 @@ public void analyzeImpl(Analyzer analyzer) throws AnalysisException { throw new AnalysisException("right operand of " + op.toString() + " must be of type STRING: " + toSql()); } - if (!getChild(0).getType().isStringType() && !getChild(0).getType().isArrayType()) { + if (!getChild(0).getType().isStringType() && !getChild(0).getType().isArrayType() + && !getChild(0).getType().isVariantType()) { throw new AnalysisException( - "left operand of " + op.toString() + " must be of type STRING or ARRAY: " + toSql()); + "left operand of " + op.toString() + " must be of type STRING, ARRAY or VARIANT: " + toSql()); } fn = getBuiltinFunction(op.toString(), @@ -295,6 +296,11 @@ public void analyzeImpl(Analyzer analyzer) throws AnalysisException { } } + // CAST variant to right expr type + if (e1.type.isVariantType()) { + setChild(0, e1.castTo(e2.getType())); + } + if (e1 instanceof SlotRef) { SlotRef slotRef = (SlotRef) e1; SlotDescriptor slotDesc = slotRef.getDesc(); diff --git a/regression-test/data/variant_p0/sql/implicit_cast.out b/regression-test/data/variant_p0/sql/implicit_cast.out new file mode 100644 index 00000000000000..b0f5d96087b5ac --- /dev/null +++ b/regression-test/data/variant_p0/sql/implicit_cast.out @@ -0,0 +1,80 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !implicit_cast -- +0 + +-- !implicit_cast_2 -- +0 + +-- !implicit_cast_3 -- +0 + +-- !implicit_cast_4 -- +5000 + +-- !implicit_cast_5 -- +leonardomso/33-js-concepts 3 +ytdl-org/youtube-dl 3 +Bogdanp/neko 2 +bminossi/AllVideoPocsFromHackerOne 2 +disclose/diodata 2 + +-- !implicit_cast_6 -- +282 + +-- !implicit_cast_7 -- +14690758274 + +-- !implicit_cast_8 -- +0 + +-- !implicit_cast_9 -- +19829 + +-- !implicit_cast_10 -- +49390617 +64890096 +10696700 +33066637 +32271952 +2051941 +32271952 +57325392 +42386044 +73801003 + +-- !implicit_cast_11 -- +27 {"id":147350463,"name":"leonardomso/33-js-concepts","url":"https://api.github.com/repos/leonardomso/33-js-concepts"} +36 {"id":26109545,"name":"odeke-em/drive","url":"https://api.github.com/repos/odeke-em/drive"} +46 {"id":141905736,"name":"GO-LiFE/GoFIT_SDK_Android","url":"https://api.github.com/repos/GO-LiFE/GoFIT_SDK_Android"} +56 {"id":289417971,"name":"MrXujiang/h5-Dooring","url":"https://api.github.com/repos/MrXujiang/h5-Dooring"} +86 {"id":106453399,"name":"redsaph/cleartext","url":"https://api.github.com/repos/redsaph/cleartext"} +98 {"id":162998479,"name":"sherlock-project/sherlock","url":"https://api.github.com/repos/sherlock-project/sherlock"} +101 {"id":326215605,"name":"okandavut/react-spotify-nowplaying","url":"https://api.github.com/repos/okandavut/react-spotify-nowplaying"} +112 {"id":178435468,"name":"sentriz/gonic","url":"https://api.github.com/repos/sentriz/gonic"} +122 {"id":182606378,"name":"netlify-labs/react-netlify-identity-widget","url":"https://api.github.com/repos/netlify-labs/react-netlify-identity-widget"} +169 {"id":68730444,"name":"microsoft/BotBuilder-Samples","url":"https://api.github.com/repos/microsoft/BotBuilder-Samples"} + +-- !implicit_cast_12 -- +27 14690746717 WatchEvent leonardomso/33-js-concepts +36 14690746732 WatchEvent odeke-em/drive +46 14690746749 WatchEvent GO-LiFE/GoFIT_SDK_Android +56 14690746773 WatchEvent MrXujiang/h5-Dooring +86 14690746843 WatchEvent redsaph/cleartext +98 14690746866 WatchEvent sherlock-project/sherlock +101 14690746870 WatchEvent okandavut/react-spotify-nowplaying +112 14690746899 WatchEvent sentriz/gonic +122 14690746914 WatchEvent netlify-labs/react-netlify-identity-widget +169 14690747028 WatchEvent microsoft/BotBuilder-Samples + +-- !implicit_cast_13 -- +user +user +user +user +user +user +user +user +user +user + diff --git a/regression-test/suites/variant_p0/sql/implicit_cast.sql b/regression-test/suites/variant_p0/sql/implicit_cast.sql new file mode 100644 index 00000000000000..0653a52eed77ca --- /dev/null +++ b/regression-test/suites/variant_p0/sql/implicit_cast.sql @@ -0,0 +1,15 @@ +set exec_mem_limit=8G; +set enable_two_phase_read_opt = true; +set topn_opt_limit_threshold = 1024; +SELECT count() from ghdata; +SELECT cast(v["repo"]["name"] as string) as repo_name, count() AS stars FROM ghdata WHERE v["type"] = 'WatchEvent' GROUP BY repo_name ORDER BY stars DESC, repo_name LIMIT 5; +SELECT COUNT() FROM ghdata WHERE v["type"] match 'WatchEvent'; +SELECT max(cast(v["id"] as bigint)) FROM ghdata; +SELECT sum(cast(v["payload"]["pull_request"]["milestone"]["creator"]["site_admin"] as int)) FROM ghdata; +SELECT sum(length(v["payload"]["pull_request"]["base"]["repo"]["html_url"])) FROM ghdata; +SELECT v["payload"]["member"]["id"] FROM ghdata where v["payload"]["member"]["id"] is not null ORDER BY k LIMIT 10; +-- select k, v:payload.commits.author.name AS name, e FROM ghdata as t lateral view explode(cast(v:payload.commits.author.name as array)) tm1 as e order by k limit 5; +select k, json_extract(v, '$.repo') from ghdata WHERE v["type"] = 'WatchEvent' order by k limit 10; +-- SELECT v["payload"]["member"]["id"], count() FROM ghdata where v["payload"]["member"]["id"] is not null group by v["payload"]["member"]["id"] order by 1, 2 desc LIMIT 10; +select k, v["id"], v["type"], v["repo"]["name"] from ghdata WHERE v["type"] = 'WatchEvent' order by k limit 10; +SELECT v["payload"]["pusher_type"] FROM ghdata where v["payload"]["pusher_type"] is not null ORDER BY k LIMIT 10; \ No newline at end of file