Skip to content

Commit

Permalink
[fix](fold) fixed an issue with be computing constants (#43410)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

issue close: #43061

1. Problem
When enable_fold_constant_by_be=true is set, the results of
the two queries below are inconsistent:

select hex(from_base64('wr2JEDVXzL9+2XtRhgIloA==')) 
+----------------------------------------------+
| hex(from_base64('wr2JEDVXzL9+2XtRhgIloA==')) |
+----------------------------------------------+
| C2BD89103557CCBF7ED97B51860225A0             |
+----------------------------------------------+

select hex(s) from (select from_base64('wr2JEDVXzL9+2XtRhgIloA==') as s) t 
+--------------------------------------------------+
| hex(s)                                           |
+--------------------------------------------------+
| C2BDEFBFBD103557CCBF7EEFBFBD7B51EFBFBD0225EFBFBD |
+--------------------------------------------------+

2. MySQL results

select hex(s) from (select from_base64('wr2JEDVXzL9+2XtRhgIloA==') as s) t;
+----------------------------------+
| hex(s)                           |
+----------------------------------+
| C2BD89103557CCBF7ED97B51860225A0 |
+----------------------------------+

3. Cause
When processing binary data such as the output of from_base64, BE returns
the original binary data through the bytesValue field, but the previous code
only used the stringValue field. As a result, the binary data was
corrupted during the string encoding conversion.
  • Loading branch information
felixwluo authored and Your Name committed Dec 20, 2024
1 parent b6ca836 commit b9346f8
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -487,8 +487,16 @@ public static List<Literal> getResultExpression(DataType type, PValues resultCon
} else if (type.isStringLikeType()) {
int num = resultContent.getStringValueCount();
for (int i = 0; i < num; ++i) {
Literal literal = new StringLiteral(resultContent.getStringValue(i));
res.add(literal);
// get the raw byte data to avoid character encoding conversion problems
ByteString bytesValues = resultContent.getBytesValue(i);
// use UTF-8 encoding to ensure proper handling of binary data
String stringValue = bytesValues.toStringUtf8();
// handle special NULL value cases
if ("\\N".equalsIgnoreCase(stringValue) && resultContent.hasHasNull()) {
res.add(new NullLiteral(type));
} else {
res.add(new StringLiteral(stringValue));
}
}
} else if (type.isArrayType()) {
ArrayType arrayType = (ArrayType) type;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,9 @@ public void testGetResultExpressionStruct() {
PValues.Builder resultContentBuilder = PValues.newBuilder();
for (int i = 0; i < elementsArray.length; i = i + 2) {
childBuilder1.addInt32Value(elementsArray[i]);
childBuilder2.addStringValue("str" + (i + 1));
String strValue = "str" + (i + 1);
childBuilder2.addStringValue(strValue);
childBuilder2.addBytesValue(com.google.protobuf.ByteString.copyFromUtf8(strValue));
}
childBuilder1.setType(childTypeBuilder1.build());
childBuilder2.setType(childTypeBuilder2.build());
Expand Down Expand Up @@ -280,7 +282,9 @@ public void testGetResultExpressionStructArray() {
PValues.Builder resultContentBuilder = PValues.newBuilder();
for (int i = 0; i < elementsArray.length; i = i + 2) {
childBuilder1.addInt32Value(elementsArray[i]);
childBuilder2.addStringValue("str" + (i + 1));
String strValue = "str" + (i + 1);
childBuilder2.addStringValue(strValue);
childBuilder2.addBytesValue(com.google.protobuf.ByteString.copyFromUtf8(strValue));
}
childBuilder1.setType(childTypeBuilder1.build());
childBuilder2.setType(childTypeBuilder2.build());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
C2BD89103557CCBF7ED97B51860225A0

-- !sql --
C2BD89103557CCBF7ED97B51860225A0

-- !sql_1 --
80000

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,9 @@ suite("fold_constant_by_be") {
sql 'set enable_fallback_to_original_planner=false'
sql 'set enable_fold_constant_by_be=true'

qt_sql """ select hex(from_base64('wr2JEDVXzL9+2XtRhgIloA==')); """
qt_sql """ select hex(s) from (select from_base64('wr2JEDVXzL9+2XtRhgIloA==') as s) t; """

test {
sql '''
select if(
Expand All @@ -32,8 +35,8 @@ suite("fold_constant_by_be") {
result([['9999-07-31']])
}

sql """
CREATE TABLE IF NOT EXISTS str_tb (k1 VARCHAR(10) NULL, v1 STRING NULL)
sql """
CREATE TABLE IF NOT EXISTS str_tb (k1 VARCHAR(10) NULL, v1 STRING NULL)
UNIQUE KEY(k1) DISTRIBUTED BY HASH(k1) BUCKETS 5 properties("replication_num" = "1");
"""

Expand All @@ -53,7 +56,7 @@ suite("fold_constant_by_be") {

sql 'set query_timeout=12;'
qt_sql "select sleep(sign(1)*5);"

explain {
sql("verbose select substring('123456', 1, 3)")
contains "varchar(3)"
Expand All @@ -71,7 +74,7 @@ suite("fold_constant_by_be") {
col_varchar_1000__undef_signed varchar(1000) null ,
col_varchar_1000__undef_signed_not_null varchar(1000) not null ,
col_varchar_1001__undef_signed varchar(1001) null ,
col_varchar_1001__undef_signed_not_null varchar(1001) not null
col_varchar_1001__undef_signed_not_null varchar(1001) not null
) engine=olap
DUPLICATE KEY(pk, col_char_255__undef_signed, col_char_100__undef_signed)
distributed by hash(pk) buckets 10
Expand Down

0 comments on commit b9346f8

Please sign in to comment.