Skip to content

Commit

Permalink
[Bug](set) fix wrong result on single nullable set operators (#43153)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?
 Fix wrong results from set operators (e.g. INTERSECT) when the set has a single key column and that column is nullable.

Related PR: #42046
  • Loading branch information
BiteTheDDDDt authored Nov 4, 2024
1 parent 65a8166 commit 16f62fb
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 6 deletions.
42 changes: 36 additions & 6 deletions be/src/pipeline/common/set_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,27 +55,57 @@ struct SetDataVariants {
SetHashTableVariants method_variant;

void init(const std::vector<vectorized::DataTypePtr>& data_types, HashKeyType type) {
bool nullable = data_types.size() == 1 && data_types[0]->is_nullable();
switch (type) {
case HashKeyType::serialized:
method_variant.emplace<SetSerializedHashTableContext>();
break;
case HashKeyType::int8_key:
method_variant.emplace<SetPrimaryTypeHashTableContext<vectorized::UInt8>>();
if (nullable) {
method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt64>>(
get_key_sizes(data_types));
} else {
method_variant.emplace<SetPrimaryTypeHashTableContext<vectorized::UInt8>>();
}
break;
case HashKeyType::int16_key:
method_variant.emplace<SetPrimaryTypeHashTableContext<vectorized::UInt16>>();
if (nullable) {
method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt64>>(
get_key_sizes(data_types));
} else {
method_variant.emplace<SetPrimaryTypeHashTableContext<vectorized::UInt16>>();
}
break;
case HashKeyType::int32_key:
method_variant.emplace<SetPrimaryTypeHashTableContext<vectorized::UInt32>>();
if (nullable) {
method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt64>>(
get_key_sizes(data_types));
} else {
method_variant.emplace<SetPrimaryTypeHashTableContext<vectorized::UInt32>>();
}
break;
case HashKeyType::int64_key:
method_variant.emplace<SetPrimaryTypeHashTableContext<vectorized::UInt64>>();
if (nullable) {
method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt128>>(
get_key_sizes(data_types));
} else {
method_variant.emplace<SetPrimaryTypeHashTableContext<vectorized::UInt64>>();
}
break;
case HashKeyType::int128_key:
method_variant.emplace<SetPrimaryTypeHashTableContext<vectorized::UInt128>>();
if (nullable) {
method_variant.emplace<SetFixedKeyHashTableContext<vectorized::UInt136>>(
get_key_sizes(data_types));
} else {
method_variant.emplace<SetPrimaryTypeHashTableContext<vectorized::UInt128>>();
}
break;
case HashKeyType::int256_key:
method_variant.emplace<SetPrimaryTypeHashTableContext<vectorized::UInt256>>();
if (nullable) {
method_variant.emplace<SetSerializedHashTableContext>();
} else {
method_variant.emplace<SetPrimaryTypeHashTableContext<vectorized::UInt256>>();
}
break;
case HashKeyType::string_key:
method_variant.emplace<SetMethodOneString>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -561,3 +561,5 @@ abbbb
-- !union_child_and_const_exprs_orpredicates_res --
1 d2

-- !test --

Original file line number Diff line number Diff line change
Expand Up @@ -466,4 +466,56 @@ suite("test_pull_up_predicate_set_op") {
qt_union_child_and_const_exprs_orpredicates_res """
select t.a,t3.b from (select a,b from test_pull_up_predicate_set_op1 where a in (1,2) or b in ('2d','3') union select 2,'2d' union select 2,'3') t inner join test_pull_up_predicate_set_op3 t3
on t3.a=t.a and t3.b=t.b order by 1,2;"""


sql """
drop table if exists table_1_undef_partitions2_keys3_properties4_distributed_by52;
"""
sql """
create table table_1_undef_partitions2_keys3_properties4_distributed_by52 (
pk int,
col_int_undef_signed int null ,
col_int_undef_signed_not_null int not null ,
col_date_undef_signed date null ,
col_date_undef_signed_not_null date not null ,
col_varchar_10__undef_signed varchar(10) null ,
col_varchar_10__undef_signed_not_null varchar(10) not null ,
col_varchar_1024__undef_signed varchar(1024) null ,
col_varchar_1024__undef_signed_not_null varchar(1024) not null
) engine=olap
DUPLICATE KEY(pk)
distributed by hash(pk) buckets 10
properties("replication_num" = "1");
"""
sql """
insert into table_1_undef_partitions2_keys3_properties4_distributed_by52(pk,col_int_undef_signed,col_int_undef_signed_not_null,col_date_undef_signed,col_date_undef_signed_not_null,col_varchar_10__undef_signed,col_varchar_10__undef_signed_not_null,col_varchar_1024__undef_signed,col_varchar_1024__undef_signed_not_null) values (0,null,2,'2023-12-17','2023-12-14','w','e','k','e');
"""

sql """
drop table if exists table_3_undef_partitions2_keys3_properties4_distributed_by53;
"""
sql """
create table table_3_undef_partitions2_keys3_properties4_distributed_by53 (
col_int_undef_signed_not_null int not null ,
col_date_undef_signed_not_null date not null ,
pk int,
col_int_undef_signed int null ,
col_date_undef_signed date null ,
col_varchar_10__undef_signed varchar(10) null ,
col_varchar_10__undef_signed_not_null varchar(10) not null ,
col_varchar_1024__undef_signed varchar(1024) null ,
col_varchar_1024__undef_signed_not_null varchar(1024) not null
) engine=olap
DUPLICATE KEY(col_int_undef_signed_not_null, col_date_undef_signed_not_null, pk)
PARTITION BY RANGE(col_int_undef_signed_not_null, col_date_undef_signed_not_null) (PARTITION p0 VALUES [('-10000', '2023-12-01'), ('3', '2023-12-10')), PARTITION p1 VALUES [('3', '2023-12-10'), ('6', '2023-12-15')), PARTITION p2 VALUES [('6', '2023-12-15'), ('10000', '2023-12-21')))
distributed by hash(pk) buckets 10
properties("replication_num" = "1");
"""
sql """
insert into table_3_undef_partitions2_keys3_properties4_distributed_by53(pk,col_int_undef_signed,col_int_undef_signed_not_null,col_date_undef_signed,col_date_undef_signed_not_null,col_varchar_10__undef_signed,col_varchar_10__undef_signed_not_null,col_varchar_1024__undef_signed,col_varchar_1024__undef_signed_not_null) values (0,null,5,'2023-12-18','2023-12-17','n','q','something','w'),(1,null,4,'2023-12-15','2023-12-09','they','l','come','e'),(2,null,5,'2023-12-10','2023-12-17','m','g','not','h');
"""

qt_test"""
SELECT subq1.`pk` AS pk1 FROM ( ( SELECT t1.`pk` FROM table_1_undef_partitions2_keys3_properties4_distributed_by52 AS t1 RIGHT OUTER JOIN table_3_undef_partitions2_keys3_properties4_distributed_by53 AS alias1 ON t1 . `pk` = alias1 . `pk` ORDER BY t1.pk LIMIT 2 OFFSET 0 ) INTERSECT ( SELECT t1.`pk` FROM table_1_undef_partitions2_keys3_properties4_distributed_by52 AS t1 ) ) subq1 LIMIT 66666666 ;
"""
}

0 comments on commit 16f62fb

Please sign in to comment.