Skip to content

Commit

Permalink
[fix](topn) Fix wrong rows returned by TOPN sorter (apache#40241)
Browse files Browse the repository at this point in the history
  • Loading branch information
Gabriel39 authored Sep 2, 2024
1 parent d321841 commit c85b2a6
Show file tree
Hide file tree
Showing 3 changed files with 131 additions and 2 deletions.
6 changes: 4 additions & 2 deletions be/src/vec/common/sort/sorter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,10 @@ Status MergeSorterState::merge_sort_read(doris::vectorized::Block* block, int ba
if (priority_queue_.empty()) {
*eos = true;
} else if (priority_queue_.size() == 1) {
if (offset_ != 0) {
priority_queue_.top().impl->block->skip_num_rows(offset_);
if (offset_ != 0 || priority_queue_.top()->pos != 0) {
// Skip rows already returned or need to be ignored
int64_t offset = offset_ + (int64_t)priority_queue_.top()->pos;
priority_queue_.top().impl->block->skip_num_rows(offset);
}
block->swap(*priority_queue_.top().impl->block);
*eos = true;
Expand Down
12 changes: 12 additions & 0 deletions regression-test/data/query_p0/operator/test_sort_operator.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select --
100174 \N
100271 \N
100271 \N
100271 \N
100471 \N
100471 \N
100471 \N
100567 \N
100567 \N

115 changes: 115 additions & 0 deletions regression-test/suites/query_p0/operator/test_sort_operator.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Regression suite for the sort operator when the TOPN sort algorithm is forced.
//
// The suite builds two small tables, pins a handful of session variables, and
// then runs one ORDER BY ... LIMIT <offset>,<count> query whose result is
// checked against the "-- !select --" section of test_sort_operator.out.
//
// NOTE(review): the session settings below (batch_size = 9, a single pipeline
// task, forced "topn") look chosen so that the sorter hands back its result
// across a batch boundary while a non-zero LIMIT offset is in effect -- confirm
// against the sorter change this test accompanies.
suite("test_sort_operator", "query,p0,arrow_flight_sql") {

// Start from a clean slate so the suite can be re-run.
sql """
DROP TABLE IF EXISTS dim_org_ful;
"""

// Validity-interval table: one row per (org_id, start_dt) with an end_dt value
// column aggregated with REPLACE_IF_NOT_NULL. Hash-distributed into 3 buckets.
sql """
CREATE TABLE `dim_org_ful` (
`org_id` int(11) NOT NULL COMMENT '',
`start_dt` date NOT NULL COMMENT '',
`end_dt` date REPLACE_IF_NOT_NULL NULL COMMENT ''
) ENGINE=OLAP
AGGREGATE KEY(`org_id`, `start_dt`)
COMMENT '网点'
DISTRIBUTED BY HASH(`org_id`) BUCKETS 3
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"is_being_synced" = "false",
"storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false"
);
"""

sql """
DROP TABLE IF EXISTS dim_day;
"""

// Calendar table: a string day key plus the matching date, stored with the
// duplicate-key model and hash-distributed into 10 buckets.
sql """
CREATE TABLE `dim_day` (
`day_key` varchar(80) NULL,
`day_date` date NULL
) ENGINE=OLAP
DUPLICATE KEY(`day_key`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`day_key`, `day_date`) BUCKETS 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"is_being_synced" = "false",
"storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false"
);
"""

// Ten calendar days; only 2023-07-27, 2023-07-31 and 2023-08-01 fall inside the
// date range the query below filters on.
sql """
INSERT INTO `dim_day` VALUES
('20231006','2023-10-06'),
('20231010','2023-10-10'),
('20230822','2023-08-22'),
('20230829','2023-08-29'),
('20230925','2023-09-25'),
('20230731','2023-07-31'),
('20230928','2023-09-28'),
('20230727','2023-07-27'),
('20230801','2023-08-01'),
('20231017','2023-10-17');
"""

// Ten validity intervals; several org_ids have more than one interval, so the
// join below produces duplicate org_id values in its output.
sql """INSERT INTO `dim_org_ful` VALUES
(20,'2023-08-02','3000-12-31'),
(100174,'2023-07-31','2023-08-01'),
(100174,'2023-08-01','3000-12-31'),
(100271,'2023-07-26','3000-12-31'),
(100424,'2023-08-02','3000-12-31'),
(100471,'2023-07-26','3000-12-31'),
(100567,'2023-07-29','2023-07-30'),
(100567,'2023-07-30','2023-07-31'),
(100567,'2023-07-31','3000-12-31'),
(100723,'2023-07-30','2023-07-31');"""

// Session settings for the scenario under test. The batch size is set to 9,
// which equals the number of rows in the expected output.
sql """
set batch_size = 9;
"""
// Force the TOPN sort algorithm instead of leaving the choice to the engine.
sql """
set force_sort_algorithm="topn";
"""
// Run the pipeline with a single task.
sql """
set parallel_pipeline_task_num=1;
"""

// Join each calendar day to the intervals that contain it, keep days up to
// 2023-08-07, then sort by (org_id, NULL) and apply LIMIT 1,10 (offset 1,
// at most 10 rows). The expected result in test_sort_operator.out has 9 rows.
// NOTE(review): `end_dt` - 1.0 presumably means "end_dt minus one day", making
// the interval end exclusive -- confirm the date arithmetic semantics.
order_qt_select """
with `dim_org` AS(
SELECT
`t0`.`day_date` AS `ds`,
`org_id` AS `org_id`
FROM
`dim_day` t0
INNER JOIN `dim_org_ful` t1 ON `t0`.`day_date` BETWEEN `t1`.`start_dt`
AND `t1`.`end_dt` - 1.0
WHERE
`t0`.`day_date` BETWEEN '2021-01-01 00:00:00'
AND '2023-08-07'
)
select org_id,null from dim_org order by 1,2 limit 1,10
"""
}

0 comments on commit c85b2a6

Please sign in to comment.