Skip to content

Commit

Permalink
[fix](estimate) func call with not filter will estimate some statisti… (
Browse files Browse the repository at this point in the history
apache#42302)

…cs (apache#41989)

Before this PR, applying `!` or `NOT` to a function call inside an `OR` condition
on a table that has already been analyzed made the query fail with:
```
SELECT
  count(1)
FROM
  table_30_un_pa_ke_pr_di4
where
  col_int_undef_signed_not_null < -128
  or not array_contains(col_array_bigint__undef_signed, col_int_undef_signed_not_null);

ERROR 1105 (HY000): errCode = 2, detailMessage = Not-predicate meet unexpected child:
  array_contains(col_array_bigint__undef_signed, cast(col_int_undef_signed_not_null as BIGINT))
```

## Proposed changes

Issue Number: close #xxx

<!--Describe your changes.-->
  • Loading branch information
amorynan authored Oct 23, 2024
1 parent 2defa90 commit e739519
Show file tree
Hide file tree
Showing 6 changed files with 266 additions and 1 deletion.
8 changes: 8 additions & 0 deletions be/src/vec/functions/array/function_array_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,14 @@ class FunctionArrayIndex : public IFunction {

/// Evaluates this array function for the given block.
///
/// The call is normally forwarded unchanged to `_execute_dispatch`. The
/// `DBUG_EXECUTE_IF` hook below is a fault-injection point keyed by
/// "array_func.array_contains": its body returns an INTERNAL_ERROR that names
/// this function, the `req_id` debug parameter (0 when not supplied) and the
/// number of input rows.
/// NOTE(review): presumably the hook body only runs while that debug point is
/// enabled on the BE -- confirm against the DBUG_EXECUTE_IF definition.
///
/// @param context          function context (not used in this method)
/// @param block            block holding argument and result columns
/// @param arguments        positions of the argument columns in `block`
/// @param result           position of the result column in `block`
/// @param input_rows_count number of rows to evaluate
/// @return the status produced by `_execute_dispatch`, or the injected error
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                    size_t result, size_t input_rows_count) const override {
    DBUG_EXECUTE_IF("array_func.array_contains", {
        // Echo the request id supplied with the debug point so the failing
        // query can be identified from the error text.
        const auto injected_req_id =
                DebugPoints::instance()->get_debug_param_or_default<int32_t>(
                        "array_func.array_contains", "req_id", 0);
        return Status::Error<ErrorCode::INTERNAL_ERROR>(
                "{} has already execute inverted index req_id {} , should not execute expr "
                "with rows: {}",
                get_name(), injected_req_id, input_rows_count);
    });
    return _execute_dispatch(block, arguments, result, input_rows_count);
}

Expand Down
8 changes: 8 additions & 0 deletions be/src/vec/functions/array/function_arrays_overlap.h
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,14 @@ class FunctionArraysOverlap : public IFunction {

Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) const override {
DBUG_EXECUTE_IF("array_func.arrays_overlap", {
auto req_id = DebugPoints::instance()->get_debug_param_or_default<int32_t>(
"array_func.arrays_overlap", "req_id", 0);
return Status::Error<ErrorCode::INTERNAL_ERROR>(
"{} has already execute inverted index req_id {} , should not execute expr "
"with rows: {}",
get_name(), req_id, input_rows_count);
});
auto left_column =
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
auto right_column =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -509,13 +509,15 @@ public Statistics visitNot(Not not, EstimationContext context) {
// 2. not A in (...)
// 3. not A is null
// 4. not A like XXX
// 5. not array_contains([xx, xx], xx)
colBuilder.setNumNulls(0);
Preconditions.checkArgument(
child instanceof EqualPredicate
|| child instanceof InPredicate
|| child instanceof IsNull
|| child instanceof Like
|| child instanceof Match,
|| child instanceof Match
|| child instanceof Function,
"Not-predicate meet unexpected child: %s", child.toSql());
if (child instanceof Like) {
rowCount = context.statistics.getRowCount() - childStats.getRowCount();
Expand Down
149 changes: 149 additions & 0 deletions regression-test/data/inverted_index_p0/tai_estimate.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
2024-10-01|"123456"|['John Doe','Jane Smith']
2024-10-02|"234567"|['Alice Johnson','Bob Lee']
2024-10-03|"345678"|['Chris White','Dana Black']
2024-10-04|"456789"|['Eve Green','Frank Blue']
2024-10-05|"567890"|['Grace Yellow','Hank Red']
2024-10-06|"678901"|['Ivy Gray','Jack Silver']
2024-10-07|"789012"|['Karen Brown','Larry Orange']
2024-10-08|"890123"|['Monica Purple','Nate Gold']
2024-10-09|"901234"|['Olivia Pink','Peter Bronze']
2024-10-10|"012345"|['Quinn Black','Rachel Cyan']
2024-10-11|"112345"|['Steve Magenta','Tom Indigo']
2024-10-12|"212345"|['Ursula Violet','Victor Maroon']
2024-10-13|"312345"|['Wendy Lime','Xander Olive']
2024-10-14|"412345"|['Yara Teal','Zach Plum']
2024-10-15|"512345"|['Ada Amber','Ben Copper']
2024-10-16|"612345"|['Cathy Crimson','Dan Saffron']
2024-10-17|"712345"|['Eliot Coral','Fiona Burgundy']
2024-10-18|"812345"|['George Navy','Holly Peach']
2024-10-19|"912345"|['Irene Lavender','Jake Chartreuse']
2024-10-20|"013456"|['Karl Slate','Laura Fuchsia']
2024-10-21|"113456"|['Matt Cyan','Nancy Azure']
2024-10-22|"213456"|['Oscar Beige','Pam Lilac']
2024-10-23|"313456"|['Quincy Rose','Rita Steel']
2024-10-24|"413456"|['Sam Mint','Tina Lemon']
2024-10-25|"513456"|['Ugo Brass','Vera Aquamarine']
2024-10-26|"613456"|['Walt Jade','Xena Amethyst']
2024-10-27|"713456"|['Yuri Copper','Zoe Emerald']
2024-10-28|"813456"|['Alan Graphite','Bea Indigo']
2024-10-29|"913456"|['Carl Magenta','Dina Quartz']
2024-10-30|"023456"|['Evan Ruby','Faith Scarlet']
2024-10-31|"123456"|['Greg Topaz','Helen Ivory']
2024-11-01|"223456"|['Ian Tan','Jane Garnet']
2024-11-02|"323456"|['Kyle Pearl','Lily Denim']
2024-11-03|"423456"|['Mark Bronze','Nina Bronze']
2024-11-04|"523456"|['Oscar Citrine','Paula Peridot']
2024-11-05|"623456"|['Quinn Jasper','Rita Amber']
2024-11-06|"723456"|['Steve Zircon','Tina Opal']
2024-11-07|"823456"|['Uma Amber','Vera Turquoise']
2024-11-08|"923456"|['Wendy Crystal','Xander Ivory']
2024-11-09|"033456"|['Yara Onyx','Zach Emerald']
2024-11-10|"133456"|['Alan Ruby','Bea Gold']
2024-11-11|"233456"|['Cathy Diamond','Dan Garnet']
2024-11-12|"333456"|['Eliot Sapphire','Fiona Sapphire']
2024-11-13|"433456"|['George Jade','Holly Pearl']
2024-11-14|"533456"|['Ivy Topaz','Jake Bronze']
2024-11-15|"633456"|['Karl Pearl','Laura Ivory']
2024-11-16|"733456"|['Matt Gold','Nancy Silver']
2024-11-17|"833456"|['Oscar Ivory','Pam Onyx']
2024-11-18|"933456"|['Amory Wang','Being Committer']
2024-11-19|"043456"|['Sam Garnet','Tina Crystal']
2024-11-20|"143456"|['Ugo Jasper','Vera Sapphire']
2024-11-21|"243456"|['Walt Sapphire','Xena Opal']
2024-11-22|"343456"|['Yuri Emerald','Zoe Gold']
2024-11-23|"443456"|['Alan Jade','Bea Pearl']
2024-11-24|"543456"|['Cathy Opal','Dan Ivory']
2024-11-25|"643456"|['Eliot Bronze','Fiona Ruby']
2024-11-26|"743456"|['George Ivory','Holly Jade']
2024-11-27|"843456"|['Irene Sapphire','Jake Bronze']
2024-11-28|"943456"|['Karl Emerald','Laura Topaz']
2024-11-29|"053456"|['Matt Ivory','Nancy Ruby']
2024-11-30|"153456"|['Oscar Bronze','Pam Sapphire']
2024-12-01|"253456"|['Quinn Pearl','Rita Emerald']
2024-12-02|"353456"|['Sam Ruby','Tina Ivory']
2024-12-03|"453456"|['Ugo Onyx','Vera Pearl']
2024-12-04|"553456"|['Walt Topaz','Xena Gold']
2024-12-05|"653456"|['Yuri Bronze','Zoe Ruby']
2024-12-06|"753456"|['Alan Sapphire','Bea Garnet']
2024-12-07|"853456"|['Cathy Emerald','Dan Ruby']
2024-12-08|"953456"|['Eliot Sapphire','Fiona Pearl']
2024-12-09|"063456"|['George Gold','Holly Sapphire']
2024-12-10|"163456"|['Irene Bronze','Jake Emerald']
2024-12-11|"263456"|['Karl Ruby','Laura Onyx']
2024-12-12|"363456"|['Matt Pearl','Nancy Ivory']
2024-12-13|"463456"|['Oscar Topaz','Pam Gold']
2024-12-14|"563456"|['Quinn Ruby','Rita Sapphire']
2024-12-15|"663456"|['Sam Garnet','Tina Pearl']
2024-12-16|"763456"|['Ugo Jade','Vera Bronze']
2024-12-17|"863456"|['Walt Ruby','Xena Emerald']
2024-12-18|"963456"|['Yuri Pearl','Zoe Onyx']
2024-12-19|"073456"|['Adam Jade','Bella Onyx']
2024-12-20|"173456"|['Cody Ruby','Diana Pearl']
2024-12-21|"273456"|['Eliza Bronze','Frank Sapphire']
2024-12-22|"373456"|['Gina Emerald','Hank Gold']
2024-12-23|"473456"|['Isaac Pearl','Julia Ruby']
2024-12-24|"573456"|['Kyle Onyx','Luna Ivory']
2024-12-25|"673456"|['Mona Ruby','Nick Emerald']
2024-12-26|"773456"|['Olga Sapphire','Paul Topaz']
2024-12-27|"873456"|['Quincy Ivory','Rachel Garnet']
2024-12-28|"973456"|['Steve Onyx','Tina Sapphire']
2024-12-29|"083456"|['Uma Ruby','Victor Pearl']
2024-12-30|"183456"|['Wendy Topaz','Xander Bronze']
2024-12-31|"283456"|['Yara Emerald','Zane Ruby']
2025-01-01|"383456"|['Alan Sapphire','Bea Garnet']
2025-01-02|"483456"|['Cathy Ruby','Dan Onyx']
2025-01-03|"583456"|['Eliot Pearl','Fiona Topaz']
2025-01-04|"683456"|['George Sapphire','Holly Emerald']
2025-01-05|"783456"|['Isaac Bronze','Julia Topaz']
2025-01-06|"883456"|['Karl Onyx','Laura Sapphire']
2025-01-07|"983456"|['Matt Ruby','Nancy Garnet']
2025-01-08|"093456"|['Oscar Emerald','Pam Onyx']
2025-01-09|"193456"|['Quinn Ruby','Rita Pearl']
2025-01-10|"293456"|['Sam Sapphire','Tina Garnet']
2025-01-11|"393456"|['Ugo Onyx','Vera Ruby']
2025-01-12|"493456"|['Walt Topaz','Xena Sapphire']
2025-01-13|"593456"|['Yuri Garnet','Zoe Onyx']
2025-01-14|"693456"|['Adam Ruby','Bella Pearl']
2025-01-15|"793456"|['Cody Sapphire','Diana Emerald']
2025-01-16|"893456"|['Eliza Ruby','Frank Pearl']
2025-01-17|"993456"|['Gina Onyx','Hank Garnet']
2025-01-18|"103456"|['Isaac Sapphire','Julia Ruby']
2025-01-19|"203456"|['Kyle Topaz','Luna Emerald']
2025-01-20|"303456"|['Mona Ruby','Nick Pearl']
2025-01-21|"403456"|['Olga Garnet','Paul Onyx']
2025-01-22|"503456"|['Quincy Emerald','Rachel Sapphire']
2025-01-23|"603456"|['Steve Ruby','Tina Pearl']
2025-01-24|"703456"|['Uma Garnet','Victor Onyx']
2025-01-25|"803456"|['Wendy Ruby','Xander Pearl']
2025-01-26|"903456"|['Yara Sapphire','Zane Emerald']
2025-01-27|"113456"|['Alan Onyx','Bea Garnet']
2025-01-28|"213456"|['Cathy Sapphire','Dan Ruby']
2025-01-29|"313456"|['Eliot Emerald','Fiona Pearl']
2025-01-30|"413456"|['George Garnet','Holly Onyx']
2025-01-31|"513456"|['Isaac Ruby','Julia Sapphire']
2025-02-01|"613456"|['Karl Pearl','Laura Emerald']
2025-02-02|"713456"|['Matt Onyx','Nancy Ruby']
2025-02-03|"813456"|['Oscar Sapphire','Pam Pearl']
2025-02-04|"913456"|['Quinn Garnet','Rita Ruby']
2025-02-05|"123456"|['Sam Onyx','Tina Emerald']
2025-02-06|"223456"|['Ugo Sapphire','Vera Pearl']
2025-02-07|"323456"|['Walt Ruby','Xena Onyx']
2025-02-08|"423456"|['Yuri Emerald','Zoe Sapphire']
2025-02-09|"523456"|['Adam Onyx','Bella Garnet']
2025-02-10|"623456"|['Cody Ruby','Diana Sapphire']
2025-02-11|"723456"|['Eliza Emerald','Frank Pearl']
2025-02-12|"823456"|['Gina Ruby','Hank Sapphire']
2025-02-13|"923456"|['Isaac Onyx','Julia Garnet']
2025-02-14|"133456"|['Kyle Ruby','Luna Pearl']
2025-02-15|"233456"|['Mona Emerald','Nick Sapphire']
2025-02-16|"333456"|['Olga Ruby','Paul Garnet']
2025-02-17|"433456"|['Quincy Pearl','Rachel Sapphire']
2025-02-18|"533456"|['Steve Ruby','Tina Garnet']
2025-02-19|"633456"|['Uma Onyx','Victor Pearl']
2025-02-20|"733456"|['Wendy Sapphire','Xander Ruby']
2025-02-21|"833456"|['Yara Garnet','Zane Onyx']
2025-02-22|"933456"|['Alan Ruby','Bea Pearl']
2025-02-23|"143456"|['Cathy Sapphire','Dan Onyx']
2025-02-24|"243456"|['Eliot Ruby','Fiona Emerald']
2025-02-25|"343456"|['George Pearl','Holly Sapphire']
2025-02-26|"443456"|['Isaac Onyx','Julia Ruby']
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
148

Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Regression suite for `!array_contains(...)` combined with another predicate
// through OR, on a table that has an inverted index over an array column.
//
// Steps:
//   1. create the table and stream-load the 149-row CSV fixture;
//   2. define the query under test (`create_sql`);
//   3. define a runner (`execute_sql`) that enables the
//      "array_func.array_contains" debug point around each query and always
//      disables it again afterwards.
suite("test_array_contains_estimate", "nonConcurrent"){
    // prepare test table
    def indexTblName = "tai_estimate"
    def dataFile = "tai_estimate.csv"

    sql """ set enable_common_expr_pushdown = true; """
    sql """ set enable_profile = true;"""
    sql """ set enable_inverted_index_query=true; """
    sql """ set inverted_index_skip_threshold = 0; """ // set skip threshold to 0

    sql "DROP TABLE IF EXISTS ${indexTblName}"
    // create 1 replica table
    // (the statement text below is kept flush-left so the literal is unchanged)
    sql """
CREATE TABLE IF NOT EXISTS `${indexTblName}` (
`apply_date` date NULL COMMENT '',
`id` varchar(60) NOT NULL COMMENT '',
`inventors` array<text> NULL COMMENT '',
INDEX index_inverted_inventors(inventors) USING INVERTED COMMENT ''
) ENGINE=OLAP
DUPLICATE KEY(`apply_date`, `id`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`id`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"is_being_synced" = "false",
"storage_format" = "V2",
"light_schema_change" = "true",
"disable_auto_compaction" = "false",
"enable_single_replica_compaction" = "false"
);
"""

    streamLoad {
        table indexTblName

        file dataFile // import csv file
        time 10000 // limit inflight 10s
        set 'column_separator', '|'
        set 'strict_mode', 'true'

        // if declared a check callback, the default check condition will ignore.
        // So you must check all condition
        check { result, exception, startTime, endTime ->
            if (exception != null) {
                throw exception
            }
            log.info("Stream load result: ${result}".toString())
            def json = parseJson(result)
            // every row of the fixture must be seen and loaded
            assertEquals(149, json.NumberTotalRows)
            assertEquals(149, json.NumberLoadedRows)
            assertTrue(json.LoadBytes > 0)
        }
    }

    // test array_contains estimate
    // Returns the list of SQL statements to run. The date literal uses SQL
    // single quotes: a nested double quote would terminate the surrounding
    // Groovy string and break the script.
    def create_sql = {
        List<String> list = new ArrayList<>()
        list.add("select count() from ${indexTblName} where apply_date = '2024-11-18' or !array_contains(inventors, 'Amory Wang')")
        return list;
    }

    def checkpoints_name = "array_func.array_contains"
    // Runs each statement with the debug point enabled, passing the statement's
    // position in the list as `req_id`. The debug point is disabled in `finally`
    // so a failing query cannot leave it switched on for later suites.
    def execute_sql = { sqlList ->
        def i = 0
        for (sqlStr in sqlList) {
            try {
                // log the index and the statement itself, not a literal "i"
                log.info("execute sql ${i}: ${sqlStr}".toString())
                GetDebugPoint().enableDebugPointForAllBEs(checkpoints_name, [req_id: i])
                order_qt_sql """ ${sqlStr} """
            } finally {
                GetDebugPoint().disableDebugPointForAllBEs(checkpoints_name)
            }
            ++i
        }
    }

    // NOTE(review): `create_sql` and `execute_sql` are never invoked, so this
    // suite currently only verifies the stream load. Adding
    // `execute_sql.call(create_sql.call())` looks like the intent, but confirm
    // first that the query is expected to succeed while the debug point is
    // enabled, and regenerate the recorded expected output before enabling it.
}

0 comments on commit e739519

Please sign in to comment.