diff --git a/CMakeLists.txt b/CMakeLists.txt index 3a00fbdc..b49ee4e3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -25,7 +25,7 @@ add_definitions(-DDUCKDB_EXTENSION_AUTOLOAD_DEFAULT=1 -DDUCKDB_EXTENSION_AUTOINS file(GLOB_RECURSE JAVA_SRC_FILES src/main/java/org/duckdb/*.java) file(GLOB_RECURSE JAVA_TEST_FILES src/test/java/org/duckdb/*.java) -set(DUCKDB_SRC_FILES src/duckdb/ub_src_catalog.cpp src/duckdb/ub_src_catalog_catalog_entry.cpp src/duckdb/ub_src_catalog_catalog_entry_dependency.cpp src/duckdb/ub_src_catalog_default.cpp src/duckdb/ub_src_common_adbc.cpp src/duckdb/ub_src_common_adbc_nanoarrow.cpp src/duckdb/ub_src_common.cpp src/duckdb/ub_src_common_arrow_appender.cpp src/duckdb/ub_src_common_arrow.cpp src/duckdb/ub_src_common_crypto.cpp src/duckdb/ub_src_common_enums.cpp src/duckdb/ub_src_common_exception.cpp src/duckdb/ub_src_common_operator.cpp src/duckdb/ub_src_common_progress_bar.cpp src/duckdb/ub_src_common_row_operations.cpp src/duckdb/ub_src_common_serializer.cpp src/duckdb/ub_src_common_sort.cpp src/duckdb/ub_src_common_tree_renderer.cpp src/duckdb/ub_src_common_types.cpp src/duckdb/ub_src_common_types_column.cpp src/duckdb/ub_src_common_types_row.cpp src/duckdb/ub_src_common_value_operations.cpp src/duckdb/src/common/vector_operations/boolean_operators.cpp src/duckdb/src/common/vector_operations/comparison_operators.cpp src/duckdb/src/common/vector_operations/generators.cpp src/duckdb/src/common/vector_operations/is_distinct_from.cpp src/duckdb/src/common/vector_operations/null_operations.cpp src/duckdb/src/common/vector_operations/numeric_inplace_operators.cpp src/duckdb/src/common/vector_operations/vector_cast.cpp src/duckdb/src/common/vector_operations/vector_copy.cpp src/duckdb/src/common/vector_operations/vector_hash.cpp src/duckdb/src/common/vector_operations/vector_storage.cpp src/duckdb/ub_src_execution.cpp src/duckdb/ub_src_execution_expression_executor.cpp src/duckdb/ub_src_execution_index_art.cpp 
src/duckdb/ub_src_execution_index.cpp src/duckdb/ub_src_execution_nested_loop_join.cpp src/duckdb/ub_src_execution_operator_aggregate.cpp src/duckdb/ub_src_execution_operator_csv_scanner_buffer_manager.cpp src/duckdb/ub_src_execution_operator_csv_scanner_encode.cpp src/duckdb/ub_src_execution_operator_csv_scanner_scanner.cpp src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp src/duckdb/ub_src_execution_operator_csv_scanner_state_machine.cpp src/duckdb/ub_src_execution_operator_csv_scanner_table_function.cpp src/duckdb/ub_src_execution_operator_csv_scanner_util.cpp src/duckdb/ub_src_execution_operator_filter.cpp src/duckdb/ub_src_execution_operator_helper.cpp src/duckdb/ub_src_execution_operator_join.cpp src/duckdb/ub_src_execution_operator_order.cpp src/duckdb/ub_src_execution_operator_persistent.cpp src/duckdb/ub_src_execution_operator_projection.cpp src/duckdb/ub_src_execution_operator_scan.cpp src/duckdb/ub_src_execution_operator_schema.cpp src/duckdb/ub_src_execution_operator_set.cpp src/duckdb/ub_src_execution_physical_plan.cpp src/duckdb/ub_src_function_aggregate_distributive.cpp src/duckdb/ub_src_function_aggregate.cpp src/duckdb/ub_src_function.cpp src/duckdb/ub_src_function_cast.cpp src/duckdb/ub_src_function_cast_union.cpp src/duckdb/ub_src_function_pragma.cpp src/duckdb/ub_src_function_scalar_compressed_materialization.cpp src/duckdb/ub_src_function_scalar.cpp src/duckdb/ub_src_function_scalar_date.cpp src/duckdb/ub_src_function_scalar_generic.cpp src/duckdb/ub_src_function_scalar_list.cpp src/duckdb/ub_src_function_scalar_map.cpp src/duckdb/ub_src_function_scalar_operator.cpp src/duckdb/ub_src_function_scalar_sequence.cpp src/duckdb/ub_src_function_scalar_string.cpp src/duckdb/ub_src_function_scalar_string_regexp.cpp src/duckdb/ub_src_function_scalar_struct.cpp src/duckdb/ub_src_function_scalar_system.cpp src/duckdb/ub_src_function_table_arrow.cpp src/duckdb/ub_src_function_table.cpp src/duckdb/ub_src_function_table_system.cpp 
src/duckdb/ub_src_function_table_version.cpp src/duckdb/ub_src_function_window.cpp src/duckdb/ub_src_main.cpp src/duckdb/ub_src_main_buffered_data.cpp src/duckdb/ub_src_main_capi.cpp src/duckdb/ub_src_main_capi_cast.cpp src/duckdb/ub_src_main_chunk_scan_state.cpp src/duckdb/ub_src_main_extension.cpp src/duckdb/ub_src_main_relation.cpp src/duckdb/ub_src_main_secret.cpp src/duckdb/ub_src_main_settings.cpp src/duckdb/ub_src_optimizer.cpp src/duckdb/ub_src_optimizer_compressed_materialization.cpp src/duckdb/ub_src_optimizer_join_order.cpp src/duckdb/ub_src_optimizer_matcher.cpp src/duckdb/ub_src_optimizer_pullup.cpp src/duckdb/ub_src_optimizer_pushdown.cpp src/duckdb/ub_src_optimizer_rule.cpp src/duckdb/ub_src_optimizer_statistics_expression.cpp src/duckdb/ub_src_optimizer_statistics_operator.cpp src/duckdb/ub_src_parallel.cpp src/duckdb/ub_src_parser.cpp src/duckdb/ub_src_parser_constraints.cpp src/duckdb/ub_src_parser_expression.cpp src/duckdb/ub_src_parser_parsed_data.cpp src/duckdb/ub_src_parser_query_node.cpp src/duckdb/ub_src_parser_statement.cpp src/duckdb/ub_src_parser_tableref.cpp src/duckdb/ub_src_parser_transform_constraint.cpp src/duckdb/ub_src_parser_transform_expression.cpp src/duckdb/ub_src_parser_transform_helpers.cpp src/duckdb/ub_src_parser_transform_statement.cpp src/duckdb/ub_src_parser_transform_tableref.cpp src/duckdb/ub_src_planner.cpp src/duckdb/ub_src_planner_binder_expression.cpp src/duckdb/ub_src_planner_binder_query_node.cpp src/duckdb/ub_src_planner_binder_statement.cpp src/duckdb/ub_src_planner_binder_tableref.cpp src/duckdb/ub_src_planner_expression.cpp src/duckdb/ub_src_planner_expression_binder.cpp src/duckdb/ub_src_planner_filter.cpp src/duckdb/ub_src_planner_operator.cpp src/duckdb/ub_src_planner_subquery.cpp src/duckdb/ub_src_storage.cpp src/duckdb/ub_src_storage_buffer.cpp src/duckdb/ub_src_storage_checkpoint.cpp src/duckdb/ub_src_storage_compression_alp.cpp src/duckdb/ub_src_storage_compression.cpp 
src/duckdb/ub_src_storage_compression_chimp.cpp src/duckdb/ub_src_storage_compression_roaring.cpp src/duckdb/ub_src_storage_metadata.cpp src/duckdb/ub_src_storage_serialization.cpp src/duckdb/ub_src_storage_statistics.cpp src/duckdb/ub_src_storage_table.cpp src/duckdb/ub_src_transaction.cpp src/duckdb/src/verification/copied_statement_verifier.cpp src/duckdb/src/verification/deserialized_statement_verifier.cpp src/duckdb/src/verification/external_statement_verifier.cpp src/duckdb/src/verification/fetch_row_verifier.cpp src/duckdb/src/verification/no_operator_caching_verifier.cpp src/duckdb/src/verification/parsed_statement_verifier.cpp src/duckdb/src/verification/prepared_statement_verifier.cpp src/duckdb/src/verification/statement_verifier.cpp src/duckdb/src/verification/unoptimized_statement_verifier.cpp src/duckdb/third_party/fmt/format.cc src/duckdb/third_party/fsst/libfsst.cpp src/duckdb/third_party/miniz/miniz.cpp src/duckdb/third_party/re2/re2/bitmap256.cc src/duckdb/third_party/re2/re2/bitstate.cc src/duckdb/third_party/re2/re2/compile.cc src/duckdb/third_party/re2/re2/dfa.cc src/duckdb/third_party/re2/re2/filtered_re2.cc src/duckdb/third_party/re2/re2/mimics_pcre.cc src/duckdb/third_party/re2/re2/nfa.cc src/duckdb/third_party/re2/re2/onepass.cc src/duckdb/third_party/re2/re2/parse.cc src/duckdb/third_party/re2/re2/perl_groups.cc src/duckdb/third_party/re2/re2/prefilter.cc src/duckdb/third_party/re2/re2/prefilter_tree.cc src/duckdb/third_party/re2/re2/prog.cc src/duckdb/third_party/re2/re2/re2.cc src/duckdb/third_party/re2/re2/regexp.cc src/duckdb/third_party/re2/re2/set.cc src/duckdb/third_party/re2/re2/simplify.cc src/duckdb/third_party/re2/re2/stringpiece.cc src/duckdb/third_party/re2/re2/tostring.cc src/duckdb/third_party/re2/re2/unicode_casefold.cc src/duckdb/third_party/re2/re2/unicode_groups.cc src/duckdb/third_party/re2/util/rune.cc src/duckdb/third_party/re2/util/strutil.cc src/duckdb/third_party/hyperloglog/hyperloglog.cpp 
src/duckdb/third_party/hyperloglog/sds.cpp src/duckdb/third_party/skiplist/SkipList.cpp src/duckdb/third_party/fastpforlib/bitpacking.cpp src/duckdb/third_party/utf8proc/utf8proc.cpp src/duckdb/third_party/utf8proc/utf8proc_wrapper.cpp src/duckdb/third_party/libpg_query/pg_functions.cpp src/duckdb/third_party/libpg_query/postgres_parser.cpp src/duckdb/third_party/libpg_query/src_backend_nodes_list.cpp src/duckdb/third_party/libpg_query/src_backend_nodes_makefuncs.cpp src/duckdb/third_party/libpg_query/src_backend_nodes_value.cpp src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp src/duckdb/third_party/libpg_query/src_backend_parser_parser.cpp src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp src/duckdb/third_party/libpg_query/src_common_keywords.cpp src/duckdb/third_party/mbedtls/library/aes.cpp src/duckdb/third_party/mbedtls/library/aria.cpp src/duckdb/third_party/mbedtls/library/asn1parse.cpp src/duckdb/third_party/mbedtls/library/base64.cpp src/duckdb/third_party/mbedtls/library/bignum.cpp src/duckdb/third_party/mbedtls/library/camellia.cpp src/duckdb/third_party/mbedtls/library/cipher.cpp src/duckdb/third_party/mbedtls/library/cipher_wrap.cpp src/duckdb/third_party/mbedtls/library/constant_time.cpp src/duckdb/third_party/mbedtls/library/entropy.cpp src/duckdb/third_party/mbedtls/library/entropy_poll.cpp src/duckdb/third_party/mbedtls/library/gcm.cpp src/duckdb/third_party/mbedtls/library/md.cpp src/duckdb/third_party/mbedtls/library/oid.cpp src/duckdb/third_party/mbedtls/library/pem.cpp src/duckdb/third_party/mbedtls/library/pk.cpp src/duckdb/third_party/mbedtls/library/pk_wrap.cpp src/duckdb/third_party/mbedtls/library/pkparse.cpp src/duckdb/third_party/mbedtls/library/platform_util.cpp src/duckdb/third_party/mbedtls/library/rsa.cpp src/duckdb/third_party/mbedtls/library/rsa_alt_helpers.cpp src/duckdb/third_party/mbedtls/library/sha1.cpp 
src/duckdb/third_party/mbedtls/library/sha256.cpp src/duckdb/third_party/mbedtls/library/sha512.cpp src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp src/duckdb/third_party/yyjson/yyjson.cpp src/duckdb/third_party/zstd/common/debug.cpp src/duckdb/third_party/zstd/common/entropy_common.cpp src/duckdb/third_party/zstd/common/error_private.cpp src/duckdb/third_party/zstd/common/fse_decompress.cpp src/duckdb/third_party/zstd/common/pool.cpp src/duckdb/third_party/zstd/common/threading.cpp src/duckdb/third_party/zstd/common/xxhash.cpp src/duckdb/third_party/zstd/common/zstd_common.cpp src/duckdb/third_party/zstd/compress/fse_compress.cpp src/duckdb/third_party/zstd/compress/hist.cpp src/duckdb/third_party/zstd/compress/huf_compress.cpp src/duckdb/third_party/zstd/compress/zstd_compress.cpp src/duckdb/third_party/zstd/compress/zstd_compress_literals.cpp src/duckdb/third_party/zstd/compress/zstd_compress_sequences.cpp src/duckdb/third_party/zstd/compress/zstd_compress_superblock.cpp src/duckdb/third_party/zstd/compress/zstd_double_fast.cpp src/duckdb/third_party/zstd/compress/zstd_fast.cpp src/duckdb/third_party/zstd/compress/zstd_lazy.cpp src/duckdb/third_party/zstd/compress/zstd_ldm.cpp src/duckdb/third_party/zstd/compress/zstd_opt.cpp src/duckdb/third_party/zstd/compress/zstdmt_compress.cpp src/duckdb/third_party/zstd/decompress/huf_decompress.cpp src/duckdb/third_party/zstd/decompress/zstd_ddict.cpp src/duckdb/third_party/zstd/decompress/zstd_decompress.cpp src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp src/duckdb/third_party/zstd/deprecated/zbuff_common.cpp src/duckdb/third_party/zstd/deprecated/zbuff_compress.cpp src/duckdb/third_party/zstd/deprecated/zbuff_decompress.cpp src/duckdb/third_party/zstd/dict/cover.cpp src/duckdb/third_party/zstd/dict/divsufsort.cpp src/duckdb/third_party/zstd/dict/fastcover.cpp src/duckdb/third_party/zstd/dict/zdict.cpp src/duckdb/extension/core_functions/lambda_functions.cpp 
src/duckdb/extension/core_functions/core_functions_extension.cpp src/duckdb/extension/core_functions/function_list.cpp src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp src/duckdb/ub_extension_core_functions_aggregate_nested.cpp src/duckdb/ub_extension_core_functions_aggregate_regression.cpp src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp src/duckdb/ub_extension_core_functions_scalar_random.cpp src/duckdb/ub_extension_core_functions_scalar_bit.cpp src/duckdb/ub_extension_core_functions_scalar_math.cpp src/duckdb/ub_extension_core_functions_scalar_operators.cpp src/duckdb/ub_extension_core_functions_scalar_struct.cpp src/duckdb/ub_extension_core_functions_scalar_map.cpp src/duckdb/ub_extension_core_functions_scalar_list.cpp src/duckdb/ub_extension_core_functions_scalar_blob.cpp src/duckdb/ub_extension_core_functions_scalar_array.cpp src/duckdb/ub_extension_core_functions_scalar_date.cpp src/duckdb/ub_extension_core_functions_scalar_string.cpp src/duckdb/ub_extension_core_functions_scalar_generic.cpp src/duckdb/ub_extension_core_functions_scalar_union.cpp src/duckdb/ub_extension_core_functions_scalar_debug.cpp src/duckdb/ub_extension_core_functions_scalar_enum.cpp src/duckdb/extension/parquet/column_reader.cpp src/duckdb/extension/parquet/column_writer.cpp src/duckdb/extension/parquet/parquet_crypto.cpp src/duckdb/extension/parquet/parquet_extension.cpp src/duckdb/extension/parquet/parquet_metadata.cpp src/duckdb/extension/parquet/parquet_reader.cpp src/duckdb/extension/parquet/parquet_statistics.cpp src/duckdb/extension/parquet/parquet_timestamp.cpp src/duckdb/extension/parquet/parquet_writer.cpp src/duckdb/extension/parquet/serialize_parquet.cpp src/duckdb/extension/parquet/zstd_file_system.cpp src/duckdb/extension/parquet/geo_parquet.cpp src/duckdb/third_party/parquet/parquet_types.cpp src/duckdb/third_party/thrift/thrift/protocol/TProtocol.cpp 
src/duckdb/third_party/thrift/thrift/transport/TTransportException.cpp src/duckdb/third_party/thrift/thrift/transport/TBufferTransports.cpp src/duckdb/third_party/snappy/snappy.cc src/duckdb/third_party/snappy/snappy-sinksource.cc src/duckdb/third_party/lz4/lz4.cpp src/duckdb/third_party/brotli/common/constants.cpp src/duckdb/third_party/brotli/common/context.cpp src/duckdb/third_party/brotli/common/dictionary.cpp src/duckdb/third_party/brotli/common/platform.cpp src/duckdb/third_party/brotli/common/shared_dictionary.cpp src/duckdb/third_party/brotli/common/transform.cpp src/duckdb/third_party/brotli/dec/bit_reader.cpp src/duckdb/third_party/brotli/dec/decode.cpp src/duckdb/third_party/brotli/dec/huffman.cpp src/duckdb/third_party/brotli/dec/state.cpp src/duckdb/third_party/brotli/enc/backward_references.cpp src/duckdb/third_party/brotli/enc/backward_references_hq.cpp src/duckdb/third_party/brotli/enc/bit_cost.cpp src/duckdb/third_party/brotli/enc/block_splitter.cpp src/duckdb/third_party/brotli/enc/brotli_bit_stream.cpp src/duckdb/third_party/brotli/enc/cluster.cpp src/duckdb/third_party/brotli/enc/command.cpp src/duckdb/third_party/brotli/enc/compound_dictionary.cpp src/duckdb/third_party/brotli/enc/compress_fragment.cpp src/duckdb/third_party/brotli/enc/compress_fragment_two_pass.cpp src/duckdb/third_party/brotli/enc/dictionary_hash.cpp src/duckdb/third_party/brotli/enc/encode.cpp src/duckdb/third_party/brotli/enc/encoder_dict.cpp src/duckdb/third_party/brotli/enc/entropy_encode.cpp src/duckdb/third_party/brotli/enc/fast_log.cpp src/duckdb/third_party/brotli/enc/histogram.cpp src/duckdb/third_party/brotli/enc/literal_cost.cpp src/duckdb/third_party/brotli/enc/memory.cpp src/duckdb/third_party/brotli/enc/metablock.cpp src/duckdb/third_party/brotli/enc/static_dict.cpp src/duckdb/third_party/brotli/enc/utf8_util.cpp src/duckdb/extension/icu/./icu-timebucket.cpp src/duckdb/extension/icu/./icu-timezone.cpp src/duckdb/extension/icu/./icu-datetrunc.cpp 
src/duckdb/extension/icu/./icu_extension.cpp src/duckdb/extension/icu/./icu-dateadd.cpp src/duckdb/extension/icu/./icu-table-range.cpp src/duckdb/extension/icu/./icu-datesub.cpp src/duckdb/extension/icu/./icu-datefunc.cpp src/duckdb/extension/icu/./icu-makedate.cpp src/duckdb/extension/icu/./icu-strptime.cpp src/duckdb/extension/icu/./icu-list-range.cpp src/duckdb/extension/icu/./icu-datepart.cpp src/duckdb/ub_extension_icu_third_party_icu_common.cpp src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp src/duckdb/extension/json/buffered_json_reader.cpp src/duckdb/extension/json/json_enums.cpp src/duckdb/extension/json/json_extension.cpp src/duckdb/extension/json/json_common.cpp src/duckdb/extension/json/json_functions.cpp src/duckdb/extension/json/json_scan.cpp src/duckdb/extension/json/json_serializer.cpp src/duckdb/extension/json/json_deserializer.cpp src/duckdb/extension/json/serialize_json.cpp src/duckdb/ub_extension_json_json_functions.cpp) +set(DUCKDB_SRC_FILES src/duckdb/ub_src_catalog.cpp src/duckdb/ub_src_catalog_catalog_entry.cpp src/duckdb/ub_src_catalog_catalog_entry_dependency.cpp src/duckdb/ub_src_catalog_default.cpp src/duckdb/ub_src_common_adbc.cpp src/duckdb/ub_src_common_adbc_nanoarrow.cpp src/duckdb/ub_src_common.cpp src/duckdb/ub_src_common_arrow_appender.cpp src/duckdb/ub_src_common_arrow.cpp src/duckdb/ub_src_common_crypto.cpp src/duckdb/ub_src_common_enums.cpp src/duckdb/ub_src_common_exception.cpp src/duckdb/ub_src_common_operator.cpp src/duckdb/ub_src_common_progress_bar.cpp src/duckdb/ub_src_common_row_operations.cpp src/duckdb/ub_src_common_serializer.cpp src/duckdb/ub_src_common_sort.cpp src/duckdb/ub_src_common_tree_renderer.cpp src/duckdb/ub_src_common_types.cpp src/duckdb/ub_src_common_types_column.cpp src/duckdb/ub_src_common_types_row.cpp src/duckdb/ub_src_common_value_operations.cpp src/duckdb/src/common/vector_operations/boolean_operators.cpp 
src/duckdb/src/common/vector_operations/comparison_operators.cpp src/duckdb/src/common/vector_operations/generators.cpp src/duckdb/src/common/vector_operations/is_distinct_from.cpp src/duckdb/src/common/vector_operations/null_operations.cpp src/duckdb/src/common/vector_operations/numeric_inplace_operators.cpp src/duckdb/src/common/vector_operations/vector_cast.cpp src/duckdb/src/common/vector_operations/vector_copy.cpp src/duckdb/src/common/vector_operations/vector_hash.cpp src/duckdb/src/common/vector_operations/vector_storage.cpp src/duckdb/ub_src_execution.cpp src/duckdb/ub_src_execution_expression_executor.cpp src/duckdb/ub_src_execution_index_art.cpp src/duckdb/ub_src_execution_index.cpp src/duckdb/ub_src_execution_nested_loop_join.cpp src/duckdb/ub_src_execution_operator_aggregate.cpp src/duckdb/ub_src_execution_operator_csv_scanner_buffer_manager.cpp src/duckdb/ub_src_execution_operator_csv_scanner_encode.cpp src/duckdb/ub_src_execution_operator_csv_scanner_scanner.cpp src/duckdb/ub_src_execution_operator_csv_scanner_sniffer.cpp src/duckdb/ub_src_execution_operator_csv_scanner_state_machine.cpp src/duckdb/ub_src_execution_operator_csv_scanner_table_function.cpp src/duckdb/ub_src_execution_operator_csv_scanner_util.cpp src/duckdb/ub_src_execution_operator_filter.cpp src/duckdb/ub_src_execution_operator_helper.cpp src/duckdb/ub_src_execution_operator_join.cpp src/duckdb/ub_src_execution_operator_order.cpp src/duckdb/ub_src_execution_operator_persistent.cpp src/duckdb/ub_src_execution_operator_projection.cpp src/duckdb/ub_src_execution_operator_scan.cpp src/duckdb/ub_src_execution_operator_schema.cpp src/duckdb/ub_src_execution_operator_set.cpp src/duckdb/ub_src_execution_physical_plan.cpp src/duckdb/ub_src_function_aggregate_distributive.cpp src/duckdb/ub_src_function_aggregate.cpp src/duckdb/ub_src_function.cpp src/duckdb/ub_src_function_cast.cpp src/duckdb/ub_src_function_cast_union.cpp src/duckdb/ub_src_function_pragma.cpp 
src/duckdb/ub_src_function_scalar_compressed_materialization.cpp src/duckdb/ub_src_function_scalar.cpp src/duckdb/ub_src_function_scalar_date.cpp src/duckdb/ub_src_function_scalar_generic.cpp src/duckdb/ub_src_function_scalar_list.cpp src/duckdb/ub_src_function_scalar_map.cpp src/duckdb/ub_src_function_scalar_operator.cpp src/duckdb/ub_src_function_scalar_sequence.cpp src/duckdb/ub_src_function_scalar_string.cpp src/duckdb/ub_src_function_scalar_string_regexp.cpp src/duckdb/ub_src_function_scalar_struct.cpp src/duckdb/ub_src_function_scalar_system.cpp src/duckdb/ub_src_function_table_arrow.cpp src/duckdb/ub_src_function_table.cpp src/duckdb/ub_src_function_table_system.cpp src/duckdb/ub_src_function_table_version.cpp src/duckdb/ub_src_function_window.cpp src/duckdb/ub_src_main.cpp src/duckdb/ub_src_main_buffered_data.cpp src/duckdb/ub_src_main_capi.cpp src/duckdb/ub_src_main_capi_cast.cpp src/duckdb/ub_src_main_chunk_scan_state.cpp src/duckdb/ub_src_main_extension.cpp src/duckdb/ub_src_main_relation.cpp src/duckdb/ub_src_main_secret.cpp src/duckdb/ub_src_main_settings.cpp src/duckdb/ub_src_optimizer.cpp src/duckdb/ub_src_optimizer_compressed_materialization.cpp src/duckdb/ub_src_optimizer_join_order.cpp src/duckdb/ub_src_optimizer_matcher.cpp src/duckdb/ub_src_optimizer_pullup.cpp src/duckdb/ub_src_optimizer_pushdown.cpp src/duckdb/ub_src_optimizer_rule.cpp src/duckdb/ub_src_optimizer_statistics_expression.cpp src/duckdb/ub_src_optimizer_statistics_operator.cpp src/duckdb/ub_src_parallel.cpp src/duckdb/ub_src_parser.cpp src/duckdb/ub_src_parser_constraints.cpp src/duckdb/ub_src_parser_expression.cpp src/duckdb/ub_src_parser_parsed_data.cpp src/duckdb/ub_src_parser_query_node.cpp src/duckdb/ub_src_parser_statement.cpp src/duckdb/ub_src_parser_tableref.cpp src/duckdb/ub_src_parser_transform_constraint.cpp src/duckdb/ub_src_parser_transform_expression.cpp src/duckdb/ub_src_parser_transform_helpers.cpp src/duckdb/ub_src_parser_transform_statement.cpp 
src/duckdb/ub_src_parser_transform_tableref.cpp src/duckdb/ub_src_planner.cpp src/duckdb/ub_src_planner_binder_expression.cpp src/duckdb/ub_src_planner_binder_query_node.cpp src/duckdb/ub_src_planner_binder_statement.cpp src/duckdb/ub_src_planner_binder_tableref.cpp src/duckdb/ub_src_planner_expression.cpp src/duckdb/ub_src_planner_expression_binder.cpp src/duckdb/ub_src_planner_filter.cpp src/duckdb/ub_src_planner_operator.cpp src/duckdb/ub_src_planner_subquery.cpp src/duckdb/ub_src_storage.cpp src/duckdb/ub_src_storage_buffer.cpp src/duckdb/ub_src_storage_checkpoint.cpp src/duckdb/ub_src_storage_compression_alp.cpp src/duckdb/ub_src_storage_compression.cpp src/duckdb/ub_src_storage_compression_chimp.cpp src/duckdb/ub_src_storage_compression_roaring.cpp src/duckdb/ub_src_storage_metadata.cpp src/duckdb/ub_src_storage_serialization.cpp src/duckdb/ub_src_storage_statistics.cpp src/duckdb/ub_src_storage_table.cpp src/duckdb/ub_src_transaction.cpp src/duckdb/src/verification/copied_statement_verifier.cpp src/duckdb/src/verification/deserialized_statement_verifier.cpp src/duckdb/src/verification/external_statement_verifier.cpp src/duckdb/src/verification/fetch_row_verifier.cpp src/duckdb/src/verification/no_operator_caching_verifier.cpp src/duckdb/src/verification/parsed_statement_verifier.cpp src/duckdb/src/verification/prepared_statement_verifier.cpp src/duckdb/src/verification/statement_verifier.cpp src/duckdb/src/verification/unoptimized_statement_verifier.cpp src/duckdb/third_party/fmt/format.cc src/duckdb/third_party/fsst/libfsst.cpp src/duckdb/third_party/miniz/miniz.cpp src/duckdb/third_party/re2/re2/bitmap256.cc src/duckdb/third_party/re2/re2/bitstate.cc src/duckdb/third_party/re2/re2/compile.cc src/duckdb/third_party/re2/re2/dfa.cc src/duckdb/third_party/re2/re2/filtered_re2.cc src/duckdb/third_party/re2/re2/mimics_pcre.cc src/duckdb/third_party/re2/re2/nfa.cc src/duckdb/third_party/re2/re2/onepass.cc src/duckdb/third_party/re2/re2/parse.cc 
src/duckdb/third_party/re2/re2/perl_groups.cc src/duckdb/third_party/re2/re2/prefilter.cc src/duckdb/third_party/re2/re2/prefilter_tree.cc src/duckdb/third_party/re2/re2/prog.cc src/duckdb/third_party/re2/re2/re2.cc src/duckdb/third_party/re2/re2/regexp.cc src/duckdb/third_party/re2/re2/set.cc src/duckdb/third_party/re2/re2/simplify.cc src/duckdb/third_party/re2/re2/stringpiece.cc src/duckdb/third_party/re2/re2/tostring.cc src/duckdb/third_party/re2/re2/unicode_casefold.cc src/duckdb/third_party/re2/re2/unicode_groups.cc src/duckdb/third_party/re2/util/rune.cc src/duckdb/third_party/re2/util/strutil.cc src/duckdb/third_party/hyperloglog/hyperloglog.cpp src/duckdb/third_party/hyperloglog/sds.cpp src/duckdb/third_party/skiplist/SkipList.cpp src/duckdb/third_party/fastpforlib/bitpacking.cpp src/duckdb/third_party/utf8proc/utf8proc.cpp src/duckdb/third_party/utf8proc/utf8proc_wrapper.cpp src/duckdb/third_party/libpg_query/pg_functions.cpp src/duckdb/third_party/libpg_query/postgres_parser.cpp src/duckdb/third_party/libpg_query/src_backend_nodes_list.cpp src/duckdb/third_party/libpg_query/src_backend_nodes_makefuncs.cpp src/duckdb/third_party/libpg_query/src_backend_nodes_value.cpp src/duckdb/third_party/libpg_query/src_backend_parser_gram.cpp src/duckdb/third_party/libpg_query/src_backend_parser_parser.cpp src/duckdb/third_party/libpg_query/src_backend_parser_scan.cpp src/duckdb/third_party/libpg_query/src_backend_parser_scansup.cpp src/duckdb/third_party/libpg_query/src_common_keywords.cpp src/duckdb/third_party/mbedtls/library/aes.cpp src/duckdb/third_party/mbedtls/library/aria.cpp src/duckdb/third_party/mbedtls/library/asn1parse.cpp src/duckdb/third_party/mbedtls/library/base64.cpp src/duckdb/third_party/mbedtls/library/bignum.cpp src/duckdb/third_party/mbedtls/library/camellia.cpp src/duckdb/third_party/mbedtls/library/cipher.cpp src/duckdb/third_party/mbedtls/library/cipher_wrap.cpp src/duckdb/third_party/mbedtls/library/constant_time.cpp 
src/duckdb/third_party/mbedtls/library/entropy.cpp src/duckdb/third_party/mbedtls/library/entropy_poll.cpp src/duckdb/third_party/mbedtls/library/gcm.cpp src/duckdb/third_party/mbedtls/library/md.cpp src/duckdb/third_party/mbedtls/library/oid.cpp src/duckdb/third_party/mbedtls/library/pem.cpp src/duckdb/third_party/mbedtls/library/pk.cpp src/duckdb/third_party/mbedtls/library/pk_wrap.cpp src/duckdb/third_party/mbedtls/library/pkparse.cpp src/duckdb/third_party/mbedtls/library/platform_util.cpp src/duckdb/third_party/mbedtls/library/rsa.cpp src/duckdb/third_party/mbedtls/library/rsa_alt_helpers.cpp src/duckdb/third_party/mbedtls/library/sha1.cpp src/duckdb/third_party/mbedtls/library/sha256.cpp src/duckdb/third_party/mbedtls/library/sha512.cpp src/duckdb/third_party/mbedtls/mbedtls_wrapper.cpp src/duckdb/third_party/yyjson/yyjson.cpp src/duckdb/third_party/zstd/common/debug.cpp src/duckdb/third_party/zstd/common/entropy_common.cpp src/duckdb/third_party/zstd/common/error_private.cpp src/duckdb/third_party/zstd/common/fse_decompress.cpp src/duckdb/third_party/zstd/common/pool.cpp src/duckdb/third_party/zstd/common/threading.cpp src/duckdb/third_party/zstd/common/xxhash.cpp src/duckdb/third_party/zstd/common/zstd_common.cpp src/duckdb/third_party/zstd/compress/fse_compress.cpp src/duckdb/third_party/zstd/compress/hist.cpp src/duckdb/third_party/zstd/compress/huf_compress.cpp src/duckdb/third_party/zstd/compress/zstd_compress.cpp src/duckdb/third_party/zstd/compress/zstd_compress_literals.cpp src/duckdb/third_party/zstd/compress/zstd_compress_sequences.cpp src/duckdb/third_party/zstd/compress/zstd_compress_superblock.cpp src/duckdb/third_party/zstd/compress/zstd_double_fast.cpp src/duckdb/third_party/zstd/compress/zstd_fast.cpp src/duckdb/third_party/zstd/compress/zstd_lazy.cpp src/duckdb/third_party/zstd/compress/zstd_ldm.cpp src/duckdb/third_party/zstd/compress/zstd_opt.cpp src/duckdb/third_party/zstd/compress/zstdmt_compress.cpp 
src/duckdb/third_party/zstd/decompress/huf_decompress.cpp src/duckdb/third_party/zstd/decompress/zstd_ddict.cpp src/duckdb/third_party/zstd/decompress/zstd_decompress.cpp src/duckdb/third_party/zstd/decompress/zstd_decompress_block.cpp src/duckdb/third_party/zstd/deprecated/zbuff_common.cpp src/duckdb/third_party/zstd/deprecated/zbuff_compress.cpp src/duckdb/third_party/zstd/deprecated/zbuff_decompress.cpp src/duckdb/third_party/zstd/dict/cover.cpp src/duckdb/third_party/zstd/dict/divsufsort.cpp src/duckdb/third_party/zstd/dict/fastcover.cpp src/duckdb/third_party/zstd/dict/zdict.cpp src/duckdb/extension/core_functions/core_functions_extension.cpp src/duckdb/extension/core_functions/lambda_functions.cpp src/duckdb/extension/core_functions/function_list.cpp src/duckdb/ub_extension_core_functions_aggregate_regression.cpp src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp src/duckdb/ub_extension_core_functions_aggregate_nested.cpp src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp src/duckdb/ub_extension_core_functions_scalar_random.cpp src/duckdb/ub_extension_core_functions_scalar_string.cpp src/duckdb/ub_extension_core_functions_scalar_math.cpp src/duckdb/ub_extension_core_functions_scalar_generic.cpp src/duckdb/ub_extension_core_functions_scalar_enum.cpp src/duckdb/ub_extension_core_functions_scalar_map.cpp src/duckdb/ub_extension_core_functions_scalar_operators.cpp src/duckdb/ub_extension_core_functions_scalar_date.cpp src/duckdb/ub_extension_core_functions_scalar_list.cpp src/duckdb/ub_extension_core_functions_scalar_blob.cpp src/duckdb/ub_extension_core_functions_scalar_debug.cpp src/duckdb/ub_extension_core_functions_scalar_array.cpp src/duckdb/ub_extension_core_functions_scalar_union.cpp src/duckdb/ub_extension_core_functions_scalar_struct.cpp src/duckdb/ub_extension_core_functions_scalar_bit.cpp src/duckdb/extension/parquet/column_reader.cpp 
src/duckdb/extension/parquet/column_writer.cpp src/duckdb/extension/parquet/parquet_crypto.cpp src/duckdb/extension/parquet/parquet_extension.cpp src/duckdb/extension/parquet/parquet_metadata.cpp src/duckdb/extension/parquet/parquet_reader.cpp src/duckdb/extension/parquet/parquet_statistics.cpp src/duckdb/extension/parquet/parquet_timestamp.cpp src/duckdb/extension/parquet/parquet_writer.cpp src/duckdb/extension/parquet/serialize_parquet.cpp src/duckdb/extension/parquet/zstd_file_system.cpp src/duckdb/extension/parquet/geo_parquet.cpp src/duckdb/third_party/parquet/parquet_types.cpp src/duckdb/third_party/thrift/thrift/protocol/TProtocol.cpp src/duckdb/third_party/thrift/thrift/transport/TTransportException.cpp src/duckdb/third_party/thrift/thrift/transport/TBufferTransports.cpp src/duckdb/third_party/snappy/snappy.cc src/duckdb/third_party/snappy/snappy-sinksource.cc src/duckdb/third_party/lz4/lz4.cpp src/duckdb/third_party/brotli/common/constants.cpp src/duckdb/third_party/brotli/common/context.cpp src/duckdb/third_party/brotli/common/dictionary.cpp src/duckdb/third_party/brotli/common/platform.cpp src/duckdb/third_party/brotli/common/shared_dictionary.cpp src/duckdb/third_party/brotli/common/transform.cpp src/duckdb/third_party/brotli/dec/bit_reader.cpp src/duckdb/third_party/brotli/dec/decode.cpp src/duckdb/third_party/brotli/dec/huffman.cpp src/duckdb/third_party/brotli/dec/state.cpp src/duckdb/third_party/brotli/enc/backward_references.cpp src/duckdb/third_party/brotli/enc/backward_references_hq.cpp src/duckdb/third_party/brotli/enc/bit_cost.cpp src/duckdb/third_party/brotli/enc/block_splitter.cpp src/duckdb/third_party/brotli/enc/brotli_bit_stream.cpp src/duckdb/third_party/brotli/enc/cluster.cpp src/duckdb/third_party/brotli/enc/command.cpp src/duckdb/third_party/brotli/enc/compound_dictionary.cpp src/duckdb/third_party/brotli/enc/compress_fragment.cpp src/duckdb/third_party/brotli/enc/compress_fragment_two_pass.cpp 
src/duckdb/third_party/brotli/enc/dictionary_hash.cpp src/duckdb/third_party/brotli/enc/encode.cpp src/duckdb/third_party/brotli/enc/encoder_dict.cpp src/duckdb/third_party/brotli/enc/entropy_encode.cpp src/duckdb/third_party/brotli/enc/fast_log.cpp src/duckdb/third_party/brotli/enc/histogram.cpp src/duckdb/third_party/brotli/enc/literal_cost.cpp src/duckdb/third_party/brotli/enc/memory.cpp src/duckdb/third_party/brotli/enc/metablock.cpp src/duckdb/third_party/brotli/enc/static_dict.cpp src/duckdb/third_party/brotli/enc/utf8_util.cpp src/duckdb/extension/icu/./icu-table-range.cpp src/duckdb/extension/icu/./icu-datefunc.cpp src/duckdb/extension/icu/./icu-datepart.cpp src/duckdb/extension/icu/./icu-datetrunc.cpp src/duckdb/extension/icu/./icu_extension.cpp src/duckdb/extension/icu/./icu-timezone.cpp src/duckdb/extension/icu/./icu-timebucket.cpp src/duckdb/extension/icu/./icu-makedate.cpp src/duckdb/extension/icu/./icu-datesub.cpp src/duckdb/extension/icu/./icu-dateadd.cpp src/duckdb/extension/icu/./icu-list-range.cpp src/duckdb/extension/icu/./icu-strptime.cpp src/duckdb/ub_extension_icu_third_party_icu_common.cpp src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp src/duckdb/extension/icu/third_party/icu/stubdata/stubdata.cpp src/duckdb/extension/json/buffered_json_reader.cpp src/duckdb/extension/json/json_enums.cpp src/duckdb/extension/json/json_extension.cpp src/duckdb/extension/json/json_common.cpp src/duckdb/extension/json/json_functions.cpp src/duckdb/extension/json/json_scan.cpp src/duckdb/extension/json/json_serializer.cpp src/duckdb/extension/json/json_deserializer.cpp src/duckdb/extension/json/serialize_json.cpp src/duckdb/ub_extension_json_json_functions.cpp) set(CMAKE_JAVA_COMPILE_FLAGS -source 1.8 -target 1.8 -encoding utf-8) diff --git a/src/duckdb/extension/core_functions/aggregate/holistic/mad.cpp b/src/duckdb/extension/core_functions/aggregate/holistic/mad.cpp index 93516b59..2319184e 100644 --- 
a/src/duckdb/extension/core_functions/aggregate/holistic/mad.cpp +++ b/src/duckdb/extension/core_functions/aggregate/holistic/mad.cpp @@ -218,7 +218,7 @@ struct MedianAbsoluteDeviationOperation : QuantileOperation { const auto &quantile = bind_data.quantiles[0]; auto &window_state = state.GetOrCreateWindowState(); MEDIAN_TYPE med; - if (gstate && gstate->HasTrees()) { + if (gstate && gstate->HasTree()) { med = gstate->GetWindowState().template WindowScalar(data, frames, n, result, quantile); } else { window_state.UpdateSkip(data, frames, included); diff --git a/src/duckdb/extension/core_functions/aggregate/holistic/quantile.cpp b/src/duckdb/extension/core_functions/aggregate/holistic/quantile.cpp index f8f668af..98ca4d5b 100644 --- a/src/duckdb/extension/core_functions/aggregate/holistic/quantile.cpp +++ b/src/duckdb/extension/core_functions/aggregate/holistic/quantile.cpp @@ -232,7 +232,7 @@ struct QuantileScalarOperation : public QuantileOperation { } const auto &quantile = bind_data.quantiles[0]; - if (gstate && gstate->HasTrees()) { + if (gstate && gstate->HasTree()) { rdata[ridx] = gstate->GetWindowState().template WindowScalar(data, frames, n, result, quantile); } else { @@ -333,7 +333,7 @@ struct QuantileListOperation : QuantileOperation { return; } - if (gstate && gstate->HasTrees()) { + if (gstate && gstate->HasTree()) { gstate->GetWindowState().template WindowList(data, frames, n, list, lidx, bind_data); } else { auto &window_state = state.GetOrCreateWindowState(); diff --git a/src/duckdb/extension/core_functions/include/core_functions/aggregate/quantile_sort_tree.hpp b/src/duckdb/extension/core_functions/include/core_functions/aggregate/quantile_sort_tree.hpp index b669ac9f..a330c0a4 100644 --- a/src/duckdb/extension/core_functions/include/core_functions/aggregate/quantile_sort_tree.hpp +++ b/src/duckdb/extension/core_functions/include/core_functions/aggregate/quantile_sort_tree.hpp @@ -16,6 +16,7 @@ #include "duckdb/common/operator/cast_operators.hpp" 
#include "duckdb/common/operator/multiply.hpp" #include "duckdb/planner/expression/bound_constant_expression.hpp" +#include "duckdb/function/window/window_index_tree.hpp" #include #include #include @@ -316,148 +317,55 @@ struct QuantileIncluded { CURSOR_TYPE &dmask; }; -// Shared untemplated sort logic -static unique_ptr SortQuantileIndices(const WindowPartitionInput &partition, // NOLINT - const LogicalType &index_type, OrderType order_type) { - auto &inputs = *partition.inputs; - const auto &filter_mask = partition.filter_mask; - - // Sort the unfiltered indices by the argument values - vector payload_types; - payload_types.emplace_back(index_type); - - idx_t capacity = STANDARD_VECTOR_SIZE; - DataChunk payload; - payload.Initialize(inputs.GetAllocator(), payload_types, capacity); - RowLayout payload_layout; - payload_layout.Initialize(payload.GetTypes()); - SelectionVector filtered(capacity); - - // TODO: Two pass parallel sorting using Build - ColumnDataScanState state; - DataChunk sort; - inputs.InitializeScan(state, partition.column_ids); - inputs.InitializeScanChunk(state, sort); - auto order_expr = make_uniq(Value(sort.GetTypes()[0])); - vector orders; - orders.emplace_back(BoundOrderByNode(order_type, OrderByNullType::NULLS_LAST, std::move(order_expr))); - - auto &buffer_manager = BufferManager::GetBufferManager(partition.context); - auto global_sort = make_uniq(buffer_manager, orders, payload_layout); - global_sort->external = ClientConfig::GetConfig(partition.context).force_external; - const auto memory_per_thread = PhysicalOperator::GetMaxThreadMemory(partition.context); - - LocalSortState local_sort; - local_sort.Initialize(*global_sort, global_sort->buffer_manager); - - // Build the indirection array by scanning the valid indices - while (inputs.Scan(state, sort)) { - // Match the payload to the scanned data - if (sort.size() > capacity) { - payload.Destroy(); - capacity = sort.size(); - payload.Initialize(inputs.GetAllocator(), payload_types, 
capacity); - filtered.Initialize(capacity); - } else { - payload.Reset(); - } - auto &indices = payload.data[0]; - payload.SetCardinality(sort); - indices.Sequence(int64_t(state.current_row_index), 1, payload.size()); - - if (!filter_mask.AllValid() || !partition.all_valid[0]) { - auto &key = sort.data[0]; - auto &validity = FlatVector::Validity(key); - idx_t valid = 0; - for (sel_t i = 0; i < sort.size(); ++i) { - if (filter_mask.RowIsValid(i + state.current_row_index) && validity.RowIsValid(i)) { - filtered[valid++] = i; - } - } - if (valid < sort.size()) { - payload.Slice(filtered, valid); - sort.Slice(filtered, valid); - } - } - local_sort.SinkChunk(sort, payload); - if (local_sort.SizeInBytes() > memory_per_thread) { - local_sort.Sort(*global_sort, true); - } - } - global_sort->AddLocalState(local_sort); - - // Sort it - global_sort->PrepareMergePhase(); - while (global_sort->sorted_blocks.size() > 1) { - global_sort->InitializeMergeRound(); - MergeSorter merge_sorter(*global_sort, global_sort->buffer_manager); - merge_sorter.PerformInMergeRound(); - global_sort->CompleteMergeRound(false); - } - - return global_sort; -} - -template -struct QuantileSortTree : public MergeSortTree { +struct QuantileSortTree { - using BaseTree = MergeSortTree; - using Elements = typename BaseTree::Elements; + unique_ptr index_tree; - explicit QuantileSortTree(Elements &&lowest_level) { - BaseTree::Allocate(lowest_level.size()); - BaseTree::LowestLevel() = std::move(lowest_level); - } - - template - static unique_ptr WindowInit(AggregateInputData &aggr_input_data, - const WindowPartitionInput &partition) { + QuantileSortTree(AggregateInputData &aggr_input_data, const WindowPartitionInput &partition) { + // TODO: Two pass parallel sorting using Build auto &inputs = *partition.inputs; + ColumnDataScanState scan; + DataChunk sort; + inputs.InitializeScan(scan, partition.column_ids); + inputs.InitializeScanChunk(scan, sort); - // Sort the unfiltered indices by the argument values - 
using ElementType = typename QuantileSortTree::ElementType; - vector payload_types; - switch (sizeof(ElementType)) { - case sizeof(int64_t): - payload_types.emplace_back(LogicalType::BIGINT); - break; - case sizeof(int32_t): - payload_types.emplace_back(LogicalType::INTEGER); - break; - default: - throw InternalException("Unsupported Quantile Sort Tree index size"); - } - - // TODO: Two pass parallel sorting using Build + // Sort on the single argument auto &bind_data = aggr_input_data.bind_data->Cast(); + auto order_expr = make_uniq(Value(sort.GetTypes()[0])); auto order_type = bind_data.desc ? OrderType::DESCENDING : OrderType::ASCENDING; - auto global_sort = SortQuantileIndices(partition, payload_types[0], order_type); - - // Now scan the sorted indices into an array we can use as the leaves - vector sorted; - if (!global_sort->sorted_blocks.empty()) { - PayloadScanner scanner(*global_sort); - DataChunk payload; - payload.Initialize(inputs.GetAllocator(), payload_types); - sorted.resize(scanner.Remaining()); - for (;;) { - idx_t row_idx = scanner.Scanned(); - scanner.Scan(payload); - if (payload.size() == 0) { - break; + BoundOrderModifier order_bys; + order_bys.orders.emplace_back(BoundOrderByNode(order_type, OrderByNullType::NULLS_LAST, std::move(order_expr))); + vector sort_idx(1, 0); + const auto count = partition.count; + + index_tree = make_uniq(partition.context, order_bys, sort_idx, count); + auto index_state = index_tree->GetLocalState(); + auto &local_state = index_state->Cast(); + + // Build the indirection array by scanning the valid indices + const auto &filter_mask = partition.filter_mask; + SelectionVector filter_sel(STANDARD_VECTOR_SIZE); + while (inputs.Scan(scan, sort)) { + const auto row_idx = scan.current_row_index; + if (!filter_mask.AllValid() || !partition.all_valid[0]) { + auto &key = sort.data[0]; + auto &validity = FlatVector::Validity(key); + idx_t filtered = 0; + for (sel_t i = 0; i < sort.size(); ++i) { + if (filter_mask.RowIsValid(i 
+ row_idx) && validity.RowIsValid(i)) { + filter_sel[filtered++] = i; + } } - auto &indices = payload.data[0]; - auto data = FlatVector::GetData(indices); - - std::copy(data, data + payload.size(), sorted.data() + row_idx); + local_state.SinkChunk(sort, row_idx, filter_sel, filtered); + } else { + local_state.SinkChunk(sort, row_idx, nullptr, 0); } } - - return make_uniq(std::move(sorted)); + local_state.Sort(); } - inline IDX SelectNth(const SubFrames &frames, size_t n) const { - return BaseTree::NthElement(BaseTree::SelectNth(frames, n)); + inline idx_t SelectNth(const SubFrames &frames, size_t n) const { + return index_tree->SelectNth(frames, n); } template @@ -466,7 +374,7 @@ struct QuantileSortTree : public MergeSortTree { D_ASSERT(n > 0); // Thread safe and idempotent. - BaseTree::Build(); + index_tree->Build(); // Find the interpolated indicies within the frame Interpolator interp(q, n, false); @@ -488,7 +396,7 @@ struct QuantileSortTree : public MergeSortTree { D_ASSERT(n > 0); // Thread safe and idempotent. - BaseTree::Build(); + index_tree->Build(); // Result is a constant LIST with a fixed length auto ldata = FlatVector::GetData(list); diff --git a/src/duckdb/extension/core_functions/include/core_functions/aggregate/quantile_state.hpp b/src/duckdb/extension/core_functions/include/core_functions/aggregate/quantile_state.hpp index 6b0efe58..00f4baf7 100644 --- a/src/duckdb/extension/core_functions/include/core_functions/aggregate/quantile_state.hpp +++ b/src/duckdb/extension/core_functions/include/core_functions/aggregate/quantile_state.hpp @@ -54,7 +54,6 @@ struct QuantileOperation { data_ptr_t g_state) { D_ASSERT(partition.inputs); - const auto count = partition.count; const auto &stats = partition.stats; // If frames overlap significantly, then use local skip lists. 
@@ -71,11 +70,7 @@ struct QuantileOperation { // Build the tree auto &state = *reinterpret_cast(g_state); auto &window_state = state.GetOrCreateWindowState(); - if (count < std::numeric_limits::max()) { - window_state.qst32 = QuantileSortTree::WindowInit(aggr_input_data, partition); - } else { - window_state.qst64 = QuantileSortTree::WindowInit(aggr_input_data, partition); - } + window_state.qst = make_uniq(aggr_input_data, partition); } template @@ -109,10 +104,7 @@ struct SkipLess { template struct WindowQuantileState { // Windowed Quantile merge sort trees - using QuantileSortTree32 = QuantileSortTree; - using QuantileSortTree64 = QuantileSortTree; - unique_ptr qst32; - unique_ptr qst64; + unique_ptr qst; // Windowed Quantile skip lists using SkipType = pair; @@ -196,18 +188,16 @@ struct WindowQuantileState { } } - bool HasTrees() const { - return qst32 || qst64; + bool HasTree() const { + return qst.get(); } template RESULT_TYPE WindowScalar(CursorType &data, const SubFrames &frames, const idx_t n, Vector &result, const QuantileValue &q) const { D_ASSERT(n > 0); - if (qst32) { - return qst32->WindowScalar(data, frames, n, result, q); - } else if (qst64) { - return qst64->WindowScalar(data, frames, n, result, q); + if (qst) { + return qst->WindowScalar(data, frames, n, result, q); } else if (s) { // Find the position(s) needed try { @@ -284,8 +274,8 @@ struct QuantileState { v.emplace_back(TYPE_OP::Operation(element, aggr_input)); } - bool HasTrees() const { - return window_state && window_state->HasTrees(); + bool HasTree() const { + return window_state && window_state->HasTree(); } WindowQuantileState &GetOrCreateWindowState() { if (!window_state) { diff --git a/src/duckdb/extension/json/json_serializer.cpp b/src/duckdb/extension/json/json_serializer.cpp index 57435114..55ce53f3 100644 --- a/src/duckdb/extension/json/json_serializer.cpp +++ b/src/duckdb/extension/json/json_serializer.cpp @@ -211,7 +211,7 @@ void JsonSerializer::WriteValue(bool value) { } void 
JsonSerializer::WriteDataPtr(const_data_ptr_t ptr, idx_t count) { - auto blob = Blob::ToBlob(string_t(const_char_ptr_cast(ptr), count)); + auto blob = Blob::ToString(string_t(const_char_ptr_cast(ptr), count)); auto val = yyjson_mut_strcpy(doc, blob.c_str()); PushValue(val); } diff --git a/src/duckdb/src/catalog/catalog.cpp b/src/duckdb/src/catalog/catalog.cpp index 6168967d..8d88a513 100644 --- a/src/duckdb/src/catalog/catalog.cpp +++ b/src/duckdb/src/catalog/catalog.cpp @@ -304,6 +304,13 @@ optional_ptr Catalog::CreateIndex(ClientContext &context, CreateIn return CreateIndex(GetCatalogTransaction(context), info); } +unique_ptr Catalog::BindAlterAddIndex(Binder &binder, TableCatalogEntry &table_entry, + unique_ptr plan, + unique_ptr create_info, + unique_ptr alter_info) { + throw NotImplementedException("BindAlterAddIndex not supported by this catalog"); +} + //===--------------------------------------------------------------------===// // Lookup Structures //===--------------------------------------------------------------------===// diff --git a/src/duckdb/src/common/types.cpp b/src/duckdb/src/common/types.cpp index 7ec71892..5c01b269 100644 --- a/src/duckdb/src/common/types.cpp +++ b/src/duckdb/src/common/types.cpp @@ -711,6 +711,11 @@ bool LogicalType::IsComplete() const { return true; // Missing or incorrect type info } break; + case LogicalTypeId::ENUM: + if (!type.AuxInfo() || type.AuxInfo()->type != ExtraTypeInfoType::ENUM_TYPE_INFO) { + return true; // Missing or incorrect type info + } + break; default: return false; } @@ -1596,35 +1601,30 @@ const child_list_t UnionType::CopyMemberTypes(const LogicalType &ty const string &UserType::GetCatalog(const LogicalType &type) { D_ASSERT(type.id() == LogicalTypeId::USER); auto info = type.AuxInfo(); - D_ASSERT(info); return info->Cast().catalog; } const string &UserType::GetSchema(const LogicalType &type) { D_ASSERT(type.id() == LogicalTypeId::USER); auto info = type.AuxInfo(); - D_ASSERT(info); return 
info->Cast().schema; } const string &UserType::GetTypeName(const LogicalType &type) { D_ASSERT(type.id() == LogicalTypeId::USER); auto info = type.AuxInfo(); - D_ASSERT(info); return info->Cast().user_type_name; } const vector &UserType::GetTypeModifiers(const LogicalType &type) { D_ASSERT(type.id() == LogicalTypeId::USER); auto info = type.AuxInfo(); - D_ASSERT(info); return info->Cast().user_type_modifiers; } vector &UserType::GetTypeModifiers(LogicalType &type) { D_ASSERT(type.id() == LogicalTypeId::USER); auto info = type.GetAuxInfoShrPtr(); - D_ASSERT(info); return info->Cast().user_type_modifiers; } @@ -1664,21 +1664,18 @@ const string EnumType::GetValue(const Value &val) { const Vector &EnumType::GetValuesInsertOrder(const LogicalType &type) { D_ASSERT(type.id() == LogicalTypeId::ENUM); auto info = type.AuxInfo(); - D_ASSERT(info); return info->Cast().GetValuesInsertOrder(); } idx_t EnumType::GetSize(const LogicalType &type) { D_ASSERT(type.id() == LogicalTypeId::ENUM); auto info = type.AuxInfo(); - D_ASSERT(info); return info->Cast().GetDictSize(); } PhysicalType EnumType::GetPhysicalType(const LogicalType &type) { D_ASSERT(type.id() == LogicalTypeId::ENUM); auto aux_info = type.AuxInfo(); - D_ASSERT(aux_info); auto &info = aux_info->Cast(); D_ASSERT(info.GetEnumDictType() == EnumDictType::VECTOR_DICT); return EnumTypeInfo::DictType(info.GetDictSize()); @@ -1704,21 +1701,18 @@ bool LogicalType::IsJSONType() const { const LogicalType &ArrayType::GetChildType(const LogicalType &type) { D_ASSERT(type.id() == LogicalTypeId::ARRAY); auto info = type.AuxInfo(); - D_ASSERT(info); return info->Cast().child_type; } idx_t ArrayType::GetSize(const LogicalType &type) { D_ASSERT(type.id() == LogicalTypeId::ARRAY); auto info = type.AuxInfo(); - D_ASSERT(info); return info->Cast().size; } bool ArrayType::IsAnySize(const LogicalType &type) { D_ASSERT(type.id() == LogicalTypeId::ARRAY); auto info = type.AuxInfo(); - D_ASSERT(info); return info->Cast().size == 0; } @@ 
-1799,7 +1793,7 @@ idx_t AnyType::GetCastScore(const LogicalType &type) { LogicalType IntegerLiteral::GetType(const LogicalType &type) { D_ASSERT(type.id() == LogicalTypeId::INTEGER_LITERAL); auto info = type.AuxInfo(); - D_ASSERT(info && info->type == ExtraTypeInfoType::INTEGER_LITERAL_TYPE_INFO); + D_ASSERT(info->type == ExtraTypeInfoType::INTEGER_LITERAL_TYPE_INFO); return info->Cast().constant_value.type(); } @@ -1814,7 +1808,7 @@ bool IntegerLiteral::FitsInType(const LogicalType &type, const LogicalType &targ } // we can cast to integral types if the constant value fits within that type auto info = type.AuxInfo(); - D_ASSERT(info && info->type == ExtraTypeInfoType::INTEGER_LITERAL_TYPE_INFO); + D_ASSERT(info->type == ExtraTypeInfoType::INTEGER_LITERAL_TYPE_INFO); auto &literal_info = info->Cast(); Value copy = literal_info.constant_value; return copy.DefaultTryCastAs(target); diff --git a/src/duckdb/src/execution/join_hashtable.cpp b/src/duckdb/src/execution/join_hashtable.cpp index fc99ddd0..621ff6a4 100644 --- a/src/duckdb/src/execution/join_hashtable.cpp +++ b/src/duckdb/src/execution/join_hashtable.cpp @@ -421,6 +421,10 @@ idx_t JoinHashTable::PrepareKeys(DataChunk &keys, vector } for (idx_t col_idx = 0; col_idx < keys.ColumnCount(); col_idx++) { + // see internal issue 3717. 
+ if (join_type == JoinType::MARK && !correlated_mark_join_info.correlated_types.empty()) { + continue; + } if (null_values_are_equal[col_idx]) { continue; } diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index 164f5d16..f23192bc 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "4-dev3247" +#define DUCKDB_PATCH_VERSION "4-dev3300" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.4-dev3247" +#define DUCKDB_VERSION "v1.1.4-dev3300" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "04d8f995da" +#define DUCKDB_SOURCE_ID "4a8188effc" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/function/window/window_index_tree.cpp b/src/duckdb/src/function/window/window_index_tree.cpp new file mode 100644 index 00000000..c9dcb6d5 --- /dev/null +++ b/src/duckdb/src/function/window/window_index_tree.cpp @@ -0,0 +1,271 @@ +#include "duckdb/function/window/window_index_tree.hpp" + +#include +#include + +namespace duckdb { + +WindowIndexTree::WindowIndexTree(ClientContext &context, const vector &orders, + const vector &sort_idx, const idx_t count) + : context(context), memory_per_thread(PhysicalOperator::GetMaxThreadMemory(context)), sort_idx(sort_idx), + build_stage(PartitionSortStage::INIT), tasks_completed(0) { + // Sort the unfiltered indices by the orders + LogicalType index_type; + if (count < std::numeric_limits::max()) { + index_type = LogicalType::INTEGER; + mst32 = make_uniq(); + } else { + index_type = LogicalType::BIGINT; + mst64 = make_uniq(); + } + + vector payload_types; + payload_types.emplace_back(index_type); + + RowLayout 
payload_layout; + payload_layout.Initialize(payload_types); + + auto &buffer_manager = BufferManager::GetBufferManager(context); + global_sort = make_uniq(buffer_manager, orders, payload_layout); + global_sort->external = ClientConfig::GetConfig(context).force_external; +} + +WindowIndexTree::WindowIndexTree(ClientContext &context, const BoundOrderModifier &order_bys, + const vector &sort_idx, const idx_t count) + : WindowIndexTree(context, order_bys.orders, sort_idx, count) { +} + +optional_ptr WindowIndexTree::AddLocalSort() { + lock_guard local_sort_guard(lock); + auto local_sort = make_uniq(); + local_sort->Initialize(*global_sort, global_sort->buffer_manager); + local_sorts.emplace_back(std::move(local_sort)); + + return local_sorts.back().get(); +} + +unique_ptr WindowIndexTree::GetLocalState() { + return make_uniq(*this); +} + +WindowIndexTreeLocalState::WindowIndexTreeLocalState(WindowIndexTree &index_tree) : index_tree(index_tree) { + sort_chunk.Initialize(index_tree.context, index_tree.global_sort->sort_layout.logical_types); + payload_chunk.Initialize(index_tree.context, index_tree.global_sort->payload_layout.GetTypes()); + local_sort = index_tree.AddLocalSort(); +} + +void WindowIndexTreeLocalState::SinkChunk(DataChunk &chunk, const idx_t row_idx, + optional_ptr filter_sel, idx_t filtered) { + // Reference the sort columns + auto &sort_idx = index_tree.sort_idx; + for (column_t c = 0; c < sort_idx.size(); ++c) { + sort_chunk.data[c].Reference(chunk.data[sort_idx[c]]); + } + sort_chunk.SetCardinality(chunk); + + // Sequence the payload column + auto &indices = payload_chunk.data[0]; + payload_chunk.SetCardinality(sort_chunk); + indices.Sequence(int64_t(row_idx), 1, payload_chunk.size()); + + // Apply FILTER clause, if any + if (filter_sel) { + sort_chunk.Slice(*filter_sel, filtered); + payload_chunk.Slice(*filter_sel, filtered); + } + + local_sort->SinkChunk(sort_chunk, payload_chunk); + + // Flush if we have too much data + if (local_sort->SizeInBytes() 
> index_tree.memory_per_thread) { + local_sort->Sort(*index_tree.global_sort, true); + } +} + +void WindowIndexTreeLocalState::ExecuteSortTask() { + switch (build_stage) { + case PartitionSortStage::SCAN: + index_tree.global_sort->AddLocalState(*index_tree.local_sorts[build_task]); + break; + case PartitionSortStage::MERGE: { + auto &global_sort = *index_tree.global_sort; + MergeSorter merge_sorter(global_sort, global_sort.buffer_manager); + merge_sorter.PerformInMergeRound(); + break; + } + case PartitionSortStage::SORTED: + BuildLeaves(); + break; + default: + break; + } + + ++index_tree.tasks_completed; +} + +void WindowIndexTree::MeasurePayloadBlocks() { + const auto &blocks = global_sort->sorted_blocks[0]->payload_data->data_blocks; + idx_t count = 0; + for (const auto &block : blocks) { + block_starts.emplace_back(count); + count += block->count; + } + block_starts.emplace_back(count); + + // Allocate the leaves. + if (mst32) { + mst32->Allocate(count); + mst32->LowestLevel().resize(count); + } else if (mst64) { + mst64->Allocate(count); + mst64->LowestLevel().resize(count); + } +} + +void WindowIndexTreeLocalState::BuildLeaves() { + auto &global_sort = *index_tree.global_sort; + if (global_sort.sorted_blocks.empty()) { + return; + } + + PayloadScanner scanner(global_sort, build_task); + idx_t row_idx = index_tree.block_starts[build_task]; + for (;;) { + payload_chunk.Reset(); + scanner.Scan(payload_chunk); + const auto count = payload_chunk.size(); + if (count == 0) { + break; + } + auto &indices = payload_chunk.data[0]; + if (index_tree.mst32) { + auto &sorted = index_tree.mst32->LowestLevel(); + auto data = FlatVector::GetData(indices); + std::copy(data, data + count, sorted.data() + row_idx); + } else { + auto &sorted = index_tree.mst64->LowestLevel(); + auto data = FlatVector::GetData(indices); + std::copy(data, data + count, sorted.data() + row_idx); + } + row_idx += count; + } +} + +void WindowIndexTree::CleanupSort() { + global_sort.reset(); + 
local_sorts.clear(); +} + +bool WindowIndexTree::TryPrepareSortStage(WindowIndexTreeLocalState &lstate) { + lock_guard stage_guard(lock); + + switch (build_stage.load()) { + case PartitionSortStage::INIT: + total_tasks = local_sorts.size(); + tasks_assigned = 0; + tasks_completed = 0; + lstate.build_stage = build_stage = PartitionSortStage::SCAN; + lstate.build_task = tasks_assigned++; + return true; + case PartitionSortStage::SCAN: + // Process all the local sorts + if (tasks_assigned < total_tasks) { + lstate.build_stage = PartitionSortStage::SCAN; + lstate.build_task = tasks_assigned++; + return true; + } else if (tasks_completed < tasks_assigned) { + return false; + } + global_sort->PrepareMergePhase(); + if (!(global_sort->sorted_blocks.size() / 2)) { + if (global_sort->sorted_blocks.empty()) { + lstate.build_stage = build_stage = PartitionSortStage::FINISHED; + return true; + } + MeasurePayloadBlocks(); + total_tasks = block_starts.size() - 1; + tasks_completed = 0; + tasks_assigned = 0; + lstate.build_stage = build_stage = PartitionSortStage::SORTED; + lstate.build_task = tasks_assigned++; + return true; + } + global_sort->InitializeMergeRound(); + lstate.build_stage = build_stage = PartitionSortStage::MERGE; + total_tasks = local_sorts.size(); + tasks_assigned = 1; + tasks_completed = 0; + return true; + case PartitionSortStage::MERGE: + if (tasks_assigned < total_tasks) { + lstate.build_stage = PartitionSortStage::MERGE; + ++tasks_assigned; + return true; + } else if (tasks_completed < tasks_assigned) { + return false; + } + global_sort->CompleteMergeRound(true); + if (!(global_sort->sorted_blocks.size() / 2)) { + MeasurePayloadBlocks(); + total_tasks = block_starts.size() - 1; + tasks_completed = 0; + tasks_assigned = 0; + lstate.build_stage = build_stage = PartitionSortStage::SORTED; + lstate.build_task = tasks_assigned++; + return true; + } + global_sort->InitializeMergeRound(); + lstate.build_stage = PartitionSortStage::MERGE; + total_tasks = 
local_sorts.size(); + tasks_assigned = 1; + tasks_completed = 0; + return true; + case PartitionSortStage::SORTED: + if (tasks_assigned < total_tasks) { + lstate.build_stage = PartitionSortStage::SORTED; + lstate.build_task = tasks_assigned++; + return true; + } else if (tasks_completed < tasks_assigned) { + lstate.build_stage = PartitionSortStage::FINISHED; + // Sleep while other tasks finish + return false; + } + CleanupSort(); + break; + default: + break; + } + + lstate.build_stage = build_stage = PartitionSortStage::FINISHED; + + return true; +} + +void WindowIndexTreeLocalState::Sort() { + // Sort, merge and build the tree in parallel + while (index_tree.build_stage.load() != PartitionSortStage::FINISHED) { + if (index_tree.TryPrepareSortStage(*this)) { + ExecuteSortTask(); + } else { + std::this_thread::yield(); + } + } +} + +void WindowIndexTree::Build() { + if (mst32) { + mst32->Build(); + } else { + mst64->Build(); + } +} + +idx_t WindowIndexTree::SelectNth(const SubFrames &frames, idx_t n) const { + if (mst32) { + return mst32->NthElement(mst32->SelectNth(frames, n)); + } else { + return mst64->NthElement(mst64->SelectNth(frames, n)); + } +} + +} // namespace duckdb diff --git a/src/duckdb/src/function/window/window_value_function.cpp b/src/duckdb/src/function/window/window_value_function.cpp index c652c811..0c064283 100644 --- a/src/duckdb/src/function/window/window_value_function.cpp +++ b/src/duckdb/src/function/window/window_value_function.cpp @@ -2,6 +2,7 @@ #include "duckdb/common/operator/subtract.hpp" #include "duckdb/function/window/window_aggregator.hpp" #include "duckdb/function/window/window_collection.hpp" +#include "duckdb/function/window/window_index_tree.hpp" #include "duckdb/function/window/window_shared_expressions.hpp" #include "duckdb/function/window/window_value_function.hpp" #include "duckdb/planner/expression/bound_window_expression.hpp" @@ -19,6 +20,11 @@ class WindowValueGlobalState : public WindowExecutorGlobalState { const 
ValidityMask &partition_mask, const ValidityMask &order_mask) : WindowExecutorGlobalState(executor, payload_count, partition_mask, order_mask), ignore_nulls(&all_valid), child_idx(executor.child_idx) { + + if (!executor.sort_idx.empty()) { + inner_sort = make_uniq(executor.context, executor.wexpr.arg_orders, executor.sort_idx, + payload_count); + } } void Finalize(CollectionPtr collection) { @@ -34,6 +40,8 @@ class WindowValueGlobalState : public WindowExecutorGlobalState { optional_ptr ignore_nulls; const column_t child_idx; + + unique_ptr inner_sort; }; //===--------------------------------------------------------------------===// @@ -46,13 +54,27 @@ class WindowValueLocalState : public WindowExecutorBoundsState { explicit WindowValueLocalState(const WindowValueGlobalState &gvstate) : WindowExecutorBoundsState(gvstate), gvstate(gvstate) { WindowAggregatorLocalState::InitSubFrames(frames, gvstate.executor.wexpr.exclude_clause); + + if (gvstate.inner_sort) { + local_sort = gvstate.inner_sort->GetLocalState(); + if (gvstate.executor.wexpr.ignore_nulls) { + sort_nulls.Initialize(); + } + } } + //! Accumulate the secondary sort values + void Sink(WindowExecutorGlobalState &gstate, DataChunk &sink_chunk, DataChunk &coll_chunk, + idx_t input_idx) override; //! Finish the sinking and prepare to scan void Finalize(WindowExecutorGlobalState &gstate, CollectionPtr collection) override; //! The corresponding global value state const WindowValueGlobalState &gvstate; + //! The optional sorting state for secondary sorts + unique_ptr local_sort; + //! Reusable selection vector for NULLs + SelectionVector sort_nulls; //! 
The frame boundaries, used for EXCLUDE SubFrames frames; @@ -60,9 +82,44 @@ class WindowValueLocalState : public WindowExecutorBoundsState { unique_ptr cursor; }; +void WindowValueLocalState::Sink(WindowExecutorGlobalState &gstate, DataChunk &sink_chunk, DataChunk &coll_chunk, + idx_t input_idx) { + WindowExecutorBoundsState::Sink(gstate, sink_chunk, coll_chunk, input_idx); + + if (local_sort) { + idx_t filtered = 0; + optional_ptr filter_sel; + + // If we need to IGNORE NULLS for the child, and there are NULLs, + // then build an SV to hold them + const auto coll_count = coll_chunk.size(); + auto &child = coll_chunk.data[gvstate.child_idx]; + UnifiedVectorFormat child_data; + child.ToUnifiedFormat(coll_count, child_data); + const auto &validity = child_data.validity; + if (gstate.executor.wexpr.ignore_nulls && !validity.AllValid()) { + for (sel_t i = 0; i < coll_count; ++i) { + if (validity.RowIsValidUnsafe(i)) { + sort_nulls[filtered++] = i; + } + } + filter_sel = &sort_nulls; + } + + auto &local_index = local_sort->Cast(); + local_index.SinkChunk(sink_chunk, input_idx, filter_sel, filtered); + } +} + void WindowValueLocalState::Finalize(WindowExecutorGlobalState &gstate, CollectionPtr collection) { WindowExecutorBoundsState::Finalize(gstate, collection); + if (local_sort) { + auto &local_index = local_sort->Cast(); + local_index.Sort(); + local_index.index_tree.Build(); + } + // Prepare to scan if (!cursor && gvstate.child_idx != DConstants::INVALID_INDEX) { cursor = make_uniq(*collection, gvstate.child_idx); @@ -87,6 +144,10 @@ WindowValueExecutor::WindowValueExecutor(BoundWindowExpression &wexpr, ClientCon offset_idx = shared.RegisterEvaluate(wexpr.offset_expr); default_idx = shared.RegisterEvaluate(wexpr.default_expr); + + for (const auto &order : wexpr.arg_orders) { + sort_idx.emplace_back(shared.RegisterSink(order.expression)); + } } WindowNtileExecutor::WindowNtileExecutor(BoundWindowExpression &wexpr, ClientContext &context, @@ -282,6 +343,21 @@ void 
WindowFirstValueExecutor::EvaluateInternal(WindowExecutorGlobalState &gstat auto &ignore_nulls = *gvstate.ignore_nulls; auto exclude_mode = gvstate.executor.wexpr.exclude_clause; WindowAggregator::EvaluateSubFrames(bounds, exclude_mode, count, row_idx, frames, [&](idx_t i) { + if (gvstate.inner_sort) { + idx_t frame_width = 0; + for (const auto &frame : frames) { + frame_width += frame.end - frame.start; + } + + if (frame_width) { + const auto first_idx = gvstate.inner_sort->SelectNth(frames, 0); + cursor.CopyCell(0, first_idx, result, i); + } else { + FlatVector::SetNull(result, i, true); + } + return; + } + for (const auto &frame : frames) { if (frame.start >= frame.end) { continue; @@ -317,6 +393,21 @@ void WindowLastValueExecutor::EvaluateInternal(WindowExecutorGlobalState &gstate auto &ignore_nulls = *gvstate.ignore_nulls; auto exclude_mode = gvstate.executor.wexpr.exclude_clause; WindowAggregator::EvaluateSubFrames(bounds, exclude_mode, count, row_idx, frames, [&](idx_t i) { + if (gvstate.inner_sort) { + idx_t frame_width = 0; + for (const auto &frame : frames) { + frame_width += frame.end - frame.start; + } + + if (frame_width) { + const auto last_idx = gvstate.inner_sort->SelectNth(frames, frame_width - 1); + cursor.CopyCell(0, last_idx, result, i); + } else { + FlatVector::SetNull(result, i, true); + } + return; + } + for (idx_t f = frames.size(); f-- > 0;) { const auto &frame = frames[f]; if (frame.start >= frame.end) { @@ -368,6 +459,21 @@ void WindowNthValueExecutor::EvaluateInternal(WindowExecutorGlobalState &gstate, // Decrement as we go along. 
auto n = idx_t(n_param); + if (gvstate.inner_sort) { + idx_t frame_width = 0; + for (const auto &frame : frames) { + frame_width += frame.end - frame.start; + } + + if (n < frame_width) { + const auto nth_index = gvstate.inner_sort->SelectNth(frames, n - 1); + cursor.CopyCell(0, nth_index, result, i); + } else { + FlatVector::SetNull(result, i, true); + } + return; + } + for (const auto &frame : frames) { if (frame.start >= frame.end) { continue; diff --git a/src/duckdb/src/include/duckdb/catalog/catalog.hpp b/src/duckdb/src/include/duckdb/catalog/catalog.hpp index 5d219cdf..dbef14ee 100644 --- a/src/duckdb/src/include/duckdb/catalog/catalog.hpp +++ b/src/duckdb/src/include/duckdb/catalog/catalog.hpp @@ -299,6 +299,10 @@ class Catalog { unique_ptr plan) = 0; virtual unique_ptr BindCreateIndex(Binder &binder, CreateStatement &stmt, TableCatalogEntry &table, unique_ptr plan) = 0; + virtual unique_ptr BindAlterAddIndex(Binder &binder, TableCatalogEntry &table_entry, + unique_ptr plan, + unique_ptr create_info, + unique_ptr alter_info); virtual DatabaseSize GetDatabaseSize(ClientContext &context) = 0; virtual vector GetMetadataInfo(ClientContext &context); diff --git a/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp b/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp index c8299f81..54bcb40d 100644 --- a/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp +++ b/src/duckdb/src/include/duckdb/catalog/duck_catalog.hpp @@ -1,7 +1,7 @@ //===----------------------------------------------------------------------===// // DuckDB // -// duckdb/catalog/dcatalog.hpp +// duckdb/catalog/duck_catalog.hpp // // //===----------------------------------------------------------------------===// @@ -49,6 +49,11 @@ class DuckCatalog : public Catalog { DUCKDB_API unique_ptr BindCreateIndex(Binder &binder, CreateStatement &stmt, TableCatalogEntry &table, unique_ptr plan) override; + DUCKDB_API unique_ptr BindAlterAddIndex(Binder &binder, TableCatalogEntry &table_entry, + 
unique_ptr plan, + unique_ptr create_info, + unique_ptr alter_info) override; + CatalogSet &GetSchemaCatalogSet(); DatabaseSize GetDatabaseSize(ClientContext &context) override; diff --git a/src/duckdb/src/include/duckdb/common/types.hpp b/src/duckdb/src/include/duckdb/common/types.hpp index 72a91501..5c2492f8 100644 --- a/src/duckdb/src/include/duckdb/common/types.hpp +++ b/src/duckdb/src/include/duckdb/common/types.hpp @@ -252,7 +252,7 @@ struct LogicalType { inline PhysicalType InternalType() const { return physical_type_; } - inline const ExtraTypeInfo *AuxInfo() const { + inline const optional_ptr AuxInfo() const { return type_info_.get(); } inline bool IsNested() const { diff --git a/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp b/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp index b13a8a76..36e6b2e0 100644 --- a/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp +++ b/src/duckdb/src/include/duckdb/execution/merge_sort_tree.hpp @@ -253,6 +253,7 @@ struct MergeSortTree { return level; } +public: void Print() const { std::ostringstream out; const char *separator = " "; diff --git a/src/duckdb/src/include/duckdb/function/window/window_executor.hpp b/src/duckdb/src/include/duckdb/function/window/window_executor.hpp index 6d392bf9..44d3a905 100644 --- a/src/duckdb/src/include/duckdb/function/window/window_executor.hpp +++ b/src/duckdb/src/include/duckdb/function/window/window_executor.hpp @@ -58,7 +58,7 @@ class WindowExecutorLocalState : public WindowExecutorState { explicit WindowExecutorLocalState(const WindowExecutorGlobalState &gstate); - void Sink(WindowExecutorGlobalState &gstate, DataChunk &sink_chunk, DataChunk &coll_chunk, idx_t input_idx); + virtual void Sink(WindowExecutorGlobalState &gstate, DataChunk &sink_chunk, DataChunk &coll_chunk, idx_t input_idx); virtual void Finalize(WindowExecutorGlobalState &gstate, CollectionPtr collection); //! 
The state used for reading the range collection diff --git a/src/duckdb/src/include/duckdb/function/window/window_index_tree.hpp b/src/duckdb/src/include/duckdb/function/window/window_index_tree.hpp new file mode 100644 index 00000000..7d53e166 --- /dev/null +++ b/src/duckdb/src/include/duckdb/function/window/window_index_tree.hpp @@ -0,0 +1,112 @@ +//===----------------------------------------------------------------------===// +// DuckDB +// +// duckdb/function/window/window_index_tree.hpp +// +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include "duckdb/execution/merge_sort_tree.hpp" +#include "duckdb/planner/bound_result_modifier.hpp" + +#include "duckdb/function/window/window_aggregator.hpp" +#include "duckdb/common/sort/sort.hpp" +#include "duckdb/common/sort/partition_state.hpp" + +namespace duckdb { + +class WindowIndexTree; + +class WindowIndexTreeLocalState : public WindowAggregatorState { +public: + explicit WindowIndexTreeLocalState(WindowIndexTree &index_tree); + + //! Add a chunk to the local sort + void SinkChunk(DataChunk &chunk, const idx_t row_idx, optional_ptr filter_sel, idx_t filtered); + //! Sort the data + void Sort(); + //! Process sorted leaf data + void BuildLeaves(); + + //! The index tree we are building + WindowIndexTree &index_tree; + //! Thread-local sorting data + optional_ptr local_sort; + //! Buffer for the sort keys + DataChunk sort_chunk; + //! Buffer for the payload data + DataChunk payload_chunk; + //! Build stage + PartitionSortStage build_stage = PartitionSortStage::INIT; + //! 
Build task number + idx_t build_task; + +private: + void ExecuteSortTask(); +}; + +class WindowIndexTree { +public: + using GlobalSortStatePtr = unique_ptr; + using LocalSortStatePtr = unique_ptr; + + WindowIndexTree(ClientContext &context, const vector &orders, const vector &sort_idx, + const idx_t count); + WindowIndexTree(ClientContext &context, const BoundOrderModifier &order_bys, const vector &sort_idx, + const idx_t count); + virtual ~WindowIndexTree() = default; + + unique_ptr GetLocalState(); + + //! Make a local sort for a thread + optional_ptr AddLocalSort(); + + //! Sort state machine + bool TryPrepareSortStage(WindowIndexTreeLocalState &lstate); + //! Thread-safe post-sort cleanup + void CleanupSort(); + + //! Build the MST in parallel from the sorted data + void Build(); + + //! Find the Nth index in the set of subframes + idx_t SelectNth(const SubFrames &frames, idx_t n) const; + + //! The query context + ClientContext &context; + //! Thread memory limit + const idx_t memory_per_thread; + //! The column indices for sorting + const vector sort_idx; + //! The sorted data + GlobalSortStatePtr global_sort; + //! Finalize guard + mutex lock; + //! Local sort set + vector local_sorts; + //! Finalize stage + atomic build_stage; + //! Tasks launched + idx_t total_tasks = 0; + //! Tasks launched + idx_t tasks_assigned = 0; + //! Tasks landed + atomic tasks_completed; + //! The block starts (the scanner doesn't know this) plus the total count + vector block_starts; + + // Merge sort trees for various sizes + // Smaller is probably not worth the effort. + using MergeSortTree32 = MergeSortTree; + using MergeSortTree64 = MergeSortTree; + unique_ptr mst32; + unique_ptr mst64; + +private: + //! 
Find the starts of all the blocks + void MeasurePayloadBlocks(); +}; + +} // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/function/window/window_value_function.hpp b/src/duckdb/src/include/duckdb/function/window/window_value_function.hpp index b0357eab..9254b741 100644 --- a/src/duckdb/src/include/duckdb/function/window/window_value_function.hpp +++ b/src/duckdb/src/include/duckdb/function/window/window_value_function.hpp @@ -32,6 +32,8 @@ class WindowValueExecutor : public WindowExecutor { column_t offset_idx = DConstants::INVALID_INDEX; //! The column index of the default value column column_t default_idx = DConstants::INVALID_INDEX; + //! Thee column indices of the inner sort expressions + vector sort_idx; }; // diff --git a/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp b/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp index 4663c0f9..e41f4ad8 100644 --- a/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp +++ b/src/duckdb/src/include/duckdb/parser/expression/window_expression.hpp @@ -69,6 +69,11 @@ class WindowExpression : public ParsedExpression { unique_ptr offset_expr; unique_ptr default_expr; + //! The set of argument ordering clauses + //! These are distinct from the frame ordering clauses e.g., the "x" in + //! 
FIRST_VALUE(a ORDER BY x) OVER (PARTITION BY p ORDER BY s) + vector arg_orders; + public: bool IsWindow() const override { return true; @@ -108,6 +113,13 @@ class WindowExpression : public ParsedExpression { result += ", "; result += entry.default_expr->ToString(); } + // ORDER BY arguments + if (!entry.arg_orders.empty()) { + result += " ORDER BY "; + result += StringUtil::Join(entry.arg_orders, entry.arg_orders.size(), ", ", + [](const ORDER_NODE &order) { return order.ToString(); }); + } + // IGNORE NULLS if (entry.ignore_nulls) { result += " IGNORE NULLS"; diff --git a/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp b/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp index b105c66c..adf15ead 100644 --- a/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp +++ b/src/duckdb/src/include/duckdb/planner/expression/bound_window_expression.hpp @@ -54,6 +54,11 @@ class BoundWindowExpression : public Expression { unique_ptr offset_expr; unique_ptr default_expr; + //! The set of argument ordering clauses + //! These are distinct from the frame ordering clauses e.g., the "x" in + //! FIRST_VALUE(a ORDER BY x) OVER (PARTITION BY p ORDER BY s) + vector arg_orders; + //! 
Statistics belonging to the other expressions (start, end, offset, default) vector> expr_stats; diff --git a/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp b/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp index 0ff8169d..7ee56dc9 100644 --- a/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp +++ b/src/duckdb/src/optimizer/build_probe_side_optimizer.cpp @@ -48,7 +48,10 @@ BuildProbeSideOptimizer::BuildProbeSideOptimizer(ClientContext &context, Logical static void FlipChildren(LogicalOperator &op) { std::swap(op.children[0], op.children[1]); - if (op.type == LogicalOperatorType::LOGICAL_COMPARISON_JOIN || op.type == LogicalOperatorType::LOGICAL_DELIM_JOIN) { + switch (op.type) { + case LogicalOperatorType::LOGICAL_COMPARISON_JOIN: + case LogicalOperatorType::LOGICAL_DELIM_JOIN: + case LogicalOperatorType::LOGICAL_ASOF_JOIN: { auto &join = op.Cast(); join.join_type = InverseJoinType(join.join_type); for (auto &cond : join.conditions) { @@ -56,11 +59,20 @@ static void FlipChildren(LogicalOperator &op) { cond.comparison = FlipComparisonExpression(cond.comparison); } std::swap(join.left_projection_map, join.right_projection_map); + return; } - if (op.type == LogicalOperatorType::LOGICAL_ANY_JOIN) { + case LogicalOperatorType::LOGICAL_ANY_JOIN: { auto &join = op.Cast(); join.join_type = InverseJoinType(join.join_type); std::swap(join.left_projection_map, join.right_projection_map); + return; + } + case LogicalOperatorType::LOGICAL_CROSS_PRODUCT: { + // don't need to do anything here. 
+ return; + } + default: + throw InternalException("Flipping children, but children were not flipped"); } } diff --git a/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp b/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp index fb65c0ba..07cbf1dc 100644 --- a/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp +++ b/src/duckdb/src/optimizer/join_order/cardinality_estimator.cpp @@ -344,8 +344,8 @@ DenomInfo CardinalityEstimator::GetDenominator(JoinRelationSet &set) { &set_manager.Union(*subgraph_to_merge_into->relations, *subgraph_to_delete->relations); subgraph_to_merge_into->numerator_relations = &UpdateNumeratorRelations(*subgraph_to_merge_into, *subgraph_to_delete, edge); - subgraph_to_delete->relations = nullptr; subgraph_to_merge_into->denom = CalculateUpdatedDenom(*subgraph_to_merge_into, *subgraph_to_delete, edge); + subgraph_to_delete->relations = nullptr; auto remove_start = std::remove_if(subgraphs.begin(), subgraphs.end(), [](Subgraph2Denominator &s) { return !s.relations; }); subgraphs.erase(remove_start, subgraphs.end()); diff --git a/src/duckdb/src/optimizer/statistics/operator/propagate_window.cpp b/src/duckdb/src/optimizer/statistics/operator/propagate_window.cpp index 55dd7af7..3f3d9173 100644 --- a/src/duckdb/src/optimizer/statistics/operator/propagate_window.cpp +++ b/src/duckdb/src/optimizer/statistics/operator/propagate_window.cpp @@ -42,6 +42,9 @@ unique_ptr StatisticsPropagator::PropagateStatistics(LogicalWind } else { over_expr.expr_stats.push_back(nullptr); } + for (auto &bound_order : over_expr.arg_orders) { + bound_order.stats = PropagateExpression(bound_order.expression); + } } return std::move(node_stats); } diff --git a/src/duckdb/src/parser/expression/window_expression.cpp b/src/duckdb/src/parser/expression/window_expression.cpp index 66e7fe83..666bfbd3 100644 --- a/src/duckdb/src/parser/expression/window_expression.cpp +++ b/src/duckdb/src/parser/expression/window_expression.cpp @@ -82,13 +82,29 @@ 
bool WindowExpression::Equal(const WindowExpression &a, const WindowExpression & if (a.exclude_clause != b.exclude_clause) { return false; } - // check if the framing expressions are equivalentbind_ + // check if the framing expressions are equivalent if (!ParsedExpression::Equals(a.start_expr, b.start_expr) || !ParsedExpression::Equals(a.end_expr, b.end_expr) || !ParsedExpression::Equals(a.offset_expr, b.offset_expr) || !ParsedExpression::Equals(a.default_expr, b.default_expr)) { return false; } + // check if the argument orderings are equivalent + if (a.arg_orders.size() != b.arg_orders.size()) { + return false; + } + for (idx_t i = 0; i < a.arg_orders.size(); i++) { + if (a.arg_orders[i].type != b.arg_orders[i].type) { + return false; + } + if (a.arg_orders[i].null_order != b.arg_orders[i].null_order) { + return false; + } + if (!a.arg_orders[i].expression->Equals(*b.arg_orders[i].expression)) { + return false; + } + } + // check if the partitions are equivalent if (!ParsedExpression::ListEquals(a.partitions, b.partitions)) { return false; @@ -101,6 +117,9 @@ bool WindowExpression::Equal(const WindowExpression &a, const WindowExpression & if (a.orders[i].type != b.orders[i].type) { return false; } + if (a.orders[i].null_order != b.orders[i].null_order) { + return false; + } if (!a.orders[i].expression->Equals(*b.orders[i].expression)) { return false; } @@ -129,6 +148,10 @@ unique_ptr WindowExpression::Copy() const { new_window->orders.emplace_back(o.type, o.null_order, o.expression->Copy()); } + for (auto &o : arg_orders) { + new_window->arg_orders.emplace_back(o.type, o.null_order, o.expression->Copy()); + } + new_window->filter_expr = filter_expr ? 
filter_expr->Copy() : nullptr; new_window->start = start; diff --git a/src/duckdb/src/parser/parsed_expression_iterator.cpp b/src/duckdb/src/parser/parsed_expression_iterator.cpp index 8b29ed7f..2553ef00 100644 --- a/src/duckdb/src/parser/parsed_expression_iterator.cpp +++ b/src/duckdb/src/parser/parsed_expression_iterator.cpp @@ -140,6 +140,9 @@ void ParsedExpressionIterator::EnumerateChildren( if (window_expr.default_expr) { callback(window_expr.default_expr); } + for (auto &order : window_expr.arg_orders) { + callback(order.expression); + } break; } case ExpressionClass::BOUND_EXPRESSION: diff --git a/src/duckdb/src/parser/transform/expression/transform_function.cpp b/src/duckdb/src/parser/transform/expression/transform_function.cpp index 16c50fa7..5ce6e98a 100644 --- a/src/duckdb/src/parser/transform/expression/transform_function.cpp +++ b/src/duckdb/src/parser/transform/expression/transform_function.cpp @@ -110,6 +110,27 @@ bool Transformer::InWindowDefinition() { return false; } +static bool IsOrderableWindowFunction(ExpressionType type) { + switch (type) { + case ExpressionType::WINDOW_FIRST_VALUE: + case ExpressionType::WINDOW_LAST_VALUE: + case ExpressionType::WINDOW_NTH_VALUE: + return true; + case ExpressionType::WINDOW_LEAD: + case ExpressionType::WINDOW_LAG: + case ExpressionType::WINDOW_AGGREGATE: + case ExpressionType::WINDOW_ROW_NUMBER: + case ExpressionType::WINDOW_RANK: + case ExpressionType::WINDOW_RANK_DENSE: + case ExpressionType::WINDOW_PERCENT_RANK: + case ExpressionType::WINDOW_CUME_DIST: + case ExpressionType::WINDOW_NTILE: + return false; + default: + throw InternalException("Unknown orderable window type %s", ExpressionTypeToString(type).c_str()); + } +} + unique_ptr Transformer::TransformFuncCall(duckdb_libpgquery::PGFuncCall &root) { auto name = root.funcname; string catalog, schema, function_name; @@ -157,8 +178,8 @@ unique_ptr Transformer::TransformFuncCall(duckdb_libpgquery::P throw ParserException("DISTINCT is not implemented for 
non-aggregate window functions!"); } - if (root.agg_order) { - throw ParserException("ORDER BY is not implemented for window functions!"); + if (root.agg_order && !IsOrderableWindowFunction(win_fun_type)) { + throw ParserException("ORDER BY is not supported for the window function \"%s\"", lowercase_name.c_str()); } if (win_fun_type != ExpressionType::WINDOW_AGGREGATE && root.agg_filter) { @@ -182,6 +203,12 @@ unique_ptr Transformer::TransformFuncCall(duckdb_libpgquery::P expr->filter_expr = std::move(filter_expr); } + if (root.agg_order) { + auto order_bys = make_uniq(); + TransformOrderBy(root.agg_order, order_bys->orders); + expr->arg_orders = std::move(order_bys->orders); + } + if (win_fun_type == ExpressionType::WINDOW_AGGREGATE) { expr->children = std::move(children); } else { diff --git a/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp b/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp index 482c4e17..73231402 100644 --- a/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp +++ b/src/duckdb/src/planner/binder/expression/bind_window_expression.cpp @@ -204,6 +204,10 @@ BindResult BaseSelectBinder::BindWindow(WindowExpression &window, idx_t depth) { BindChild(window.offset_expr, depth, error); BindChild(window.default_expr, depth, error); + for (auto &order : window.arg_orders) { + BindChild(order.expression, depth, error); + } + inside_window = false; if (error.HasError()) { // failed to bind children of window function @@ -211,6 +215,11 @@ BindResult BaseSelectBinder::BindWindow(WindowExpression &window, idx_t depth) { } // Restore any collation expressions + for (auto &order : window.arg_orders) { + auto &order_expr = order.expression; + auto &bound_order = BoundExpression::GetExpression(*order_expr); + ExpressionBinder::PushCollation(context, bound_order, bound_order->return_type); + } for (auto &part_expr : window.partitions) { auto &bound_partition = BoundExpression::GetExpression(*part_expr); 
ExpressionBinder::PushCollation(context, bound_partition, bound_partition->return_type); @@ -354,6 +363,14 @@ BindResult BaseSelectBinder::BindWindow(WindowExpression &window, idx_t depth) { result->orders.emplace_back(type, null_order, std::move(expression)); } + // Argument orders are just like arguments, not frames + for (auto &order : window.arg_orders) { + auto type = config.ResolveOrder(order.type); + auto null_order = config.ResolveNullOrder(type, order.null_order); + auto expression = GetExpression(order.expression); + result->arg_orders.emplace_back(type, null_order, std::move(expression)); + } + result->filter_expr = CastWindowExpression(window.filter_expr, LogicalType::BOOLEAN); result->start_expr = CastWindowExpression(window.start_expr, start_type); result->end_expr = CastWindowExpression(window.end_expr, end_type); diff --git a/src/duckdb/src/planner/binder/statement/bind_simple.cpp b/src/duckdb/src/planner/binder/statement/bind_simple.cpp index 1dc795c8..d4d86284 100644 --- a/src/duckdb/src/planner/binder/statement/bind_simple.cpp +++ b/src/duckdb/src/planner/binder/statement/bind_simple.cpp @@ -1,6 +1,7 @@ #include "duckdb/catalog/catalog.hpp" #include "duckdb/catalog/catalog_entry/duck_table_entry.hpp" #include "duckdb/catalog/catalog_entry/view_catalog_entry.hpp" +#include "duckdb/catalog/duck_catalog.hpp" #include "duckdb/execution/index/art/art.hpp" #include "duckdb/function/table/table_scan.hpp" #include "duckdb/parser/constraints/unique_constraint.hpp" @@ -17,11 +18,21 @@ namespace duckdb { +unique_ptr DuckCatalog::BindAlterAddIndex(Binder &binder, TableCatalogEntry &table_entry, + unique_ptr plan, + unique_ptr create_info, + unique_ptr alter_info) { + D_ASSERT(plan->type == LogicalOperatorType::LOGICAL_GET); + IndexBinder index_binder(binder, binder.context); + return index_binder.BindCreateIndex(binder.context, std::move(create_info), table_entry, std::move(plan), + std::move(alter_info)); +} + BoundStatement 
Binder::BindAlterAddIndex(BoundStatement &result, CatalogEntry &entry, unique_ptr alter_info) { auto &table_info = alter_info->Cast(); auto &constraint_info = table_info.Cast(); - auto &table = entry.Cast(); + auto &table = entry.Cast(); auto &column_list = table.GetColumns(); auto bound_constraint = BindUniqueConstraint(*constraint_info.constraint, table_info.name, column_list); @@ -56,10 +67,9 @@ BoundStatement Binder::BindAlterAddIndex(BoundStatement &result, CatalogEntry &e auto &get = plan->Cast(); get.names = column_list.GetColumnNames(); - IndexBinder index_binder(*this, context); - auto op = index_binder.BindCreateIndex(context, std::move(create_index_info), table, std::move(plan), - unique_ptr_cast(std::move(alter_info))); - result.plan = std::move(op); + auto alter_table_info = unique_ptr_cast(std::move(alter_info)); + result.plan = table.catalog.BindAlterAddIndex(*this, table, std::move(plan), std::move(create_index_info), + std::move(alter_table_info)); return std::move(result); } diff --git a/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp b/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp index a1b9789a..f81c09d3 100644 --- a/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp +++ b/src/duckdb/src/planner/binder/tableref/bind_pivot.cpp @@ -422,6 +422,9 @@ unique_ptr Binder::BindPivot(PivotRef &ref, vector BoundWindowExpression::Copy() const { new_window->orders.emplace_back(o.type, o.null_order, o.expression->Copy()); } + for (auto &o : arg_orders) { + new_window->arg_orders.emplace_back(o.type, o.null_order, o.expression->Copy()); + } + new_window->filter_expr = filter_expr ? 
filter_expr->Copy() : nullptr; new_window->start = start; @@ -189,6 +203,7 @@ void BoundWindowExpression::Serialize(Serializer &serializer) const { serializer.WritePropertyWithDefault(211, "default_expr", default_expr, unique_ptr()); serializer.WriteProperty(212, "exclude_clause", exclude_clause); serializer.WriteProperty(213, "distinct", distinct); + serializer.WriteProperty(214, "arg_orders", arg_orders); } unique_ptr BoundWindowExpression::Deserialize(Deserializer &deserializer) { @@ -218,6 +233,7 @@ unique_ptr BoundWindowExpression::Deserialize(Deserializer &deserial deserializer.ReadPropertyWithExplicitDefault(211, "default_expr", result->default_expr, unique_ptr()); deserializer.ReadProperty(212, "exclude_clause", result->exclude_clause); deserializer.ReadProperty(213, "distinct", result->distinct); + deserializer.ReadProperty(214, "arg_orders", result->arg_orders); return std::move(result); } diff --git a/src/duckdb/src/planner/expression_iterator.cpp b/src/duckdb/src/planner/expression_iterator.cpp index 1188958b..31f4fccc 100644 --- a/src/duckdb/src/planner/expression_iterator.cpp +++ b/src/duckdb/src/planner/expression_iterator.cpp @@ -119,6 +119,9 @@ void ExpressionIterator::EnumerateChildren(Expression &expr, if (window_expr.default_expr) { callback(window_expr.default_expr); } + for (auto &order : window_expr.arg_orders) { + callback(order.expression); + } break; } case ExpressionClass::BOUND_UNNEST: { diff --git a/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp b/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp index 3ae8fe29..a5c79b3d 100644 --- a/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp +++ b/src/duckdb/src/storage/serialization/serialize_parsed_expression.cpp @@ -350,6 +350,7 @@ void WindowExpression::Serialize(Serializer &serializer) const { serializer.WritePropertyWithDefault>(213, "filter_expr", filter_expr); serializer.WritePropertyWithDefault(214, "exclude_clause", 
exclude_clause, WindowExcludeMode::NO_OTHER); serializer.WritePropertyWithDefault(215, "distinct", distinct); + serializer.WritePropertyWithDefault>(216, "arg_orders", arg_orders); } unique_ptr WindowExpression::Deserialize(Deserializer &deserializer) { @@ -370,6 +371,7 @@ unique_ptr WindowExpression::Deserialize(Deserializer &deseria deserializer.ReadPropertyWithDefault>(213, "filter_expr", result->filter_expr); deserializer.ReadPropertyWithExplicitDefault(214, "exclude_clause", result->exclude_clause, WindowExcludeMode::NO_OTHER); deserializer.ReadPropertyWithDefault(215, "distinct", result->distinct); + deserializer.ReadPropertyWithDefault>(216, "arg_orders", result->arg_orders); return std::move(result); } diff --git a/src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp b/src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp index e9930d89..ac1532ac 100644 --- a/src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp +++ b/src/duckdb/ub_extension_core_functions_aggregate_algebraic.cpp @@ -1,8 +1,8 @@ -#include "extension/core_functions/aggregate/algebraic/stddev.cpp" - #include "extension/core_functions/aggregate/algebraic/corr.cpp" -#include "extension/core_functions/aggregate/algebraic/covar.cpp" +#include "extension/core_functions/aggregate/algebraic/stddev.cpp" #include "extension/core_functions/aggregate/algebraic/avg.cpp" +#include "extension/core_functions/aggregate/algebraic/covar.cpp" + diff --git a/src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp b/src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp index 12ca3066..f8c552db 100644 --- a/src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp +++ b/src/duckdb/ub_extension_core_functions_aggregate_distributive.cpp @@ -1,19 +1,19 @@ #include "extension/core_functions/aggregate/distributive/skew.cpp" -#include "extension/core_functions/aggregate/distributive/kurtosis.cpp" +#include 
"extension/core_functions/aggregate/distributive/bool.cpp" #include "extension/core_functions/aggregate/distributive/bitstring_agg.cpp" -#include "extension/core_functions/aggregate/distributive/product.cpp" - -#include "extension/core_functions/aggregate/distributive/sum.cpp" - #include "extension/core_functions/aggregate/distributive/bitagg.cpp" -#include "extension/core_functions/aggregate/distributive/bool.cpp" +#include "extension/core_functions/aggregate/distributive/kurtosis.cpp" + +#include "extension/core_functions/aggregate/distributive/sum.cpp" #include "extension/core_functions/aggregate/distributive/arg_min_max.cpp" +#include "extension/core_functions/aggregate/distributive/product.cpp" + #include "extension/core_functions/aggregate/distributive/string_agg.cpp" #include "extension/core_functions/aggregate/distributive/approx_count.cpp" diff --git a/src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp b/src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp index 0362fc3e..2e18fa37 100644 --- a/src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp +++ b/src/duckdb/ub_extension_core_functions_aggregate_holistic.cpp @@ -1,12 +1,12 @@ +#include "extension/core_functions/aggregate/holistic/mad.cpp" + #include "extension/core_functions/aggregate/holistic/quantile.cpp" -#include "extension/core_functions/aggregate/holistic/approx_top_k.cpp" +#include "extension/core_functions/aggregate/holistic/reservoir_quantile.cpp" #include "extension/core_functions/aggregate/holistic/approximate_quantile.cpp" -#include "extension/core_functions/aggregate/holistic/mode.cpp" - -#include "extension/core_functions/aggregate/holistic/reservoir_quantile.cpp" +#include "extension/core_functions/aggregate/holistic/approx_top_k.cpp" -#include "extension/core_functions/aggregate/holistic/mad.cpp" +#include "extension/core_functions/aggregate/holistic/mode.cpp" diff --git a/src/duckdb/ub_extension_core_functions_aggregate_regression.cpp 
b/src/duckdb/ub_extension_core_functions_aggregate_regression.cpp index 7c97319a..3b271378 100644 --- a/src/duckdb/ub_extension_core_functions_aggregate_regression.cpp +++ b/src/duckdb/ub_extension_core_functions_aggregate_regression.cpp @@ -1,14 +1,14 @@ -#include "extension/core_functions/aggregate/regression/regr_sxx_syy.cpp" - #include "extension/core_functions/aggregate/regression/regr_avg.cpp" +#include "extension/core_functions/aggregate/regression/regr_intercept.cpp" + +#include "extension/core_functions/aggregate/regression/regr_sxx_syy.cpp" + #include "extension/core_functions/aggregate/regression/regr_count.cpp" #include "extension/core_functions/aggregate/regression/regr_slope.cpp" -#include "extension/core_functions/aggregate/regression/regr_sxy.cpp" - #include "extension/core_functions/aggregate/regression/regr_r2.cpp" -#include "extension/core_functions/aggregate/regression/regr_intercept.cpp" +#include "extension/core_functions/aggregate/regression/regr_sxy.cpp" diff --git a/src/duckdb/ub_extension_core_functions_scalar_blob.cpp b/src/duckdb/ub_extension_core_functions_scalar_blob.cpp index f50a9422..1eda3bad 100644 --- a/src/duckdb/ub_extension_core_functions_scalar_blob.cpp +++ b/src/duckdb/ub_extension_core_functions_scalar_blob.cpp @@ -1,4 +1,4 @@ -#include "extension/core_functions/scalar/blob/encode.cpp" - #include "extension/core_functions/scalar/blob/base64.cpp" +#include "extension/core_functions/scalar/blob/encode.cpp" + diff --git a/src/duckdb/ub_extension_core_functions_scalar_date.cpp b/src/duckdb/ub_extension_core_functions_scalar_date.cpp index 89ebc58f..11b7e875 100644 --- a/src/duckdb/ub_extension_core_functions_scalar_date.cpp +++ b/src/duckdb/ub_extension_core_functions_scalar_date.cpp @@ -1,20 +1,20 @@ -#include "extension/core_functions/scalar/date/date_part.cpp" - -#include "extension/core_functions/scalar/date/date_sub.cpp" - -#include "extension/core_functions/scalar/date/current.cpp" +#include 
"extension/core_functions/scalar/date/make_date.cpp" #include "extension/core_functions/scalar/date/date_trunc.cpp" -#include "extension/core_functions/scalar/date/epoch.cpp" +#include "extension/core_functions/scalar/date/date_part.cpp" -#include "extension/core_functions/scalar/date/to_interval.cpp" +#include "extension/core_functions/scalar/date/date_diff.cpp" + +#include "extension/core_functions/scalar/date/age.cpp" #include "extension/core_functions/scalar/date/time_bucket.cpp" -#include "extension/core_functions/scalar/date/age.cpp" +#include "extension/core_functions/scalar/date/date_sub.cpp" -#include "extension/core_functions/scalar/date/date_diff.cpp" +#include "extension/core_functions/scalar/date/current.cpp" -#include "extension/core_functions/scalar/date/make_date.cpp" +#include "extension/core_functions/scalar/date/to_interval.cpp" + +#include "extension/core_functions/scalar/date/epoch.cpp" diff --git a/src/duckdb/ub_extension_core_functions_scalar_list.cpp b/src/duckdb/ub_extension_core_functions_scalar_list.cpp index 5fdfebc4..87d225f9 100644 --- a/src/duckdb/ub_extension_core_functions_scalar_list.cpp +++ b/src/duckdb/ub_extension_core_functions_scalar_list.cpp @@ -1,22 +1,22 @@ -#include "extension/core_functions/scalar/list/array_slice.cpp" - -#include "extension/core_functions/scalar/list/list_transform.cpp" - -#include "extension/core_functions/scalar/list/list_reduce.cpp" - #include "extension/core_functions/scalar/list/flatten.cpp" #include "extension/core_functions/scalar/list/list_distance.cpp" -#include "extension/core_functions/scalar/list/list_has_any_or_all.cpp" +#include "extension/core_functions/scalar/list/array_slice.cpp" -#include "extension/core_functions/scalar/list/list_sort.cpp" +#include "extension/core_functions/scalar/list/list_transform.cpp" #include "extension/core_functions/scalar/list/list_filter.cpp" -#include "extension/core_functions/scalar/list/list_value.cpp" +#include 
"extension/core_functions/scalar/list/list_reduce.cpp" + +#include "extension/core_functions/scalar/list/list_sort.cpp" #include "extension/core_functions/scalar/list/list_aggregates.cpp" +#include "extension/core_functions/scalar/list/list_has_any_or_all.cpp" + #include "extension/core_functions/scalar/list/range.cpp" +#include "extension/core_functions/scalar/list/list_value.cpp" + diff --git a/src/duckdb/ub_extension_core_functions_scalar_map.cpp b/src/duckdb/ub_extension_core_functions_scalar_map.cpp index 14edf3a9..b191443c 100644 --- a/src/duckdb/ub_extension_core_functions_scalar_map.cpp +++ b/src/duckdb/ub_extension_core_functions_scalar_map.cpp @@ -1,14 +1,14 @@ -#include "extension/core_functions/scalar/map/cardinality.cpp" +#include "extension/core_functions/scalar/map/map_keys_values.cpp" -#include "extension/core_functions/scalar/map/map_entries.cpp" +#include "extension/core_functions/scalar/map/cardinality.cpp" #include "extension/core_functions/scalar/map/map_concat.cpp" -#include "extension/core_functions/scalar/map/map.cpp" - -#include "extension/core_functions/scalar/map/map_keys_values.cpp" +#include "extension/core_functions/scalar/map/map_extract.cpp" #include "extension/core_functions/scalar/map/map_from_entries.cpp" -#include "extension/core_functions/scalar/map/map_extract.cpp" +#include "extension/core_functions/scalar/map/map.cpp" + +#include "extension/core_functions/scalar/map/map_entries.cpp" diff --git a/src/duckdb/ub_extension_core_functions_scalar_random.cpp b/src/duckdb/ub_extension_core_functions_scalar_random.cpp index 3e74feb3..f71b7b4c 100644 --- a/src/duckdb/ub_extension_core_functions_scalar_random.cpp +++ b/src/duckdb/ub_extension_core_functions_scalar_random.cpp @@ -1,4 +1,4 @@ -#include "extension/core_functions/scalar/random/setseed.cpp" - #include "extension/core_functions/scalar/random/random.cpp" +#include "extension/core_functions/scalar/random/setseed.cpp" + diff --git 
a/src/duckdb/ub_extension_core_functions_scalar_string.cpp b/src/duckdb/ub_extension_core_functions_scalar_string.cpp index 1b8f5770..b57063a4 100644 --- a/src/duckdb/ub_extension_core_functions_scalar_string.cpp +++ b/src/duckdb/ub_extension_core_functions_scalar_string.cpp @@ -1,48 +1,48 @@ -#include "extension/core_functions/scalar/string/reverse.cpp" +#include "extension/core_functions/scalar/string/hex.cpp" -#include "extension/core_functions/scalar/string/levenshtein.cpp" +#include "extension/core_functions/scalar/string/unicode.cpp" -#include "extension/core_functions/scalar/string/damerau_levenshtein.cpp" +#include "extension/core_functions/scalar/string/translate.cpp" -#include "extension/core_functions/scalar/string/repeat.cpp" +#include "extension/core_functions/scalar/string/left_right.cpp" -#include "extension/core_functions/scalar/string/to_base.cpp" +#include "extension/core_functions/scalar/string/hamming.cpp" -#include "extension/core_functions/scalar/string/jaccard.cpp" +#include "extension/core_functions/scalar/string/damerau_levenshtein.cpp" -#include "extension/core_functions/scalar/string/trim.cpp" +#include "extension/core_functions/scalar/string/ascii.cpp" -#include "extension/core_functions/scalar/string/translate.cpp" +#include "extension/core_functions/scalar/string/starts_with.cpp" -#include "extension/core_functions/scalar/string/hex.cpp" +#include "extension/core_functions/scalar/string/repeat.cpp" #include "extension/core_functions/scalar/string/chr.cpp" -#include "extension/core_functions/scalar/string/pad.cpp" - -#include "extension/core_functions/scalar/string/jaro_winkler.cpp" - -#include "extension/core_functions/scalar/string/starts_with.cpp" - -#include "extension/core_functions/scalar/string/hamming.cpp" +#include "extension/core_functions/scalar/string/levenshtein.cpp" -#include "extension/core_functions/scalar/string/left_right.cpp" +#include "extension/core_functions/scalar/string/pad.cpp" -#include 
"extension/core_functions/scalar/string/unicode.cpp" +#include "extension/core_functions/scalar/string/bar.cpp" #include "extension/core_functions/scalar/string/replace.cpp" +#include "extension/core_functions/scalar/string/to_base.cpp" + #include "extension/core_functions/scalar/string/printf.cpp" -#include "extension/core_functions/scalar/string/parse_path.cpp" +#include "extension/core_functions/scalar/string/format_bytes.cpp" #include "extension/core_functions/scalar/string/instr.cpp" -#include "extension/core_functions/scalar/string/ascii.cpp" +#include "extension/core_functions/scalar/string/url_encode.cpp" -#include "extension/core_functions/scalar/string/bar.cpp" +#include "extension/core_functions/scalar/string/jaccard.cpp" -#include "extension/core_functions/scalar/string/format_bytes.cpp" +#include "extension/core_functions/scalar/string/reverse.cpp" -#include "extension/core_functions/scalar/string/url_encode.cpp" +#include "extension/core_functions/scalar/string/trim.cpp" + +#include "extension/core_functions/scalar/string/parse_path.cpp" + +#include "extension/core_functions/scalar/string/jaro_winkler.cpp" diff --git a/src/duckdb/ub_extension_core_functions_scalar_union.cpp b/src/duckdb/ub_extension_core_functions_scalar_union.cpp index 5b68509a..c23d6ebf 100644 --- a/src/duckdb/ub_extension_core_functions_scalar_union.cpp +++ b/src/duckdb/ub_extension_core_functions_scalar_union.cpp @@ -1,6 +1,6 @@ -#include "extension/core_functions/scalar/union/union_tag.cpp" +#include "extension/core_functions/scalar/union/union_value.cpp" #include "extension/core_functions/scalar/union/union_extract.cpp" -#include "extension/core_functions/scalar/union/union_value.cpp" +#include "extension/core_functions/scalar/union/union_tag.cpp" diff --git a/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp b/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp index 4d22eddd..de69a2f9 100644 --- a/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp +++ 
b/src/duckdb/ub_extension_icu_third_party_icu_i18n.cpp @@ -348,17 +348,17 @@ #include "extension/icu/third_party/icu/i18n/wintzimpl.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp" #include "extension/icu/third_party/icu/i18n/double-conversion-bignum.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-cached-powers.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-strtod.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-double-to-string.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-fast-dtoa.cpp" -#include "extension/icu/third_party/icu/i18n/double-conversion-string-to-double.cpp" +#include "extension/icu/third_party/icu/i18n/double-conversion-bignum-dtoa.cpp" diff --git a/src/duckdb/ub_src_function_window.cpp b/src/duckdb/ub_src_function_window.cpp index 3d74f068..2c8af3c1 100644 --- a/src/duckdb/ub_src_function_window.cpp +++ b/src/duckdb/ub_src_function_window.cpp @@ -18,6 +18,8 @@ #include "src/function/window/window_executor.cpp" +#include "src/function/window/window_index_tree.cpp" + #include "src/function/window/window_naive_aggregator.cpp" #include "src/function/window/window_rank_function.cpp"