From 7169c078bf364af572f97e6bcd387d2da5533262 Mon Sep 17 00:00:00 2001 From: Qi Chen Date: Mon, 25 Mar 2024 21:13:43 +0800 Subject: [PATCH 01/22] [Fix](hive-writer) Fix the issue of block was not copied to do filtering when hive partition writer write block to file. (#32775) --- be/src/vec/sink/writer/vhive_partition_writer.cpp | 4 ++-- .../hive/write/test_hive_write_insert.out | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/be/src/vec/sink/writer/vhive_partition_writer.cpp b/be/src/vec/sink/writer/vhive_partition_writer.cpp index 9fe706c5e92b5cd..1a8cc083bc070ed 100644 --- a/be/src/vec/sink/writer/vhive_partition_writer.cpp +++ b/be/src/vec/sink/writer/vhive_partition_writer.cpp @@ -248,7 +248,7 @@ Status VHivePartitionWriter::_projection_and_filter_block(doris::vectorized::Blo return status; } RETURN_IF_ERROR(vectorized::VExprContext::get_output_block_after_execute_exprs( - _vec_output_expr_ctxs, input_block, output_block)); + _vec_output_expr_ctxs, input_block, output_block, true)); materialize_block_inplace(*output_block); if (filter == nullptr) { @@ -282,4 +282,4 @@ THivePartitionUpdate VHivePartitionWriter::_build_partition_update() { } } // namespace vectorized -} // namespace doris \ No newline at end of file +} // namespace doris diff --git a/regression-test/data/external_table_p0/hive/write/test_hive_write_insert.out b/regression-test/data/external_table_p0/hive/write/test_hive_write_insert.out index 2852d887a9be7c5..7b5c0e7d7bcd9b9 100644 --- a/regression-test/data/external_table_p0/hive/write/test_hive_write_insert.out +++ b/regression-test/data/external_table_p0/hive/write/test_hive_write_insert.out @@ -58,16 +58,16 @@ true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5 true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint": 1234567890} {"key":[{"s_int": 123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": 123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": 123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": 123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -- !q02 -- -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123457 2024-03-21T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} 
{2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint": -1234567890} {"key":[{"s_int": -123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": -123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": -123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": -123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240321 +false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123457 2024-03-21T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint": -1234567890} {"key":[{"s_int": -123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": -123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": -123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": -123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123457 2024-03-22T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint": -1234567890} {"key":[{"s_int": -123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": -123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": -123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": -123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 true 127 32767 2147483647 
9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint": 1234567890} {"key":[{"s_int": 123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": 123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": 123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": 123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint": 1234567890} {"key":[{"s_int": -123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": 123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": 123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": 123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240320 +true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint": 1234567890} {"key":[{"s_int": 123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} 
{"struct_non_nulls_after_nulls1": 123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": 123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": 123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -- !q04 -- false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123457 2024-03-22T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint": -1234567890} {"key":[{"s_int": -123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": -123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": -123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": -123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 false -7 -15 16 -9223372036854775808 -123.45 -123456.789 123456789 -1234.5678 -123456.789012 -123456789.012345678901 str binary_value 2024-03-25 2024-03-25T12:00 2024-03-25T12:00:00.123457 2024-03-25T12:00:00.123457 char_value11111 char_value22222 char_value33333 varchar_value11111 varchar_value22222 varchar_value33333 {"key7":"value1"} {"key7":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {5.3456:2.3456} {5.34567890:2.34567890} {2.34567890:2.34567890} {7.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [9.4567, 4.5678] [6.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint": -1234567890} {"key":[{"s_int": -123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": -123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": -123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": -123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240321 true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", 
"string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint": 1234567890} {"key":[{"s_int": 123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": 123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": 123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": 123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint": 1234567890} {"key":[{"s_int": -123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": 123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": 123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": 123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240320 +true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint": 1234567890} {"key":[{"s_int": 123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": 123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": 123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": 123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -- !q01 -- false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 
2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint": -1234567890} {"key":[{"s_int": -123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": -123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": -123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": -123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 @@ -141,16 +141,16 @@ true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5 true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint": 1234567890} {"key":[{"s_int": 123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": 123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": 123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": 123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -- !q02 -- -false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123457 2024-03-21T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint": -1234567890} {"key":[{"s_int": -123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, 
"struct_field_null2": null} {"struct_non_nulls_after_nulls1": -123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": -123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": -123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240321 +false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123457 2024-03-21T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint": -1234567890} {"key":[{"s_int": -123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": -123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": -123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": -123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123457 2024-03-22T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint": -1234567890} {"key":[{"s_int": -123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": -123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": -123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": -123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} 
{1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint": 1234567890} {"key":[{"s_int": 123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": 123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": 123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": 123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint": 1234567890} {"key":[{"s_int": -123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": 123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": 123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": 123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240320 +true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint": 1234567890} {"key":[{"s_int": 123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": 123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": 123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": 123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -- !q04 -- false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 
-123456789.012345678901 string_value binary_value 2024-03-22 2024-03-22T12:00 2024-03-22T12:00:00.123457 2024-03-22T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint": -1234567890} {"key":[{"s_int": -123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": -123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": -123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": -123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240322 false -7 -15 16 -9223372036854775808 -123.45 -123456.789 123456789 -1234.5678 -123456.789012 -123456789.012345678901 str binary_value 2024-03-25 2024-03-25T12:00 2024-03-25T12:00:00.123457 2024-03-25T12:00:00.123457 char_value11111 char_value22222 char_value33333 varchar_value11111 varchar_value22222 varchar_value33333 {"key7":"value1"} {"key7":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {5.3456:2.3456} {5.34567890:2.34567890} {2.34567890:2.34567890} {7.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [9.4567, 4.5678] [6.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint": -1234567890} {"key":[{"s_int": -123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": -123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": -123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": -123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240321 true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint": 1234567890} {"key":[{"s_int": 123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": 123, 
"struct_non_nulls_after_nulls2": "value"} {"struct_field1": 123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": 123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {3:20} {3:200000000000} {3.2:20.2} {3.2:20.2} {0:1} {3.2:2.2} {3.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {3.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] [3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [8.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint": 1234567890} {"key":[{"s_int": -123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": 123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": 123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": 123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value11", "value2", null] [null, null, null] 20240320 +true 127 32767 2147483647 9223372036854775807 123.45 123456.789 123456789 1234.5678 123456.789012 123456789.012345678901 string_value binary_value 2024-03-20 2024-03-20T12:00 2024-03-20T12:00:00.123457 2024-03-20T12:00:00.123457 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"a":"b"} {1:10} {1:100000000000} {1.1:10.1} {1.1:10.1} {1:0} {1.1:1.1} {1.23:1.23} {1.2345:1.2345} {1.23456789:1.23456789} {1.23456789:1.23456789} {1.2345678901234568:1.2345678901234568} ["string1", "string2"] [1, 2, 3] [100000000000, 200000000000] [1.1, 2.2] [1.123456789, 2.123456789] [1, 0] ["varchar1", "varchar2"] ["char1", "char2"] [1.1, 2.2] [1.23, 2.34] [1.2345, 2.3456] [1.23456789, 2.34567891] [1.23456789, 2.34567891] [1.2345678901234568, 2.3456789012345679] {"s_bigint": 1234567890} {"key":[{"s_int": 123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": 123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": 123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": 123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240320 -- !q01 -- false -128 -32768 -2147483648 -9223372036854775808 -123.45 -123456.789 -123456789 -1234.5678 -123456.789012 -123456789.012345678901 string_value binary_value 2024-03-21 2024-03-21T12:00 2024-03-21T12:00:00.123456 2024-03-21T12:00:00.123456 char_value1 char_value2 char_value3 varchar_value1 varchar_value2 varchar_value3 {"key1":"value1"} {"key1":"value1"} {"x":"y"} {2:20} {2:200000000000} {2.2:20.2} {2.2:20.2} {0:1} {2.2:2.2} {2.34:2.34} {2.3456:2.3456} {2.34567890:2.34567890} {2.34567890:2.34567890} {2.3456789012345679:2.3456789012345679} ["string1", "string2"] [4, 5, 6] [300000000000, 400000000000] [3.3, 4.4] 
[3.123456789, 4.123456789] [0, 1] ["varchar1", "varchar2"] ["char1", "char2"] [3.3, 4.4] [3.45, 4.56] [3.4567, 4.5678] [3.45678901, 4.56789012] [3.45678901, 4.56789012] [3.4567890123456789, 4.5678901234567890] {"s_bigint": -1234567890} {"key":[{"s_int": -123}]} {"struct_field": ["value1", "value2"]} {"struct_field_null": null, "struct_field_null2": null} {"struct_non_nulls_after_nulls1": -123, "struct_non_nulls_after_nulls2": "value"} {"struct_field1": -123, "struct_field2": "value", "strict_field3": {"nested_struct_field1": -123, "nested_struct_field2": "nested_value"}} {"null_key":null} [null, "value1", "value2"] ["value1", null, "value2"] ["value1", "value2", null] [null, null, null] 20240321 From 55691ad7ef779379257321762b4313823ae8cbd3 Mon Sep 17 00:00:00 2001 From: Kaijie Chen Date: Mon, 25 Mar 2024 22:09:09 +0800 Subject: [PATCH 02/22] [fix](brpc) check failed socket before SetConnected (#32790) --- ...c-1.4.0-fix-stream-rpc-set-connected.patch | 40 ++++++++++++++++--- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/thirdparty/patches/brpc-1.4.0-fix-stream-rpc-set-connected.patch b/thirdparty/patches/brpc-1.4.0-fix-stream-rpc-set-connected.patch index 8e122a57b8f10b9..4328165cfdbb913 100644 --- a/thirdparty/patches/brpc-1.4.0-fix-stream-rpc-set-connected.patch +++ b/thirdparty/patches/brpc-1.4.0-fix-stream-rpc-set-connected.patch @@ -1,20 +1,48 @@ -From 9acc6ef89d8770d5516953e2eadf0c27a7d424fc Mon Sep 17 00:00:00 2001 +From 031194784d540235dfa6ba56bd196a7aad92e30c Mon Sep 17 00:00:00 2001 From: Kaijie Chen Date: Sun, 28 Jan 2024 15:58:31 +0800 Subject: [PATCH] fix set connected for stream rpc --- - src/brpc/policy/baidu_rpc_protocol.cpp | 11 +++++------ - 1 file changed, 5 insertions(+), 6 deletions(-) + src/brpc/policy/baidu_rpc_protocol.cpp | 25 ++++++++++++++++++------- + 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/src/brpc/policy/baidu_rpc_protocol.cpp b/src/brpc/policy/baidu_rpc_protocol.cpp -index 0239960e..c47903a6 100644 +index 0239960e..e8a90e34 100644 --- a/src/brpc/policy/baidu_rpc_protocol.cpp +++ b/src/brpc/policy/baidu_rpc_protocol.cpp -@@ -234,6 +234,11 @@ void SendRpcResponse(int64_t correlation_id, +@@ -25,6 +25,7 @@ + #include "butil/iobuf.h" // butil::IOBuf + #include "butil/raw_pack.h" // RawPacker RawUnpacker + #include "brpc/controller.h" // Controller ++#include "brpc/errno.pb.h" + #include "brpc/socket.h" // Socket + #include "brpc/server.h" // Server + #include "brpc/span.h" +@@ -212,7 +213,9 @@ void SendRpcResponse(int64_t correlation_id, + if (Socket::Address(response_stream_id, &stream_ptr) == 0) { + Stream* s = (Stream*)stream_ptr->conn(); + s->FillSettings(meta.mutable_stream_settings()); +- s->SetHostSocket(sock); ++ if (s->SetHostSocket(sock) != 0) { ++ LOG(WARNING) << "SetHostSocket failed"; ++ } + } else { + LOG(WARNING) << "Stream=" << response_stream_id + << " was closed before sending response"; +@@ -234,6 +237,20 @@ void SendRpcResponse(int64_t correlation_id, // Send rpc response over stream even if server side failed to create // stream for some reasons. if(cntl->has_remote_stream()){ ++ if (sock->Failed()) { ++ LOG(WARNING) << "Fail to write into " << *sock; ++ cntl->SetFailed(EFAILEDSOCKET, "Fail to write into %s", ++ sock->description().c_str()); ++ if (stream_ptr) { ++ ((Stream *)stream_ptr->conn())->Close(); ++ } ++ return; ++ } + if(stream_ptr) { + // Now it's ok the mark this server-side stream as connectted as all the + // written user data would follower the RPC response. 
@@ -23,7 +51,7 @@ index 0239960e..c47903a6 100644 // Send the response over stream to notify that this stream connection // is successfully built. // Response_stream can be INVALID_STREAM_ID when error occurs. -@@ -249,12 +254,6 @@ void SendRpcResponse(int64_t correlation_id, +@@ -249,12 +266,6 @@ void SendRpcResponse(int64_t correlation_id, } return; } From 1c4edb2ad7f8d7bf10bebfa235fe3b9fb1cac538 Mon Sep 17 00:00:00 2001 From: TengJianPing <18241664+jacktengg@users.noreply.github.com> Date: Mon, 25 Mar 2024 22:27:31 +0800 Subject: [PATCH 03/22] [fix](RF) fix 'Invalid value' error of RF of decimal type (#32749) --- be/src/vec/core/types.h | 13 +++ be/src/vec/exprs/vexpr.h | 15 ++- be/test/vec/data_types/decimal_test.cpp | 104 ++++++++++++++++++ .../join/test_runtimefilter_on_decimal.out | 8 ++ .../join/test_runtimefilter_on_decimal.groovy | 67 +++++++++++ 5 files changed, 203 insertions(+), 4 deletions(-) create mode 100644 regression-test/data/nereids_p0/join/test_runtimefilter_on_decimal.out create mode 100644 regression-test/suites/nereids_p0/join/test_runtimefilter_on_decimal.groovy diff --git a/be/src/vec/core/types.h b/be/src/vec/core/types.h index 8899b6ce0177354..fd3e16e0a2b886e 100644 --- a/be/src/vec/core/types.h +++ b/be/src/vec/core/types.h @@ -422,6 +422,13 @@ std::string decimal_to_string(const T& value, UInt32 scale) { return str; } +template +std::string decimal_to_string(const T& orig_value, UInt32 trunc_precision, UInt32 scale) { + T multiplier = decimal_scale_multiplier(trunc_precision); + T value = orig_value % multiplier; + return decimal_to_string(value, scale); +} + template size_t decimal_to_string(const T& value, char* dst, UInt32 scale, const T& scale_multiplier) { if (UNLIKELY(value == std::numeric_limits::min())) { @@ -621,6 +628,12 @@ struct Decimal { std::string to_string(UInt32 scale) const { return decimal_to_string(value, scale); } + // truncate to specified precision and scale, + // used by runtime filter only for now. + std::string to_string(UInt32 precision, UInt32 scale) const { + return decimal_to_string(value, precision, scale); + } + /** * Got the string representation of a decimal. * @param dst Store the result, should be pre-allocated. 
diff --git a/be/src/vec/exprs/vexpr.h b/be/src/vec/exprs/vexpr.h index 4c4f0aa6740c132..42a46d8a8f3b13f 100644 --- a/be/src/vec/exprs/vexpr.h +++ b/be/src/vec/exprs/vexpr.h @@ -374,28 +374,35 @@ Status create_texpr_literal_node(const void* data, TExprNode* node, int precisio const auto* origin_value = reinterpret_cast*>(data); (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL); TDecimalLiteral decimal_literal; - decimal_literal.__set_value(origin_value->to_string(scale)); + decimal_literal.__set_value(origin_value->to_string(precision, scale)); (*node).__set_decimal_literal(decimal_literal); (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL32, precision, scale)); } else if constexpr (T == TYPE_DECIMAL64) { const auto* origin_value = reinterpret_cast*>(data); (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL); TDecimalLiteral decimal_literal; - decimal_literal.__set_value(origin_value->to_string(scale)); + decimal_literal.__set_value(origin_value->to_string(precision, scale)); (*node).__set_decimal_literal(decimal_literal); (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL64, precision, scale)); } else if constexpr (T == TYPE_DECIMAL128I) { const auto* origin_value = reinterpret_cast*>(data); (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL); TDecimalLiteral decimal_literal; - decimal_literal.__set_value(origin_value->to_string(scale)); + // e.g. For a decimal(26,6) column, the initial value of the _min of the MinMax RF + // on the RF producer side is an int128 value with 38 digits of 9, and this is the + // final min value of the MinMax RF if the fragment instance has no data. + // Need to truncate the value to the right precision and scale here, to avoid + // error when casting string back to decimal later. + // TODO: this is a temporary solution, the best solution is to produce the + // right min max value at the producer side. 
+ decimal_literal.__set_value(origin_value->to_string(precision, scale)); (*node).__set_decimal_literal(decimal_literal); (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL128I, precision, scale)); } else if constexpr (T == TYPE_DECIMAL256) { const auto* origin_value = reinterpret_cast*>(data); (*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL); TDecimalLiteral decimal_literal; - decimal_literal.__set_value(origin_value->to_string(scale)); + decimal_literal.__set_value(origin_value->to_string(precision, scale)); (*node).__set_decimal_literal(decimal_literal); (*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL256, precision, scale)); } else if constexpr (T == TYPE_FLOAT) { diff --git a/be/test/vec/data_types/decimal_test.cpp b/be/test/vec/data_types/decimal_test.cpp index 0f4b95020140dd7..7f12876e148d1bf 100644 --- a/be/test/vec/data_types/decimal_test.cpp +++ b/be/test/vec/data_types/decimal_test.cpp @@ -23,6 +23,7 @@ #include #include "gtest/gtest_pred_impl.h" +#include "runtime/define_primitive_type.h" #include "runtime/raw_value.h" #include "runtime/type_limit.h" #include "util/string_parser.hpp" @@ -209,4 +210,107 @@ TEST(DecimalTest, hash) { EXPECT_EQ(hash_val, 12344); } } + +TEST(DecimalTest, to_string) { + { + Decimal32 dec(999999999); + auto dec_str = dec.to_string(9, 0); + EXPECT_EQ(dec_str, "999999999"); + dec_str = dec.to_string(9, 6); + EXPECT_EQ(dec_str, "999.999999"); + dec_str = dec.to_string(9, 9); + EXPECT_EQ(dec_str, "0.999999999"); + + dec_str = dec.to_string(8, 0); + EXPECT_EQ(dec_str, "99999999"); + dec_str = dec.to_string(8, 6); + EXPECT_EQ(dec_str, "99.999999"); + dec_str = dec.to_string(8, 8); + EXPECT_EQ(dec_str, "0.99999999"); + + dec_str = dec.to_string(10, 0); + EXPECT_EQ(dec_str, "999999999"); + dec_str = dec.to_string(10, 6); + EXPECT_EQ(dec_str, "999.999999"); + dec_str = dec.to_string(10, 9); + EXPECT_EQ(dec_str, "0.999999999"); + } + { + Decimal32 dec(-999999999); + auto dec_str = dec.to_string(9, 0); + EXPECT_EQ(dec_str, "-999999999"); + dec_str = dec.to_string(9, 6); + EXPECT_EQ(dec_str, "-999.999999"); + dec_str = dec.to_string(9, 9); + EXPECT_EQ(dec_str, "-0.999999999"); + + dec_str = dec.to_string(8, 0); + EXPECT_EQ(dec_str, "-99999999"); + dec_str = dec.to_string(8, 6); + EXPECT_EQ(dec_str, "-99.999999"); + dec_str = dec.to_string(8, 8); + EXPECT_EQ(dec_str, "-0.99999999"); + + dec_str = dec.to_string(10, 0); + EXPECT_EQ(dec_str, "-999999999"); + dec_str = dec.to_string(10, 6); + EXPECT_EQ(dec_str, "-999.999999"); + dec_str = dec.to_string(10, 9); + EXPECT_EQ(dec_str, "-0.999999999"); + } + { + std::string val_str("999999999999999999999999999999"); // 30 digits + StringParser::ParseResult parse_result; + Decimal128V3 dec = StringParser::string_to_decimal( + val_str.data(), val_str.size(), val_str.size(), 0, &parse_result); + EXPECT_EQ(parse_result, StringParser::ParseResult::PARSE_SUCCESS); + auto dec_str = dec.to_string(30, 0); + EXPECT_EQ(dec_str, "999999999999999999999999999999"); + dec_str = dec.to_string(30, 6); + EXPECT_EQ(dec_str, "999999999999999999999999.999999"); + dec_str = dec.to_string(30, 30); + EXPECT_EQ(dec_str, "0.999999999999999999999999999999"); + + dec_str = dec.to_string(20, 0); + EXPECT_EQ(dec_str, "99999999999999999999"); + dec_str = dec.to_string(20, 6); + EXPECT_EQ(dec_str, "99999999999999.999999"); + dec_str = dec.to_string(20, 20); + EXPECT_EQ(dec_str, "0.99999999999999999999"); + } + { + std::string val_str("-999999999999999999999999999999"); // 30 digits + StringParser::ParseResult 
parse_result; + Decimal128V3 dec = StringParser::string_to_decimal( + val_str.data(), val_str.size(), val_str.size(), 0, &parse_result); + EXPECT_EQ(parse_result, StringParser::ParseResult::PARSE_SUCCESS); + auto dec_str = dec.to_string(30, 0); + EXPECT_EQ(dec_str, "-999999999999999999999999999999"); + dec_str = dec.to_string(30, 6); + EXPECT_EQ(dec_str, "-999999999999999999999999.999999"); + dec_str = dec.to_string(30, 30); + EXPECT_EQ(dec_str, "-0.999999999999999999999999999999"); + + dec_str = dec.to_string(20, 0); + EXPECT_EQ(dec_str, "-99999999999999999999"); + dec_str = dec.to_string(20, 6); + EXPECT_EQ(dec_str, "-99999999999999.999999"); + dec_str = dec.to_string(20, 20); + EXPECT_EQ(dec_str, "-0.99999999999999999999"); + } + + { + Decimal256 dec(type_limit::max()); + // Decimal256 dec_min(type_limit::min()); + auto dec_str = dec.to_string(76, 0); + EXPECT_EQ(dec_str, + "9999999999999999999999999999999999999999999999999999999999999999999999999999"); + dec_str = dec.to_string(76, 6); + EXPECT_EQ(dec_str, + "9999999999999999999999999999999999999999999999999999999999999999999999.999999"); + dec_str = dec.to_string(76, 76); + EXPECT_EQ(dec_str, + "0.9999999999999999999999999999999999999999999999999999999999999999999999999999"); + } +} } // namespace doris::vectorized \ No newline at end of file diff --git a/regression-test/data/nereids_p0/join/test_runtimefilter_on_decimal.out b/regression-test/data/nereids_p0/join/test_runtimefilter_on_decimal.out new file mode 100644 index 000000000000000..bdcd7847029973a --- /dev/null +++ b/regression-test/data/nereids_p0/join/test_runtimefilter_on_decimal.out @@ -0,0 +1,8 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !dec_rftest_1 -- + +-- !dec_rftest_2 -- +-99999999999999999999.999999 -99999999999999999999.999999 +12345678901234567890.123456 12345678901234567890.123456 +99999999999999999999.999999 99999999999999999999.999999 + diff --git a/regression-test/suites/nereids_p0/join/test_runtimefilter_on_decimal.groovy b/regression-test/suites/nereids_p0/join/test_runtimefilter_on_decimal.groovy new file mode 100644 index 000000000000000..f1bef86bbb2c4fc --- /dev/null +++ b/regression-test/suites/nereids_p0/join/test_runtimefilter_on_decimal.groovy @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_runtimefilter_on_decimal", "nereids_p0") { + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + + // bug fix + sql "set disable_join_reorder=true;" + sql "set enable_runtime_filter_prune=false;" + sql "set runtime_filter_type='MIN_MAX';" + sql "set runtime_filter_wait_infinitely=true;" + + sql "drop table if exists decimal_rftest_l"; + sql "drop table if exists decimal_rftest_r"; + sql """ + CREATE TABLE `decimal_rftest_l` ( + `k1_dec_l` decimalv3(26, 6) + ) + DISTRIBUTED BY HASH(`k1_dec_l`) buckets 16 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + sql """ + CREATE TABLE `decimal_rftest_r` ( + `k1_dec_r` decimalv3(27, 6) + ) + DISTRIBUTED BY HASH(`k1_dec_r`) buckets 16 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + sql """ + insert into decimal_rftest_l values ("12345678901234567890.123456"); + """ + sql """ + insert into decimal_rftest_r values (null); + """ + qt_dec_rftest_1 """ + select /*+SET_VAR(parallel_pipeline_task_num=2)*/ * from decimal_rftest_l join decimal_rftest_r on k1_dec_l = k1_dec_r order by 1, 2; + """ + + sql """ + insert into decimal_rftest_l values ("-99999999999999999999.999999"), ("99999999999999999999.999999"); + """ + sql """ + insert into decimal_rftest_r values ("-99999999999999999999.999999"), ("12345678901234567890.123456"), ("99999999999999999999.999999"); + """ + qt_dec_rftest_2 """ + select /*+SET_VAR(parallel_pipeline_task_num=8)*/ * from decimal_rftest_l join decimal_rftest_r on k1_dec_l = k1_dec_r order by 1, 2; + """ +} From c2b0c48c2ab8d9755e9d99c51c09c210d2eff422 Mon Sep 17 00:00:00 2001 From: HHoflittlefish777 <77738092+HHoflittlefish777@users.noreply.github.com> Date: Mon, 25 Mar 2024 22:48:42 +0800 Subject: [PATCH 04/22] [opt](routine-load) optimize routine load task thread pool and related param(#32282) --- be/src/common/config.cpp | 4 ++-- be/src/common/config.h | 4 ++-- be/src/runtime/exec_env_init.cpp | 1 + .../routine_load_task_executor.cpp | 24 ++++++++++++------- .../routine_load/routine_load_task_executor.h | 6 +++-- .../routine_load_task_executor_test.cpp | 6 +++-- .../java/org/apache/doris/common/Config.java | 4 ++-- 7 files changed, 30 insertions(+), 19 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index ddf393d315c5ceb..66eb4444ba007ee 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -617,9 +617,9 @@ DEFINE_Bool(enable_metric_calculator, "true"); // max consumer num in one data consumer group, for routine load DEFINE_mInt32(max_consumer_num_per_group, "3"); -// the size of thread pool for routine load task. +// the max size of thread pool for routine load task. // this should be larger than FE config 'max_routine_load_task_num_per_be' (default 5) -DEFINE_Int32(routine_load_thread_pool_size, "10"); +DEFINE_Int32(max_routine_load_thread_pool_size, "1024"); // max external scan cache batch count, means cache max_memory_cache_batch_count * batch_size row // default is 20, batch_size's default value is 1024 means 20 * 1024 rows will be cached diff --git a/be/src/common/config.h b/be/src/common/config.h index e7b258ddbbdd02f..904fa5864bdb8b1 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -667,9 +667,9 @@ DECLARE_Bool(enable_metric_calculator); // max consumer num in one data consumer group, for routine load DECLARE_mInt32(max_consumer_num_per_group); -// the size of thread pool for routine load task. 
+// the max size of thread pool for routine load task. // this should be larger than FE config 'max_routine_load_task_num_per_be' (default 5) -DECLARE_Int32(routine_load_thread_pool_size); +DECLARE_Int32(max_routine_load_thread_pool_size); // max external scan cache batch count, means cache max_memory_cache_batch_count * batch_size row // default is 20, batch_size's default value is 1024 means 20 * 1024 rows will be cached diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index 2c46e4ab244e749..9fbed5099967297 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -234,6 +234,7 @@ Status ExecEnv::_init(const std::vector& store_paths, _function_client_cache = new BrpcClientCache(); _stream_load_executor = StreamLoadExecutor::create_shared(this); _routine_load_task_executor = new RoutineLoadTaskExecutor(this); + RETURN_IF_ERROR(_routine_load_task_executor->init()); _small_file_mgr = new SmallFileMgr(this, config::small_file_dir); _block_spill_mgr = new BlockSpillManager(store_paths); _group_commit_mgr = new GroupCommitMgr(this); diff --git a/be/src/runtime/routine_load/routine_load_task_executor.cpp b/be/src/runtime/routine_load/routine_load_task_executor.cpp index 3e5eb48afbca805..2e8303ca4cc5b78 100644 --- a/be/src/runtime/routine_load/routine_load_task_executor.cpp +++ b/be/src/runtime/routine_load/routine_load_task_executor.cpp @@ -62,10 +62,7 @@ using namespace ErrorCode; DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(routine_load_task_count, MetricUnit::NOUNIT); RoutineLoadTaskExecutor::RoutineLoadTaskExecutor(ExecEnv* exec_env) - : _exec_env(exec_env), - _thread_pool(config::routine_load_thread_pool_size, config::routine_load_thread_pool_size, - "routine_load"), - _data_consumer_pool(config::routine_load_consumer_pool_size) { + : _exec_env(exec_env), _data_consumer_pool(config::routine_load_consumer_pool_size) { REGISTER_HOOK_METRIC(routine_load_task_count, [this]() { // std::lock_guard l(_lock); return _task_map.size(); @@ -79,10 +76,19 @@ RoutineLoadTaskExecutor::~RoutineLoadTaskExecutor() { _task_map.clear(); } +Status RoutineLoadTaskExecutor::init() { + return ThreadPoolBuilder("routine_load") + .set_min_threads(0) + .set_max_threads(config::max_routine_load_thread_pool_size) + .set_max_queue_size(config::max_routine_load_thread_pool_size) + .build(&_thread_pool); +} + void RoutineLoadTaskExecutor::stop() { DEREGISTER_HOOK_METRIC(routine_load_task_count); - _thread_pool.shutdown(); - _thread_pool.join(); + if (_thread_pool) { + _thread_pool->shutdown(); + } _data_consumer_pool.stop(); } @@ -180,10 +186,10 @@ Status RoutineLoadTaskExecutor::submit_task(const TRoutineLoadTask& task) { return Status::OK(); } - if (_task_map.size() >= config::routine_load_thread_pool_size) { + if (_task_map.size() >= config::max_routine_load_thread_pool_size) { LOG(INFO) << "too many tasks in thread pool. 
reject task: " << UniqueId(task.id) << ", job id: " << task.job_id - << ", queue size: " << _thread_pool.get_queue_size() + << ", queue size: " << _thread_pool->get_queue_size() << ", current tasks num: " << _task_map.size(); return Status::TooManyTasks("{}_{}", UniqueId(task.id).to_string(), BackendOptions::get_localhost()); @@ -259,7 +265,7 @@ Status RoutineLoadTaskExecutor::submit_task(const TRoutineLoadTask& task) { _task_map[ctx->id] = ctx; // offer the task to thread pool - if (!_thread_pool.offer(std::bind( + if (!_thread_pool->submit_func(std::bind( &RoutineLoadTaskExecutor::exec_task, this, ctx, &_data_consumer_pool, [this](std::shared_ptr ctx) { std::unique_lock l(_lock); diff --git a/be/src/runtime/routine_load/routine_load_task_executor.h b/be/src/runtime/routine_load/routine_load_task_executor.h index e4ad8be59214fa7..b2a61612fe248ed 100644 --- a/be/src/runtime/routine_load/routine_load_task_executor.h +++ b/be/src/runtime/routine_load/routine_load_task_executor.h @@ -27,8 +27,8 @@ #include #include "runtime/routine_load/data_consumer_pool.h" +#include "util/threadpool.h" #include "util/uid_util.h" -#include "util/work_thread_pool.hpp" namespace doris { @@ -51,6 +51,8 @@ class RoutineLoadTaskExecutor { ~RoutineLoadTaskExecutor(); + Status init(); + void stop(); // submit a routine load task @@ -81,7 +83,7 @@ class RoutineLoadTaskExecutor { private: ExecEnv* _exec_env = nullptr; - PriorityThreadPool _thread_pool; + std::unique_ptr _thread_pool; DataConsumerPool _data_consumer_pool; std::mutex _lock; diff --git a/be/test/runtime/routine_load_task_executor_test.cpp b/be/test/runtime/routine_load_task_executor_test.cpp index 8a8dcc4d6774411..f95fdcfdadfb8a5 100644 --- a/be/test/runtime/routine_load_task_executor_test.cpp +++ b/be/test/runtime/routine_load_task_executor_test.cpp @@ -59,7 +59,7 @@ class RoutineLoadTaskExecutorTest : public testing::Test { _env.set_new_load_stream_mgr(NewLoadStreamMgr::create_unique()); _env.set_stream_load_executor(StreamLoadExecutor::create_unique(&_env)); - config::routine_load_thread_pool_size = 5; + config::max_routine_load_thread_pool_size = 1024; config::max_consumer_num_per_group = 3; } @@ -93,8 +93,10 @@ TEST_F(RoutineLoadTaskExecutorTest, exec_task) { task.__set_kafka_load_info(k_info); RoutineLoadTaskExecutor executor(&_env); - // submit task Status st; + st = executor.init(); + EXPECT_TRUE(st.ok()); + // submit task st = executor.submit_task(task); EXPECT_TRUE(st.ok()); diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 5f8b6c09624873c..f6af3a044aa1a18 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -1143,8 +1143,8 @@ public class Config extends ConfigBase { /** * the max concurrent routine load task num per BE. * This is to limit the num of routine load tasks sending to a BE, and it should also less - * than BE config 'routine_load_thread_pool_size'(default 10), - * which is the routine load task thread pool size on BE. + * than BE config 'max_routine_load_thread_pool_size'(default 1024), + * which is the routine load task thread pool max size on BE. 
*/ @ConfField(mutable = true, masterOnly = true) public static int max_routine_load_task_num_per_be = 5; From 18f1b6f99aad256fdd04efb7c8fe9b89d64c1c20 Mon Sep 17 00:00:00 2001 From: walter Date: Mon, 25 Mar 2024 23:00:22 +0800 Subject: [PATCH 05/22] [fix](merge-cloud) MS return KV_TXN_CONFLICT_RETRY_EXCEEDED_MAX_TIMES instead of KV_TXN_CONFLICT (#32763) For KV_TXN_CONFLICT, we should return KV_TXN_CONFLICT_RETRY_EXCEEDED_MAX_TIMES, because BE will retries the KV_TXN_CONFLICT error. --- cloud/src/meta-service/meta_service.h | 11 +++++++---- gensrc/proto/cloud.proto | 3 +++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/cloud/src/meta-service/meta_service.h b/cloud/src/meta-service/meta_service.h index 85232de7bd9ca89..08a6b0884e059f5 100644 --- a/cloud/src/meta-service/meta_service.h +++ b/cloud/src/meta-service/meta_service.h @@ -622,11 +622,14 @@ class MetaServiceProxy final : public MetaService { TEST_SYNC_POINT("MetaServiceProxy::call_impl:2"); if (--retry_times < 0) { + // For KV_TXN_CONFLICT, we should return KV_TXN_CONFLICT_RETRY_EXCEEDED_MAX_TIMES, + // because BE will retries the KV_TXN_CONFLICT error. resp->mutable_status()->set_code( - code == MetaServiceCode::KV_TXN_STORE_COMMIT_RETRYABLE ? KV_TXN_COMMIT_ERR - : code == MetaServiceCode::KV_TXN_STORE_GET_RETRYABLE ? KV_TXN_GET_ERR - : code == MetaServiceCode::KV_TXN_STORE_CREATE_RETRYABLE ? KV_TXN_CREATE_ERR - : code); + code == MetaServiceCode::KV_TXN_STORE_COMMIT_RETRYABLE ? KV_TXN_COMMIT_ERR + : code == MetaServiceCode::KV_TXN_STORE_GET_RETRYABLE ? KV_TXN_GET_ERR + : code == MetaServiceCode::KV_TXN_STORE_CREATE_RETRYABLE + ? KV_TXN_CREATE_ERR + : KV_TXN_CONFLICT_RETRY_EXCEEDED_MAX_TIMES); return; } diff --git a/gensrc/proto/cloud.proto b/gensrc/proto/cloud.proto index e91bc5ec4a5d5fa..cad9eac22baf093 100644 --- a/gensrc/proto/cloud.proto +++ b/gensrc/proto/cloud.proto @@ -1220,6 +1220,9 @@ enum MetaServiceCode { // partial update ROWSET_META_NOT_FOUND = 9001; + // The meta service retries KV_TXN_CONFLICT error but is exceeded the max times. + KV_TXN_CONFLICT_RETRY_EXCEEDED_MAX_TIMES = 10001; + UNDEFINED_ERR = 1000000; } From 19f2adee47dff6c339ad04d93d31ecc4061169b0 Mon Sep 17 00:00:00 2001 From: walter Date: Mon, 25 Mar 2024 23:00:37 +0800 Subject: [PATCH 06/22] [feature](merge-cloud) Add cloud related operation types (#32769) --- .../java/org/apache/doris/journal/JournalEntity.java | 7 +++++++ .../src/main/java/org/apache/doris/persist/EditLog.java | 6 ++++++ .../java/org/apache/doris/persist/OperationType.java | 5 +++++ .../java/org/apache/doris/persist/meta/MetaReader.java | 9 +++++++++ 4 files changed, 27 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java b/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java index 1c3f7444dd954cc..6f95f99aeb5f950 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java +++ b/fe/fe-core/src/main/java/org/apache/doris/journal/JournalEntity.java @@ -948,6 +948,13 @@ public void readFields(DataInput in) throws IOException { isRead = true; break; } + // FIXME: support cloud related operation types. 
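// Note: payload deserialization for these cloud-only op types is not implemented yet (see the
// FIXME above); the cases below only mark the entry as read so edit-log replay can continue.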
+ case OperationType.OP_UPDATE_CLOUD_REPLICA: + case OperationType.OP_MODIFY_TTL_SECONDS: + case OperationType.OP_MODIFY_CLOUD_WARM_UP_JOB: { + isRead = true; + break; + } default: { IOException e = new IOException(); LOG.error("UNKNOWN Operation Type {}", opCode, e); diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java index ffae17c6b1c6c92..e8f66d4ac583671 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/EditLog.java @@ -1200,6 +1200,12 @@ public static void loadJournal(Env env, Long logId, JournalEntity journal) { // TODO: implement this while statistics finished related work. break; } + case OperationType.OP_UPDATE_CLOUD_REPLICA: + case OperationType.OP_MODIFY_TTL_SECONDS: + case OperationType.OP_MODIFY_CLOUD_WARM_UP_JOB: { + // TODO: support cloud replated operation type. + break; + } default: { IOException e = new IOException(); LOG.error("UNKNOWN Operation Type {}", opCode, e); diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java b/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java index f5c71e58b72e63e..6911003232db055 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/OperationType.java @@ -384,6 +384,11 @@ public class OperationType { public static final short OP_ALTER_ROLE = 475; + // For cloud. + public static final short OP_UPDATE_CLOUD_REPLICA = 1000; + public static final short OP_MODIFY_TTL_SECONDS = 1001; + public static final short OP_MODIFY_CLOUD_WARM_UP_JOB = 1002; + /** * Get opcode name by op code. **/ diff --git a/fe/fe-core/src/main/java/org/apache/doris/persist/meta/MetaReader.java b/fe/fe-core/src/main/java/org/apache/doris/persist/meta/MetaReader.java index 8024105fe262118..8eb913d7f69b1fc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/persist/meta/MetaReader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/persist/meta/MetaReader.java @@ -100,6 +100,15 @@ public static void read(File imageFile, Env env) throws IOException, DdlExceptio LOG.info("Skip {} module since empty meta length in the end.", metaIndex.name); continue; } + // FIXME: pick cloudWarmUpJob and remove below codes. + if (metaIndex.name.equals("cloudWarmUpJob")) { + LOG.warn("meta modules {} is not supported yet, ignore and skip it", metaIndex.name); + // If this is the last module, nothing need to do. 
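// Note: when the unsupported "cloudWarmUpJob" module is not the last one in the image, the reader
// skips forward by (next module's offset - this module's offset) bytes so later modules still load.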
+ if (i < metaFooter.metaIndices.size() - 1) { + IOUtils.skipFully(dis, metaFooter.metaIndices.get(i + 1).offset - metaIndex.offset); + } + continue; + } // skip deprecated modules if (PersistMetaModules.DEPRECATED_MODULE_NAMES.contains(metaIndex.name)) { LOG.warn("meta modules {} is deprecated, ignore and skip it", metaIndex.name); From 3c0e72448779e3ba56f131480b8f9ff4b80a6f8e Mon Sep 17 00:00:00 2001 From: walter Date: Mon, 25 Mar 2024 23:01:44 +0800 Subject: [PATCH 07/22] [feature](merge-cloud) Add check long running task mechanism for recycle task (#32589) * [feature](merge-cloud) Set instance_recycler_worker_pool_size default 1 * We meet the error `responseCode=503 error="Please reduce your request rate.` with aws s3 storage in the recycler log, so set instance_recycler_worker_pool_size default 1 to reduce parallel of delete objects Co-authored-by: w41ter * [feature](merge-cloud) Add check long running task mechanism for recycle task * In order to report long running recycle task, implement a check_recycle_task function Co-authored-by: w41ter --------- Co-authored-by: Lei Zhang <27994433+SWJTU-ZhangLei@users.noreply.github.com> --- cloud/src/common/config.h | 5 +- cloud/src/recycler/recycler.cpp | 167 +++++++++++++++++++++++++------- cloud/src/recycler/recycler.h | 12 +++ cloud/test/recycler_test.cpp | 9 +- 4 files changed, 154 insertions(+), 39 deletions(-) diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h index 859271f6503fed6..03ae47abe569c1b 100644 --- a/cloud/src/common/config.h +++ b/cloud/src/common/config.h @@ -60,7 +60,7 @@ CONF_mInt64(dropped_partition_retention_seconds, "10800"); // 3h CONF_Strings(recycle_whitelist, ""); // Comma seprated list // These instances will not be recycled, only effective when whitelist is empty. 
CONF_Strings(recycle_blacklist, ""); // Comma seprated list -CONF_mInt32(instance_recycler_worker_pool_size, "10"); +CONF_mInt32(instance_recycler_worker_pool_size, "1"); CONF_Bool(enable_checker, "false"); // Currently only used for recycler test CONF_Bool(enable_inverted_check, "false"); @@ -69,6 +69,9 @@ CONF_mInt32(scan_instances_interval_seconds, "60"); // 1min // interval for check object CONF_mInt32(check_object_interval_seconds, "43200"); // 12hours +CONF_mInt64(check_recycle_task_interval_seconds, "600"); // 10min +CONF_mInt64(recycle_task_threshold_seconds, "10800"); // 3h + CONF_String(test_s3_ak, "ak"); CONF_String(test_s3_sk, "sk"); CONF_String(test_s3_endpoint, "endpoint"); diff --git a/cloud/src/recycler/recycler.cpp b/cloud/src/recycler/recycler.cpp index c765a58d0fcfbcb..1ca1e05f741e571 100644 --- a/cloud/src/recycler/recycler.cpp +++ b/cloud/src/recycler/recycler.cpp @@ -49,6 +49,8 @@ namespace doris::cloud { +using namespace std::chrono; + // return 0 for success get a key, 1 for key not found, negative for error [[maybe_unused]] static int txn_get(TxnKv* txn_kv, std::string_view key, std::string& val) { std::unique_ptr txn; @@ -143,6 +145,23 @@ static int txn_remove(TxnKv* txn_kv, std::vector keys) { } } +static inline void check_recycle_task(const std::string& instance_id, const std::string& task_name, + int64_t num_scanned, int64_t num_recycled, + int64_t start_time) { + if ((num_scanned % 10000) == 0 && (num_scanned > 0)) [[unlikely]] { + int64_t cost = + duration_cast(steady_clock::now().time_since_epoch()).count() - start_time; + if (cost > config::recycle_task_threshold_seconds) { + LOG_INFO("recycle task cost too much time cost={}s", cost) + .tag("instance_id", instance_id) + .tag("task", task_name) + .tag("num_scanned", num_scanned) + .tag("num_recycled", num_recycled); + } + } + return; +} + Recycler::Recycler(std::shared_ptr txn_kv) : txn_kv_(std::move(txn_kv)) { ip_port_ = std::string(butil::my_ip_cstr()) + ":" + std::to_string(config::brpc_listen_port); } @@ -221,7 +240,6 @@ void Recycler::recycle_callback() { } if (stopped()) return; LOG_INFO("begin to recycle instance").tag("instance_id", instance_id); - using namespace std::chrono; auto ctime_ms = duration_cast(system_clock::now().time_since_epoch()).count(); ret = instance_recycler->do_recycle(); // If instance recycler has been aborted, don't finish this job @@ -268,6 +286,23 @@ void Recycler::lease_recycle_jobs() { } } +void Recycler::check_recycle_tasks() { + while (!stopped()) { + std::unordered_map> recycling_instance_map; + { + std::lock_guard lock(mtx_); + recycling_instance_map = recycling_instance_map_; + } + for (auto& entry : recycling_instance_map) { + entry.second->check_recycle_tasks(); + } + + std::unique_lock lock(mtx_); + notifier_.wait_for(lock, std::chrono::seconds(config::check_recycle_task_interval_seconds), + [&]() { return stopped(); }); + } +} + int Recycler::start(brpc::Server* server) { instance_filter_.reset(config::recycle_whitelist, config::recycle_blacklist); @@ -298,6 +333,7 @@ int Recycler::start(brpc::Server* server) { } workers_.push_back(std::thread(std::mem_fn(&Recycler::lease_recycle_jobs), this)); + workers_.push_back(std::thread(std::mem_fn(&Recycler::check_recycle_tasks), this)); return 0; } @@ -470,7 +506,6 @@ int InstanceRecycler::recycle_deleted_instance() { LOG_INFO("begin to recycle deleted instance").tag("instance_id", instance_id_); int ret = 0; - using namespace std::chrono; auto start_time = steady_clock::now(); std::unique_ptr> 
defer_log_statistics((int*)0x01, [&](int*) { @@ -560,6 +595,7 @@ int InstanceRecycler::recycle_deleted_instance() { } int InstanceRecycler::recycle_indexes() { + const std::string task_name = "recycle_indexes"; int num_scanned = 0; int num_expired = 0; int num_recycled = 0; @@ -573,11 +609,13 @@ int InstanceRecycler::recycle_indexes() { LOG_INFO("begin to recycle indexes").tag("instance_id", instance_id_); - using namespace std::chrono; - auto start_time = steady_clock::now(); + int64_t start_time = duration_cast(steady_clock::now().time_since_epoch()).count(); + register_recycle_task(task_name, start_time); std::unique_ptr> defer_log_statistics((int*)0x01, [&](int*) { - auto cost = duration(steady_clock::now() - start_time).count(); + unregister_recycle_task(task_name); + int64_t cost = + duration_cast(steady_clock::now().time_since_epoch()).count() - start_time; LOG_INFO("recycle indexes finished, cost={}s", cost) .tag("instance_id", instance_id_) .tag("num_scanned", num_scanned) @@ -659,6 +697,7 @@ int InstanceRecycler::recycle_indexes() { return -1; } ++num_recycled; + check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); index_keys.push_back(k); return 0; }; @@ -678,6 +717,7 @@ int InstanceRecycler::recycle_indexes() { } int InstanceRecycler::recycle_partitions() { + const std::string task_name = "recycle_partitions"; int num_scanned = 0; int num_expired = 0; int num_recycled = 0; @@ -691,11 +731,13 @@ int InstanceRecycler::recycle_partitions() { LOG_INFO("begin to recycle partitions").tag("instance_id", instance_id_); - using namespace std::chrono; - auto start_time = steady_clock::now(); + int64_t start_time = duration_cast(steady_clock::now().time_since_epoch()).count(); + register_recycle_task(task_name, start_time); std::unique_ptr> defer_log_statistics((int*)0x01, [&](int*) { - auto cost = duration(steady_clock::now() - start_time).count(); + unregister_recycle_task(task_name); + int64_t cost = + duration_cast(steady_clock::now().time_since_epoch()).count() - start_time; LOG_INFO("recycle partitions finished, cost={}s", cost) .tag("instance_id", instance_id_) .tag("num_scanned", num_scanned) @@ -786,6 +828,7 @@ int InstanceRecycler::recycle_partitions() { } if (ret == 0) { ++num_recycled; + check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); partition_keys.push_back(k); if (part_pb.db_id() > 0) { version_keys.push_back(version_key( @@ -831,7 +874,6 @@ int InstanceRecycler::recycle_versions() { LOG_INFO("begin to recycle partition versions").tag("instance_id", instance_id_); - using namespace std::chrono; auto start_time = steady_clock::now(); std::unique_ptr> defer_log_statistics((int*)0x01, [&](int*) { @@ -928,7 +970,6 @@ int InstanceRecycler::recycle_tablets(int64_t table_id, int64_t index_id, int64_ .tag("index_id", index_id) .tag("partition_id", partition_id); - using namespace std::chrono; auto start_time = steady_clock::now(); std::unique_ptr> defer_log_statistics((int*)0x01, [&](int*) { @@ -1199,7 +1240,6 @@ int InstanceRecycler::recycle_tablet(int64_t tablet_id) { .tag("instance_id", instance_id_) .tag("tablet_id", tablet_id); - using namespace std::chrono; auto start_time = steady_clock::now(); std::unique_ptr> defer_log_statistics((int*)0x01, [&](int*) { @@ -1261,6 +1301,7 @@ int InstanceRecycler::recycle_tablet(int64_t tablet_id) { } int InstanceRecycler::recycle_rowsets() { + const std::string task_name = "recycle_rowsets"; int num_scanned = 0; int num_expired = 0; int num_prepare = 0; @@ -1277,11 +1318,13 
@@ int InstanceRecycler::recycle_rowsets() { LOG_INFO("begin to recycle rowsets").tag("instance_id", instance_id_); - using namespace std::chrono; - auto start_time = steady_clock::now(); + int64_t start_time = duration_cast(steady_clock::now().time_since_epoch()).count(); + register_recycle_task(task_name, start_time); std::unique_ptr> defer_log_statistics((int*)0x01, [&](int*) { - auto cost = duration(steady_clock::now() - start_time).count(); + unregister_recycle_task(task_name); + int64_t cost = + duration_cast(steady_clock::now().time_since_epoch()).count() - start_time; LOG_INFO("recycle rowsets finished, cost={}s", cost) .tag("instance_id", instance_id_) .tag("num_scanned", num_scanned) @@ -1325,6 +1368,8 @@ int InstanceRecycler::recycle_rowsets() { << instance_id_; } else { num_recycled.fetch_add(keys.size(), std::memory_order_relaxed); + check_recycle_task(instance_id_, "recycle_rowsets", num_scanned, + num_recycled, start_time); } }, 0); @@ -1472,6 +1517,7 @@ int InstanceRecycler::recycle_rowsets() { } int InstanceRecycler::recycle_tmp_rowsets() { + const std::string task_name = "recycle_tmp_rowsets"; int num_scanned = 0; int num_expired = 0; int num_recycled = 0; @@ -1487,11 +1533,13 @@ int InstanceRecycler::recycle_tmp_rowsets() { LOG_INFO("begin to recycle tmp rowsets").tag("instance_id", instance_id_); - using namespace std::chrono; - auto start_time = steady_clock::now(); + int64_t start_time = duration_cast(steady_clock::now().time_since_epoch()).count(); + register_recycle_task(task_name, start_time); std::unique_ptr> defer_log_statistics((int*)0x01, [&](int*) { - auto cost = duration(steady_clock::now() - start_time).count(); + unregister_recycle_task(task_name); + int64_t cost = + duration_cast(steady_clock::now().time_since_epoch()).count() - start_time; LOG_INFO("recycle tmp rowsets finished, cost={}s", cost) .tag("instance_id", instance_id_) .tag("num_scanned", num_scanned) @@ -1616,6 +1664,7 @@ int InstanceRecycler::scan_and_recycle( } int InstanceRecycler::abort_timeout_txn() { + const std::string task_name = "abort_timeout_txn"; int num_scanned = 0; int num_timeout = 0; int num_abort = 0; @@ -1629,11 +1678,13 @@ int InstanceRecycler::abort_timeout_txn() { LOG_INFO("begin to abort timeout txn").tag("instance_id", instance_id_); - using namespace std::chrono; - auto start_time = steady_clock::now(); + int64_t start_time = duration_cast(steady_clock::now().time_since_epoch()).count(); + register_recycle_task(task_name, start_time); std::unique_ptr> defer_log_statistics((int*)0x01, [&](int*) { - auto cost = duration(steady_clock::now() - start_time).count(); + unregister_recycle_task(task_name); + int64_t cost = + duration_cast(steady_clock::now().time_since_epoch()).count() - start_time; LOG_INFO("end to abort timeout txn, cost={}s", cost) .tag("instance_id", instance_id_) .tag("num_scanned", num_scanned) @@ -1731,6 +1782,7 @@ int InstanceRecycler::abort_timeout_txn() { } int InstanceRecycler::recycle_expired_txn_label() { + const std::string task_name = "recycle_expired_txn_label"; int num_scanned = 0; int num_expired = 0; int num_recycled = 0; @@ -1744,11 +1796,12 @@ int InstanceRecycler::recycle_expired_txn_label() { LOG_INFO("begin to recycle expire txn").tag("instance_id", instance_id_); - using namespace std::chrono; - auto start_time = steady_clock::now(); - + int64_t start_time = duration_cast(steady_clock::now().time_since_epoch()).count(); + register_recycle_task(task_name, start_time); std::unique_ptr> defer_log_statistics((int*)0x01, [&](int*) { - auto 
cost = duration(steady_clock::now() - start_time).count(); + unregister_recycle_task(task_name); + int64_t cost = + duration_cast(steady_clock::now().time_since_epoch()).count() - start_time; LOG_INFO("end to recycle expired txn, cost={}s", cost) .tag("instance_id", instance_id_) .tag("num_scanned", num_scanned) @@ -1970,14 +2023,17 @@ int InstanceRecycler::recycle_copy_jobs() { int num_recycled = 0; // Used for INTERNAL stage's copy jobs to tag each batch for log trace uint64_t batch_count = 0; + const std::string task_name = "recycle_copy_jobs"; LOG_INFO("begin to recycle copy jobs").tag("instance_id", instance_id_); - using namespace std::chrono; - auto start_time = steady_clock::now(); + int64_t start_time = duration_cast(steady_clock::now().time_since_epoch()).count(); + register_recycle_task(task_name, start_time); std::unique_ptr> defer_log_statistics((int*)0x01, [&](int*) { - auto cost = duration(steady_clock::now() - start_time).count(); + unregister_recycle_task(task_name); + int64_t cost = + duration_cast(steady_clock::now().time_since_epoch()).count() - start_time; LOG_INFO("recycle copy jobs finished, cost={}s", cost) .tag("instance_id", instance_id_) .tag("num_scanned", num_scanned) @@ -1993,8 +2049,9 @@ int InstanceRecycler::recycle_copy_jobs() { copy_job_key(key_info0, &key0); copy_job_key(key_info1, &key1); std::unordered_map> stage_accessor_map; - auto recycle_func = [&num_scanned, &num_finished, &num_expired, &num_recycled, &batch_count, - &stage_accessor_map, this](std::string_view k, std::string_view v) -> int { + auto recycle_func = [&start_time, &num_scanned, &num_finished, &num_expired, &num_recycled, + &batch_count, &stage_accessor_map, &task_name, + this](std::string_view k, std::string_view v) -> int { ++num_scanned; CopyJobPB copy_job; if (!copy_job.ParseFromArray(v.data(), v.size())) { @@ -2099,6 +2156,7 @@ int InstanceRecycler::recycle_copy_jobs() { } ++num_recycled; + check_recycle_task(instance_id_, task_name, num_scanned, num_recycled, start_time); return 0; }; @@ -2222,14 +2280,17 @@ int InstanceRecycler::init_copy_job_accessor(const std::string& stage_id, int InstanceRecycler::recycle_stage() { int num_scanned = 0; int num_recycled = 0; + const std::string task_name = "recycle_stage"; LOG_INFO("begin to recycle stage").tag("instance_id", instance_id_); - using namespace std::chrono; - auto start_time = steady_clock::now(); + int64_t start_time = duration_cast(steady_clock::now().time_since_epoch()).count(); + register_recycle_task(task_name, start_time); std::unique_ptr> defer_log_statistics((int*)0x01, [&](int*) { - auto cost = duration(steady_clock::now() - start_time).count(); + unregister_recycle_task(task_name); + int64_t cost = + duration_cast(steady_clock::now().time_since_epoch()).count() - start_time; LOG_INFO("recycle stage, cost={}s", cost) .tag("instance_id", instance_id_) .tag("num_scanned", num_scanned) @@ -2245,7 +2306,7 @@ int InstanceRecycler::recycle_stage() { // Elements in `tmp_rowset_keys` has the same lifetime as `it` std::vector stage_keys; - auto recycle_func = [&num_scanned, &num_recycled, &stage_keys, this]( + auto recycle_func = [&start_time, &num_scanned, &num_recycled, &stage_keys, this]( std::string_view k, std::string_view v) -> int { ++num_scanned; RecycleStagePB recycle_stage; @@ -2304,6 +2365,7 @@ int InstanceRecycler::recycle_stage() { return -1; } ++num_recycled; + check_recycle_task(instance_id_, "recycle_stage", num_scanned, num_recycled, start_time); stage_keys.push_back(k); return 0; }; @@ -2325,11 +2387,11 @@ int 
InstanceRecycler::recycle_stage() { int InstanceRecycler::recycle_expired_stage_objects() { LOG_INFO("begin to recycle expired stage objects").tag("instance_id", instance_id_); - using namespace std::chrono; - auto start_time = steady_clock::now(); + int64_t start_time = duration_cast(steady_clock::now().time_since_epoch()).count(); std::unique_ptr> defer_log_statistics((int*)0x01, [&](int*) { - auto cost = duration(steady_clock::now() - start_time).count(); + int64_t cost = + duration_cast(steady_clock::now().time_since_epoch()).count() - start_time; LOG_INFO("recycle expired stage objects, cost={}s", cost).tag("instance_id", instance_id_); }); int ret = 0; @@ -2391,4 +2453,37 @@ int InstanceRecycler::recycle_expired_stage_objects() { return ret; } +void InstanceRecycler::register_recycle_task(const std::string& task_name, int64_t start_time) { + std::lock_guard lock(recycle_tasks_mutex); + running_recycle_tasks[task_name] = start_time; +} + +void InstanceRecycler::unregister_recycle_task(const std::string& task_name) { + std::lock_guard lock(recycle_tasks_mutex); + DCHECK(running_recycle_tasks[task_name] > 0); + running_recycle_tasks.erase(task_name); +} + +bool InstanceRecycler::check_recycle_tasks() { + std::map tmp_running_recycle_tasks; + { + std::lock_guard lock(recycle_tasks_mutex); + tmp_running_recycle_tasks = running_recycle_tasks; + } + + bool found = false; + int64_t now = duration_cast(steady_clock::now().time_since_epoch()).count(); + for (auto& [task_name, start_time] : tmp_running_recycle_tasks) { + int64_t cost = now - start_time; + if (cost > config::recycle_task_threshold_seconds) [[unlikely]] { + LOG_INFO("recycle task cost too much time cost={}s", cost) + .tag("instance_id", instance_id_) + .tag("task", task_name); + found = true; + } + } + + return found; +} + } // namespace doris::cloud diff --git a/cloud/src/recycler/recycler.h b/cloud/src/recycler/recycler.h index 34139be6d1a3dbd..e745e815a796220 100644 --- a/cloud/src/recycler/recycler.h +++ b/cloud/src/recycler/recycler.h @@ -59,6 +59,8 @@ class Recycler { void lease_recycle_jobs(); + void check_recycle_tasks(); + private: friend class RecyclerServiceImpl; @@ -155,6 +157,8 @@ class InstanceRecycler { // returns 0 for success otherwise error int recycle_expired_stage_objects(); + bool check_recycle_tasks(); + private: /** * Scan key-value pairs between [`begin`, `end`), and perform `recycle_func` on each key-value pair. @@ -182,6 +186,10 @@ class InstanceRecycler { int init_copy_job_accessor(const std::string& stage_id, const StagePB::StageType& stage_type, std::shared_ptr* accessor); + void register_recycle_task(const std::string& task_name, int64_t start_time); + + void unregister_recycle_task(const std::string& task_name); + private: std::atomic_bool stopped_ {false}; std::shared_ptr txn_kv_; @@ -195,6 +203,10 @@ class InstanceRecycler { std::mutex recycled_tablets_mtx_; // Store recycled tablets, we can skip deleting rowset data of these tablets because these data has already been deleted. 
std::unordered_set recycled_tablets_; + + std::mutex recycle_tasks_mutex; + // > + std::map running_recycle_tasks; }; } // namespace doris::cloud diff --git a/cloud/test/recycler_test.cpp b/cloud/test/recycler_test.cpp index cf6d606585d457a..87c70833a30d22a 100644 --- a/cloud/test/recycler_test.cpp +++ b/cloud/test/recycler_test.cpp @@ -713,6 +713,7 @@ TEST(RecyclerTest, bench_recycle_rowsets) { obj_info->set_prefix("recycle_rowsets"); config::instance_recycler_worker_pool_size = 10; + config::recycle_task_threshold_seconds = 0; InstanceRecycler recycler(txn_kv, instance); ASSERT_EQ(recycler.init(), 0); @@ -723,8 +724,11 @@ TEST(RecyclerTest, bench_recycle_rowsets) { *((int*)limit) = 100; std::this_thread::sleep_for(std::chrono::milliseconds(5)); }); - sp->set_call_back("MockAccessor::delete_objects", - [&](void* p) { std::this_thread::sleep_for(std::chrono::milliseconds(20)); }); + sp->set_call_back("MockAccessor::delete_objects", [&](void* p) { + std::this_thread::sleep_for(std::chrono::seconds(1)); + bool found = recycler.check_recycle_tasks(); + ASSERT_EQ(found, true); + }); sp->set_call_back("MockAccessor::delete_objects_by_prefix", [&](void* p) { std::this_thread::sleep_for(std::chrono::milliseconds(20)); }); sp->enable_processing(); @@ -748,6 +752,7 @@ TEST(RecyclerTest, bench_recycle_rowsets) { } ASSERT_EQ(recycler.recycle_rowsets(), 0); + ASSERT_EQ(recycler.check_recycle_tasks(), false); // check rowset does not exist on obj store std::vector files; From 9e7d4160f1b0d4bf955df9acc93fc196a50c43bd Mon Sep 17 00:00:00 2001 From: walter Date: Mon, 25 Mar 2024 23:02:43 +0800 Subject: [PATCH 08/22] [feature](merge-cloud) Get routine load progress info from meta service (#32532) Co-authored-by: Luwei <814383175@qq.com> Co-authored-by: Gavin Chou --- cloud/src/common/bvars.cpp | 1 + cloud/src/common/bvars.h | 1 + cloud/src/meta-service/meta_service.h | 12 + cloud/src/meta-service/meta_service_txn.cpp | 238 ++++++++++++------ .../doris/cloud/rpc/MetaServiceClient.java | 11 + .../doris/cloud/rpc/MetaServiceProxy.java | 15 ++ .../load/routineload/KafkaRoutineLoadJob.java | 33 +++ .../load/routineload/RoutineLoadJob.java | 2 + .../routineload/RoutineLoadScheduler.java | 6 + gensrc/proto/cloud.proto | 24 ++ 10 files changed, 263 insertions(+), 80 deletions(-) diff --git a/cloud/src/common/bvars.cpp b/cloud/src/common/bvars.cpp index 702dea865022b2d..ab0b5934b509986 100644 --- a/cloud/src/common/bvars.cpp +++ b/cloud/src/common/bvars.cpp @@ -69,6 +69,7 @@ BvarLatencyRecorderWithTag g_bvar_ms_get_delete_bitmap("ms", "get_delete_bitmap" BvarLatencyRecorderWithTag g_bvar_ms_get_delete_bitmap_update_lock("ms", "get_delete_bitmap_update_lock"); BvarLatencyRecorderWithTag g_bvar_ms_get_instance("ms", "get_instance"); +BvarLatencyRecorderWithTag g_bvar_ms_get_rl_task_commit_attach("ms", "get_rl_task_commit_attach"); BvarLatencyRecorderWithTag g_bvar_ms_start_tablet_job("ms", "start_tablet_job"); BvarLatencyRecorderWithTag g_bvar_ms_finish_tablet_job("ms", "finish_tablet_job"); diff --git a/cloud/src/common/bvars.h b/cloud/src/common/bvars.h index 1c4c4f749b6fe13..dbdbfa834e98126 100644 --- a/cloud/src/common/bvars.h +++ b/cloud/src/common/bvars.h @@ -170,6 +170,7 @@ extern BvarLatencyRecorderWithTag g_bvar_ms_get_delete_bitmap_update_lock; extern BvarLatencyRecorderWithTag g_bvar_ms_get_cluster_status; extern BvarLatencyRecorderWithTag g_bvar_ms_set_cluster_status; extern BvarLatencyRecorderWithTag g_bvar_ms_get_instance; +extern BvarLatencyRecorderWithTag g_bvar_ms_get_rl_task_commit_attach; // 
txn_kv's bvars extern bvar::LatencyRecorder g_bvar_txn_kv_get; diff --git a/cloud/src/meta-service/meta_service.h b/cloud/src/meta-service/meta_service.h index 08a6b0884e059f5..a8cd0f521487b28 100644 --- a/cloud/src/meta-service/meta_service.h +++ b/cloud/src/meta-service/meta_service.h @@ -251,6 +251,11 @@ class MetaServiceImpl : public cloud::MetaService { GetClusterStatusResponse* response, ::google::protobuf::Closure* done) override; + void get_rl_task_commit_attach(::google::protobuf::RpcController* controller, + const GetRLTaskCommitAttachRequest* request, + GetRLTaskCommitAttachResponse* response, + ::google::protobuf::Closure* done) override; + // ATTN: If you add a new method, please also add the corresponding implementation in `MetaServiceProxy`. std::pair get_instance_info(const std::string& instance_id, @@ -574,6 +579,13 @@ class MetaServiceProxy final : public MetaService { call_impl(&cloud::MetaService::get_cluster_status, controller, request, response, done); } + void get_rl_task_commit_attach(::google::protobuf::RpcController* controller, + const GetRLTaskCommitAttachRequest* request, + GetRLTaskCommitAttachResponse* response, + ::google::protobuf::Closure* done) override { + call_impl(&cloud::MetaService::get_rl_task_commit_attach, controller, request, response, done); + } + private: template using MetaServiceMethod = void (cloud::MetaService::*)(::google::protobuf::RpcController*, diff --git a/cloud/src/meta-service/meta_service_txn.cpp b/cloud/src/meta-service/meta_service_txn.cpp index 788663c2ceac3f1..03251ee0f071a78 100644 --- a/cloud/src/meta-service/meta_service_txn.cpp +++ b/cloud/src/meta-service/meta_service_txn.cpp @@ -478,6 +478,163 @@ void MetaServiceImpl::precommit_txn(::google::protobuf::RpcController* controlle } } +void put_routine_load_progress(MetaServiceCode& code, std::string& msg, + const std::string& instance_id, + const CommitTxnRequest* request, + Transaction* txn, int64_t db_id) { + std::stringstream ss; + int64_t txn_id = request->txn_id(); + if (!request->has_commit_attachment()) { + ss << "failed to get commit attachment from req, db_id=" << db_id + << " txn_id=" << txn_id; + msg = ss.str(); + return; + } + + TxnCommitAttachmentPB txn_commit_attachment = request->commit_attachment(); + RLTaskTxnCommitAttachmentPB commit_attachment = + txn_commit_attachment.rl_task_txn_commit_attachment(); + int64_t job_id = commit_attachment.job_id(); + + std::string rl_progress_key; + std::string rl_progress_val; + bool prev_progress_existed = true; + RLJobProgressKeyInfo rl_progress_key_info {instance_id, db_id, job_id}; + rl_job_progress_key_info(rl_progress_key_info, &rl_progress_key); + TxnErrorCode err = txn->get(rl_progress_key, &rl_progress_val); + if (err != TxnErrorCode::TXN_OK) { + if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { + prev_progress_existed = false; + } else { + code = cast_as(err); + ss << "failed to get routine load progress, db_id=" << db_id << " txn_id=" << txn_id + << " err=" << err; + msg = ss.str(); + return; + } + } + + RoutineLoadProgressPB prev_progress_info; + if (prev_progress_existed) { + if (!prev_progress_info.ParseFromString(rl_progress_val)) { + code = MetaServiceCode::PROTOBUF_PARSE_ERR; + ss << "failed to parse routine load progress, db_id=" << db_id + << " txn_id=" << txn_id; + msg = ss.str(); + return; + } + } + + std::string new_progress_val; + RoutineLoadProgressPB new_progress_info; + new_progress_info.CopyFrom(commit_attachment.progress()); + for (auto const& elem : prev_progress_info.partition_to_offset()) { + auto 
it = new_progress_info.partition_to_offset().find(elem.first); + if (it == new_progress_info.partition_to_offset().end()) { + new_progress_info.mutable_partition_to_offset()->insert(elem); + } + } + + std::string new_statistic_val; + RoutineLoadJobStatisticPB* new_statistic_info = new_progress_info.mutable_stat(); + if (prev_progress_info.has_stat()) { + const RoutineLoadJobStatisticPB& prev_statistic_info = prev_progress_info.stat(); + + new_statistic_info->set_filtered_rows(prev_statistic_info.filtered_rows() + commit_attachment.filtered_rows()); + new_statistic_info->set_loaded_rows(prev_statistic_info.loaded_rows() + commit_attachment.loaded_rows()); + new_statistic_info->set_unselected_rows(prev_statistic_info.unselected_rows() + commit_attachment.unselected_rows()); + new_statistic_info->set_received_bytes(prev_statistic_info.received_bytes() + commit_attachment.received_bytes()); + new_statistic_info->set_task_execution_time_ms(prev_statistic_info.task_execution_time_ms() + commit_attachment.task_execution_time_ms()); + } else { + new_statistic_info->set_filtered_rows(commit_attachment.filtered_rows()); + new_statistic_info->set_loaded_rows(commit_attachment.loaded_rows()); + new_statistic_info->set_unselected_rows(commit_attachment.unselected_rows()); + new_statistic_info->set_received_bytes(commit_attachment.received_bytes()); + new_statistic_info->set_task_execution_time_ms(commit_attachment.task_execution_time_ms()); + } + + LOG(INFO) << "routine load new progress: " << new_progress_info.ShortDebugString(); + + if (!new_progress_info.SerializeToString(&new_progress_val)) { + code = MetaServiceCode::PROTOBUF_SERIALIZE_ERR; + ss << "failed to serialize new progress val, txn_id=" << txn_id; + msg = ss.str(); + return; + } + + txn->put(rl_progress_key, new_progress_val); +} + +void MetaServiceImpl::get_rl_task_commit_attach(::google::protobuf::RpcController* controller, + const GetRLTaskCommitAttachRequest* request, + GetRLTaskCommitAttachResponse* response, + ::google::protobuf::Closure* done) { + RPC_PREPROCESS(get_rl_task_commit_attach); + instance_id = get_instance_id(resource_mgr_, request->cloud_unique_id()); + if (instance_id.empty()) { + code = MetaServiceCode::INVALID_ARGUMENT; + msg = "empty instance_id"; + LOG(INFO) << msg << ", cloud_unique_id=" << request->cloud_unique_id(); + return; + } + RPC_RATE_LIMIT(get_rl_task_commit_attach) + + std::unique_ptr txn; + TxnErrorCode err = txn_kv_->create_txn(&txn); + if (err != TxnErrorCode::TXN_OK) { + code = cast_as(err); + ss << "filed to create txn, err=" << err; + msg = ss.str(); + return; + } + + if (!request->has_db_id() || !request->has_job_id()) { + code = MetaServiceCode::INVALID_ARGUMENT; + msg = "empty db_id or job_id"; + LOG(INFO) << msg << ", cloud_unique_id=" << request->cloud_unique_id(); + return; + } + + int64_t db_id = request->db_id(); + int64_t job_id = request->job_id(); + std::string rl_progress_key; + std::string rl_progress_val; + RLJobProgressKeyInfo rl_progress_key_info {instance_id, db_id, job_id}; + rl_job_progress_key_info(rl_progress_key_info, &rl_progress_key); + err = txn->get(rl_progress_key, &rl_progress_val); + if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { + code = MetaServiceCode::ROUTINE_LOAD_PROGRESS_NOT_FOUND; + ss << "pregress info not found, db_id=" << db_id + << " job_id=" << job_id << " err=" << err; + msg = ss.str(); + return; + } else if (err != TxnErrorCode::TXN_OK) { + code = cast_as(err); + ss << "failed to get pregress info, db_id=" << db_id + << " job_id=" << job_id << " err=" << 
err; + msg = ss.str(); + return; + } + + RLTaskTxnCommitAttachmentPB* commit_attach = response->mutable_commit_attach(); + RoutineLoadProgressPB* progress_info = commit_attach->mutable_progress(); + if (!progress_info->ParseFromString(rl_progress_val)) { + code = MetaServiceCode::PROTOBUF_PARSE_ERR; + ss << "failed to parse progress info, db_id=" << db_id << " job_id=" << job_id; + msg = ss.str(); + return; + } + + if (progress_info->has_stat()) { + const RoutineLoadJobStatisticPB& statistic_info = progress_info->stat(); + commit_attach->set_filtered_rows(statistic_info.filtered_rows()); + commit_attach->set_loaded_rows(statistic_info.loaded_rows()); + commit_attach->set_unselected_rows(statistic_info.unselected_rows()); + commit_attach->set_received_bytes(statistic_info.received_bytes()); + commit_attach->set_task_execution_time_ms(statistic_info.task_execution_time_ms()); + } +} + /** * 0. Extract txn_id from request * 1. Get db id from TxnKv with txn_id @@ -977,86 +1134,7 @@ void MetaServiceImpl::commit_txn(::google::protobuf::RpcController* controller, if (txn_info.load_job_source_type() == LoadJobSourceTypePB::LOAD_JOB_SRC_TYPE_ROUTINE_LOAD_TASK) { - if (!request->has_commit_attachment()) { - ss << "failed to get commit attachment from req, db_id=" << db_id - << " txn_id=" << txn_id; - msg = ss.str(); - return; - } - - TxnCommitAttachmentPB txn_commit_attachment = request->commit_attachment(); - RLTaskTxnCommitAttachmentPB commit_attachment = - txn_commit_attachment.rl_task_txn_commit_attachment(); - int64_t job_id = commit_attachment.job_id(); - - std::string rl_progress_key; - std::string rl_progress_val; - bool prev_progress_existed = true; - RLJobProgressKeyInfo rl_progress_key_info {instance_id, db_id, job_id}; - rl_job_progress_key_info(rl_progress_key_info, &rl_progress_key); - TxnErrorCode err = txn->get(rl_progress_key, &rl_progress_val); - if (err != TxnErrorCode::TXN_OK) { - if (err == TxnErrorCode::TXN_KEY_NOT_FOUND) { - prev_progress_existed = false; - } else { - code = cast_as(err); - ss << "failed to get txn_info, db_id=" << db_id << " txn_id=" << txn_id - << " err=" << err; - msg = ss.str(); - return; - } - } - - RoutineLoadProgressPB prev_progress_info; - if (prev_progress_existed) { - if (!prev_progress_info.ParseFromString(rl_progress_val)) { - code = MetaServiceCode::PROTOBUF_PARSE_ERR; - ss << "failed to parse txn_info, db_id=" << db_id << " txn_id=" << txn_id; - msg = ss.str(); - return; - } - - int cal_row_num = 0; - for (auto const& elem : commit_attachment.progress().partition_to_offset()) { - if (elem.second >= 0) { - auto it = prev_progress_info.partition_to_offset().find(elem.first); - if (it != prev_progress_info.partition_to_offset().end() && it->second >= 0) { - cal_row_num += elem.second - it->second; - } else { - cal_row_num += elem.second + 1; - } - } - } - - LOG(INFO) << " calculated row num " << cal_row_num << " actual row num " - << commit_attachment.loaded_rows() << " prev progress " - << prev_progress_info.DebugString(); - - if (cal_row_num == 0) { - LOG(WARNING) << " repeated to load task in routine load, db_id=" << db_id - << " txn_id=" << txn_id << " calculated row num " << cal_row_num - << " actual row num " << commit_attachment.loaded_rows(); - return; - } - } - - std::string new_progress_val; - RoutineLoadProgressPB new_progress_info; - new_progress_info.CopyFrom(commit_attachment.progress()); - for (auto const& elem : prev_progress_info.partition_to_offset()) { - auto it = new_progress_info.partition_to_offset().find(elem.first); - if (it 
== new_progress_info.partition_to_offset().end()) { - new_progress_info.mutable_partition_to_offset()->insert(elem); - } - } - - if (!new_progress_info.SerializeToString(&new_progress_val)) { - code = MetaServiceCode::PROTOBUF_SERIALIZE_ERR; - ss << "failed to serialize new progress val, txn_id=" << txn_info.txn_id(); - msg = ss.str(); - return; - } - txn->put(rl_progress_key, new_progress_val); + put_routine_load_progress(code, msg, instance_id, request, txn.get(), db_id); } LOG(INFO) << "xxx commit_txn put recycle_key key=" << hex(recycle_key) << " txn_id=" << txn_id; diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceClient.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceClient.java index 49cb3c205903fca..19949ac73d3c0c1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceClient.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceClient.java @@ -314,4 +314,15 @@ public Cloud.GetInstanceResponse getInstance(Cloud.GetInstanceRequest request) { } return blockingStub.getInstance(request); } + + public Cloud.GetRLTaskCommitAttachResponse + getRLTaskCommitAttach(Cloud.GetRLTaskCommitAttachRequest request) { + if (!request.hasCloudUniqueId()) { + Cloud.GetRLTaskCommitAttachRequest.Builder builder = + Cloud.GetRLTaskCommitAttachRequest.newBuilder(); + builder.mergeFrom(request); + return blockingStub.getRlTaskCommitAttach(builder.setCloudUniqueId(Config.cloud_unique_id).build()); + } + return blockingStub.getRlTaskCommitAttach(request); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceProxy.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceProxy.java index 9715d831e8f3ae3..680189d4d276f21 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceProxy.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/rpc/MetaServiceProxy.java @@ -442,4 +442,19 @@ public Cloud.AlterObjStoreInfoResponse alterObjStoreInfo(Cloud.AlterObjStoreInfo throw new RpcException("", e.getMessage(), e); } } + + public Cloud.GetRLTaskCommitAttachResponse + getRLTaskCommitAttach(Cloud.GetRLTaskCommitAttachRequest request) + throws RpcException { + if (metaServiceHostPort == null) { + throw new RpcException("", "cloud mode, please configure cloud_unique_id and meta_service_endpoint"); + } + TNetworkAddress metaAddress = new TNetworkAddress(metaServiceHostPort.first, metaServiceHostPort.second); + try { + final MetaServiceClient client = getProxy(metaAddress); + return client.getRLTaskCommitAttach(request); + } catch (Exception e) { + throw new RpcException(metaAddress.hostname, e.getMessage(), e); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaRoutineLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaRoutineLoadJob.java index bdcfb9e4a2724af..1067a759e5f416b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaRoutineLoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/KafkaRoutineLoadJob.java @@ -23,6 +23,8 @@ import org.apache.doris.catalog.Database; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.OlapTable; +import org.apache.doris.cloud.proto.Cloud; +import org.apache.doris.cloud.rpc.MetaServiceProxy; import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; import org.apache.doris.common.InternalErrorCode; @@ -40,6 +42,7 @@ import org.apache.doris.load.routineload.kafka.KafkaConfiguration; 
import org.apache.doris.load.routineload.kafka.KafkaDataSourceProperties; import org.apache.doris.persist.AlterRoutineLoadJobOperationLog; +import org.apache.doris.rpc.RpcException; import org.apache.doris.thrift.TFileCompressType; import org.apache.doris.transaction.TransactionState; import org.apache.doris.transaction.TransactionStatus; @@ -69,6 +72,7 @@ import java.util.TimeZone; import java.util.UUID; + /** * KafkaRoutineLoadJob is a kind of RoutineLoadJob which fetch data from kafka. * The progress which is super class property is seems like "{"partition1": offset1, "partition2": offset2}" @@ -247,6 +251,35 @@ public void divideRoutineLoadJob(int currentConcurrentTaskNum) throws UserExcept } } + @Override + public void updateCloudProgress() throws UserException { + Cloud.GetRLTaskCommitAttachRequest.Builder builder = + Cloud.GetRLTaskCommitAttachRequest.newBuilder(); + builder.setCloudUniqueId(Config.cloud_unique_id); + builder.setDbId(dbId); + builder.setJobId(id); + + Cloud.GetRLTaskCommitAttachResponse response; + try { + response = MetaServiceProxy.getInstance().getRLTaskCommitAttach(builder.build()); + if (response.getStatus().getCode() != Cloud.MetaServiceCode.OK) { + LOG.warn("failed to get routine load commit attach, response: {}", response); + if (response.getStatus().getCode() == Cloud.MetaServiceCode.ROUTINE_LOAD_PROGRESS_NOT_FOUND) { + LOG.warn("not found routine load progress, response: {}", response); + return; + } else { + throw new UserException(response.getStatus().getMsg()); + } + } + } catch (RpcException e) { + LOG.info("failed to get routine load commit attach {}", e); + throw new UserException(e.getMessage()); + } + + RLTaskTxnCommitAttachment commitAttach = new RLTaskTxnCommitAttachment(response.getCommitAttach()); + updateProgress(commitAttach); + } + @Override public int calculateCurrentConcurrentTaskNum() { int partitionNum = currentKafkaPartitions.size(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java index 77663be058cd771..20c1b999d030e0a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java @@ -779,6 +779,8 @@ public void processTimeoutTasks() { } } + abstract void updateCloudProgress() throws UserException; + abstract void divideRoutineLoadJob(int currentConcurrentTaskNum) throws UserException; public int calculateCurrentConcurrentTaskNum() throws MetaNotFoundException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadScheduler.java index 84f9548de1324a1..51029c3d18b194e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadScheduler.java @@ -18,6 +18,7 @@ package org.apache.doris.load.routineload; import org.apache.doris.catalog.Env; +import org.apache.doris.common.Config; import org.apache.doris.common.FeConstants; import org.apache.doris.common.LoadException; import org.apache.doris.common.MetaNotFoundException; @@ -73,10 +74,15 @@ private void process() throws UserException { if (!routineLoadJobList.isEmpty()) { LOG.info("there are {} job need schedule", routineLoadJobList.size()); } + for (RoutineLoadJob routineLoadJob : routineLoadJobList) { RoutineLoadJob.JobState 
errorJobState = null; UserException userException = null; try { + if (Config.isCloudMode()) { + routineLoadJob.updateCloudProgress(); + } + routineLoadJob.prepare(); // judge nums of tasks more than max concurrent tasks of cluster int desiredConcurrentTaskNum = routineLoadJob.calculateCurrentConcurrentTaskNum(); diff --git a/gensrc/proto/cloud.proto b/gensrc/proto/cloud.proto index cad9eac22baf093..2db7dd1a5ef5572 100644 --- a/gensrc/proto/cloud.proto +++ b/gensrc/proto/cloud.proto @@ -258,6 +258,7 @@ message TxnCoordinatorPB { message RoutineLoadProgressPB { map partition_to_offset = 1; + optional RoutineLoadJobStatisticPB stat = 2; } message RLTaskTxnCommitAttachmentPB { @@ -272,6 +273,14 @@ message RLTaskTxnCommitAttachmentPB { optional string error_log_url = 9; } +message RoutineLoadJobStatisticPB { + optional int64 filtered_rows = 1; + optional int64 loaded_rows = 2; + optional int64 unselected_rows = 3; + optional int64 received_bytes = 4; + optional int64 task_execution_time_ms = 5; +} + message TxnCommitAttachmentPB { enum Type { LODD_JOB_FINAL_OPERATION = 0; @@ -1205,6 +1214,7 @@ enum MetaServiceCode { JOB_TABLET_BUSY = 5001; JOB_ALREADY_SUCCESS = 5002; ROUTINE_LOAD_DATA_INCONSISTENT = 5003; + ROUTINE_LOAD_PROGRESS_NOT_FOUND = 5004; // Rate limit MAX_QPS_LIMIT = 6001; @@ -1288,6 +1298,17 @@ message GetDeleteBitmapUpdateLockResponse { optional MetaServiceResponseStatus status = 1; } +message GetRLTaskCommitAttachRequest { + optional string cloud_unique_id = 1; // For auth + optional int64 db_id = 2; + optional int64 job_id = 3; +} + +message GetRLTaskCommitAttachResponse { + optional MetaServiceResponseStatus status = 1; + optional RLTaskTxnCommitAttachmentPB commit_attach = 2; +} + service MetaService { rpc begin_txn(BeginTxnRequest) returns (BeginTxnResponse); rpc precommit_txn(PrecommitTxnRequest) returns (PrecommitTxnResponse); @@ -1350,6 +1371,9 @@ service MetaService { rpc update_delete_bitmap(UpdateDeleteBitmapRequest) returns(UpdateDeleteBitmapResponse); rpc get_delete_bitmap(GetDeleteBitmapRequest) returns(GetDeleteBitmapResponse); rpc get_delete_bitmap_update_lock(GetDeleteBitmapUpdateLockRequest) returns(GetDeleteBitmapUpdateLockResponse); + + // routine load progress + rpc get_rl_task_commit_attach(GetRLTaskCommitAttachRequest) returns (GetRLTaskCommitAttachResponse); }; service RecyclerService { From f82eb9e73a04640f16c19dc65fea38900fed01e6 Mon Sep 17 00:00:00 2001 From: walter Date: Mon, 25 Mar 2024 23:06:15 +0800 Subject: [PATCH 09/22] [feature](merge-cloud) Support check compatibility mode for FE (#32509) --- .../java/org/apache/doris/common/Config.java | 7 ++++ .../java/org/apache/doris/catalog/Env.java | 38 ++++++++++++++++++- .../doris/journal/bdbje/BDBJEJournal.java | 4 ++ 3 files changed, 48 insertions(+), 1 deletion(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index f6af3a044aa1a18..848a22d1012043c 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -2530,6 +2530,13 @@ public class Config extends ConfigBase { }) public static boolean enable_proxy_protocol = false; + // Used to check compatibility when upgrading. + @ConfField + public static boolean enable_check_compatibility_mode = false; + + // Do checkpoint after replaying edit logs. 
+ @ConfField + public static boolean checkpoint_after_check_compatibility = false; //========================================================================== // begin of cloud config diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index e91ff7febe71975..10592282155041c 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -313,6 +313,7 @@ import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; + /** * A singleton class can also be seen as an entry point of Doris. * All manager classes can be obtained through this class. @@ -1000,7 +1001,13 @@ public void initialize(String[] args) throws Exception { auditEventProcessor.start(); // 2. get cluster id and role (Observer or Follower) - getClusterIdAndRole(); + if (!Config.enable_check_compatibility_mode) { + getClusterIdAndRole(); + } else { + role = FrontendNodeType.FOLLOWER; + nodeName = genFeNodeName(selfNode.getHost(), + selfNode.getPort(), false /* new style */); + } // 3. Load image first and replay edits this.editLog = new EditLog(nodeName); @@ -1009,6 +1016,10 @@ public void initialize(String[] args) throws Exception { this.globalTransactionMgr.setEditLog(editLog); this.idGenerator.setEditLog(editLog); + if (Config.enable_check_compatibility_mode) { + replayJournalsAndExit(); + } + // 4. create load and export job label cleaner thread createLabelCleaner(); @@ -1467,6 +1478,13 @@ private void transferToMaster() { long replayEndTime = System.currentTimeMillis(); LOG.info("finish replay in " + (replayEndTime - replayStartTime) + " msec"); + if (Config.enable_check_compatibility_mode) { + String msg = "check metadata compatibility successfully"; + LOG.info(msg); + System.out.println(msg); + System.exit(0); + } + checkCurrentNodeExist(); checkBeExecVersion(); @@ -5250,6 +5268,9 @@ public String dumpImage() { LOG.info("acquired all jobs' read lock."); long journalId = getMaxJournalId(); File dumpFile = new File(Config.meta_dir, "image." + journalId); + if (Config.enable_check_compatibility_mode) { + dumpFile = new File(imageDir, "image." 
+ journalId); + } dumpFilePath = dumpFile.getAbsolutePath(); try { LOG.info("begin to dump {}", dumpFilePath); @@ -6107,4 +6128,19 @@ public void checkReadyOrThrowTException() throws TException { throw new TException(e); } } + + private void replayJournalsAndExit() { + replayJournal(-1); + LOG.info("check metadata compatibility successfully"); + System.out.println("check metadata compatibility successfully"); + + if (Config.checkpoint_after_check_compatibility) { + String imagePath = dumpImage(); + String msg = "the new image file path is: " + imagePath; + LOG.info(msg); + System.out.println(msg); + } + + System.exit(0); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java index 13fa926c06d1ce9..a675257fea9bb73 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java +++ b/fe/fe-core/src/main/java/org/apache/doris/journal/bdbje/BDBJEJournal.java @@ -18,6 +18,7 @@ package org.apache.doris.journal.bdbje; import org.apache.doris.catalog.Env; +import org.apache.doris.common.Config; import org.apache.doris.common.FeConstants; import org.apache.doris.common.io.DataOutputBuffer; import org.apache.doris.common.io.Writable; @@ -374,6 +375,9 @@ public JournalCursor read(long fromKey, long toKey) { @Override public long getMaxJournalId() { + if (Config.enable_check_compatibility_mode) { + return getMaxJournalIdWithoutCheck(); + } return getMaxJournalIdInternal(true); } From ca97b91279b9683247692d659a99af55e5e828ae Mon Sep 17 00:00:00 2001 From: Xin Liao Date: Mon, 25 Mar 2024 23:08:44 +0800 Subject: [PATCH 10/22] [enhancement](be-meta) disable sync rocksdb by default for better performance (#32714) --- be/src/common/config.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 66eb4444ba007ee..8b0c33d45a793d0 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -603,7 +603,7 @@ DEFINE_mInt32(result_buffer_cancelled_interval_time, "300"); DEFINE_mInt32(priority_queue_remaining_tasks_increased_frequency, "512"); // sync tablet_meta when modifying meta -DEFINE_mBool(sync_tablet_meta, "true"); +DEFINE_mBool(sync_tablet_meta, "false"); // default thrift rpc timeout ms DEFINE_mInt32(thrift_rpc_timeout_ms, "60000"); From 7f639602d0e4c2a62929fe9fac02906500ec4123 Mon Sep 17 00:00:00 2001 From: Gavin Chou Date: Mon, 25 Mar 2024 23:12:02 +0800 Subject: [PATCH 11/22] [chore] Format regression-conf.groovy (#32713) --- regression-test/conf/regression-conf.groovy | 5 +- .../conf/regression-conf-custom.groovy | 58 ++++++++++++++----- .../conf/regression-conf-custom.groovy | 18 +++++- .../external/conf/regression-conf.groovy | 9 ++- .../pipeline/p0/conf/regression-conf.groovy | 21 +++++-- .../pipeline/p1/conf/regression-conf.groovy | 17 +++++- 6 files changed, 102 insertions(+), 26 deletions(-) diff --git a/regression-test/conf/regression-conf.groovy b/regression-test/conf/regression-conf.groovy index c6764a2ce863732..041744c9b028807 100644 --- a/regression-test/conf/regression-conf.groovy +++ b/regression-test/conf/regression-conf.groovy @@ -86,7 +86,10 @@ excludeGroups = "" // this suites will not be executed excludeSuites = "test_broker_load" // this directories will not be executed -excludeDirectories = "segcompaction_p2,workload_manager_p1" +excludeDirectories = "000_the_start_sentinel_do_not_touch," + // keep this line as the first line + "segcompaction_p2," + + 
"workload_manager_p1," + + "zzz_the_end_sentinel_do_not_touch" // keep this line as the last line customConf1 = "test_custom_conf_value" diff --git a/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy b/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy index 92198e6339c13cf..5c7162e1eef1971 100644 --- a/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy +++ b/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy @@ -16,20 +16,50 @@ // under the License. testGroups = "p0" -//exclude groups and exclude suites is more prior than include groups and include suites. -excludeSuites = "test_index_failure_injection,test_dump_image,test_profile,test_spark_load,test_refresh_mtmv,test_bitmap_filter,test_information_schema_external,test_stream_load_new_move_memtable,test_stream_load_move_memtable,test_materialized_view_move_memtable,test_disable_move_memtable,test_insert_move_memtable,set_and_unset_variable,test_pk_uk_case_cluster,test_point_query_cluster_key,test_compaction_uniq_cluster_keys_with_delete,test_compaction_uniq_keys_cluster_key,test_set_partition_version,test_show_transaction,test_be_inject_publish_txn_fail,test_report_version_missing,test_publish_timeout,set_replica_status" +// exclude groups and exclude suites is more prior than include groups and include suites. +// keep them in lexico order(add/remove cases between the sentinals and sort): +// * sort lines in vim: select lines and then type :sort +// * sort lines in vscode: https://ulfschneider.io/2023-09-01-sort-in-vscode/ +excludeSuites = "000_the_start_sentinel_do_not_touch," + // keep this line as the first line + "set_and_unset_variable," + + "set_replica_status," + // not a case for cloud mode, no need to run + "test_be_inject_publish_txn_fail," + // not a case for cloud mode, no need to run + "test_bitmap_filter," + + "test_compaction_uniq_cluster_keys_with_delete," + + "test_compaction_uniq_keys_cluster_key," + + "test_disable_move_memtable," + + "test_dump_image," + + "test_index_failure_injection," + + "test_information_schema_external," + + "test_insert_move_memtable," + + "test_materialized_view_move_memtable," + + "test_pk_uk_case_cluster," + + "test_point_query_cluster_key," + + "test_profile," + + "test_publish_timeout," + + "test_refresh_mtmv," + // not supported yet + "test_report_version_missing," + + "test_set_partition_version," + + "test_show_transaction," + // not supported yet + "test_spark_load," + + "test_stream_load_move_memtable," + + "test_stream_load_new_move_memtable," + + "zzz_the_end_sentinel_do_not_touch" // keep this line as the last line -excludeDirectories = """ - cloud/multi_cluster, - workload_manager_p1, - nereids_rules_p0/subquery, - unique_with_mow_p0/cluster_key, - unique_with_mow_p0/ssb_unique_sql_zstd_cluster, - unique_with_mow_p0/ssb_unique_load_zstd_c, - nereids_rules_p0/mv, - backup_restore, - cold_heat_separation, - storage_medium_p0 -""" + +excludeDirectories = "000_the_start_sentinel_do_not_touch," + // keep this line as the first line + "cloud/multi_cluster," + // run in specific regression pipeline + "workload_manager_p1," + + "nereids_rules_p0/subquery," + + "unique_with_mow_p0/cluster_key," + + "unique_with_mow_p0/ssb_unique_sql_zstd_cluster," + + "unique_with_mow_p0/ssb_unique_load_zstd_c," + + "nereids_rules_p0/mv," + + "backup_restore," + // not a case for cloud mode, no need to run + "cold_heat_separation," + + "storage_medium_p0," + + "zzz_the_end_sentinel_do_not_touch" // keep this line as the last line 
max_failure_num = 100 + +// vim: tw=10086 et ts=4 sw=4: diff --git a/regression-test/pipeline/cloud_p1/conf/regression-conf-custom.groovy b/regression-test/pipeline/cloud_p1/conf/regression-conf-custom.groovy index b693cf0fb01d6f0..2cd61d23a1a9732 100644 --- a/regression-test/pipeline/cloud_p1/conf/regression-conf-custom.groovy +++ b/regression-test/pipeline/cloud_p1/conf/regression-conf-custom.groovy @@ -1,5 +1,19 @@ testGroups = "p1" //exclude groups and exclude suites is more prior than include groups and include suites. -excludeSuites = "test_big_pad,test_profile,test_broker_load,test_spark_load,test_analyze_stats_p1,test_refresh_mtmv,test_bitmap_filter" -excludeDirectories = "workload_manager_p1,fault_injection_p0" +excludeSuites = "000_the_start_sentinel_do_not_touch," + // keep this line as the first line + "test_analyze_stats_p1," + + "test_big_pad," + + "test_bitmap_filter," + + "test_broker_load," + + "test_profile," + + "test_refresh_mtmv," + + "test_spark_load," + + "zzz_the_end_sentinel_do_not_touch" // keep this line as the last line + +excludeDirectories = "000_the_start_sentinel_do_not_touch," + // keep this line as the first line + "fault_injection_p0," + + "workload_manager_p1," + + "zzz_the_end_sentinel_do_not_touch" // keep this line as the last line + max_failure_num = 50 + diff --git a/regression-test/pipeline/external/conf/regression-conf.groovy b/regression-test/pipeline/external/conf/regression-conf.groovy index 02b5c68fd084aab..52bf2ff8908fb25 100644 --- a/regression-test/pipeline/external/conf/regression-conf.groovy +++ b/regression-test/pipeline/external/conf/regression-conf.groovy @@ -57,7 +57,14 @@ testDirectories = "" // this groups will not be executed excludeGroups = "" // this suites will not be executed -excludeSuites = "test_cast_string_to_array,test_broker_load,test_spark_load,test_analyze_stats_p1,test_refresh_mtmv" +excludeSuites = "000_the_start_sentinel_do_not_touch," + // keep this line as the first line + "test_analyze_stats_p1," + + "test_broker_load," + + "test_cast_string_to_array," + + "test_refresh_mtmv," + + "test_spark_load," + + "zzz_the_end_sentinel_do_not_touch" // keep this line as the last line + // this directories will not be executed excludeDirectories = "" diff --git a/regression-test/pipeline/p0/conf/regression-conf.groovy b/regression-test/pipeline/p0/conf/regression-conf.groovy index bd909f903fafac3..3cbadc1d481ea67 100644 --- a/regression-test/pipeline/p0/conf/regression-conf.groovy +++ b/regression-test/pipeline/p0/conf/regression-conf.groovy @@ -62,14 +62,23 @@ excludeGroups = "" // this suites will not be executed // load_stream_fault_injection may cause bad disk -excludeSuites = "test_stream_stub_fault_injection,test_index_failure_injection,test_dump_image,test_profile,test_spark_load,test_refresh_mtmv,test_bitmap_filter,test_information_schema_external" +excludeSuites = "000_the_start_sentinel_do_not_touch," + // keep this line as the first line + "test_bitmap_filter," + + "test_dump_image," + + "test_index_failure_injection," + + "test_information_schema_external," + + "test_profile," + + "test_refresh_mtmv," + + "test_spark_load," + + "test_stream_stub_fault_injection," + + "zzz_the_end_sentinel_do_not_touch" // keep this line as the last line // this directories will not be executed -excludeDirectories = """ - cloud, - nereids_rules_p0/subquery, - workload_manager_p1 -""" +excludeDirectories = "000_the_start_sentinel_do_not_touch," + // keep this line as the first line + "cloud," + + "nereids_rules_p0/subquery," + + 
"workload_manager_p1," + + "zzz_the_end_sentinel_do_not_touch" // keep this line as the last line customConf1 = "test_custom_conf_value" diff --git a/regression-test/pipeline/p1/conf/regression-conf.groovy b/regression-test/pipeline/p1/conf/regression-conf.groovy index 362d33b100284e8..cfe34f96bc96abe 100644 --- a/regression-test/pipeline/p1/conf/regression-conf.groovy +++ b/regression-test/pipeline/p1/conf/regression-conf.groovy @@ -52,9 +52,22 @@ testGroups = "" // empty suite will test all suite testSuites = "" // this suites will not be executed -excludeSuites = "test_big_pad,test_profile,test_broker_load,test_spark_load,test_analyze_stats_p1,test_refresh_mtmv,test_bitmap_filter" +excludeSuites = "000_the_start_sentinel_do_not_touch," + // keep this line as the first line + "test_analyze_stats_p1," + + "test_big_pad," + + "test_bitmap_filter," + + "test_broker_load," + + "test_profile," + + "test_refresh_mtmv," + + "test_spark_load," + + "zzz_the_end_sentinel_do_not_touch" // keep this line as the last line + // this dir will not be executed -excludeDirectories = "workload_manager_p1,fault_injection_p0" +excludeDirectories = "000_the_start_sentinel_do_not_touch," + // keep this line as the first line + "fault_injection_p0," + + "workload_manager_p1," + + "zzz_the_end_sentinel_do_not_touch" // keep this line as the last line + cacheDataPath="/data/regression/" s3Endpoint = "cos.ap-hongkong.myqcloud.com" From be313cb88a4c61ed543f374400bb970839dede68 Mon Sep 17 00:00:00 2001 From: deardeng <565620795@qq.com> Date: Mon, 25 Mar 2024 23:13:35 +0800 Subject: [PATCH 12/22] [feature](merge-cloud) Change fe log rolling max size (#32777) --- .../java/org/apache/doris/common/Config.java | 4 ++++ .../org/apache/doris/common/Log4jConfig.java | 17 ++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 848a22d1012043c..93b7df593fe1887 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -2542,6 +2542,10 @@ public class Config extends ConfigBase { // begin of cloud config //========================================================================== + @ConfField public static int info_sys_accumulated_file_size = 4; + @ConfField public static int warn_sys_accumulated_file_size = 2; + @ConfField public static int audit_sys_accumulated_file_size = 4; + @ConfField public static String cloud_unique_id = ""; diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/Log4jConfig.java b/fe/fe-core/src/main/java/org/apache/doris/common/Log4jConfig.java index 47d96765a9c30cc..c76459bac140cab 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/Log4jConfig.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/Log4jConfig.java @@ -59,7 +59,9 @@ public class Log4jConfig extends XmlConfiguration { + " \n" + " \n" + " \n" - + " \n" + + " \n" + + " \n" + + " \n" + " \n" + " \n" + " \n" @@ -74,7 +76,9 @@ public class Log4jConfig extends XmlConfiguration { + " \n" + " \n" + " \n" - + " \n" + + " \n" + + " \n" + + " \n" + " \n" + " \n" + " \n" @@ -89,7 +93,9 @@ public class Log4jConfig extends XmlConfiguration { + " \n" + " \n" + " \n" - + " \n" + + " \n" + + " \n" + + " \n" + " \n" + " \n" + " \n" @@ -215,6 +221,11 @@ private static void reconfig() throws IOException { properties.put("audit_roll_maxsize", auditRollMaxSize); 
properties.put("audit_roll_num", auditRollNum); properties.put("audit_log_delete_age", auditDeleteAge); + + properties.put("info_sys_accumulated_file_size", String.valueOf(Config.info_sys_accumulated_file_size)); + properties.put("warn_sys_accumulated_file_size", String.valueOf(Config.warn_sys_accumulated_file_size)); + properties.put("audit_sys_accumulated_file_size", String.valueOf(Config.audit_sys_accumulated_file_size)); + properties.put("include_location_flag", sysLogMode.equalsIgnoreCase("NORMAL") ? "true" : "false"); properties.put("immediate_flush_flag", sysLogMode.equalsIgnoreCase("ASYNC") ? "false" : "true"); properties.put("audit_file_postfix", compressAuditLog ? ".gz" : ""); From 3f1224abda1f067f41d99d4d5f9d9b3f9d022812 Mon Sep 17 00:00:00 2001 From: Lightman <31928846+Lchangliang@users.noreply.github.com> Date: Mon, 25 Mar 2024 23:15:01 +0800 Subject: [PATCH 13/22] (cloud-merge)[feature] Support to create table with "file_cache_ttl_seconds" property (#32409) --- be/src/cloud/cloud_rowset_builder.cpp | 1 + be/src/cloud/cloud_tablet.cpp | 83 +++++++++++++++++-- be/src/olap/base_tablet.h | 1 + be/src/olap/rowset/beta_rowset.h | 2 +- be/src/olap/rowset/beta_rowset_reader.cpp | 8 ++ be/src/olap/rowset/rowset.h | 2 + be/src/olap/rowset/rowset_reader_context.h | 1 + be/src/olap/tablet_meta.cpp | 2 + be/src/olap/tablet_meta.h | 13 +++ be/src/olap/tablet_reader.cpp | 1 + be/test/testutil/mock_rowset.h | 2 + .../analysis/ModifyTablePropertiesClause.java | 3 + .../java/org/apache/doris/catalog/Env.java | 7 +- .../apache/doris/catalog/TableProperty.java | 3 +- .../apache/doris/journal/JournalEntity.java | 2 +- .../org/apache/doris/persist/EditLog.java | 6 +- .../apache/doris/persist/OperationType.java | 2 +- 17 files changed, 124 insertions(+), 15 deletions(-) diff --git a/be/src/cloud/cloud_rowset_builder.cpp b/be/src/cloud/cloud_rowset_builder.cpp index 72010fa6e9c53cf..d8bb50dc5f2a002 100644 --- a/be/src/cloud/cloud_rowset_builder.cpp +++ b/be/src/cloud/cloud_rowset_builder.cpp @@ -65,6 +65,7 @@ Status CloudRowsetBuilder::init() { context.mow_context = mow_context; context.write_file_cache = _req.write_file_cache; context.partial_update_info = _partial_update_info; + context.file_cache_ttl_sec = _tablet->ttl_seconds(); // New loaded data is always written to latest shared storage // TODO(AlexYue): use the passed resource id to retrive the corresponding // fs to pass to the RowsetWriterContext diff --git a/be/src/cloud/cloud_tablet.cpp b/be/src/cloud/cloud_tablet.cpp index 92e6699ceec4d62..08ed09d90f50552 100644 --- a/be/src/cloud/cloud_tablet.cpp +++ b/be/src/cloud/cloud_tablet.cpp @@ -95,11 +95,6 @@ Status CloudTablet::capture_rs_readers(const Version& spec_version, return capture_rs_readers_unlocked(version_path, rs_splits); } -Status CloudTablet::sync_meta() { - // TODO(lightman): FileCache - return Status::NotSupported("CloudTablet::sync_meta is not implemented"); -} - // There are only two tablet_states RUNNING and NOT_READY in cloud mode // This function will erase the tablet from `CloudTabletMgr` when it can't find this tablet in MS. 
Status CloudTablet::sync_rowsets(int64_t query_version, bool warmup_delta_data) { @@ -618,4 +613,82 @@ Status CloudTablet::calc_delete_bitmap_for_compaciton( return Status::OK(); } +Status CloudTablet::sync_meta() { + if (!config::enable_file_cache) { + return Status::OK(); + } + + TabletMetaSharedPtr tablet_meta; + auto st = _engine.meta_mgr().get_tablet_meta(tablet_id(), &tablet_meta); + if (!st.ok()) { + if (st.is()) { + // TODO(Lchangliang): recycle_resources_by_self(); + } + return st; + } + if (tablet_meta->tablet_state() != TABLET_RUNNING) { // impossible + return Status::InternalError("invalid tablet state. tablet_id={}", tablet_id()); + } + + auto new_ttl_seconds = tablet_meta->ttl_seconds(); + if (_tablet_meta->ttl_seconds() != new_ttl_seconds) { + _tablet_meta->set_ttl_seconds(new_ttl_seconds); + int64_t cur_time = UnixSeconds(); + std::shared_lock rlock(_meta_lock); + for (auto& [_, rs] : _rs_version_map) { + for (int seg_id = 0; seg_id < rs->num_segments(); ++seg_id) { + int64_t new_expiration_time = + new_ttl_seconds + rs->rowset_meta()->newest_write_timestamp(); + new_expiration_time = new_expiration_time > cur_time ? new_expiration_time : 0; + auto file_key = io::BlockFileCache::hash( + io::Path(rs->segment_file_path(seg_id)).filename().native()); + auto* file_cache = io::FileCacheFactory::instance()->get_by_path(file_key); + file_cache->modify_expiration_time(file_key, new_expiration_time); + } + } + } + + auto new_compaction_policy = tablet_meta->compaction_policy(); + if (_tablet_meta->compaction_policy() != new_compaction_policy) { + _tablet_meta->set_compaction_policy(new_compaction_policy); + } + auto new_time_series_compaction_goal_size_mbytes = + tablet_meta->time_series_compaction_goal_size_mbytes(); + if (_tablet_meta->time_series_compaction_goal_size_mbytes() != + new_time_series_compaction_goal_size_mbytes) { + _tablet_meta->set_time_series_compaction_goal_size_mbytes( + new_time_series_compaction_goal_size_mbytes); + } + auto new_time_series_compaction_file_count_threshold = + tablet_meta->time_series_compaction_file_count_threshold(); + if (_tablet_meta->time_series_compaction_file_count_threshold() != + new_time_series_compaction_file_count_threshold) { + _tablet_meta->set_time_series_compaction_file_count_threshold( + new_time_series_compaction_file_count_threshold); + } + auto new_time_series_compaction_time_threshold_seconds = + tablet_meta->time_series_compaction_time_threshold_seconds(); + if (_tablet_meta->time_series_compaction_time_threshold_seconds() != + new_time_series_compaction_time_threshold_seconds) { + _tablet_meta->set_time_series_compaction_time_threshold_seconds( + new_time_series_compaction_time_threshold_seconds); + } + auto new_time_series_compaction_empty_rowsets_threshold = + tablet_meta->time_series_compaction_empty_rowsets_threshold(); + if (_tablet_meta->time_series_compaction_empty_rowsets_threshold() != + new_time_series_compaction_empty_rowsets_threshold) { + _tablet_meta->set_time_series_compaction_empty_rowsets_threshold( + new_time_series_compaction_empty_rowsets_threshold); + } + auto new_time_series_compaction_level_threshold = + tablet_meta->time_series_compaction_level_threshold(); + if (_tablet_meta->time_series_compaction_level_threshold() != + new_time_series_compaction_level_threshold) { + _tablet_meta->set_time_series_compaction_level_threshold( + new_time_series_compaction_level_threshold); + } + + return Status::OK(); +} + } // namespace doris diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h index 
f32139b989bdfad..347f960841c41fd 100644 --- a/be/src/olap/base_tablet.h +++ b/be/src/olap/base_tablet.h @@ -60,6 +60,7 @@ class BaseTablet { int32_t schema_hash() const { return _tablet_meta->schema_hash(); } KeysType keys_type() const { return _tablet_meta->tablet_schema()->keys_type(); } size_t num_key_columns() const { return _tablet_meta->tablet_schema()->num_key_columns(); } + int64_t ttl_seconds() const { return _tablet_meta->ttl_seconds(); } std::mutex& get_schema_change_lock() { return _schema_change_lock; } bool enable_unique_key_merge_on_write() const { #ifdef BE_TEST diff --git a/be/src/olap/rowset/beta_rowset.h b/be/src/olap/rowset/beta_rowset.h index 46f16f4d50230f6..41cecadf7837473 100644 --- a/be/src/olap/rowset/beta_rowset.h +++ b/be/src/olap/rowset/beta_rowset.h @@ -49,7 +49,7 @@ class BetaRowset final : public Rowset { Status create_reader(RowsetReaderSharedPtr* result) override; - std::string segment_file_path(int segment_id) const; + std::string segment_file_path(int segment_id) const override; static std::string segment_file_path(const std::string& rowset_dir, const RowsetId& rowset_id, int segment_id); diff --git a/be/src/olap/rowset/beta_rowset_reader.cpp b/be/src/olap/rowset/beta_rowset_reader.cpp index a0dff2613aec0ae..f5e0b8e5c62b49e 100644 --- a/be/src/olap/rowset/beta_rowset_reader.cpp +++ b/be/src/olap/rowset/beta_rowset_reader.cpp @@ -238,6 +238,14 @@ Status BetaRowsetReader::get_segment_iterators(RowsetReaderContext* read_context _read_context->runtime_state->query_options().disable_file_cache; } + _read_options.io_ctx.expiration_time = + read_context->ttl_seconds == 0 + ? 0 + : _rowset->rowset_meta()->newest_write_timestamp() + read_context->ttl_seconds; + if (_read_options.io_ctx.expiration_time <= UnixSeconds()) { + _read_options.io_ctx.expiration_time = 0; + } + // load segments bool should_use_cache = use_cache || _read_context->reader_type == ReaderType::READER_QUERY; RETURN_IF_ERROR(SegmentLoader::instance()->load_segments(_rowset, &_segment_cache_handle, diff --git a/be/src/olap/rowset/rowset.h b/be/src/olap/rowset/rowset.h index fd5264f3ac048e6..f15527ddd6b1bd5 100644 --- a/be/src/olap/rowset/rowset.h +++ b/be/src/olap/rowset/rowset.h @@ -169,6 +169,8 @@ class Rowset : public std::enable_shared_from_this { // TODO should we rename the method to remove_files() to be more specific? 
virtual Status remove() = 0; + virtual std::string segment_file_path(int segment_id) const = 0; + // close to clear the resource owned by rowset // including: open files, indexes and so on // NOTICE: can not call this function in multithreads diff --git a/be/src/olap/rowset/rowset_reader_context.h b/be/src/olap/rowset/rowset_reader_context.h index d5683924a9ec0cb..8bfdeda60a85686 100644 --- a/be/src/olap/rowset/rowset_reader_context.h +++ b/be/src/olap/rowset/rowset_reader_context.h @@ -83,6 +83,7 @@ struct RowsetReaderContext { RowsetId rowset_id; // slots that cast may be eliminated in storage layer std::map target_cast_type_for_variants; + int64_t ttl_seconds = 0; }; } // namespace doris diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index 322ff03625100e8..a15e809b8b8ad99 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -529,6 +529,7 @@ void TabletMeta::init_from_pb(const TabletMetaPB& tablet_meta_pb) { _creation_time = tablet_meta_pb.creation_time(); _cumulative_layer_point = tablet_meta_pb.cumulative_layer_point(); _tablet_uid = TabletUid(tablet_meta_pb.tablet_uid()); + _ttl_seconds = tablet_meta_pb.ttl_seconds(); if (tablet_meta_pb.has_tablet_type()) { _tablet_type = tablet_meta_pb.tablet_type(); } else { @@ -647,6 +648,7 @@ void TabletMeta::to_meta_pb(TabletMetaPB* tablet_meta_pb) { tablet_meta_pb->set_cumulative_layer_point(cumulative_layer_point()); *(tablet_meta_pb->mutable_tablet_uid()) = tablet_uid().to_proto(); tablet_meta_pb->set_tablet_type(_tablet_type); + tablet_meta_pb->set_ttl_seconds(_ttl_seconds); switch (tablet_state()) { case TABLET_NOTREADY: tablet_meta_pb->set_tablet_state(PB_NOTREADY); diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index 58201c1c1f1d54d..6c5233eac53e616 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -273,6 +273,16 @@ class TabletMeta { return _time_series_compaction_level_threshold; } + int64_t ttl_seconds() const { + std::shared_lock rlock(_meta_lock); + return _ttl_seconds; + } + + void set_ttl_seconds(int64_t ttl_seconds) { + std::lock_guard wlock(_meta_lock); + _ttl_seconds = ttl_seconds; + } + private: Status _save_meta(DataDir* data_dir); @@ -328,6 +338,9 @@ class TabletMeta { int64_t _time_series_compaction_empty_rowsets_threshold = 0; int64_t _time_series_compaction_level_threshold = 0; + // cloud + int64_t _ttl_seconds = 0; + mutable std::shared_mutex _meta_lock; }; diff --git a/be/src/olap/tablet_reader.cpp b/be/src/olap/tablet_reader.cpp index 7b40ff4eae1238a..b2afd1360d4e4f8 100644 --- a/be/src/olap/tablet_reader.cpp +++ b/be/src/olap/tablet_reader.cpp @@ -262,6 +262,7 @@ Status TabletReader::_capture_rs_readers(const ReaderParams& read_params) { _reader_context.common_expr_ctxs_push_down = read_params.common_expr_ctxs_push_down; _reader_context.output_columns = &read_params.output_columns; _reader_context.push_down_agg_type_opt = read_params.push_down_agg_type_opt; + _reader_context.ttl_seconds = _tablet->ttl_seconds(); return Status::OK(); } diff --git a/be/test/testutil/mock_rowset.h b/be/test/testutil/mock_rowset.h index 50065ebe6b43d44..89fbb8cac3b5d7f 100644 --- a/be/test/testutil/mock_rowset.h +++ b/be/test/testutil/mock_rowset.h @@ -50,6 +50,8 @@ class MockRowset : public Rowset { return Status::NotSupported("MockRowset not support this method."); } + std::string segment_file_path(int segment_id) const override { return ""; } + Status get_segments_key_bounds(std::vector* segments_key_bounds) override { // TODO(zhangchen): remove 
this after we implemented memrowset. if (is_mem_rowset_) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyTablePropertiesClause.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyTablePropertiesClause.java index c2bc7bc7d0dea0f..8a3543d3d36f8fe 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyTablePropertiesClause.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/ModifyTablePropertiesClause.java @@ -299,6 +299,9 @@ public void analyze(Analyzer analyzer) throws AnalysisException { } this.needTableStable = false; this.opType = AlterOpType.MODIFY_TABLE_PROPERTY_SYNC; + } else if (properties.containsKey(PropertyAnalyzer.PROPERTIES_FILE_CACHE_TTL_SECONDS)) { + this.needTableStable = false; + this.opType = AlterOpType.MODIFY_TABLE_PROPERTY_SYNC; } else { throw new AnalysisException("Unknown table property: " + properties.keySet()); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index 10592282155041c..2fe4361778e821b 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -4925,7 +4925,8 @@ public void modifyTableProperties(Database db, OlapTable table, Map Date: Mon, 25 Mar 2024 23:15:24 +0800 Subject: [PATCH 14/22] [feature](merge-cloud) Disable r/w when warehouse is overdue in cloud mode (#32643) --- .../java/org/apache/doris/catalog/Env.java | 2 +- .../apache/doris/cloud/catalog/CloudEnv.java | 10 +++++++++ .../httpv2/controller/BaseController.java | 21 ++++++++++++++++++- .../org/apache/doris/qe/ConnectProcessor.java | 11 ++++++++++ 4 files changed, 42 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index 2fe4361778e821b..1b479f364639b25 100755 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -1693,7 +1693,7 @@ protected void startMasterOnlyDaemonThreads() { } // start threads that should running on all FE - private void startNonMasterDaemonThreads() { + protected void startNonMasterDaemonThreads() { // start load manager thread loadManager.start(); tabletStatMgr.start(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudEnv.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudEnv.java index 19bd102c84c1716..28e58f3cf4b0067 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudEnv.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudEnv.java @@ -54,19 +54,29 @@ public class CloudEnv extends Env { private static final Logger LOG = LogManager.getLogger(CloudEnv.class); + private CloudInstanceStatusChecker cloudInstanceStatusChecker; private CloudClusterChecker cloudClusterCheck; public CloudEnv(boolean isCheckpointCatalog) { super(isCheckpointCatalog); this.cloudClusterCheck = new CloudClusterChecker((CloudSystemInfoService) systemInfo); + this.cloudInstanceStatusChecker = new CloudInstanceStatusChecker((CloudSystemInfoService) systemInfo); } + @Override protected void startMasterOnlyDaemonThreads() { LOG.info("start cloud Master only daemon threads"); super.startMasterOnlyDaemonThreads(); cloudClusterCheck.start(); } + @Override + protected void startNonMasterDaemonThreads() { + LOG.info("start cloud Non Master only daemon threads"); + super.startNonMasterDaemonThreads(); + 
cloudInstanceStatusChecker.start(); + } + public static String genFeNodeNameFromMeta(String host, int port, long timeMs) { return host + "_" + port + "_" + timeMs; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/httpv2/controller/BaseController.java b/fe/fe-core/src/main/java/org/apache/doris/httpv2/controller/BaseController.java index cd753a100b62e7b..f95701b3e4258f3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/httpv2/controller/BaseController.java +++ b/fe/fe-core/src/main/java/org/apache/doris/httpv2/controller/BaseController.java @@ -19,6 +19,8 @@ import org.apache.doris.analysis.UserIdentity; import org.apache.doris.catalog.Env; +import org.apache.doris.cloud.proto.Cloud; +import org.apache.doris.cloud.system.CloudSystemInfoService; import org.apache.doris.cluster.ClusterNamespace; import org.apache.doris.common.AuthenticationException; import org.apache.doris.common.Config; @@ -67,7 +69,8 @@ public ActionAuthorizationInfo checkWithCookie(HttpServletRequest request, ActionAuthorizationInfo authInfo = getAuthorizationInfo(request); UserIdentity currentUser = checkPassword(authInfo); - if (checkAuth) { + if (Config.isCloudMode() && checkAuth) { + checkInstanceOverdue(currentUser); checkGlobalAuth(currentUser, PrivPredicate.ADMIN_OR_NODE); } @@ -134,6 +137,13 @@ private ActionAuthorizationInfo checkCookie(HttpServletRequest request, HttpServ return null; } + if (Config.isCloudMode() && checkAuth && !sessionValue.currentUser.isRootUser() + && ((CloudSystemInfoService) Env.getCurrentSystemInfo()).getInstanceStatus() + == Cloud.InstanceInfoPB.Status.OVERDUE) { + return null; + } + + updateCookieAge(request, PALO_SESSION_ID, PALO_SESSION_EXPIRED_TIME, response); ConnectContext ctx = new ConnectContext(); @@ -199,6 +209,15 @@ public String toString() { } } + protected void checkInstanceOverdue(UserIdentity currentUsr) { + Cloud.InstanceInfoPB.Status s = ((CloudSystemInfoService) Env.getCurrentSystemInfo()).getInstanceStatus(); + if (!currentUsr.isRootUser() + && s == Cloud.InstanceInfoPB.Status.OVERDUE) { + LOG.warn("this warehouse is overdue root:{}, status:{}", currentUsr.isRootUser(), s); + throw new UnauthorizedException("The warehouse is overdue!"); + } + } + protected void checkGlobalAuth(UserIdentity currentUser, PrivPredicate predicate) throws UnauthorizedException { if (!Env.getCurrentEnv().getAccessManager().checkGlobalPriv(currentUser, predicate)) { throw new UnauthorizedException("Access denied; you need (at least one of) the " diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java index 0f8f887ee901f0c..b6278d0b55f7fb4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java @@ -29,6 +29,8 @@ import org.apache.doris.catalog.Env; import org.apache.doris.catalog.TableIf; import org.apache.doris.cloud.catalog.CloudEnv; +import org.apache.doris.cloud.proto.Cloud; +import org.apache.doris.cloud.system.CloudSystemInfoService; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; import org.apache.doris.common.DdlException; @@ -193,6 +195,15 @@ protected void auditAfterExec(String origStmt, StatementBase parsedStmt, // only throw an exception when there is a problem interacting with the requesting client protected void handleQuery(MysqlCommand mysqlCommand, String originStmt) { + if (Config.isCloudMode()) { + if 
(!ctx.getCurrentUserIdentity().isRootUser() + && ((CloudSystemInfoService) Env.getCurrentSystemInfo()).getInstanceStatus() + == Cloud.InstanceInfoPB.Status.OVERDUE) { + Exception exception = new Exception("warehouse is overdue!"); + handleQueryException(exception, originStmt, null, null); + return; + } + } try { executeQuery(mysqlCommand, originStmt); } catch (Exception ignored) { From f00750f7fe89b374c4d299ee69950fa7ac1a0ea1 Mon Sep 17 00:00:00 2001 From: deardeng <565620795@qq.com> Date: Mon, 25 Mar 2024 23:16:37 +0800 Subject: [PATCH 15/22] [feature](merge-cloud) Support auto suspend and auto start cluster in cloud (#32764) --- be/src/agent/heartbeat_server.cpp | 5 + be/src/agent/task_worker_pool.cpp | 23 +++- be/src/runtime/fragment_mgr.cpp | 41 +++++++ be/src/runtime/fragment_mgr.h | 5 + .../apache/doris/cloud/catalog/CloudEnv.java | 1 - .../cloud/system/CloudSystemInfoService.java | 109 +++++++++++++++++- .../org/apache/doris/qe/ConnectContext.java | 79 +++++++++++++ 7 files changed, 258 insertions(+), 5 deletions(-) diff --git a/be/src/agent/heartbeat_server.cpp b/be/src/agent/heartbeat_server.cpp index dfeb05a932d389b..e6d893a4a2d9ba7 100644 --- a/be/src/agent/heartbeat_server.cpp +++ b/be/src/agent/heartbeat_server.cpp @@ -30,6 +30,7 @@ #include "common/status.h" #include "olap/storage_engine.h" #include "runtime/exec_env.h" +#include "runtime/fragment_mgr.h" #include "runtime/heartbeat_flags.h" #include "service/backend_options.h" #include "util/debug_util.h" @@ -83,6 +84,10 @@ void HeartbeatServer::heartbeat(THeartbeatResult& heartbeat_result, heartbeat_result.backend_info.__set_be_node_role(config::be_node_role); // If be is gracefully stop, then k_doris_exist is set to true heartbeat_result.backend_info.__set_is_shutdown(doris::k_doris_exit); + heartbeat_result.backend_info.__set_fragment_executing_count( + get_fragment_executing_count()); + heartbeat_result.backend_info.__set_fragment_last_active_time( + get_fragment_last_active_time()); } watch.stop(); if (watch.elapsed_time() > 1000L * 1000L * 1000L) { diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index be4d3b137e6453c..ca6a9817737937a 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -76,6 +76,7 @@ #include "olap/txn_manager.h" #include "olap/utils.h" #include "runtime/exec_env.h" +#include "runtime/fragment_mgr.h" #include "runtime/snapshot_loader.h" #include "service/backend_options.h" #include "util/doris_metrics.h" @@ -446,7 +447,6 @@ void add_task_count(const TAgentTaskRequest& task, int n) { ADD_TASK_COUNT(PUBLISH_VERSION) ADD_TASK_COUNT(CLEAR_TRANSACTION_TASK) ADD_TASK_COUNT(UPDATE_TABLET_META_INFO) - ADD_TASK_COUNT(ALTER) ADD_TASK_COUNT(CLONE) ADD_TASK_COUNT(STORAGE_MEDIUM_MIGRATE) ADD_TASK_COUNT(GC_BINLOG) @@ -459,6 +459,17 @@ void add_task_count(const TAgentTaskRequest& task, int n) { DELETE_count << n; } return; + case TTaskType::ALTER: + { + ALTER_count << n; + // cloud auto stop need sc jobs, a tablet's sc can also be considered a fragment + doris::g_fragment_executing_count << 1; + int64 now = duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + g_fragment_last_active_time.set_value(now); + return; + } default: return; } @@ -1851,6 +1862,11 @@ void alter_tablet_callback(StorageEngine& engine, const TAgentTaskRequest& req) alter_tablet(engine, req, signature, task_type, &finish_task_request); finish_task(finish_task_request); } + doris::g_fragment_executing_count << -1; + int64 now = duration_cast( + 
std::chrono::system_clock::now().time_since_epoch()) + .count(); + g_fragment_last_active_time.set_value(now); remove_task_info(req.task_type, req.signature); } @@ -1872,6 +1888,11 @@ void alter_cloud_tablet_callback(CloudStorageEngine& engine, const TAgentTaskReq alter_cloud_tablet(engine, req, signature, task_type, &finish_task_request); finish_task(finish_task_request); } + doris::g_fragment_executing_count << -1; + int64 now = duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + g_fragment_last_active_time.set_value(now); remove_task_info(req.task_type, req.signature); } diff --git a/be/src/runtime/fragment_mgr.cpp b/be/src/runtime/fragment_mgr.cpp index d858737d7803509..190c0f5b0e19123 100644 --- a/be/src/runtime/fragment_mgr.cpp +++ b/be/src/runtime/fragment_mgr.cpp @@ -99,6 +99,19 @@ DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(fragment_thread_pool_queue_size, MetricUnit:: bvar::LatencyRecorder g_fragmentmgr_prepare_latency("doris_FragmentMgr", "prepare"); bvar::Adder g_pipeline_fragment_instances_count("doris_pipeline_fragment_instances_count"); +bvar::Adder g_fragment_executing_count("fragment_executing_count"); +bvar::Status g_fragment_last_active_time( + "fragment_last_active_time", duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count()); + +uint64_t get_fragment_executing_count() { + return g_fragment_executing_count.get_value(); +} +uint64_t get_fragment_last_active_time() { + return g_fragment_last_active_time.get_value(); +} + std::string to_load_error_http_path(const std::string& file_name) { if (file_name.empty()) { return ""; @@ -470,9 +483,15 @@ void FragmentMgr::_exec_actual(std::shared_ptr fragment_ex // remove exec state after this fragment finished { + int64 now = duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); std::lock_guard lock(_lock); _fragment_instance_map.erase(fragment_executor->fragment_instance_id()); + g_fragment_executing_count << -1; + g_fragment_last_active_time.set_value(now); + LOG_INFO("Instance {} finished", print_id(fragment_executor->fragment_instance_id())); if (all_done && query_ctx) { @@ -584,6 +603,11 @@ void FragmentMgr::remove_pipeline_context( std::vector ins_ids; f_context->instance_ids(ins_ids); bool all_done = q_context->countdown(ins_ids.size()); + int64 now = duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); + g_fragment_executing_count << -1; + g_fragment_last_active_time.set_value(now); for (const auto& ins_id : ins_ids) { LOG_INFO("Removing query {} instance {}, all done? 
{}", print_id(query_id), print_id(ins_id), all_done); @@ -733,7 +757,12 @@ Status FragmentMgr::exec_plan_fragment(const TExecPlanFragmentParams& params, static_cast(_runtimefilter_controller.add_entity( params.params, params.params.query_id, params.query_options, &handler, RuntimeFilterParamsContext::create(fragment_executor->runtime_state()))); + int64 now = duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); { + g_fragment_executing_count << 1; + g_fragment_last_active_time.set_value(now); std::lock_guard lock(_lock); if (handler) { query_ctx->set_merge_controller_handler(handler); @@ -753,6 +782,8 @@ Status FragmentMgr::exec_plan_fragment(const TExecPlanFragmentParams& params, // Remove the exec state added std::lock_guard lock(_lock); _fragment_instance_map.erase(params.params.fragment_instance_id); + g_fragment_executing_count << -1; + g_fragment_last_active_time.set_value(now); } fragment_executor->cancel(PPlanFragmentCancelReason::INTERNAL_ERROR, "push plan fragment to thread pool failed"); @@ -844,7 +875,12 @@ Status FragmentMgr::exec_plan_fragment(const TPipelineFragmentParams& params, query_ctx->set_ready_to_execute_only(); } } + int64 now = duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); { + g_fragment_executing_count << 1; + g_fragment_last_active_time.set_value(now); std::lock_guard lock(_lock); std::vector ins_ids; reinterpret_cast(context.get()) @@ -905,7 +941,12 @@ Status FragmentMgr::exec_plan_fragment(const TPipelineFragmentParams& params, if (i == 0 && handler) { query_ctx->set_merge_controller_handler(handler); } + int64 now = duration_cast( + std::chrono::system_clock::now().time_since_epoch()) + .count(); { + g_fragment_executing_count << 1; + g_fragment_last_active_time.set_value(now); std::lock_guard lock(_lock); _pipeline_map.insert(std::make_pair(fragment_instance_id, context)); } diff --git a/be/src/runtime/fragment_mgr.h b/be/src/runtime/fragment_mgr.h index 3435d1f4f64e8b0..5aef45954d3f9c4 100644 --- a/be/src/runtime/fragment_mgr.h +++ b/be/src/runtime/fragment_mgr.h @@ -44,6 +44,8 @@ class IOBufAsZeroCopyInputStream; } namespace doris { +extern bvar::Adder g_fragment_executing_count; +extern bvar::Status g_fragment_last_active_time; namespace pipeline { class PipelineFragmentContext; @@ -202,4 +204,7 @@ class FragmentMgr : public RestMonitorIface { nullptr; // used for pipeliine context report }; +uint64_t get_fragment_executing_count(); +uint64_t get_fragment_last_active_time(); + } // namespace doris diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudEnv.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudEnv.java index 28e58f3cf4b0067..613fef3be685f53 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudEnv.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudEnv.java @@ -392,7 +392,6 @@ public void checkCloudClusterPriv(String clusterName) throws DdlException { public void changeCloudCluster(String clusterName, ConnectContext ctx) throws DdlException { checkCloudClusterPriv(clusterName); - // TODO(merge-cloud): pick cloud auto start CloudSystemInfoService.waitForAutoStart(clusterName); try { ((CloudSystemInfoService) Env.getCurrentSystemInfo()).addCloudCluster(clusterName, ""); diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/system/CloudSystemInfoService.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/system/CloudSystemInfoService.java index 8d852c1109714b7..5eb74590d562923 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/cloud/system/CloudSystemInfoService.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/system/CloudSystemInfoService.java @@ -38,9 +38,11 @@ import org.apache.doris.system.SystemInfoService; import org.apache.doris.thrift.TStorageMedium; +import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; +import org.apache.commons.lang3.time.StopWatch; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -578,9 +580,110 @@ public static void waitForAutoStartCurrentCluster() throws DdlException { } } - public static void waitForAutoStart(final String clusterName) throws DdlException { - // TODO(merge-cloud): merge from cloud. - // throw new DdlException("Env.waitForAutoStart unimplemented"); + public static String getClusterNameAutoStart(final String clusterName) { + if (!Strings.isNullOrEmpty(clusterName)) { + return clusterName; + } + + ConnectContext context = ConnectContext.get(); + if (context == null) { + LOG.warn("auto start cant get context so new it"); + context = new ConnectContext(); + } + ConnectContext.CloudClusterResult cloudClusterTypeAndName = context.getCloudClusterByPolicy(); + if (cloudClusterTypeAndName == null) { + LOG.warn("get cluster from ctx err"); + return null; + } + if (cloudClusterTypeAndName.comment + == ConnectContext.CloudClusterResult.Comment.DEFAULT_CLUSTER_SET_BUT_NOT_EXIST) { + LOG.warn("get default cluster from ctx err"); + return null; + } + + Preconditions.checkState(!Strings.isNullOrEmpty(cloudClusterTypeAndName.clusterName), + "get cluster name empty"); + LOG.info("get cluster to resume {}", cloudClusterTypeAndName); + return cloudClusterTypeAndName.clusterName; + } + + public static void waitForAutoStart(String clusterName) throws DdlException { + if (Config.isNotCloudMode()) { + return; + } + clusterName = getClusterNameAutoStart(clusterName); + if (Strings.isNullOrEmpty(clusterName)) { + LOG.warn("auto start in cloud mode, but clusterName empty {}", clusterName); + return; + } + String clusterStatus = ((CloudSystemInfoService) Env.getCurrentSystemInfo()).getCloudStatusByName(clusterName); + if (Strings.isNullOrEmpty(clusterStatus)) { + // for cluster rename or cluster dropped + LOG.warn("cant find clusterStatus in fe, clusterName {}", clusterName); + return; + } + // nofity ms -> wait for clusterStatus to normal + LOG.debug("auto start wait cluster {} status {}-{}", clusterName, clusterStatus, + Cloud.ClusterStatus.valueOf(clusterStatus)); + if (Cloud.ClusterStatus.valueOf(clusterStatus) != Cloud.ClusterStatus.NORMAL) { + Cloud.AlterClusterRequest.Builder builder = Cloud.AlterClusterRequest.newBuilder(); + builder.setCloudUniqueId(Config.cloud_unique_id); + builder.setOp(Cloud.AlterClusterRequest.Operation.SET_CLUSTER_STATUS); + + Cloud.ClusterPB.Builder clusterBuilder = Cloud.ClusterPB.newBuilder(); + clusterBuilder.setClusterId(((CloudSystemInfoService) + Env.getCurrentSystemInfo()).getCloudClusterIdByName(clusterName)); + clusterBuilder.setClusterStatus(Cloud.ClusterStatus.TO_RESUME); + builder.setCluster(clusterBuilder); + + Cloud.AlterClusterResponse response; + try { + response = MetaServiceProxy.getInstance().alterCluster(builder.build()); + if (response.getStatus().getCode() != Cloud.MetaServiceCode.OK) { + LOG.warn("notify to resume cluster not ok, cluster {}, response: {}", clusterName, response); + } + LOG.info("notify to resume cluster {}, response: 
{} ", clusterName, response); + } catch (RpcException e) { + LOG.warn("failed to notify to resume cluster {}", clusterName, e); + throw new DdlException("notify to resume cluster not ok"); + } + } + // wait 15 mins? + int retryTimes = 15 * 60; + int retryTime = 0; + StopWatch stopWatch = new StopWatch(); + stopWatch.start(); + boolean hasAutoStart = false; + while (!String.valueOf(Cloud.ClusterStatus.NORMAL).equals(clusterStatus) + && retryTime < retryTimes) { + hasAutoStart = true; + ++retryTime; + // sleep random millis [0.5, 1] s + int randomSeconds = 500 + (int) (Math.random() * (1000 - 500)); + LOG.info("resume cluster {} retry times {}, wait randomMillis: {}, current status: {}", + clusterName, retryTime, randomSeconds, clusterStatus); + try { + if (retryTime > retryTimes / 2) { + // sleep random millis [1, 1.5] s + randomSeconds = 1000 + (int) (Math.random() * (1000 - 500)); + } + Thread.sleep(randomSeconds); + } catch (InterruptedException e) { + LOG.info("change cluster sleep wait InterruptedException: ", e); + } + clusterStatus = ((CloudSystemInfoService) Env.getCurrentSystemInfo()).getCloudStatusByName(clusterName); + } + if (retryTime >= retryTimes) { + // auto start timeout + stopWatch.stop(); + LOG.warn("auto start cluster {} timeout, wait {} ms", clusterName, stopWatch.getTime()); + throw new DdlException("auto start cluster timeout"); + } + + stopWatch.stop(); + if (hasAutoStart) { + LOG.info("auto start cluster {}, start cost {} ms", clusterName, stopWatch.getTime()); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java index a20e40cdc675852..2023395aa55a17b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java @@ -1070,6 +1070,85 @@ public String getCloudCluster() { return getCloudCluster(true); } + public static class CloudClusterResult { + public enum Comment { + FOUND_BY_DEFAULT_CLUSTER, + DEFAULT_CLUSTER_SET_BUT_NOT_EXIST, + FOUND_BY_FIRST_CLUSTER_WITH_ALIVE_BE, + FOUND_BY_FRIST_CLUSTER_HAS_AUTH, + } + + public String clusterName; + public Comment comment; + + public CloudClusterResult(final String name, Comment c) { + this.clusterName = name; + this.comment = c; + } + + @Override + public String toString() { + return "CloudClusterResult{" + + "clusterName='" + clusterName + '\'' + + ", comment=" + comment + + '}'; + } + } + + + // can't get cluster from context, use the following strategy to obtain the cluster name + // 当用户有多个集群的权限时,会按照如下策略进行拉取: + // 如果当前mysql用户没有指定cluster(没有default 或者 use), 选择有权限的cluster。 + // 如果有多个cluster满足权限条件,优先选活的,按字母序选 + // 如果没有活的,则拉起一个,按字母序选 + public CloudClusterResult getCloudClusterByPolicy() { + List cloudClusterNames = ((CloudSystemInfoService) Env.getCurrentSystemInfo()).getCloudClusterNames(); + // try set default cluster + String defaultCloudCluster = Env.getCurrentEnv().getAuth().getDefaultCloudCluster(getQualifiedUser()); + if (!Strings.isNullOrEmpty(defaultCloudCluster)) { + // check cluster validity + CloudClusterResult r; + if (cloudClusterNames.contains(defaultCloudCluster)) { + // valid + r = new CloudClusterResult(defaultCloudCluster, + CloudClusterResult.Comment.FOUND_BY_DEFAULT_CLUSTER); + LOG.info("use default cluster {}", defaultCloudCluster); + } else { + // invalid + r = new CloudClusterResult(defaultCloudCluster, + CloudClusterResult.Comment.DEFAULT_CLUSTER_SET_BUT_NOT_EXIST); + LOG.warn("default cluster {} current invalid, please 
change it", r); + } + return r; + } + + List hasAuthCluster = new ArrayList<>(); + // get all available cluster of the user + for (String cloudClusterName : cloudClusterNames) { + if (Env.getCurrentEnv().getAuth().checkCloudPriv(getCurrentUserIdentity(), + cloudClusterName, PrivPredicate.USAGE, ResourceTypeEnum.CLUSTER)) { + hasAuthCluster.add(cloudClusterName); + // find a cluster has more than one alive be + List bes = ((CloudSystemInfoService) Env.getCurrentSystemInfo()) + .getBackendsByClusterName(cloudClusterName); + AtomicBoolean hasAliveBe = new AtomicBoolean(false); + bes.stream().filter(Backend::isAlive).findAny().ifPresent(backend -> { + LOG.debug("get a clusterName {}, it's has more than one alive be {}", cloudCluster, backend); + hasAliveBe.set(true); + }); + if (hasAliveBe.get()) { + // set a cluster to context cloudCluster + CloudClusterResult r = new CloudClusterResult(cloudClusterName, + CloudClusterResult.Comment.FOUND_BY_FIRST_CLUSTER_WITH_ALIVE_BE); + LOG.debug("set context {}", r); + return r; + } + } + } + return hasAuthCluster.isEmpty() ? null + : new CloudClusterResult(hasAuthCluster.get(0), CloudClusterResult.Comment.FOUND_BY_FRIST_CLUSTER_HAS_AUTH); + } + /** * @param updateErr whether set this connect state to error when the returned cluster is null or empty. * From 72e8c62f787820300969546c998c99151291a083 Mon Sep 17 00:00:00 2001 From: AlexYue Date: Mon, 25 Mar 2024 23:21:44 +0800 Subject: [PATCH 16/22] [chore](ColdHeatSeparation) Reduce the frequency of log following read failure (#32706) --- be/src/io/fs/s3_file_writer.cpp | 7 ++++--- be/src/olap/olap_server.cpp | 36 ++++++++++++++++++--------------- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/be/src/io/fs/s3_file_writer.cpp b/be/src/io/fs/s3_file_writer.cpp index b7a8eb043115dc8..388aed1ea34eeca 100644 --- a/be/src/io/fs/s3_file_writer.cpp +++ b/be/src/io/fs/s3_file_writer.cpp @@ -508,11 +508,12 @@ void S3FileWriter::_put_object(UploadFileBuffer& buf) { } std::string S3FileWriter::_dump_completed_part() const { - std::string view; + std::stringstream ss; + ss << "part_numbers:"; for (const auto& part : _completed_parts) { - view.append(fmt::format("part {}, ", view, part->GetPartNumber())); + ss << " " << part->GetPartNumber(); } - return view; + return ss.str(); } } // namespace doris::io diff --git a/be/src/olap/olap_server.cpp b/be/src/olap/olap_server.cpp index c288d3b42e118db..d0cd43172a0917a 100644 --- a/be/src/olap/olap_server.cpp +++ b/be/src/olap/olap_server.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -1403,22 +1404,25 @@ void StorageEngine::_cold_data_compaction_producer_callback() { for (auto& [tablet, score] : tablet_to_follow) { LOG(INFO) << "submit to follow cooldown meta. tablet_id=" << tablet->tablet_id() << " score=" << score; - static_cast( - _cold_data_compaction_thread_pool->submit_func([&, t = std::move(tablet)]() { - { - std::lock_guard lock(tablet_submitted_mtx); - tablet_submitted.insert(t->tablet_id()); - } - auto st = t->cooldown(); - { - std::lock_guard lock(tablet_submitted_mtx); - tablet_submitted.erase(t->tablet_id()); - } - if (!st.ok()) { - LOG(WARNING) << "failed to cooldown. 
tablet_id=" << t->tablet_id() - << " err=" << st; - } - })); + static_cast(_cold_data_compaction_thread_pool->submit_func([&, + t = std::move( + tablet)]() { + { + std::lock_guard lock(tablet_submitted_mtx); + tablet_submitted.insert(t->tablet_id()); + } + auto st = t->cooldown(); + { + std::lock_guard lock(tablet_submitted_mtx); + tablet_submitted.erase(t->tablet_id()); + } + if (!st.ok()) { + // The cooldown of the replica may be relatively slow + // resulting in a short period of time where following cannot be successful + LOG_EVERY_N(WARNING, 5) + << "failed to cooldown. tablet_id=" << t->tablet_id() << " err=" << st; + } + })); } } } From fa345dd55c6d103ff981c31978879bc43fbc4168 Mon Sep 17 00:00:00 2001 From: AlexYue Date: Mon, 25 Mar 2024 23:24:00 +0800 Subject: [PATCH 17/22] [feature](Resource) Support to specify the root path for hdfs resource (#32632) --- be/src/agent/task_worker_pool.cpp | 11 +++++++---- be/src/io/fs/hdfs_file_system.cpp | 15 ++++++++------- be/src/io/fs/hdfs_file_system.h | 7 ++++--- .../org/apache/doris/catalog/HdfsResource.java | 3 +++ gensrc/thrift/PlanNodes.thrift | 2 ++ 5 files changed, 24 insertions(+), 14 deletions(-) diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index ca6a9817737937a..5f24d8ce054aa13 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -1398,12 +1398,14 @@ void update_s3_resource(const TStorageResource& param, io::RemoteFileSystemSPtr void update_hdfs_resource(const TStorageResource& param, io::RemoteFileSystemSPtr existed_fs) { Status st; io::RemoteFileSystemSPtr fs; + std::string root_path = + param.hdfs_storage_param.__isset.root_path ? param.hdfs_storage_param.root_path : ""; if (!existed_fs) { // No such FS instance on BE - auto res = io::HdfsFileSystem::create(param.hdfs_storage_param, - param.hdfs_storage_param.fs_name, - std::to_string(param.id), nullptr); + auto res = io::HdfsFileSystem::create( + param.hdfs_storage_param, param.hdfs_storage_param.fs_name, + std::to_string(param.id), nullptr, std::move(root_path)); if (!res.has_value()) { st = std::move(res).error(); } else { @@ -1421,7 +1423,8 @@ void update_hdfs_resource(const TStorageResource& param, io::RemoteFileSystemSPt } else { LOG_INFO("successfully update hdfs resource") .tag("resource_id", param.id) - .tag("resource_name", param.name); + .tag("resource_name", param.name) + .tag("root_path", fs->root_path().string()); put_storage_resource(param.id, {std::move(fs), param.version}); } } diff --git a/be/src/io/fs/hdfs_file_system.cpp b/be/src/io/fs/hdfs_file_system.cpp index 892ee836ae6e100..f988bdf49049286 100644 --- a/be/src/io/fs/hdfs_file_system.cpp +++ b/be/src/io/fs/hdfs_file_system.cpp @@ -53,14 +53,15 @@ namespace doris::io { Result> HdfsFileSystem::create( const std::map& properties, std::string fs_name, std::string id, - RuntimeProfile* profile) { + RuntimeProfile* profile, std::string root_path) { return HdfsFileSystem::create(parse_properties(properties), std::move(fs_name), std::move(id), - profile); + profile, std::move(root_path)); } Result> HdfsFileSystem::create(const THdfsParams& hdfs_params, std::string fs_name, std::string id, - RuntimeProfile* profile) { + RuntimeProfile* profile, + std::string root_path) { #ifdef USE_HADOOP_HDFS if (!config::enable_java_support) { return ResultError(Status::InternalError( @@ -68,15 +69,15 @@ Result> HdfsFileSystem::create(const THdfsParams "true.")); } #endif - std::shared_ptr fs( - new HdfsFileSystem(hdfs_params, std::move(fs_name), 
std::move(id), profile)); + std::shared_ptr fs(new HdfsFileSystem( + hdfs_params, std::move(fs_name), std::move(id), profile, std::move(root_path))); RETURN_IF_ERROR_RESULT(fs->init()); return fs; } HdfsFileSystem::HdfsFileSystem(const THdfsParams& hdfs_params, std::string fs_name, std::string id, - RuntimeProfile* profile) - : RemoteFileSystem("", std::move(id), FileSystemType::HDFS), + RuntimeProfile* profile, std::string root_path) + : RemoteFileSystem(root_path, std::move(id), FileSystemType::HDFS), _hdfs_params(hdfs_params), _fs_name(std::move(fs_name)), _profile(profile) { diff --git a/be/src/io/fs/hdfs_file_system.h b/be/src/io/fs/hdfs_file_system.h index ec6401964eea45f..23ae65b0820ef46 100644 --- a/be/src/io/fs/hdfs_file_system.h +++ b/be/src/io/fs/hdfs_file_system.h @@ -48,11 +48,12 @@ class HdfsFileSystem final : public RemoteFileSystem { public: static Result> create(const THdfsParams& hdfs_params, std::string fs_name, std::string id, - RuntimeProfile* profile); + RuntimeProfile* profile, + std::string root_path = ""); static Result> create( const std::map& properties, std::string fs_name, - std::string id, RuntimeProfile* profile); + std::string id, RuntimeProfile* profile, std::string root_path = ""); ~HdfsFileSystem() override; @@ -84,7 +85,7 @@ class HdfsFileSystem final : public RemoteFileSystem { private: friend class HdfsFileWriter; HdfsFileSystem(const THdfsParams& hdfs_params, std::string fs_name, std::string id, - RuntimeProfile* profile); + RuntimeProfile* profile, std::string root_path); const THdfsParams& _hdfs_params; // Only used in init, so we can use reference here std::string _fs_name; // do not use std::shared_ptr or std::unique_ptr diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java index 6d441707420ce83..c9cb77fbd93ff84 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/HdfsResource.java @@ -45,6 +45,7 @@ public class HdfsResource extends Resource { public static final String HADOOP_FS_PREFIX = "dfs."; public static String HADOOP_FS_NAME = "fs.defaultFS"; + public static String HADOOP_FS_ROOT_PATH = "root_path"; public static String HADOOP_SHORT_CIRCUIT = "dfs.client.read.shortcircuit"; public static String HADOOP_SOCKET_PATH = "dfs.domain.socket.path"; public static String DSF_NAMESERVICES = "dfs.nameservices"; @@ -106,6 +107,8 @@ public static THdfsParams generateHdfsParam(Map properties) { for (Map.Entry property : properties.entrySet()) { if (property.getKey().equalsIgnoreCase(HADOOP_FS_NAME)) { tHdfsParams.setFsName(property.getValue()); + } else if (property.getKey().equalsIgnoreCase(HADOOP_FS_ROOT_PATH)) { + tHdfsParams.setRootPath(property.getValue()); } else if (property.getKey().equalsIgnoreCase(AuthenticationConfig.HADOOP_USER_NAME)) { tHdfsParams.setUser(property.getValue()); } else if (property.getKey().equalsIgnoreCase(AuthenticationConfig.HADOOP_KERBEROS_PRINCIPAL)) { diff --git a/gensrc/thrift/PlanNodes.thrift b/gensrc/thrift/PlanNodes.thrift index 148db2b9a1709bb..2fadcdae5387952 100644 --- a/gensrc/thrift/PlanNodes.thrift +++ b/gensrc/thrift/PlanNodes.thrift @@ -154,6 +154,8 @@ struct THdfsParams { 3: optional string hdfs_kerberos_principal 4: optional string hdfs_kerberos_keytab 5: optional list hdfs_conf + // Used for Cold Heat Separation to specify the root path + 6: optional string root_path } // One broker range information. 
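
For illustration, a resource definition against the patched code could supply the new key alongside the existing HDFS properties. This is only a sketch: the "fs.defaultFS" and "root_path" keys correspond to HADOOP_FS_NAME and HADOOP_FS_ROOT_PATH in this diff, while the resource name, the "type" key, and the host and path values are illustrative assumptions that are not part of the change:

    CREATE RESOURCE "remote_hdfs" PROPERTIES (
        "type" = "hdfs",                              -- assumed resource type key, not shown in this diff
        "fs.defaultFS" = "hdfs://nameservice1:8020",  -- mapped to THdfsParams.fs_name
        "root_path" = "/user/doris/cooldown"          -- new key, mapped to THdfsParams.root_path
    );

On the BE side, update_hdfs_resource then passes this root_path into HdfsFileSystem::create, so the value becomes the RemoteFileSystem root path under which remote files for the resource are resolved.
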
From 38e3ed017e96424bd1c6ca19b0c441d383cba269 Mon Sep 17 00:00:00 2001 From: kkop <45192870+cjj2010@users.noreply.github.com> Date: Mon, 25 Mar 2024 23:36:05 +0800 Subject: [PATCH 18/22] [enhancement](test) unique model by modify a value type from LARGEINT to other type (#32718) --- .../test_unique_model_schema_value_change.out | 40 ++++ ...st_unique_model_schema_value_change.groovy | 218 +++++++++++++++++- 2 files changed, 254 insertions(+), 4 deletions(-) diff --git a/regression-test/data/schema_change_p0/test_unique_model_schema_value_change.out b/regression-test/data/schema_change_p0/test_unique_model_schema_value_change.out index 593a45475daf4aa..7babab624e0de47 100644 --- a/regression-test/data/schema_change_p0/test_unique_model_schema_value_change.out +++ b/regression-test/data/schema_change_p0/test_unique_model_schema_value_change.out @@ -419,3 +419,43 @@ 789012345 Grace 21234683141 Xian 29 0 13333333333 No. 222 Street, Xian 2022-07-07T22:00 123456689 Alice asd Yaan 25 0 13812345678 No. 123 Street, Beijing 2022-01-01T10:00 +-- ! -- +123456789 Alice 2.14748324E10 Beijing 25 0 13812345678 No. 123 Street, Beijing 2022-01-01T10:00 +234567890 Bob 2.14743491E10 Shanghai 30 1 13998765432 No. 456 Street, Shanghai 2022-02-02T12:00 +345678901 Carol 2.14742385E11 Guangzhou 28 0 13724681357 No. 789 Street, Guangzhou 2022-03-03T14:00 +456789012 Dave 2.14742835E10 Shenzhen 35 1 13680864279 No. 987 Street, Shenzhen 2022-04-04T16:00 +567890123 Eve 2.12748636E10 Chengdu 27 0 13572468091 No. 654 Street, Chengdu 2022-05-05T18:00 +678901234 Frank 2.12448829E10 Hangzhou 32 1 13467985213 No. 321 Street, Hangzhou 2022-06-06T20:00 +789012345 Grace 2.12346839E10 Xian 29 0 13333333333 No. 222 Street, Xian 2022-07-07T22:00 +123456689 Alice 1.2 Yaan 25 0 13812345678 No. 123 Street, Beijing 2022-01-01T10:00 + +-- ! -- +123456789 Alice 2.1474832641E10 Beijing 25 0 13812345678 No. 123 Street, Beijing 2022-01-01T10:00 +234567890 Bob 2.1474348364E10 Shanghai 30 1 13998765432 No. 456 Street, Shanghai 2022-02-02T12:00 +345678901 Carol 2.14742383441E11 Guangzhou 28 0 13724681357 No. 789 Street, Guangzhou 2022-03-03T14:00 +456789012 Dave 2.1474283141E10 Shenzhen 35 1 13680864279 No. 987 Street, Shenzhen 2022-04-04T16:00 +567890123 Eve 2.1274863141E10 Chengdu 27 0 13572468091 No. 654 Street, Chengdu 2022-05-05T18:00 +678901234 Frank 2.1244883141E10 Hangzhou 32 1 13467985213 No. 321 Street, Hangzhou 2022-06-06T20:00 +789012345 Grace 2.1234683141E10 Xian 29 0 13333333333 No. 222 Street, Xian 2022-07-07T22:00 +123456689 Alice 1.23 Yaan 25 0 13812345678 No. 123 Street, Beijing 2022-01-01T10:00 + +-- ! -- +123456789 Alice 21474832641 Beijing 25 0 13812345678 No. 123 Street, Beijing 2022-01-01T10:00 +234567890 Bob 21474348364 Shanghai 30 1 13998765432 No. 456 Street, Shanghai 2022-02-02T12:00 +345678901 Carol 214742383441 Guangzhou 28 0 13724681357 No. 789 Street, Guangzhou 2022-03-03T14:00 +456789012 Dave 21474283141 Shenzhen 35 1 13680864279 No. 987 Street, Shenzhen 2022-04-04T16:00 +567890123 Eve 21274863141 Chengdu 27 0 13572468091 No. 654 Street, Chengdu 2022-05-05T18:00 +678901234 Frank 21244883141 Hangzhou 32 1 13467985213 No. 321 Street, Hangzhou 2022-06-06T20:00 +789012345 Grace 21234683141 Xian 29 0 13333333333 No. 222 Street, Xian 2022-07-07T22:00 +123456689 Alice asd Yaan 25 0 13812345678 No. 123 Street, Beijing 2022-01-01T10:00 + +-- ! -- +123456789 Alice 21474832641 Beijing 25 0 13812345678 No. 123 Street, Beijing 2022-01-01T10:00 +234567890 Bob 21474348364 Shanghai 30 1 13998765432 No. 
456 Street, Shanghai 2022-02-02T12:00 +345678901 Carol 214742383441 Guangzhou 28 0 13724681357 No. 789 Street, Guangzhou 2022-03-03T14:00 +456789012 Dave 21474283141 Shenzhen 35 1 13680864279 No. 987 Street, Shenzhen 2022-04-04T16:00 +567890123 Eve 21274863141 Chengdu 27 0 13572468091 No. 654 Street, Chengdu 2022-05-05T18:00 +678901234 Frank 21244883141 Hangzhou 32 1 13467985213 No. 321 Street, Hangzhou 2022-06-06T20:00 +789012345 Grace 21234683141 Xian 29 0 13333333333 No. 222 Street, Xian 2022-07-07T22:00 +123456689 Alice asd Yaan 25 0 13812345678 No. 123 Street, Beijing 2022-01-01T10:00 + diff --git a/regression-test/suites/schema_change_p0/test_unique_model_schema_value_change.groovy b/regression-test/suites/schema_change_p0/test_unique_model_schema_value_change.groovy index 0f92f45e1c38106..9137e44d6a82d1d 100644 --- a/regression-test/suites/schema_change_p0/test_unique_model_schema_value_change.groovy +++ b/regression-test/suites/schema_change_p0/test_unique_model_schema_value_change.groovy @@ -894,11 +894,11 @@ suite("test_unique_model_schema_value_change","p0") { //TODO Test the unique model by modify a value type from INT to DECIMAL - errorMessage = "errCode = 2, detailMessage = Can not change INT to DECIMAL32" + errorMessage = "errCode = 2, detailMessage = Can not change INT to DECIMAL128" expectException({ sql initTable sql initTableData - sql """ alter table ${tbName} MODIFY column sn_number DECIMAL """ + sql """ alter table ${tbName} MODIFY column sn_number DECIMAL(38,0) """ insertSql = "insert into ${tbName} values(123456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " waitForSchemaChangeDone({ sql getTableStatusSql @@ -931,6 +931,18 @@ suite("test_unique_model_schema_value_change","p0") { time 60 }, insertSql, true, "${tbName}") + //Test the unique model by modify a value type from INT to VARCHAR + errorMessage="errCode = 2, detailMessage = Can not change from wider type INT to narrower type VARCHAR(2)" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName} MODIFY column sn_number VARCHAR(2) """ + insertSql = "insert into ${tbName} values(123456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 60 + }, insertSql, true, "${tbName}") + },errorMessage) //Test the unique model by modify a value type from INT to STRING sql initTable @@ -1060,11 +1072,11 @@ suite("test_unique_model_schema_value_change","p0") { //TODO Test the unique model by modify a value type from BIGINT to DECIMAL - errorMessage = "errCode = 2, detailMessage = Can not change BIGINT to DECIMAL32" + errorMessage = "errCode = 2, detailMessage = Can not change BIGINT to DECIMAL128" expectException({ sql initTable sql initTableData - sql """ alter table ${tbName} MODIFY column fan_number DECIMAL """ + sql """ alter table ${tbName} MODIFY column fan_number DECIMAL(38,0) """ insertSql = "insert into ${tbName} values(123456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " waitForSchemaChangeDone({ sql getTableStatusSql @@ -1098,6 +1110,20 @@ suite("test_unique_model_schema_value_change","p0") { }, insertSql, true, "${tbName}") + //Test the unique model by modify a value type from BIGINT to VARCHAR(2) + errorMessage="errCode = 2, detailMessage = Can not change from wider type BIGINT to narrower type VARCHAR(2)" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName} MODIFY column fan_number VARCHAR(2) """ + insertSql = "insert into ${tbName} values(123456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 60 + }, insertSql, true, "${tbName}") + },errorMessage) + + //Test the unique model by modify a value type from BIGINT to STRING sql initTable sql initTableData @@ -1108,4 +1134,188 @@ suite("test_unique_model_schema_value_change","p0") { time 60 }, insertSql, true, "${tbName}") + /** + * Test the unique model by modify a value type from LARGEINT to other type + */ + sql """ DROP TABLE IF EXISTS ${tbName} """ + initTable = " CREATE TABLE IF NOT EXISTS ${tbName}\n" + + " (\n" + + " `user_id` LARGEINT NOT NULL COMMENT \"用户id\",\n" + + " `username` VARCHAR(50) NOT NULL COMMENT \"用户昵称\",\n" + + " `item_number` LARGEINT COMMENT \"item序列号\",\n" + + " `city` VARCHAR(20) COMMENT \"用户所在城市\",\n" + + " `age` SMALLINT COMMENT \"用户年龄\",\n" + + " `sex` TINYINT COMMENT \"用户性别\",\n" + + " `phone` LARGEINT COMMENT \"用户电话\",\n" + + " `address` VARCHAR(500) COMMENT \"用户地址\",\n" + + " `register_time` DATETIME COMMENT \"用户注册时间\"\n" + + " )\n" + + " UNIQUE KEY(`user_id`, `username`)\n" + + " DISTRIBUTED BY HASH(`user_id`) BUCKETS 1\n" + + " PROPERTIES (\n" + + " \"replication_allocation\" = \"tag.location.default: 1\",\n" + + " \"enable_unique_key_merge_on_write\" = \"true\"\n" + + " );" + + initTableData = "insert into ${tbName} values(123456789, 'Alice', 21474832641, 'Beijing', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00')," + + " (234567890, 'Bob', 21474348364, 'Shanghai', 30, 1, 13998765432, 'No. 456 Street, Shanghai', '2022-02-02 12:00:00')," + + " (345678901, 'Carol', 214742383441, 'Guangzhou', 28, 0, 13724681357, 'No. 789 Street, Guangzhou', '2022-03-03 14:00:00')," + + " (456789012, 'Dave', 21474283141, 'Shenzhen', 35, 1, 13680864279, 'No. 987 Street, Shenzhen', '2022-04-04 16:00:00')," + + " (567890123, 'Eve', 21274863141, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + + " (678901234, 'Frank', 21244883141, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + + " (789012345, 'Grace', 21234683141, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + + //TODO Test the unique model by modify a value type from LARGEINT to BOOLEAN + errorMessage = "errCode = 2, detailMessage = Can not change LARGEINT to BOOLEAN" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName} MODIFY column item_number BOOLEAN """ + insertSql = "insert into ${tbName} values(123456689, 'Alice', false, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 60 + }, insertSql, true, "${tbName}") + }, errorMessage) + + + // TODO Test the unique model by modify a value type from LARGEINT to TINYINT + errorMessage = "errCode = 2, detailMessage = Can not change LARGEINT to TINYINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName} MODIFY column item_number TINYINT """ + insertSql = "insert into ${tbName} values(123456689, 'Alice', 2, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 60 + }, insertSql, true, "${tbName}") + }, errorMessage) + + + //Test the unique model by modify a value type from LARGEINT to SMALLINT + errorMessage = "errCode = 2, detailMessage = Can not change LARGEINT to SMALLINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName} MODIFY column item_number SMALLINT """ + insertSql = "insert into ${tbName} values(123456689, 'Alice', 3, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 60 + }, insertSql, true, "${tbName}") + }, errorMessage) + + //Test the unique model by modify a value type from LARGEINT to INT + errorMessage = "errCode = 2, detailMessage = Can not change LARGEINT to INT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName} MODIFY column item_number INT """ + insertSql = "insert into ${tbName} values(123456689, 'Alice', 4, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 60 + }, insertSql, true, "${tbName}") + }, errorMessage) + + //Test the unique model by modify a value type from LARGEINT to BIGINT + errorMessage="errCode = 2, detailMessage = Can not change LARGEINT to BIGINT" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName} MODIFY column item_number BIGINT """ + insertSql = "insert into ${tbName} values(123456689, 'Alice', 5, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 60 + }, insertSql, true, "${tbName}") + },errorMessage) + + + //Test the unique model by modify a value type from LARGEINT to FLOAT + sql initTable + sql initTableData + sql """ alter table ${tbName} MODIFY column item_number FLOAT """ + insertSql = "insert into ${tbName} values(123456689, 'Alice', 1.2, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 60 + }, insertSql, true, "${tbName}") + + //Test the unique model by modify a value type from LARGEINT to DOUBLE + sql initTable + sql initTableData + sql """ alter table ${tbName} MODIFY column item_number DOUBLE """ + insertSql = "insert into ${tbName} values(123456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 60 + }, insertSql, true, "${tbName}") + + + //TODO Test the unique model by modify a value type from LARGEINT to DECIMAL + errorMessage = "errCode = 2, detailMessage = Can not change LARGEINT to DECIMAL128" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName} MODIFY column item_number DECIMAL(38,0) """ + insertSql = "insert into ${tbName} values(123456689, 'Alice', 1.23, 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 60 + }, insertSql, true, "${tbName}") + + }, errorMessage) + + //TODO Test the unique model by modify a value type from LARGEINT to CHAR + errorMessage = "errCode = 2, detailMessage = Can not change LARGEINT to CHAR" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName} MODIFY column item_number CHAR(15) """ + insertSql = "insert into ${tbName} values(123456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 60 + }, insertSql, true, "${tbName}") + }, errorMessage) + + + //Test the unique model by modify a value type from LARGEINT to VARCHAR + sql initTable + sql initTableData + sql """ alter table ${tbName} MODIFY column item_number VARCHAR(100) """ + insertSql = "insert into ${tbName} values(123456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 60 + }, insertSql, true, "${tbName}") + + + //Test the unique model by modify a value type from LARGEINT to VARCHAR(2) + errorMessage="errCode = 2, detailMessage = Can not change from wider type LARGEINT to narrower type VARCHAR(2)" + expectException({ + sql initTable + sql initTableData + sql """ alter table ${tbName} MODIFY column item_number VARCHAR(2) """ + insertSql = "insert into ${tbName} values(123456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 60 + }, insertSql, true, "${tbName}") + },errorMessage) + + + //Test the unique model by modify a value type from LARGEINT to STRING + sql initTable + sql initTableData + sql """ alter table ${tbName} MODIFY column item_number STRING """ + insertSql = "insert into ${tbName} values(123456689, 'Alice', 'asd', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00'); " + waitForSchemaChangeDone({ + sql getTableStatusSql + time 60 + }, insertSql, true, "${tbName}") + + } From 0945e4dda76956489a15089177ec1cb86d522e5d Mon Sep 17 00:00:00 2001 From: Tiewei Fang <43782773+BePPPower@users.noreply.github.com> Date: Mon, 25 Mar 2024 23:42:14 +0800 Subject: [PATCH 19/22] [fix](trino-connector) fix trino-connector log (#32498) Trino uses java.util.logging as its log system, and will print a lot of logs to fe.out/be.out. Here is a redirect to a dedicated log file. 
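
The core of the fix, condensed into a standalone sketch: detach the JUL root logger from its default handlers and attach a rotating FileHandler, so everything Trino emits through java.util.logging lands in a dedicated file instead of fe.out/be.out. The log path below is a placeholder — the actual patch writes to DORIS_HOME/log/trinoconnector%g.log via EnvUtils.getDorisHome() — but the handler setup mirrors the added code:

    import java.util.logging.FileHandler;
    import java.util.logging.Level;
    import java.util.logging.Logger;
    import java.util.logging.SimpleFormatter;

    public class TrinoJulRedirectSketch {
        public static void main(String[] args) throws Exception {
            // Same one-line timestamp format the patch configures for SimpleFormatter.
            System.setProperty("java.util.logging.SimpleFormatter.format",
                    "%1$tY-%1$tm-%1$td %1$tH:%1$tM:%1$tS %4$s: %5$s%6$s%n");

            Logger root = Logger.getLogger("");   // JUL root logger, which all Trino loggers inherit from
            root.setUseParentHandlers(false);     // as in the patch: do not forward records to parent handlers

            // Rotating file handler: ~500 MB per file, keep 10 generations, append across restarts.
            // "/tmp/trinoconnector%g.log" is a placeholder; the patch uses EnvUtils.getDorisHome() + "/log/...".
            FileHandler fileHandler = new FileHandler("/tmp/trinoconnector%g.log", 500_000_000, 10, true);
            fileHandler.setLevel(Level.INFO);
            fileHandler.setFormatter(new SimpleFormatter());
            root.addHandler(fileHandler);

            root.info("trino-connector logging now goes to the dedicated file");
        }
    }

Installing the handler on the root logger (Logger.getLogger("")) is what makes this apply to every Trino logger without any per-connector configuration.
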
--- .../TrinoConnectorPluginLoader.java | 15 +++++++++++++++ .../TrinoConnectorPluginLoader.java | 16 ++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/fe/be-java-extensions/trino-connector-scanner/src/main/java/org/apache/doris/trinoconnector/TrinoConnectorPluginLoader.java b/fe/be-java-extensions/trino-connector-scanner/src/main/java/org/apache/doris/trinoconnector/TrinoConnectorPluginLoader.java index 134f315c333bc77..1f08e28b005fa63 100644 --- a/fe/be-java-extensions/trino-connector-scanner/src/main/java/org/apache/doris/trinoconnector/TrinoConnectorPluginLoader.java +++ b/fe/be-java-extensions/trino-connector-scanner/src/main/java/org/apache/doris/trinoconnector/TrinoConnectorPluginLoader.java @@ -30,6 +30,9 @@ import org.apache.logging.log4j.Logger; import java.io.File; +import java.util.logging.FileHandler; +import java.util.logging.Level; +import java.util.logging.SimpleFormatter; public class TrinoConnectorPluginLoader { private static final Logger LOG = LogManager.getLogger(TrinoConnectorPluginLoader.class); @@ -42,6 +45,18 @@ private static class TrinoConnectorPluginLoad { static { try { + // Trino uses jul as its own log system, so the attributes of JUL are configured here + System.setProperty("java.util.logging.SimpleFormatter.format", + "%1$tY-%1$tm-%1$td %1$tH:%1$tM:%1$tS %4$s: %5$s%6$s%n"); + java.util.logging.Logger logger = java.util.logging.Logger.getLogger(""); + logger.setUseParentHandlers(false); + FileHandler fileHandler = new FileHandler(EnvUtils.getDorisHome() + "/log/trinoconnector%g.log", + 500000000, 10, true); + fileHandler.setLevel(Level.INFO); + fileHandler.setFormatter(new SimpleFormatter()); + logger.addHandler(fileHandler); + java.util.logging.LogManager.getLogManager().addLogger(logger); + TypeOperators typeOperators = new TypeOperators(); featuresConfig = new FeaturesConfig(); TypeRegistry typeRegistry = new TypeRegistry(typeOperators, featuresConfig); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/trinoconnector/TrinoConnectorPluginLoader.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/trinoconnector/TrinoConnectorPluginLoader.java index a0d34ffa31e3069..1e08c9effcb00fb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/trinoconnector/TrinoConnectorPluginLoader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/trinoconnector/TrinoConnectorPluginLoader.java @@ -18,6 +18,7 @@ package org.apache.doris.datasource.trinoconnector; import org.apache.doris.common.Config; +import org.apache.doris.common.EnvUtils; import org.apache.doris.trinoconnector.TrinoConnectorPluginManager; import com.google.common.util.concurrent.MoreExecutors; @@ -31,6 +32,9 @@ import org.apache.logging.log4j.Logger; import java.io.File; +import java.util.logging.FileHandler; +import java.util.logging.Level; +import java.util.logging.SimpleFormatter; public class TrinoConnectorPluginLoader { private static final Logger LOG = LogManager.getLogger(TrinoConnectorPluginLoader.class); @@ -44,6 +48,18 @@ private static class TrinoConnectorPluginLoad { static { try { + // Trino uses jul as its own log system, so the attributes of JUL are configured here + System.setProperty("java.util.logging.SimpleFormatter.format", + "%1$tY-%1$tm-%1$td %1$tH:%1$tM:%1$tS %4$s: %5$s%6$s%n"); + java.util.logging.Logger logger = java.util.logging.Logger.getLogger(""); + logger.setUseParentHandlers(false); + FileHandler fileHandler = new FileHandler(EnvUtils.getDorisHome() + "/log/trinoconnector%g.log", + 500000000, 10, true); + 
fileHandler.setLevel(Level.INFO); + fileHandler.setFormatter(new SimpleFormatter()); + logger.addHandler(fileHandler); + java.util.logging.LogManager.getLogManager().addLogger(logger); + typeRegistry = new TypeRegistry(typeOperators, featuresConfig); ServerPluginsProviderConfig serverPluginsProviderConfig = new ServerPluginsProviderConfig() .setInstalledPluginsDir(new File(Config.trino_connector_plugin_dir)); From 04d220d3e43f723119eca988868a37bbe8d12e66 Mon Sep 17 00:00:00 2001 From: wuwenchi Date: Mon, 25 Mar 2024 23:42:48 +0800 Subject: [PATCH 20/22] [testcase](hive)add testcase for partitionvalue (#32768) add test for #32664 --- .../apache/doris/catalog/PartitionKey.java | 5 + .../planner/ListPartitionPrunerV2Test.java | 136 ++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 fe/fe-core/src/test/java/org/apache/doris/planner/ListPartitionPrunerV2Test.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionKey.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionKey.java index b227afdc142eab3..3f11e9ffc4fd1b5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionKey.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/PartitionKey.java @@ -578,4 +578,9 @@ public JsonElement serialize(PartitionKey partitionKey, java.lang.reflect.Type r return result; } } + + // for test + public List getOriginHiveKeys() { + return originHiveKeys; + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/ListPartitionPrunerV2Test.java b/fe/fe-core/src/test/java/org/apache/doris/planner/ListPartitionPrunerV2Test.java new file mode 100644 index 000000000000000..0a00a94d597d9ab --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/ListPartitionPrunerV2Test.java @@ -0,0 +1,136 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.planner; + +import org.apache.doris.analysis.PartitionValue; +import org.apache.doris.catalog.ListPartitionItem; +import org.apache.doris.catalog.PartitionItem; +import org.apache.doris.catalog.PartitionKey; +import org.apache.doris.catalog.ScalarType; +import org.apache.doris.catalog.Type; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.ThreadPoolManager; +import org.apache.doris.datasource.hive.HMSCachedClient; +import org.apache.doris.datasource.hive.HMSExternalCatalog; +import org.apache.doris.datasource.hive.HiveMetaStoreCache; +import org.apache.doris.datasource.hive.ThriftHMSCachedClient; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import mockit.Mock; +import mockit.MockUp; +import org.apache.hadoop.hive.conf.HiveConf; +import org.junit.Assert; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ThreadPoolExecutor; + +public class ListPartitionPrunerV2Test { + @Test + public void testPartitionValuesMap() throws AnalysisException { + List partitionValues = new ArrayList<>(); + partitionValues.add(new PartitionValue("1.123000")); + ArrayList types = new ArrayList<>(); + types.add(ScalarType.DOUBLE); + + // for hive table + PartitionKey key = PartitionKey.createListPartitionKeyWithTypes(partitionValues, types, true); + ListPartitionItem listPartitionItem = new ListPartitionItem(Lists.newArrayList(key)); + Map idToPartitionItem = Maps.newHashMapWithExpectedSize(partitionValues.size()); + idToPartitionItem.put(1L, listPartitionItem); + + // for olap table + PartitionKey key2 = PartitionKey.createListPartitionKeyWithTypes(partitionValues, types, false); + ListPartitionItem listPartitionItem2 = new ListPartitionItem(Lists.newArrayList(key2)); + idToPartitionItem.put(2L, listPartitionItem2); + + Map> partitionValuesMap = ListPartitionPrunerV2.getPartitionValuesMap(idToPartitionItem); + Assert.assertEquals("1.123000", partitionValuesMap.get(1L).get(0)); + Assert.assertEquals("1.123", partitionValuesMap.get(2L).get(0)); + } + + @Test + public void testInvalidateTable() { + + new MockUp(HMSExternalCatalog.class) { + @Mock + public HMSCachedClient getClient() { + return new ThriftHMSCachedClient(new HiveConf(), 2); + } + }; + + new MockUp(ThriftHMSCachedClient.class) { + @Mock + public List listPartitionNames(String dbName, String tblName) { + // Mock is used here to represent the existence of a partition in the original table + return new ArrayList() {{ + add("c1=1.234000"); + }}; + } + }; + + ThreadPoolExecutor executor = ThreadPoolManager.newDaemonFixedThreadPool( + 20, 20, "mgr", 120, false); + HiveMetaStoreCache cache = new HiveMetaStoreCache( + new HMSExternalCatalog(1L, "catalog", null, new HashMap<>(), null), executor); + ArrayList types = new ArrayList<>(); + types.add(ScalarType.DOUBLE); + + // test cache + // the original partition of the table (in mock) will be loaded here + String dbName = "db"; + String tblName = "tb"; + HiveMetaStoreCache.HivePartitionValues partitionValues = cache.getPartitionValues(dbName, tblName, types); + Assert.assertEquals(1, partitionValues.getIdToPartitionItem().size()); + Assert.assertTrue(partitionValues.getIdToPartitionItem().containsKey(0L)); + List items = partitionValues.getIdToPartitionItem().get(0L).getItems(); + Assert.assertEquals(1, items.size()); + PartitionKey partitionKey = items.get(0); + Assert.assertEquals("1.234", 
partitionKey.getKeys().get(0).toString()); + Assert.assertEquals("1.234000", partitionKey.getOriginHiveKeys().get(0)); + + // test add cache + ArrayList values = new ArrayList<>(); + values.add("c1=5.678000"); + cache.addPartitionsCache(dbName, tblName, values, types); + HiveMetaStoreCache.HivePartitionValues partitionValues2 = cache.getPartitionValues(dbName, tblName, types); + Assert.assertEquals(2, partitionValues2.getIdToPartitionItem().size()); + Assert.assertTrue(partitionValues2.getIdToPartitionItem().containsKey(1L)); + List items2 = partitionValues2.getIdToPartitionItem().get(1L).getItems(); + Assert.assertEquals(1, items2.size()); + PartitionKey partitionKey2 = items2.get(0); + Assert.assertEquals("5.678", partitionKey2.getKeys().get(0).toString()); + Assert.assertEquals("5.678000", partitionKey2.getOriginHiveKeys().get(0)); + + // test refresh table + // simulates the manually added partition table being deleted, leaving only one original partition in mock + cache.invalidateTableCache(dbName, tblName); + HiveMetaStoreCache.HivePartitionValues partitionValues3 = cache.getPartitionValues(dbName, tblName, types); + Assert.assertEquals(1, partitionValues3.getIdToPartitionItem().size()); + Assert.assertTrue(partitionValues3.getIdToPartitionItem().containsKey(0L)); + List items3 = partitionValues3.getIdToPartitionItem().get(0L).getItems(); + Assert.assertEquals(1, items3.size()); + PartitionKey partitionKey3 = items3.get(0); + Assert.assertEquals("1.234", partitionKey3.getKeys().get(0).toString()); + Assert.assertEquals("1.234000", partitionKey3.getOriginHiveKeys().get(0)); + } +} From ba541c5930382a18f54a4961a1fd4ceb4ac4beff Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Tue, 26 Mar 2024 09:50:11 +0800 Subject: [PATCH 21/22] [fix](Nereids) system default decimalv3 scale should be 9 (#32754) select round('1.1234', 2) should return 1.12, not 1 --- .../org/apache/doris/nereids/types/DecimalV3Type.java | 2 +- .../rules/expression/SimplifyArithmeticRuleTest.java | 2 +- .../apache/doris/nereids/util/TypeCoercionUtilsTest.java | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DecimalV3Type.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DecimalV3Type.java index f87f95db4a3b0f3..aaef3775b34360c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DecimalV3Type.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/types/DecimalV3Type.java @@ -42,7 +42,7 @@ public class DecimalV3Type extends FractionalType { public static final int MAX_DECIMAL256_PRECISION = 76; public static final DecimalV3Type WILDCARD = new DecimalV3Type(-1, -1); - public static final DecimalV3Type SYSTEM_DEFAULT = new DecimalV3Type(MAX_DECIMAL128_PRECISION, DEFAULT_SCALE); + public static final DecimalV3Type SYSTEM_DEFAULT = new DecimalV3Type(MAX_DECIMAL128_PRECISION, 9); public static final DecimalV3Type CATALOG_DEFAULT = new DecimalV3Type(MAX_DECIMAL32_PRECISION, DEFAULT_SCALE); private static final DecimalV3Type BOOLEAN_DECIMAL = new DecimalV3Type(1, 0); diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/SimplifyArithmeticRuleTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/SimplifyArithmeticRuleTest.java index 4ea50bf1f8817ca..174592270dd9731 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/SimplifyArithmeticRuleTest.java +++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/expression/SimplifyArithmeticRuleTest.java @@ -87,7 +87,7 @@ void testSimplifyArithmeticRuleOnly() { assertRewriteAfterTypeCoercion("(-IA / 2) / ((-IB - 1) / (3 + (IC * 4)))", "(((cast((0 - IA) as DOUBLE) / cast(((0 - IB) - 1) as DOUBLE)) * cast((3 + (IC * 4)) as DOUBLE)) / cast(2 as DOUBLE))"); // unsupported decimal - assertRewriteAfterTypeCoercion("-2 - MA - ((1 - IB) - (3 + IC))", "((cast(-2 as DECIMALV3(38, 0)) - MA) - cast(((1 - IB) - (3 + IC)) as DECIMALV3(38, 0)))"); + assertRewriteAfterTypeCoercion("-2 - MA - ((1 - IB) - (3 + IC))", "((cast(-2 as DECIMALV3(38, 9)) - MA) - cast(((1 - IB) - (3 + IC)) as DECIMALV3(38, 9)))"); assertRewriteAfterTypeCoercion("-IA / 2.0 * ((-IB - 1) - (3 + (IC + 4)))", "((cast((0 - IA) as DECIMALV3(25, 5)) / 2.0) * cast((((0 - IB) - 1) - (3 + (IC + 4))) as DECIMALV3(20, 0)))"); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/TypeCoercionUtilsTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/TypeCoercionUtilsTest.java index a30d67d4d314e45..d9a2946ba883c5f 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/TypeCoercionUtilsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/TypeCoercionUtilsTest.java @@ -284,7 +284,7 @@ public void testFindCommonPrimitiveTypeForCaseWhen() { testFindCommonPrimitiveTypeForCaseWhen(LargeIntType.INSTANCE, LargeIntType.INSTANCE, BigIntType.INSTANCE); testFindCommonPrimitiveTypeForCaseWhen(LargeIntType.INSTANCE, LargeIntType.INSTANCE, LargeIntType.INSTANCE); testFindCommonPrimitiveTypeForCaseWhen(DecimalV2Type.SYSTEM_DEFAULT, LargeIntType.INSTANCE, DecimalV2Type.SYSTEM_DEFAULT); - testFindCommonPrimitiveTypeForCaseWhen(DecimalV3Type.SYSTEM_DEFAULT, LargeIntType.INSTANCE, DecimalV3Type.SYSTEM_DEFAULT); + testFindCommonPrimitiveTypeForCaseWhen(DecimalV3Type.createDecimalV3Type(38), LargeIntType.INSTANCE, DecimalV3Type.createDecimalV3Type(38)); testFindCommonPrimitiveTypeForCaseWhen(DoubleType.INSTANCE, LargeIntType.INSTANCE, FloatType.INSTANCE); testFindCommonPrimitiveTypeForCaseWhen(DoubleType.INSTANCE, LargeIntType.INSTANCE, DoubleType.INSTANCE); testFindCommonPrimitiveTypeForCaseWhen(StringType.INSTANCE, LargeIntType.INSTANCE, CharType.SYSTEM_DEFAULT); @@ -308,7 +308,7 @@ public void testFindCommonPrimitiveTypeForCaseWhen() { testFindCommonPrimitiveTypeForCaseWhen(DecimalV2Type.SYSTEM_DEFAULT, DecimalV2Type.SYSTEM_DEFAULT, LargeIntType.INSTANCE); testFindCommonPrimitiveTypeForCaseWhen(DecimalV2Type.SYSTEM_DEFAULT, DecimalV2Type.SYSTEM_DEFAULT, DecimalV2Type.SYSTEM_DEFAULT); - testFindCommonPrimitiveTypeForCaseWhen(DoubleType.INSTANCE, DecimalV2Type.SYSTEM_DEFAULT, + testFindCommonPrimitiveTypeForCaseWhen(DecimalV3Type.SYSTEM_DEFAULT, DecimalV2Type.SYSTEM_DEFAULT, DecimalV3Type.SYSTEM_DEFAULT); testFindCommonPrimitiveTypeForCaseWhen(DoubleType.INSTANCE, DecimalV2Type.SYSTEM_DEFAULT, FloatType.INSTANCE); testFindCommonPrimitiveTypeForCaseWhen(DoubleType.INSTANCE, DecimalV2Type.SYSTEM_DEFAULT, DoubleType.INSTANCE); @@ -331,10 +331,10 @@ public void testFindCommonPrimitiveTypeForCaseWhen() { testFindCommonPrimitiveTypeForCaseWhen(DecimalV3Type.SYSTEM_DEFAULT, DecimalV3Type.SYSTEM_DEFAULT, SmallIntType.INSTANCE); testFindCommonPrimitiveTypeForCaseWhen(DecimalV3Type.SYSTEM_DEFAULT, DecimalV3Type.SYSTEM_DEFAULT, IntegerType.INSTANCE); testFindCommonPrimitiveTypeForCaseWhen(DecimalV3Type.SYSTEM_DEFAULT, DecimalV3Type.SYSTEM_DEFAULT, BigIntType.INSTANCE); - 
testFindCommonPrimitiveTypeForCaseWhen(DecimalV3Type.SYSTEM_DEFAULT, DecimalV3Type.SYSTEM_DEFAULT, LargeIntType.INSTANCE); + testFindCommonPrimitiveTypeForCaseWhen(DecimalV3Type.createDecimalV3Type(38), DecimalV3Type.createDecimalV3Type(38), LargeIntType.INSTANCE); testFindCommonPrimitiveTypeForCaseWhen(DecimalV3Type.SYSTEM_DEFAULT, DecimalV3Type.SYSTEM_DEFAULT, DecimalV2Type.createDecimalV2Type(27, 0)); - testFindCommonPrimitiveTypeForCaseWhen(DoubleType.INSTANCE, DecimalV3Type.SYSTEM_DEFAULT, + testFindCommonPrimitiveTypeForCaseWhen(DecimalV3Type.SYSTEM_DEFAULT, DecimalV3Type.SYSTEM_DEFAULT, DecimalV2Type.SYSTEM_DEFAULT); testFindCommonPrimitiveTypeForCaseWhen(DecimalV3Type.SYSTEM_DEFAULT, DecimalV3Type.SYSTEM_DEFAULT, DecimalV3Type.SYSTEM_DEFAULT); From c6c1d1c946b09e114e81e2a29d31d275307b02e0 Mon Sep 17 00:00:00 2001 From: qiye Date: Tue, 26 Mar 2024 10:05:37 +0800 Subject: [PATCH 22/22] [improvement](index compaction)Use tmp dir as the temporary index writer path (#32686) Co-authored-by: Luennng --- be/src/olap/compaction.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 141cdce24aea148..2947a7a5fc28452 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -617,7 +617,8 @@ Status CompactionMixin::do_inverted_index_compaction() { // we choose the first destination segment name as the temporary index writer path // Used to distinguish between different index compaction - auto index_tmp_path = tablet_path + "/" + dest_rowset_id.to_string() + "_" + "tmp"; + auto tmp_file_dir = ExecEnv::GetInstance()->get_tmp_file_dirs()->get_tmp_file_dir(); + auto index_tmp_path = tmp_file_dir / dest_rowset_id.to_string(); LOG(INFO) << "start index compaction" << ". tablet=" << _tablet->tablet_id() << ", source index size=" << src_segment_num << ", destination index size=" << dest_segment_num << ".";