From 112a904f844c46b34063f3c7babd474d8654aa7a Mon Sep 17 00:00:00 2001 From: laihui Date: Sun, 22 Dec 2024 18:12:58 +0800 Subject: [PATCH] fix core dump when parsing csv with enclose --- .../vec/exec/format/file_reader/new_plain_text_line_reader.cpp | 3 ++- .../broker_load/test_csv_with_enclose_and_escapeS3_load.out | 2 ++ .../load_p0/stream_load/test_csv_with_enclose_and_escape.out | 2 ++ .../stream_load/test_csv_with_enclose_and_escape.groovy | 2 +- 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp index 9a09a90d1aa4ad..ad86cca212b6ed 100644 --- a/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp +++ b/be/src/vec/exec/format/file_reader/new_plain_text_line_reader.cpp @@ -173,8 +173,9 @@ void EncloseCsvLineReaderContext::_on_pre_match_enclose(const uint8_t* start, si void EncloseCsvLineReaderContext::_on_match_enclose(const uint8_t* start, size_t& len) { const uint8_t* curr_start = start + _idx; + size_t curr_len = len - _idx; const uint8_t* delim_pos = - find_col_sep_func(curr_start, _column_sep_len, _column_sep.c_str(), _column_sep_len); + find_col_sep_func(curr_start, curr_len, _column_sep.c_str(), _column_sep_len); if (delim_pos != nullptr) [[likely]] { on_col_sep_found(start, delim_pos); diff --git a/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out b/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out index 53aea0d8f89a21..cbcd131d675ea2 100644 --- a/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out +++ b/regression-test/data/load_p0/broker_load/test_csv_with_enclose_and_escapeS3_load.out @@ -9,3 +9,5 @@ 3 abc"de,fg"h 2023-07-15 i\nj,k\n" 2023-07-20:05:48:31 ghi 6 ab"c 2023-07-20 d"ef" 2023-07-20:05:48:31 "g"hi 7 aaa 2023-07-20 2023-07-20:05:48:31 +8 aaa"bbb"ccc 2023-07-20 "aa"bb 2023-07-20:05:48:31 aa"bb" +9 aa,"bbb cc" 2023-07-20 ""aa"bb ,2023-07-20:05:48:31,"aa"bb" diff --git a/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out b/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out index 18b4968b7cd0d4..5646d96230f4a9 100644 --- a/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out +++ b/regression-test/data/load_p0/stream_load/test_csv_with_enclose_and_escape.out @@ -5,6 +5,8 @@ 3 abc"de,fg"h 2023-07-15 i\nj,k\n" 2023-07-20T05:48:31 ghi 6 ab"c 2023-07-20 d"ef" 2023-07-20T05:48:31 "g"hi 7 aaa 2023-07-20 2023-07-20T05:48:31 +8 aaa"bbb"ccc 2023-07-20 "aa"bb 2023-07-20T05:48:31 aa"bb" +9 aa,"bbb \N 2023-07-20 \N ,2023-07-20:05:48:31,"aa"bb" 10 ab@@cd@@efg 2023-07-20 ab@@cd$$$efg 2023-07-20T05:48:31 @@ab$$$cd$$$ 10 ab@@cd@@efg 2023-07-20 ab@@cd$$$efg 2023-07-20T05:48:31 @@ab$$$cd$$$ 10 abc \N "def" \N "ghi" diff --git a/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy b/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy index 7e24b4158c887d..1562fa35cfd99d 100644 --- a/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy +++ b/regression-test/suites/load_p0/stream_load/test_csv_with_enclose_and_escape.groovy @@ -85,7 +85,7 @@ suite("test_csv_with_enclose_and_escape", "p0") { result, exception, startTime, endTime -> assertTrue(exception == null) def json = parseJson(result) - assertEquals("Fail", json.Status) + assertEquals("Success", json.Status) } }