Skip to content

Commit

Permalink
[Fix](JsonReader) Return correct status when parse failed (apache#39206)
Browse files Browse the repository at this point in the history
When `JsonFunctions::extract_from_object` is called with an input that is
not an object (for example, null), simdjson reports
`simdjson::INCORRECT_TYPE`. This error should be treated as "not found" so
that the default value is filled in during later processing. For example,
if the json path is `$.city.name` but the input json is `{"city": null}`,
then `Status::NotFound` should be returned, and the column will
subsequently be filled with default values.
  • Loading branch information
eldenmoon authored Aug 12, 2024
1 parent 3040b1a commit ee4d0a7
Show file tree
Hide file tree
Showing 4 changed files with 52 additions and 3 deletions.
9 changes: 7 additions & 2 deletions be/src/exprs/json_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>
#include <re2/re2.h>
#include <simdjson/error.h>
#include <simdjson/simdjson.h> // IWYU pragma: keep
#include <stdlib.h>

Expand Down Expand Up @@ -259,13 +260,17 @@ Status JsonFunctions::extract_from_object(simdjson::ondemand::object& obj,
const std::vector<JsonPath>& jsonpath,
simdjson::ondemand::value* value) noexcept {
// Return DataQualityError when it's a malformed json.
// Otherwise the path was not found, due to array out of bound or not exist
// Otherwise the path was not found, due to
// 1. the array index is out of bounds
// 2. no such field exists in the object
// 3. the input is not an object (e.g. null or another type), which yields simdjson::INCORRECT_TYPE
#define HANDLE_SIMDJSON_ERROR(err, msg) \
do { \
const simdjson::error_code& _err = err; \
const std::string& _msg = msg; \
if (UNLIKELY(_err)) { \
if (_err == simdjson::NO_SUCH_FIELD || _err == simdjson::INDEX_OUT_OF_BOUNDS) { \
if (_err == simdjson::NO_SUCH_FIELD || _err == simdjson::INDEX_OUT_OF_BOUNDS || \
_err == simdjson::INCORRECT_TYPE) { \
return Status::NotFound<false>( \
fmt::format("Not found target filed, err: {}, msg: {}", \
simdjson::error_message(_err), _msg)); \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[
{
"id": 789,
"city": {
"name": "beijing",
"region": "haidian"
}
},
{
"id": 1111,
"city": null
}
]
6 changes: 5 additions & 1 deletion regression-test/data/load_p0/stream_load/test_json_load.out
Original file line number Diff line number Diff line change
Expand Up @@ -255,4 +255,8 @@ test k2_value
12345 {"k1":12345,"k2":"11111","k3":111111,"k4":[11111]} {"k1":12345,"k2":"11111","k3":111111,"k4":[11111]} 111111
12346 {"k1":12346,"k2":"22222","k4":[22222]} {"k1":12346,"k2":"22222","k4":[22222]} \N
12347 {"k1":12347,"k3":"33333","k4":[22222]} {"k1":12347,"k3":"33333","k4":[22222]} 33333
12348 {"k1":12348,"k3":"33333","k5":{"k51":1024,"xxxx":[11111]}} {"k1":12348,"k3":"33333","k5":{"k51":1024,"xxxx":[11111]}} 33333
12348 {"k1":12348,"k3":"33333","k5":{"k51":1024,"xxxx":[11111]}} {"k1":12348,"k3":"33333","k5":{"k51":1024,"xxxx":[11111]}} 33333

-- !select31 --
789 beijing haidian
1111 \N \N
27 changes: 27 additions & 0 deletions regression-test/suites/load_p0/stream_load/test_json_load.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -905,4 +905,31 @@ suite("test_json_load", "p0,nonConcurrent") {
} finally {
// try_sql("DROP TABLE IF EXISTS ${testTable}")
}

// test extracting a json path from a non-object value (e.g. null)
try {
sql "DROP TABLE IF EXISTS ${testTable}"
sql """
CREATE TABLE ${testTable} (
`id` int NOT NULL,
`name` varchar(24) NULL,
`region` varchar(30) NULL
) ENGINE=OLAP
DUPLICATE KEY(`id`)
COMMENT ''
DISTRIBUTED BY RANDOM BUCKETS AUTO
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
"""

load_json_data.call("${testTable}", "${testTable}_case31", 'true', 'false', 'json', '', '[\"$.id\", \"$.city.name\", \"$.city.region\"]',
'', '', '', 'test_json_extract_path_invalid_type.json', false, 2)

sql "sync"
qt_select31 "select * from ${testTable} order by id"

} finally {
// try_sql("DROP TABLE IF EXISTS ${testTable}")
}
}

0 comments on commit ee4d0a7

Please sign in to comment.