Skip to content

Commit

Permalink
[Improvement](JSONB) improve performance JSONB initial json parsing u…
Browse files Browse the repository at this point in the history
…sing simdjson (apache#15219)

test data: https://data.gharchive.org/2020-11-13-18.json.gz, 2GB, 197696 lines
before: String 13s vs. JSONB 28s
after: String 13s vs. JSONB 16s

**NOTICE: simdjson needs to be patched because its BOOL conflicts with the macro BOOL defined in the ODBC header sqltypes.h**
  • Loading branch information
xiaokang authored Dec 29, 2022
1 parent 1b1083e commit 0f3c0b7
Show file tree
Hide file tree
Showing 6 changed files with 369 additions and 7 deletions.
1 change: 1 addition & 0 deletions .licenserc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ header:
- "be/src/util/jsonb_document.h"
- "be/src/util/jsonb_error.h"
- "be/src/util/jsonb_parser.h"
- "be/src/util/jsonb_parser_simd.h"
- "be/src/util/jsonb_stream.h"
- "be/src/util/jsonb_updater.h"
- "be/src/util/jsonb_utils.h"
Expand Down
2 changes: 2 additions & 0 deletions be/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,8 @@ if ("${CMAKE_BUILD_TARGET_ARCH}" STREQUAL "x86" OR "${CMAKE_BUILD_TARGET_ARCH}"
if (USE_AVX2)
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -mavx2")
endif()
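# set -msse4.2 for simdjson (NOTE: this comment previously named -mlzcnt for leading zero count; -msse4.2 is the flag actually set below -- confirm which is intended)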
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -msse4.2")
endif()
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-attributes -DS2_USE_GFLAGS -DS2_USE_GLOG")

Expand Down
4 changes: 2 additions & 2 deletions be/src/runtime/jsonb_value.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include "util/cpu_info.h"
#include "util/hash_util.hpp"
#include "util/jsonb_error.h"
#include "util/jsonb_parser.h"
#include "util/jsonb_parser_simd.h"
#include "util/jsonb_utils.h"
#include "vec/common/string_ref.h"

Expand All @@ -38,7 +38,7 @@ struct JsonBinaryValue {
// default nullptr and size 0 for invalid or NULL value
const char* ptr = nullptr;
size_t len = 0;
JsonbParser parser;
JsonbParserSIMD parser;

JsonBinaryValue() : ptr(nullptr), len(0) {}
JsonBinaryValue(char* ptr, int len) { from_json_string(const_cast<const char*>(ptr), len); }
Expand Down
10 changes: 8 additions & 2 deletions be/src/util/jsonb_error.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@ enum class JsonbErrType {
E_EMPTY_DOCUMENT,
E_OUTPUT_FAIL,
E_INVALID_DOCU,
E_INVALID_TYPE,
E_INVALID_SCALAR_VALUE,
E_INVALID_KEY_STRING,
E_INVALID_KEY_LENGTH,
E_INVALID_STR,
E_INVALID_OBJ,
E_INVALID_ARR,
E_INVALID_NUMBER,
E_INVALID_HEX,
E_INVALID_OCTAL,
E_INVALID_DECIMAL,
Expand All @@ -53,6 +55,7 @@ enum class JsonbErrType {
E_INVALID_JSONB_OBJ,
E_NESTING_LVL_OVERFLOW,
E_INVALID_DOCU_COMPAT,
E_EXCEPTION,

// new error code should always be added above
E_NUM_ERRORS
Expand All @@ -77,13 +80,15 @@ class JsonbErrMsg {
"Invalid document version",
"Empty document",
"Fatal error in writing JSONB",
"Invalid document: document must be an object or an array",
"Invalid document",
"Invalid json value type",
"Invalid scalar value",
"Invalid key string",
"Key length exceeds maximum size allowed (64 bytes)",
"Invalid string value",
"Invalid JSON object",
"Invalid JSON array",
"Invalid number",
"Invalid HEX number",
"Invalid octal number",
"Invalid decimal number",
Expand All @@ -100,7 +105,8 @@ class JsonbErrMsg {
"Invalid update operation",
"Invalid JSONB object (internal)",
"Object or array has too many nesting levels",
"Invalid document: document must be an object or an array",
"Invalid document",
"Exception throwed",

nullptr /* E_NUM_ERRORS */
};
Expand Down
Loading

0 comments on commit 0f3c0b7

Please sign in to comment.