From 559c4027c764c13f62ecddd1e3ac5674bd102b86 Mon Sep 17 00:00:00 2001 From: Rusty Conover Date: Wed, 10 Apr 2024 07:58:12 -0400 Subject: [PATCH] fix: change yyjson comparisions to type rather than tag. Many comparisons where performed using `yyjson_get_tag()` rather than `yyjson_get_type()`. The tag can have additional information set using bits beyond just the type, causing these type comparisons to fail and JSON failing to parse. fix: fix the extension to build with current duckdb main branch. Fix a few std::move() calls and a call to fs.OpenFile(). --- src/common/iceberg.cpp | 4 ++-- src/common/schema.cpp | 14 +++++++------- src/common/utils.cpp | 8 ++++---- src/iceberg_functions/iceberg_snapshots.cpp | 2 +- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/common/iceberg.cpp b/src/common/iceberg.cpp index ba2f284..669ebe4 100644 --- a/src/common/iceberg.cpp +++ b/src/common/iceberg.cpp @@ -127,7 +127,7 @@ unique_ptr IcebergSnapshot::GetParseInfo(const string &path, parse_info->doc = doc; parse_info->document = std::move(metadata_json); - return std::move(parse_info); + return parse_info; } IcebergSnapshot IcebergSnapshot::GetLatestSnapshot(const string &path, FileSystem &fs, @@ -195,7 +195,7 @@ IcebergSnapshot IcebergSnapshot::ParseSnapShot(yyjson_val *snapshot, idx_t icebe vector &schemas, string metadata_compression_codec, bool skip_schema_inference) { IcebergSnapshot ret; - auto snapshot_tag = yyjson_get_tag(snapshot); + auto snapshot_tag = yyjson_get_type(snapshot); if (snapshot_tag != YYJSON_TYPE_OBJ) { throw IOException("Invalid snapshot field found parsing iceberg metadata.json"); } diff --git a/src/common/schema.cpp b/src/common/schema.cpp index 0aeb7c4..4c2e012 100644 --- a/src/common/schema.cpp +++ b/src/common/schema.cpp @@ -9,7 +9,7 @@ namespace duckdb { static LogicalType ParseType(yyjson_val *type); static LogicalType ParseStruct(yyjson_val *struct_type) { - D_ASSERT(yyjson_get_tag(struct_type) == YYJSON_TYPE_OBJ); + D_ASSERT(yyjson_get_type(struct_type) == YYJSON_TYPE_OBJ); D_ASSERT(IcebergUtils::TryGetStrFromObject(struct_type, "type") == "struct"); child_list_t children; @@ -28,7 +28,7 @@ static LogicalType ParseStruct(yyjson_val *struct_type) { } static LogicalType ParseList(yyjson_val *list_type) { - D_ASSERT(yyjson_get_tag(list_type) == YYJSON_TYPE_OBJ); + D_ASSERT(yyjson_get_type(list_type) == YYJSON_TYPE_OBJ); D_ASSERT(IcebergUtils::TryGetStrFromObject(list_type, "type") == "list"); // NOTE: 'element-id', 'element-required' are ignored for now @@ -38,7 +38,7 @@ static LogicalType ParseList(yyjson_val *list_type) { } static LogicalType ParseMap(yyjson_val *map_type) { - D_ASSERT(yyjson_get_tag(map_type) == YYJSON_TYPE_OBJ); + D_ASSERT(yyjson_get_type(map_type) == YYJSON_TYPE_OBJ); D_ASSERT(IcebergUtils::TryGetStrFromObject(map_type, "type") == "map"); // NOTE: 'key-id', 'value-id', 'value-required' are ignored for now @@ -51,7 +51,7 @@ static LogicalType ParseMap(yyjson_val *map_type) { } static LogicalType ParseComplexType(yyjson_val *type) { - D_ASSERT(yyjson_get_tag(type) == YYJSON_TYPE_OBJ); + D_ASSERT(yyjson_get_type(type) == YYJSON_TYPE_OBJ); auto type_str = IcebergUtils::TryGetStrFromObject(type, "type"); if (type_str == "struct") { @@ -73,10 +73,10 @@ static LogicalType ParseType(yyjson_val *type) { if (!val) { throw IOException("Invalid field found while parsing field: type"); } - if (yyjson_get_tag(val) == YYJSON_TYPE_OBJ) { + if (yyjson_get_type(val) == YYJSON_TYPE_OBJ) { return ParseComplexType(val); } - if (yyjson_get_tag(val) != YYJSON_TYPE_STR) { + if (yyjson_get_type(val) != YYJSON_TYPE_STR) { throw IOException("Invalid field found while parsing field: type"); } @@ -154,7 +154,7 @@ static vector ParseSchemaFromJson(yyjson_val *schema_js if (type_str != "struct") { throw IOException("Schema in JSON Metadata is invalid"); } - D_ASSERT(yyjson_get_tag(schema_json) == YYJSON_TYPE_OBJ); + D_ASSERT(yyjson_get_type(schema_json) == YYJSON_TYPE_OBJ); D_ASSERT(IcebergUtils::TryGetStrFromObject(schema_json, "type") == "struct"); yyjson_val *field; size_t max, idx; diff --git a/src/common/utils.cpp b/src/common/utils.cpp index 230a3c7..3225cee 100644 --- a/src/common/utils.cpp +++ b/src/common/utils.cpp @@ -8,7 +8,7 @@ namespace duckdb { string IcebergUtils::FileToString(const string &path, FileSystem &fs) { auto handle = - fs.OpenFile(path, FileFlags::FILE_FLAGS_READ, FileSystem::DEFAULT_LOCK, FileSystem::DEFAULT_COMPRESSION); + fs.OpenFile(path, FileFlags::FILE_FLAGS_READ); auto file_size = handle->GetFileSize(); string ret_val(file_size, ' '); handle->Read((char *)ret_val.c_str(), file_size); @@ -38,7 +38,7 @@ string IcebergUtils::GetFullPath(const string &iceberg_path, const string &relat uint64_t IcebergUtils::TryGetNumFromObject(yyjson_val *obj, const string &field) { auto val = yyjson_obj_getn(obj, field.c_str(), field.size()); - if (!val || yyjson_get_tag(val) != YYJSON_TYPE_NUM) { + if (!val || yyjson_get_type(val) != YYJSON_TYPE_NUM) { throw IOException("Invalid field found while parsing field: " + field); } return yyjson_get_uint(val); @@ -46,7 +46,7 @@ uint64_t IcebergUtils::TryGetNumFromObject(yyjson_val *obj, const string &field) bool IcebergUtils::TryGetBoolFromObject(yyjson_val *obj, const string &field) { auto val = yyjson_obj_getn(obj, field.c_str(), field.size()); - if (!val || yyjson_get_tag(val) != YYJSON_TYPE_BOOL) { + if (!val || yyjson_get_type(val) != YYJSON_TYPE_BOOL) { throw IOException("Invalid field found while parsing field: " + field); } return yyjson_get_bool(val); @@ -54,7 +54,7 @@ bool IcebergUtils::TryGetBoolFromObject(yyjson_val *obj, const string &field) { string IcebergUtils::TryGetStrFromObject(yyjson_val *obj, const string &field) { auto val = yyjson_obj_getn(obj, field.c_str(), field.size()); - if (!val || yyjson_get_tag(val) != YYJSON_TYPE_STR) { + if (!val || yyjson_get_type(val) != YYJSON_TYPE_STR) { throw IOException("Invalid field found while parsing field: " + field); } return yyjson_get_str(val); diff --git a/src/iceberg_functions/iceberg_snapshots.cpp b/src/iceberg_functions/iceberg_snapshots.cpp index ec76b42..4dd5dd7 100644 --- a/src/iceberg_functions/iceberg_snapshots.cpp +++ b/src/iceberg_functions/iceberg_snapshots.cpp @@ -117,7 +117,7 @@ TableFunctionSet IcebergFunctions::GetIcebergSnapshotsFunction() { table_function.named_parameters["metadata_compression_codec"] = LogicalType::VARCHAR; table_function.named_parameters["skip_schema_inference"] = LogicalType::BOOLEAN; function_set.AddFunction(table_function); - return std::move(function_set); + return function_set; } } // namespace duckdb