Skip to content

Commit

Permalink
Update vendored DuckDB sources to 2c3845f
Browse files Browse the repository at this point in the history
  • Loading branch information
duckdblabs-bot committed Dec 5, 2024
1 parent 2c3845f commit 3d187b9
Show file tree
Hide file tree
Showing 40 changed files with 449 additions and 207 deletions.
3 changes: 2 additions & 1 deletion src/duckdb/extension/json/include/json_scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,8 @@ struct JSONScanGlobalState {

//! Column names that we're actually reading (after projection pushdown)
vector<string> names;
vector<column_t> column_indices;
vector<column_t> column_ids;
vector<ColumnIndex> column_indices;

//! Buffer manager allocator
Allocator &allocator;
Expand Down
7 changes: 5 additions & 2 deletions src/duckdb/extension/json/include/json_transform.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

#pragma once

#include "duckdb/common/column_index.hpp"
#include "duckdb/common/optional_ptr.hpp"
#include "duckdb/function/scalar/strftime_format.hpp"
#include "json_common.hpp"

Expand Down Expand Up @@ -64,9 +66,10 @@ struct TryParseTimeStamp {

struct JSONTransform {
static bool Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count,
JSONTransformOptions &options);
JSONTransformOptions &options, optional_ptr<const ColumnIndex> column_index);
static bool TransformObject(yyjson_val *objects[], yyjson_alc *alc, const idx_t count, const vector<string> &names,
const vector<Vector *> &result_vectors, JSONTransformOptions &options);
const vector<Vector *> &result_vectors, JSONTransformOptions &options,
optional_ptr<const vector<ColumnIndex>> column_indices, bool error_unknown_key);
static bool GetStringVector(yyjson_val *vals[], const idx_t count, const LogicalType &target, Vector &string_vector,
JSONTransformOptions &options);
};
Expand Down
96 changes: 60 additions & 36 deletions src/duckdb/extension/json/json_functions/json_create.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,25 +40,36 @@ static LogicalType GetJSONType(StructNames &const_struct_names, const LogicalTyp
// These types can go directly into JSON
case LogicalTypeId::SQLNULL:
case LogicalTypeId::BOOLEAN:
case LogicalTypeId::BIGINT:
case LogicalTypeId::UBIGINT:
case LogicalTypeId::DOUBLE:
return type;
// We cast these types to a type that can go into JSON
case LogicalTypeId::TINYINT:
case LogicalTypeId::SMALLINT:
case LogicalTypeId::INTEGER:
return LogicalType::BIGINT;
case LogicalTypeId::BIGINT:
case LogicalTypeId::HUGEINT:
case LogicalTypeId::UHUGEINT:
case LogicalTypeId::UTINYINT:
case LogicalTypeId::USMALLINT:
case LogicalTypeId::UINTEGER:
return LogicalType::UBIGINT;
case LogicalTypeId::UBIGINT:
case LogicalTypeId::FLOAT:
case LogicalTypeId::DOUBLE:
case LogicalTypeId::BIT:
case LogicalTypeId::BLOB:
case LogicalTypeId::VARCHAR:
case LogicalTypeId::AGGREGATE_STATE:
case LogicalTypeId::ENUM:
case LogicalTypeId::DATE:
case LogicalTypeId::INTERVAL:
case LogicalTypeId::TIME:
case LogicalTypeId::TIME_TZ:
case LogicalTypeId::TIMESTAMP:
case LogicalTypeId::TIMESTAMP_TZ:
case LogicalTypeId::TIMESTAMP_NS:
case LogicalTypeId::TIMESTAMP_MS:
case LogicalTypeId::TIMESTAMP_SEC:
case LogicalTypeId::UUID:
case LogicalTypeId::VARINT:
case LogicalTypeId::DECIMAL:
case LogicalTypeId::UHUGEINT:
case LogicalTypeId::HUGEINT:
return LogicalType::DOUBLE;
// The nested types need to conform as well
return type;
case LogicalTypeId::LIST:
return LogicalType::LIST(GetJSONType(const_struct_names, ListType::GetChildType(type)));
case LogicalTypeId::ARRAY:
Expand Down Expand Up @@ -211,15 +222,15 @@ template <>
struct CreateJSONValue<hugeint_t, string_t> {
static inline yyjson_mut_val *Operation(yyjson_mut_doc *doc, const hugeint_t &input) {
const auto input_string = input.ToString();
return yyjson_mut_strncpy(doc, input_string.c_str(), input_string.length());
return yyjson_mut_rawncpy(doc, input_string.c_str(), input_string.length());
}
};

template <>
struct CreateJSONValue<uhugeint_t, string_t> {
static inline yyjson_mut_val *Operation(yyjson_mut_doc *doc, const uhugeint_t &input) {
const auto input_string = input.ToString();
return yyjson_mut_strncpy(doc, input_string.c_str(), input_string.length());
return yyjson_mut_rawncpy(doc, input_string.c_str(), input_string.length());
}
};

Expand Down Expand Up @@ -287,6 +298,22 @@ static void TemplatedCreateValues(yyjson_mut_doc *doc, yyjson_mut_val *vals[], V
}
}

//! Fills vals[0..count) from the string_t entries of value_v:
//! invalid (NULL) rows become a JSON null, valid rows are copied into doc via yyjson_mut_rawncpy
static void CreateRawValues(yyjson_mut_doc *doc, yyjson_mut_val *vals[], Vector &value_v, idx_t count) {
	UnifiedVectorFormat format;
	value_v.ToUnifiedFormat(count, format);
	auto strings = UnifiedVectorFormat::GetData<string_t>(format);
	for (idx_t row = 0; row < count; row++) {
		// Resolve the row through the selection vector before touching validity/data
		const idx_t source_idx = format.sel->get_index(row);
		if (format.validity.RowIsValid(source_idx)) {
			const auto &raw = strings[source_idx];
			vals[row] = yyjson_mut_rawncpy(doc, raw.GetData(), raw.GetSize());
		} else {
			vals[row] = yyjson_mut_null(doc);
		}
		// Either branch must have produced a value
		D_ASSERT(vals[row] != nullptr);
	}
}

static void CreateValuesStruct(const StructNames &names, yyjson_mut_doc *doc, yyjson_mut_val *vals[], Vector &value_v,
idx_t count) {
// Structs become values, therefore we initialize vals to JSON values
Expand Down Expand Up @@ -476,7 +503,8 @@ static void CreateValuesArray(const StructNames &names, yyjson_mut_doc *doc, yyj

static void CreateValues(const StructNames &names, yyjson_mut_doc *doc, yyjson_mut_val *vals[], Vector &value_v,
idx_t count) {
switch (value_v.GetType().id()) {
const auto &type = value_v.GetType();
switch (type.id()) {
case LogicalTypeId::SQLNULL:
CreateValuesNull(doc, vals, count);
break;
Expand Down Expand Up @@ -550,17 +578,28 @@ static void CreateValues(const StructNames &names, yyjson_mut_doc *doc, yyjson_m
case LogicalTypeId::TIMESTAMP_NS:
case LogicalTypeId::TIMESTAMP_MS:
case LogicalTypeId::TIMESTAMP_SEC:
case LogicalTypeId::VARINT:
case LogicalTypeId::UUID: {
Vector string_vector(LogicalTypeId::VARCHAR, count);
VectorOperations::DefaultCast(value_v, string_vector, count);
TemplatedCreateValues<string_t, string_t>(doc, vals, string_vector, count);
break;
}
case LogicalTypeId::VARINT: {
Vector string_vector(LogicalTypeId::VARCHAR, count);
VectorOperations::DefaultCast(value_v, string_vector, count);
CreateRawValues(doc, vals, string_vector, count);
break;
}
case LogicalTypeId::DECIMAL: {
Vector double_vector(LogicalType::DOUBLE, count);
VectorOperations::DefaultCast(value_v, double_vector, count);
TemplatedCreateValues<double, double>(doc, vals, double_vector, count);
if (DecimalType::GetWidth(type) > 15) {
Vector string_vector(LogicalTypeId::VARCHAR, count);
VectorOperations::DefaultCast(value_v, string_vector, count);
CreateRawValues(doc, vals, string_vector, count);
} else {
Vector double_vector(LogicalType::DOUBLE, count);
VectorOperations::DefaultCast(value_v, double_vector, count);
TemplatedCreateValues<double, double>(doc, vals, double_vector, count);
}
break;
}
case LogicalTypeId::INVALID:
Expand Down Expand Up @@ -604,7 +643,6 @@ static void ObjectFunction(DataChunk &args, ExpressionState &state, Vector &resu
for (idx_t i = 0; i < count; i++) {
objects[i] = JSONCommon::WriteVal<yyjson_mut_val>(objs[i], alc);
}

if (args.AllConstant()) {
result.SetVectorType(VectorType::CONSTANT_VECTOR);
}
Expand Down Expand Up @@ -637,7 +675,6 @@ static void ArrayFunction(DataChunk &args, ExpressionState &state, Vector &resul
for (idx_t i = 0; i < count; i++) {
objects[i] = JSONCommon::WriteVal<yyjson_mut_val>(arrs[i], alc);
}

if (args.AllConstant()) {
result.SetVectorType(VectorType::CONSTANT_VECTOR);
}
Expand All @@ -651,22 +688,9 @@ static void ToJSONFunctionInternal(const StructNames &names, Vector &input, cons
CreateValues(names, doc, vals, input, count);

// Write JSON values to string
auto objects = FlatVector::GetData<string_t>(result);
auto &result_validity = FlatVector::Validity(result);
UnifiedVectorFormat input_data;
input.ToUnifiedFormat(count, input_data);
for (idx_t i = 0; i < count; i++) {
idx_t idx = input_data.sel->get_index(i);
if (input_data.validity.RowIsValid(idx)) {
objects[i] = JSONCommon::WriteVal<yyjson_mut_val>(vals[i], alc);
} else {
result_validity.SetInvalid(i);
}
}

if (input.GetVectorType() == VectorType::CONSTANT_VECTOR || count == 1) {
result.SetVectorType(VectorType::CONSTANT_VECTOR);
}
UnaryExecutor::ExecuteWithNulls<data_t, string_t>(input, result, count, [&](data_t, ValidityMask &, idx_t index) {
return JSONCommon::WriteVal<yyjson_mut_val>(vals[index], alc);
});
}

static void ToJSONFunction(DataChunk &args, ExpressionState &state, Vector &result) {
Expand Down
3 changes: 1 addition & 2 deletions src/duckdb/extension/json/json_functions/json_structure.cpp
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
#include "json_structure.hpp"

#include "duckdb/common/enum_util.hpp"
#include "duckdb/common/extra_type_info.hpp"
#include "json_executors.hpp"
#include "json_scan.hpp"
#include "json_transform.hpp"

#include <duckdb/common/extra_type_info.hpp>

namespace duckdb {

static bool IsNumeric(LogicalTypeId type) {
Expand Down
62 changes: 45 additions & 17 deletions src/duckdb/extension/json/json_functions/json_transform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -362,9 +362,14 @@ static bool TransformToString(yyjson_val *vals[], yyjson_alc *alc, Vector &resul

bool JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, const idx_t count,
const vector<string> &names, const vector<Vector *> &result_vectors,
JSONTransformOptions &options) {
JSONTransformOptions &options,
optional_ptr<const vector<ColumnIndex>> column_indices, bool error_unknown_key) {
if (column_indices && column_indices->empty()) {
column_indices = nullptr;
}
D_ASSERT(alc);
D_ASSERT(names.size() == result_vectors.size());
D_ASSERT(!column_indices || column_indices->size() == names.size());
const idx_t column_count = names.size();

// Build hash map from key to column index so we don't have to linearly search using the key
Expand Down Expand Up @@ -429,7 +434,7 @@ bool JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
found_keys[col_idx] = true;
found_key_count++;
}
} else if (success && options.error_unknown_key) {
} else if (success && error_unknown_key && options.error_unknown_key) {
options.error_message =
StringUtil::Format("Object %s has unknown key \"" + string(key_ptr, key_len) + "\"",
JSONCommon::ValToString(objects[i], 50));
Expand Down Expand Up @@ -458,7 +463,9 @@ bool JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
}

for (idx_t col_idx = 0; col_idx < column_count; col_idx++) {
if (!JSONTransform::Transform(nested_vals[col_idx], alc, *result_vectors[col_idx], count, options)) {
auto child_column_index = column_indices ? &(*column_indices)[col_idx] : nullptr;
if (!JSONTransform::Transform(nested_vals[col_idx], alc, *result_vectors[col_idx], count, options,
child_column_index)) {
success = false;
}
}
Expand All @@ -471,7 +478,11 @@ bool JSONTransform::TransformObject(yyjson_val *objects[], yyjson_alc *alc, cons
}

static bool TransformObjectInternal(yyjson_val *objects[], yyjson_alc *alc, Vector &result, const idx_t count,
JSONTransformOptions &options) {
JSONTransformOptions &options, optional_ptr<const ColumnIndex> column_index) {
if (column_index && column_index->ChildIndexCount() == 0) {
column_index = nullptr;
}

// Set validity first
auto &result_validity = FlatVector::Validity(result);
for (idx_t i = 0; i < count; i++) {
Expand All @@ -485,14 +496,31 @@ static bool TransformObjectInternal(yyjson_val *objects[], yyjson_alc *alc, Vect
auto &child_vs = StructVector::GetEntries(result);
vector<string> child_names;
vector<Vector *> child_vectors;
child_names.reserve(child_vs.size());
child_vectors.reserve(child_vs.size());

const auto child_count = column_index ? column_index->ChildIndexCount() : child_vs.size();
child_names.reserve(child_count);
child_vectors.reserve(child_count);

unordered_set<idx_t> projected_indices;
for (idx_t child_i = 0; child_i < child_count; child_i++) {
const auto actual_i = column_index ? column_index->GetChildIndex(child_i).GetPrimaryIndex() : child_i;
projected_indices.insert(actual_i);

child_names.push_back(StructType::GetChildName(result.GetType(), actual_i));
child_vectors.push_back(child_vs[actual_i].get());
}

for (idx_t child_i = 0; child_i < child_vs.size(); child_i++) {
child_names.push_back(StructType::GetChildName(result.GetType(), child_i));
child_vectors.push_back(child_vs[child_i].get());
if (projected_indices.find(child_i) == projected_indices.end()) {
child_vs[child_i]->SetVectorType(VectorType::CONSTANT_VECTOR);
ConstantVector::SetNull(*child_vs[child_i], true);
}
}

return JSONTransform::TransformObject(objects, alc, count, child_names, child_vectors, options);
auto child_indices = column_index ? &column_index->GetChildIndexes() : nullptr;
const auto error_unknown_key = child_count == child_vs.size(); // Nothing projected out, error if unknown
return JSONTransform::TransformObject(objects, alc, count, child_names, child_vectors, options, child_indices,
error_unknown_key);
}

static bool TransformArrayToList(yyjson_val *arrays[], yyjson_alc *alc, Vector &result, const idx_t count,
Expand Down Expand Up @@ -562,7 +590,7 @@ static bool TransformArrayToList(yyjson_val *arrays[], yyjson_alc *alc, Vector &
}

// Transform array values
if (!JSONTransform::Transform(nested_vals, alc, ListVector::GetEntry(result), offset, options)) {
if (!JSONTransform::Transform(nested_vals, alc, ListVector::GetEntry(result), offset, options, nullptr)) {
success = false;
}

Expand Down Expand Up @@ -652,7 +680,7 @@ static bool TransformArrayToArray(yyjson_val *arrays[], yyjson_alc *alc, Vector
}

// Transform array values
if (!JSONTransform::Transform(nested_vals, alc, ArrayVector::GetEntry(result), child_count, options)) {
if (!JSONTransform::Transform(nested_vals, alc, ArrayVector::GetEntry(result), child_count, options, nullptr)) {
success = false;
}

Expand Down Expand Up @@ -720,13 +748,13 @@ static bool TransformObjectToMap(yyjson_val *objects[], yyjson_alc *alc, Vector
D_ASSERT(list_offset == list_size);

// Transform keys
if (!JSONTransform::Transform(keys, alc, MapVector::GetKeys(result), list_size, options)) {
if (!JSONTransform::Transform(keys, alc, MapVector::GetKeys(result), list_size, options, nullptr)) {
throw ConversionException(
StringUtil::Format(options.error_message + ". Cannot default to NULL, because map keys cannot be NULL"));
}

// Transform values
if (!JSONTransform::Transform(vals, alc, MapVector::GetValues(result), list_size, options)) {
if (!JSONTransform::Transform(vals, alc, MapVector::GetValues(result), list_size, options, nullptr)) {
success = false;
}

Expand Down Expand Up @@ -813,7 +841,7 @@ bool TransformValueIntoUnion(yyjson_val **vals, yyjson_alc *alc, Vector &result,
idx_t actual_tag = tag - names.begin();

Vector single(UnionType::GetMemberType(type, actual_tag), 1);
if (!JSONTransform::Transform(&val, alc, single, 1, options)) {
if (!JSONTransform::Transform(&val, alc, single, 1, options, nullptr)) {
success = false;
}

Expand All @@ -824,7 +852,7 @@ bool TransformValueIntoUnion(yyjson_val **vals, yyjson_alc *alc, Vector &result,
}

bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &result, const idx_t count,
JSONTransformOptions &options) {
JSONTransformOptions &options, optional_ptr<const ColumnIndex> column_index) {
auto result_type = result.GetType();
if ((result_type == LogicalTypeId::TIMESTAMP || result_type == LogicalTypeId::DATE) && options.date_format_map &&
options.date_format_map->HasFormats(result_type.id())) {
Expand Down Expand Up @@ -899,7 +927,7 @@ bool JSONTransform::Transform(yyjson_val *vals[], yyjson_alc *alc, Vector &resul
case LogicalTypeId::BLOB:
return TransformToString(vals, alc, result, count);
case LogicalTypeId::STRUCT:
return TransformObjectInternal(vals, alc, result, count, options);
return TransformObjectInternal(vals, alc, result, count, options, column_index);
case LogicalTypeId::LIST:
return TransformArrayToList(vals, alc, result, count, options);
case LogicalTypeId::MAP:
Expand Down Expand Up @@ -935,7 +963,7 @@ static bool TransformFunctionInternal(Vector &input, const idx_t count, Vector &
}
}

auto success = JSONTransform::Transform(vals, alc, result, count, options);
auto success = JSONTransform::Transform(vals, alc, result, count, options, nullptr);
if (input.GetVectorType() == VectorType::CONSTANT_VECTOR) {
result.SetVectorType(VectorType::CONSTANT_VECTOR);
}
Expand Down
Loading

0 comments on commit 3d187b9

Please sign in to comment.