Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
Stongtong authored Jul 26, 2024
2 parents 7855703 + 0334cd0 commit 80eaae5
Show file tree
Hide file tree
Showing 287 changed files with 27,284 additions and 23,712 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion scripts/jdbc_maven_deploy.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def exec(cmd):
<plugin>
<groupId>org.sonatype.plugins</groupId>
<artifactId>nexus-staging-maven-plugin</artifactId>
<version>1.6.13</version>
<version>1.6.14</version>
<extensions>true</extensions>
<configuration>
<serverId>ossrh</serverId>
Expand Down
74 changes: 65 additions & 9 deletions src/duckdb/extension/json/json_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,23 @@ string ThrowPathError(const char *ptr, const char *end, const bool binder) {
struct JSONKeyReadResult {
public:
static inline JSONKeyReadResult Empty() {
return {idx_t(0), string()};
return {idx_t(0), false, string()};
}

static inline JSONKeyReadResult WildCard() {
return {1, "*"};
return {1, false, "*"};
}

static inline JSONKeyReadResult RecWildCard() {
return {2, true, "*"};
}

static inline JSONKeyReadResult RecWildCardShortcut() {
return {1, true, "*"};
}

inline bool IsValid() {
return chars_read != 0;
return (chars_read != 0);
}

inline bool IsWildCard() {
Expand All @@ -52,6 +60,7 @@ struct JSONKeyReadResult {

public:
idx_t chars_read;
bool recursive;
string key;
};

Expand Down Expand Up @@ -82,7 +91,7 @@ static inline JSONKeyReadResult ReadString(const char *ptr, const char *const en
if (ptr == end || backslash) {
return JSONKeyReadResult::Empty();
} else {
return {idx_t(ptr - before), string(key.get(), key_len)};
return {idx_t(ptr - before), false, string(key.get(), key_len)};
}
} else {
while (ptr != end) {
Expand All @@ -91,7 +100,7 @@ static inline JSONKeyReadResult ReadString(const char *ptr, const char *const en
}
ptr++;
}
return {idx_t(ptr - before), string(before, ptr - before)};
return {idx_t(ptr - before), false, string(before, ptr - before)};
}
}

Expand Down Expand Up @@ -125,8 +134,23 @@ static inline idx_t ReadInteger(const char *ptr, const char *const end, idx_t &i
static inline JSONKeyReadResult ReadKey(const char *ptr, const char *const end) {
D_ASSERT(ptr != end);
if (*ptr == '*') { // Wildcard
if (*(ptr + 1) == '*') {
return JSONKeyReadResult::RecWildCard();
}
return JSONKeyReadResult::WildCard();
}
bool recursive = false;
if (*ptr == '.') {
char next = *(ptr + 1);
if (next == '*') {
return JSONKeyReadResult::RecWildCard();
}
if (next == '[') {
return JSONKeyReadResult::RecWildCardShortcut();
}
ptr++;
recursive = true;
}
bool escaped = false;
if (*ptr == '"') {
ptr++; // Skip past opening '"'
Expand All @@ -139,6 +163,10 @@ static inline JSONKeyReadResult ReadKey(const char *ptr, const char *const end)
if (escaped) {
result.chars_read += 2; // Account for surrounding quotes
}
if (recursive) {
result.chars_read += 1;
result.recursive = true;
}
return result;
}

Expand Down Expand Up @@ -197,7 +225,7 @@ JSONPathType JSONCommon::ValidatePath(const char *ptr, const idx_t &len, const b
auto key = ReadKey(ptr, end);
if (!key.IsValid()) {
ThrowPathError(ptr, end, binder);
} else if (key.IsWildCard()) {
} else if (key.IsWildCard() || key.recursive) {
path_type = JSONPathType::WILDCARD;
}
ptr += key.chars_read;
Expand Down Expand Up @@ -272,12 +300,39 @@ void GetWildcardPathInternal(yyjson_val *val, const char *ptr, const char *const
D_ASSERT(ptr != end);
switch (c) {
case '.': { // Object field
if (!unsafe_yyjson_is_obj(val)) {
return;
}
auto key_result = ReadKey(ptr, end);
D_ASSERT(key_result.IsValid());
if (key_result.recursive) {
if (key_result.IsWildCard()) {
ptr += key_result.chars_read;
}
vector<yyjson_val *> rec_vals;
rec_vals.emplace_back(val);
for (idx_t i = 0; i < rec_vals.size(); i++) {
yyjson_val *rec_val = rec_vals[i];
if (yyjson_is_arr(rec_val)) {
size_t idx, max;
yyjson_val *element;
yyjson_arr_foreach(rec_val, idx, max, element) {
rec_vals.emplace_back(element);
}
} else if (yyjson_is_obj(rec_val)) {
size_t idx, max;
yyjson_val *key, *element;
yyjson_obj_foreach(rec_val, idx, max, key, element) {
rec_vals.emplace_back(element);
}
}
if (i > 0 || ptr != end) {
GetWildcardPathInternal(rec_val, ptr, end, vals);
}
}
return;
}
ptr += key_result.chars_read;
if (!unsafe_yyjson_is_obj(val)) {
return;
}
if (key_result.IsWildCard()) { // Wildcard
size_t idx, max;
yyjson_val *key, *obj_val;
Expand Down Expand Up @@ -325,6 +380,7 @@ void GetWildcardPathInternal(yyjson_val *val, const char *ptr, const char *const
if (val != nullptr) {
vals.emplace_back(val);
}
return;
}

void JSONCommon::GetWildcardPath(yyjson_val *val, const char *ptr, const idx_t &len, vector<yyjson_val *> &vals) {
Expand Down
15 changes: 15 additions & 0 deletions src/duckdb/extension/parquet/parquet_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,21 @@ unique_ptr<ColumnReader> ParquetReader::CreateReaderRecursive(ClientContext &con

c_idx++;
}
// rename child type entries if there are case-insensitive duplicates by appending _1, _2 etc.
// behavior consistent with CSV reader fwiw
case_insensitive_map_t<idx_t> name_collision_count;
// walk the child names collected above and make each one unique
for (auto &child_type : child_types) {
auto col_name = child_type.first;
// avoid duplicate child names: bump the collision counter and append it as a suffix until the name is unused
while (name_collision_count.find(col_name) != name_collision_count.end()) {
name_collision_count[col_name] += 1;
col_name = col_name + "_" + to_string(name_collision_count[col_name]);
}
child_type.first = col_name;
name_collision_count[col_name] = 0;
}

D_ASSERT(!child_types.empty());
unique_ptr<ColumnReader> result;
LogicalType result_type;
Expand Down
13 changes: 10 additions & 3 deletions src/duckdb/src/catalog/catalog_entry/macro_catalog_entry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ namespace duckdb {

MacroCatalogEntry::MacroCatalogEntry(Catalog &catalog, SchemaCatalogEntry &schema, CreateMacroInfo &info)
: FunctionEntry(
(info.function->type == MacroType::SCALAR_MACRO ? CatalogType::MACRO_ENTRY : CatalogType::TABLE_MACRO_ENTRY),
(info.macros[0]->type == MacroType::SCALAR_MACRO ? CatalogType::MACRO_ENTRY : CatalogType::TABLE_MACRO_ENTRY),
catalog, schema, info),
function(std::move(info.function)) {
macros(std::move(info.macros)) {
this->temporary = info.temporary;
this->internal = info.internal;
this->dependencies = info.dependencies;
Expand Down Expand Up @@ -43,11 +43,18 @@ unique_ptr<CreateInfo> MacroCatalogEntry::GetInfo() const {
info->catalog = catalog.GetName();
info->schema = schema.name;
info->name = name;
info->function = function->Copy();
for (auto &function : macros) {
info->macros.push_back(function->Copy());
}
info->dependencies = dependencies;
info->comment = comment;
info->tags = tags;
return std::move(info);
}

//! Renders this macro entry as text by building its create-info (see GetInfo)
//! and returning whatever that info's ToString() produces.
string MacroCatalogEntry::ToSQL() const {
	return GetInfo()->ToString();
}

} // namespace duckdb
81 changes: 49 additions & 32 deletions src/duckdb/src/catalog/default/default_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,28 +33,28 @@ static const DefaultMacro internal_macros[] = {
{"pg_catalog", "current_schemas", {"include_implicit"}, "current_schemas(include_implicit)"}, // names of schemas in search path

// privilege functions
// {"has_any_column_privilege", {"user", "table", "privilege", nullptr}, "true"}, //boolean //does user have privilege for any column of table
{"pg_catalog", "has_any_column_privilege", {"table", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for any column of table
// {"has_column_privilege", {"user", "table", "column", "privilege", nullptr}, "true"}, //boolean //does user have privilege for column
{"pg_catalog", "has_any_column_privilege", {"user", "table", "privilege", nullptr}, "true"}, //boolean //does user have privilege for any column of table
{"pg_catalog", "has_column_privilege", {"table", "column", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for column
// {"has_database_privilege", {"user", "database", "privilege", nullptr}, "true"}, //boolean //does user have privilege for database
{"pg_catalog", "has_column_privilege", {"user", "table", "column", "privilege", nullptr}, "true"}, //boolean //does user have privilege for column
{"pg_catalog", "has_database_privilege", {"database", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for database
// {"has_foreign_data_wrapper_privilege", {"user", "fdw", "privilege", nullptr}, "true"}, //boolean //does user have privilege for foreign-data wrapper
{"pg_catalog", "has_database_privilege", {"user", "database", "privilege", nullptr}, "true"}, //boolean //does user have privilege for database
{"pg_catalog", "has_foreign_data_wrapper_privilege", {"fdw", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for foreign-data wrapper
// {"has_function_privilege", {"user", "function", "privilege", nullptr}, "true"}, //boolean //does user have privilege for function
{"pg_catalog", "has_foreign_data_wrapper_privilege", {"user", "fdw", "privilege", nullptr}, "true"}, //boolean //does user have privilege for foreign-data wrapper
{"pg_catalog", "has_function_privilege", {"function", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for function
// {"has_language_privilege", {"user", "language", "privilege", nullptr}, "true"}, //boolean //does user have privilege for language
{"pg_catalog", "has_function_privilege", {"user", "function", "privilege", nullptr}, "true"}, //boolean //does user have privilege for function
{"pg_catalog", "has_language_privilege", {"language", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for language
// {"has_schema_privilege", {"user", "schema, privilege", nullptr}, "true"}, //boolean //does user have privilege for schema
{"pg_catalog", "has_language_privilege", {"user", "language", "privilege", nullptr}, "true"}, //boolean //does user have privilege for language
{"pg_catalog", "has_schema_privilege", {"schema", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for schema
// {"has_sequence_privilege", {"user", "sequence", "privilege", nullptr}, "true"}, //boolean //does user have privilege for sequence
{"pg_catalog", "has_schema_privilege", {"user", "schema", "privilege", nullptr}, "true"}, //boolean //does user have privilege for schema
{"pg_catalog", "has_sequence_privilege", {"sequence", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for sequence
// {"has_server_privilege", {"user", "server", "privilege", nullptr}, "true"}, //boolean //does user have privilege for foreign server
{"pg_catalog", "has_sequence_privilege", {"user", "sequence", "privilege", nullptr}, "true"}, //boolean //does user have privilege for sequence
{"pg_catalog", "has_server_privilege", {"server", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for foreign server
// {"has_table_privilege", {"user", "table", "privilege", nullptr}, "true"}, //boolean //does user have privilege for table
{"pg_catalog", "has_server_privilege", {"user", "server", "privilege", nullptr}, "true"}, //boolean //does user have privilege for foreign server
{"pg_catalog", "has_table_privilege", {"table", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for table
// {"has_tablespace_privilege", {"user", "tablespace", "privilege", nullptr}, "true"}, //boolean //does user have privilege for tablespace
{"pg_catalog", "has_table_privilege", {"user", "table", "privilege", nullptr}, "true"}, //boolean //does user have privilege for table
{"pg_catalog", "has_tablespace_privilege", {"tablespace", "privilege", nullptr}, "true"}, //boolean //does current user have privilege for tablespace
{"pg_catalog", "has_tablespace_privilege", {"user", "tablespace", "privilege", nullptr}, "true"}, //boolean //does user have privilege for tablespace

// various postgres system functions
{"pg_catalog", "pg_get_viewdef", {"oid", nullptr}, "(select sql from duckdb_views() v where v.view_oid=oid)"},
Expand Down Expand Up @@ -166,44 +166,61 @@ static const DefaultMacro internal_macros[] = {
// regexp functions
{DEFAULT_SCHEMA, "regexp_split_to_table", {"text", "pattern", nullptr}, "unnest(string_split_regex(text, pattern))"},

// storage helper functions
{DEFAULT_SCHEMA, "get_block_size", {"db_name"}, "(SELECT block_size FROM pragma_database_size() WHERE database_name = db_name)"},
// storage helper functions
{DEFAULT_SCHEMA, "get_block_size", {"db_name"}, "(SELECT block_size FROM pragma_database_size() WHERE database_name = db_name)"},

// string functions
{DEFAULT_SCHEMA, "md5_number_upper", {"param"}, "((md5_number(param)::bit::varchar)[65:])::bit::uint64"},
{DEFAULT_SCHEMA, "md5_number_lower", {"param"}, "((md5_number(param)::bit::varchar)[:64])::bit::uint64"},

{nullptr, nullptr, {nullptr}, nullptr}
};

unique_ptr<CreateMacroInfo> DefaultFunctionGenerator::CreateInternalMacroInfo(const DefaultMacro &default_macro, unique_ptr<MacroFunction> function) {
for (idx_t param_idx = 0; default_macro.parameters[param_idx] != nullptr; param_idx++) {
function->parameters.push_back(
make_uniq<ColumnRefExpression>(default_macro.parameters[param_idx]));
}
D_ASSERT(function->type == MacroType::SCALAR_MACRO);
//! Convenience overload for a single default macro: wraps it in an array_ptr
//! and forwards to the array_ptr-based overload.
unique_ptr<CreateMacroInfo> DefaultFunctionGenerator::CreateInternalMacroInfo(const DefaultMacro &default_macro) {
	array_ptr<const DefaultMacro> single_macro(default_macro);
	return CreateInternalMacroInfo(single_macro);
}


unique_ptr<CreateMacroInfo> DefaultFunctionGenerator::CreateInternalMacroInfo(array_ptr<const DefaultMacro> macros) {
auto type = CatalogType::MACRO_ENTRY;
auto bind_info = make_uniq<CreateMacroInfo>(type);
bind_info->schema = default_macro.schema;
bind_info->name = default_macro.name;
for(auto &default_macro : macros) {
// parse the expression
auto expressions = Parser::ParseExpressionList(default_macro.macro);
D_ASSERT(expressions.size() == 1);

auto function = make_uniq<ScalarMacroFunction>(std::move(expressions[0]));
for (idx_t param_idx = 0; default_macro.parameters[param_idx] != nullptr; param_idx++) {
function->parameters.push_back(
make_uniq<ColumnRefExpression>(default_macro.parameters[param_idx]));
}
D_ASSERT(function->type == MacroType::SCALAR_MACRO);
bind_info->macros.push_back(std::move(function));
}
bind_info->schema = macros[0].schema;
bind_info->name = macros[0].name;
bind_info->temporary = true;
bind_info->internal = true;
bind_info->function = std::move(function);
return bind_info;

}

unique_ptr<CreateMacroInfo> DefaultFunctionGenerator::CreateInternalMacroInfo(const DefaultMacro &default_macro) {
// parse the expression
auto expressions = Parser::ParseExpressionList(default_macro.macro);
D_ASSERT(expressions.size() == 1);

auto result = make_uniq<ScalarMacroFunction>(std::move(expressions[0]));
return CreateInternalMacroInfo(default_macro, std::move(result));
//! Returns true when the default macro's schema and name both equal the
//! given schema and name; the schema is compared first.
static bool DefaultFunctionMatches(const DefaultMacro &macro, const string &schema, const string &name) {
	if (!(macro.schema == schema)) {
		return false;
	}
	return macro.name == name;
}

static unique_ptr<CreateFunctionInfo> GetDefaultFunction(const string &input_schema, const string &input_name) {
auto schema = StringUtil::Lower(input_schema);
auto name = StringUtil::Lower(input_name);
for (idx_t index = 0; internal_macros[index].name != nullptr; index++) {
if (internal_macros[index].schema == schema && internal_macros[index].name == name) {
return DefaultFunctionGenerator::CreateInternalMacroInfo(internal_macros[index]);
if (DefaultFunctionMatches(internal_macros[index], schema, name)) {
// found the function! keep on iterating to find all overloads
idx_t overload_count;
for(overload_count = 1; internal_macros[index + overload_count].name; overload_count++) {
if (!DefaultFunctionMatches(internal_macros[index + overload_count], schema, name)) {
break;
}
}
return DefaultFunctionGenerator::CreateInternalMacroInfo(array_ptr<const DefaultMacro>(internal_macros + index, overload_count));
}
}
return nullptr;
Expand Down
2 changes: 1 addition & 1 deletion src/duckdb/src/catalog/default/default_table_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ DefaultTableFunctionGenerator::CreateInternalTableMacroInfo(const DefaultTableMa
bind_info->name = default_macro.name;
bind_info->temporary = true;
bind_info->internal = true;
bind_info->function = std::move(function);
bind_info->macros.push_back(std::move(function));
return bind_info;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ void ArrowFixedSizeListData::Append(ArrowAppendData &append_data, Vector &input,
input.ToUnifiedFormat(input_size, format);
idx_t size = to - from;
AppendValidity(append_data, format, from, to);

input.Flatten(input_size);
auto array_size = ArrayType::GetSize(input.GetType());
auto &child_vector = ArrayVector::GetEntry(input);
auto &child_data = *append_data.child_data[0];
Expand Down
5 changes: 3 additions & 2 deletions src/duckdb/src/common/arrow/arrow_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,14 +180,15 @@ void SetArrowFormat(DuckDBArrowSchemaHolder &root_holder, ArrowSchema &child, co
break;
}
case LogicalTypeId::BLOB:
case LogicalTypeId::BIT: {
case LogicalTypeId::BIT:

if (options.arrow_offset_size == ArrowOffsetSize::LARGE) {
child.format = "Z";
} else {
child.format = "z";
}
break;
}

case LogicalTypeId::LIST: {
if (options.arrow_use_list_view) {
if (options.arrow_offset_size == ArrowOffsetSize::LARGE) {
Expand Down
Loading

0 comments on commit 80eaae5

Please sign in to comment.