Skip to content

Commit

Permalink
optionally include httpfs plugin when building from source
Browse files Browse the repository at this point in the history
  • Loading branch information
BittnerBarnabas committed Jul 22, 2024
1 parent 7ce49a3 commit fb6c889
Show file tree
Hide file tree
Showing 16 changed files with 3,524 additions and 11 deletions.
22 changes: 22 additions & 0 deletions binding.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
"targets": [
{
"target_name": "<(module_name)",
"variables": {
"include_httpfs": "<!(echo ${DUCKDB_INCLUDE_HTTPFS})"
},
"sources": [
"src/duckdb_node.cpp",
"src/database.cpp",
Expand Down Expand Up @@ -397,6 +400,25 @@
"bcrypt.lib"
]
}
],
[
"include_httpfs=='true'",
{
"sources": [
"src/duckdb/extension/httpfs/create_secret_functions.cpp",
"src/duckdb/extension/httpfs/crypto.cpp",
"src/duckdb/extension/httpfs/hffs.cpp",
"src/duckdb/extension/httpfs/httpfs.cpp",
"src/duckdb/extension/httpfs/httpfs_extension.cpp",
"src/duckdb/extension/httpfs/s3fs.cpp"
],
"include_dirs": [
"src/duckdb/extension/httpfs/include"
],
"defines": [
"DUCKDB_EXTENSION_HTTPFS_LINKED"
]
}
]
],
"libraries": []
Expand Down
5 changes: 4 additions & 1 deletion binding.gyp.in
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
"targets": [
{
"target_name": "<(module_name)",
"variables" : {
},
"sources": [
"src/duckdb_node.cpp",
"src/database.cpp",
Expand Down Expand Up @@ -67,7 +69,8 @@
"rstrtmgr.lib", "bcrypt.lib"
]
}
]
],
"${OPTIONAL_EXTENSIONS}"
],
"libraries": [
"${LIBRARY_FILES}"
Expand Down
250 changes: 250 additions & 0 deletions src/duckdb/extension/httpfs/create_secret_functions.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,250 @@
#include "create_secret_functions.hpp"
#include "s3fs.hpp"
#include "duckdb/main/extension_util.hpp"
#include "duckdb/common/local_file_system.hpp"

namespace duckdb {

//! Registers the secret type and create-secret functions for each S3-compatible
//! secret type handled here: "s3", "r2" and "gcs" (in that order).
void CreateS3SecretFunctions::Register(DatabaseInstance &instance) {
	for (const char *secret_type_name : {"s3", "r2", "gcs"}) {
		RegisterCreateSecretFunction(instance, secret_type_name);
	}
}

//! Builds an S3-style secret from `params` after applying the options found in `input`.
//!
//! For "r2" secrets an "account_id" option is used to derive the endpoint first; the
//! options loop runs afterwards, so an explicit "endpoint" option replaces that value.
//! When `input.scope` is empty, a default scope is chosen from the secret type.
//!
//! Throws InvalidInputException when "use_ssl" or "url_compatibility_mode" is not a BOOLEAN,
//! and InternalException for an unrecognised option name or an unrecognised secret type.
unique_ptr<BaseSecret> CreateS3SecretFunctions::CreateSecretFunctionInternal(ClientContext &context,
                                                                             CreateSecretInput &input,
                                                                             S3AuthParams params) {
	// r2: the endpoint can be derived from the account id
	auto account_id_entry = input.options.find("account_id");
	if (input.type == "r2" && account_id_entry != input.options.end()) {
		params.endpoint = account_id_entry->second.ToString() + ".r2.cloudflarestorage.com";
	}

	// Boolean options are only accepted when the supplied value is an actual BOOLEAN
	auto read_boolean = [](const string &option_name, const Value &option_value) -> bool {
		if (option_value.type() != LogicalType::BOOLEAN) {
			throw InvalidInputException("Invalid type past to secret option: '%s', found '%s', expected: 'BOOLEAN'",
			                            option_name, option_value.type().ToString());
		}
		return option_value.GetValue<bool>();
	};

	// Apply every user-supplied option on top of the incoming params
	for (const auto &option : input.options) {
		const auto key = StringUtil::Lower(option.first);
		const auto &value = option.second;

		if (key == "account_id") {
			// already consumed above
			continue;
		}

		if (key == "key_id") {
			params.access_key_id = value.ToString();
		} else if (key == "secret") {
			params.secret_access_key = value.ToString();
		} else if (key == "region") {
			params.region = value.ToString();
		} else if (key == "session_token") {
			params.session_token = value.ToString();
		} else if (key == "endpoint") {
			params.endpoint = value.ToString();
		} else if (key == "url_style") {
			params.url_style = value.ToString();
		} else if (key == "use_ssl") {
			params.use_ssl = read_boolean(key, value);
		} else if (key == "url_compatibility_mode") {
			params.s3_url_compatibility_mode = read_boolean(key, value);
		} else {
			throw InternalException("Unknown named parameter passed to CreateSecretFunctionInternal: " + key);
		}
	}

	// Use the caller's scope when given, otherwise fall back to the per-type URL prefixes
	auto secret_scope = input.scope;
	if (secret_scope.empty()) {
		const auto &secret_type = input.type;
		if (secret_type == "s3") {
			secret_scope.push_back("s3://");
			secret_scope.push_back("s3n://");
			secret_scope.push_back("s3a://");
		} else if (secret_type == "r2") {
			secret_scope.push_back("r2://");
		} else if (secret_type == "gcs") {
			secret_scope.push_back("gcs://");
			secret_scope.push_back("gs://");
		} else {
			throw InternalException("Unknown secret type found in httpfs extension: '%s'", input.type);
		}
	}

	return S3SecretHelper::CreateSecret(secret_scope, input.type, input.provider, input.name, params);
}

//! Creates a secret whose starting parameters are read through the client's file opener
//! (via S3AuthParams::ReadFrom); the options in `input` are then applied on top.
unique_ptr<BaseSecret> CreateS3SecretFunctions::CreateS3SecretFromSettings(ClientContext &context,
                                                                           CreateSecretInput &input) {
	FileOpenerInfo opener_info;
	auto settings_params = S3AuthParams::ReadFrom(context.client_data->file_opener.get(), opener_info);
	return CreateSecretFunctionInternal(context, input, settings_params);
}

//! Creates a secret starting from fixed defaults rather than from settings:
//! SSL on, URL compatibility mode off, region "us-east-1", and an endpoint that is
//! "storage.googleapis.com" for "gcs" and "s3.amazonaws.com" otherwise.
//! "gcs" and "r2" secrets additionally default to the "path" URL style.
//! The options in `input` are applied on top of these defaults.
unique_ptr<BaseSecret> CreateS3SecretFunctions::CreateS3SecretFromConfig(ClientContext &context,
                                                                         CreateSecretInput &input) {
	const bool is_gcs = input.type == "gcs";
	const bool is_r2 = input.type == "r2";

	S3AuthParams defaults;
	defaults.use_ssl = true;
	defaults.s3_url_compatibility_mode = false;
	defaults.region = "us-east-1";
	if (is_gcs) {
		defaults.endpoint = "storage.googleapis.com";
	} else {
		defaults.endpoint = "s3.amazonaws.com";
	}

	if (is_gcs || is_r2) {
		defaults.url_style = "path";
	}

	return CreateSecretFunctionInternal(context, input, defaults);
}

//! Declares the named parameters accepted by an S3-style create-secret function:
//! six VARCHAR options, two BOOLEAN options, and for "r2" an extra VARCHAR "account_id".
void CreateS3SecretFunctions::SetBaseNamedParams(CreateSecretFunction &function, string &type) {
	auto &named = function.named_parameters;

	for (const char *varchar_option : {"key_id", "secret", "region", "session_token", "endpoint", "url_style"}) {
		named[varchar_option] = LogicalType::VARCHAR;
	}
	for (const char *boolean_option : {"use_ssl", "url_compatibility_mode"}) {
		named[boolean_option] = LogicalType::BOOLEAN;
	}

	// only r2 secrets accept an account id
	if (type == "r2") {
		named["account_id"] = LogicalType::VARCHAR;
	}
}

//! Registers the secret type named `type` (default provider "config") and two create-secret
//! functions for it: the "config" provider and the "duckdb_settings" provider.
void CreateS3SecretFunctions::RegisterCreateSecretFunction(DatabaseInstance &instance, string type) {
	// The secret type itself has to be registered before its functions
	SecretType new_type;
	new_type.name = type;
	new_type.deserializer = KeyValueSecret::Deserialize<KeyValueSecret>;
	new_type.default_provider = "config";
	ExtensionUtil::RegisterSecretType(instance, new_type);

	// Provider that starts from fixed defaults
	CreateSecretFunction config_provider = {type, "config", CreateS3SecretFromConfig};
	SetBaseNamedParams(config_provider, type);

	// Provider that starts from the current settings
	CreateSecretFunction settings_provider = {type, "duckdb_settings", CreateS3SecretFromSettings};
	SetBaseNamedParams(settings_provider, type);

	ExtensionUtil::RegisterFunction(instance, config_provider);
	ExtensionUtil::RegisterFunction(instance, settings_provider);
}

//! Registers the generic bearer and HuggingFace secret types together with their providers:
//! a "config" provider (taking a VARCHAR "token") for both, and a "credential_chain"
//! provider for HuggingFace.
void CreateBearerTokenFunctions::Register(DatabaseInstance &instance) {
	// Both secret types use the same deserializer and default provider
	auto register_secret_type = [&instance](const string &type_name) {
		SecretType type;
		type.name = type_name;
		type.deserializer = KeyValueSecret::Deserialize<KeyValueSecret>;
		type.default_provider = "config";
		ExtensionUtil::RegisterSecretType(instance, type);
	};

	// Generic Bearer: secret type, then its config provider
	register_secret_type(GENERIC_BEARER_TYPE);
	CreateSecretFunction bearer_config = {GENERIC_BEARER_TYPE, "config", CreateBearerSecretFromConfig};
	bearer_config.named_parameters["token"] = LogicalType::VARCHAR;
	ExtensionUtil::RegisterFunction(instance, bearer_config);

	// HuggingFace: secret type, then its config provider
	register_secret_type(HUGGINGFACE_TYPE);
	CreateSecretFunction huggingface_config = {HUGGINGFACE_TYPE, "config", CreateBearerSecretFromConfig};
	huggingface_config.named_parameters["token"] = LogicalType::VARCHAR;
	ExtensionUtil::RegisterFunction(instance, huggingface_config);

	// HuggingFace credential_chain provider (takes no named parameters)
	CreateSecretFunction huggingface_chain = {HUGGINGFACE_TYPE, "credential_chain",
	                                          CreateHuggingFaceSecretFromCredentialChain};
	ExtensionUtil::RegisterFunction(instance, huggingface_chain);
}

//! Builds a key/value secret that stores `token` under the key "token" and marks that key
//! as redacted. When `input.scope` is empty the scope defaults to "" for the generic bearer
//! type and "hf://" for the HuggingFace type; any other type raises InternalException.
unique_ptr<BaseSecret> CreateBearerTokenFunctions::CreateSecretFunctionInternal(ClientContext &context,
                                                                                CreateSecretInput &input,
                                                                                const string &token) {
	// Use the caller's scope when given, otherwise the per-type default
	auto secret_scope = input.scope;
	if (secret_scope.empty()) {
		if (input.type == HUGGINGFACE_TYPE) {
			secret_scope.push_back("hf://");
		} else if (input.type == GENERIC_BEARER_TYPE) {
			secret_scope.push_back("");
		} else {
			throw InternalException("Unknown secret type found in httpfs extension: '%s'", input.type);
		}
	}

	auto secret = make_uniq<KeyValueSecret>(secret_scope, input.type, input.provider, input.name);

	//! Store the token and make sure it is redacted
	secret->secret_map["token"] = token;
	secret->redact_keys = {"token"};

	return std::move(secret);
}

//! Creates a bearer-style secret from the "token" option in `input`.
//! Option names are compared after lower-casing; when no "token" option is present the
//! secret is created with an empty token.
unique_ptr<BaseSecret> CreateBearerTokenFunctions::CreateBearerSecretFromConfig(ClientContext &context,
                                                                                CreateSecretInput &input) {
	string token;

	for (const auto &named_param : input.options) {
		auto lower_name = StringUtil::Lower(named_param.first);
		if (lower_name == "token") {
			token = named_param.second.ToString();
		}
	}

	return CreateSecretFunctionInternal(context, input, token);
}

//! Opens `token_path` for reading on a LocalFileSystem and returns the result of one
//! `ReadLine()` call on the handle.
//!
//! \param token_path            Path of the token file.
//! \param error_source_message  Text inserted directly after the quoted path in the error
//!                              message, describing where the path came from (may be empty).
//! \param fail_on_exception     When false, any std::exception raised while opening or reading
//!                              is swallowed and an empty string is returned. When true (the
//!                              default) the failure is rethrown as an IOException.
static string TryReadTokenFile(const string &token_path, const string &error_source_message,
                               bool fail_on_exception = true) {
	try {
		LocalFileSystem fs;
		auto handle = fs.OpenFile(token_path, {FileOpenFlags::FILE_FLAGS_READ});
		return handle->ReadLine();
	} catch (std::exception &ex) {
		if (!fail_on_exception) {
			// best-effort lookup: a missing or unreadable file simply yields no token
			return "";
		}
		ErrorData error(ex);
		throw IOException("Failed to read token path '%s'%s. (error: %s)", token_path, error_source_message,
		                  error.RawMessage());
	}
}

//! Creates a HuggingFace secret by trying a fixed chain of token sources, using the first
//! one that is set:
//!   1. the HF_TOKEN environment variable (the token itself)
//!   2. the file named by the HF_TOKEN_PATH environment variable
//!   3. the file "token" inside the directory named by HF_HOME
//!   4. the default file "~/.cache/huggingface/token"
//! A read failure in steps 2 and 3 raises an IOException; in step 4 it is ignored and the
//! secret is created with an empty token.
unique_ptr<BaseSecret>
CreateBearerTokenFunctions::CreateHuggingFaceSecretFromCredentialChain(ClientContext &context,
                                                                       CreateSecretInput &input) {
	// Step 1: the token is given directly in HF_TOKEN
	if (const char *env_token = std::getenv("HF_TOKEN")) {
		return CreateSecretFunctionInternal(context, input, env_token);
	}

	// Step 2: HF_TOKEN_PATH names the token file
	if (const char *env_token_path = std::getenv("HF_TOKEN_PATH")) {
		auto file_token = TryReadTokenFile(env_token_path, " fetched from HF_TOKEN_PATH env variable");
		return CreateSecretFunctionInternal(context, input, file_token);
	}

	// Step 3: the token file lives at $HF_HOME/token
	if (const char *env_home = std::getenv("HF_HOME")) {
		auto home_token_path = LocalFileSystem().JoinPath(env_home, "token");
		auto file_token =
		    TryReadTokenFile(home_token_path, " constructed using the HF_HOME variable: '$HF_HOME/token'");
		return CreateSecretFunctionInternal(context, input, file_token);
	}

	// Step 4: fall back to the default location; failures here are not fatal
	auto default_token = TryReadTokenFile("~/.cache/huggingface/token", "", false);
	return CreateSecretFunctionInternal(context, input, default_token);
}
} // namespace duckdb
27 changes: 27 additions & 0 deletions src/duckdb/extension/httpfs/crypto.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#include "crypto.hpp"
#include "mbedtls_wrapper.hpp"

namespace duckdb {

void sha256(const char *in, size_t in_len, hash_bytes &out) {
duckdb_mbedtls::MbedTlsWrapper::ComputeSha256Hash(in, in_len, (char *)out);
}

void hmac256(const std::string &message, const char *secret, size_t secret_len, hash_bytes &out) {
duckdb_mbedtls::MbedTlsWrapper::Hmac256(secret, secret_len, message.data(), message.size(), (char *)out);
}

//! Convenience overload: uses a hash_bytes buffer as the HMAC key.
//! The key length is always sizeof(hash_bytes).
void hmac256(std::string message, hash_bytes secret, hash_bytes &out) {
	const char *key = reinterpret_cast<char *>(secret);
	hmac256(message, key, sizeof(hash_bytes), out);
}

//! Writes the lowercase hexadecimal encoding of `in` into `out`: each input byte becomes
//! two output characters (high nibble first). No terminator is written.
void hex256(hash_bytes &in, hash_str &out) {
	const char *digits = "0123456789abcdef";
	for (size_t i = 0; i < sizeof(in); i++) {
		const unsigned char byte = in[i];
		out[2 * i] = digits[(byte >> 4) & 0xF];
		out[2 * i + 1] = digits[byte & 0xF];
	}
}
} // namespace duckdb
Loading

0 comments on commit fb6c889

Please sign in to comment.