Support AWS_ENDPOINT_URL (#84)
- [x] Supports `AWS_ENDPOINT_URL`, e.g. you can set `AWS_ENDPOINT_URL=http://localhost:9000` and `AWS_ALLOW_HTTP=true` for a local MinIO server.
- [x] `AWS_SESSION_TOKEN` was already supported, but without documentation or tests; both are added here.
  As a side note, we do not automatically call `AssumeRole` to fetch a token, but authentication succeeds when a token is passed via environment variables or the config file.
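
For example, a local MinIO setup might look like this (a sketch; the bucket and table names are illustrative):

```bash
# hypothetical local-MinIO environment; postgres must be started with these set
export AWS_ACCESS_KEY_ID=minioadmin
export AWS_SECRET_ACCESS_KEY=minioadmin
export AWS_REGION=us-east-1
export AWS_ENDPOINT_URL=http://localhost:9000  # point the S3 client at MinIO
export AWS_ALLOW_HTTP=true                     # the local endpoint is plain http

# copy a table to the emulated S3 bucket
psql -c "COPY my_table TO 's3://testbucket/my_table.parquet';"
```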

Closes #83.
aykut-bozkurt authored Dec 16, 2024
1 parent 4086ea4 commit bab2208
Showing 12 changed files with 178 additions and 82 deletions.
3 changes: 2 additions & 1 deletion .devcontainer/.env
@@ -2,10 +2,11 @@
AWS_ACCESS_KEY_ID=minioadmin
AWS_SECRET_ACCESS_KEY=minioadmin
AWS_REGION=us-east-1
AWS_ENDPOINT_URL=http://localhost:9000
AWS_ALLOW_HTTP=true
AWS_S3_TEST_BUCKET=testbucket
MINIO_ROOT_USER=minioadmin
MINIO_ROOT_PASSWORD=minioadmin

# Others
RUST_TEST_THREADS=1
PG_PARQUET_TEST=true
3 changes: 0 additions & 3 deletions .devcontainer/create-test-buckets.sh

This file was deleted.

1 change: 0 additions & 1 deletion .devcontainer/devcontainer.json
@@ -3,7 +3,6 @@
"dockerComposeFile": "docker-compose.yml",
"service": "app",
"workspaceFolder": "/workspace",
"postStartCommand": "bash .devcontainer/create-test-buckets.sh",
"postAttachCommand": "sudo chown -R rust /workspace",
"customizations": {
"vscode": {
4 changes: 3 additions & 1 deletion .devcontainer/docker-compose.yml
@@ -23,10 +23,12 @@ services:
env_file:
- .env
network_mode: host
command: server /data
entrypoint: "./entrypoint.sh"
restart: unless-stopped
healthcheck:
test: ["CMD", "curl", "http://localhost:9000"]
interval: 6s
timeout: 2s
retries: 3
volumes:
- ./minio-entrypoint.sh:/entrypoint.sh
20 changes: 20 additions & 0 deletions .devcontainer/minio-entrypoint.sh
@@ -0,0 +1,20 @@
#!/bin/bash

trap "echo 'Caught termination signal. Exiting...'; exit 0" SIGINT SIGTERM

minio server /data &

minio_pid=$!

while ! curl $AWS_ENDPOINT_URL; do
echo "Waiting for $AWS_ENDPOINT_URL..."
sleep 1
done

# set access key and secret key
mc alias set local $AWS_ENDPOINT_URL $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD

# create bucket
mc mb local/$AWS_S3_TEST_BUCKET

wait $minio_pid
24 changes: 14 additions & 10 deletions .github/workflows/ci.yml
@@ -89,7 +89,7 @@ jobs:
- name: Install and configure pgrx
run: |
cargo install --locked [email protected]
cargo pgrx init --pg${{ env.PG_MAJOR }} $(which pg_config)
cargo pgrx init --pg${{ env.PG_MAJOR }} /usr/lib/postgresql/${{ env.PG_MAJOR }}/bin/pg_config
- name: Install cargo-llvm-cov for coverage report
run: cargo install --locked [email protected]
@@ -101,21 +101,25 @@
- name: Set up permissions for PostgreSQL
run: |
sudo chmod a+rwx $(pg_config --pkglibdir) \
$(pg_config --sharedir)/extension \
sudo chmod a+rwx $(/usr/lib/postgresql/${{ env.PG_MAJOR }}/bin/pg_config --pkglibdir) \
$(/usr/lib/postgresql/${{ env.PG_MAJOR }}/bin/pg_config --sharedir)/extension \
/var/run/postgresql/
- name: Start Minio for s3 emulator tests
run: |
docker run -d --env-file .devcontainer/.env -p 9000:9000 minio/minio server /data
while ! nc -z localhost 9000; do
echo "Waiting for localhost:9000..."
sleep 1
docker run -d \
--env-file .devcontainer/.env \
-p 9000:9000 \
--entrypoint "./entrypoint.sh" \
--volume ./.devcontainer/minio-entrypoint.sh:/entrypoint.sh \
--name miniocontainer \
minio/minio
while ! curl $AWS_ENDPOINT_URL; do
echo "Waiting for $AWS_ENDPOINT_URL..."
sleep 1
done
aws --endpoint-url http://localhost:9000 s3 mb s3://$AWS_S3_TEST_BUCKET
- name: Run tests
run: |
# Run tests with coverage tool
33 changes: 23 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default.

5 changes: 3 additions & 2 deletions Cargo.toml
@@ -23,8 +23,9 @@ pg_test = []
arrow = {version = "53", default-features = false}
arrow-cast = {version = "53", default-features = false}
arrow-schema = {version = "53", default-features = false}
aws-config = { version = "1.5", default-features = false, features = ["rustls"]}
aws-credential-types = {version = "1.2", default-features = false}
aws-config = { version = "1", default-features = false, features = ["rustls"]}
aws-credential-types = {version = "1", default-features = false}
aws-sdk-sts = "1"
futures = "0.3"
object_store = {version = "0.11", default-features = false, features = ["aws"]}
once_cell = "1"
10 changes: 7 additions & 3 deletions README.md
@@ -174,10 +174,14 @@ region = eu-central-1
Alternatively, you can use the following environment variables when starting postgres to configure the S3 client:
- `AWS_ACCESS_KEY_ID`: the access key ID of the AWS account
- `AWS_SECRET_ACCESS_KEY`: the secret access key of the AWS account
- `AWS_SESSION_TOKEN`: the session token for the AWS account
- `AWS_REGION`: the default region of the AWS account
- `AWS_SHARED_CREDENTIALS_FILE`: an alternative location for the credentials file
- `AWS_CONFIG_FILE`: an alternative location for the config file
- `AWS_PROFILE`: the name of the profile from the credentials and config file (default profile name is `default`)
- `AWS_ENDPOINT_URL`: the endpoint URL of the S3-compatible object store
- `AWS_SHARED_CREDENTIALS_FILE`: an alternative location for the credentials file **(only via environment variables)**
- `AWS_CONFIG_FILE`: an alternative location for the config file **(only via environment variables)**
- `AWS_PROFILE`: the name of the profile from the credentials and config file (default profile name is `default`) **(only via environment variables)**
- `AWS_ALLOW_HTTP`: allows http endpoints **(only via environment variables)**


> [!NOTE]
> To be able to write into an object store location, you need to grant the `parquet_object_store_write` role to your current postgres user.
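
For example, the file-based configuration might look like this (a sketch; the `endpoint_url` key relies on the AWS SDK's shared-config support, and the credentials shown are MinIO defaults):

```bash
# hypothetical shared config files for the default profile
mkdir -p ~/.aws

cat > ~/.aws/credentials <<'EOF'
[default]
aws_access_key_id = minioadmin
aws_secret_access_key = minioadmin
EOF

cat > ~/.aws/config <<'EOF'
[default]
region = us-east-1
endpoint_url = http://localhost:9000
EOF
```

Note that an http endpoint still requires `AWS_ALLOW_HTTP=true` in the environment, since that flag is read only from environment variables.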
72 changes: 30 additions & 42 deletions src/arrow_parquet/uri_utils.rs
@@ -1,11 +1,7 @@
use std::{sync::Arc, sync::LazyLock};

use arrow::datatypes::SchemaRef;
use aws_config::{
environment::{EnvironmentVariableCredentialsProvider, EnvironmentVariableRegionProvider},
meta::{credentials::CredentialsProviderChain, region::RegionProviderChain},
profile::{ProfileFileCredentialsProvider, ProfileFileRegionProvider},
};
use aws_config::BehaviorVersion;
use aws_credential_types::provider::ProvideCredentials;
use object_store::{
aws::{AmazonS3, AmazonS3Builder},
@@ -89,51 +85,43 @@ fn object_store_with_location(uri: &Url, copy_from: bool) -> (Arc<dyn ObjectStor
}
}

// get_s3_object_store creates an AmazonS3 object store with the given bucket name.
// It is configured from environment variables, with the aws config files as a fallback.
// We need to read the config files ourselves to make the fallback work, since
// object_store does not provide a way to read them. Currently, we only support
// extracting "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY", "AWS_SESSION_TOKEN",
// "AWS_ENDPOINT_URL", and "AWS_REGION" from the config files.
async fn get_s3_object_store(bucket_name: &str) -> AmazonS3 {
let mut aws_s3_builder = AmazonS3Builder::new().with_bucket_name(bucket_name);

let is_test_running = std::env::var("PG_PARQUET_TEST").is_ok();
let mut aws_s3_builder = AmazonS3Builder::from_env().with_bucket_name(bucket_name);

if is_test_running {
// use minio for testing
aws_s3_builder = aws_s3_builder.with_endpoint("http://localhost:9000");
aws_s3_builder = aws_s3_builder.with_allow_http(true);
}
// first tries environment variables and then the config files
let sdk_config = aws_config::defaults(BehaviorVersion::v2024_03_28())
.load()
.await;

let aws_profile_name = std::env::var("AWS_PROFILE").unwrap_or("default".to_string());

let region_provider = RegionProviderChain::first_try(EnvironmentVariableRegionProvider::new())
.or_else(
ProfileFileRegionProvider::builder()
.profile_name(aws_profile_name.clone())
.build(),
);
if let Some(credential_provider) = sdk_config.credentials_provider() {
if let Ok(credentials) = credential_provider.provide_credentials().await {
// AWS_ACCESS_KEY_ID
aws_s3_builder = aws_s3_builder.with_access_key_id(credentials.access_key_id());

let region = region_provider.region().await;
// AWS_SECRET_ACCESS_KEY
aws_s3_builder = aws_s3_builder.with_secret_access_key(credentials.secret_access_key());

if let Some(region) = region {
aws_s3_builder = aws_s3_builder.with_region(region.to_string());
if let Some(token) = credentials.session_token() {
// AWS_SESSION_TOKEN
aws_s3_builder = aws_s3_builder.with_token(token);
}
}
}

let credential_provider = CredentialsProviderChain::first_try(
"Environment",
EnvironmentVariableCredentialsProvider::new(),
)
.or_else(
"Profile",
ProfileFileCredentialsProvider::builder()
.profile_name(aws_profile_name)
.build(),
);

if let Ok(credentials) = credential_provider.provide_credentials().await {
aws_s3_builder = aws_s3_builder.with_access_key_id(credentials.access_key_id());

aws_s3_builder = aws_s3_builder.with_secret_access_key(credentials.secret_access_key());
// AWS_ENDPOINT_URL
if let Some(aws_endpoint_url) = sdk_config.endpoint_url() {
aws_s3_builder = aws_s3_builder.with_endpoint(aws_endpoint_url);
}

if let Some(token) = credentials.session_token() {
aws_s3_builder = aws_s3_builder.with_token(token);
}
// AWS_REGION
if let Some(aws_region) = sdk_config.region() {
aws_s3_builder = aws_s3_builder.with_region(aws_region.as_ref());
}

aws_s3_builder.build().unwrap_or_else(|e| panic!("{}", e))
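
The net effect is that environment variables win and the shared config files act as a fallback: `AmazonS3Builder::from_env()` seeds the builder, and the `aws_config` default chain fills in whatever is still missing. A sketch of exercising the fallback (the profile name is hypothetical):

```bash
# with no credential variables exported, the SDK default chain falls back
# to ~/.aws/credentials and ~/.aws/config
unset AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY AWS_SESSION_TOKEN
export AWS_PROFILE=minio       # hypothetical profile name
export AWS_ALLOW_HTTP=true     # still env-only, even with a config-file endpoint
psql -c "COPY my_table FROM 's3://testbucket/my_table.parquet';"
```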
1 change: 1 addition & 0 deletions src/lib.rs
@@ -5,6 +5,7 @@ use pgrx::{prelude::*, GucContext, GucFlags, GucRegistry};
mod arrow_parquet;
mod parquet_copy_hook;
mod parquet_udfs;
#[cfg(any(test, feature = "pg_test"))]
mod pgrx_tests;
mod pgrx_utils;
mod type_compat;