Skip to content

Commit

Permalink
Adds Support for COPY TO/FROM Google Cloud Storage
Browse files Browse the repository at this point in the history
Supports the following Google Cloud Storage URI form:
- `gs://<bucket>/<path>`

**Configuration**

The simplest way to configure object storage is to create a JSON config file such as `/tmp/gcs.json`:

```bash
$ cat /tmp/gcs.json
{
  "gcs_base_url": "http://localhost:4443",
  "disable_oauth": true,
  "client_email": "",
  "private_key_id": "",
  "private_key": ""
}
```

Alternatively, you can use the following environment variables when starting postgres to configure the Google Cloud Storage client:
- `GOOGLE_SERVICE_ACCOUNT_KEY`: json serialized service account key
- `GOOGLE_SERVICE_ACCOUNT_PATH`: an alternative location for the config file
  • Loading branch information
aykut-bozkurt committed Nov 9, 2024
1 parent 3d026b6 commit 4a93f99
Show file tree
Hide file tree
Showing 16 changed files with 274 additions and 114 deletions.
22 changes: 22 additions & 0 deletions .devcontainer/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Shared settings for the local object-store emulators and the test run.
# Every docker-compose service loads this file through `env_file`, and the CI
# workflow exports it in its "Export environment variables from .env file" step.

# S3 tests
# The bucket named here is created against http://localhost:9000 by
# .devcontainer/create-test-buckets.sh. The AWS_* credentials use the same
# value as MINIO_ROOT_USER / MINIO_ROOT_PASSWORD below.
AWS_ACCESS_KEY_ID=minioadmin
AWS_SECRET_ACCESS_KEY=minioadmin
AWS_REGION=us-east-1
AWS_S3_TEST_BUCKET=testbucket
MINIO_ROOT_USER=minioadmin
MINIO_ROOT_PASSWORD=minioadmin

# Azure Blob tests
# The connection string targets a blob endpoint on localhost:10000, and
# create-test-buckets.sh uses it to create AZURE_TEST_CONTAINER_NAME.
# NOTE(review): the account name/key look like the emulator's built-in
# development credentials rather than a real secret — confirm.
AZURE_STORAGE_ACCOUNT=devstoreaccount1
AZURE_STORAGE_KEY="Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
AZURE_STORAGE_CONNECTION_STRING="DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;BlobEndpoint=http://localhost:10000/devstoreaccount1;"
AZURE_TEST_CONTAINER_NAME=testcontainer
# SAS tokens for the test container: `sp=r` for the read-only token, `sp=rcw`
# for the read-write one; both carry the expiry `se=2100-05-05`.
AZURE_TEST_READ_ONLY_SAS="se=2100-05-05&sp=r&sv=2022-11-02&sr=c&sig=YMPFnAHKe9y0o3hFegncbwQTXtAyvsJEgPB2Ne1b9CQ%3D"
AZURE_TEST_READ_WRITE_SAS="se=2100-05-05&sp=rcw&sv=2022-11-02&sr=c&sig=TPz2jEz0t9L651t6rTCQr%2BOjmJHkM76tnCGdcyttnlA%3D"

# GCS tests
# Bucket created through http://localhost:4443/storage/v1/b by
# create-test-buckets.sh.
GOOGLE_TEST_BUCKET=testbucket

# Others
# RUST_TEST_THREADS=1 limits the Rust test harness to one test thread.
# NOTE(review): PG_PARQUET_TEST is presumably read by the extension or its
# tests to enable test-only behavior — confirm against the Rust sources.
RUST_TEST_THREADS=1
PG_PARQUET_TEST=true
54 changes: 22 additions & 32 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@ RUN apt-get update && apt-get -y install build-essential libreadline-dev zlib1g-
curl lsb-release ca-certificates gnupg sudo git \
nano net-tools awscli

# install azure-cli
RUN curl -sL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor | tee /etc/apt/trusted.gpg.d/microsoft.gpg > /dev/null
RUN echo "deb [arch=`dpkg --print-architecture` signed-by=/etc/apt/trusted.gpg.d/microsoft.gpg] https://packages.microsoft.com/repos/azure-cli/ `lsb_release -cs` main" | tee /etc/apt/sources.list.d/azure-cli.list
RUN apt-get update && apt-get install -y azure-cli

# install Postgres
RUN sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
RUN wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | apt-key add -
Expand All @@ -20,42 +25,20 @@ RUN apt-get update && apt-get -y install postgresql-${PG_MAJOR}-postgis-3 \
postgresql-client-${PG_MAJOR} \
libpq-dev

# install azure-cli and azurite
RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash -
RUN apt-get update && apt-get install -y nodejs
RUN curl -sL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor | tee /etc/apt/trusted.gpg.d/microsoft.gpg > /dev/null
RUN echo "deb [arch=`dpkg --print-architecture` signed-by=/etc/apt/trusted.gpg.d/microsoft.gpg] https://packages.microsoft.com/repos/azure-cli/ `lsb_release -cs` main" | tee /etc/apt/sources.list.d/azure-cli.list
RUN apt-get update && apt-get install -y azure-cli
RUN npm install -g azurite
# set up permissions so that rust user can create extensions
RUN chmod a+rwx `pg_config --pkglibdir` \
`pg_config --sharedir`/extension \
/var/run/postgresql/

# download and install MinIO server and client
RUN wget https://dl.min.io/server/minio/release/linux-amd64/minio
RUN chmod +x minio
RUN mv minio /usr/local/bin/minio

# download and install MinIO admin
RUN wget https://dl.min.io/client/mc/release/linux-amd64/mc
RUN chmod +x mc
RUN mv mc /usr/local/bin/mc

# set up pgrx with non-sudo user
# initdb requires non-root user. This will also be the user that runs the container.
ARG USERNAME=rust
ARG USER_UID=501
ARG USER_GID=$USER_UID
RUN groupadd --gid $USER_GID $USERNAME \
&& useradd --uid $USER_UID --gid $USER_GID -s /bin/bash -m $USERNAME

RUN mkdir /workspaces && chown -R $USER_UID:$USER_GID /workspaces
ARG USER_UID=1000
ARG USER_GID=1000
RUN groupadd --gid $USER_GID $USERNAME
RUN useradd --uid $USER_UID --gid $USER_GID -s /bin/bash -m $USERNAME

# set up permissions so that the user below can create extensions
RUN chmod a+rwx `pg_config --pkglibdir` \
`pg_config --sharedir`/extension \
/var/run/postgresql/
RUN echo "$USERNAME ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/$USERNAME

# add it to sudoers
RUN echo "$USERNAME ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/$USERNAME

# now it is time to switch to user
USER $USERNAME

# install Rust environment
Expand All @@ -67,3 +50,10 @@ ARG PGRX_VERSION=0.12.6
RUN cargo install --locked cargo-pgrx@${PGRX_VERSION}
RUN cargo pgrx init --pg${PG_MAJOR} $(which pg_config)
RUN echo "shared_preload_libraries = 'pg_parquet'" >> $HOME/.pgrx/data-${PG_MAJOR}/postgresql.conf

# required for pgrx to work
ENV USER=$USERNAME

# git completion
RUN curl -o ~/.git-completion.bash https://raw.githubusercontent.com/git/git/master/contrib/completion/git-completion.bash
RUN echo "source ~/.git-completion.bash" >> ~/.bashrc
7 changes: 7 additions & 0 deletions .devcontainer/create-test-buckets.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/bash
#
# Creates the test bucket/container on each local object-store emulator.
#
# Required environment variables (defined in .devcontainer/.env):
#   AWS_S3_TEST_BUCKET               bucket to create on the S3 endpoint
#   AZURE_TEST_CONTAINER_NAME        container to create on the blob endpoint
#   AZURE_STORAGE_CONNECTION_STRING  connection string for the blob endpoint
#   GOOGLE_TEST_BUCKET               bucket to create on the GCS endpoint
#
# No `set -e` on purpose: every step is attempted even if an earlier one fails.
# All expansions are quoted so the values (notably the `;`-separated connection
# string) are passed as a single argument, free of word splitting and globbing.

# S3 endpoint on localhost:9000
aws --endpoint-url http://localhost:9000 s3 mb "s3://${AWS_S3_TEST_BUCKET}"

# Azure Blob endpoint (address comes from the connection string)
az storage container create -n "${AZURE_TEST_CONTAINER_NAME}" --connection-string "${AZURE_STORAGE_CONNECTION_STRING}"

# GCS endpoint on localhost:4443: buckets are created with a JSON POST
curl -v -X POST --data-binary "{\"name\":\"${GOOGLE_TEST_BUCKET}\"}" -H "Content-Type: application/json" "http://localhost:4443/storage/v1/b"
18 changes: 7 additions & 11 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
{
"build": {
"dockerfile": "Dockerfile"
},
"name": "pg_parquet Dev Environment",
"dockerComposeFile": "docker-compose.yml",
"service": "app",
"workspaceFolder": "/workspace",
"postStartCommand": "bash .devcontainer/create-test-buckets.sh",
"postAttachCommand": "sudo chown -R rust /workspace",
"customizations": {
"vscode": {
"extensions": [
Expand All @@ -14,12 +17,5 @@
"henriiik.docker-linter"
]
}
},
"postStartCommand": "bash .devcontainer/scripts/setup_minio.sh && bash .devcontainer/scripts/setup_azurite.sh",
"forwardPorts": [
5432
],
"capAdd": [
"SYS_PTRACE"
]
}
}
60 changes: 60 additions & 0 deletions .devcontainer/docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Dev-container stack: the `app` workspace container plus three object-store
# emulators. Every service uses host networking and loads the shared .env file.
services:
# Workspace container built from the Dockerfile in this directory.
app:
build:
context: .
dockerfile: Dockerfile
# Keeps the container running so an editor can attach to it.
command: sleep infinity
network_mode: host
volumes:
# Repository root mounted at /workspace.
- ..:/workspace
# Host credentials/config mounted into the `rust` user's home directory.
# NOTE(review): `${USERPROFILE}${HOME}` presumably relies on only one of the
# two variables being set on a given host OS — confirm.
- ${USERPROFILE}${HOME}/.ssh:/home/rust/.ssh:ro
- ${USERPROFILE}${HOME}/.ssh/known_hosts:/home/rust/.ssh/known_hosts:rw
- ${USERPROFILE}${HOME}/.gitconfig:/home/rust/.gitconfig:ro
- ${USERPROFILE}${HOME}/.aws:/home/rust/.aws:ro
- ${USERPROFILE}${HOME}/.azure:/home/rust/.azure:ro
env_file:
- .env
cap_add:
- SYS_PTRACE
# Start-order dependency only; no health condition is requested here.
depends_on:
- minio
- azurite
- fake-gcs-server

# S3 emulator; health-checked on localhost:9000.
minio:
image: minio/minio
env_file:
- .env
network_mode: host
command: server /data
restart: unless-stopped
# NOTE(review): assumes `curl` is available inside the image — confirm.
healthcheck:
test: ["CMD", "curl", "http://localhost:9000"]
interval: 6s
timeout: 2s
retries: 3

# Azure Blob emulator; health-checked on localhost:10000.
azurite:
image: mcr.microsoft.com/azure-storage/azurite
env_file:
- .env
network_mode: host
restart: unless-stopped
# NOTE(review): assumes `curl` is available inside the image — confirm.
healthcheck:
test: ["CMD", "curl", "http://localhost:10000"]
interval: 6s
timeout: 2s
retries: 3

# Google Cloud Storage emulator served over plain HTTP on localhost:4443.
fake-gcs-server:
image: tustvold/fake-gcs-server
env_file:
- .env
network_mode: host
command: -scheme http -public-host localhost:4443
restart: unless-stopped
# NOTE(review): assumes `curl` is available inside the image — confirm.
healthcheck:
test: ["CMD", "curl", "http://localhost:4443"]
interval: 6s
timeout: 2s
retries: 3
7 changes: 0 additions & 7 deletions .devcontainer/scripts/setup_azurite.sh

This file was deleted.

9 changes: 0 additions & 9 deletions .devcontainer/scripts/setup_minio.sh

This file was deleted.

19 changes: 0 additions & 19 deletions .devcontainer/scripts/setup_test_envs.sh

This file was deleted.

70 changes: 40 additions & 30 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ jobs:
path: ${{ env.SCCACHE_DIR }}
key: pg_parquet-sccache-cache-${{ runner.os }}-${{ hashFiles('Cargo.lock', '.github/workflows/ci.yml') }}

- name: Export environment variables from .env file
uses: falti/dotenv-action@v1
with:
path: .devcontainer/.env
export_variables: true

- name: Install PostgreSQL
run: |
sudo sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
Expand All @@ -78,26 +84,11 @@ jobs:
postgresql-client-${{ env.PG_MAJOR }} \
libpq-dev
- name: Install Azurite
- name: Install azure-cli
run: |
curl -fsSL https://deb.nodesource.com/setup_20.x | sudo bash -
sudo apt-get update && sudo apt-get install -y nodejs
curl -sL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/microsoft.gpg > /dev/null
echo "deb [arch=`dpkg --print-architecture` signed-by=/etc/apt/trusted.gpg.d/microsoft.gpg] https://packages.microsoft.com/repos/azure-cli/ `lsb_release -cs` main" | sudo tee /etc/apt/sources.list.d/azure-cli.list
sudo apt-get update && sudo apt-get install -y azure-cli
npm install -g azurite
- name: Install MinIO
run: |
# Download and install MinIO server and client
wget https://dl.min.io/server/minio/release/linux-amd64/minio
chmod +x minio
mv minio /usr/local/bin/minio
# Download and install MinIO admin
wget https://dl.min.io/client/mc/release/linux-amd64/mc
chmod +x mc
mv mc /usr/local/bin/mc
- name: Install and configure pgrx
run: |
Expand All @@ -112,35 +103,54 @@ jobs:
cargo fmt --all -- --check
cargo clippy --all-targets --features "pg${{ env.PG_MAJOR }}, pg_test" --no-default-features -- -D warnings
- name: Run tests
- name: Set up permissions for PostgreSQL
run: |
# Set up permissions so that the current user below can create extensions
sudo chmod a+rwx $(pg_config --pkglibdir) \
$(pg_config --sharedir)/extension \
/var/run/postgresql/
# Set up test environments
source .devcontainer/scripts/setup_test_envs.sh
- name: Start Minio for s3 emulator tests
run: |
# Run MinIO detached (-d). A foreground `docker run` does not return while the
# server is running, so the wait loop and bucket creation would never execute.
docker run -d -p 9000:9000 minio/minio server /data
# Poll until the S3 port accepts TCP connections.
while ! nc -z localhost 9000; do
echo "Waiting for localhost:9000..."
sleep 1
done
# Start MinIO server
bash .devcontainer/scripts/setup_minio.sh
aws --endpoint-url http://localhost:9000 s3 mb s3://$AWS_S3_TEST_BUCKET
- name: Start Azurite for Azure Blob Storage emulator tests
run: |
docker run -d -p 10000:10000 mcr.microsoft.com/azure-storage/azurite
# Start Azurite server
bash .devcontainer/scripts/setup_azurite.sh
while ! nc -z localhost 10000; do
echo "Waiting for localhost:10000..."
sleep 1
done
az storage container create -n $AZURE_TEST_CONTAINER_NAME --connection-string $AZURE_STORAGE_CONNECTION_STRING
- name: Start fake-gcs-server for Google Cloud Storage emulator tests
run: |
docker run -d -p 4443:4443 tustvold/fake-gcs-server -scheme http -filesystem-root /tmp/gcs -public-host localhost:4443
while ! nc -z localhost 4443; do
echo "Waiting for localhost:4443..."
sleep 1
done
curl -v -X POST --data-binary "{\"name\":\"$GOOGLE_TEST_BUCKET\"}" -H "Content-Type: application/json" "http://localhost:4443/storage/v1/b"
- name: Run tests
run: |
# Run tests with coverage tool
source <(cargo llvm-cov show-env --export-prefix)
cargo llvm-cov clean
cargo build --features "pg${{ env.PG_MAJOR }}, pg_test" --no-default-features
cargo pgrx test pg${{ env.PG_MAJOR }} --no-default-features
cargo llvm-cov report --lcov > lcov.info
# Stop MinIO server
pkill -9 minio
# Stop Azurite server
pkill -9 node
- name: Upload coverage report to Codecov
# The whole comparison must sit inside ${{ }}. With only the variable inside,
# the condition is rendered to a non-empty string (e.g. "16 == 17"), which is
# always truthy, so the step would run for every PG_MAJOR.
if: ${{ env.PG_MAJOR == 17 }}
uses: codecov/codecov-action@v4
Expand Down
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,4 @@
*.lcov
*.xml
lcov.info
.env
playground.rs
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,7 @@
"rust-analyzer.check.command": "clippy",
"rust-analyzer.checkOnSave": true,
"editor.inlayHints.enabled": "offUnlessPressed",
"files.watcherExclude": {
"**/target/**": true
}
}
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ aws-config = { version = "1.5", default-features = false, features = ["rustls"]}
aws-credential-types = {version = "1.2", default-features = false}
futures = "0.3"
home = "0.5"
object_store = {version = "0.11", default-features = false, features = ["aws", "azure"]}
object_store = {version = "0.11", default-features = false, features = ["aws", "azure", "gcp"]}
once_cell = "1"
parquet = {version = "53", default-features = false, features = [
"arrow",
Expand Down
Loading

0 comments on commit 4a93f99

Please sign in to comment.