diff --git a/.devcontainer/.env b/.devcontainer/.env new file mode 100644 index 0000000..093b6bb --- /dev/null +++ b/.devcontainer/.env @@ -0,0 +1,11 @@ +# S3 tests + AWS_ACCESS_KEY_ID=minioadmin + AWS_SECRET_ACCESS_KEY=minioadmin + AWS_REGION=us-east-1 + AWS_S3_TEST_BUCKET=testbucket + MINIO_ROOT_USER=minioadmin + MINIO_ROOT_PASSWORD=minioadmin + + # Others + RUST_TEST_THREADS=1 + PG_PARQUET_TEST=true diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 3d3b802..ec712d7 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -6,10 +6,11 @@ ENV TZ="Europe/Istanbul" ARG PG_MAJOR=17 # install deps -RUN apt-get update && apt-get -y install build-essential libreadline-dev zlib1g-dev \ - flex bison libxml2-dev libxslt-dev libssl-dev \ - libxml2-utils xsltproc ccache pkg-config wget \ - curl lsb-release sudo nano net-tools git awscli +RUN apt-get update && apt-get -y install build-essential libreadline-dev zlib1g-dev \ + flex bison libxml2-dev libxslt-dev libssl-dev \ + libxml2-utils xsltproc ccache pkg-config wget \ + curl lsb-release ca-certificates gnupg sudo git \ + nano net-tools awscli # install Postgres RUN sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list' @@ -19,34 +20,20 @@ RUN apt-get update && apt-get -y install postgresql-${PG_MAJOR}-postgis-3 \ postgresql-client-${PG_MAJOR} \ libpq-dev -# download and install MinIO server and client -RUN wget https://dl.min.io/server/minio/release/linux-amd64/minio -RUN chmod +x minio -RUN mv minio /usr/local/bin/minio +# set up permissions so that rust user can create extensions +RUN chmod a+rwx `pg_config --pkglibdir` \ + `pg_config --sharedir`/extension \ + /var/run/postgresql/ -# download and install MinIO admin -RUN wget https://dl.min.io/client/mc/release/linux-amd64/mc -RUN chmod +x mc -RUN mv mc /usr/local/bin/mc - -# set up pgrx with non-sudo user +# initdb requires non-root user. This will also be the user that runs the container. ARG USERNAME=rust -ARG USER_UID=501 -ARG USER_GID=$USER_UID -RUN groupadd --gid $USER_GID $USERNAME \ - && useradd --uid $USER_UID --gid $USER_GID -s /bin/bash -m $USERNAME - -RUN mkdir /workspaces && chown -R $USER_UID:$USER_GID /workspaces +ARG USER_UID=1000 +ARG USER_GID=1000 +RUN groupadd --gid $USER_GID $USERNAME +RUN useradd --uid $USER_UID --gid $USER_GID -s /bin/bash -m $USERNAME -# set up permissions so that the user below can create extensions -RUN chmod a+rwx `pg_config --pkglibdir` \ - `pg_config --sharedir`/extension \ - /var/run/postgresql/ +RUN echo "$USERNAME ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/$USERNAME -# add it to sudoers -RUN echo "$USERNAME ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/$USERNAME - -# now it is time to switch to user USER $USERNAME # install Rust environment @@ -59,10 +46,9 @@ RUN cargo install --locked cargo-pgrx@${PGRX_VERSION} RUN cargo pgrx init --pg${PG_MAJOR} $(which pg_config) RUN echo "shared_preload_libraries = 'pg_parquet'" >> $HOME/.pgrx/data-${PG_MAJOR}/postgresql.conf -ENV MINIO_ROOT_USER=admin -ENV MINIO_ROOT_PASSWORD=admin123 -ENV AWS_S3_TEST_BUCKET=testbucket -ENV AWS_REGION=us-east-1 -ENV AWS_ACCESS_KEY_ID=admin -ENV AWS_SECRET_ACCESS_KEY=admin123 -ENV PG_PARQUET_TEST=true +# required for pgrx to work +ENV USER=$USERNAME + +# git completion +RUN curl -o ~/.git-completion.bash https://raw.githubusercontent.com/git/git/master/contrib/completion/git-completion.bash +RUN echo "source ~/.git-completion.bash" >> ~/.bashrc diff --git a/.devcontainer/create-test-buckets.sh b/.devcontainer/create-test-buckets.sh new file mode 100644 index 0000000..ed6601c --- /dev/null +++ b/.devcontainer/create-test-buckets.sh @@ -0,0 +1,3 @@ +#!/bin/bash + + aws --endpoint-url http://localhost:9000 s3 mb s3://$AWS_S3_TEST_BUCKET diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index efdd2d9..e2c90a8 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,7 +1,10 @@ { - "build": { - "dockerfile": "Dockerfile" - }, + "name": "pg_parquet Dev Environment", + "dockerComposeFile": "docker-compose.yml", + "service": "app", + "workspaceFolder": "/workspace", + "postStartCommand": "bash .devcontainer/create-test-buckets.sh", + "postAttachCommand": "sudo chown -R rust /workspace", "customizations": { "vscode": { "extensions": [ @@ -14,12 +17,5 @@ "henriiik.docker-linter" ] } - }, - "postStartCommand": "bash .devcontainer/scripts/setup-minio.sh", - "forwardPorts": [ - 5432 - ], - "capAdd": [ - "SYS_PTRACE" - ] + } } diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml new file mode 100644 index 0000000..259cfc8 --- /dev/null +++ b/.devcontainer/docker-compose.yml @@ -0,0 +1,32 @@ +services: + app: + build: + context: . + dockerfile: Dockerfile + command: sleep infinity + network_mode: host + volumes: + - ..:/workspace + - ${USERPROFILE}${HOME}/.ssh:/home/rust/.ssh:ro + - ${USERPROFILE}${HOME}/.ssh/known_hosts:/home/rust/.ssh/known_hosts:rw + - ${USERPROFILE}${HOME}/.gitconfig:/home/rust/.gitconfig:ro + - ${USERPROFILE}${HOME}/.aws:/home/rust/.aws:ro + env_file: + - .env + cap_add: + - SYS_PTRACE + depends_on: + - minio + + minio: + image: minio/minio + env_file: + - .env + network_mode: host + command: server /data + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "http://localhost:9000"] + interval: 6s + timeout: 2s + retries: 3 diff --git a/.devcontainer/scripts/setup-minio.sh b/.devcontainer/scripts/setup-minio.sh deleted file mode 100644 index d611b70..0000000 --- a/.devcontainer/scripts/setup-minio.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -nohup minio server /tmp/minio-storage > /dev/null 2>&1 & - -mc alias set local http://localhost:9000 $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD - -aws --endpoint-url http://localhost:9000 s3 mb s3://testbucket diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 403e54c..4700584 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,28 +65,25 @@ jobs: path: ${{ env.SCCACHE_DIR }} key: pg_parquet-sccache-cache-${{ runner.os }}-${{ hashFiles('Cargo.lock', '.github/workflows/ci.yml') }} + - name: Export environment variables from .env file + uses: falti/dotenv-action@v1 + with: + path: .devcontainer/.env + export_variables: true + - name: Install PostgreSQL run: | sudo sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list' wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add - sudo apt-get update - sudo apt-get install build-essential libreadline-dev zlib1g-dev flex bison libxml2-dev libxslt-dev libssl-dev libxml2-utils xsltproc ccache pkg-config + sudo apt-get -y install build-essential libreadline-dev zlib1g-dev flex bison libxml2-dev \ + libxslt-dev libssl-dev libxml2-utils xsltproc ccache pkg-config \ + gnupg ca-certificates sudo apt-get -y install postgresql-${{ env.PG_MAJOR }}-postgis-3 \ postgresql-server-dev-${{ env.PG_MAJOR }} \ postgresql-client-${{ env.PG_MAJOR }} \ libpq-dev - - name: Install MinIO - run: | - # Download and install MinIO server and client - wget https://dl.min.io/server/minio/release/linux-amd64/minio - chmod +x minio - mv minio /usr/local/bin/minio - - # Download and install MinIO admin - wget https://dl.min.io/client/mc/release/linux-amd64/mc - chmod +x mc - mv mc /usr/local/bin/mc - name: Install and configure pgrx run: | @@ -101,31 +98,25 @@ jobs: cargo fmt --all -- --check cargo clippy --all-targets --features "pg${{ env.PG_MAJOR }}, pg_test" --no-default-features -- -D warnings - - name: Run tests + - name: Set up permissions for PostgreSQL run: | - # Set up permissions so that the current user below can create extensions sudo chmod a+rwx $(pg_config --pkglibdir) \ $(pg_config --sharedir)/extension \ /var/run/postgresql/ - # pgrx tests with runas argument ignores environment variables, so - # we read env vars from .env file in tests (https://github.com/pgcentralfoundation/pgrx/pull/1674) - touch /tmp/.env - echo AWS_ACCESS_KEY_ID=${{ env.AWS_ACCESS_KEY_ID }} >> /tmp/.env - echo AWS_SECRET_ACCESS_KEY=${{ env.AWS_SECRET_ACCESS_KEY }} >> /tmp/.env - echo AWS_S3_TEST_BUCKET=${{ env.AWS_S3_TEST_BUCKET }} >> /tmp/.env - echo AWS_REGION=${{ env.AWS_REGION }} >> /tmp/.env - echo PG_PARQUET_TEST=${{ env.PG_PARQUET_TEST }} >> /tmp/.env + - name: Start Minio for s3 emulator tests + run: | + docker run -p 9000:9000 minio/minio server /data - # Start MinIO server - export MINIO_ROOT_USER=${{ env.AWS_ACCESS_KEY_ID }} - export MINIO_ROOT_PASSWORD=${{ env.AWS_SECRET_ACCESS_KEY }} - minio server /tmp/minio-storage > /dev/null 2>&1 & + while ! nc -z localhost 9000; do + echo "Waiting for localhost:9000..." + sleep 1 + done - # Set access key and create test bucket - mc alias set local http://localhost:9000 ${{ env.AWS_ACCESS_KEY_ID }} ${{ env.AWS_SECRET_ACCESS_KEY }} - aws --endpoint-url http://localhost:9000 s3 mb s3://${{ env.AWS_S3_TEST_BUCKET }} + aws --endpoint-url http://localhost:9000 s3 mb s3://$AWS_S3_TEST_BUCKET + - name: Run tests + run: | # Run tests with coverage tool source <(cargo llvm-cov show-env --export-prefix) cargo llvm-cov clean @@ -133,16 +124,6 @@ jobs: cargo pgrx test pg${{ env.PG_MAJOR }} --no-default-features cargo llvm-cov report --lcov > lcov.info - # Stop MinIO server - pkill -9 minio - env: - RUST_TEST_THREADS: 1 - AWS_ACCESS_KEY_ID: test_secret_access_key - AWS_SECRET_ACCESS_KEY: test_access_key_id - AWS_REGION: us-east-1 - AWS_S3_TEST_BUCKET: testbucket - PG_PARQUET_TEST: true - - name: Upload coverage report to Codecov if: ${{ env.PG_MAJOR }} == 17 uses: codecov/codecov-action@v4 diff --git a/.gitignore b/.gitignore index d15537c..a5aa1d9 100644 --- a/.gitignore +++ b/.gitignore @@ -12,5 +12,4 @@ *.lcov *.xml lcov.info -.env playground.rs diff --git a/.vscode/settings.json b/.vscode/settings.json index be4b716..f6ad919 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -4,4 +4,7 @@ "rust-analyzer.check.command": "clippy", "rust-analyzer.checkOnSave": true, "editor.inlayHints.enabled": "offUnlessPressed", -} \ No newline at end of file + "files.watcherExclude": { + "**/target/**": true + } +} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..0bb4b1b --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,130 @@ +`pg_parquet` is an open source project primarily authored and +maintained by the team at Crunchy Data. All contributions are welcome. The pg_parquet uses the PostgreSQL license and does not require any contributor agreement to submit patches. + +Our contributors try to follow good software development practices to help +ensure that the code that we ship to our users is stable. If you wish to +contribute to the pg_parquet in any way, please follow the guidelines below. + +Thanks! We look forward to your contribution. + +# General Contributing Guidelines + +All ongoing development for an upcoming release gets committed to the +**`main`** branch. The `main` branch technically serves as the "development" +branch as well, but all code that is committed to the `main` branch should be +considered _stable_, even if it is part of an ongoing release cycle. + +Ensure any changes are clear and well-documented: + +- The most helpful code comments explain why, establish context, or efficiently +summarize how. Avoid simply repeating details from declarations. When in doubt, +favor overexplaining to underexplaining. + +- Do not submit commented-out code. If the code does not need to be used +anymore, please remove it. + +- While `TODO` comments are frowned upon, every now and then it is ok to put a +`TODO` to note that a particular section of code needs to be worked on in the +future. However, it is also known that "TODOs" often do not get worked on, and +as such, it is more likely you will be asked to complete the TODO at the time +you submit it. + +- Make sure to add tests which cover the code lines that are introduced by your changes. See [testing](#testing). + +- Make sure to [format and lint](#format-and-lint) the code. + +Please provide unit tests with your code if possible. If you are unable to +provide a unit test, please provide an explanation as to why in your pull +request, including a description of the steps used to manually verify the +changes. + +Ensure your commits are atomic. Each commit tells a story about what changes +are being made. This makes it easier to identify when a bug is introduced into +the codebase, and as such makes it easier to fix. + +All commits must either be rebased in atomic order or squashed (if the squashed +commit is considered atomic). Merge commits are not accepted. All conflicts must +be resolved prior to pushing changes. + +**All pull requests should be made from the `main` branch.** + +# Commit Messages + +Commit messages should be as descriptive and should follow the general format: + +``` +A one-sentence summary of what the commit is. + +Further details of the commit messages go in here. Try to be as descriptive of +possible as to what the changes are. Good things to include: + +- What the changes is. +- Why the change was made. +- What to expect now that the change is in place. +- Any advice that can be helpful if someone needs to review this commit and +understand. +``` + +If you wish to tag a GitHub issue or another project management tracker, please +do so at the bottom of the commit message, and make it clearly labeled like so: + +``` +Issue: CrunchyData/pg_parquet#12 +``` + +# Submitting Pull Requests + +All work should be made in your own repository fork. When you believe your work +is ready to be committed, please follow the guidance below for creating a pull +request. + +## Upcoming Features + +Ongoing work for new features should occur in branches off of the `main` +branch. + +# Start Local Environment + +There are 2 ways to start your local environment to start contributing pg_parquet. +- [Installation From Source](#installation-from-source) +- [Devcontainer](#devcontainer) + +## Installation From Source + +See [README.md](README.md#installation-from-source). + +## Devcontainer + +If you want to work on a totally ready-to-work container environment, you can try our +[devcontainer](.devcontainer/devcontainer.json). If you have chance to work on +[vscode editor](https://code.visualstudio.com), you can start pg_parquet project +inside the devcontainer. Please see [how you start the devcontainer](https://code.visualstudio.com/docs/devcontainers/containers). + +# Postgres Support Matrix + +You can see the current supported Postgres versions from [README.md](README.md#postgres-support-matrix). +Supported Postgres versions exist as Cargo feature flags at [Cargo.toml](Cargo.toml). +By default, pg_parquet is built with the latest supported Postgres version flag enabled. +If you want to build pg_parquet against another Postgres version, you can do it +by specifying the feature flag explicitly like `cargo pgrx build --features "pg16"`. +The same applies for running a session via `cargo pgrx run pg16` or +testing via `cargo pgrx test pg16`. + +# Testing + +We run `RUST_TEST_THREADS=1 cargo pgrx test` to run all our unit tests. If you +run a specific test, you can do it via regex patterns like below: +`cargo pgrx run pg17 test_numeric_*`. + +> [!WARNING] +> Make sure to pass RUST_TEST_THREADS=1 as environment variable to `cargo pgrx test`. + +Object storage tests are integration tests which require a storage emulator running +locally. See [ci.yml](.github/workflows/ci.yml) to see how local storage emulators +are started. You can also see the required environment variables from +[.env file](.devcontainer/.env). + +# Format and Lint + +We use `cargo-fmt` as formatter and `cargo-clippy` as linter. You can check +how we run them from [ci.yml](.github/workflows/ci.yml). diff --git a/Cargo.lock b/Cargo.lock index f5f042e..a7b7c33 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -982,12 +982,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "dotenvy" -version = "0.15.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" - [[package]] name = "either" version = "1.13.0" @@ -2088,7 +2082,6 @@ dependencies = [ "arrow-schema", "aws-config", "aws-credential-types", - "dotenvy", "futures", "object_store", "once_cell", diff --git a/Cargo.toml b/Cargo.toml index 09b056f..a49a80d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,6 @@ arrow = {version = "53", default-features = false} arrow-schema = {version = "53", default-features = false} aws-config = { version = "1.5", default-features = false, features = ["rustls"]} aws-credential-types = {version = "1.2", default-features = false} -dotenvy = "0.15" futures = "0.3" object_store = {version = "0.11", default-features = false, features = ["aws"]} once_cell = "1" diff --git a/src/arrow_parquet/uri_utils.rs b/src/arrow_parquet/uri_utils.rs index e1dc475..3ff97af 100644 --- a/src/arrow_parquet/uri_utils.rs +++ b/src/arrow_parquet/uri_utils.rs @@ -90,9 +90,6 @@ fn object_store_with_location(uri: &Url, copy_from: bool) -> (Arc AmazonS3 { - // try loading environment vars from the .env file - dotenvy::from_path("/tmp/.env").ok(); - let mut aws_s3_builder = AmazonS3Builder::new().with_bucket_name(bucket_name); let is_test_running = std::env::var("PG_PARQUET_TEST").is_ok(); diff --git a/src/lib.rs b/src/lib.rs index 183682f..3bd6066 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1494,8 +1494,6 @@ mod tests { #[pg_test] fn test_s3_object_store_from_env() { - dotenvy::from_path("/tmp/.env").unwrap(); - let test_bucket_name: String = std::env::var("AWS_S3_TEST_BUCKET").expect("AWS_S3_TEST_BUCKET not found"); @@ -1509,8 +1507,6 @@ mod tests { #[pg_test] fn test_s3_object_store_from_config_file() { - dotenvy::from_path("/tmp/.env").unwrap(); - let test_bucket_name: String = std::env::var("AWS_S3_TEST_BUCKET").expect("AWS_S3_TEST_BUCKET not found"); @@ -1566,8 +1562,6 @@ mod tests { // set the current user to the regular user Spi::run("SET SESSION AUTHORIZATION regular_user;").unwrap(); - dotenvy::from_path("/tmp/.env").unwrap(); - let test_bucket_name: String = std::env::var("AWS_S3_TEST_BUCKET").expect("AWS_S3_TEST_BUCKET not found"); @@ -1608,8 +1602,6 @@ mod tests { // set the current user to the regular user Spi::run("SET SESSION AUTHORIZATION regular_user;").unwrap(); - dotenvy::from_path("/tmp/.env").unwrap(); - let test_bucket_name: String = std::env::var("AWS_S3_TEST_BUCKET").expect("AWS_S3_TEST_BUCKET not found");