diff --git a/.devcontainer/.env b/.devcontainer/.env new file mode 100644 index 0000000..093b6bb --- /dev/null +++ b/.devcontainer/.env @@ -0,0 +1,11 @@ +# S3 tests + AWS_ACCESS_KEY_ID=minioadmin + AWS_SECRET_ACCESS_KEY=minioadmin + AWS_REGION=us-east-1 + AWS_S3_TEST_BUCKET=testbucket + MINIO_ROOT_USER=minioadmin + MINIO_ROOT_PASSWORD=minioadmin + + # Others + RUST_TEST_THREADS=1 + PG_PARQUET_TEST=true diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 3d3b802..ec712d7 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -6,10 +6,11 @@ ENV TZ="Europe/Istanbul" ARG PG_MAJOR=17 # install deps -RUN apt-get update && apt-get -y install build-essential libreadline-dev zlib1g-dev \ - flex bison libxml2-dev libxslt-dev libssl-dev \ - libxml2-utils xsltproc ccache pkg-config wget \ - curl lsb-release sudo nano net-tools git awscli +RUN apt-get update && apt-get -y install build-essential libreadline-dev zlib1g-dev \ + flex bison libxml2-dev libxslt-dev libssl-dev \ + libxml2-utils xsltproc ccache pkg-config wget \ + curl lsb-release ca-certificates gnupg sudo git \ + nano net-tools awscli # install Postgres RUN sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list' @@ -19,34 +20,20 @@ RUN apt-get update && apt-get -y install postgresql-${PG_MAJOR}-postgis-3 \ postgresql-client-${PG_MAJOR} \ libpq-dev -# download and install MinIO server and client -RUN wget https://dl.min.io/server/minio/release/linux-amd64/minio -RUN chmod +x minio -RUN mv minio /usr/local/bin/minio +# set up permissions so that rust user can create extensions +RUN chmod a+rwx `pg_config --pkglibdir` \ + `pg_config --sharedir`/extension \ + /var/run/postgresql/ -# download and install MinIO admin -RUN wget https://dl.min.io/client/mc/release/linux-amd64/mc -RUN chmod +x mc -RUN mv mc /usr/local/bin/mc - -# set up pgrx with non-sudo user +# initdb requires non-root user. This will also be the user that runs the container. ARG USERNAME=rust -ARG USER_UID=501 -ARG USER_GID=$USER_UID -RUN groupadd --gid $USER_GID $USERNAME \ - && useradd --uid $USER_UID --gid $USER_GID -s /bin/bash -m $USERNAME - -RUN mkdir /workspaces && chown -R $USER_UID:$USER_GID /workspaces +ARG USER_UID=1000 +ARG USER_GID=1000 +RUN groupadd --gid $USER_GID $USERNAME +RUN useradd --uid $USER_UID --gid $USER_GID -s /bin/bash -m $USERNAME -# set up permissions so that the user below can create extensions -RUN chmod a+rwx `pg_config --pkglibdir` \ - `pg_config --sharedir`/extension \ - /var/run/postgresql/ +RUN echo "$USERNAME ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/$USERNAME -# add it to sudoers -RUN echo "$USERNAME ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/$USERNAME - -# now it is time to switch to user USER $USERNAME # install Rust environment @@ -59,10 +46,9 @@ RUN cargo install --locked cargo-pgrx@${PGRX_VERSION} RUN cargo pgrx init --pg${PG_MAJOR} $(which pg_config) RUN echo "shared_preload_libraries = 'pg_parquet'" >> $HOME/.pgrx/data-${PG_MAJOR}/postgresql.conf -ENV MINIO_ROOT_USER=admin -ENV MINIO_ROOT_PASSWORD=admin123 -ENV AWS_S3_TEST_BUCKET=testbucket -ENV AWS_REGION=us-east-1 -ENV AWS_ACCESS_KEY_ID=admin -ENV AWS_SECRET_ACCESS_KEY=admin123 -ENV PG_PARQUET_TEST=true +# required for pgrx to work +ENV USER=$USERNAME + +# git completion +RUN curl -o ~/.git-completion.bash https://raw.githubusercontent.com/git/git/master/contrib/completion/git-completion.bash +RUN echo "source ~/.git-completion.bash" >> ~/.bashrc diff --git a/.devcontainer/create-test-buckets.sh b/.devcontainer/create-test-buckets.sh new file mode 100644 index 0000000..ed6601c --- /dev/null +++ b/.devcontainer/create-test-buckets.sh @@ -0,0 +1,3 @@ +#!/bin/bash + + aws --endpoint-url http://localhost:9000 s3 mb s3://$AWS_S3_TEST_BUCKET diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index efdd2d9..e2c90a8 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,7 +1,10 @@ { - "build": { - "dockerfile": "Dockerfile" - }, + "name": "pg_parquet Dev Environment", + "dockerComposeFile": "docker-compose.yml", + "service": "app", + "workspaceFolder": "/workspace", + "postStartCommand": "bash .devcontainer/create-test-buckets.sh", + "postAttachCommand": "sudo chown -R rust /workspace", "customizations": { "vscode": { "extensions": [ @@ -14,12 +17,5 @@ "henriiik.docker-linter" ] } - }, - "postStartCommand": "bash .devcontainer/scripts/setup-minio.sh", - "forwardPorts": [ - 5432 - ], - "capAdd": [ - "SYS_PTRACE" - ] + } } diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml new file mode 100644 index 0000000..259cfc8 --- /dev/null +++ b/.devcontainer/docker-compose.yml @@ -0,0 +1,32 @@ +services: + app: + build: + context: . + dockerfile: Dockerfile + command: sleep infinity + network_mode: host + volumes: + - ..:/workspace + - ${USERPROFILE}${HOME}/.ssh:/home/rust/.ssh:ro + - ${USERPROFILE}${HOME}/.ssh/known_hosts:/home/rust/.ssh/known_hosts:rw + - ${USERPROFILE}${HOME}/.gitconfig:/home/rust/.gitconfig:ro + - ${USERPROFILE}${HOME}/.aws:/home/rust/.aws:ro + env_file: + - .env + cap_add: + - SYS_PTRACE + depends_on: + - minio + + minio: + image: minio/minio + env_file: + - .env + network_mode: host + command: server /data + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "http://localhost:9000"] + interval: 6s + timeout: 2s + retries: 3 diff --git a/.devcontainer/scripts/setup-minio.sh b/.devcontainer/scripts/setup-minio.sh deleted file mode 100644 index d611b70..0000000 --- a/.devcontainer/scripts/setup-minio.sh +++ /dev/null @@ -1,7 +0,0 @@ -#!/bin/bash - -nohup minio server /tmp/minio-storage > /dev/null 2>&1 & - -mc alias set local http://localhost:9000 $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD - -aws --endpoint-url http://localhost:9000 s3 mb s3://testbucket diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 403e54c..b15ff23 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -65,28 +65,25 @@ jobs: path: ${{ env.SCCACHE_DIR }} key: pg_parquet-sccache-cache-${{ runner.os }}-${{ hashFiles('Cargo.lock', '.github/workflows/ci.yml') }} + - name: Export environment variables from .env file + uses: falti/dotenv-action@v1 + with: + path: .devcontainer/.env + export_variables: true + - name: Install PostgreSQL run: | sudo sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list' wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add - sudo apt-get update - sudo apt-get install build-essential libreadline-dev zlib1g-dev flex bison libxml2-dev libxslt-dev libssl-dev libxml2-utils xsltproc ccache pkg-config + sudo apt-get -y install build-essential libreadline-dev zlib1g-dev flex bison libxml2-dev \ + libxslt-dev libssl-dev libxml2-utils xsltproc ccache pkg-config \ + gnupg ca-certificates sudo apt-get -y install postgresql-${{ env.PG_MAJOR }}-postgis-3 \ postgresql-server-dev-${{ env.PG_MAJOR }} \ postgresql-client-${{ env.PG_MAJOR }} \ libpq-dev - - name: Install MinIO - run: | - # Download and install MinIO server and client - wget https://dl.min.io/server/minio/release/linux-amd64/minio - chmod +x minio - mv minio /usr/local/bin/minio - - # Download and install MinIO admin - wget https://dl.min.io/client/mc/release/linux-amd64/mc - chmod +x mc - mv mc /usr/local/bin/mc - name: Install and configure pgrx run: | @@ -101,31 +98,25 @@ jobs: cargo fmt --all -- --check cargo clippy --all-targets --features "pg${{ env.PG_MAJOR }}, pg_test" --no-default-features -- -D warnings - - name: Run tests + - name: Set up permissions for PostgreSQL run: | - # Set up permissions so that the current user below can create extensions sudo chmod a+rwx $(pg_config --pkglibdir) \ $(pg_config --sharedir)/extension \ /var/run/postgresql/ - # pgrx tests with runas argument ignores environment variables, so - # we read env vars from .env file in tests (https://github.com/pgcentralfoundation/pgrx/pull/1674) - touch /tmp/.env - echo AWS_ACCESS_KEY_ID=${{ env.AWS_ACCESS_KEY_ID }} >> /tmp/.env - echo AWS_SECRET_ACCESS_KEY=${{ env.AWS_SECRET_ACCESS_KEY }} >> /tmp/.env - echo AWS_S3_TEST_BUCKET=${{ env.AWS_S3_TEST_BUCKET }} >> /tmp/.env - echo AWS_REGION=${{ env.AWS_REGION }} >> /tmp/.env - echo PG_PARQUET_TEST=${{ env.PG_PARQUET_TEST }} >> /tmp/.env + - name: Start Minio for s3 emulator tests + run: | + docker run -d -p 9000:9000 minio/minio server /data - # Start MinIO server - export MINIO_ROOT_USER=${{ env.AWS_ACCESS_KEY_ID }} - export MINIO_ROOT_PASSWORD=${{ env.AWS_SECRET_ACCESS_KEY }} - minio server /tmp/minio-storage > /dev/null 2>&1 & + while ! nc -z localhost 9000; do + echo "Waiting for localhost:9000..." + sleep 1 + done - # Set access key and create test bucket - mc alias set local http://localhost:9000 ${{ env.AWS_ACCESS_KEY_ID }} ${{ env.AWS_SECRET_ACCESS_KEY }} - aws --endpoint-url http://localhost:9000 s3 mb s3://${{ env.AWS_S3_TEST_BUCKET }} + aws --endpoint-url http://localhost:9000 s3 mb s3://$AWS_S3_TEST_BUCKET + - name: Run tests + run: | # Run tests with coverage tool source <(cargo llvm-cov show-env --export-prefix) cargo llvm-cov clean @@ -133,16 +124,6 @@ jobs: cargo pgrx test pg${{ env.PG_MAJOR }} --no-default-features cargo llvm-cov report --lcov > lcov.info - # Stop MinIO server - pkill -9 minio - env: - RUST_TEST_THREADS: 1 - AWS_ACCESS_KEY_ID: test_secret_access_key - AWS_SECRET_ACCESS_KEY: test_access_key_id - AWS_REGION: us-east-1 - AWS_S3_TEST_BUCKET: testbucket - PG_PARQUET_TEST: true - - name: Upload coverage report to Codecov if: ${{ env.PG_MAJOR }} == 17 uses: codecov/codecov-action@v4 diff --git a/.gitignore b/.gitignore index d15537c..a5aa1d9 100644 --- a/.gitignore +++ b/.gitignore @@ -12,5 +12,4 @@ *.lcov *.xml lcov.info -.env playground.rs diff --git a/.vscode/settings.json b/.vscode/settings.json index be4b716..f6ad919 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -4,4 +4,7 @@ "rust-analyzer.check.command": "clippy", "rust-analyzer.checkOnSave": true, "editor.inlayHints.enabled": "offUnlessPressed", -} \ No newline at end of file + "files.watcherExclude": { + "**/target/**": true + } +} diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..10869e6 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,109 @@ +`pg_parquet` is an open source project primarily authored and +maintained by the team at Crunchy Data. All contributions are welcome. The pg_parquet uses the PostgreSQL license and does not require any contributor agreement to submit patches. + +Our contributors try to follow good software development practices to help +ensure that the code that we ship to our users is stable. If you wish to +contribute to the pg_parquet in any way, please follow the guidelines below. + +Thanks! We look forward to your contribution. + +# General Contributing Guidelines + +All ongoing development for an upcoming release gets committed to the +**`main`** branch. The `main` branch technically serves as the "development" +branch as well, but all code that is committed to the `main` branch should be +considered _stable_, even if it is part of an ongoing release cycle. + +- Ensure any changes are clear and well-documented: + - The most helpful code comments explain why, establish context, or efficiently summarize how. Avoid simply repeating details from declarations. When in doubt, favor overexplaining to underexplaining. + - Do not submit commented-out code. If the code does not need to be used +anymore, please remove it. + - While `TODO` comments are frowned upon, every now and then it is ok to put a `TODO` to note that a particular section of code needs to be worked on in the future. However, it is also known that "TODOs" often do not get worked on, and as such, it is more likely you will be asked to complete the TODO at the time you submit it. +- Make sure to add tests which cover the code lines that are introduced by your changes. See [testing](#testing). +- Make sure to [format and lint](#format-and-lint) the code. +- Ensure your commits are atomic. Each commit tells a story about what changes +are being made. This makes it easier to identify when a bug is introduced into +the codebase, and as such makes it easier to fix. +- All commits must either be rebased in atomic order or squashed (if the squashed +commit is considered atomic). Merge commits are not accepted. All conflicts must be resolved prior to pushing changes. +- **All pull requests should be made from the `main` branch.** + +# Commit Messages + +Commit messages should be as descriptive and should follow the general format: + +``` +A one-sentence summary of what the commit is. + +Further details of the commit messages go in here. Try to be as descriptive of +possible as to what the changes are. Good things to include: + +- What the changes is. +- Why the change was made. +- What to expect now that the change is in place. +- Any advice that can be helpful if someone needs to review this commit and +understand. +``` + +If you wish to tag a GitHub issue or another project management tracker, please +do so at the bottom of the commit message, and make it clearly labeled like so: + +``` +Issue: CrunchyData/pg_parquet#12 +``` + +# Submitting Pull Requests + +All work should be made in your own repository fork. When you believe your work +is ready to be committed. + +## Upcoming Features + +Ongoing work for new features should occur in branches off of the `main` +branch. + +# Start Local Environment + +There are 2 ways to start your local environment to start contributing pg_parquet. +- [Installation From Source](#installation-from-source) +- [Devcontainer](#devcontainer) + +## Installation From Source + +See [README.md](README.md#installation-from-source). + +## Devcontainer + +If you want to work on a totally ready-to-work container environment, you can try our +[devcontainer](.devcontainer/devcontainer.json). If you have chance to work on +[vscode editor](https://code.visualstudio.com), you can start pg_parquet project +inside the devcontainer. Please see [how you start the devcontainer](https://code.visualstudio.com/docs/devcontainers/containers). + +# Postgres Support Matrix + +You can see the current supported Postgres versions from [README.md](README.md#postgres-support-matrix). +Supported Postgres versions exist as Cargo feature flags at [Cargo.toml](Cargo.toml). +By default, pg_parquet is built with the latest supported Postgres version flag enabled. +If you want to build pg_parquet against another Postgres version, you can do it +by specifying the feature flag explicitly like `cargo pgrx build --features "pg16"`. +The same applies for running a session via `cargo pgrx run pg16` or +testing via `cargo pgrx test pg16`. + +# Testing + +We run `RUST_TEST_THREADS=1 cargo pgrx test` to run all our unit tests. If you +run a specific test, you can do it via regex patterns like below: +`cargo pgrx run pg17 test_numeric_*`. + +> [!WARNING] +> Make sure to pass RUST_TEST_THREADS=1 as environment variable to `cargo pgrx test`. + +Object storage tests are integration tests which require a storage emulator running +locally. See [ci.yml](.github/workflows/ci.yml) to see how local storage emulators +are started. You can also see the required environment variables from +[.env file](.devcontainer/.env). + +# Format and Lint + +We use `cargo-fmt` as formatter and `cargo-clippy` as linter. You can check +how we run them from [ci.yml](.github/workflows/ci.yml). diff --git a/Cargo.lock b/Cargo.lock index f5f042e..a7b7c33 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -982,12 +982,6 @@ dependencies = [ "subtle", ] -[[package]] -name = "dotenvy" -version = "0.15.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" - [[package]] name = "either" version = "1.13.0" @@ -2088,7 +2082,6 @@ dependencies = [ "arrow-schema", "aws-config", "aws-credential-types", - "dotenvy", "futures", "object_store", "once_cell", diff --git a/Cargo.toml b/Cargo.toml index 09b056f..a49a80d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,6 @@ arrow = {version = "53", default-features = false} arrow-schema = {version = "53", default-features = false} aws-config = { version = "1.5", default-features = false, features = ["rustls"]} aws-credential-types = {version = "1.2", default-features = false} -dotenvy = "0.15" futures = "0.3" object_store = {version = "0.11", default-features = false, features = ["aws"]} once_cell = "1" diff --git a/src/arrow_parquet/uri_utils.rs b/src/arrow_parquet/uri_utils.rs index e1dc475..3ff97af 100644 --- a/src/arrow_parquet/uri_utils.rs +++ b/src/arrow_parquet/uri_utils.rs @@ -90,9 +90,6 @@ fn object_store_with_location(uri: &Url, copy_from: bool) -> (Arc AmazonS3 { - // try loading environment vars from the .env file - dotenvy::from_path("/tmp/.env").ok(); - let mut aws_s3_builder = AmazonS3Builder::new().with_bucket_name(bucket_name); let is_test_running = std::env::var("PG_PARQUET_TEST").is_ok(); diff --git a/src/lib.rs b/src/lib.rs index 183682f..3bd6066 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1494,8 +1494,6 @@ mod tests { #[pg_test] fn test_s3_object_store_from_env() { - dotenvy::from_path("/tmp/.env").unwrap(); - let test_bucket_name: String = std::env::var("AWS_S3_TEST_BUCKET").expect("AWS_S3_TEST_BUCKET not found"); @@ -1509,8 +1507,6 @@ mod tests { #[pg_test] fn test_s3_object_store_from_config_file() { - dotenvy::from_path("/tmp/.env").unwrap(); - let test_bucket_name: String = std::env::var("AWS_S3_TEST_BUCKET").expect("AWS_S3_TEST_BUCKET not found"); @@ -1566,8 +1562,6 @@ mod tests { // set the current user to the regular user Spi::run("SET SESSION AUTHORIZATION regular_user;").unwrap(); - dotenvy::from_path("/tmp/.env").unwrap(); - let test_bucket_name: String = std::env::var("AWS_S3_TEST_BUCKET").expect("AWS_S3_TEST_BUCKET not found"); @@ -1608,8 +1602,6 @@ mod tests { // set the current user to the regular user Spi::run("SET SESSION AUTHORIZATION regular_user;").unwrap(); - dotenvy::from_path("/tmp/.env").unwrap(); - let test_bucket_name: String = std::env::var("AWS_S3_TEST_BUCKET").expect("AWS_S3_TEST_BUCKET not found");