From 630c0030fec92fd7f9c5bed1bba0b2cb4c1ca7bd Mon Sep 17 00:00:00 2001 From: "R. Tyler Croy" Date: Tue, 3 Dec 2024 18:44:08 +0000 Subject: [PATCH 1/2] Revert "update to the latest delta-rs which has a few performance improvements" This reverts commit 525c2cbe3226a1e69b0a5881ba61784682e92060. There have been checkpoint related issues in 0.22 :thinking: --- Cargo.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ca0c5cd..7cfe1eb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ members = [ resolver = "2" [workspace.package] -version = "1.2.0" +version = "1.1.1" edition = "2021" keywords = ["deltalake", "parquet", "lambda", "delta", "sqs"] homepage = "https://github.com/buoyant-data/oxbow" @@ -20,7 +20,7 @@ anyhow = "=1" chrono = "0.4.31" aws_lambda_events = { version = "0.15.1", default-features = false, features = ["sns", "sqs", "s3"] } # The datafusion feature is required to support invariants which may be in error, but is required as of currently released 0.18.2 -deltalake = { version = "0.22.0", features = ["s3", "json", "datafusion"]} +deltalake = { version = "0.21.0", features = ["s3", "json", "datafusion"]} #deltalake = { git = "https://github.com/delta-io/delta-rs", branch = "main", features = ["s3", "json", "datafusion"]} #deltalake = { path = "../../delta-io/delta-rs/crates/deltalake", features = ["s3", "json", "datafusion"]} tokio = { version = "=1", features = ["macros"] } From 133cf945be07b1cb527bffd95c3f1302fab22e4f Mon Sep 17 00:00:00 2001 From: "R. Tyler Croy" Date: Fri, 20 Dec 2024 14:55:34 +0000 Subject: [PATCH 2/2] Upgrade to the pre-release of deltalake 0.23 which includes concurrency fixes Unfortunately this also includes a newer delta_kernel which includes reqwest with default features (native-tls) which breaks the zig-based cross-compilation that cargo-lambda attempts by default. This `Cross.toml` is a hopefully temporary fix. 
--- Cargo.toml | 6 +++--- Cross.toml | 11 +++++++++++ ci/build-release.sh | 2 +- lambdas/file-loader/src/main.rs | 7 ------- 4 files changed, 15 insertions(+), 11 deletions(-) create mode 100644 Cross.toml diff --git a/Cargo.toml b/Cargo.toml index 7cfe1eb..37568d7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,7 +7,7 @@ members = [ resolver = "2" [workspace.package] -version = "1.1.1" +version = "1.2.0" edition = "2021" keywords = ["deltalake", "parquet", "lambda", "delta", "sqs"] homepage = "https://github.com/buoyant-data/oxbow" @@ -20,8 +20,8 @@ anyhow = "=1" chrono = "0.4.31" aws_lambda_events = { version = "0.15.1", default-features = false, features = ["sns", "sqs", "s3"] } # The datafusion feature is required to support invariants which may be in error, but is required as of currently released 0.18.2 -deltalake = { version = "0.21.0", features = ["s3", "json", "datafusion"]} -#deltalake = { git = "https://github.com/delta-io/delta-rs", branch = "main", features = ["s3", "json", "datafusion"]} +#deltalake = { version = "0.21.0", features = ["s3", "json", "datafusion"]} +deltalake = { git = "https://github.com/delta-io/delta-rs", branch = "main", features = ["s3", "json", "datafusion"]} #deltalake = { path = "../../delta-io/delta-rs/crates/deltalake", features = ["s3", "json", "datafusion"]} tokio = { version = "=1", features = ["macros"] } regex = "=1" diff --git a/Cross.toml b/Cross.toml new file mode 100644 index 0000000..37c541a --- /dev/null +++ b/Cross.toml @@ -0,0 +1,11 @@ + +# This is currently needed to ensure openssl libraries are bundled into the +# cross compilation image +# +# This is due to the reqwest library being pulled in recent releases with +# default features enabled, including native-tls :( +[target.x86_64-unknown-linux-gnu] +pre-build = [ + "dpkg --add-architecture $CROSS_DEB_ARCH", + "apt-get update && apt-get install --assume-yes libssl-dev:$CROSS_DEB_ARCH" +] diff --git a/ci/build-release.sh b/ci/build-release.sh index 
df38d55..07ddd3e 100755 --- a/ci/build-release.sh +++ b/ci/build-release.sh @@ -6,4 +6,4 @@ fi; . venv/bin/activate -exec cargo lambda build --release --output-format zip +exec cargo lambda build --compiler cross --release --output-format zip diff --git a/lambdas/file-loader/src/main.rs b/lambdas/file-loader/src/main.rs index 2d67d3e..58ca572 100644 --- a/lambdas/file-loader/src/main.rs +++ b/lambdas/file-loader/src/main.rs @@ -94,13 +94,6 @@ async fn main() -> Result<(), Error> { let _ = env::var("DELTA_TABLE_URI").expect("The `DELTA_TABLE_URI` must be set in the environment"); - match env::var("DYNAMO_LOCK_TABLE_NAME") { - Ok(_) => {} - Err(_) => { - warn!("file-loader SHOULD have `DYNAMO_LOCK_TABLE_NAME` set to a valid name, and should have AWS_S3_LOCKING_PROVIDER=dynamodb set so that concurrent writes can be performed safely."); - } - } - info!("Starting file-loader"); run(service_fn(function_handler)).await