diff --git a/Cargo.lock b/Cargo.lock index 4123a825..1f5dd815 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -97,7 +97,7 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "arrow" version = "53.0.0" -source = "git+https://github.com/blaze-init/arrow-rs.git?rev=2ff32f9f72#2ff32f9f72b56159fb411c2b384e753895b3a927" +source = "git+https://github.com/blaze-init/arrow-rs.git?rev=3e276d0474#3e276d047445838be103f340cd94730da1351c86" dependencies = [ "arrow-arith", "arrow-array", @@ -117,7 +117,7 @@ dependencies = [ [[package]] name = "arrow-arith" version = "53.0.0" -source = "git+https://github.com/blaze-init/arrow-rs.git?rev=2ff32f9f72#2ff32f9f72b56159fb411c2b384e753895b3a927" +source = "git+https://github.com/blaze-init/arrow-rs.git?rev=3e276d0474#3e276d047445838be103f340cd94730da1351c86" dependencies = [ "arrow-array", "arrow-buffer", @@ -131,7 +131,7 @@ dependencies = [ [[package]] name = "arrow-array" version = "53.0.0" -source = "git+https://github.com/blaze-init/arrow-rs.git?rev=2ff32f9f72#2ff32f9f72b56159fb411c2b384e753895b3a927" +source = "git+https://github.com/blaze-init/arrow-rs.git?rev=3e276d0474#3e276d047445838be103f340cd94730da1351c86" dependencies = [ "ahash", "arrow-buffer", @@ -147,7 +147,7 @@ dependencies = [ [[package]] name = "arrow-buffer" version = "53.0.0" -source = "git+https://github.com/blaze-init/arrow-rs.git?rev=2ff32f9f72#2ff32f9f72b56159fb411c2b384e753895b3a927" +source = "git+https://github.com/blaze-init/arrow-rs.git?rev=3e276d0474#3e276d047445838be103f340cd94730da1351c86" dependencies = [ "bytes", "half", @@ -157,7 +157,7 @@ dependencies = [ [[package]] name = "arrow-cast" version = "53.0.0" -source = "git+https://github.com/blaze-init/arrow-rs.git?rev=2ff32f9f72#2ff32f9f72b56159fb411c2b384e753895b3a927" +source = "git+https://github.com/blaze-init/arrow-rs.git?rev=3e276d0474#3e276d047445838be103f340cd94730da1351c86" dependencies = [ "arrow-array", "arrow-buffer", @@ -177,7 +177,7 @@ dependencies = [ [[package]] name = "arrow-csv" version = "53.0.0" -source = "git+https://github.com/blaze-init/arrow-rs.git?rev=2ff32f9f72#2ff32f9f72b56159fb411c2b384e753895b3a927" +source = "git+https://github.com/blaze-init/arrow-rs.git?rev=3e276d0474#3e276d047445838be103f340cd94730da1351c86" dependencies = [ "arrow-array", "arrow-buffer", @@ -195,7 +195,7 @@ dependencies = [ [[package]] name = "arrow-data" version = "53.0.0" -source = "git+https://github.com/blaze-init/arrow-rs.git?rev=2ff32f9f72#2ff32f9f72b56159fb411c2b384e753895b3a927" +source = "git+https://github.com/blaze-init/arrow-rs.git?rev=3e276d0474#3e276d047445838be103f340cd94730da1351c86" dependencies = [ "arrow-buffer", "arrow-schema", @@ -206,7 +206,7 @@ dependencies = [ [[package]] name = "arrow-ipc" version = "53.0.0" -source = "git+https://github.com/blaze-init/arrow-rs.git?rev=2ff32f9f72#2ff32f9f72b56159fb411c2b384e753895b3a927" +source = "git+https://github.com/blaze-init/arrow-rs.git?rev=3e276d0474#3e276d047445838be103f340cd94730da1351c86" dependencies = [ "arrow-array", "arrow-buffer", @@ -220,7 +220,7 @@ dependencies = [ [[package]] name = "arrow-json" version = "53.0.0" -source = "git+https://github.com/blaze-init/arrow-rs.git?rev=2ff32f9f72#2ff32f9f72b56159fb411c2b384e753895b3a927" +source = "git+https://github.com/blaze-init/arrow-rs.git?rev=3e276d0474#3e276d047445838be103f340cd94730da1351c86" dependencies = [ "arrow-array", "arrow-buffer", @@ -239,7 +239,7 @@ dependencies = [ [[package]] name = "arrow-ord" version = "53.0.0" -source = "git+https://github.com/blaze-init/arrow-rs.git?rev=2ff32f9f72#2ff32f9f72b56159fb411c2b384e753895b3a927" +source = "git+https://github.com/blaze-init/arrow-rs.git?rev=3e276d0474#3e276d047445838be103f340cd94730da1351c86" dependencies = [ "arrow-array", "arrow-buffer", @@ -253,7 +253,7 @@ dependencies = [ [[package]] name = "arrow-row" version = "53.0.0" -source = "git+https://github.com/blaze-init/arrow-rs.git?rev=2ff32f9f72#2ff32f9f72b56159fb411c2b384e753895b3a927" +source = "git+https://github.com/blaze-init/arrow-rs.git?rev=3e276d0474#3e276d047445838be103f340cd94730da1351c86" dependencies = [ "ahash", "arrow-array", @@ -266,7 +266,7 @@ dependencies = [ [[package]] name = "arrow-schema" version = "53.0.0" -source = "git+https://github.com/blaze-init/arrow-rs.git?rev=2ff32f9f72#2ff32f9f72b56159fb411c2b384e753895b3a927" +source = "git+https://github.com/blaze-init/arrow-rs.git?rev=3e276d0474#3e276d047445838be103f340cd94730da1351c86" dependencies = [ "bitflags 2.6.0", "serde", @@ -275,7 +275,7 @@ dependencies = [ [[package]] name = "arrow-select" version = "53.0.0" -source = "git+https://github.com/blaze-init/arrow-rs.git?rev=2ff32f9f72#2ff32f9f72b56159fb411c2b384e753895b3a927" +source = "git+https://github.com/blaze-init/arrow-rs.git?rev=3e276d0474#3e276d047445838be103f340cd94730da1351c86" dependencies = [ "ahash", "arrow-array", @@ -288,7 +288,7 @@ dependencies = [ [[package]] name = "arrow-string" version = "53.0.0" -source = "git+https://github.com/blaze-init/arrow-rs.git?rev=2ff32f9f72#2ff32f9f72b56159fb411c2b384e753895b3a927" +source = "git+https://github.com/blaze-init/arrow-rs.git?rev=3e276d0474#3e276d047445838be103f340cd94730da1351c86" dependencies = [ "arrow-array", "arrow-buffer", @@ -738,7 +738,7 @@ dependencies = [ [[package]] name = "datafusion" version = "42.0.0" -source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=8ffbb23b1#8ffbb23b110e9aa1418b305b05155e081f5b5a3e" +source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=b84eccc4d#b84eccc4dfd48605fd514728a4dce985300c8e82" dependencies = [ "ahash", "arrow", @@ -794,7 +794,7 @@ dependencies = [ [[package]] name = "datafusion-catalog" version = "42.0.0" -source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=8ffbb23b1#8ffbb23b110e9aa1418b305b05155e081f5b5a3e" +source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=b84eccc4d#b84eccc4dfd48605fd514728a4dce985300c8e82" dependencies = [ "arrow-schema", "async-trait", @@ -808,7 +808,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "42.0.0" -source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=8ffbb23b1#8ffbb23b110e9aa1418b305b05155e081f5b5a3e" +source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=b84eccc4d#b84eccc4dfd48605fd514728a4dce985300c8e82" dependencies = [ "ahash", "arrow", @@ -831,7 +831,7 @@ dependencies = [ [[package]] name = "datafusion-common-runtime" version = "42.0.0" -source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=8ffbb23b1#8ffbb23b110e9aa1418b305b05155e081f5b5a3e" +source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=b84eccc4d#b84eccc4dfd48605fd514728a4dce985300c8e82" dependencies = [ "log", "tokio", @@ -840,7 +840,7 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "42.0.0" -source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=8ffbb23b1#8ffbb23b110e9aa1418b305b05155e081f5b5a3e" +source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=b84eccc4d#b84eccc4dfd48605fd514728a4dce985300c8e82" dependencies = [ "arrow", "chrono", @@ -860,7 +860,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "42.0.0" -source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=8ffbb23b1#8ffbb23b110e9aa1418b305b05155e081f5b5a3e" +source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=b84eccc4d#b84eccc4dfd48605fd514728a4dce985300c8e82" dependencies = [ "ahash", "arrow", @@ -881,7 +881,7 @@ dependencies = [ [[package]] name = "datafusion-expr-common" version = "42.0.0" -source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=8ffbb23b1#8ffbb23b110e9aa1418b305b05155e081f5b5a3e" +source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=b84eccc4d#b84eccc4dfd48605fd514728a4dce985300c8e82" dependencies = [ "arrow", "datafusion-common", @@ -996,7 +996,7 @@ dependencies = [ [[package]] name = "datafusion-functions" version = "42.0.0" -source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=8ffbb23b1#8ffbb23b110e9aa1418b305b05155e081f5b5a3e" +source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=b84eccc4d#b84eccc4dfd48605fd514728a4dce985300c8e82" dependencies = [ "arrow", "arrow-buffer", @@ -1022,7 +1022,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate" version = "42.0.0" -source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=8ffbb23b1#8ffbb23b110e9aa1418b305b05155e081f5b5a3e" +source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=b84eccc4d#b84eccc4dfd48605fd514728a4dce985300c8e82" dependencies = [ "ahash", "arrow", @@ -1042,7 +1042,7 @@ dependencies = [ [[package]] name = "datafusion-functions-aggregate-common" version = "42.0.0" -source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=8ffbb23b1#8ffbb23b110e9aa1418b305b05155e081f5b5a3e" +source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=b84eccc4d#b84eccc4dfd48605fd514728a4dce985300c8e82" dependencies = [ "ahash", "arrow", @@ -1055,7 +1055,7 @@ dependencies = [ [[package]] name = "datafusion-functions-nested" version = "42.0.0" -source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=8ffbb23b1#8ffbb23b110e9aa1418b305b05155e081f5b5a3e" +source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=b84eccc4d#b84eccc4dfd48605fd514728a4dce985300c8e82" dependencies = [ "arrow", "arrow-array", @@ -1077,7 +1077,7 @@ dependencies = [ [[package]] name = "datafusion-functions-window" version = "42.0.0" -source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=8ffbb23b1#8ffbb23b110e9aa1418b305b05155e081f5b5a3e" +source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=b84eccc4d#b84eccc4dfd48605fd514728a4dce985300c8e82" dependencies = [ "datafusion-common", "datafusion-expr", @@ -1088,7 +1088,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "42.0.0" -source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=8ffbb23b1#8ffbb23b110e9aa1418b305b05155e081f5b5a3e" +source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=b84eccc4d#b84eccc4dfd48605fd514728a4dce985300c8e82" dependencies = [ "arrow", "async-trait", @@ -1107,7 +1107,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "42.0.0" -source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=8ffbb23b1#8ffbb23b110e9aa1418b305b05155e081f5b5a3e" +source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=b84eccc4d#b84eccc4dfd48605fd514728a4dce985300c8e82" dependencies = [ "ahash", "arrow", @@ -1138,7 +1138,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr-common" version = "42.0.0" -source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=8ffbb23b1#8ffbb23b110e9aa1418b305b05155e081f5b5a3e" +source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=b84eccc4d#b84eccc4dfd48605fd514728a4dce985300c8e82" dependencies = [ "ahash", "arrow", @@ -1151,7 +1151,7 @@ dependencies = [ [[package]] name = "datafusion-physical-optimizer" version = "42.0.0" -source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=8ffbb23b1#8ffbb23b110e9aa1418b305b05155e081f5b5a3e" +source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=b84eccc4d#b84eccc4dfd48605fd514728a4dce985300c8e82" dependencies = [ "arrow-schema", "datafusion-common", @@ -1164,7 +1164,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "42.0.0" -source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=8ffbb23b1#8ffbb23b110e9aa1418b305b05155e081f5b5a3e" +source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=b84eccc4d#b84eccc4dfd48605fd514728a4dce985300c8e82" dependencies = [ "ahash", "arrow", @@ -1198,7 +1198,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "42.0.0" -source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=8ffbb23b1#8ffbb23b110e9aa1418b305b05155e081f5b5a3e" +source = "git+https://github.com/blaze-init/arrow-datafusion.git?rev=b84eccc4d#b84eccc4dfd48605fd514728a4dce985300c8e82" dependencies = [ "arrow", "arrow-array", @@ -2075,7 +2075,7 @@ checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "orc-rust" version = "0.4.1" -source = "git+https://github.com/blaze-init/datafusion-orc.git?rev=b6e4de6#b6e4de62d17086efc4d3c03ff15342994b7b74e6" +source = "git+https://github.com/blaze-init/datafusion-orc.git?rev=7261462#726146260aaddd86c83c764907214b2018ce9158" dependencies = [ "arrow", "async-trait", @@ -2138,7 +2138,7 @@ dependencies = [ [[package]] name = "parquet" version = "53.0.0" -source = "git+https://github.com/blaze-init/arrow-rs.git?rev=2ff32f9f72#2ff32f9f72b56159fb411c2b384e753895b3a927" +source = "git+https://github.com/blaze-init/arrow-rs.git?rev=3e276d0474#3e276d047445838be103f340cd94730da1351c86" dependencies = [ "ahash", "arrow-array", diff --git a/Cargo.toml b/Cargo.toml index cbb9e4e5..8dec9565 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,27 +66,27 @@ serde_json = { version = "1.0.96" } [patch.crates-io] # datafusion: branch=v42-blaze -datafusion = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "8ffbb23b1"} -datafusion-common = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "8ffbb23b1"} -datafusion-expr = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "8ffbb23b1"} -datafusion-execution = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "8ffbb23b1"} -datafusion-optimizer = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "8ffbb23b1"} -datafusion-physical-expr = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "8ffbb23b1"} -orc-rust = { git = "https://github.com/blaze-init/datafusion-orc.git", rev = "b6e4de6"} +datafusion = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "6823ee9d3"} +datafusion-common = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "6823ee9d3"} +datafusion-expr = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "6823ee9d3"} +datafusion-execution = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "6823ee9d3"} +datafusion-optimizer = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "6823ee9d3"} +datafusion-physical-expr = { git = "https://github.com/blaze-init/arrow-datafusion.git", rev = "6823ee9d3"} +orc-rust = { git = "https://github.com/blaze-init/datafusion-orc.git", rev = "1c416bc"} # arrow: branch=v53-blaze -arrow = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "2ff32f9f72"} -arrow-arith = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "2ff32f9f72"} -arrow-array = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "2ff32f9f72"} -arrow-buffer = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "2ff32f9f72"} -arrow-cast = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "2ff32f9f72"} -arrow-data = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "2ff32f9f72"} -arrow-ord = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "2ff32f9f72"} -arrow-row = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "2ff32f9f72"} -arrow-schema = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "2ff32f9f72"} -arrow-select = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "2ff32f9f72"} -arrow-string = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "2ff32f9f72"} -parquet = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "2ff32f9f72"} +arrow = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "e3b6a2f22a"} +arrow-arith = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "e3b6a2f22a"} +arrow-array = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "e3b6a2f22a"} +arrow-buffer = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "e3b6a2f22a"} +arrow-cast = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "e3b6a2f22a"} +arrow-data = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "e3b6a2f22a"} +arrow-ord = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "e3b6a2f22a"} +arrow-row = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "e3b6a2f22a"} +arrow-schema = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "e3b6a2f22a"} +arrow-select = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "e3b6a2f22a"} +arrow-string = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "e3b6a2f22a"} +parquet = { git = "https://github.com/blaze-init/arrow-rs.git", rev = "e3b6a2f22a"} # serde_json: branch=v1.0.96-blaze serde_json = { git = "https://github.com/blaze-init/json", branch = "v1.0.96-blaze" } diff --git a/spark-extension/src/main/scala/org/apache/spark/sql/execution/blaze/plan/NativeFileSourceScanBase.scala b/spark-extension/src/main/scala/org/apache/spark/sql/execution/blaze/plan/NativeFileSourceScanBase.scala index 56ab5f35..03038130 100644 --- a/spark-extension/src/main/scala/org/apache/spark/sql/execution/blaze/plan/NativeFileSourceScanBase.scala +++ b/spark-extension/src/main/scala/org/apache/spark/sql/execution/blaze/plan/NativeFileSourceScanBase.scala @@ -94,7 +94,7 @@ abstract class NativeFileSourceScanBase(basedFileScan: FileSourceScanExec) field.copy(nullable = true) case field => // avoid converting unsupported type in non-used fields - StructField(field.name, NullType, nullable = true) + StructField(field.name, field.dataType, nullable = true) })) protected def nativePartitionSchema: pb.Schema =