Skip to content

Commit

Permalink
Let Turbine handle Unicode paths
Browse files Browse the repository at this point in the history
This requires patching rules_graalvm with sgammon/rules_graalvm#514 to ensure the native image runs with `sun.jnu.encoding` set to `UTF-8` on Linux.

Also enable unrelated tests after the recent java_tools update.

Work towards #24444
Unblocks #24457

Closes #24565.

PiperOrigin-RevId: 703409662
Change-Id: I44de4387de4a6da404436f5a35a2b27274122bbb
  • Loading branch information
fmeum authored and copybara-github committed Dec 6, 2024
1 parent aa3a236 commit a37d288
Show file tree
Hide file tree
Showing 5 changed files with 105 additions and 12 deletions.
1 change: 1 addition & 0 deletions BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ genrule(
"//third_party:BUILD",
"//third_party:rules_jvm_external_6.5.patch",
"//third_party:rules_graalvm_fix.patch",
"//third_party:rules_graalvm_unicode.patch",
],
outs = ["MODULE.bazel.lock.dist"],
cmd = " && ".join([
Expand Down
7 changes: 5 additions & 2 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@ single_version_override(
single_version_override(
module_name = "rules_graalvm",
patch_strip = 1,
patches = ["//third_party:rules_graalvm_fix.patch"],
patches = [
"//third_party:rules_graalvm_fix.patch",
"//third_party:rules_graalvm_unicode.patch",
],
)

local_path_override(
Expand Down Expand Up @@ -264,9 +267,9 @@ use_repo(

python = use_extension("@rules_python//python/extensions:python.bzl", "python")
python.toolchain(
ignore_root_user_error = True,
is_default = True,
python_version = "3.11",
ignore_root_user_error = True,
)

pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,12 @@ native_image(
# Statically link zlib but not glibc.
"-H:+StaticExecutableWithDynamicLibC",
],
"@platforms//os:windows": [
# The charset specified by sun.jnu.encoding is not automatically included in the image,
# but may be one of the legacy code pages on Windows, which aren't added by default.
# https://github.com/oracle/graal/pull/10232
"-H:+AddAllCharsets",
],
"//conditions:default": [],
}) + select({
"@platforms//cpu:x86_64": [
Expand Down
52 changes: 42 additions & 10 deletions src/test/shell/bazel/bazel_java_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1934,6 +1934,48 @@ EOF
bazel build //pkg:a >& $TEST_log || fail "build failed"
}

function test_header_compiler_direct_supports_unicode() {
if [[ "${JAVA_TOOLS_ZIP}" == released ]]; then
# TODO: Enable test after the next java_tools release.
return 0
fi

if "$is_windows"; then
# GraalVM native images on Windows use the same active code page they have been built
# with, which in the case of Bazel CI is 1252 (not UTF-8). Even with -H:+AddAllCharsets
# InvalidPathExceptions are still thrown when accessing a Unicode file path, indicating a
# problem within GraalVM's path encoding handling.
# https://github.com/oracle/graal/issues/10237
# TODO: Fix this by building java_tools binaries on a machine with system code page set to
# UTF-8.
echo "Skipping test on Windows"
return 0
elif [[ "$(uname -s)" == "Linux" ]]; then
export LC_ALL=C.UTF-8
if [[ $(locale charmap) != "UTF-8" ]]; then
echo "Skipping test due to missing UTF-8 locale"
return 0
fi
local -r unicode="äöüÄÖÜß🌱"
else
# JVMs on macOS always support UTF-8 since JEP 400.
local -r unicode="äöüÄÖÜß🌱"
fi
mkdir -p pkg
cat << EOF > pkg/BUILD
java_library(name = "a", srcs = ["A.java"], deps = [":b"])
java_library(name = "b", srcs = ["${unicode}.java"])
EOF
cat << 'EOF' > pkg/A.java
public class A extends B {}
EOF
cat << 'EOF' > "pkg/${unicode}.java"
class B {}
EOF

bazel build //pkg:a //pkg:b >& $TEST_log || fail "build failed"
}

function test_sandboxed_multiplexing() {
mkdir -p pkg
cat << 'EOF' > pkg/BUILD
Expand Down Expand Up @@ -1962,11 +2004,6 @@ EOF
}

function test_sandboxed_multiplexing_hermetic_paths_in_diagnostics() {
if [[ "${JAVA_TOOLS_ZIP}" == released ]]; then
# TODO: Enable test after the next java_tools release.
return 0
fi

mkdir -p pkg
cat << 'EOF' > pkg/BUILD
load("@bazel_tools//tools/jdk:default_java_toolchain.bzl", "default_java_toolchain")
Expand Down Expand Up @@ -2178,11 +2215,6 @@ EOF
}

function test_one_version_allowlist() {
if [[ "${JAVA_TOOLS_ZIP}" == released ]]; then
# TODO: Enable test after the next java_tools release.
return 0
fi

mkdir -p pkg
cat << 'EOF' > pkg/BUILD
load("@bazel_tools//tools/jdk:default_java_toolchain.bzl", "default_java_toolchain")
Expand Down
51 changes: 51 additions & 0 deletions third_party/rules_graalvm_unicode.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
From 5c67c84a014a1b0c165b8f1ff47243973fb58b6e Mon Sep 17 00:00:00 2001
From: Fabian Meumertzheim <[email protected]>
Date: Wed, 4 Dec 2024 21:05:49 +0100
Subject: [PATCH] fix: Force `sun.jnu.encoding` to be UTF-8 in images (#514)

Images pick up `sun.jnu.encoding` from the host build environment, which thus needs to be UTF-8 to be able to access paths with non-ASCII characters in them on Linux. Since Bazel does not inherit `LC_CTYPE` from the host, the JVM would otherwise default to an ASCII locale.

Signed-off-by: Fabian Meumertzheim <[email protected]>
---
internal/native_image/rules.bzl | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/internal/native_image/rules.bzl b/internal/native_image/rules.bzl
index f21566e..1d07278 100644
--- a/internal/native_image/rules.bzl
+++ b/internal/native_image/rules.bzl
@@ -64,6 +64,9 @@ def _graal_binary_implementation(ctx):
or install a GraalVM `native-image` toolchain.
""")

+ is_linux = ctx.target_platform_has_constraint(
+ ctx.attr._linux_constraint[platform_common.ConstraintValueInfo],
+ )
is_macos = ctx.target_platform_has_constraint(
ctx.attr._macos_constraint[platform_common.ConstraintValueInfo],
)
@@ -100,6 +103,15 @@ def _graal_binary_implementation(ctx):
bin_postfix = bin_postfix,
)

+ env = native_toolchain.env
+ # The native image will use the same native encoding (as determined by "sun.jnu.encoding")
+ # as the build environment, so we need to force a UTF-8 locale. On other platforms, the
+ # encoding is always UTF-8 (on macOS since JEP 400) or determined by the active code page
+ # on Windows.
+ # TODO: Match on the exec platform instead once Graal supports cross-compilation.
+ if is_linux:
+ env["LC_CTYPE"] = "C.UTF-8"
+
# assemble final inputs
inputs = depset(
direct_inputs,
@@ -110,7 +122,7 @@ def _graal_binary_implementation(ctx):
"executable": graal,
"inputs": inputs,
"mnemonic": "NativeImage",
- "env": native_toolchain.env,
+ "env": env,
"execution_requirements": {k: "" for k in native_toolchain.execution_requirements},
"progress_message": "Native Image __target__ (__mode__) %{label}"
.replace("__mode__", _build_action_message(ctx))

0 comments on commit a37d288

Please sign in to comment.