diff --git a/BUILD b/BUILD index 626ece371608e2..68a57162c54168 100644 --- a/BUILD +++ b/BUILD @@ -85,6 +85,7 @@ genrule( "//third_party:BUILD", "//third_party:rules_jvm_external_6.5.patch", "//third_party:rules_graalvm_fix.patch", + "//third_party:rules_graalvm_unicode.patch", ], outs = ["MODULE.bazel.lock.dist"], cmd = " && ".join([ diff --git a/MODULE.bazel b/MODULE.bazel index 5bd1735c6ac369..019da12da7c804 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -64,7 +64,10 @@ single_version_override( single_version_override( module_name = "rules_graalvm", patch_strip = 1, - patches = ["//third_party:rules_graalvm_fix.patch"], + patches = [ + "//third_party:rules_graalvm_fix.patch", + "//third_party:rules_graalvm_unicode.patch", + ], ) local_path_override( @@ -264,9 +267,9 @@ use_repo( python = use_extension("@rules_python//python/extensions:python.bzl", "python") python.toolchain( + ignore_root_user_error = True, is_default = True, python_version = "3.11", - ignore_root_user_error = True, ) pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip") diff --git a/src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/BUILD b/src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/BUILD index 4abefcb4b2b8d9..34a90028f8e48d 100644 --- a/src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/BUILD +++ b/src/java_tools/buildjar/java/com/google/devtools/build/java/turbine/BUILD @@ -53,6 +53,12 @@ native_image( # Statically link zlib but not glibc. "-H:+StaticExecutableWithDynamicLibC", ], + "@platforms//os:windows": [ + # The charset specified by sun.jnu.encoding is not automatically included in the image, + # but may be one of the legacy code pages on Windows, which aren't added by default. + # https://github.com/oracle/graal/pull/10232 + "-H:+AddAllCharsets", + ], "//conditions:default": [], }) + select({ "@platforms//cpu:x86_64": [ diff --git a/src/test/shell/bazel/bazel_java_test.sh b/src/test/shell/bazel/bazel_java_test.sh index a4ca51749bb894..09b82deedc93c1 100755 --- a/src/test/shell/bazel/bazel_java_test.sh +++ b/src/test/shell/bazel/bazel_java_test.sh @@ -1934,6 +1934,48 @@ EOF bazel build //pkg:a >& $TEST_log || fail "build failed" } +function test_header_compiler_direct_supports_unicode() { + if [[ "${JAVA_TOOLS_ZIP}" == released ]]; then + # TODO: Enable test after the next java_tools release. + return 0 + fi + + if "$is_windows"; then + # GraalVM native images on Windows use the same active code page they have been built + # with, which in the case of Bazel CI is 1252 (not UTF-8). Even with -H:+AddAllCharsets + # InvalidPathExceptions are still thrown when accessing a Unicode file path, indicating a + # problem within GraalVM's path encoding handling. + # https://github.com/oracle/graal/issues/10237 + # TODO: Fix this by building java_tools binaries on a machine with system code page set to + # UTF-8. + echo "Skipping test on Windows" + return 0 + elif [[ "$(uname -s)" == "Linux" ]]; then + export LC_ALL=C.UTF-8 + if [[ $(locale charmap) != "UTF-8" ]]; then + echo "Skipping test due to missing UTF-8 locale" + return 0 + fi + local -r unicode="äöüÄÖÜß🌱" + else + # JVMs on macOS always support UTF-8 since JEP 400. + local -r unicode="äöüÄÖÜß🌱" + fi + mkdir -p pkg + cat << EOF > pkg/BUILD +java_library(name = "a", srcs = ["A.java"], deps = [":b"]) +java_library(name = "b", srcs = ["${unicode}.java"]) +EOF + cat << 'EOF' > pkg/A.java +public class A extends B {} +EOF + cat << 'EOF' > "pkg/${unicode}.java" +class B {} +EOF + + bazel build //pkg:a //pkg:b >& $TEST_log || fail "build failed" +} + function test_sandboxed_multiplexing() { mkdir -p pkg cat << 'EOF' > pkg/BUILD @@ -1962,11 +2004,6 @@ EOF } function test_sandboxed_multiplexing_hermetic_paths_in_diagnostics() { - if [[ "${JAVA_TOOLS_ZIP}" == released ]]; then - # TODO: Enable test after the next java_tools release. - return 0 - fi - mkdir -p pkg cat << 'EOF' > pkg/BUILD load("@bazel_tools//tools/jdk:default_java_toolchain.bzl", "default_java_toolchain") @@ -2178,11 +2215,6 @@ EOF } function test_one_version_allowlist() { - if [[ "${JAVA_TOOLS_ZIP}" == released ]]; then - # TODO: Enable test after the next java_tools release. - return 0 - fi - mkdir -p pkg cat << 'EOF' > pkg/BUILD load("@bazel_tools//tools/jdk:default_java_toolchain.bzl", "default_java_toolchain") diff --git a/third_party/rules_graalvm_unicode.patch b/third_party/rules_graalvm_unicode.patch new file mode 100644 index 00000000000000..d4b0dc3f790fe4 --- /dev/null +++ b/third_party/rules_graalvm_unicode.patch @@ -0,0 +1,51 @@ +From 5c67c84a014a1b0c165b8f1ff47243973fb58b6e Mon Sep 17 00:00:00 2001 +From: Fabian Meumertzheim +Date: Wed, 4 Dec 2024 21:05:49 +0100 +Subject: [PATCH] fix: Force `sun.jnu.encoding` to be UTF-8 in images (#514) + +Images pick up `sun.jnu.encoding` from the host build environment, which thus needs to be UTF-8 to be able to access paths with non-ASCII characters in them on Linux. Since Bazel does not inherit `LC_CTYPE` from the host, the JVM would otherwise default to an ASCII locale. + +Signed-off-by: Fabian Meumertzheim +--- + internal/native_image/rules.bzl | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +diff --git a/internal/native_image/rules.bzl b/internal/native_image/rules.bzl +index f21566e..1d07278 100644 +--- a/internal/native_image/rules.bzl ++++ b/internal/native_image/rules.bzl +@@ -64,6 +64,9 @@ def _graal_binary_implementation(ctx): + or install a GraalVM `native-image` toolchain. + """) + ++ is_linux = ctx.target_platform_has_constraint( ++ ctx.attr._linux_constraint[platform_common.ConstraintValueInfo], ++ ) + is_macos = ctx.target_platform_has_constraint( + ctx.attr._macos_constraint[platform_common.ConstraintValueInfo], + ) +@@ -100,6 +103,15 @@ def _graal_binary_implementation(ctx): + bin_postfix = bin_postfix, + ) + ++ env = native_toolchain.env ++ # The native image will use the same native encoding (as determined by "sun.jnu.encoding") ++ # as the build environment, so we need to force a UTF-8 locale. On other platforms, the ++ # encoding is always UTF-8 (on macOS since JEP 400) or determined by the active code page ++ # on Windows. ++ # TODO: Match on the exec platform instead once Graal supports cross-compilation. ++ if is_linux: ++ env["LC_CTYPE"] = "C.UTF-8" ++ + # assemble final inputs + inputs = depset( + direct_inputs, +@@ -110,7 +122,7 @@ def _graal_binary_implementation(ctx): + "executable": graal, + "inputs": inputs, + "mnemonic": "NativeImage", +- "env": native_toolchain.env, ++ "env": env, + "execution_requirements": {k: "" for k in native_toolchain.execution_requirements}, + "progress_message": "Native Image __target__ (__mode__) %{label}" + .replace("__mode__", _build_action_message(ctx))