From 77a74f37ed11c3628b7506d8b2810759a50f012b Mon Sep 17 00:00:00 2001 From: Googler Date: Tue, 26 Nov 2024 07:46:34 -0800 Subject: [PATCH] Automated rollback of commit 3fdec931bfc401a379e2446731cf19daffe24a10. *** Reason for rollback *** Internal test failures, see attached bug. *** Original change description *** Fix more Unicode bugs PiperOrigin-RevId: 700339865 Change-Id: I5d63020afff2b6b0862ab438dfa3130c09f38a8f --- .../google/devtools/build/lib/analysis/BUILD | 2 - .../analysis/ConfiguredRuleClassProvider.java | 6 +- .../analysis/RepoMappingManifestAction.java | 3 +- .../lib/analysis/SourceManifestAction.java | 3 +- .../LazyWriteNestedSetOfTupleAction.java | 5 +- .../actions/LazyWritePathsFileAction.java | 4 +- .../LocalTemplateExpansionStrategy.java | 5 +- .../lib/analysis/actions/StarlarkAction.java | 7 +- .../build/lib/analysis/actions/Template.java | 9 +- .../starlark/StarlarkActionFactory.java | 17 ++- .../devtools/build/lib/authandtls/BUILD | 1 - .../BasicHttpAuthenticationEncoder.java | 27 ++-- .../lib/authandtls/NetrcCredentials.java | 4 +- .../build/lib/authandtls/NetrcParser.java | 7 +- .../devtools/build/lib/bazel/bzlmod/BUILD | 1 - .../lib/bazel/bzlmod/VendorFileFunction.java | 11 +- .../repository/RepositoryResolvedModule.java | 5 +- .../build/lib/bazel/repository/starlark/BUILD | 1 - .../starlark/StarlarkBaseExternalContext.java | 15 +- .../starlark/StarlarkExecutionResult.java | 9 +- .../starlark/StarlarkRepositoryContext.java | 9 +- .../build/lib/profiler/JsonProfile.java | 6 +- .../lib/profiler/JsonTraceFileWriter.java | 8 +- .../google/devtools/build/lib/rules/cpp/BUILD | 1 - .../lib/rules/cpp/ShowIncludesFilter.java | 132 +++++++++++++----- .../com/google/devtools/build/lib/util/BUILD | 1 - .../build/lib/util/DependencySet.java | 18 +-- .../google/devtools/build/lib/analysis/BUILD | 1 - .../actions/TemplateExpansionActionTest.java | 6 +- .../devtools/build/lib/authandtls/BUILD | 1 - .../BasicHttpAuthenticationEncoderTest.java | 17 +-- .../lib/authandtls/GoogleAuthUtilsTest.java | 3 +- .../lib/authandtls/NetrcCredentialsTest.java | 3 +- .../lib/bazel/bzlmod/IndexRegistryTest.java | 3 +- .../downloader/UrlRewriterTest.java | 3 +- .../lib/rules/cpp/ShowIncludesFilterTest.java | 6 + ...arlarkRuleImplementationFunctionsTest.java | 25 ++-- src/test/shell/bazel/BUILD | 2 +- src/test/shell/bazel/bazel_workspaces_test.sh | 16 ++- .../shell/bazel/starlark_repository_test.sh | 90 +----------- .../shell/bazel/unicode_filenames_test.sh | 105 +++++++------- src/test/shell/integration/run_test.sh | 1 + .../starlark_dependency_pruning_test.sh | 45 ------ 43 files changed, 294 insertions(+), 350 deletions(-) diff --git a/src/main/java/com/google/devtools/build/lib/analysis/BUILD b/src/main/java/com/google/devtools/build/lib/analysis/BUILD index e8832fd32de22a..4bc092f4f4ee6b 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/BUILD +++ b/src/main/java/com/google/devtools/build/lib/analysis/BUILD @@ -1482,7 +1482,6 @@ java_library( "//src/main/java/com/google/devtools/build/lib/actions:artifacts", "//src/main/java/com/google/devtools/build/lib/actions:commandline_item", "//src/main/java/com/google/devtools/build/lib/collect/nestedset", - "//src/main/java/com/google/devtools/build/lib/unsafe:string", "//src/main/java/com/google/devtools/build/lib/util", "//src/main/java/net/starlark/java/eval", "//third_party:jsr305", @@ -1499,7 +1498,6 @@ java_library( "//src/main/java/com/google/devtools/build/lib/actions:artifact_expander", "//src/main/java/com/google/devtools/build/lib/actions:artifacts", "//src/main/java/com/google/devtools/build/lib/collect/nestedset", - "//src/main/java/com/google/devtools/build/lib/unsafe:string", "//src/main/java/com/google/devtools/build/lib/util", "//third_party:guava", "//third_party:jsr305", diff --git a/src/main/java/com/google/devtools/build/lib/analysis/ConfiguredRuleClassProvider.java b/src/main/java/com/google/devtools/build/lib/analysis/ConfiguredRuleClassProvider.java index 564309c487f868..df527b31de764f 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/ConfiguredRuleClassProvider.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/ConfiguredRuleClassProvider.java @@ -53,7 +53,6 @@ import com.google.devtools.build.lib.packages.RuleTransitionData; import com.google.devtools.build.lib.packages.WorkspaceFactory; import com.google.devtools.build.lib.starlarkbuildapi.core.Bootstrap; -import com.google.devtools.build.lib.unsafe.StringUnsafe; import com.google.devtools.build.lib.vfs.DigestHashFunction; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.PathFragment; @@ -132,10 +131,7 @@ protected synchronized byte[] getFastDigest(PathFragment path) { @Override protected synchronized byte[] getDigest(PathFragment path) { - return getDigestFunction() - .getHashFunction() - .hashBytes(StringUnsafe.getInstance().getInternalStringBytes(path.getPathString())) - .asBytes(); + return getDigestFunction().getHashFunction().hashString(path.toString(), UTF_8).asBytes(); } } diff --git a/src/main/java/com/google/devtools/build/lib/analysis/RepoMappingManifestAction.java b/src/main/java/com/google/devtools/build/lib/analysis/RepoMappingManifestAction.java index 6d903571edc1ef..f4b724886be784 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/RepoMappingManifestAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/RepoMappingManifestAction.java @@ -16,6 +16,7 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableSortedMap.toImmutableSortedMap; import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Comparator.comparing; import com.github.benmanes.caffeine.cache.Caffeine; @@ -145,7 +146,7 @@ protected void computeKey( public String getFileContents(@Nullable EventHandler eventHandler) throws IOException { ByteArrayOutputStream stream = new ByteArrayOutputStream(); newDeterministicWriter().writeOutputFile(stream); - return stream.toString(ISO_8859_1); + return stream.toString(UTF_8); } @Override diff --git a/src/main/java/com/google/devtools/build/lib/analysis/SourceManifestAction.java b/src/main/java/com/google/devtools/build/lib/analysis/SourceManifestAction.java index e95d3921dec0b7..b1cf17f6dba355 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/SourceManifestAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/SourceManifestAction.java @@ -15,6 +15,7 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; @@ -217,7 +218,7 @@ public void writeOutputFile(OutputStream out, @Nullable EventHandler eventHandle public String getFileContents(@Nullable EventHandler eventHandler) throws IOException { ByteArrayOutputStream stream = new ByteArrayOutputStream(); writeOutputFile(stream, eventHandler); - return stream.toString(ISO_8859_1); + return stream.toString(UTF_8); } @Override diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWriteNestedSetOfTupleAction.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWriteNestedSetOfTupleAction.java index 6bd98ea17edf37..8958b5e2aee2ab 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWriteNestedSetOfTupleAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWriteNestedSetOfTupleAction.java @@ -14,6 +14,7 @@ package com.google.devtools.build.lib.analysis.actions; +import static java.nio.charset.StandardCharsets.UTF_8; import com.google.devtools.build.lib.actions.ActionExecutionContext; import com.google.devtools.build.lib.actions.ActionKeyContext; @@ -24,7 +25,6 @@ import com.google.devtools.build.lib.collect.nestedset.NestedSet; import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder; import com.google.devtools.build.lib.collect.nestedset.Order; -import com.google.devtools.build.lib.unsafe.StringUnsafe; import com.google.devtools.build.lib.util.Fingerprint; import javax.annotation.Nullable; import net.starlark.java.eval.Tuple; @@ -49,8 +49,7 @@ public LazyWriteNestedSetOfTupleAction( @Override public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx) { - return out -> - out.write(StringUnsafe.getInstance().getInternalStringBytes(getContents(delimiter))); + return out -> out.write(getContents(delimiter).getBytes(UTF_8)); } /** Computes the Action key for this action by computing the fingerprint for the file contents. */ diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWritePathsFileAction.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWritePathsFileAction.java index 2e24dc3d0d6ee1..e27d03b48eac12 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWritePathsFileAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/LazyWritePathsFileAction.java @@ -15,6 +15,7 @@ package com.google.devtools.build.lib.analysis.actions; +import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.collect.ImmutableSet; import com.google.devtools.build.lib.actions.ActionExecutionContext; @@ -25,7 +26,6 @@ import com.google.devtools.build.lib.collect.nestedset.NestedSet; import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder; import com.google.devtools.build.lib.collect.nestedset.Order; -import com.google.devtools.build.lib.unsafe.StringUnsafe; import com.google.devtools.build.lib.util.Fingerprint; import java.util.function.Function; import javax.annotation.Nullable; @@ -71,7 +71,7 @@ public LazyWritePathsFileAction( @Override public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx) { - return out -> out.write(StringUnsafe.getInstance().getInternalStringBytes(getContents())); + return out -> out.write(getContents().getBytes(UTF_8)); } /** Computes the Action key for this action by computing the fingerprint for the file contents. */ diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/LocalTemplateExpansionStrategy.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/LocalTemplateExpansionStrategy.java index 93396ad2923fb9..2c288480127756 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/LocalTemplateExpansionStrategy.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/LocalTemplateExpansionStrategy.java @@ -14,7 +14,7 @@ package com.google.devtools.build.lib.analysis.actions; -import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.collect.ImmutableList; import com.google.devtools.build.lib.actions.AbstractAction; @@ -47,8 +47,7 @@ public ImmutableList expandTemplate( final String expandedTemplate = getExpandedTemplateUnsafe( templateMetadata.template(), templateMetadata.substitutions(), ctx.getPathResolver()); - DeterministicWriter deterministicWriter = - out -> out.write(expandedTemplate.getBytes(ISO_8859_1)); + DeterministicWriter deterministicWriter = out -> out.write(expandedTemplate.getBytes(UTF_8)); return ctx.getContext(FileWriteActionContext.class) .writeOutputToFile( action, diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/StarlarkAction.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/StarlarkAction.java index ecf92e4768fb04..c6df48534c8e04 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/StarlarkAction.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/StarlarkAction.java @@ -13,7 +13,7 @@ // limitations under the License. package com.google.devtools.build.lib.analysis.actions; -import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableMap; @@ -349,13 +349,10 @@ protected void afterExecute( for (Artifact input : allInputs.toList()) { usedInputsByMappedPath.put(pathMapper.getMappedExecPathString(input), input); } - // Bazel encodes file system paths as raw bytes stored in a Latin-1 encoded string, so we need - // to make sure to also decode the unused input list as Latin-1. try (BufferedReader br = new BufferedReader( new InputStreamReader( - getUnusedInputListInputStream(actionExecutionContext, spawnResults), - ISO_8859_1))) { + getUnusedInputListInputStream(actionExecutionContext, spawnResults), UTF_8))) { String line; while ((line = br.readLine()) != null) { line = line.trim(); diff --git a/src/main/java/com/google/devtools/build/lib/analysis/actions/Template.java b/src/main/java/com/google/devtools/build/lib/analysis/actions/Template.java index 40a856c473c745..3cc16afd28166a 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/actions/Template.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/actions/Template.java @@ -14,8 +14,6 @@ package com.google.devtools.build.lib.analysis.actions; -import static java.nio.charset.StandardCharsets.ISO_8859_1; - import com.google.common.annotations.VisibleForTesting; import com.google.devtools.build.lib.actions.Artifact; import com.google.devtools.build.lib.actions.ArtifactPathResolver; @@ -24,12 +22,16 @@ import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import javax.annotation.Nullable; /** A template that contains text content, or alternatively throws an {@link IOException}. */ @Immutable // all subclasses are immutable public abstract class Template { + static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8; + /** We only allow subclasses in this file. */ private Template() {} @@ -103,8 +105,7 @@ private static final class ArtifactTemplate extends Template { public String getContent(ArtifactPathResolver resolver) throws IOException { Path templatePath = resolver.toPath(templateArtifact); try { - // Bazel's internal encoding for strings is raw bytes as Latin-1 - return FileSystemUtils.readContent(templatePath, ISO_8859_1); + return FileSystemUtils.readContent(templatePath, DEFAULT_CHARSET); } catch (IOException e) { throw new IOException( "failed to load template file '" diff --git a/src/main/java/com/google/devtools/build/lib/analysis/starlark/StarlarkActionFactory.java b/src/main/java/com/google/devtools/build/lib/analysis/starlark/StarlarkActionFactory.java index fde094bf65f097..60d263dba8f5b5 100644 --- a/src/main/java/com/google/devtools/build/lib/analysis/starlark/StarlarkActionFactory.java +++ b/src/main/java/com/google/devtools/build/lib/analysis/starlark/StarlarkActionFactory.java @@ -977,8 +977,13 @@ public void expandTemplate( ImmutableMap.Builder substitutionsBuilder = ImmutableMap.builder(); for (Map.Entry substitution : Dict.cast(substitutionsUnchecked, String.class, String.class, "substitutions").entrySet()) { + // Blaze calls ParserInput.fromLatin1 when reading BUILD files, which might + // contain UTF-8 encoded symbols as part of template substitution. + // As a quick fix, the substitution values are corrected before being passed on. + // In the long term, avoiding ParserInput.fromLatin would be a better approach. substitutionsBuilder.put( - substitution.getKey(), Substitution.of(substitution.getKey(), substitution.getValue())); + substitution.getKey(), + Substitution.of(substitution.getKey(), convertLatin1ToUtf8(substitution.getValue()))); } if (!Starlark.UNBOUND.equals(computedSubstitutions)) { for (Substitution substitution : ((TemplateDict) computedSubstitutions).getAll()) { @@ -1002,6 +1007,16 @@ public void expandTemplate( registerAction(action); } + /** + * Returns the proper UTF-8 representation of a String that was erroneously read using Latin1. + * + * @param latin1 Input string + * @return The input string, UTF8 encoded + */ + private static String convertLatin1ToUtf8(String latin1) { + return new String(latin1.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8); + } + @Override public Args args(StarlarkThread thread) { return Args.newArgs(thread.mutability(), getSemantics()); diff --git a/src/main/java/com/google/devtools/build/lib/authandtls/BUILD b/src/main/java/com/google/devtools/build/lib/authandtls/BUILD index 8ecbc9f277190a..bea95405f9a646 100644 --- a/src/main/java/com/google/devtools/build/lib/authandtls/BUILD +++ b/src/main/java/com/google/devtools/build/lib/authandtls/BUILD @@ -21,7 +21,6 @@ java_library( "//src/main/java/com/google/devtools/build/lib/authandtls/credentialhelper", "//src/main/java/com/google/devtools/build/lib/concurrent", "//src/main/java/com/google/devtools/build/lib/events", - "//src/main/java/com/google/devtools/build/lib/unsafe:string", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/common/options", "//third_party:auth", diff --git a/src/main/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoder.java b/src/main/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoder.java index 244aaa75f0b93f..a4a4f453b9f7bf 100644 --- a/src/main/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoder.java +++ b/src/main/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoder.java @@ -13,7 +13,8 @@ // limitations under the License. package com.google.devtools.build.lib.authandtls; -import com.google.devtools.build.lib.unsafe.StringUnsafe; +import com.google.common.base.Strings; +import java.nio.charset.Charset; import java.util.Base64; /** @@ -25,18 +26,16 @@ public final class BasicHttpAuthenticationEncoder { private BasicHttpAuthenticationEncoder() {} - /** - * Encode username and password into a token. - * - *

username and password are expected to use Bazel's internal string encoding. The returned - * string is a regular Unicode string. - */ - public static String encode(String username, String password) { - // The raw bytes in the internal string are assumed to be UTF-8, which is the encoding used for - // basic authentication. - return "Basic " - + Base64.getEncoder() - .encodeToString( - StringUnsafe.getInstance().getInternalStringBytes(username + ":" + password)); + /** Encode username and password into a token with given {@link Charset}. */ + public static String encode(String username, String password, Charset charset) { + StringBuilder sb = new StringBuilder(); + if (!Strings.isNullOrEmpty(username)) { + sb.append(username); + } + sb.append(":"); + if (!Strings.isNullOrEmpty(password)) { + sb.append(password); + } + return "Basic " + Base64.getEncoder().encodeToString(sb.toString().getBytes(charset)); } } diff --git a/src/main/java/com/google/devtools/build/lib/authandtls/NetrcCredentials.java b/src/main/java/com/google/devtools/build/lib/authandtls/NetrcCredentials.java index bc40512d659a63..ee15a91b9f9422 100644 --- a/src/main/java/com/google/devtools/build/lib/authandtls/NetrcCredentials.java +++ b/src/main/java/com/google/devtools/build/lib/authandtls/NetrcCredentials.java @@ -13,6 +13,8 @@ // limitations under the License. package com.google.devtools.build.lib.authandtls; +import static java.nio.charset.StandardCharsets.UTF_8; + import com.google.auth.Credentials; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; @@ -56,7 +58,7 @@ public Map> getRequestMetadata(URI uri) throws IOException Credential credential = netrc.getCredential(uri.getHost()); if (credential != null) { String token = - BasicHttpAuthenticationEncoder.encode(credential.login(), credential.password()); + BasicHttpAuthenticationEncoder.encode(credential.login(), credential.password(), UTF_8); return ImmutableMap.of("Authorization", ImmutableList.of(token)); } else { return ImmutableMap.of(); diff --git a/src/main/java/com/google/devtools/build/lib/authandtls/NetrcParser.java b/src/main/java/com/google/devtools/build/lib/authandtls/NetrcParser.java index a1f45e00dde6c3..5bf080d9e0b7ce 100644 --- a/src/main/java/com/google/devtools/build/lib/authandtls/NetrcParser.java +++ b/src/main/java/com/google/devtools/build/lib/authandtls/NetrcParser.java @@ -14,7 +14,7 @@ package com.google.devtools.build.lib.authandtls; import static com.google.common.base.Predicates.not; -import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Objects.requireNonNull; import com.google.common.base.Strings; @@ -77,7 +77,7 @@ private static class TokenStream implements Closeable { private final Queue tokens = new ArrayDeque<>(); TokenStream(InputStream inputStream) throws IOException { - bufferedReader = new BufferedReader(new InputStreamReader(inputStream, ISO_8859_1)); + bufferedReader = new BufferedReader(new InputStreamReader(inputStream, UTF_8)); processLine(); } @@ -183,7 +183,8 @@ private static Credential parseCredentialForMachine(TokenStream tokenStream, Str while (!done && tokenStream.hasNext()) { // Peek rather than taking next token since we probably won't process it Token token = tokenStream.peek(); - if (token instanceof ItemToken(String item)) { + if (token instanceof ItemToken itemToken) { + String item = itemToken.item(); switch (item) { case LOGIN -> { tokenStream.next(); diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BUILD b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BUILD index 9127d464f26a9a..37bde3ca91e61b 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BUILD +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/BUILD @@ -259,7 +259,6 @@ java_library( "//src/main/java/com/google/devtools/build/lib/skyframe:package_lookup_function", "//src/main/java/com/google/devtools/build/lib/skyframe:package_lookup_value", "//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value", - "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", "//src/main/java/com/google/devtools/build/lib/vfs/inmemoryfs", diff --git a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java index aaba70a5ffdf89..07eb48edd23d00 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/bzlmod/VendorFileFunction.java @@ -14,7 +14,7 @@ package com.google.devtools.build.lib.bazel.bzlmod; -import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.collect.ImmutableList; import com.google.devtools.build.lib.actions.FileValue; @@ -25,7 +25,6 @@ import com.google.devtools.build.lib.packages.VendorThreadContext; import com.google.devtools.build.lib.rules.repository.RepositoryDelegatorFunction; import com.google.devtools.build.lib.skyframe.PrecomputedValue; -import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; import com.google.devtools.build.lib.vfs.Root; @@ -55,8 +54,7 @@ public class VendorFileFunction implements SkyFunction { private static final String VENDOR_FILE_HEADER = - StringEncoding.unicodeToInternal( - """ + """ ############################################################################### # This file is used to configure how external repositories are handled in vendor mode. # ONLY the two following functions can be used: @@ -69,7 +67,7 @@ public class VendorFileFunction implements SkyFunction { # Note that Bazel will NOT update the vendored source for this repo while running vendor command # unless it's unpinned. The user can modify and maintain the vendored source for this repo manually. ############################################################################### -"""); +"""; private final BazelStarlarkEnvironment starlarkEnv; @@ -140,7 +138,8 @@ private void createVendorFile(Path vendorPath, Path vendorFilePath) throws VendorFileFunctionException { try { vendorPath.createDirectoryAndParents(); - FileSystemUtils.writeContent(vendorFilePath, ISO_8859_1, VENDOR_FILE_HEADER); + byte[] vendorFileContents = VENDOR_FILE_HEADER.getBytes(UTF_8); + FileSystemUtils.writeContent(vendorFilePath, vendorFileContents); } catch (IOException e) { throw new VendorFileFunctionException( new IOException("error creating VENDOR.bazel file", e), Transience.TRANSIENT); diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryResolvedModule.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryResolvedModule.java index 97940ed169d1d7..84b9ec714d6d0b 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryResolvedModule.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/RepositoryResolvedModule.java @@ -13,8 +13,6 @@ // limitations under the License. package com.google.devtools.build.lib.bazel.repository; -import static java.nio.charset.StandardCharsets.ISO_8859_1; - import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -31,6 +29,7 @@ import java.io.File; import java.io.IOException; import java.io.Writer; +import java.nio.charset.StandardCharsets; import java.util.LinkedHashMap; import java.util.Map; import net.starlark.java.eval.Printer; @@ -82,7 +81,7 @@ public void afterCommand() { for (Object resolved : resolvedValues.values()) { resultBuilder.add(resolved); } - try (Writer writer = Files.newWriter(new File(resolvedFile), ISO_8859_1)) { + try (Writer writer = Files.newWriter(new File(resolvedFile), StandardCharsets.UTF_8)) { writer.write(EXPORTED_NAME + " = " + new ValuePrinter().repr(resultBuilder.build())); } catch (IOException e) { logger.atWarning().withCause(e).log("IO Error writing to file %s", resolvedFile); diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD index 7e9b206a718169..615d9d14dde7ae 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/BUILD @@ -48,7 +48,6 @@ java_library( "//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value", "//src/main/java/com/google/devtools/build/lib/skyframe:repository_mapping_value", "//src/main/java/com/google/devtools/build/lib/starlarkbuildapi/repository", - "//src/main/java/com/google/devtools/build/lib/unsafe:string", "//src/main/java/com/google/devtools/build/lib/util", "//src/main/java/com/google/devtools/build/lib/util:string", "//src/main/java/com/google/devtools/build/lib/util/io:out-err", diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java index 737bfc781e3f35..2b98acad4af9b1 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkBaseExternalContext.java @@ -58,7 +58,6 @@ import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor; import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor.ExecutionResult; import com.google.devtools.build.lib.skyframe.ActionEnvironmentFunction; -import com.google.devtools.build.lib.unsafe.StringUnsafe; import com.google.devtools.build.lib.util.OsUtils; import com.google.devtools.build.lib.util.io.OutErr; import com.google.devtools.build.lib.vfs.FileSystemUtils; @@ -1364,17 +1363,23 @@ private static String renamedStripPrefix(String method, String stripPrefix, Stri @Param( name = "legacy_utf8", named = true, - defaultValue = "False", + defaultValue = "True", doc = """ - No-op. This parameter is deprecated and will be removed in a future version of \ - Bazel. + Encode file content to UTF-8, true by default. Future versions will change \ + the default and remove this parameter. """), }) public void createFile( Object path, String content, Boolean executable, Boolean legacyUtf8, StarlarkThread thread) throws RepositoryFunctionException, EvalException, InterruptedException { StarlarkPath p = getPath(path); + byte[] contentBytes; + if (legacyUtf8) { + contentBytes = content.getBytes(UTF_8); + } else { + contentBytes = content.getBytes(ISO_8859_1); + } WorkspaceRuleEvent w = WorkspaceRuleEvent.newFileEvent( p.toString(), @@ -1388,7 +1393,7 @@ public void createFile( makeDirectories(p.getPath()); p.getPath().delete(); try (OutputStream stream = p.getPath().getOutputStream()) { - stream.write(StringUnsafe.getInstance().getInternalStringBytes(content)); + stream.write(contentBytes); } if (executable) { p.getPath().setExecutable(true); diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkExecutionResult.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkExecutionResult.java index d9b76819144a34..cf1d10de10f364 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkExecutionResult.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkExecutionResult.java @@ -13,7 +13,7 @@ // limitations under the License. package com.google.devtools.build.lib.bazel.repository.starlark; -import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.base.Preconditions; import com.google.common.collect.Maps; @@ -38,6 +38,7 @@ import java.util.Set; import net.starlark.java.annot.StarlarkBuiltin; import net.starlark.java.annot.StarlarkMethod; +import net.starlark.java.eval.EvalException; import net.starlark.java.eval.StarlarkValue; /** @@ -181,14 +182,14 @@ Builder setQuiet(boolean quiet) { private static String toString(ByteArrayOutputStream stream) { try { - return stream.toString(ISO_8859_1); + return new String(stream.toByteArray(), UTF_8); } catch (IllegalStateException e) { return ""; } } - /** Execute the command specified by {@link #addArguments}. */ - StarlarkExecutionResult execute() throws InterruptedException { + /** Execute the command specified by {@link #addArguments(Iterable)}. */ + StarlarkExecutionResult execute() throws EvalException, InterruptedException { Preconditions.checkArgument(timeout > 0, "Timeout must be set prior to calling execute()."); Preconditions.checkArgument(!args.isEmpty(), "No command specified."); Preconditions.checkState(!executed, "Command was already executed, cannot re-use builder."); diff --git a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java index 3cab859e9e31cf..05d6bc4a50b515 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/repository/starlark/StarlarkRepositoryContext.java @@ -14,8 +14,6 @@ package com.google.devtools.build.lib.bazel.repository.starlark; -import static java.nio.charset.StandardCharsets.ISO_8859_1; - import com.github.difflib.patch.PatchFailedException; import com.google.common.collect.ImmutableMap; import com.google.devtools.build.docgen.annot.DocCategory; @@ -49,6 +47,7 @@ import com.google.devtools.build.skyframe.SkyFunctionException.Transience; import java.io.IOException; import java.io.OutputStream; +import java.nio.charset.StandardCharsets; import java.nio.file.InvalidPathException; import java.util.HashMap; import java.util.Map; @@ -323,16 +322,14 @@ public void createFileFromTemplate( try { checkInOutputDirectory("write", p); makeDirectories(p.getPath()); - // Read and write files as raw bytes by using the Latin-1 encoding, which matches the encoding - // used by Bazel for strings. - String tpl = FileSystemUtils.readContent(t.getPath(), ISO_8859_1); + String tpl = FileSystemUtils.readContent(t.getPath(), StandardCharsets.UTF_8); for (Map.Entry substitution : substitutionMap.entrySet()) { tpl = StringUtilities.replaceAllLiteral(tpl, substitution.getKey(), substitution.getValue()); } p.getPath().delete(); try (OutputStream stream = p.getPath().getOutputStream()) { - stream.write(tpl.getBytes(ISO_8859_1)); + stream.write(tpl.getBytes(StandardCharsets.UTF_8)); } if (executable) { p.getPath().setExecutable(true); diff --git a/src/main/java/com/google/devtools/build/lib/profiler/JsonProfile.java b/src/main/java/com/google/devtools/build/lib/profiler/JsonProfile.java index 778214161fce27..5ed8a6de5a6caa 100644 --- a/src/main/java/com/google/devtools/build/lib/profiler/JsonProfile.java +++ b/src/main/java/com/google/devtools/build/lib/profiler/JsonProfile.java @@ -14,8 +14,6 @@ package com.google.devtools.build.lib.profiler; -import static java.nio.charset.StandardCharsets.ISO_8859_1; - import com.google.devtools.build.lib.profiler.statistics.PhaseSummaryStatistics; import com.google.gson.stream.JsonReader; import com.google.gson.stream.JsonToken; @@ -25,6 +23,7 @@ import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; +import java.nio.charset.StandardCharsets; import java.time.Duration; import java.util.List; import java.util.zip.GZIPInputStream; @@ -47,7 +46,8 @@ public JsonProfile(File profileFile) throws IOException { public JsonProfile(InputStream inputStream) throws IOException { try (JsonReader reader = - new JsonReader(new BufferedReader(new InputStreamReader(inputStream, ISO_8859_1)))) { + new JsonReader( + new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8)))) { if (reader.peek() == JsonToken.BEGIN_OBJECT) { reader.beginObject(); while (reader.hasNext()) { diff --git a/src/main/java/com/google/devtools/build/lib/profiler/JsonTraceFileWriter.java b/src/main/java/com/google/devtools/build/lib/profiler/JsonTraceFileWriter.java index 2ab8881db77cd4..d73fa42380960f 100644 --- a/src/main/java/com/google/devtools/build/lib/profiler/JsonTraceFileWriter.java +++ b/src/main/java/com/google/devtools/build/lib/profiler/JsonTraceFileWriter.java @@ -13,8 +13,6 @@ // limitations under the License. package com.google.devtools.build.lib.profiler; -import static java.nio.charset.StandardCharsets.ISO_8859_1; - import com.google.common.base.Preconditions; import com.google.devtools.build.lib.analysis.BlazeVersionInfo; import com.google.devtools.build.lib.profiler.Profiler.TaskData; @@ -23,6 +21,7 @@ import java.io.IOException; import java.io.OutputStream; import java.io.OutputStreamWriter; +import java.nio.charset.StandardCharsets; import java.time.Duration; import java.time.Instant; import java.util.HashMap; @@ -224,9 +223,8 @@ public void run() { try (JsonWriter writer = new JsonWriter( // The buffer size of 262144 is chosen at random. - // Bazel internally stores strings as raw bytes encoded in ISO_8859_1, so we use the - // same encoding here to also write out raw bytes. - new OutputStreamWriter(new BufferedOutputStream(outStream, 262144), ISO_8859_1))) { + new OutputStreamWriter( + new BufferedOutputStream(outStream, 262144), StandardCharsets.UTF_8))) { var startDate = Instant.now(); writer.beginObject(); writer.name("otherData"); diff --git a/src/main/java/com/google/devtools/build/lib/rules/cpp/BUILD b/src/main/java/com/google/devtools/build/lib/rules/cpp/BUILD index f00a600daf5c7f..836bdc52f90d22 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/cpp/BUILD +++ b/src/main/java/com/google/devtools/build/lib/rules/cpp/BUILD @@ -113,7 +113,6 @@ java_library( "//src/main/java/com/google/devtools/build/lib/util:os", "//src/main/java/com/google/devtools/build/lib/util:shell_escaper", "//src/main/java/com/google/devtools/build/lib/util:string", - "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/util/io", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:ospathpolicy", diff --git a/src/main/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilter.java b/src/main/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilter.java index 46efc909977e65..e6804184fb707f 100644 --- a/src/main/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilter.java +++ b/src/main/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilter.java @@ -14,17 +14,14 @@ package com.google.devtools.build.lib.rules.cpp; -import static com.google.common.collect.ImmutableList.toImmutableList; -import static java.nio.charset.StandardCharsets.ISO_8859_1; - import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; -import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.vfs.Path; import java.io.ByteArrayOutputStream; import java.io.FilterOutputStream; import java.io.IOException; import java.io.OutputStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -62,37 +59,94 @@ public static class FilterShowIncludesOutputStream extends FilterOutputStream { // cl.exe will print different prefix according to the locale configured for MSVC. private static final ImmutableList SHOW_INCLUDES_PREFIXES = ImmutableList.of( - // English - "Note: including file:", - // Traditional Chinese - "注意: 包含檔案:", - // Czech - "Poznámka: Včetně souboru:", - // German - "Hinweis: Einlesen der Datei:", - // French (non-breaking spaces before the colons) - "Remarque : inclusion du fichier :", - // Italian (the missing : is intentional, this appears to be a bug in MSVC) - "Nota: file incluso", - // Japanese - "メモ: インクルード ファイル:", - // Korean - "참고: 포함 파일:", - // Polish - "Uwaga: w tym pliku:", - // Portuguese - "Observação: incluindo arquivo:", - // Russian - "Примечание: включение файла:", - // Turkish - "Not: eklenen dosya:", - // Simplified Chinese - "注意: 包含文件:", - // Spanish - "Nota: inclusión del archivo:") - .stream() - .map(StringEncoding::unicodeToInternal) - .collect(toImmutableList()); + new String( + new byte[] { + 78, 111, 116, 101, 58, 32, 105, 110, 99, 108, 117, 100, 105, 110, 103, 32, 102, + 105, 108, 101, 58 + }, + StandardCharsets.UTF_8), // English + new String( + new byte[] { + -26, -77, -88, -26, -124, -113, 58, 32, -27, -116, -123, -27, -112, -85, -26, -86, + -108, -26, -95, -120, 58 + }, + StandardCharsets.UTF_8), // Traditional Chinese + new String( + new byte[] { + 80, 111, 122, 110, -61, -95, 109, 107, 97, 58, 32, 86, -60, -115, 101, 116, 110, + -60, -101, 32, 115, 111, 117, 98, 111, 114, 117, 58 + }, + StandardCharsets.UTF_8), // Czech + new String( + new byte[] { + 72, 105, 110, 119, 101, 105, 115, 58, 32, 69, 105, 110, 108, 101, 115, 101, 110, + 32, 100, 101, 114, 32, 68, 97, 116, 101, 105, 58 + }, + StandardCharsets.UTF_8), // German + new String( + new byte[] { + 82, 101, 109, 97, 114, 113, 117, 101, -62, -96, 58, 32, 105, 110, 99, 108, 117, + 115, 105, 111, 110, 32, 100, 117, 32, 102, 105, 99, 104, 105, 101, 114, -62, -96, + 58 + }, + StandardCharsets.UTF_8), // French + new String( + new byte[] { + 78, 111, 116, 97, 58, 32, 102, 105, 108, 101, 32, 105, 110, 99, 108, 117, 115, 111 + }, + StandardCharsets.UTF_8), // Italian + new String( + new byte[] { + -29, -125, -95, -29, -125, -94, 58, 32, -29, -126, -92, -29, -125, -77, -29, -126, + -81, -29, -125, -85, -29, -125, -68, -29, -125, -119, 32, -29, -125, -107, -29, + -126, -95, -29, -126, -92, -29, -125, -85, 58 + }, + StandardCharsets.UTF_8), // Janpanese + new String( + new byte[] { + -20, -80, -72, -22, -77, -96, 58, 32, -19, -113, -84, -19, -107, -88, 32, -19, + -116, -116, -20, -99, -68, 58 + }, + StandardCharsets.UTF_8), // Korean + new String( + new byte[] { + 85, 119, 97, 103, 97, 58, 32, 119, 32, 116, 121, 109, 32, 112, 108, 105, 107, 117, + 58 + }, + StandardCharsets.UTF_8), // Polish + new String( + new byte[] { + 79, 98, 115, 101, 114, 118, 97, -61, -89, -61, -93, 111, 58, 32, 105, 110, 99, + 108, 117, 105, 110, 100, 111, 32, 97, 114, 113, 117, 105, 118, 111, 58 + }, + StandardCharsets.UTF_8), // Portuguese + new String( + new byte[] { + -48, -97, -47, -128, -48, -72, -48, -68, -48, -75, -47, -121, -48, -80, -48, -67, + -48, -72, -48, -75, 58, 32, -48, -78, -48, -70, -48, -69, -47, -114, -47, -121, + -48, -75, -48, -67, -48, -72, -48, -75, 32, -47, -124, -48, -80, -48, -71, -48, + -69, -48, -80, 58 + }, + StandardCharsets.UTF_8), // Russian + new String( + new byte[] { + 78, 111, 116, 58, 32, 101, 107, 108, 101, 110, 101, 110, 32, 100, 111, 115, 121, + 97, 58 + }, + StandardCharsets.UTF_8), // Turkish + new String( + new byte[] { + -26, -77, -88, -26, -124, -113, 58, 32, -27, -116, -123, -27, -112, -85, -26, + -106, -121, -28, -69, -74, 58 + }, + StandardCharsets.UTF_8), // Simplified Chinese + new String( + new byte[] { + 78, 111, 116, 97, 58, 32, 105, 110, 99, 108, 117, 115, 105, -61, -77, 110, 32, + 100, 101, 108, 32, 97, 114, 99, 104, 105, 118, 111, 58 + }, + StandardCharsets.UTF_8) // Spanish + ); private final String sourceFileName; private boolean sawPotentialUnsupportedShowIncludesLine; // Grab everything under the execroot base so that external repository header files are covered @@ -117,7 +171,7 @@ public FilterShowIncludesOutputStream(OutputStream out, String sourceFileName) { public void write(int b) throws IOException { buffer.write(b); if (b == NEWLINE) { - String line = buffer.toString(ISO_8859_1); + String line = buffer.toString(StandardCharsets.UTF_8.name()); boolean prefixMatched = false; for (String prefix : SHOW_INCLUDES_PREFIXES) { if (line.startsWith(prefix)) { @@ -141,7 +195,9 @@ public void write(int b) throws IOException { // can use non-UTF8 encodings, which the checks above fail to detect. As this results in // incorrect incremental builds, we emit a warning if the raw byte sequence comprising the // line looks like it could be a /showIncludes line. - if (POTENTIAL_UNSUPPORTED_SHOW_INCLUDES_LINE.matcher(line.trim()).matches()) { + if (POTENTIAL_UNSUPPORTED_SHOW_INCLUDES_LINE + .matcher(buffer.toString(StandardCharsets.ISO_8859_1).trim()) + .matches()) { sawPotentialUnsupportedShowIncludesLine = true; } buffer.writeTo(out); @@ -152,7 +208,7 @@ public void write(int b) throws IOException { @Override public void flush() throws IOException { - String line = buffer.toString(ISO_8859_1); + String line = buffer.toString(StandardCharsets.UTF_8.name()); // If this line starts or could start with a prefix. boolean startingWithAnyPrefix = false; diff --git a/src/main/java/com/google/devtools/build/lib/util/BUILD b/src/main/java/com/google/devtools/build/lib/util/BUILD index 2f2809c3735dad..4d3409e3739775 100644 --- a/src/main/java/com/google/devtools/build/lib/util/BUILD +++ b/src/main/java/com/google/devtools/build/lib/util/BUILD @@ -251,7 +251,6 @@ java_library( deps = [ ":os", ":shell_escaper", - ":string_encoding", "//src/main/java/com/google/devtools/build/lib/bugreport", "//src/main/java/com/google/devtools/build/lib/concurrent", "//src/main/java/com/google/devtools/build/lib/vfs", diff --git a/src/main/java/com/google/devtools/build/lib/util/DependencySet.java b/src/main/java/com/google/devtools/build/lib/util/DependencySet.java index 62a34a000efded..524cda148e0c39 100644 --- a/src/main/java/com/google/devtools/build/lib/util/DependencySet.java +++ b/src/main/java/com/google/devtools/build/lib/util/DependencySet.java @@ -14,16 +14,14 @@ package com.google.devtools.build.lib.util; -import static java.nio.charset.StandardCharsets.ISO_8859_1; - import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Ascii; import com.google.common.base.Preconditions; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; import com.google.errorprone.annotations.CanIgnoreReturnValue; import java.io.IOException; import java.io.PrintStream; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -148,7 +146,7 @@ public DependencySet process(byte[] content) throws IOException { // keep scanning. We do this to cope with "foo.o : \" which is // valid Makefile syntax produced by the cuda compiler. if (sawTarget && w > 0) { - addDependency(new String(content, 0, w, ISO_8859_1)); + addDependency(new String(content, 0, w, StandardCharsets.UTF_8)); w = 0; } continue; @@ -162,7 +160,7 @@ public DependencySet process(byte[] content) throws IOException { // (Arguably if !sawTarget && w > 0 we should report an error, // as that suggests the .d file is malformed.) if (sawTarget && w > 0) { - addDependency(new String(content, 0, w, ISO_8859_1)); + addDependency(new String(content, 0, w, StandardCharsets.UTF_8)); } w = 0; sawTarget = false; // reset for new line @@ -176,7 +174,7 @@ public DependencySet process(byte[] content) throws IOException { case '\n': case '\r': if (w > 0) { - outputFileName = new String(content, 0, w, ISO_8859_1); + outputFileName = new String(content, 0, w, StandardCharsets.UTF_8); w = 0; sawTarget = true; } @@ -272,7 +270,11 @@ private static String translateWindowsPath(String path) { return path; } if (n >= 2 && isAsciiLetter(path.charAt(1)) && (n == 2 || path.charAt(2) == '/')) { - return Ascii.toUpperCase(path.charAt(1)) + ":/" + path.substring(2); + StringBuilder sb = new StringBuilder(path.length()); + sb.append(Character.toUpperCase(path.charAt(1))); + sb.append(":/"); + sb.append(path, 2, path.length()); + return sb.toString(); } else { String unixRoot = getUnixRoot(); return unixRoot + path; @@ -308,7 +310,7 @@ private static String getUnixRoot() { @Nullable private static String determineUnixRoot(String jvmArgName) { // Get the path from a JVM flag, if specified. - String path = StringEncoding.platformToInternal(System.getProperty(jvmArgName)); + String path = System.getProperty(jvmArgName); if (path == null) { return null; } diff --git a/src/test/java/com/google/devtools/build/lib/analysis/BUILD b/src/test/java/com/google/devtools/build/lib/analysis/BUILD index 4202f6e4a92aa9..bcf7c921a91d31 100644 --- a/src/test/java/com/google/devtools/build/lib/analysis/BUILD +++ b/src/test/java/com/google/devtools/build/lib/analysis/BUILD @@ -154,7 +154,6 @@ java_library( "//src/main/java/com/google/devtools/build/lib/util:detailed_exit_code", "//src/main/java/com/google/devtools/build/lib/util:filetype", "//src/main/java/com/google/devtools/build/lib/util:os", - "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/util/io", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs:pathfragment", diff --git a/src/test/java/com/google/devtools/build/lib/analysis/actions/TemplateExpansionActionTest.java b/src/test/java/com/google/devtools/build/lib/analysis/actions/TemplateExpansionActionTest.java index 1ebd5b08e4449f..15f208d16b6e82 100644 --- a/src/test/java/com/google/devtools/build/lib/analysis/actions/TemplateExpansionActionTest.java +++ b/src/test/java/com/google/devtools/build/lib/analysis/actions/TemplateExpansionActionTest.java @@ -39,7 +39,6 @@ import com.google.devtools.build.lib.exec.util.TestExecutorBuilder; import com.google.devtools.build.lib.testutil.FoundationTestCase; import com.google.devtools.build.lib.util.Fingerprint; -import com.google.devtools.build.lib.util.StringEncoding; import com.google.devtools.build.lib.util.io.FileOutErr; import com.google.devtools.build.lib.vfs.FileSystemUtils; import com.google.devtools.build.lib.vfs.Path; @@ -253,10 +252,7 @@ public void testWithSpecialCharacters() throws Exception { // scratch.overwriteFile appends a newline, so we need an additional \n here String expected = String.format("%s%s\n", SPECIAL_CHARS, SPECIAL_CHARS); - executeTemplateExpansion( - expected, - ImmutableList.of( - Substitution.of("%key%", StringEncoding.unicodeToInternal(SPECIAL_CHARS)))); + executeTemplateExpansion(expected, ImmutableList.of(Substitution.of("%key%", SPECIAL_CHARS))); } private String computeKey(TemplateExpansionAction action) throws EvalException { diff --git a/src/test/java/com/google/devtools/build/lib/authandtls/BUILD b/src/test/java/com/google/devtools/build/lib/authandtls/BUILD index d013c143102b22..d16811aae7af9b 100644 --- a/src/test/java/com/google/devtools/build/lib/authandtls/BUILD +++ b/src/test/java/com/google/devtools/build/lib/authandtls/BUILD @@ -27,7 +27,6 @@ java_library( "//src/main/java/com/google/devtools/build/lib/authandtls", "//src/main/java/com/google/devtools/build/lib/authandtls/credentialhelper", "//src/main/java/com/google/devtools/build/lib/events", - "//src/main/java/com/google/devtools/build/lib/util:string_encoding", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs/inmemoryfs", "//src/main/java/com/google/devtools/common/options", diff --git a/src/test/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoderTest.java b/src/test/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoderTest.java index a7586f49a9758b..7873e0572d0a4e 100644 --- a/src/test/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoderTest.java +++ b/src/test/java/com/google/devtools/build/lib/authandtls/BasicHttpAuthenticationEncoderTest.java @@ -16,7 +16,6 @@ import static com.google.common.truth.Truth.assertThat; import static java.nio.charset.StandardCharsets.UTF_8; -import com.google.devtools.build.lib.util.StringEncoding; import java.util.Base64; import org.junit.Test; import org.junit.runner.RunWith; @@ -35,13 +34,13 @@ private static String[] decode(String message) { @Test public void encode_normalUsernamePassword_outputExpected() { - String message = BasicHttpAuthenticationEncoder.encode("Aladdin", "open sesame"); + String message = BasicHttpAuthenticationEncoder.encode("Aladdin", "open sesame", UTF_8); assertThat(message).isEqualTo("Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ=="); } @Test public void encode_normalUsernamePassword_canBeDecoded() { - String message = BasicHttpAuthenticationEncoder.encode("Aladdin", "open sesame"); + String message = BasicHttpAuthenticationEncoder.encode("Aladdin", "open sesame", UTF_8); String[] usernameAndPassword = decode(message); assertThat(usernameAndPassword[0]).isEqualTo("Aladdin"); @@ -50,7 +49,7 @@ public void encode_normalUsernamePassword_canBeDecoded() { @Test public void encode_usernameContainsColon_canBeDecoded() { - String message = BasicHttpAuthenticationEncoder.encode("foo:user", "foopass"); + String message = BasicHttpAuthenticationEncoder.encode("foo:user", "foopass", UTF_8); String[] usernameAndPassword = decode(message); assertThat(usernameAndPassword[0]).isEqualTo("foo"); @@ -59,27 +58,25 @@ public void encode_usernameContainsColon_canBeDecoded() { @Test public void encode_emptyUsername_outputExpected() { - String message = BasicHttpAuthenticationEncoder.encode("", "foopass"); + String message = BasicHttpAuthenticationEncoder.encode("", "foopass", UTF_8); assertThat(message).isEqualTo("Basic OmZvb3Bhc3M="); } @Test public void encode_emptyPassword_outputExpected() { - String message = BasicHttpAuthenticationEncoder.encode("foouser", ""); + String message = BasicHttpAuthenticationEncoder.encode("foouser", "", UTF_8); assertThat(message).isEqualTo("Basic Zm9vdXNlcjo="); } @Test public void encode_emptyUsernamePassword_outputExpected() { - String message = BasicHttpAuthenticationEncoder.encode("", ""); + String message = BasicHttpAuthenticationEncoder.encode("", "", UTF_8); assertThat(message).isEqualTo("Basic Og=="); } @Test public void encode_specialCharacterUtf8_outputExpected() { - String message = - BasicHttpAuthenticationEncoder.encode( - "test", StringEncoding.unicodeToInternal("123\u00A3")); + String message = BasicHttpAuthenticationEncoder.encode("test", "123\u00A3", UTF_8); assertThat(message).isEqualTo("Basic dGVzdDoxMjPCow=="); } } diff --git a/src/test/java/com/google/devtools/build/lib/authandtls/GoogleAuthUtilsTest.java b/src/test/java/com/google/devtools/build/lib/authandtls/GoogleAuthUtilsTest.java index ec1561e96def83..ec3f4e6437d3ea 100644 --- a/src/test/java/com/google/devtools/build/lib/authandtls/GoogleAuthUtilsTest.java +++ b/src/test/java/com/google/devtools/build/lib/authandtls/GoogleAuthUtilsTest.java @@ -15,6 +15,7 @@ package com.google.devtools.build.lib.authandtls; import static com.google.common.truth.Truth.assertThat; +import static java.nio.charset.StandardCharsets.UTF_8; import com.google.auth.Credentials; import com.google.common.base.Preconditions; @@ -343,7 +344,7 @@ private static void assertRequestMetadata( Map> requestMetadata, String username, String password) { assertThat(requestMetadata.keySet()).containsExactly("Authorization"); assertThat(Iterables.getOnlyElement(requestMetadata.values())) - .containsExactly(BasicHttpAuthenticationEncoder.encode(username, password)); + .containsExactly(BasicHttpAuthenticationEncoder.encode(username, password, UTF_8)); } private static CredentialHelperProvider newCredentialHelperProvider( diff --git a/src/test/java/com/google/devtools/build/lib/authandtls/NetrcCredentialsTest.java b/src/test/java/com/google/devtools/build/lib/authandtls/NetrcCredentialsTest.java index 12d204f3d4e3ba..2cb07cdf815003 100644 --- a/src/test/java/com/google/devtools/build/lib/authandtls/NetrcCredentialsTest.java +++ b/src/test/java/com/google/devtools/build/lib/authandtls/NetrcCredentialsTest.java @@ -14,6 +14,7 @@ package com.google.devtools.build.lib.authandtls; import static com.google.common.truth.Truth.assertThat; +import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; @@ -130,6 +131,6 @@ private static void assertRequestMetadata( Map> requestMetadata, String username, String password) { assertThat(requestMetadata.keySet()).containsExactly("Authorization"); assertThat(Iterables.getOnlyElement(requestMetadata.values())) - .containsExactly(BasicHttpAuthenticationEncoder.encode(username, password)); + .containsExactly(BasicHttpAuthenticationEncoder.encode(username, password, UTF_8)); } } diff --git a/src/test/java/com/google/devtools/build/lib/bazel/bzlmod/IndexRegistryTest.java b/src/test/java/com/google/devtools/build/lib/bazel/bzlmod/IndexRegistryTest.java index 7be12079291310..cedcaadf90ff66 100644 --- a/src/test/java/com/google/devtools/build/lib/bazel/bzlmod/IndexRegistryTest.java +++ b/src/test/java/com/google/devtools/build/lib/bazel/bzlmod/IndexRegistryTest.java @@ -70,7 +70,8 @@ public ImmutableMap> getRecordedHashes() { } } - private final String authToken = BasicHttpAuthenticationEncoder.encode("rinne", "rinnepass"); + private final String authToken = + BasicHttpAuthenticationEncoder.encode("rinne", "rinnepass", UTF_8); private DownloadManager downloadManager; private EventRecorder eventRecorder; @Rule public final TestHttpServer server = new TestHttpServer(authToken); diff --git a/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/UrlRewriterTest.java b/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/UrlRewriterTest.java index 9b2b8cb0a18857..dcc41d16e58280 100644 --- a/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/UrlRewriterTest.java +++ b/src/test/java/com/google/devtools/build/lib/bazel/repository/downloader/UrlRewriterTest.java @@ -16,6 +16,7 @@ import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.truth.Truth.assertThat; import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.UTF_8; import static org.junit.Assert.fail; import com.google.auth.Credentials; @@ -439,6 +440,6 @@ private static void assertRequestMetadata( Map> requestMetadata, String username, String password) { assertThat(requestMetadata.keySet()).containsExactly("Authorization"); assertThat(Iterables.getOnlyElement(requestMetadata.values())) - .containsExactly(BasicHttpAuthenticationEncoder.encode(username, password)); + .containsExactly(BasicHttpAuthenticationEncoder.encode(username, password, UTF_8)); } } diff --git a/src/test/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilterTest.java b/src/test/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilterTest.java index c831070a17061b..12c3cf88dfd440 100644 --- a/src/test/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilterTest.java +++ b/src/test/java/com/google/devtools/build/lib/rules/cpp/ShowIncludesFilterTest.java @@ -17,6 +17,9 @@ import static com.google.common.truth.Truth.assertThat; import static java.nio.charset.StandardCharsets.UTF_8; +import com.google.devtools.build.lib.vfs.DigestHashFunction; +import com.google.devtools.build.lib.vfs.FileSystem; +import com.google.devtools.build.lib.vfs.inmemoryfs.InMemoryFileSystem; import java.io.ByteArrayOutputStream; import java.io.FilterOutputStream; import java.io.IOException; @@ -32,12 +35,15 @@ public class ShowIncludesFilterTest { private ShowIncludesFilter showIncludesFilter; private ByteArrayOutputStream output; private FilterOutputStream filterOutputStream; + private FileSystem fs; @Before public void setUpOutputStreams() throws IOException { showIncludesFilter = new ShowIncludesFilter("foo.cpp"); output = new ByteArrayOutputStream(); filterOutputStream = showIncludesFilter.getFilteredOutputStream(output); + fs = new InMemoryFileSystem(DigestHashFunction.SHA256); + fs.getPath("/out").createDirectory(); } private byte[] getBytes(String str) { diff --git a/src/test/java/com/google/devtools/build/lib/starlark/StarlarkRuleImplementationFunctionsTest.java b/src/test/java/com/google/devtools/build/lib/starlark/StarlarkRuleImplementationFunctionsTest.java index a2c537ba939f38..14e7ee62fe7101 100644 --- a/src/test/java/com/google/devtools/build/lib/starlark/StarlarkRuleImplementationFunctionsTest.java +++ b/src/test/java/com/google/devtools/build/lib/starlark/StarlarkRuleImplementationFunctionsTest.java @@ -19,7 +19,6 @@ import static com.google.common.truth.Truth.assertWithMessage; import static com.google.devtools.build.lib.bazel.bzlmod.BzlmodTestUtil.createModuleKey; import static com.google.devtools.build.lib.skyframe.BzlLoadValue.keyForBuild; -import static java.nio.charset.StandardCharsets.ISO_8859_1; import static org.junit.Assert.assertThrows; import static org.junit.Assert.fail; @@ -69,6 +68,8 @@ import com.google.devtools.build.lib.testutil.TestConstants; import com.google.devtools.build.lib.util.Fingerprint; import com.google.devtools.build.lib.util.OsUtils; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; @@ -915,20 +916,29 @@ public void testCreateTemplateAction() throws Exception { assertThat(action.makeExecutable()).isFalse(); } + /** + * Simulates the fact that the Parser currently uses Latin1 to read BUILD files, while users + * usually write those files using UTF-8 encoding. Currently, the string-valued 'substitutions' + * parameter of the template_action function contains a hack that assumes its input is a UTF-8 + * encoded string which has been ingested as Latin 1. The hack converts the string to its + * "correct" UTF-8 value. Once Blaze starts calling {@link + * net.starlark.java.syntax.ParserInput#fromUTF8} instead of {@code fromLatin1} and the hack for + * the substitutions parameter is removed, this test will fail. + */ @Test - public void testCreateTemplateActionUnicode() throws Exception { + public void testCreateTemplateActionWithWrongEncoding() throws Exception { // The following array contains bytes that represent a string of length two when treated as // UTF-8 and a string of length four when treated as ISO-8859-1 (a.k.a. Latin 1). - String internalString = - new String(new byte[] {(byte) 0xC2, (byte) 0xA2, (byte) 0xC2, (byte) 0xA2}, ISO_8859_1); + byte[] bytesToDecode = {(byte) 0xC2, (byte) 0xA2, (byte) 0xC2, (byte) 0xA2}; + Charset latin1 = StandardCharsets.ISO_8859_1; + Charset utf8 = StandardCharsets.UTF_8; StarlarkRuleContext ruleContext = createRuleContext("//foo:foo"); setRuleContext(ruleContext); - // In production, Bazel parses Starlark as raw bytes encoded as Latin-1. ev.exec( "ruleContext.actions.expand_template(", " template = ruleContext.files.srcs[0],", " output = ruleContext.files.srcs[1],", - " substitutions = {'a" + internalString + "': '" + internalString + "'},", + " substitutions = {'a': '" + new String(bytesToDecode, latin1) + "'},", " is_executable = False)"); TemplateExpansionAction action = (TemplateExpansionAction) @@ -936,8 +946,7 @@ public void testCreateTemplateActionUnicode() throws Exception { ruleContext.getRuleContext().getAnalysisEnvironment().getRegisteredActions()); List substitutions = action.getSubstitutions(); assertThat(substitutions).hasSize(1); - assertThat(substitutions.get(0).getKey()).isEqualTo("a" + internalString); - assertThat(substitutions.get(0).getValue()).isEqualTo(internalString); + assertThat(substitutions.get(0).getValue()).isEqualTo(new String(bytesToDecode, utf8)); } @Test diff --git a/src/test/shell/bazel/BUILD b/src/test/shell/bazel/BUILD index 3aee8794ae5c05..cc5ebafe945635 100644 --- a/src/test/shell/bazel/BUILD +++ b/src/test/shell/bazel/BUILD @@ -874,7 +874,7 @@ sh_test( data = [ ":test-deps", "@bazel_tools//tools/bash/runfiles", - "@local_jdk//:jdk", # for remote_helpers setup_localjdk_javabase + "@local_jdk//:jdk", ], shard_count = 10, tags = [ diff --git a/src/test/shell/bazel/bazel_workspaces_test.sh b/src/test/shell/bazel/bazel_workspaces_test.sh index 7b689800249803..dc21ce0c7859f9 100755 --- a/src/test/shell/bazel/bazel_workspaces_test.sh +++ b/src/test/shell/bazel/bazel_workspaces_test.sh @@ -579,7 +579,21 @@ function test_read() { ensure_contains_exactly 'path: ".*filefile.sh"' 2 } -function test_read_roundtrip_utf8() { +function test_read_roundtrip_legacy_utf8() { + # See discussion on https://github.com/bazelbuild/bazel/pull/7309 + set_workspace_command ' + content = "echo fïlëfïlë" + repository_ctx.file("filefile.sh", content, True, legacy_utf8=True) + read_result = repository_ctx.read("filefile.sh") + + corrupted_content = "echo fïlëfïlë" + if read_result != corrupted_content: + fail("read(): expected %r, got %r" % (corrupted_content, read_result))' + + build_and_process_log --exclude_rule "repository @@local_config_cc" +} + +function test_read_roundtrip_nolegacy_utf8() { set_workspace_command ' content = "echo fïlëfïlë" repository_ctx.file("filefile.sh", content, True, legacy_utf8=False) diff --git a/src/test/shell/bazel/starlark_repository_test.sh b/src/test/shell/bazel/starlark_repository_test.sh index a67b618b6468c3..431d1581f8bebf 100755 --- a/src/test/shell/bazel/starlark_repository_test.sh +++ b/src/test/shell/bazel/starlark_repository_test.sh @@ -56,14 +56,6 @@ msys*) ;; esac -if $is_windows; then - export LC_ALL=C.utf8 -elif [[ "$(uname -s)" == "Linux" ]]; then - export LC_ALL=C.UTF-8 -else - export LC_ALL=en_US.UTF-8 -fi - source "$(rlocation "io_bazel/src/test/shell/bazel/remote_helpers.sh")" \ || { echo "remote_helpers.sh not found!" >&2; exit 1; } @@ -478,84 +470,6 @@ EOF expect_log "PWD=$repo2 TOTO=titi" } -function test_starlark_repository_unicode() { - setup_starlark_repository - - if "$is_windows"; then - # äöüÄÖÜß in UTF-8 - local unicode=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F') - else - # äöüÄÖÜß🌱 in UTF-8 - local unicode=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F\xF0\x9F\x8C\xB1') - fi - - tmpdir="$(mktemp -d ${TEST_TMPDIR}/test.XXXXXXXX)" - input_file="${tmpdir}/input$unicode" - echo -n "$unicode" > "${input_file}" - - cat >test.bzl <indirect%s.txt" % (UNICODE, UNICODE)] - ) - if result.return_code != 0: - fail("Incorrect return code from bash: %s != 0\n%s" % (result.return_code, result.stderr)) - - result = repository_ctx.execute([str(repository_ctx.which("bash")), "-c", "echo '%s'" % UNICODE]) - if result.return_code != 0: - fail("Incorrect return code from bash: %s != 0\n%s" % (result.return_code, result.stderr)) - if result.stdout.strip() != UNICODE: - fail("Incorrect output from bash: %s != %s\n%s" % (result.stdout.strip(), UNICODE, result.stderr)) - - result = repository_ctx.execute([str(repository_ctx.which("bash")), "-c", "echo '%s' && exit 123" % UNICODE]) - if result.return_code != 123: - fail("Incorrect return code from bash: %s != 123\n%s" % (result.return_code, result.stderr)) - if result.stdout.strip() != UNICODE: - fail("Incorrect output from bash: %s != %s\n%s" % (result.stdout.strip(), UNICODE, result.stderr)) - - repository_ctx.file("foo.txt", UNICODE) - read_content = repository_ctx.read("foo.txt") - if read_content != UNICODE: - fail("Incorrect content in foo.txt: %s != %s" % (read_content, UNICODE)) - - print("UNICODE = %s" % UNICODE) -repo = repository_rule(implementation=_impl) -EOF - - bazel build "--repo_env=INPUT_$unicode=${input_file}" @foo//:bar >& $TEST_log || fail "Failed to build" - expect_log "UNICODE = $unicode" - output_base="$(bazel info output_base)" - assert_contains "$unicode" "$output_base/external/+_repo_rules+foo/direct${unicode}.txt" - assert_contains "$unicode" "$output_base/external/+_repo_rules+foo/indirect${unicode}.txt" - assert_contains "${unicode}_replaced_${unicode}" "$output_base/external/+_repo_rules+foo/template${unicode}.txt" - - # The repo rule should not be re-run on server restart - bazel shutdown - bazel build "--repo_env=INPUT_${unicode}=${input_file}" @foo//:bar >& $TEST_log || fail "Failed to build" - expect_not_log "UNICODE" -} - function test_starlark_repository_environ() { setup_starlark_repository @@ -2033,7 +1947,7 @@ password foopass machine bar.example.org login barusername -password passbar🌱 +password passbar # following lines mix tabs and spaces machine oauthlife.com @@ -2098,7 +2012,7 @@ expected = { "https://bar.example.org/file3.tar" : { "type" : "basic", "login": "barusername", - "password" : "passbar🌱", + "password" : "passbar", }, "https://oauthlife.com/fizz/buzz/file5.tar": { "type" : "pattern", diff --git a/src/test/shell/bazel/unicode_filenames_test.sh b/src/test/shell/bazel/unicode_filenames_test.sh index dad4f071f46a94..0db493480a971c 100755 --- a/src/test/shell/bazel/unicode_filenames_test.sh +++ b/src/test/shell/bazel/unicode_filenames_test.sh @@ -50,13 +50,6 @@ msys*|mingw*|cygwin*) ;; esac -if $is_windows; then - export LC_ALL=C.utf8 -elif [[ "$(uname -s)" == "Linux" ]]; then - export LC_ALL=C.UTF-8 -else - export LC_ALL=en_US.UTF-8 -fi #### SETUP ############################################################# @@ -98,7 +91,23 @@ function has_iso_8859_1_locale() { [[ "${charmap}" == "ISO-8859-1" ]] } +function has_utf8_locale() { + charmap="$(LC_ALL=en_US.UTF-8 locale charmap 2>/dev/null)" + [[ "${charmap}" == "UTF-8" ]] +} + function test_utf8_source_artifact() { + # Bazel relies on the JVM for filename encoding, and can only support + # UTF-8 if either a UTF-8 or ISO-8859-1 locale is available. + if ! "$is_windows"; then + if ! has_iso_8859_1_locale && ! has_utf8_locale; then + echo "Skipping test (no ISO-8859-1 or UTF-8 locale)." + echo "Available locales (need ISO-8859-1 or UTF-8):" + locale -a + return + fi + fi + unicode_filenames_test_setup touch 'pkg/srcs/regular file.txt' @@ -110,7 +119,14 @@ function test_utf8_source_artifact() { # 'pkg/srcs/\xc3\xbcn\xc3\xafc\xc3\xb6d\xc3\xab f\xc3\xafl\xc3\xab.txt' touch "$(printf '%b' 'pkg/srcs/\xc3\xbcn\xc3\xafc\xc3\xb6d\xc3\xab f\xc3\xafl\xc3\xab.txt')" - bazel build //pkg:ls_srcs >$TEST_log 2>&1 || fail "Should build" + # On systems without an ISO-8859-1 locale, the environment locale must be + # the same as the file encoding. + # + # This doesn't affect systems that do have an ISO-8859-1 locale, because the + # Bazel launcher will force it to be used. + bazel shutdown + LC_ALL=en_US.UTF-8 bazel build //pkg:ls_srcs >$TEST_log 2>&1 || fail "Should build" + bazel shutdown assert_contains "pkg/srcs/regular file.txt" bazel-bin/pkg/ls_srcs assert_contains "pkg/srcs/subdir/file.txt" bazel-bin/pkg/ls_srcs @@ -149,6 +165,17 @@ function test_traditional_encoding_source_artifact() { } function test_utf8_source_artifact_in_bep() { + # Bazel relies on the JVM for filename encoding, and can only support + # UTF-8 if either a UTF-8 or ISO-8859-1 locale is available. + if ! "$is_windows"; then + if ! has_iso_8859_1_locale && ! has_utf8_locale; then + echo "Skipping test (no ISO-8859-1 or UTF-8 locale)." + echo "Available locales (need ISO-8859-1 or UTF-8):" + locale -a + return + fi + fi + unicode_filenames_test_setup touch 'pkg/srcs/regular file.txt' @@ -160,8 +187,15 @@ function test_utf8_source_artifact_in_bep() { # 'pkg/srcs/\xc3\xbcn\xc3\xafc\xc3\xb6d\xc3\xab f\xc3\xafl\xc3\xab.txt' touch "$(printf '%b' 'pkg/srcs/\xc3\xbcn\xc3\xafc\xc3\xb6d\xc3\xab f\xc3\xafl\xc3\xab.txt')" - bazel build --build_event_json_file="$TEST_log" \ + # On systems without an ISO-8859-1 locale, the environment locale must be + # the same as the file encoding. + # + # This doesn't affect systems that do have an ISO-8859-1 locale, because the + # Bazel launcher will force it to be used. + bazel shutdown + LC_ALL=en_US.UTF-8 bazel build --build_event_json_file="$TEST_log" \ //pkg:filegroup 2>&1 || fail "Should build" + bazel shutdown expect_log '"name":"pkg/srcs/regular file.txt"' expect_log '"name":"pkg/srcs/subdir/file.txt"' @@ -169,6 +203,10 @@ function test_utf8_source_artifact_in_bep() { } function test_utf8_filename_in_java_test() { + # Intentionally do not check for available locales: Either C.UTF_8 or + # en_US.UTF-8 should exist on all CI machines - if not, we want to learn about + # this so that the Java stub template can be adapted accordingly. + touch WORKSPACE mkdir pkg @@ -195,53 +233,4 @@ EOF bazel test //pkg:Test --test_output=errors 2>$TEST_log || fail "Test should pass" } -function test_cc_dependency_with_utf8_filename() { - # TODO: Find a way to get cl.exe to output Unicode when not running in a - # console or migrate to /sourceDependencies. - if $is_windows; then - echo "Skipping test on Windows." && return - fi - - local unicode="äöüÄÖÜß🌱" - - setup_module_dot_bazel - - mkdir pkg - cat >pkg/BUILD <"pkg/${unicode}.h" < pkg/bin.cc - cat >>pkg/bin.cc < - -int main() { - std::cout << MY_STRING << std::endl; - return 0; -} -EOF - bazel run //pkg:bin >$TEST_log 2>&1 || fail "Should build" - expect_log "original" - - # Change the header file and rebuild - cat >"pkg/${unicode}.h" <$TEST_log 2>&1 || fail "Should build" - expect_log "changed" -} - run_suite "Tests for handling of Unicode filenames" diff --git a/src/test/shell/integration/run_test.sh b/src/test/shell/integration/run_test.sh index 9cca51e6fd9a18..f8d172b7c6bb48 100755 --- a/src/test/shell/integration/run_test.sh +++ b/src/test/shell/integration/run_test.sh @@ -678,6 +678,7 @@ EOF fi } + function test_build_id_env_var() { local -r pkg="pkg${LINENO}" mkdir -p "${pkg}" diff --git a/src/test/shell/integration/starlark_dependency_pruning_test.sh b/src/test/shell/integration/starlark_dependency_pruning_test.sh index 802c31a2c1e345..ceb84465081346 100755 --- a/src/test/shell/integration/starlark_dependency_pruning_test.sh +++ b/src/test/shell/integration/starlark_dependency_pruning_test.sh @@ -49,14 +49,6 @@ msys*|mingw*|cygwin*) ;; esac -if $is_windows; then - export LC_ALL=C.utf8 -elif [[ "$(uname -s)" == "Linux" ]]; then - export LC_ALL=C.UTF-8 -else - export LC_ALL=en_US.UTF-8 -fi - add_to_bazelrc "build --package_path=%workspace%" add_to_bazelrc "build --spawn_strategy=local" @@ -250,43 +242,6 @@ function test_dependency_pruning_scenario() { check_unused_content "pkg/c.input" } -function test_dependency_pruning_scenario_unicode() { - if "$is_windows"; then - # äöüÄÖÜß in UTF-8 - local unicode=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F') - else - # äöüÄÖÜß🌱 in UTF-8 - local unicode=$(echo -e '\xC3\xA4\xC3\xB6\xC3\xBC\xC3\x84\xC3\x96\xC3\x9C\xC3\x9F\xF0\x9F\x8C\xB1') - fi - - # Initial build. - echo "contentD${unicode}" > "pkg/d${unicode}.input" - bazel build //pkg:output || fail "build failed" - check_output_content "contentA contentB contentC contentD${unicode}" - check_unused_content - - # Mark "d" as unused. - echo "unused" > "pkg/d${unicode}.input" - bazel build //pkg:output || fail "build failed" - check_output_content "contentA contentB contentC" - check_unused_content "pkg/d${unicode}.input" - - # Change "d" again: - # This time it should be used. But given that it was marked "unused" - # the build should not trigger: "d" should still be considered unused. - echo "newContentD${unicode}" > "pkg/d${unicode}.input" - bazel build //pkg:output || fail "build failed" - check_output_content "contentA contentB contentC" - check_unused_content "pkg/d${unicode}.input" - - # Change c: - # The build should be triggered, and the newer version of "d" should be used. - echo "unused" > pkg/c.input - bazel build //pkg:output || fail "build failed" - check_output_content "contentA contentB newContentD${unicode}" - check_unused_content "pkg/c.input" -} - # Verify that the state of the local action cache survives server shutdown. function test_unused_shutdown() { # Mark "b" as unused + initial build