Skip to content

Commit

Permalink
Automated rollback of commit 3fdec93.
Browse files Browse the repository at this point in the history
*** Reason for rollback ***

Internal test failures, see attached bug.

*** Original change description ***

Fix more Unicode bugs

PiperOrigin-RevId: 700339865
Change-Id: I5d63020afff2b6b0862ab438dfa3130c09f38a8f
  • Loading branch information
katre authored and keertk committed Dec 4, 2024
1 parent 7b20da2 commit 77a74f3
Show file tree
Hide file tree
Showing 43 changed files with 294 additions and 350 deletions.
2 changes: 0 additions & 2 deletions src/main/java/com/google/devtools/build/lib/analysis/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1482,7 +1482,6 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/actions:commandline_item",
"//src/main/java/com/google/devtools/build/lib/collect/nestedset",
"//src/main/java/com/google/devtools/build/lib/unsafe:string",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/net/starlark/java/eval",
"//third_party:jsr305",
Expand All @@ -1499,7 +1498,6 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/actions:artifact_expander",
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/collect/nestedset",
"//src/main/java/com/google/devtools/build/lib/unsafe:string",
"//src/main/java/com/google/devtools/build/lib/util",
"//third_party:guava",
"//third_party:jsr305",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@
import com.google.devtools.build.lib.packages.RuleTransitionData;
import com.google.devtools.build.lib.packages.WorkspaceFactory;
import com.google.devtools.build.lib.starlarkbuildapi.core.Bootstrap;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.devtools.build.lib.vfs.DigestHashFunction;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
Expand Down Expand Up @@ -132,10 +131,7 @@ protected synchronized byte[] getFastDigest(PathFragment path) {

@Override
protected synchronized byte[] getDigest(PathFragment path) {
return getDigestFunction()
.getHashFunction()
.hashBytes(StringUnsafe.getInstance().getInternalStringBytes(path.getPathString()))
.asBytes();
return getDigestFunction().getHashFunction().hashString(path.toString(), UTF_8).asBytes();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableSortedMap.toImmutableSortedMap;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Comparator.comparing;

import com.github.benmanes.caffeine.cache.Caffeine;
Expand Down Expand Up @@ -145,7 +146,7 @@ protected void computeKey(
public String getFileContents(@Nullable EventHandler eventHandler) throws IOException {
ByteArrayOutputStream stream = new ByteArrayOutputStream();
newDeterministicWriter().writeOutputFile(stream);
return stream.toString(ISO_8859_1);
return stream.toString(UTF_8);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
Expand Down Expand Up @@ -217,7 +218,7 @@ public void writeOutputFile(OutputStream out, @Nullable EventHandler eventHandle
public String getFileContents(@Nullable EventHandler eventHandler) throws IOException {
ByteArrayOutputStream stream = new ByteArrayOutputStream();
writeOutputFile(stream, eventHandler);
return stream.toString(ISO_8859_1);
return stream.toString(UTF_8);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.devtools.build.lib.actions.ActionExecutionContext;
import com.google.devtools.build.lib.actions.ActionKeyContext;
Expand All @@ -24,7 +25,6 @@
import com.google.devtools.build.lib.collect.nestedset.NestedSet;
import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder;
import com.google.devtools.build.lib.collect.nestedset.Order;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.devtools.build.lib.util.Fingerprint;
import javax.annotation.Nullable;
import net.starlark.java.eval.Tuple;
Expand All @@ -49,8 +49,7 @@ public LazyWriteNestedSetOfTupleAction(

@Override
public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx) {
return out ->
out.write(StringUnsafe.getInstance().getInternalStringBytes(getContents(delimiter)));
return out -> out.write(getContents(delimiter).getBytes(UTF_8));
}

/** Computes the Action key for this action by computing the fingerprint for the file contents. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.collect.ImmutableSet;
import com.google.devtools.build.lib.actions.ActionExecutionContext;
Expand All @@ -25,7 +26,6 @@
import com.google.devtools.build.lib.collect.nestedset.NestedSet;
import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder;
import com.google.devtools.build.lib.collect.nestedset.Order;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.devtools.build.lib.util.Fingerprint;
import java.util.function.Function;
import javax.annotation.Nullable;
Expand Down Expand Up @@ -71,7 +71,7 @@ public LazyWritePathsFileAction(

@Override
public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx) {
return out -> out.write(StringUnsafe.getInstance().getInternalStringBytes(getContents()));
return out -> out.write(getContents().getBytes(UTF_8));
}

/** Computes the Action key for this action by computing the fingerprint for the file contents. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.collect.ImmutableList;
import com.google.devtools.build.lib.actions.AbstractAction;
Expand Down Expand Up @@ -47,8 +47,7 @@ public ImmutableList<SpawnResult> expandTemplate(
final String expandedTemplate =
getExpandedTemplateUnsafe(
templateMetadata.template(), templateMetadata.substitutions(), ctx.getPathResolver());
DeterministicWriter deterministicWriter =
out -> out.write(expandedTemplate.getBytes(ISO_8859_1));
DeterministicWriter deterministicWriter = out -> out.write(expandedTemplate.getBytes(UTF_8));
return ctx.getContext(FileWriteActionContext.class)
.writeOutputToFile(
action,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.
package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -349,13 +349,10 @@ protected void afterExecute(
for (Artifact input : allInputs.toList()) {
usedInputsByMappedPath.put(pathMapper.getMappedExecPathString(input), input);
}
// Bazel encodes file system paths as raw bytes stored in a Latin-1 encoded string, so we need
// to make sure to also decode the unused input list as Latin-1.
try (BufferedReader br =
new BufferedReader(
new InputStreamReader(
getUnusedInputListInputStream(actionExecutionContext, spawnResults),
ISO_8859_1))) {
getUnusedInputListInputStream(actionExecutionContext, spawnResults), UTF_8))) {
String line;
while ((line = br.readLine()) != null) {
line = line.trim();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,6 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.annotations.VisibleForTesting;
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.actions.ArtifactPathResolver;
Expand All @@ -24,12 +22,16 @@
import com.google.devtools.build.lib.vfs.FileSystemUtils;
import com.google.devtools.build.lib.vfs.Path;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import javax.annotation.Nullable;

/** A template that contains text content, or alternatively throws an {@link IOException}. */
@Immutable // all subclasses are immutable
public abstract class Template {

static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;

/** We only allow subclasses in this file. */
private Template() {}

Expand Down Expand Up @@ -103,8 +105,7 @@ private static final class ArtifactTemplate extends Template {
public String getContent(ArtifactPathResolver resolver) throws IOException {
Path templatePath = resolver.toPath(templateArtifact);
try {
// Bazel's internal encoding for strings is raw bytes as Latin-1
return FileSystemUtils.readContent(templatePath, ISO_8859_1);
return FileSystemUtils.readContent(templatePath, DEFAULT_CHARSET);
} catch (IOException e) {
throw new IOException(
"failed to load template file '"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -977,8 +977,13 @@ public void expandTemplate(
ImmutableMap.Builder<String, Substitution> substitutionsBuilder = ImmutableMap.builder();
for (Map.Entry<String, String> substitution :
Dict.cast(substitutionsUnchecked, String.class, String.class, "substitutions").entrySet()) {
// Blaze calls ParserInput.fromLatin1 when reading BUILD files, which might
// contain UTF-8 encoded symbols as part of template substitution.
// As a quick fix, the substitution values are corrected before being passed on.
// In the long term, avoiding ParserInput.fromLatin would be a better approach.
substitutionsBuilder.put(
substitution.getKey(), Substitution.of(substitution.getKey(), substitution.getValue()));
substitution.getKey(),
Substitution.of(substitution.getKey(), convertLatin1ToUtf8(substitution.getValue())));
}
if (!Starlark.UNBOUND.equals(computedSubstitutions)) {
for (Substitution substitution : ((TemplateDict) computedSubstitutions).getAll()) {
Expand All @@ -1002,6 +1007,16 @@ public void expandTemplate(
registerAction(action);
}

/**
* Returns the proper UTF-8 representation of a String that was erroneously read using Latin1.
*
* @param latin1 Input string
* @return The input string, UTF8 encoded
*/
private static String convertLatin1ToUtf8(String latin1) {
return new String(latin1.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
}

@Override
public Args args(StarlarkThread thread) {
return Args.newArgs(thread.mutability(), getSemantics());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/authandtls/credentialhelper",
"//src/main/java/com/google/devtools/build/lib/concurrent",
"//src/main/java/com/google/devtools/build/lib/events",
"//src/main/java/com/google/devtools/build/lib/unsafe:string",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/common/options",
"//third_party:auth",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
// limitations under the License.
package com.google.devtools.build.lib.authandtls;

import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.common.base.Strings;
import java.nio.charset.Charset;
import java.util.Base64;

/**
Expand All @@ -25,18 +26,16 @@ public final class BasicHttpAuthenticationEncoder {

private BasicHttpAuthenticationEncoder() {}

/**
* Encode username and password into a token.
*
* <p>username and password are expected to use Bazel's internal string encoding. The returned
* string is a regular Unicode string.
*/
public static String encode(String username, String password) {
// The raw bytes in the internal string are assumed to be UTF-8, which is the encoding used for
// basic authentication.
return "Basic "
+ Base64.getEncoder()
.encodeToString(
StringUnsafe.getInstance().getInternalStringBytes(username + ":" + password));
/** Encode username and password into a token with given {@link Charset}. */
public static String encode(String username, String password, Charset charset) {
StringBuilder sb = new StringBuilder();
if (!Strings.isNullOrEmpty(username)) {
sb.append(username);
}
sb.append(":");
if (!Strings.isNullOrEmpty(password)) {
sb.append(password);
}
return "Basic " + Base64.getEncoder().encodeToString(sb.toString().getBytes(charset));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
// limitations under the License.
package com.google.devtools.build.lib.authandtls;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.auth.Credentials;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -56,7 +58,7 @@ public Map<String, List<String>> getRequestMetadata(URI uri) throws IOException
Credential credential = netrc.getCredential(uri.getHost());
if (credential != null) {
String token =
BasicHttpAuthenticationEncoder.encode(credential.login(), credential.password());
BasicHttpAuthenticationEncoder.encode(credential.login(), credential.password(), UTF_8);
return ImmutableMap.of("Authorization", ImmutableList.of(token));
} else {
return ImmutableMap.of();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
package com.google.devtools.build.lib.authandtls;

import static com.google.common.base.Predicates.not;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Objects.requireNonNull;

import com.google.common.base.Strings;
Expand Down Expand Up @@ -77,7 +77,7 @@ private static class TokenStream implements Closeable {
private final Queue<Token> tokens = new ArrayDeque<>();

TokenStream(InputStream inputStream) throws IOException {
bufferedReader = new BufferedReader(new InputStreamReader(inputStream, ISO_8859_1));
bufferedReader = new BufferedReader(new InputStreamReader(inputStream, UTF_8));
processLine();
}

Expand Down Expand Up @@ -183,7 +183,8 @@ private static Credential parseCredentialForMachine(TokenStream tokenStream, Str
while (!done && tokenStream.hasNext()) {
// Peek rather than taking next token since we probably won't process it
Token token = tokenStream.peek();
if (token instanceof ItemToken(String item)) {
if (token instanceof ItemToken itemToken) {
String item = itemToken.item();
switch (item) {
case LOGIN -> {
tokenStream.next();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,6 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/skyframe:package_lookup_function",
"//src/main/java/com/google/devtools/build/lib/skyframe:package_lookup_value",
"//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value",
"//src/main/java/com/google/devtools/build/lib/util:string_encoding",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
"//src/main/java/com/google/devtools/build/lib/vfs/inmemoryfs",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

package com.google.devtools.build.lib.bazel.bzlmod;

import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.collect.ImmutableList;
import com.google.devtools.build.lib.actions.FileValue;
Expand All @@ -25,7 +25,6 @@
import com.google.devtools.build.lib.packages.VendorThreadContext;
import com.google.devtools.build.lib.rules.repository.RepositoryDelegatorFunction;
import com.google.devtools.build.lib.skyframe.PrecomputedValue;
import com.google.devtools.build.lib.util.StringEncoding;
import com.google.devtools.build.lib.vfs.FileSystemUtils;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.Root;
Expand Down Expand Up @@ -55,8 +54,7 @@
public class VendorFileFunction implements SkyFunction {

private static final String VENDOR_FILE_HEADER =
StringEncoding.unicodeToInternal(
"""
"""
###############################################################################
# This file is used to configure how external repositories are handled in vendor mode.
# ONLY the two following functions can be used:
Expand All @@ -69,7 +67,7 @@ public class VendorFileFunction implements SkyFunction {
# Note that Bazel will NOT update the vendored source for this repo while running vendor command
# unless it's unpinned. The user can modify and maintain the vendored source for this repo manually.
###############################################################################
""");
""";

private final BazelStarlarkEnvironment starlarkEnv;

Expand Down Expand Up @@ -140,7 +138,8 @@ private void createVendorFile(Path vendorPath, Path vendorFilePath)
throws VendorFileFunctionException {
try {
vendorPath.createDirectoryAndParents();
FileSystemUtils.writeContent(vendorFilePath, ISO_8859_1, VENDOR_FILE_HEADER);
byte[] vendorFileContents = VENDOR_FILE_HEADER.getBytes(UTF_8);
FileSystemUtils.writeContent(vendorFilePath, vendorFileContents);
} catch (IOException e) {
throw new VendorFileFunctionException(
new IOException("error creating VENDOR.bazel file", e), Transience.TRANSIENT);
Expand Down
Loading

0 comments on commit 77a74f3

Please sign in to comment.