Skip to content

Commit

Permalink
Avoid char array allocation in Starlark format
Browse files Browse the repository at this point in the history
Positional access via `String#charAt` is slightly faster than precreating the char array for Latin-1 strings and much faster for UTF-8 strings. It allocates less in both cases.

Also adds a `--latin1` flag to the `Benchmarks` tool that allows benchmarking against Bazel's way of parsing Starlark files.

Before:
```
INFO: Running command line: bazel-bin/src/test/java/net/starlark/java/eval/Benchmarks --filter bench_format --seconds 20
File src/test/java/net/starlark/java/eval/testdata/bench_string.star:
benchmark                        ops     cpu/op    wall/op   steps/op   alloc/op
bench_format               134217727      169ns      168ns          7       495B

INFO: Running command line: bazel-bin/src/test/java/net/starlark/java/eval/Benchmarks --filter bench_format --seconds 20 --latin1
File src/test/java/net/starlark/java/eval/testdata/bench_string.star:
benchmark                        ops     cpu/op    wall/op   steps/op   alloc/op
bench_format               268435455      122ns      121ns          7       495B
```

After:
```
INFO: Running command line: bazel-bin/src/test/java/net/starlark/java/eval/Benchmarks --filter bench_format --seconds 20
File src/test/java/net/starlark/java/eval/testdata/bench_string.star:
benchmark                        ops     cpu/op    wall/op   steps/op   alloc/op
bench_format               268435455      110ns      109ns          7       479B

INFO: Running command line: bazel-bin/src/test/java/net/starlark/java/eval/Benchmarks --filter bench_format --seconds 20 --latin1
File src/test/java/net/starlark/java/eval/testdata/bench_string.star:
benchmark                        ops     cpu/op    wall/op   steps/op   alloc/op
bench_format               268435455      113ns      112ns          7       479B
```
  • Loading branch information
fmeum committed Nov 26, 2024
1 parent fe743b9 commit cbec9b7
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 21 deletions.
37 changes: 18 additions & 19 deletions src/main/java/net/starlark/java/eval/FormatParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,17 @@ final class FormatParser {
String format(
String input, List<Object> args, Map<String, Object> kwargs, StarlarkSemantics semantics)
throws EvalException {
char[] chars = input.toCharArray();
StringBuilder output = new StringBuilder();
History history = new History();

for (int pos = 0; pos < chars.length; ++pos) {
char current = chars[pos];
for (int pos = 0; pos < input.length(); ++pos) {
char current = input.charAt(pos);
int advancePos = 0;

if (current == '{') {
advancePos = processOpeningBrace(chars, pos, args, kwargs, history, output, semantics);
advancePos = processOpeningBrace(input, pos, args, kwargs, history, output, semantics);
} else if (current == '}') {
advancePos = processClosingBrace(chars, pos, output);
advancePos = processClosingBrace(input, pos, output);
} else {
output.append(current);
}
Expand All @@ -75,7 +74,7 @@ String format(
* Processes the expression after an opening brace (possibly a replacement field) and emits the
* result to the output StringBuilder
*
* @param chars The entire string
* @param s The entire string
* @param pos The position of the opening brace
* @param args List of positional arguments
* @param kwargs Map of named arguments
Expand All @@ -85,7 +84,7 @@ String format(
* @return Number of characters that have been consumed by this method
*/
private int processOpeningBrace(
char[] chars,
String s,
int pos,
List<Object> args,
Map<String, Object> kwargs,
Expand All @@ -94,14 +93,14 @@ private int processOpeningBrace(
StarlarkSemantics semantics)
throws EvalException {
Printer printer = new Printer(output);
if (has(chars, pos + 1, '{')) {
if (has(s, pos + 1, '{')) {
// Escaped brace -> output and move to char after right brace
printer.append("{");
return 1;
}

// Inside a replacement field
String key = getFieldName(chars, pos);
String key = getFieldName(s, pos);
Object value = null;

// Only positional replacement fields will lead to a valid index
Expand Down Expand Up @@ -142,14 +141,14 @@ private Object getKwarg(Map<String, Object> kwargs, String key) throws EvalExcep
/**
* Processes a closing brace and emits the result to the output StringBuilder
*
* @param chars The entire string
* @param s The entire string
* @param pos Position of the closing brace
* @param output StringBuilder that consumes the result
* @return Number of characters that have been consumed by this method
*/
private int processClosingBrace(char[] chars, int pos, StringBuilder output)
private int processClosingBrace(String s, int pos, StringBuilder output)
throws EvalException {
if (!has(chars, pos + 1, '}')) {
if (!has(s, pos + 1, '}')) {
// Invalid brace outside replacement field
throw Starlark.errorf("Found '}' without matching '{'");
}
Expand All @@ -162,28 +161,28 @@ private int processClosingBrace(char[] chars, int pos, StringBuilder output)
/**
* Checks whether the given input string has a specific character at the given location
*
* @param data Input string as character array
* @param s Input string
* @param pos Position to be checked
* @param needle Character to be searched for
* @return True if string has the specified character at the given location
*/
private static boolean has(char[] data, int pos, char needle) {
return pos < data.length && data[pos] == needle;
private static boolean has(String s, int pos, char needle) {
return pos < s.length() && s.charAt(pos) == needle;
}

/**
* Extracts the name/index of the replacement field that starts at the specified location
*
* @param chars Input string
* @param s Input string
* @param openingBrace Position of the opening brace of the replacement field
* @return Name or index of the current replacement field
*/
private String getFieldName(char[] chars, int openingBrace) throws EvalException {
private String getFieldName(String s, int openingBrace) throws EvalException {
StringBuilder result = new StringBuilder();
boolean foundClosingBrace = false;

for (int pos = openingBrace + 1; pos < chars.length; ++pos) {
char current = chars[pos];
for (int pos = openingBrace + 1; pos < s.length(); ++pos) {
char current = s.charAt(pos);

if (current == '}') {
foundClosingBrace = true;
Expand Down
14 changes: 12 additions & 2 deletions src/test/java/net/starlark/java/eval/Benchmarks.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import com.sun.management.ThreadMXBean;
import java.io.File;
import java.lang.management.ManagementFactory;
import java.nio.file.Files;
import java.util.Arrays;
import java.util.Map;
import java.util.TreeMap;
Expand Down Expand Up @@ -63,7 +64,7 @@
public final class Benchmarks {

private static final String HELP =
"Usage: Benchmarks [--help] [--filter regex] [--seconds float] [--iterations count]\n"
"Usage: Benchmarks [--help] [--filter regex] [--seconds float] [--iterations count] [--latin1]\n"
+ "Runs Starlark benchmarks matching the filter for the specified approximate time or\n"
+ "specified number of iterations, and reports various performance measures.\n"
+ "The optional filter is a regular expression applied to the string FILE:FUNC,\n"
Expand All @@ -76,6 +77,7 @@ public static void main(String[] args) throws Exception {
Pattern filter = null; // default: all
long budgetNanos = -1;
int iterations = -1;
boolean useUtf8 = true;

// parse flags
int i;
Expand Down Expand Up @@ -124,6 +126,9 @@ public static void main(String[] args) throws Exception {
fail("--iterations out of range");
}

} else if (args[i].equals("--latin1")) {
useUtf8 = false;

} else {
fail("unknown flag: %s", args[i]);
}
Expand Down Expand Up @@ -154,7 +159,12 @@ public static void main(String[] args) throws Exception {
}

// parse & execute
ParserInput input = ParserInput.readFile(file.toString());
ParserInput input;
if (useUtf8) {
input = ParserInput.readFile(file.toString());
} else {
input = ParserInput.fromLatin1(Files.readAllBytes(file.toPath()), file.toString());
}
ImmutableMap.Builder<String, Object> predeclared = ImmutableMap.builder();
predeclared.put("json", Json.INSTANCE);

Expand Down
23 changes: 23 additions & 0 deletions src/test/java/net/starlark/java/eval/testdata/bench_string.star
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,26 @@ _to_strip = " Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do
def bench_strip(b):
for _ in range(b.n):
_to_strip.strip()

_sample_template = """
load("@rules_pkg//pkg:tar.bzl", "pkg_tar")
filegroup(
name="files",
srcs = {srcs},
visibility = ["//visibility:public"],
)
pkg_tar(
name="archives",
srcs = [":files"],
strip_prefix = "{strip_prefix}",
package_dir = "{dirname}",
visibility = ["//visibility:public"],
)
"""

def bench_format(b):
for _ in range(b.n):
_sample_template.format(srcs = "srcs", strip_prefix = "strip_prefix", dirname = "dirname")

0 comments on commit cbec9b7

Please sign in to comment.