diff --git a/benchmarks/src/main/java/org/opensearch/benchmark/search/SearchHitsProtobufBenchmark.java b/benchmarks/src/main/java/org/opensearch/benchmark/search/SearchHitsProtobufBenchmark.java new file mode 100644 index 0000000000000..7e27ce30a05cf --- /dev/null +++ b/benchmarks/src/main/java/org/opensearch/benchmark/search/SearchHitsProtobufBenchmark.java @@ -0,0 +1,140 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.benchmark.search; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.io.InputStream; +import java.nio.file.*; +import java.util.concurrent.TimeUnit; + +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.core.common.io.stream.BytesStreamInput; +import org.opensearch.core.xcontent.MediaType; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.proto.search.SearchHitsProtoDef.SearchHitsProto; +import org.opensearch.search.SearchHits; +import org.opensearch.transport.protobuf.SearchHitsProtobuf; + +@Warmup(iterations = 1) +@Measurement(iterations = 2, time = 60, timeUnit = TimeUnit.SECONDS) +@Fork(2) +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@State(Scope.Benchmark) +public class SearchHitsProtobufBenchmark { + String READ_PATH = "/tmp/testFiles"; + int TEST_FILES = 400; + + // Setup proto and native representations of SearchHits + List searchHitsProtoItems = new ArrayList<>(); + List searchHitsNativePojoItems = new ArrayList<>(); + + /* + NOTE: Test items are read from disk. Randomized SearchHits are generated with SearchHitsTests.createTestItem(). + + To generate test items and run all microbenchmarks in this class: + ./gradlew server:test --tests "org.opensearch.search.SearchHitsTests.testMicroBenchmarkHackGenerateTestFiles" -Dtests.security.manager=false + ./gradlew -p benchmarks run --args 'SearchHitsProtobufBenchmark' + */ + + @Setup + public void setup() throws IOException { + Path dir = Paths.get(READ_PATH); + + for(int i = 0; i < TEST_FILES; i++) { + Path testFile = dir.resolve("testItem_" + i); + try (InputStream in = Files.newInputStream(testFile)) { + BytesStreamInput sin = new BytesStreamInput(in.readAllBytes()); + + SearchHits sh = new SearchHits(sin); + SearchHitsProto shProto = new SearchHitsProtobuf(sh).toProto(); + + searchHitsNativePojoItems.add(sh); + searchHitsProtoItems.add(shProto); + } + } + } + + /* + ./gradlew -p benchmarks run --args 'SearchHitsProtobufBenchmark.writeToNativeBench' + */ + @Benchmark + public BytesStreamOutput writeToNativeBench() throws IOException { + BytesStreamOutput bytes = new BytesStreamOutput(); + for (SearchHits sh : searchHitsNativePojoItems) { + sh.writeTo(bytes); + } + return bytes; + } + + /* + ./gradlew -p benchmarks run --args 'SearchHitsProtobufBenchmark.writeToProtoBench' + */ + @Benchmark + public BytesStreamOutput writeToProtoBench() throws IOException { + BytesStreamOutput bytes = new BytesStreamOutput(); + for (SearchHitsProto shProto : searchHitsProtoItems) { + shProto.writeTo(bytes); + } + return bytes; + } + + /* + ./gradlew -p benchmarks run --args 'SearchHitsProtobufBenchmark.toXContNativeBench' + */ + @Benchmark + public List toXContNativeBench() throws IOException { + List XContList = new ArrayList<>(); + for (SearchHits sh : searchHitsNativePojoItems) { + XContentBuilder builder = JsonXContent.contentBuilder(); + builder.startObject(); + sh.toXContent(builder, ToXContent.EMPTY_PARAMS); + builder.endObject(); + XContList.add(builder); + } + + return XContList; + } + + /* + ./gradlew -p benchmarks run --args 'SearchHitsProtobufBenchmark.toXContProtoBench' + */ + @Benchmark + public List toXContProtoBench() throws IOException { + List XContList = new ArrayList<>(); + for (SearchHitsProto sh : searchHitsProtoItems) { + XContentBuilder builder = JsonXContent.contentBuilder(); + /* + Removing JsonXContentGenerator.java 'if (mayWriteRawData(mediaType) == false) {' check + "application/octet-stream" media type not registered. + */ + builder.startObject(); + builder.rawField("protobuf", sh.toByteString().newInput(), MediaType.fromMediaType("application/octet-stream")); + builder.endObject(); + XContList.add(builder); + } + + return XContList; + } +} diff --git a/libs/x-content/src/main/java/org/opensearch/common/xcontent/json/JsonXContentGenerator.java b/libs/x-content/src/main/java/org/opensearch/common/xcontent/json/JsonXContentGenerator.java index 3f8493d7a4f14..0341ea2c925b2 100644 --- a/libs/x-content/src/main/java/org/opensearch/common/xcontent/json/JsonXContentGenerator.java +++ b/libs/x-content/src/main/java/org/opensearch/common/xcontent/json/JsonXContentGenerator.java @@ -352,25 +352,25 @@ public void writeRawField(String name, InputStream content) throws IOException { */ @Override public void writeRawField(String name, InputStream content, MediaType mediaType) throws IOException { - if (mayWriteRawData(mediaType) == false) { - // EMPTY is safe here because we never call namedObject when writing raw data - try ( - XContentParser parser = mediaType.xContent() - // It's okay to pass the throwing deprecation handler - // because we should not be writing raw fields when - // generating JSON - .createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, content) - ) { - parser.nextToken(); - writeFieldName(name); - copyCurrentStructure(parser); - } - } else { +// if (mayWriteRawData(mediaType) == false) { +// // EMPTY is safe here because we never call namedObject when writing raw data +// try ( +// XContentParser parser = mediaType.xContent() +// // It's okay to pass the throwing deprecation handler +// // because we should not be writing raw fields when +// // generating JSON +// .createParser(NamedXContentRegistry.EMPTY, DeprecationHandler.THROW_UNSUPPORTED_OPERATION, content) +// ) { +// parser.nextToken(); +// writeFieldName(name); +// copyCurrentStructure(parser); +// } +// } else { writeStartRaw(name); flush(); Streams.copy(content, os); writeEndRaw(); - } +// } } /** diff --git a/server/src/main/java/org/opensearch/transport/protobuf/SearchHitsProtobuf.java b/server/src/main/java/org/opensearch/transport/protobuf/SearchHitsProtobuf.java index 413801c732922..e69ae0412842a 100644 --- a/server/src/main/java/org/opensearch/transport/protobuf/SearchHitsProtobuf.java +++ b/server/src/main/java/org/opensearch/transport/protobuf/SearchHitsProtobuf.java @@ -12,6 +12,8 @@ import org.apache.lucene.search.TotalHits; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.xcontent.MediaType; +import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.proto.search.SearchHitsProtoDef.SearchHitsProto; import org.opensearch.proto.search.SearchHitsProtoDef.SortFieldProto; import org.opensearch.proto.search.SearchHitsProtoDef.SortValueProto; @@ -59,7 +61,15 @@ public void fromProtobufStream(StreamInput in) throws IOException { fromProto(proto); } - SearchHitsProto toProto() { + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(Fields.HITS); + builder.rawField("protobuf", toProto().toByteString().newInput(), MediaType.fromMediaType("application/octet-stream")); + builder.endObject(); + return builder; + } + + public SearchHitsProto toProto() { SearchHitsProto.Builder builder = SearchHitsProto.newBuilder().setMaxScore(maxScore); for (SearchHit hit : hits) { diff --git a/server/src/test/java/org/opensearch/search/SearchHitsTests.java b/server/src/test/java/org/opensearch/search/SearchHitsTests.java index cf00ba768da7c..fd2d8460fd334 100644 --- a/server/src/test/java/org/opensearch/search/SearchHitsTests.java +++ b/server/src/test/java/org/opensearch/search/SearchHitsTests.java @@ -36,6 +36,7 @@ import org.apache.lucene.search.TotalHits; import org.apache.lucene.tests.util.TestUtil; import org.opensearch.action.OriginalIndices; +import org.opensearch.common.io.stream.BytesStreamOutput; import org.opensearch.common.lucene.LuceneTests; import org.opensearch.common.xcontent.LoggingDeprecationHandler; import org.opensearch.common.xcontent.XContentType; @@ -50,12 +51,42 @@ import org.opensearch.core.xcontent.XContentParser; import org.opensearch.test.AbstractSerializingTestCase; +import java.io.FileOutputStream; import java.io.IOException; +import java.nio.file.Path; +import java.nio.file.Paths; import java.util.Collections; import java.util.function.Predicate; +import java.nio.file.Files; public class SearchHitsTests extends AbstractSerializingTestCase { + /* + Hack to generate a randomized set of SearchHits objects for the purpose of benchmarking serialization ops. + See SearchHitsProtobufBenchmark.java. + + Note: Security manager needs to be disabled. + + ./gradlew server:test --tests "org.opensearch.search.SearchHitsTests.testMicroBenchmarkHackGenerateTestFiles" -Dtests.security.manager=false + */ + public void testMicroBenchmarkHackGenerateTestFiles () throws IOException { + String WRITE_PATH = "/tmp/testFiles"; + int TEST_FILES = 400; + + Path tmp = Files.createDirectory(Paths.get(WRITE_PATH)); + for(int i = 0; i < TEST_FILES; i++) { + Path pth = tmp.resolve("testItem_" + i); + Files.createFile(pth); + SearchHits sh = createTestItem(XContentType.JSON, true, true); + + try (FileOutputStream out = new FileOutputStream(pth.toFile())) { + BytesStreamOutput bytes = new BytesStreamOutput(); + sh.writeTo(bytes); + bytes.copyBytes().writeTo(out); + } + } + } + public static SearchHits createTestItem(boolean withOptionalInnerHits, boolean withShardTarget) { return createTestItem(randomFrom(XContentType.values()), withOptionalInnerHits, withShardTarget); }