From 15a2600e0a010a1ca30cedcd1594873ba195449a Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Tue, 2 Apr 2024 12:27:43 -0700 Subject: [PATCH] Add limits to emitted values from DerivedFieldScript Signed-off-by: Rishabh Maurya --- .../painless/DerivedFieldScriptTests.java | 55 +++++++++++++++++++ .../opensearch/script/DerivedFieldScript.java | 36 +++++++++++- 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/modules/lang-painless/src/test/java/org/opensearch/painless/DerivedFieldScriptTests.java b/modules/lang-painless/src/test/java/org/opensearch/painless/DerivedFieldScriptTests.java index e394b2c7d2968..2340e5b238ebb 100644 --- a/modules/lang-painless/src/test/java/org/opensearch/painless/DerivedFieldScriptTests.java +++ b/modules/lang-painless/src/test/java/org/opensearch/painless/DerivedFieldScriptTests.java @@ -29,6 +29,7 @@ import org.opensearch.painless.spi.AllowlistLoader; import org.opensearch.script.DerivedFieldScript; import org.opensearch.script.ScriptContext; +import org.opensearch.script.ScriptException; import org.opensearch.search.lookup.LeafSearchLookup; import org.opensearch.search.lookup.SearchLookup; @@ -169,4 +170,58 @@ public void testEmittingMultipleValues() throws IOException { List result = script.getEmittedValues(); assertEquals(List.of("test", "multiple", "values"), result); } + + public void testExceedingByteSizeLimit() throws IOException { + SearchLookup lookup = mock(SearchLookup.class); + + // We don't need a real index, just need to construct a LeafReaderContext which cannot be mocked + MemoryIndex index = new MemoryIndex(); + LeafReaderContext leafReaderContext = index.createSearcher().getIndexReader().leaves().get(0); + + LeafSearchLookup leafSearchLookup = mock(LeafSearchLookup.class); + when(lookup.getLeafSearchLookup(leafReaderContext)).thenReturn(leafSearchLookup); + + // Emitting a large string to exceed the byte size limit + DerivedFieldScript stringScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit('a' + i);", lookup).newInstance( + leafReaderContext + ); + expectThrows(ScriptException.class, () -> { + stringScript.setDocument(1); + stringScript.execute(); + }); + + // Emitting an integer to check byte size limit + DerivedFieldScript intScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit(42)", lookup).newInstance(leafReaderContext); + expectThrows(ScriptException.class, "Expected IllegalStateException for exceeding byte size limit", () -> { + intScript.setDocument(1); + intScript.execute(); + }); + + // Emitting a long to check byte size limit + DerivedFieldScript longScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit(1234567890123456789L)", lookup).newInstance( + leafReaderContext + ); + expectThrows(ScriptException.class, "Expected IllegalStateException for exceeding byte size limit", () -> { + longScript.setDocument(1); + longScript.execute(); + }); + + // Emitting a double to check byte size limit + DerivedFieldScript doubleScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit(3.14159)", lookup).newInstance( + leafReaderContext + ); + expectThrows(ScriptException.class, "Expected IllegalStateException for exceeding byte size limit", () -> { + doubleScript.setDocument(1); + doubleScript.execute(); + }); + + // Emitting a GeoPoint to check byte size limit + DerivedFieldScript geoPointScript = compile("for (int i = 0; i < 1024 * 1024; i++) emit(1.23, 4.56);", lookup).newInstance( + leafReaderContext + ); + expectThrows(ScriptException.class, "Expected IllegalStateException for exceeding byte size limit", () -> { + geoPointScript.setDocument(1); + geoPointScript.execute(); + }); + } } diff --git a/server/src/main/java/org/opensearch/script/DerivedFieldScript.java b/server/src/main/java/org/opensearch/script/DerivedFieldScript.java index f56c9ad1cd101..e9988ec5aeef2 100644 --- a/server/src/main/java/org/opensearch/script/DerivedFieldScript.java +++ b/server/src/main/java/org/opensearch/script/DerivedFieldScript.java @@ -9,12 +9,14 @@ package org.opensearch.script; import org.apache.lucene.index.LeafReaderContext; +import org.opensearch.common.collect.Tuple; import org.opensearch.index.fielddata.ScriptDocValues; import org.opensearch.search.lookup.LeafSearchLookup; import org.opensearch.search.lookup.SearchLookup; import org.opensearch.search.lookup.SourceLookup; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -31,6 +33,7 @@ public abstract class DerivedFieldScript { public static final String[] PARAMETERS = {}; public static final ScriptContext CONTEXT = new ScriptContext<>("derived_field", Factory.class); + private static final int MAX_BYTE_SIZE = 1024 * 1024; // Maximum allowed byte size (1 MB) private static final Map> PARAMS_FUNCTIONS = Map.of( "doc", @@ -54,18 +57,22 @@ public abstract class DerivedFieldScript { */ private List emittedValues; + private int totalByteSize; + public DerivedFieldScript(Map params, SearchLookup lookup, LeafReaderContext leafContext) { Map parameters = new HashMap<>(params); this.leafLookup = lookup.getLeafSearchLookup(leafContext); parameters.putAll(leafLookup.asMap()); this.params = new DynamicMap(parameters, PARAMS_FUNCTIONS); this.emittedValues = new ArrayList<>(); + this.totalByteSize = 0; } public DerivedFieldScript() { this.params = null; this.leafLookup = null; this.emittedValues = new ArrayList<>(); + this.totalByteSize = 0; } /** @@ -95,11 +102,38 @@ public List getEmittedValues() { */ public void setDocument(int docid) { this.emittedValues = new ArrayList<>(); + this.totalByteSize = 0; leafLookup.setDocument(docid); } public void addEmittedValue(Object o) { - emittedValues.add(o); + int byteSize = getObjectByteSize(o); + int newTotalByteSize = totalByteSize + byteSize; + if (newTotalByteSize <= MAX_BYTE_SIZE) { + emittedValues.add(o); + totalByteSize = newTotalByteSize; + } else { + throw new IllegalStateException("Exceeded maximum allowed byte size for emitted values"); + } + } + + private int getObjectByteSize(Object obj) { + if (obj instanceof String) { + return ((String) obj).getBytes(StandardCharsets.UTF_8).length; + } else if (obj instanceof Integer) { + return Integer.BYTES; + } else if (obj instanceof Long) { + return Long.BYTES; + } else if (obj instanceof Double) { + return Double.BYTES; + } else if (obj instanceof Boolean) { + return Byte.BYTES; // Assuming 1 byte for boolean + } else if (obj instanceof Tuple) { + // Assuming each element in the tuple is a double for GeoPoint case + return Double.BYTES * 2; + } else { + throw new IllegalArgumentException("Unsupported object type passed in emit()"); + } } public void execute() {}