Check UTF16 string size before converting to String to avoid OOME (#7963

) * Check UTF16 string size before converting to string to avoid OOME Signed-off-by: Rishab Nahata <[email protected]>
opensearch-project · Jun 29, 2023 · 544b1ca · 544b1ca
1 parent d5482f7
commit 544b1ca
Show file tree

Hide file tree

Showing 3 changed files with 54 additions and 1 deletion.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -139,8 +139,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - [Snapshot Interop] Add Changes in Create Snapshot Flow for remote store interoperability. ([#7118](https://github.com/opensearch-project/OpenSearch/pull/7118))
 - Allow insecure string settings to warn-log usage and advise to migration of a newer secure variant ([#5496](https://github.com/opensearch-project/OpenSearch/pull/5496))
 - Add self-organizing hash table to improve the performance of bucket aggregations ([#7652](https://github.com/opensearch-project/OpenSearch/pull/7652))
+- Check UTF16 string size before converting to String to avoid OOME ([#7963](https://github.com/opensearch-project/OpenSearch/pull/7963))
 - Move ZSTD compression codecs out of the sandbox ([#7908](https://github.com/opensearch-project/OpenSearch/pull/7908))
 
+
 ### Deprecated
 
 ### Removed

diff --git a/libs/core/src/main/java/org/opensearch/common/bytes/AbstractBytesReference.java b/libs/core/src/main/java/org/opensearch/common/bytes/AbstractBytesReference.java
@@ -33,6 +33,7 @@
 
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefIterator;
+import org.apache.lucene.util.UnicodeUtil;
 import org.opensearch.common.io.stream.StreamInput;
 import org.opensearch.core.xcontent.XContentBuilder;
 
@@ -49,6 +50,7 @@
 public abstract class AbstractBytesReference implements BytesReference {
 
     private Integer hash = null; // we cache the hash of this reference since it can be quite costly to re-calculated it
+    private static final int MAX_UTF16_LENGTH = Integer.MAX_VALUE >> 1;
 
     @Override
     public int getInt(int index) {
@@ -80,9 +82,19 @@ public void writeTo(OutputStream os) throws IOException {
         }
     }
 
+    protected int getMaxUTF16Length() {
+        return MAX_UTF16_LENGTH;
+    }
+
     @Override
     public String utf8ToString() {
-        return toBytesRef().utf8ToString();
+        BytesRef bytesRef = toBytesRef();
+        final char[] ref = new char[bytesRef.length];
+        final int len = UnicodeUtil.UTF8toUTF16(bytesRef, ref);
+        if (len > getMaxUTF16Length()) {
+            throw new IllegalArgumentException("UTF16 String size is " + len + ", should be less than " + getMaxUTF16Length());
+        }
+        return new String(ref, 0, len);
     }
 
     @Override

diff --git a/test/framework/src/main/java/org/opensearch/common/bytes/AbstractBytesReferenceTestCase.java b/test/framework/src/main/java/org/opensearch/common/bytes/AbstractBytesReferenceTestCase.java
@@ -478,6 +478,45 @@ public void testToUtf8() throws IOException {
         // TODO: good way to test?
     }
 
+    public void testUTF8toString_ExceedsMaxLength() {
+        AbstractBytesReference abr = new TestAbstractBytesReference();
+        IllegalArgumentException e = assertThrows(IllegalArgumentException.class, abr::utf8ToString);
+        assertTrue(e.getMessage().contains("UTF16 String size is"));
+        assertTrue(e.getMessage().contains("should be less than"));
+    }
+
+    static class TestAbstractBytesReference extends AbstractBytesReference {
+        @Override
+        public byte get(int index) {
+            return 0;
+        }
+
+        @Override
+        public int length() {
+            return 0;
+        }
+
+        @Override
+        public BytesReference slice(int from, int length) {
+            return null;
+        }
+
+        @Override
+        public long ramBytesUsed() {
+            return 0;
+        }
+
+        @Override
+        public BytesRef toBytesRef() {
+            return new BytesRef("UTF16 length exceed test");
+        }
+
+        @Override
+        public int getMaxUTF16Length() {
+            return 1;
+        }
+    }
+
     public void testToBytesRef() throws IOException {
         int length = randomIntBetween(0, PAGE_SIZE);
         BytesReference pbr = newBytesReference(length);