From 51c9927e8f0118b17a9c5581227b965610fdbd13 Mon Sep 17 00:00:00 2001 From: Heemin Kim Date: Tue, 18 Jun 2024 10:31:53 -0700 Subject: [PATCH] Switch to iterative version of WKT format parser (#14086) Signed-off-by: Heemin Kim --- CHANGELOG.md | 1 + .../geometry/utils/WellKnownText.java | 69 +++++++++++++++++-- .../geometry/GeometryCollectionTests.java | 30 ++++++++ 3 files changed, 94 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 529a5ce57ddf3..dc85bd4f85ffd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Fixed - Fix handling of Short and Byte data types in ScriptProcessor ingest pipeline ([#14379](https://github.com/opensearch-project/OpenSearch/issues/14379)) +- Switch to iterative version of WKT format parser ([#14086](https://github.com/opensearch-project/OpenSearch/pull/14086)) ### Security diff --git a/libs/geo/src/main/java/org/opensearch/geometry/utils/WellKnownText.java b/libs/geo/src/main/java/org/opensearch/geometry/utils/WellKnownText.java index ed1d63e6d4fef..8ad135b8bc1ca 100644 --- a/libs/geo/src/main/java/org/opensearch/geometry/utils/WellKnownText.java +++ b/libs/geo/src/main/java/org/opensearch/geometry/utils/WellKnownText.java @@ -49,8 +49,10 @@ import java.io.StreamTokenizer; import java.io.StringReader; import java.text.ParseException; +import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collections; +import java.util.Deque; import java.util.List; import java.util.Locale; @@ -67,6 +69,7 @@ public class WellKnownText { public static final String RPAREN = ")"; public static final String COMMA = ","; public static final String NAN = "NaN"; + public static final int MAX_DEPTH_OF_GEO_COLLECTION = 1000; private final String NUMBER = ""; private final String EOF = "END-OF-STREAM"; @@ -278,6 +281,16 @@ public Geometry fromWKT(String wkt) throws IOException, ParseException { */ private Geometry parseGeometry(StreamTokenizer stream) throws IOException, ParseException { final String type = nextWord(stream).toLowerCase(Locale.ROOT); + switch (type) { + case "geometrycollection": + return parseGeometryCollection(stream); + default: + return parseSimpleGeometry(stream, type); + } + } + + private Geometry parseSimpleGeometry(StreamTokenizer stream, String type) throws IOException, ParseException { + assert "geometrycollection".equals(type) == false; switch (type) { case "point": return parsePoint(stream); @@ -294,7 +307,7 @@ private Geometry parseGeometry(StreamTokenizer stream) throws IOException, Parse case "bbox": return parseBBox(stream); case "geometrycollection": - return parseGeometryCollection(stream); + throw new IllegalStateException("Unexpected type: geometrycollection"); case "circle": // Not part of the standard, but we need it for internal serialization return parseCircle(stream); } @@ -305,12 +318,56 @@ private GeometryCollection parseGeometryCollection(StreamTokenizer str if (nextEmptyOrOpen(stream).equals(EMPTY)) { return GeometryCollection.EMPTY; } - List shapes = new ArrayList<>(); - shapes.add(parseGeometry(stream)); - while (nextCloserOrComma(stream).equals(COMMA)) { - shapes.add(parseGeometry(stream)); + + List topLevelShapes = new ArrayList<>(); + Deque> deque = new ArrayDeque<>(); + deque.push(topLevelShapes); + boolean isFirstIteration = true; + List currentLevelShapes = null; + while (!deque.isEmpty()) { + List previousShapes = deque.pop(); + if (currentLevelShapes != null) { + previousShapes.add(new GeometryCollection<>(currentLevelShapes)); + } + currentLevelShapes = previousShapes; + + if (isFirstIteration == true) { + isFirstIteration = false; + } else { + if (nextCloserOrComma(stream).equals(COMMA) == false) { + // Done with current level, continue with parent level + continue; + } + } + while (true) { + final String type = nextWord(stream).toLowerCase(Locale.ROOT); + if (type.equals("geometrycollection")) { + if (nextEmptyOrOpen(stream).equals(EMPTY) == false) { + // GEOMETRYCOLLECTION() -> 1 depth, GEOMETRYCOLLECTION(GEOMETRYCOLLECTION()) -> 2 depth + // When parsing the top level geometry collection, the queue size is zero. + // When max depth is 1, we don't want to push any sub geometry collection in the queue. + // Therefore, we subtract 2 from max depth. + if (deque.size() >= MAX_DEPTH_OF_GEO_COLLECTION - 2) { + throw new IllegalArgumentException( + "a geometry collection with a depth greater than " + MAX_DEPTH_OF_GEO_COLLECTION + " is not supported" + ); + } + deque.push(currentLevelShapes); + currentLevelShapes = new ArrayList<>(); + continue; + } + currentLevelShapes.add(GeometryCollection.EMPTY); + } else { + currentLevelShapes.add(parseSimpleGeometry(stream, type)); + } + + if (nextCloserOrComma(stream).equals(COMMA) == false) { + break; + } + } } - return new GeometryCollection<>(shapes); + + return new GeometryCollection<>(topLevelShapes); } private Point parsePoint(StreamTokenizer stream) throws IOException, ParseException { diff --git a/libs/geo/src/test/java/org/opensearch/geometry/GeometryCollectionTests.java b/libs/geo/src/test/java/org/opensearch/geometry/GeometryCollectionTests.java index 631b6456a77da..cd8bb8f585966 100644 --- a/libs/geo/src/test/java/org/opensearch/geometry/GeometryCollectionTests.java +++ b/libs/geo/src/test/java/org/opensearch/geometry/GeometryCollectionTests.java @@ -62,6 +62,11 @@ public void testBasicSerialization() throws IOException, ParseException { assertEquals("GEOMETRYCOLLECTION EMPTY", wkt.toWKT(GeometryCollection.EMPTY)); assertEquals(GeometryCollection.EMPTY, wkt.fromWKT("GEOMETRYCOLLECTION EMPTY)")); + + assertEquals( + new GeometryCollection(Arrays.asList(GeometryCollection.EMPTY)), + wkt.fromWKT("GEOMETRYCOLLECTION (GEOMETRYCOLLECTION EMPTY)") + ); } @SuppressWarnings("ConstantConditions") @@ -86,4 +91,29 @@ public void testInitValidation() { new StandardValidator(true).validate(new GeometryCollection(Collections.singletonList(new Point(20, 10, 30)))); } + + public void testDeeplyNestedGeometryCollection() throws IOException, ParseException { + WellKnownText wkt = new WellKnownText(true, new GeographyValidator(true)); + StringBuilder validGeometryCollectionHead = new StringBuilder("GEOMETRYCOLLECTION"); + StringBuilder validGeometryCollectionTail = new StringBuilder(" EMPTY"); + for (int i = 0; i < WellKnownText.MAX_DEPTH_OF_GEO_COLLECTION - 1; i++) { + validGeometryCollectionHead.append(" (GEOMETRYCOLLECTION"); + validGeometryCollectionTail.append(")"); + } + // Expect no exception + wkt.fromWKT(validGeometryCollectionHead.append(validGeometryCollectionTail).toString()); + + StringBuilder invalidGeometryCollectionHead = new StringBuilder("GEOMETRYCOLLECTION"); + StringBuilder invalidGeometryCollectionTail = new StringBuilder(" EMPTY"); + for (int i = 0; i < WellKnownText.MAX_DEPTH_OF_GEO_COLLECTION; i++) { + invalidGeometryCollectionHead.append(" (GEOMETRYCOLLECTION"); + invalidGeometryCollectionTail.append(")"); + } + + IllegalArgumentException ex = expectThrows( + IllegalArgumentException.class, + () -> wkt.fromWKT(invalidGeometryCollectionHead.append(invalidGeometryCollectionTail).toString()) + ); + assertEquals("a geometry collection with a depth greater than 1000 is not supported", ex.getMessage()); + } }