Overflow prevention (#16812)

Signed-off-by: Prudhvi Godithi <[email protected]>
opensearch-project · Dec 9, 2024 · 5ba909a · 5ba909a
1 parent 2d18c34
commit 5ba909a
Showing 6 changed files with 243 additions and 4 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Support prefix list for remote repository attributes([#16271](https://github.com/opensearch-project/OpenSearch/pull/16271))
 - Add new configuration setting `synonym_analyzer`, to the `synonym` and `synonym_graph` filters, enabling the specification of a custom analyzer for reading the synonym file ([#16488](https://github.com/opensearch-project/OpenSearch/pull/16488)).
 - Add stats for remote publication failure and move download failure stats to remote methods([#16682](https://github.com/opensearch-project/OpenSearch/pull/16682/))
+- Added a precaution to handle extreme date values during sorting to prevent `arithmetic_exception: long overflow` ([#16812](https://github.com/opensearch-project/OpenSearch/pull/16812)).
 
 ### Dependencies
 - Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504))

diff --git a/server/src/main/java/org/opensearch/common/time/DateUtils.java b/server/src/main/java/org/opensearch/common/time/DateUtils.java
@@ -272,6 +272,30 @@ public static Instant clampToNanosRange(Instant instant) {
         return instant;
     }
 
+    static final Instant INSTANT_LONG_MIN_VALUE = Instant.ofEpochMilli(Long.MIN_VALUE);
+    static final Instant INSTANT_LONG_MAX_VALUE = Instant.ofEpochMilli(Long.MAX_VALUE);
+
+    /**
+     * Clamps the given {@link Instant} to the valid epoch millisecond range.
+     *
+     * - If the input is before {@code Long.MIN_VALUE}, it returns {@code Instant.ofEpochMilli(Long.MIN_VALUE)}.
+     * - If the input is after {@code Long.MAX_VALUE}, it returns {@code Instant.ofEpochMilli(Long.MAX_VALUE)}.
+     * - Otherwise, it returns the input as-is.
+     *
+     * @param instant the {@link Instant} to clamp
+     * @return the clamped {@link Instant}
+     * @throws NullPointerException if the input is {@code null}
+     */
+    public static Instant clampToMillisRange(Instant instant) {
+        if (instant.isBefore(INSTANT_LONG_MIN_VALUE)) {
+            return INSTANT_LONG_MIN_VALUE;
+        }
+        if (instant.isAfter(INSTANT_LONG_MAX_VALUE)) {
+            return INSTANT_LONG_MAX_VALUE;
+        }
+        return instant;
+    }
+
     /**
      * convert a long value to a java time instant
      * the long value resembles the nanoseconds since the epoch

diff --git a/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java
@@ -122,7 +122,7 @@ public enum Resolution {
         MILLISECONDS(CONTENT_TYPE, NumericType.DATE) {
             @Override
             public long convert(Instant instant) {
-                return instant.toEpochMilli();
+                return clampToValidRange(instant).toEpochMilli();
             }
 
             @Override
@@ -132,7 +132,7 @@ public Instant toInstant(long value) {
 
             @Override
             public Instant clampToValidRange(Instant instant) {
-                return instant;
+                return DateUtils.clampToMillisRange(instant);
             }
 
             @Override

diff --git a/server/src/test/java/org/opensearch/common/time/DateUtilsTests.java b/server/src/test/java/org/opensearch/common/time/DateUtilsTests.java
@@ -260,4 +260,21 @@ public void testRoundYear() {
         long startOf1996 = Year.of(1996).atDay(1).atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli();
         assertThat(DateUtils.roundYear(endOf1996), is(startOf1996));
     }
+
+    public void testClampToMillisRange() {
+        Instant normalInstant = Instant.now();
+        assertEquals(normalInstant, DateUtils.clampToMillisRange(normalInstant));
+
+        Instant beforeMinInstant = DateUtils.INSTANT_LONG_MIN_VALUE.minusMillis(1);
+        assertEquals(DateUtils.INSTANT_LONG_MIN_VALUE, DateUtils.clampToMillisRange(beforeMinInstant));
+
+        Instant afterMaxInstant = DateUtils.INSTANT_LONG_MAX_VALUE.plusMillis(1);
+        assertEquals(DateUtils.INSTANT_LONG_MAX_VALUE, DateUtils.clampToMillisRange(afterMaxInstant));
+
+        assertEquals(DateUtils.INSTANT_LONG_MIN_VALUE, DateUtils.clampToMillisRange(DateUtils.INSTANT_LONG_MIN_VALUE));
+
+        assertEquals(DateUtils.INSTANT_LONG_MAX_VALUE, DateUtils.clampToMillisRange(DateUtils.INSTANT_LONG_MAX_VALUE));
+
+        assertThrows(NullPointerException.class, () -> DateUtils.clampToMillisRange(null));
+    }
 }
diff --git a/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/DateFieldMapperTests.java
@@ -156,7 +156,6 @@ public void testIgnoreMalformedLegacy() throws IOException {
             "failed to parse date field [2016-03-99] with format [strict_date_optional_time||epoch_millis]"
         );
         testIgnoreMalformedForValue("-2147483648", "Invalid value for Year (valid values -999999999 - 999999999): -2147483648");
-        testIgnoreMalformedForValue("-522000000", "long overflow");
     }
 
     public void testIgnoreMalformed() throws IOException {
@@ -170,7 +169,6 @@ public void testIgnoreMalformed() throws IOException {
             "failed to parse date field [2016-03-99] with format [strict_date_time_no_millis||strict_date_optional_time||epoch_millis]"
         );
         testIgnoreMalformedForValue("-2147483648", "Invalid value for Year (valid values -999999999 - 999999999): -2147483648");
-        testIgnoreMalformedForValue("-522000000", "long overflow");
     }
 
     private void testIgnoreMalformedForValue(String value, String expectedCause) throws IOException {

diff --git a/server/src/test/java/org/opensearch/index/mapper/DateFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/DateFieldTypeTests.java
@@ -31,20 +31,32 @@
 
 package org.opensearch.index.mapper;
 
+import org.apache.lucene.document.Field;
 import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.NumericDocValuesField;
 import org.apache.lucene.document.SortedNumericDocValuesField;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.MultiReader;
 import org.apache.lucene.index.SortedNumericDocValues;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexOrDocValuesQuery;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
 import org.opensearch.Version;
 import org.opensearch.cluster.metadata.IndexMetadata;
@@ -71,8 +83,12 @@
 import org.joda.time.DateTimeZone;
 
 import java.io.IOException;
+import java.time.Instant;
 import java.time.ZoneOffset;
+import java.util.Arrays;
 import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
 
 import static org.hamcrest.CoreMatchers.is;
 import static org.apache.lucene.document.LongPoint.pack;
@@ -490,4 +506,187 @@ public void testParseSourceValueNanos() throws IOException {
         MappedFieldType nullValueMapper = fieldType(Resolution.NANOSECONDS, "strict_date_time||epoch_millis", nullValueDate);
         assertEquals(Collections.singletonList(nullValueDate), fetchSourceValue(nullValueMapper, null));
     }
+
+    public void testDateResolutionForOverflow() throws IOException {
+        Directory dir = newDirectory();
+        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
+
+        DateFieldType ft = new DateFieldType(
+            "test_date",
+            true,
+            true,
+            true,
+            DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||strict_date_optional_time"),
+            Resolution.MILLISECONDS,
+            null,
+            Collections.emptyMap()
+        );
+
+        List<String> dates = Arrays.asList(
+            null,
+            "2020-01-01T00:00:00Z",
+            null,
+            "2021-01-01T00:00:00Z",
+            "+292278994-08-17T07:12:55.807Z",
+            null,
+            "-292275055-05-16T16:47:04.192Z"
+        );
+
+        int numNullDates = 0;
+        long minDateValue = Long.MAX_VALUE;
+        long maxDateValue = Long.MIN_VALUE;
+
+        for (int i = 0; i < dates.size(); i++) {
+            ParseContext.Document doc = new ParseContext.Document();
+            String dateStr = dates.get(i);
+
+            if (dateStr != null) {
+                long timestamp = Resolution.MILLISECONDS.convert(DateFormatters.from(ft.dateTimeFormatter().parse(dateStr)).toInstant());
+                doc.add(new LongPoint(ft.name(), timestamp));
+                doc.add(new SortedNumericDocValuesField(ft.name(), timestamp));
+                doc.add(new StoredField(ft.name(), timestamp));
+                doc.add(new StoredField("id", i));
+                minDateValue = Math.min(minDateValue, timestamp);
+                maxDateValue = Math.max(maxDateValue, timestamp);
+            } else {
+                numNullDates++;
+                doc.add(new StoredField("id", i));
+            }
+            w.addDocument(doc);
+        }
+
+        DirectoryReader reader = DirectoryReader.open(w);
+        IndexSearcher searcher = new IndexSearcher(reader);
+
+        Settings indexSettings = Settings.builder()
+            .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
+            .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
+            .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
+            .build();
+        QueryShardContext context = new QueryShardContext(
+            0,
+            new IndexSettings(IndexMetadata.builder("foo").settings(indexSettings).build(), indexSettings),
+            BigArrays.NON_RECYCLING_INSTANCE,
+            null,
+            null,
+            null,
+            null,
+            null,
+            xContentRegistry(),
+            writableRegistry(),
+            null,
+            null,
+            () -> nowInMillis,
+            null,
+            null,
+            () -> true,
+            null
+        );
+
+        Query rangeQuery = ft.rangeQuery(
+            "-292275055-05-16T16:47:04.192Z",
+            "+292278994-08-17T07:12:55.807Z",
+            true,
+            true,
+            null,
+            null,
+            null,
+            context
+        );
+
+        TopDocs topDocs = searcher.search(rangeQuery, dates.size());
+        assertEquals("Number of non-null date documents", dates.size() - numNullDates, topDocs.totalHits.value);
+
+        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
+            org.apache.lucene.document.Document doc = reader.document(scoreDoc.doc);
+            IndexableField dateField = doc.getField(ft.name());
+            if (dateField != null) {
+                long dateValue = dateField.numericValue().longValue();
+                assertTrue(
+                    "Date value " + dateValue + " should be within valid range",
+                    dateValue >= minDateValue && dateValue <= maxDateValue
+                );
+            }
+        }
+
+        DateFieldType ftWithNullValue = new DateFieldType(
+            "test_date",
+            true,
+            true,
+            true,
+            DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||strict_date_optional_time"),
+            Resolution.MILLISECONDS,
+            "2020-01-01T00:00:00Z",
+            Collections.emptyMap()
+        );
+
+        Query nullValueQuery = ftWithNullValue.termQuery("2020-01-01T00:00:00Z", context);
+        topDocs = searcher.search(nullValueQuery, dates.size());
+        assertEquals("Documents matching the 2020-01-01 date", 1, topDocs.totalHits.value);
+
+        IOUtils.close(reader, w, dir);
+    }
+
+    public void testDateFieldTypeWithNulls() throws IOException {
+        DateFieldType ft = new DateFieldType(
+            "domainAttributes.dueDate",
+            true,
+            true,
+            true,
+            DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||date_optional_time"),
+            Resolution.MILLISECONDS,
+            null,
+            Collections.emptyMap()
+        );
+
+        Directory dir = newDirectory();
+        IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null));
+
+        int nullDocs = 3500;
+        int datedDocs = 50;
+
+        for (int i = 0; i < nullDocs; i++) {
+            ParseContext.Document doc = new ParseContext.Document();
+            doc.add(new StringField("domainAttributes.firmId", "12345678910111213", Field.Store.YES));
+            w.addDocument(doc);
+        }
+
+        for (int i = 1; i <= datedDocs; i++) {
+            ParseContext.Document doc = new ParseContext.Document();
+            String dateStr = String.format(Locale.ROOT, "2022-03-%02dT15:40:58.324", (i % 30) + 1);
+            long timestamp = Resolution.MILLISECONDS.convert(DateFormatters.from(ft.dateTimeFormatter().parse(dateStr)).toInstant());
+            doc.add(new StringField("domainAttributes.firmId", "12345678910111213", Field.Store.YES));
+            doc.add(new LongPoint(ft.name(), timestamp));
+            doc.add(new SortedNumericDocValuesField(ft.name(), timestamp));
+            doc.add(new StoredField(ft.name(), timestamp));
+            w.addDocument(doc);
+        }
+
+        DirectoryReader reader = DirectoryReader.open(w);
+        IndexSearcher searcher = new IndexSearcher(reader);
+
+        BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
+        queryBuilder.add(new TermQuery(new Term("domainAttributes.firmId", "12345678910111213")), BooleanClause.Occur.MUST);
+
+        Sort sort = new Sort(new SortField(ft.name(), SortField.Type.DOC, false));
+
+        for (int i = 0; i < 100; i++) {
+            TopDocs topDocs = searcher.search(queryBuilder.build(), nullDocs + datedDocs, sort);
+            assertEquals("Total hits should match total documents", nullDocs + datedDocs, topDocs.totalHits.value);
+            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
+                org.apache.lucene.document.Document doc = reader.document(scoreDoc.doc);
+                IndexableField dateField = doc.getField(ft.name());
+                if (dateField != null) {
+                    long dateValue = dateField.numericValue().longValue();
+                    Instant dateInstant = Instant.ofEpochMilli(dateValue);
+                    assertTrue(
+                        "Date should be in March 2022",
+                        dateInstant.isAfter(Instant.parse("2022-03-01T00:00:00Z"))
+                            && dateInstant.isBefore(Instant.parse("2022-04-01T00:00:00Z"))
+                    );
+                }
+            }
+        }
+        IOUtils.close(reader, w, dir);
+    }
 }