Skip to content

Commit

Permalink
Overflow prevention (#16812)
Browse files Browse the repository at this point in the history
Signed-off-by: Prudhvi Godithi <[email protected]>
prudhvigodithi authored Dec 9, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent 2d18c34 commit 5ba909a
Showing 6 changed files with 243 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -21,6 +21,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Support prefix list for remote repository attributes([#16271](https://github.com/opensearch-project/OpenSearch/pull/16271))
- Add new configuration setting `synonym_analyzer`, to the `synonym` and `synonym_graph` filters, enabling the specification of a custom analyzer for reading the synonym file ([#16488](https://github.com/opensearch-project/OpenSearch/pull/16488)).
- Add stats for remote publication failure and move download failure stats to remote methods([#16682](https://github.com/opensearch-project/OpenSearch/pull/16682/))
- Add a precaution to handle extreme date values during sorting to prevent `arithmetic_exception: long overflow` ([#16812](https://github.com/opensearch-project/OpenSearch/pull/16812)).

### Dependencies
- Bump `com.google.cloud:google-cloud-core-http` from 2.23.0 to 2.47.0 ([#16504](https://github.com/opensearch-project/OpenSearch/pull/16504))
24 changes: 24 additions & 0 deletions server/src/main/java/org/opensearch/common/time/DateUtils.java
Original file line number Diff line number Diff line change
@@ -272,6 +272,30 @@ public static Instant clampToNanosRange(Instant instant) {
return instant;
}

// Earliest and latest instants that can be expressed as a long count of epoch milliseconds.
static final Instant INSTANT_LONG_MIN_VALUE = Instant.ofEpochMilli(Long.MIN_VALUE);
static final Instant INSTANT_LONG_MAX_VALUE = Instant.ofEpochMilli(Long.MAX_VALUE);

/**
 * Restricts an {@link Instant} to the span representable as epoch milliseconds in a {@code long}.
 *
 * <ul>
 *   <li>An instant earlier than {@link #INSTANT_LONG_MIN_VALUE} yields {@link #INSTANT_LONG_MIN_VALUE}.</li>
 *   <li>An instant later than {@link #INSTANT_LONG_MAX_VALUE} yields {@link #INSTANT_LONG_MAX_VALUE}.</li>
 *   <li>Any other instant, including the two bounds themselves, is returned unchanged.</li>
 * </ul>
 *
 * @param instant the {@link Instant} to clamp
 * @return the bound that was exceeded, or {@code instant} itself when it is already within range
 * @throws NullPointerException if {@code instant} is {@code null}
 */
public static Instant clampToMillisRange(Instant instant) {
    // A null argument fails here, on the first dereference.
    if (instant.compareTo(INSTANT_LONG_MIN_VALUE) < 0) {
        return INSTANT_LONG_MIN_VALUE;
    }
    return instant.compareTo(INSTANT_LONG_MAX_VALUE) > 0 ? INSTANT_LONG_MAX_VALUE : instant;
}

/**
* convert a long value to a java time instant
* the long value resembles the nanoseconds since the epoch
Original file line number Diff line number Diff line change
@@ -122,7 +122,7 @@ public enum Resolution {
MILLISECONDS(CONTENT_TYPE, NumericType.DATE) {
@Override
public long convert(Instant instant) {
return instant.toEpochMilli();
return clampToValidRange(instant).toEpochMilli();
}

@Override
@@ -132,7 +132,7 @@ public Instant toInstant(long value) {

@Override
public Instant clampToValidRange(Instant instant) {
return instant;
return DateUtils.clampToMillisRange(instant);
}

@Override
Original file line number Diff line number Diff line change
@@ -260,4 +260,21 @@ public void testRoundYear() {
long startOf1996 = Year.of(1996).atDay(1).atStartOfDay().toInstant(ZoneOffset.UTC).toEpochMilli();
assertThat(DateUtils.roundYear(endOf1996), is(startOf1996));
}

// Exercises DateUtils.clampToMillisRange with a null argument, an in-range value,
// both bounds themselves, and values one millisecond outside each bound.
public void testClampToMillisRange() {
    final Instant lowerBound = DateUtils.INSTANT_LONG_MIN_VALUE;
    final Instant upperBound = DateUtils.INSTANT_LONG_MAX_VALUE;

    // null is rejected
    assertThrows(NullPointerException.class, () -> DateUtils.clampToMillisRange(null));

    // an in-range instant is passed through untouched
    final Instant current = Instant.now();
    assertEquals(current, DateUtils.clampToMillisRange(current));

    // the bounds are inclusive
    assertEquals(lowerBound, DateUtils.clampToMillisRange(lowerBound));
    assertEquals(upperBound, DateUtils.clampToMillisRange(upperBound));

    // values just outside the range are pulled back to the nearest bound
    assertEquals(lowerBound, DateUtils.clampToMillisRange(lowerBound.minusMillis(1)));
    assertEquals(upperBound, DateUtils.clampToMillisRange(upperBound.plusMillis(1)));
}
}
Original file line number Diff line number Diff line change
@@ -156,7 +156,6 @@ public void testIgnoreMalformedLegacy() throws IOException {
"failed to parse date field [2016-03-99] with format [strict_date_optional_time||epoch_millis]"
);
testIgnoreMalformedForValue("-2147483648", "Invalid value for Year (valid values -999999999 - 999999999): -2147483648");
testIgnoreMalformedForValue("-522000000", "long overflow");
}

public void testIgnoreMalformed() throws IOException {
@@ -170,7 +169,6 @@ public void testIgnoreMalformed() throws IOException {
"failed to parse date field [2016-03-99] with format [strict_date_time_no_millis||strict_date_optional_time||epoch_millis]"
);
testIgnoreMalformedForValue("-2147483648", "Invalid value for Year (valid values -999999999 - 999999999): -2147483648");
testIgnoreMalformedForValue("-522000000", "long overflow");
}

private void testIgnoreMalformedForValue(String value, String expectedCause) throws IOException {
Original file line number Diff line number Diff line change
@@ -31,20 +31,32 @@

package org.opensearch.index.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.IndexSortSortedNumericDocValuesRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.opensearch.Version;
import org.opensearch.cluster.metadata.IndexMetadata;
@@ -71,8 +83,12 @@
import org.joda.time.DateTimeZone;

import java.io.IOException;
import java.time.Instant;
import java.time.ZoneOffset;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Locale;

import static org.hamcrest.CoreMatchers.is;
import static org.apache.lucene.document.LongPoint.pack;
@@ -490,4 +506,187 @@ public void testParseSourceValueNanos() throws IOException {
MappedFieldType nullValueMapper = fieldType(Resolution.NANOSECONDS, "strict_date_time||epoch_millis", nullValueDate);
assertEquals(Collections.singletonList(nullValueDate), fetchSourceValue(nullValueMapper, null));
}

/**
 * Indexes a mix of documents without a date, with ordinary dates, and with the two extreme
 * dates used below (which appear to be the textual forms of {@code Long.MAX_VALUE} and
 * {@code Long.MIN_VALUE} epoch milliseconds), then checks that:
 * <ul>
 *   <li>converting every date through {@code Resolution.MILLISECONDS.convert} succeeds,</li>
 *   <li>a range query spanning both extremes matches every document that has a date, and</li>
 *   <li>a term query for {@code 2020-01-01T00:00:00Z} matches exactly one document.</li>
 * </ul>
 *
 * @throws IOException if the index cannot be written, read or closed
 */
public void testDateResolutionForOverflow() throws IOException {
    // try-with-resources releases the reader, writer and directory (in that order) even when an
    // assertion fails midway; closing only at the end of the method would leak them on failure.
    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null))) {
        DateFieldType ft = new DateFieldType(
            "test_date",
            true,
            true,
            true,
            DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||strict_date_optional_time"),
            Resolution.MILLISECONDS,
            null,
            Collections.emptyMap()
        );

        // Arrays.asList is used deliberately: the list contains nulls to model documents without a date.
        List<String> dates = Arrays.asList(
            null,
            "2020-01-01T00:00:00Z",
            null,
            "2021-01-01T00:00:00Z",
            "+292278994-08-17T07:12:55.807Z",
            null,
            "-292275055-05-16T16:47:04.192Z"
        );

        int numNullDates = 0;
        long minDateValue = Long.MAX_VALUE;
        long maxDateValue = Long.MIN_VALUE;

        for (int i = 0; i < dates.size(); i++) {
            ParseContext.Document doc = new ParseContext.Document();
            String dateStr = dates.get(i);

            if (dateStr != null) {
                long timestamp = Resolution.MILLISECONDS.convert(DateFormatters.from(ft.dateTimeFormatter().parse(dateStr)).toInstant());
                doc.add(new LongPoint(ft.name(), timestamp));
                doc.add(new SortedNumericDocValuesField(ft.name(), timestamp));
                doc.add(new StoredField(ft.name(), timestamp));
                doc.add(new StoredField("id", i));
                // Track the smallest and largest indexed value for the range assertion further down.
                minDateValue = Math.min(minDateValue, timestamp);
                maxDateValue = Math.max(maxDateValue, timestamp);
            } else {
                numNullDates++;
                doc.add(new StoredField("id", i));
            }
            w.addDocument(doc);
        }

        try (DirectoryReader reader = DirectoryReader.open(w)) {
            IndexSearcher searcher = new IndexSearcher(reader);

            Settings indexSettings = Settings.builder()
                .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT)
                .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
                .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 1)
                .build();
            QueryShardContext context = new QueryShardContext(
                0,
                new IndexSettings(IndexMetadata.builder("foo").settings(indexSettings).build(), indexSettings),
                BigArrays.NON_RECYCLING_INSTANCE,
                null,
                null,
                null,
                null,
                null,
                xContentRegistry(),
                writableRegistry(),
                null,
                null,
                () -> nowInMillis,
                null,
                null,
                () -> true,
                null
            );

            // Inclusive range from the lowest to the highest extreme date string.
            Query rangeQuery = ft.rangeQuery(
                "-292275055-05-16T16:47:04.192Z",
                "+292278994-08-17T07:12:55.807Z",
                true,
                true,
                null,
                null,
                null,
                context
            );

            TopDocs topDocs = searcher.search(rangeQuery, dates.size());
            assertEquals("Number of non-null date documents", dates.size() - numNullDates, topDocs.totalHits.value);

            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                org.apache.lucene.document.Document doc = reader.document(scoreDoc.doc);
                IndexableField dateField = doc.getField(ft.name());
                if (dateField != null) {
                    long dateValue = dateField.numericValue().longValue();
                    assertTrue(
                        "Date value " + dateValue + " should be within valid range",
                        dateValue >= minDateValue && dateValue <= maxDateValue
                    );
                }
            }

            // Same field definition, but configured with a null-value replacement date.
            DateFieldType ftWithNullValue = new DateFieldType(
                "test_date",
                true,
                true,
                true,
                DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||strict_date_optional_time"),
                Resolution.MILLISECONDS,
                "2020-01-01T00:00:00Z",
                Collections.emptyMap()
            );

            Query nullValueQuery = ftWithNullValue.termQuery("2020-01-01T00:00:00Z", context);
            topDocs = searcher.search(nullValueQuery, dates.size());
            assertEquals("Documents matching the 2020-01-01 date", 1, topDocs.totalHits.value);
        }
    }
}

/**
 * Builds an index in which most documents have no date field and a few carry a March 2022
 * timestamp, then repeatedly runs a sorted term query and checks that every hit is returned
 * and that every stored date value lies within March 2022.
 *
 * @throws IOException if the index cannot be written, read or closed
 */
public void testDateFieldTypeWithNulls() throws IOException {
    DateFieldType ft = new DateFieldType(
        "domainAttributes.dueDate",
        true,
        true,
        true,
        DateFormatter.forPattern("yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis||date_optional_time"),
        Resolution.MILLISECONDS,
        null,
        Collections.emptyMap()
    );

    // try-with-resources releases the reader, writer and directory (in that order) even when an
    // assertion fails midway; closing only at the end of the method would leak them on failure.
    try (Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(null))) {
        int nullDocs = 3500;
        int datedDocs = 50;

        // Documents that only carry the firm id and no date field at all.
        for (int i = 0; i < nullDocs; i++) {
            ParseContext.Document doc = new ParseContext.Document();
            doc.add(new StringField("domainAttributes.firmId", "12345678910111213", Field.Store.YES));
            w.addDocument(doc);
        }

        // Documents with a date; (i % 30) + 1 keeps the day of month between 1 and 30.
        for (int i = 1; i <= datedDocs; i++) {
            ParseContext.Document doc = new ParseContext.Document();
            String dateStr = String.format(Locale.ROOT, "2022-03-%02dT15:40:58.324", (i % 30) + 1);
            long timestamp = Resolution.MILLISECONDS.convert(DateFormatters.from(ft.dateTimeFormatter().parse(dateStr)).toInstant());
            doc.add(new StringField("domainAttributes.firmId", "12345678910111213", Field.Store.YES));
            doc.add(new LongPoint(ft.name(), timestamp));
            doc.add(new SortedNumericDocValuesField(ft.name(), timestamp));
            doc.add(new StoredField(ft.name(), timestamp));
            w.addDocument(doc);
        }

        try (DirectoryReader reader = DirectoryReader.open(w)) {
            IndexSearcher searcher = new IndexSearcher(reader);

            BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
            queryBuilder.add(new TermQuery(new Term("domainAttributes.firmId", "12345678910111213")), BooleanClause.Occur.MUST);
            // The query and the expected date bounds do not change between iterations, so build them once.
            Query firmQuery = queryBuilder.build();
            Instant marchStart = Instant.parse("2022-03-01T00:00:00Z");
            Instant aprilStart = Instant.parse("2022-04-01T00:00:00Z");

            // NOTE(review): SortField.Type.DOC is documented by Lucene as sorting by document number,
            // so the date field named here is presumably not what drives the ordering - confirm
            // whether a sort on the date values was intended.
            Sort sort = new Sort(new SortField(ft.name(), SortField.Type.DOC, false));

            for (int i = 0; i < 100; i++) {
                TopDocs topDocs = searcher.search(firmQuery, nullDocs + datedDocs, sort);
                assertEquals("Total hits should match total documents", nullDocs + datedDocs, topDocs.totalHits.value);
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    org.apache.lucene.document.Document doc = reader.document(scoreDoc.doc);
                    IndexableField dateField = doc.getField(ft.name());
                    // Documents indexed without a date have no stored field and are skipped.
                    if (dateField != null) {
                        long dateValue = dateField.numericValue().longValue();
                        Instant dateInstant = Instant.ofEpochMilli(dateValue);
                        assertTrue("Date should be in March 2022", dateInstant.isAfter(marchStart) && dateInstant.isBefore(aprilStart));
                    }
                }
            }
        }
    }
}
}

0 comments on commit 5ba909a

Please sign in to comment.