Decouple IndexSettings from IncludeExclude (#2860) (#2861)
This change refactors an earlier change that imposed a reg-ex length limit on the include/exclude string. Instead of accepting an IndexSettings instance, the class now accepts an integer limit value. This is necessary because the IncludeExclude class is used outside the core codebase, where callers may be unaware of indices and their settings. To ensure that a limit is always imposed, a default limit is defined in the class.

(cherry picked from commit ba19668)
Signed-off-by: Kartik Ganesh <[email protected]>

Co-authored-by: Kartik Ganesh <[email protected]>
opensearch-trigger-bot[bot] and kartg authored Apr 13, 2022
1 parent 135177e commit 3af4300
Showing 6 changed files with 50 additions and 44 deletions.
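
At a high level, the change swaps the IndexSettings parameter on the IncludeExclude filter factories for a plain integer limit. The following is a minimal call-site sketch, not code from the commit itself; the context and includeExclude variables stand in for whatever the caller already has:

    // Before: call sites threaded the whole IndexSettings instance through to IncludeExclude
    // IncludeExclude.StringFilter filter = includeExclude.convertToStringFilter(format, indexSettings);

    // After: call sites inside the core codebase resolve the integer limit from the index settings up front
    int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
    IncludeExclude.StringFilter stringFilter = includeExclude.convertToStringFilter(format, maxRegexLength);

    // Callers outside the core codebase, which have no IndexSettings at hand, can omit the limit
    // and fall back to the built-in default (DEFAULT_MAX_REGEX_LENGTH = 1000)
    IncludeExclude.OrdinalsFilter ordinalsFilter = includeExclude.convertToOrdinalsFilter(format);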
@@ -79,6 +79,14 @@ public class IncludeExclude implements Writeable, ToXContentFragment {
// can disagree on which terms hash to the required partition.
private static final int HASH_PARTITIONING_SEED = 31;

/**
* The default length limit for a reg-ex string. The value is derived from {@link IndexSettings#MAX_REGEX_LENGTH_SETTING}.
* For context, see:
* https://github.com/opensearch-project/OpenSearch/issues/1992
* https://github.com/opensearch-project/OpenSearch/issues/2858
*/
private static final int DEFAULT_MAX_REGEX_LENGTH = 1000;

// for parsing purposes only
// TODO: move all aggs to the same package so that this stuff could be pkg-private
public static IncludeExclude merge(IncludeExclude include, IncludeExclude exclude) {
@@ -576,18 +584,18 @@ public boolean isPartitionBased() {
return incNumPartitions > 0;
}

private Automaton toAutomaton(IndexSettings indexSettings) {
private Automaton toAutomaton(int maxRegExLength) {
Automaton a;
if (include != null) {
validateRegExpStringLength(include, indexSettings);
validateRegExpStringLength(include, maxRegExLength);
a = new RegExp(include).toAutomaton();
} else if (includeValues != null) {
a = Automata.makeStringUnion(includeValues);
} else {
a = Automata.makeAnyString();
}
if (exclude != null) {
validateRegExpStringLength(exclude, indexSettings);
validateRegExpStringLength(exclude, maxRegExLength);
Automaton excludeAutomaton = new RegExp(exclude).toAutomaton();
a = Operations.minus(a, excludeAutomaton, Operations.DEFAULT_DETERMINIZE_WORK_LIMIT);
} else if (excludeValues != null) {
@@ -596,8 +604,7 @@ private Automaton toAutomaton(IndexSettings indexSettings) {
return a;
}

private static void validateRegExpStringLength(String source, IndexSettings indexSettings) {
int maxRegexLength = indexSettings.getMaxRegexLength();
private static void validateRegExpStringLength(String source, int maxRegexLength) {
if (maxRegexLength > 0 && source.length() > maxRegexLength) {
throw new IllegalArgumentException(
"The length of regex ["
@@ -613,9 +620,17 @@ private static void validateRegExpStringLength(String source, IndexSettings indexSettings) {
}
}

public StringFilter convertToStringFilter(DocValueFormat format, IndexSettings indexSettings) {
/**
* Wrapper method that imposes a default regex limit.
* See https://github.com/opensearch-project/OpenSearch/issues/2858
*/
public StringFilter convertToStringFilter(DocValueFormat format) {
return convertToStringFilter(format, DEFAULT_MAX_REGEX_LENGTH);
}

public StringFilter convertToStringFilter(DocValueFormat format, int maxRegexLength) {
if (isRegexBased()) {
return new AutomatonBackedStringFilter(toAutomaton(indexSettings));
return new AutomatonBackedStringFilter(toAutomaton(maxRegexLength));
}
if (isPartitionBased()) {
return new PartitionedStringFilter();
@@ -636,10 +651,18 @@ private static SortedSet<BytesRef> parseForDocValues(SortedSet<BytesRef> endUser
return result;
}

public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format, IndexSettings indexSettings) {
/**
* Wrapper method that imposes a default regex limit.
* See https://github.com/opensearch-project/OpenSearch/issues/2858
*/
public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format) {
return convertToOrdinalsFilter(format, DEFAULT_MAX_REGEX_LENGTH);
}

public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format, int maxRegexLength) {

if (isRegexBased()) {
return new AutomatonBackedOrdinalsFilter(toAutomaton(indexSettings));
return new AutomatonBackedOrdinalsFilter(toAutomaton(maxRegexLength));
}
if (isPartitionBased()) {
return new PartitionedOrdinalsFilter();
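
Putting the pieces of this file together: a regex-based include or exclude whose pattern exceeds the limit is rejected as soon as a filter is built, because both wrapper methods funnel into toAutomaton and validateRegExpStringLength. A minimal sketch of that behaviour, assuming the existing IncludeExclude(String include, String exclude) regex constructor and an OpenSearchTestCase-style expectThrows helper:

    // Hypothetical pattern longer than DEFAULT_MAX_REGEX_LENGTH (1000 characters)
    String longPattern = "a".repeat(2000);
    IncludeExclude includeExclude = new IncludeExclude(longPattern, null);

    // The single-argument overload applies the default limit, so validation throws
    expectThrows(IllegalArgumentException.class, () -> includeExclude.convertToStringFilter(DocValueFormat.RAW));

    // A caller that does have an index setting can pass a larger explicit limit instead
    IncludeExclude.StringFilter filter = includeExclude.convertToStringFilter(DocValueFormat.RAW, 5000);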
@@ -34,7 +34,6 @@

import org.opensearch.common.ParseField;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.aggregations.Aggregator;
@@ -251,10 +250,10 @@ Aggregator create(
double precision,
CardinalityUpperBound cardinality
) throws IOException {
IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
final IncludeExclude.StringFilter filter = includeExclude == null
? null
: includeExclude.convertToStringFilter(format, indexSettings);
: includeExclude.convertToStringFilter(format, maxRegexLength);
return new StringRareTermsAggregator(
name,
factories,
@@ -34,7 +34,6 @@

import org.opensearch.common.ParseField;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.search.DocValueFormat;
@@ -326,10 +325,10 @@ Aggregator create(
CardinalityUpperBound cardinality,
Map<String, Object> metadata
) throws IOException {
IndexSettings indexSettings = aggregationContext.getQueryShardContext().getIndexSettings();
int maxRegexLength = aggregationContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
final IncludeExclude.StringFilter filter = includeExclude == null
? null
: includeExclude.convertToStringFilter(format, indexSettings);
: includeExclude.convertToStringFilter(format, maxRegexLength);
return new MapStringTermsAggregator(
name,
factories,
@@ -367,10 +366,10 @@ Aggregator create(
CardinalityUpperBound cardinality,
Map<String, Object> metadata
) throws IOException {
IndexSettings indexSettings = aggregationContext.getQueryShardContext().getIndexSettings();
int maxRegexLength = aggregationContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
final IncludeExclude.OrdinalsFilter filter = includeExclude == null
? null
: includeExclude.convertToOrdinalsFilter(format, indexSettings);
: includeExclude.convertToOrdinalsFilter(format, maxRegexLength);
boolean remapGlobalOrd = true;
if (cardinality == CardinalityUpperBound.ONE && factories == AggregatorFactories.EMPTY && includeExclude == null) {
/*
@@ -44,7 +44,6 @@
import org.opensearch.common.util.BigArrays;
import org.opensearch.common.util.BytesRefHash;
import org.opensearch.common.util.ObjectArray;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryShardContext;
@@ -138,10 +137,10 @@ protected Aggregator createInternal(

// TODO - need to check with mapping that this is indeed a text field....

IndexSettings indexSettings = searchContext.getQueryShardContext().getIndexSettings();
int maxRegexLength = searchContext.getQueryShardContext().getIndexSettings().getMaxRegexLength();
IncludeExclude.StringFilter incExcFilter = includeExclude == null
? null
: includeExclude.convertToStringFilter(DocValueFormat.RAW, indexSettings);
: includeExclude.convertToStringFilter(DocValueFormat.RAW, maxRegexLength);

MapStringTermsAggregator.CollectorSource collectorSource = new SignificantTextCollectorSource(
queryShardContext.lookup().source(),
@@ -34,7 +34,6 @@

import org.apache.lucene.search.IndexSearcher;
import org.opensearch.common.ParseField;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.aggregations.AggregationExecutionException;
@@ -381,10 +380,10 @@ Aggregator create(
CardinalityUpperBound cardinality,
Map<String, Object> metadata
) throws IOException {
IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
final IncludeExclude.StringFilter filter = includeExclude == null
? null
: includeExclude.convertToStringFilter(format, indexSettings);
: includeExclude.convertToStringFilter(format, maxRegexLength);
return new MapStringTermsAggregator(
name,
factories,
@@ -462,10 +461,10 @@ Aggregator create(
);

}
IndexSettings indexSettings = context.getQueryShardContext().getIndexSettings();
int maxRegexLength = context.getQueryShardContext().getIndexSettings().getMaxRegexLength();
final IncludeExclude.OrdinalsFilter filter = includeExclude == null
? null
: includeExclude.convertToOrdinalsFilter(format, indexSettings);
: includeExclude.convertToOrdinalsFilter(format, maxRegexLength);
boolean remapGlobalOrds;
if (cardinality == CardinalityUpperBound.ONE && REMAP_GLOBAL_ORDS != null) {
/*
@@ -36,16 +36,12 @@
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongBitSet;
import org.opensearch.Version;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.ParseField;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.xcontent.ToXContent;
import org.opensearch.common.xcontent.XContentBuilder;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.common.xcontent.XContentParser;
import org.opensearch.common.xcontent.XContentType;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.fielddata.AbstractSortedSetDocValues;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.aggregations.bucket.terms.IncludeExclude;
@@ -58,23 +54,14 @@

public class IncludeExcludeTests extends OpenSearchTestCase {

private final IndexSettings dummyIndexSettings = new IndexSettings(
IndexMetadata.builder("index")
.settings(Settings.builder().put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT))
.numberOfShards(1)
.numberOfReplicas(0)
.build(),
Settings.EMPTY
);

public void testEmptyTermsWithOrds() throws IOException {
IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
assertEquals(0, acceptedOrds.length());

inexcl = new IncludeExclude(null, new TreeSet<>(Collections.singleton(new BytesRef("foo"))));
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
acceptedOrds = filter.acceptedGlobalOrdinals(DocValues.emptySortedSet());
assertEquals(0, acceptedOrds.length());
}
@@ -113,13 +100,13 @@ public long getValueCount() {

};
IncludeExclude inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("foo"))), null);
OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(ords);
assertEquals(1, acceptedOrds.length());
assertTrue(acceptedOrds.get(0));

inexcl = new IncludeExclude(new TreeSet<>(Collections.singleton(new BytesRef("bar"))), null);
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
acceptedOrds = filter.acceptedGlobalOrdinals(ords);
assertEquals(1, acceptedOrds.length());
assertFalse(acceptedOrds.get(0));
@@ -128,7 +115,7 @@ public long getValueCount() {
new TreeSet<>(Collections.singleton(new BytesRef("foo"))),
new TreeSet<>(Collections.singleton(new BytesRef("foo")))
);
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
acceptedOrds = filter.acceptedGlobalOrdinals(ords);
assertEquals(1, acceptedOrds.length());
assertFalse(acceptedOrds.get(0));
@@ -137,7 +124,7 @@ public long getValueCount() {
null, // means everything included
new TreeSet<>(Collections.singleton(new BytesRef("foo")))
);
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW, dummyIndexSettings);
filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
acceptedOrds = filter.acceptedGlobalOrdinals(ords);
assertEquals(1, acceptedOrds.length());
assertFalse(acceptedOrds.get(0));
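
The updated tests above exercise only value-set (non-regex) filters through the new default-limit overload. For completeness, a regex-based filter goes through the same default-limit path; a minimal sketch, assuming the IncludeExclude(String include, String exclude) regex constructor and reusing the single-term ("foo") ords fixture from the test above:

    // Regex include that matches the single term "foo"; the default regex length limit applies
    IncludeExclude inexcl = new IncludeExclude("fo.*", null);
    OrdinalsFilter filter = inexcl.convertToOrdinalsFilter(DocValueFormat.RAW);
    LongBitSet acceptedOrds = filter.acceptedGlobalOrdinals(ords);
    assertEquals(1, acceptedOrds.length());
    assertTrue(acceptedOrds.get(0));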