Skip to content

Commit

Permalink
[Backport 2.x] Add new cluster setting for keyword indexordocvalues q…
Browse files Browse the repository at this point in the history
…uery (#15637) (#15703)


---------

Signed-off-by: Harsha Vamsi Kalluri <[email protected]>
  • Loading branch information
harshavamsi authored Sep 5, 2024
1 parent 00b2fcc commit bb0a497
Show file tree
Hide file tree
Showing 12 changed files with 153 additions and 29 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Remote Publication] Added checksum validation for cluster state behind a cluster setting ([#15218](https://github.com/opensearch-project/OpenSearch/pull/15218))
- Relax the join validation for Remote State publication ([#15471](https://github.com/opensearch-project/OpenSearch/pull/15471))
- Optimize NodeIndicesStats output behind flag ([#14454](https://github.com/opensearch-project/OpenSearch/pull/14454))
- MultiTermQueries in keyword fields now default to `indexed` approach and gated behind cluster setting ([#15637](https://github.com/opensearch-project/OpenSearch/pull/15637))

### Dependencies
- Bump `netty` from 4.1.111.Final to 4.1.112.Final ([#15081](https://github.com/opensearch-project/OpenSearch/pull/15081))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,7 @@ public void apply(Settings value, Settings current, Settings previous) {
SearchService.MAX_AGGREGATION_REWRITE_FILTERS,
SearchService.INDICES_MAX_CLAUSE_COUNT_SETTING,
SearchService.CARDINALITY_AGGREGATION_PRUNING_THRESHOLD,
SearchService.KEYWORD_INDEX_OR_DOC_VALUES_ENABLED,
CreatePitController.PIT_INIT_KEEP_ALIVE,
Node.WRITE_PORTS_FILE_SETTING,
Node.NODE_NAME_SETTING,
Expand Down
22 changes: 20 additions & 2 deletions server/src/main/java/org/opensearch/index/IndexService.java
Original file line number Diff line number Diff line change
Expand Up @@ -959,7 +959,8 @@ public QueryShardContext newQueryShardContext(
IndexSearcher searcher,
LongSupplier nowInMillis,
String clusterAlias,
boolean validate
boolean validate,
boolean keywordIndexOrDocValuesEnabled
) {
final SearchIndexNameMatcher indexNameMatcher = new SearchIndexNameMatcher(
index().getName(),
Expand All @@ -985,10 +986,27 @@ public QueryShardContext newQueryShardContext(
indexNameMatcher,
allowExpensiveQueries,
valuesSourceRegistry,
validate
validate,
keywordIndexOrDocValuesEnabled
);
}

/**
* Creates a new QueryShardContext.
* <p>
* Passing a {@code null} {@link IndexSearcher} will return a valid context, however it won't be able to make
* {@link IndexReader}-specific optimizations, such as rewriting containing range queries.
*/
public QueryShardContext newQueryShardContext(
int shardId,
IndexSearcher searcher,
LongSupplier nowInMillis,
String clusterAlias,
boolean validate
) {
return newQueryShardContext(shardId, searcher, nowInMillis, clusterAlias, validate, false);
}

/**
* The {@link ThreadPool} to use for this index.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,9 @@ public Query termsQuery(List<?> values, QueryShardContext context) {
failIfNotIndexedAndNoDocValues();
// has index and doc_values enabled
if (isSearchable() && hasDocValues()) {
if (!context.keywordFieldIndexOrDocValuesEnabled()) {
return super.termsQuery(values, context);
}
BytesRef[] bytesRefs = new BytesRef[values.size()];
for (int i = 0; i < bytesRefs.length; i++) {
bytesRefs[i] = indexedValueForSearch(values.get(i));
Expand Down Expand Up @@ -429,6 +432,9 @@ public Query prefixQuery(
}
failIfNotIndexedAndNoDocValues();
if (isSearchable() && hasDocValues()) {
if (!context.keywordFieldIndexOrDocValuesEnabled()) {
return super.prefixQuery(value, method, caseInsensitive, context);
}
Query indexQuery = super.prefixQuery(value, method, caseInsensitive, context);
Query dvQuery = super.prefixQuery(value, MultiTermQuery.DOC_VALUES_REWRITE, caseInsensitive, context);
return new IndexOrDocValuesQuery(indexQuery, dvQuery);
Expand Down Expand Up @@ -461,6 +467,9 @@ public Query regexpQuery(
}
failIfNotIndexedAndNoDocValues();
if (isSearchable() && hasDocValues()) {
if (!context.keywordFieldIndexOrDocValuesEnabled()) {
return super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context);
}
Query indexQuery = super.regexpQuery(value, syntaxFlags, matchFlags, maxDeterminizedStates, method, context);
Query dvQuery = super.regexpQuery(
value,
Expand Down Expand Up @@ -549,6 +558,9 @@ public Query fuzzyQuery(
);
}
if (isSearchable() && hasDocValues()) {
if (!context.keywordFieldIndexOrDocValuesEnabled()) {
return super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, method, context);
}
Query indexQuery = super.fuzzyQuery(value, fuzziness, prefixLength, maxExpansions, transpositions, method, context);
Query dvQuery = super.fuzzyQuery(
value,
Expand Down Expand Up @@ -591,6 +603,9 @@ public Query wildcardQuery(
// wildcard
// query text
if (isSearchable() && hasDocValues()) {
if (!context.keywordFieldIndexOrDocValuesEnabled()) {
return super.wildcardQuery(value, method, caseInsensitive, true, context);
}
Query indexQuery = super.wildcardQuery(value, method, caseInsensitive, true, context);
Query dvQuery = super.wildcardQuery(value, MultiTermQuery.DOC_VALUES_REWRITE, caseInsensitive, true, context);
return new IndexOrDocValuesQuery(indexQuery, dvQuery);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ public class QueryShardContext extends QueryRewriteContext {
private final ValuesSourceRegistry valuesSourceRegistry;
private BitSetProducer parentFilter;
private DerivedFieldResolver derivedFieldResolver;
private boolean keywordIndexOrDocValuesEnabled;

public QueryShardContext(
int shardId,
Expand Down Expand Up @@ -209,7 +210,55 @@ public QueryShardContext(
),
allowExpensiveQueries,
valuesSourceRegistry,
validate
validate,
false
);
}

public QueryShardContext(
int shardId,
IndexSettings indexSettings,
BigArrays bigArrays,
BitsetFilterCache bitsetFilterCache,
TriFunction<MappedFieldType, String, Supplier<SearchLookup>, IndexFieldData<?>> indexFieldDataLookup,
MapperService mapperService,
SimilarityService similarityService,
ScriptService scriptService,
NamedXContentRegistry xContentRegistry,
NamedWriteableRegistry namedWriteableRegistry,
Client client,
IndexSearcher searcher,
LongSupplier nowInMillis,
String clusterAlias,
Predicate<String> indexNameMatcher,
BooleanSupplier allowExpensiveQueries,
ValuesSourceRegistry valuesSourceRegistry,
boolean validate,
boolean keywordIndexOrDocValuesEnabled
) {
this(
shardId,
indexSettings,
bigArrays,
bitsetFilterCache,
indexFieldDataLookup,
mapperService,
similarityService,
scriptService,
xContentRegistry,
namedWriteableRegistry,
client,
searcher,
nowInMillis,
indexNameMatcher,
new Index(
RemoteClusterAware.buildRemoteIndexName(clusterAlias, indexSettings.getIndex().getName()),
indexSettings.getIndex().getUUID()
),
allowExpensiveQueries,
valuesSourceRegistry,
validate,
keywordIndexOrDocValuesEnabled
);
}

Expand All @@ -232,7 +281,8 @@ public QueryShardContext(QueryShardContext source) {
source.fullyQualifiedIndex,
source.allowExpensiveQueries,
source.valuesSourceRegistry,
source.validate()
source.validate(),
source.keywordIndexOrDocValuesEnabled
);
}

Expand All @@ -254,7 +304,8 @@ private QueryShardContext(
Index fullyQualifiedIndex,
BooleanSupplier allowExpensiveQueries,
ValuesSourceRegistry valuesSourceRegistry,
boolean validate
boolean validate,
boolean keywordIndexOrDocValuesEnabled
) {
super(xContentRegistry, namedWriteableRegistry, client, nowInMillis, validate);
this.shardId = shardId;
Expand All @@ -278,6 +329,7 @@ private QueryShardContext(
emptyList(),
indexSettings.isDerivedFieldAllowed()
);
this.keywordIndexOrDocValuesEnabled = keywordIndexOrDocValuesEnabled;
}

private void reset() {
Expand Down Expand Up @@ -425,6 +477,14 @@ public MappedFieldType getDerivedFieldType(String fieldName) {
throw new UnsupportedOperationException("Use resolveDerivedFieldType() instead.");
}

public boolean keywordFieldIndexOrDocValuesEnabled() {
return keywordIndexOrDocValuesEnabled;
}

public void setKeywordFieldIndexOrDocValuesEnabled(boolean keywordIndexOrDocValuesEnabled) {
this.keywordIndexOrDocValuesEnabled = keywordIndexOrDocValuesEnabled;
}

public void setAllowUnmappedFields(boolean allowUnmappedFields) {
this.allowUnmappedFields = allowUnmappedFields;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_ALL;
import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_AUTO;
import static org.opensearch.search.SearchService.CONCURRENT_SEGMENT_SEARCH_MODE_NONE;
import static org.opensearch.search.SearchService.KEYWORD_INDEX_OR_DOC_VALUES_ENABLED;
import static org.opensearch.search.SearchService.MAX_AGGREGATION_REWRITE_FILTERS;

/**
Expand Down Expand Up @@ -207,6 +208,7 @@ final class DefaultSearchContext extends SearchContext {
private final SetOnce<Boolean> requestShouldUseConcurrentSearch = new SetOnce<>();
private final int maxAggRewriteFilters;
private final int cardinalityAggregationPruningThreshold;
private final boolean keywordIndexOrDocValuesEnabled;

DefaultSearchContext(
ReaderContext readerContext,
Expand Down Expand Up @@ -257,14 +259,16 @@ final class DefaultSearchContext extends SearchContext {
this.searcher,
request::nowInMillis,
shardTarget.getClusterAlias(),
validate
validate,
evaluateKeywordIndexOrDocValuesEnabled()
);
queryBoost = request.indexBoost();
this.lowLevelCancellation = lowLevelCancellation;
this.requestToAggReduceContextBuilder = requestToAggReduceContextBuilder;

this.maxAggRewriteFilters = evaluateFilterRewriteSetting();
this.cardinalityAggregationPruningThreshold = evaluateCardinalityAggregationPruningThreshold();
this.keywordIndexOrDocValuesEnabled = evaluateKeywordIndexOrDocValuesEnabled();
this.concurrentSearchDeciderFactories = concurrentSearchDeciderFactories;
}

Expand Down Expand Up @@ -1125,10 +1129,22 @@ public int cardinalityAggregationPruningThreshold() {
return cardinalityAggregationPruningThreshold;
}

@Override
public boolean keywordIndexOrDocValuesEnabled() {
return keywordIndexOrDocValuesEnabled;
}

private int evaluateCardinalityAggregationPruningThreshold() {
if (clusterService != null) {
return clusterService.getClusterSettings().get(CARDINALITY_AGGREGATION_PRUNING_THRESHOLD);
}
return 0;
}

public boolean evaluateKeywordIndexOrDocValuesEnabled() {
if (clusterService != null) {
return clusterService.getClusterSettings().get(KEYWORD_INDEX_OR_DOC_VALUES_ENABLED);
}
return false;
}
}
8 changes: 8 additions & 0 deletions server/src/main/java/org/opensearch/search/SearchService.java
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,13 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv
Property.NodeScope
);

public static final Setting<Boolean> KEYWORD_INDEX_OR_DOC_VALUES_ENABLED = Setting.boolSetting(
"search.keyword_index_or_doc_values_enabled",
false,
Property.Dynamic,
Property.NodeScope
);

public static final int DEFAULT_SIZE = 10;
public static final int DEFAULT_FROM = 0;

Expand Down Expand Up @@ -1174,6 +1181,7 @@ private DefaultSearchContext createSearchContext(ReaderContext reader, ShardSear
context.getIndexSettings().isDerivedFieldAllowed() && allowDerivedField
);
context.setDerivedFieldResolver(derivedFieldResolver);
context.setKeywordFieldIndexOrDocValuesEnabled(searchContext.keywordIndexOrDocValuesEnabled());
searchContext.getQueryShardContext().setDerivedFieldResolver(derivedFieldResolver);
Rewriteable.rewrite(request.getRewriteable(), context, true);
assert searchContext.getQueryShardContext().isCacheable();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -530,4 +530,9 @@ public int maxAggRewriteFilters() {
public int cardinalityAggregationPruningThreshold() {
return 0;
}

public boolean keywordIndexOrDocValuesEnabled() {
return false;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ public void testTermsQuery() {
new TermInSetQuery("field", terms),
new TermInSetQuery(MultiTermQuery.DOC_VALUES_REWRITE, "field", terms)
);
assertEquals(expected, ft.termsQuery(Arrays.asList("foo", "bar"), null));
assertEquals(expected, ft.termsQuery(Arrays.asList("foo", "bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES));

MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap());
Query expectedIndex = new TermInSetQuery("field", terms);
Expand Down Expand Up @@ -225,7 +225,7 @@ public void testRegexpQuery() {
new RegexpQuery(new Term("field", "foo.*")),
new RegexpQuery(new Term("field", "foo.*"), 0, 0, RegexpQuery.DEFAULT_PROVIDER, 10, MultiTermQuery.DOC_VALUES_REWRITE)
),
ft.regexpQuery("foo.*", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC)
ft.regexpQuery("foo.*", 0, 0, 10, MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC_ENABLE_INDEX_DOC_VALUES)
);

Query indexExpected = new RegexpQuery(new Term("field", "foo.*"));
Expand Down Expand Up @@ -267,7 +267,7 @@ public void testFuzzyQuery() {
new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true),
new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true, MultiTermQuery.DOC_VALUES_REWRITE)
),
ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, null, MOCK_QSC)
ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, null, MOCK_QSC_ENABLE_INDEX_DOC_VALUES)
);

Query indexExpected = new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true);
Expand Down Expand Up @@ -308,7 +308,7 @@ public void testWildCardQuery() {
MultiTermQuery.DOC_VALUES_REWRITE
)
);
assertEquals(expected, ft.wildcardQuery("foo*", MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC));
assertEquals(expected, ft.wildcardQuery("foo*", MultiTermQuery.CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC_ENABLE_INDEX_DOC_VALUES));

Query indexExpected = new WildcardQuery(new Term("field", new BytesRef("foo*")));
MappedFieldType onlyIndexed = new KeywordFieldType("field", true, false, Collections.emptyMap());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@
import java.io.IOException;
import java.util.Collections;

import static org.opensearch.index.mapper.FieldTypeTestCase.MOCK_QSC_ENABLE_INDEX_DOC_VALUES;

public class NestedHelperTests extends OpenSearchSingleNodeTestCase {

IndexService indexService;
Expand Down Expand Up @@ -132,28 +134,28 @@ public void testMatchNo() {
}

public void testTermsQuery() {
Query termsQuery = mapperService.fieldType("foo").termsQuery(Collections.singletonList("bar"), null);
Query termsQuery = mapperService.fieldType("foo").termsQuery(Collections.singletonList("bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES);
assertFalse(new NestedHelper(mapperService).mightMatchNestedDocs(termsQuery));
assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested1"));
assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested2"));
assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested3"));
assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested_missing"));

termsQuery = mapperService.fieldType("nested1.foo").termsQuery(Collections.singletonList("bar"), null);
termsQuery = mapperService.fieldType("nested1.foo").termsQuery(Collections.singletonList("bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES);
assertTrue(new NestedHelper(mapperService).mightMatchNestedDocs(termsQuery));
assertFalse(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested1"));
assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested2"));
assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested3"));
assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested_missing"));

termsQuery = mapperService.fieldType("nested2.foo").termsQuery(Collections.singletonList("bar"), null);
termsQuery = mapperService.fieldType("nested2.foo").termsQuery(Collections.singletonList("bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES);
assertTrue(new NestedHelper(mapperService).mightMatchNestedDocs(termsQuery));
assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested1"));
assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested2"));
assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested3"));
assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested_missing"));

termsQuery = mapperService.fieldType("nested3.foo").termsQuery(Collections.singletonList("bar"), null);
termsQuery = mapperService.fieldType("nested3.foo").termsQuery(Collections.singletonList("bar"), MOCK_QSC_ENABLE_INDEX_DOC_VALUES);
assertTrue(new NestedHelper(mapperService).mightMatchNestedDocs(termsQuery));
assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested1"));
assertTrue(new NestedHelper(mapperService).mightMatchNonNestedDocs(termsQuery, "nested2"));
Expand Down
Loading

0 comments on commit bb0a497

Please sign in to comment.