Skip to content

Commit

Permalink
[DerivedFields] DerivedFieldScript and query execution logic (#12746) (
Browse files Browse the repository at this point in the history
…#12968)

First in a series of commits to support derived fields, a form of schema-on-read.

This commit adds:
1. DerivedFieldScript factory: This script factory will be used to execute scripts defined against
   derived fields of any type.
2. DerivedFieldValueFetcher: The value fetcher contains logic to execute script and fetch the value
   in form of List<Object>. It expects DerivedFieldScript.LeafFactory as an input and sets the contract
   with consumer to call setNextReader() whenever a segment is switched.
3. DerivedFieldQuery: This query will be used by any of the derived fields. It expects an input query and
   DerivedFieldValueFetcher. It uses 2-phase iterator approach with approximation iterator set to match
   all docs. On a match, it creates a lucene MemoryIndex for a given doc, fetches the value of the derived
   field from _source using DerivedFieldValueFetcher and executes the input query against.

---------


(cherry picked from commit 70711cf)

Signed-off-by: Rishabh Maurya <[email protected]>
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
  • Loading branch information
1 parent 6919ee1 commit 993927a
Show file tree
Hide file tree
Showing 7 changed files with 417 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
- do:
scripts_painless_context: {}
- match: { contexts.0: aggregation_selector}
- match: { contexts.23: update}
- match: { contexts.24: update}
---

"Action to get all API values for score context":
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.mapper;

import org.apache.lucene.index.LeafReaderContext;
import org.opensearch.script.DerivedFieldScript;
import org.opensearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.List;

/**
* The value fetcher contains logic to execute script and fetch the value in form of list of object.
* It expects DerivedFieldScript.LeafFactory as an input and sets the contract with consumer to call
* {@link #setNextReader(LeafReaderContext)} whenever a segment is switched.
*/
public final class DerivedFieldValueFetcher implements ValueFetcher {
private DerivedFieldScript derivedFieldScript;
private final DerivedFieldScript.LeafFactory derivedFieldScriptFactory;

public DerivedFieldValueFetcher(DerivedFieldScript.LeafFactory derivedFieldScriptFactory) {
this.derivedFieldScriptFactory = derivedFieldScriptFactory;
}

@Override
public List<Object> fetchValues(SourceLookup lookup) {
derivedFieldScript.setDocument(lookup.docId());
// TODO: remove List.of() when derivedFieldScript.execute() returns list of objects.
return List.of(derivedFieldScript.execute());
}

public void setNextReader(LeafReaderContext context) {
try {
derivedFieldScript = derivedFieldScriptFactory.newInstance(context);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
146 changes: 146 additions & 0 deletions server/src/main/java/org/opensearch/index/query/DerivedFieldQuery.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.query;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.opensearch.index.mapper.DerivedFieldValueFetcher;
import org.opensearch.search.lookup.LeafSearchLookup;
import org.opensearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;

/**
* DerivedFieldQuery used for querying derived fields. It contains the logic to execute an input lucene query against
* DerivedField. It also accepts DerivedFieldValueFetcher and SearchLookup as an input.
*/
public final class DerivedFieldQuery extends Query {
private final Query query;
private final DerivedFieldValueFetcher valueFetcher;
private final SearchLookup searchLookup;
private final Function<Object, IndexableField> indexableFieldGenerator;
private final Analyzer indexAnalyzer;

/**
* @param query lucene query to be executed against the derived field
* @param valueFetcher DerivedFieldValueFetcher ValueFetcher to fetch the value of a derived field from _source
* using LeafSearchLookup
* @param searchLookup SearchLookup to get the LeafSearchLookup look used by valueFetcher to fetch the _source
* @param indexableFieldGenerator used to generate lucene IndexableField from a given object fetched by valueFetcher
* to be used in lucene memory index.
*/
public DerivedFieldQuery(
Query query,
DerivedFieldValueFetcher valueFetcher,
SearchLookup searchLookup,
Function<Object, IndexableField> indexableFieldGenerator,
Analyzer indexAnalyzer
) {
this.query = query;
this.valueFetcher = valueFetcher;
this.searchLookup = searchLookup;
this.indexableFieldGenerator = indexableFieldGenerator;
this.indexAnalyzer = indexAnalyzer;
}

@Override
public void visit(QueryVisitor visitor) {
query.visit(visitor);
}

@Override
public Query rewrite(IndexSearcher indexSearcher) throws IOException {
Query rewritten = indexSearcher.rewrite(query);
if (rewritten == query) {
return this;
}
return new DerivedFieldQuery(rewritten, valueFetcher, searchLookup, indexableFieldGenerator, indexAnalyzer);
}

@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {

return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) {
DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
valueFetcher.setNextReader(context);
LeafSearchLookup leafSearchLookup = searchLookup.getLeafSearchLookup(context);
TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {
@Override
public boolean matches() {
leafSearchLookup.source().setSegmentAndDocument(context, approximation.docID());
List<Object> values = valueFetcher.fetchValues(leafSearchLookup.source());
// TODO: in case of errors from script, should it be ignored and treated as missing field
// by using a configurable setting?
MemoryIndex memoryIndex = new MemoryIndex();
for (Object value : values) {
memoryIndex.addField(indexableFieldGenerator.apply(value), indexAnalyzer);
}
float score = memoryIndex.search(query);
return score > 0.0f;
}

@Override
public float matchCost() {
// TODO: how can we compute this?
return 1000f;
}
};
return new ConstantScoreScorer(this, score(), scoreMode, twoPhase);
}

@Override
public boolean isCacheable(LeafReaderContext ctx) {
return false;
}
};
}

@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (sameClassAs(o) == false) {
return false;
}
DerivedFieldQuery other = (DerivedFieldQuery) o;
return Objects.equals(this.query, other.query)
&& Objects.equals(this.valueFetcher, other.valueFetcher)
&& Objects.equals(this.searchLookup, other.searchLookup)
&& Objects.equals(this.indexableFieldGenerator, other.indexableFieldGenerator)
&& Objects.equals(this.indexAnalyzer, other.indexAnalyzer);
}

@Override
public int hashCode() {
return Objects.hash(classHash(), query, valueFetcher, searchLookup, indexableFieldGenerator, indexableFieldGenerator);
}

@Override
public String toString(String f) {
return "DerivedFieldQuery (Query: [ " + query.toString(f) + "])";
}
}
104 changes: 104 additions & 0 deletions server/src/main/java/org/opensearch/script/DerivedFieldScript.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.script;

import org.apache.lucene.index.LeafReaderContext;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.index.fielddata.ScriptDocValues;
import org.opensearch.search.lookup.LeafSearchLookup;
import org.opensearch.search.lookup.SearchLookup;
import org.opensearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

/**
* Definition of Script for DerivedField.
* It will be used to execute scripts defined against derived fields of any type
*
* @opensearch.internal
*/
public abstract class DerivedFieldScript {

public static final String[] PARAMETERS = {};
public static final ScriptContext<Factory> CONTEXT = new ScriptContext<>("derived_field", Factory.class);
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(DynamicMap.class);

private static final Map<String, Function<Object, Object>> PARAMS_FUNCTIONS = Map.of(
"doc",
value -> value,
"_source",
value -> ((SourceLookup) value).loadSourceIfNeeded()
);

/**
* The generic runtime parameters for the script.
*/
private final Map<String, Object> params;

/**
* A leaf lookup for the bound segment this script will operate on.
*/
private final LeafSearchLookup leafLookup;

public DerivedFieldScript(Map<String, Object> params, SearchLookup lookup, LeafReaderContext leafContext) {
Map<String, Object> parameters = new HashMap<>(params);
this.leafLookup = lookup.getLeafSearchLookup(leafContext);
parameters.putAll(leafLookup.asMap());
this.params = new DynamicMap(parameters, PARAMS_FUNCTIONS);
}

protected DerivedFieldScript() {
params = null;
leafLookup = null;
}

/**
* Return the parameters for this script.
*/
public Map<String, Object> getParams() {
return params;
}

/**
* The doc lookup for the Lucene segment this script was created for.
*/
public Map<String, ScriptDocValues<?>> getDoc() {
return leafLookup.doc();
}

/**
* Set the current document to run the script on next.
*/
public void setDocument(int docid) {
leafLookup.setDocument(docid);
}

public abstract Object execute();

/**
* A factory to construct {@link DerivedFieldScript} instances.
*
* @opensearch.internal
*/
public interface LeafFactory {
DerivedFieldScript newInstance(LeafReaderContext ctx) throws IOException;
}

/**
* A factory to construct stateful {@link DerivedFieldScript} factories for a specific index.
*
* @opensearch.internal
*/
public interface Factory extends ScriptFactory {
LeafFactory newFactory(Map<String, Object> params, SearchLookup lookup);
}
}
3 changes: 2 additions & 1 deletion server/src/main/java/org/opensearch/script/ScriptModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ public class ScriptModule {
ScriptedMetricAggContexts.MapScript.CONTEXT,
ScriptedMetricAggContexts.CombineScript.CONTEXT,
ScriptedMetricAggContexts.ReduceScript.CONTEXT,
IntervalFilterScript.CONTEXT
IntervalFilterScript.CONTEXT,
DerivedFieldScript.CONTEXT
).collect(Collectors.toMap(c -> c.name, Function.identity()));
}

Expand Down
Loading

0 comments on commit 993927a

Please sign in to comment.