Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport 2.x] [DerivedFields] PR1 - DerivedFieldScript and query execution logic #12968

Merged
merged 1 commit into from
Apr 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
- do:
scripts_painless_context: {}
- match: { contexts.0: aggregation_selector}
- match: { contexts.23: update}
- match: { contexts.24: update}
---

"Action to get all API values for score context":
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.mapper;

import org.apache.lucene.index.LeafReaderContext;
import org.opensearch.script.DerivedFieldScript;
import org.opensearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.List;

/**
* The value fetcher contains logic to execute script and fetch the value in form of list of object.
* It expects DerivedFieldScript.LeafFactory as an input and sets the contract with consumer to call
* {@link #setNextReader(LeafReaderContext)} whenever a segment is switched.
*/
public final class DerivedFieldValueFetcher implements ValueFetcher {
private DerivedFieldScript derivedFieldScript;
private final DerivedFieldScript.LeafFactory derivedFieldScriptFactory;

public DerivedFieldValueFetcher(DerivedFieldScript.LeafFactory derivedFieldScriptFactory) {
this.derivedFieldScriptFactory = derivedFieldScriptFactory;
}

@Override
public List<Object> fetchValues(SourceLookup lookup) {
derivedFieldScript.setDocument(lookup.docId());
// TODO: remove List.of() when derivedFieldScript.execute() returns list of objects.
return List.of(derivedFieldScript.execute());
}

public void setNextReader(LeafReaderContext context) {
try {
derivedFieldScript = derivedFieldScriptFactory.newInstance(context);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.query;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.opensearch.index.mapper.DerivedFieldValueFetcher;
import org.opensearch.search.lookup.LeafSearchLookup;
import org.opensearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.function.Function;

/**
* DerivedFieldQuery used for querying derived fields. It contains the logic to execute an input lucene query against
* DerivedField. It also accepts DerivedFieldValueFetcher and SearchLookup as an input.
*/
public final class DerivedFieldQuery extends Query {
private final Query query;
private final DerivedFieldValueFetcher valueFetcher;
private final SearchLookup searchLookup;
private final Function<Object, IndexableField> indexableFieldGenerator;
private final Analyzer indexAnalyzer;

/**
* @param query lucene query to be executed against the derived field
* @param valueFetcher DerivedFieldValueFetcher ValueFetcher to fetch the value of a derived field from _source
* using LeafSearchLookup
* @param searchLookup SearchLookup to get the LeafSearchLookup look used by valueFetcher to fetch the _source
* @param indexableFieldGenerator used to generate lucene IndexableField from a given object fetched by valueFetcher
* to be used in lucene memory index.
*/
public DerivedFieldQuery(
Query query,
DerivedFieldValueFetcher valueFetcher,
SearchLookup searchLookup,
Function<Object, IndexableField> indexableFieldGenerator,
Analyzer indexAnalyzer
) {
this.query = query;
this.valueFetcher = valueFetcher;
this.searchLookup = searchLookup;
this.indexableFieldGenerator = indexableFieldGenerator;
this.indexAnalyzer = indexAnalyzer;
}

@Override
public void visit(QueryVisitor visitor) {
query.visit(visitor);
}

@Override
public Query rewrite(IndexSearcher indexSearcher) throws IOException {
Query rewritten = indexSearcher.rewrite(query);
if (rewritten == query) {
return this;
}
return new DerivedFieldQuery(rewritten, valueFetcher, searchLookup, indexableFieldGenerator, indexAnalyzer);
}

@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {

return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) {
DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
valueFetcher.setNextReader(context);
LeafSearchLookup leafSearchLookup = searchLookup.getLeafSearchLookup(context);
TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {
@Override
public boolean matches() {
leafSearchLookup.source().setSegmentAndDocument(context, approximation.docID());
List<Object> values = valueFetcher.fetchValues(leafSearchLookup.source());
// TODO: in case of errors from script, should it be ignored and treated as missing field
// by using a configurable setting?
MemoryIndex memoryIndex = new MemoryIndex();
for (Object value : values) {
memoryIndex.addField(indexableFieldGenerator.apply(value), indexAnalyzer);
}
float score = memoryIndex.search(query);
return score > 0.0f;
}

@Override
public float matchCost() {
// TODO: how can we compute this?
return 1000f;
}
};
return new ConstantScoreScorer(this, score(), scoreMode, twoPhase);
}

@Override
public boolean isCacheable(LeafReaderContext ctx) {
return false;
}
};
}

@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (sameClassAs(o) == false) {
return false;
}
DerivedFieldQuery other = (DerivedFieldQuery) o;
return Objects.equals(this.query, other.query)
&& Objects.equals(this.valueFetcher, other.valueFetcher)
&& Objects.equals(this.searchLookup, other.searchLookup)
&& Objects.equals(this.indexableFieldGenerator, other.indexableFieldGenerator)
&& Objects.equals(this.indexAnalyzer, other.indexAnalyzer);
}

@Override
public int hashCode() {
return Objects.hash(classHash(), query, valueFetcher, searchLookup, indexableFieldGenerator, indexableFieldGenerator);
}

@Override
public String toString(String f) {
return "DerivedFieldQuery (Query: [ " + query.toString(f) + "])";
}
}
104 changes: 104 additions & 0 deletions server/src/main/java/org/opensearch/script/DerivedFieldScript.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.script;

import org.apache.lucene.index.LeafReaderContext;
import org.opensearch.common.logging.DeprecationLogger;
import org.opensearch.index.fielddata.ScriptDocValues;
import org.opensearch.search.lookup.LeafSearchLookup;
import org.opensearch.search.lookup.SearchLookup;
import org.opensearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

/**
* Definition of Script for DerivedField.
* It will be used to execute scripts defined against derived fields of any type
*
* @opensearch.internal
*/
public abstract class DerivedFieldScript {

public static final String[] PARAMETERS = {};
public static final ScriptContext<Factory> CONTEXT = new ScriptContext<>("derived_field", Factory.class);
private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(DynamicMap.class);

private static final Map<String, Function<Object, Object>> PARAMS_FUNCTIONS = Map.of(
"doc",
value -> value,
"_source",
value -> ((SourceLookup) value).loadSourceIfNeeded()
);

/**
* The generic runtime parameters for the script.
*/
private final Map<String, Object> params;

/**
* A leaf lookup for the bound segment this script will operate on.
*/
private final LeafSearchLookup leafLookup;

public DerivedFieldScript(Map<String, Object> params, SearchLookup lookup, LeafReaderContext leafContext) {
Map<String, Object> parameters = new HashMap<>(params);
this.leafLookup = lookup.getLeafSearchLookup(leafContext);
parameters.putAll(leafLookup.asMap());
this.params = new DynamicMap(parameters, PARAMS_FUNCTIONS);
}

protected DerivedFieldScript() {
params = null;
leafLookup = null;
}

/**
* Return the parameters for this script.
*/
public Map<String, Object> getParams() {
return params;
}

/**
* The doc lookup for the Lucene segment this script was created for.
*/
public Map<String, ScriptDocValues<?>> getDoc() {
return leafLookup.doc();
}

/**
* Set the current document to run the script on next.
*/
public void setDocument(int docid) {
leafLookup.setDocument(docid);
}

public abstract Object execute();

/**
* A factory to construct {@link DerivedFieldScript} instances.
*
* @opensearch.internal
*/
public interface LeafFactory {
DerivedFieldScript newInstance(LeafReaderContext ctx) throws IOException;
}

/**
* A factory to construct stateful {@link DerivedFieldScript} factories for a specific index.
*
* @opensearch.internal
*/
public interface Factory extends ScriptFactory {
LeafFactory newFactory(Map<String, Object> params, SearchLookup lookup);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ public class ScriptModule {
ScriptedMetricAggContexts.MapScript.CONTEXT,
ScriptedMetricAggContexts.CombineScript.CONTEXT,
ScriptedMetricAggContexts.ReduceScript.CONTEXT,
IntervalFilterScript.CONTEXT
IntervalFilterScript.CONTEXT,
DerivedFieldScript.CONTEXT
).collect(Collectors.toMap(c -> c.name, Function.identity()));
}

Expand Down
Loading
Loading