diff --git a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/71_context_api.yml b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/71_context_api.yml index 478ca9ae8abf4..20e6fd351a4b9 100644 --- a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/71_context_api.yml +++ b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/71_context_api.yml @@ -2,7 +2,7 @@ - do: scripts_painless_context: {} - match: { contexts.0: aggregation_selector} - - match: { contexts.23: update} + - match: { contexts.24: update} --- "Action to get all API values for score context": diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldValueFetcher.java b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldValueFetcher.java new file mode 100644 index 0000000000000..f3bf0c613415a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldValueFetcher.java @@ -0,0 +1,45 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.index.LeafReaderContext; +import org.opensearch.script.DerivedFieldScript; +import org.opensearch.search.lookup.SourceLookup; + +import java.io.IOException; +import java.util.List; + +/** + * The value fetcher contains logic to execute script and fetch the value in form of list of object. + * It expects DerivedFieldScript.LeafFactory as an input and sets the contract with consumer to call + * {@link #setNextReader(LeafReaderContext)} whenever a segment is switched. + */ +public final class DerivedFieldValueFetcher implements ValueFetcher { + private DerivedFieldScript derivedFieldScript; + private final DerivedFieldScript.LeafFactory derivedFieldScriptFactory; + + public DerivedFieldValueFetcher(DerivedFieldScript.LeafFactory derivedFieldScriptFactory) { + this.derivedFieldScriptFactory = derivedFieldScriptFactory; + } + + @Override + public List fetchValues(SourceLookup lookup) { + derivedFieldScript.setDocument(lookup.docId()); + // TODO: remove List.of() when derivedFieldScript.execute() returns list of objects. + return List.of(derivedFieldScript.execute()); + } + + public void setNextReader(LeafReaderContext context) { + try { + derivedFieldScript = derivedFieldScriptFactory.newInstance(context); + } catch (IOException e) { + throw new RuntimeException(e); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/query/DerivedFieldQuery.java b/server/src/main/java/org/opensearch/index/query/DerivedFieldQuery.java new file mode 100644 index 0000000000000..8beef0bf46be0 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/query/DerivedFieldQuery.java @@ -0,0 +1,146 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.query; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.memory.MemoryIndex; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; +import org.opensearch.index.mapper.DerivedFieldValueFetcher; +import org.opensearch.search.lookup.LeafSearchLookup; +import org.opensearch.search.lookup.SearchLookup; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; +import java.util.function.Function; + +/** + * DerivedFieldQuery used for querying derived fields. It contains the logic to execute an input lucene query against + * DerivedField. It also accepts DerivedFieldValueFetcher and SearchLookup as an input. + */ +public final class DerivedFieldQuery extends Query { + private final Query query; + private final DerivedFieldValueFetcher valueFetcher; + private final SearchLookup searchLookup; + private final Function indexableFieldGenerator; + private final Analyzer indexAnalyzer; + + /** + * @param query lucene query to be executed against the derived field + * @param valueFetcher DerivedFieldValueFetcher ValueFetcher to fetch the value of a derived field from _source + * using LeafSearchLookup + * @param searchLookup SearchLookup to get the LeafSearchLookup look used by valueFetcher to fetch the _source + * @param indexableFieldGenerator used to generate lucene IndexableField from a given object fetched by valueFetcher + * to be used in lucene memory index. + */ + public DerivedFieldQuery( + Query query, + DerivedFieldValueFetcher valueFetcher, + SearchLookup searchLookup, + Function indexableFieldGenerator, + Analyzer indexAnalyzer + ) { + this.query = query; + this.valueFetcher = valueFetcher; + this.searchLookup = searchLookup; + this.indexableFieldGenerator = indexableFieldGenerator; + this.indexAnalyzer = indexAnalyzer; + } + + @Override + public void visit(QueryVisitor visitor) { + query.visit(visitor); + } + + @Override + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + Query rewritten = indexSearcher.rewrite(query); + if (rewritten == query) { + return this; + } + return new DerivedFieldQuery(rewritten, valueFetcher, searchLookup, indexableFieldGenerator, indexAnalyzer); + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + + return new ConstantScoreWeight(this, boost) { + @Override + public Scorer scorer(LeafReaderContext context) { + DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc()); + valueFetcher.setNextReader(context); + LeafSearchLookup leafSearchLookup = searchLookup.getLeafSearchLookup(context); + TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) { + @Override + public boolean matches() { + leafSearchLookup.source().setSegmentAndDocument(context, approximation.docID()); + List values = valueFetcher.fetchValues(leafSearchLookup.source()); + // TODO: in case of errors from script, should it be ignored and treated as missing field + // by using a configurable setting? + MemoryIndex memoryIndex = new MemoryIndex(); + for (Object value : values) { + memoryIndex.addField(indexableFieldGenerator.apply(value), indexAnalyzer); + } + float score = memoryIndex.search(query); + return score > 0.0f; + } + + @Override + public float matchCost() { + // TODO: how can we compute this? + return 1000f; + } + }; + return new ConstantScoreScorer(this, score(), scoreMode, twoPhase); + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return false; + } + }; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (sameClassAs(o) == false) { + return false; + } + DerivedFieldQuery other = (DerivedFieldQuery) o; + return Objects.equals(this.query, other.query) + && Objects.equals(this.valueFetcher, other.valueFetcher) + && Objects.equals(this.searchLookup, other.searchLookup) + && Objects.equals(this.indexableFieldGenerator, other.indexableFieldGenerator) + && Objects.equals(this.indexAnalyzer, other.indexAnalyzer); + } + + @Override + public int hashCode() { + return Objects.hash(classHash(), query, valueFetcher, searchLookup, indexableFieldGenerator, indexableFieldGenerator); + } + + @Override + public String toString(String f) { + return "DerivedFieldQuery (Query: [ " + query.toString(f) + "])"; + } +} diff --git a/server/src/main/java/org/opensearch/script/DerivedFieldScript.java b/server/src/main/java/org/opensearch/script/DerivedFieldScript.java new file mode 100644 index 0000000000000..7f5b991950ec6 --- /dev/null +++ b/server/src/main/java/org/opensearch/script/DerivedFieldScript.java @@ -0,0 +1,104 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.script; + +import org.apache.lucene.index.LeafReaderContext; +import org.opensearch.common.logging.DeprecationLogger; +import org.opensearch.index.fielddata.ScriptDocValues; +import org.opensearch.search.lookup.LeafSearchLookup; +import org.opensearch.search.lookup.SearchLookup; +import org.opensearch.search.lookup.SourceLookup; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.function.Function; + +/** + * Definition of Script for DerivedField. + * It will be used to execute scripts defined against derived fields of any type + * + * @opensearch.internal + */ +public abstract class DerivedFieldScript { + + public static final String[] PARAMETERS = {}; + public static final ScriptContext CONTEXT = new ScriptContext<>("derived_field", Factory.class); + private static final DeprecationLogger deprecationLogger = DeprecationLogger.getLogger(DynamicMap.class); + + private static final Map> PARAMS_FUNCTIONS = Map.of( + "doc", + value -> value, + "_source", + value -> ((SourceLookup) value).loadSourceIfNeeded() + ); + + /** + * The generic runtime parameters for the script. + */ + private final Map params; + + /** + * A leaf lookup for the bound segment this script will operate on. + */ + private final LeafSearchLookup leafLookup; + + public DerivedFieldScript(Map params, SearchLookup lookup, LeafReaderContext leafContext) { + Map parameters = new HashMap<>(params); + this.leafLookup = lookup.getLeafSearchLookup(leafContext); + parameters.putAll(leafLookup.asMap()); + this.params = new DynamicMap(parameters, PARAMS_FUNCTIONS); + } + + protected DerivedFieldScript() { + params = null; + leafLookup = null; + } + + /** + * Return the parameters for this script. + */ + public Map getParams() { + return params; + } + + /** + * The doc lookup for the Lucene segment this script was created for. + */ + public Map> getDoc() { + return leafLookup.doc(); + } + + /** + * Set the current document to run the script on next. + */ + public void setDocument(int docid) { + leafLookup.setDocument(docid); + } + + public abstract Object execute(); + + /** + * A factory to construct {@link DerivedFieldScript} instances. + * + * @opensearch.internal + */ + public interface LeafFactory { + DerivedFieldScript newInstance(LeafReaderContext ctx) throws IOException; + } + + /** + * A factory to construct stateful {@link DerivedFieldScript} factories for a specific index. + * + * @opensearch.internal + */ + public interface Factory extends ScriptFactory { + LeafFactory newFactory(Map params, SearchLookup lookup); + } +} diff --git a/server/src/main/java/org/opensearch/script/ScriptModule.java b/server/src/main/java/org/opensearch/script/ScriptModule.java index a192e9553016b..c83a6e64d53eb 100644 --- a/server/src/main/java/org/opensearch/script/ScriptModule.java +++ b/server/src/main/java/org/opensearch/script/ScriptModule.java @@ -78,7 +78,8 @@ public class ScriptModule { ScriptedMetricAggContexts.MapScript.CONTEXT, ScriptedMetricAggContexts.CombineScript.CONTEXT, ScriptedMetricAggContexts.ReduceScript.CONTEXT, - IntervalFilterScript.CONTEXT + IntervalFilterScript.CONTEXT, + DerivedFieldScript.CONTEXT ).collect(Collectors.toMap(c -> c.name, Function.identity())); } diff --git a/server/src/test/java/org/opensearch/index/query/DerivedFieldQueryTests.java b/server/src/test/java/org/opensearch/index/query/DerivedFieldQueryTests.java new file mode 100644 index 0000000000000..18d117fa8c0f5 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/query/DerivedFieldQueryTests.java @@ -0,0 +1,104 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.query; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.KeywordField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.opensearch.common.lucene.Lucene; +import org.opensearch.index.mapper.DerivedFieldValueFetcher; +import org.opensearch.script.DerivedFieldScript; +import org.opensearch.script.Script; +import org.opensearch.search.lookup.LeafSearchLookup; +import org.opensearch.search.lookup.SearchLookup; +import org.opensearch.search.lookup.SourceLookup; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +public class DerivedFieldQueryTests extends OpenSearchTestCase { + + private static final String[][] raw_requests = new String[][] { + { "40.135.0.0 GET /images/hm_bg.jpg HTTP/1.0", "200", "40.135.0.0" }, + { "232.0.0.0 GET /images/hm_bg.jpg HTTP/1.0", "400", "232.0.0.0" }, + { "26.1.0.0 GET /images/hm_bg.jpg HTTP/1.0", "200", "26.1.0.0" }, + { "247.37.0.0 GET /french/splash_inet.html HTTP/1.0", "400", "247.37.0.0" }, + { "247.37.0.0 GET /french/splash_inet.html HTTP/1.0", "400", "247.37.0.0" } }; + + public void testDerivedField() throws IOException { + // Create lucene documents + List docs = new ArrayList<>(); + for (String[] request : raw_requests) { + Document document = new Document(); + document.add(new TextField("raw_request", request[0], Field.Store.YES)); + document.add(new KeywordField("status", request[1], Field.Store.YES)); + docs.add(document); + } + + // Mock SearchLookup + SearchLookup searchLookup = mock(SearchLookup.class); + SourceLookup sourceLookup = new SourceLookup(); + LeafSearchLookup leafLookup = mock(LeafSearchLookup.class); + when(leafLookup.source()).thenReturn(sourceLookup); + + // Mock DerivedFieldScript.Factory + DerivedFieldScript.Factory factory = (params, lookup) -> (DerivedFieldScript.LeafFactory) ctx -> { + when(searchLookup.getLeafSearchLookup(ctx)).thenReturn(leafLookup); + return new DerivedFieldScript(params, lookup, ctx) { + @Override + public Object execute() { + return raw_requests[sourceLookup.docId()][2]; + } + }; + }; + + // Create ValueFetcher from mocked DerivedFieldScript.Factory + DerivedFieldScript.LeafFactory leafFactory = factory.newFactory((new Script("")).getParams(), searchLookup); + DerivedFieldValueFetcher valueFetcher = new DerivedFieldValueFetcher(leafFactory); + + // Create DerivedFieldQuery + DerivedFieldQuery derivedFieldQuery = new DerivedFieldQuery( + new TermQuery(new Term("ip_from_raw_request", "247.37.0.0")), + valueFetcher, + searchLookup, + (o -> new KeywordField("ip_from_raw_request", (String) o, Field.Store.NO)), + Lucene.STANDARD_ANALYZER + ); + + // Index and Search + + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER)); + for (Document d : docs) { + iw.addDocument(d); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + IndexSearcher searcher = new IndexSearcher(reader); + TopDocs topDocs = searcher.search(derivedFieldQuery, 10); + assertEquals(2, topDocs.totalHits.value); + } + } + } +} diff --git a/test/framework/src/main/java/org/opensearch/script/MockScriptEngine.java b/test/framework/src/main/java/org/opensearch/script/MockScriptEngine.java index 83b245a1bcecb..8c7e9718eb0cd 100644 --- a/test/framework/src/main/java/org/opensearch/script/MockScriptEngine.java +++ b/test/framework/src/main/java/org/opensearch/script/MockScriptEngine.java @@ -281,7 +281,22 @@ public double execute(Map params1, double[] values) { } else if (context.instanceClazz.equals(IntervalFilterScript.class)) { IntervalFilterScript.Factory factory = mockCompiled::createIntervalFilterScript; return context.factoryClazz.cast(factory); + } else if (context.instanceClazz.equals(DerivedFieldScript.class)) { + DerivedFieldScript.Factory factory = (derivedFieldsParams, lookup) -> ctx -> new DerivedFieldScript( + derivedFieldsParams, + lookup, + ctx + ) { + @Override + public Object execute() { + Map vars = new HashMap<>(derivedFieldsParams); + vars.put("params", derivedFieldsParams); + return script.apply(vars); + } + }; + return context.factoryClazz.cast(factory); } + ContextCompiler compiler = contexts.get(context); if (compiler != null) { return context.factoryClazz.cast(compiler.compile(script::apply, params));