From 6be5ad94d9d74e29289bf6b2474154a094d4065f Mon Sep 17 00:00:00 2001 From: Vikasht34 Date: Mon, 21 Oct 2024 20:58:28 -0700 Subject: [PATCH 1/4] Add Release Notes for 2.18.0.0 (#953) --- .../opensearch-neural-search.release-notes-2.18.0.0.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/release-notes/opensearch-neural-search.release-notes-2.18.0.0.md b/release-notes/opensearch-neural-search.release-notes-2.18.0.0.md index fc7f1528f..298bd704b 100644 --- a/release-notes/opensearch-neural-search.release-notes-2.18.0.0.md +++ b/release-notes/opensearch-neural-search.release-notes-2.18.0.0.md @@ -3,10 +3,6 @@ Compatible with OpenSearch 2.18.0 -### Features -- Introduces ByFieldRerankProcessor for second level reranking on documents ([#932](https://github.com/opensearch-project/neural-search/pull/932)) -### Bug Fixes -- Fixed incorrect document order for nested aggregations in hybrid query ([#956](https://github.com/opensearch-project/neural-search/pull/956)) ### Enhancements - Implement `ignore_missing` field in text chunking processors ([#907](https://github.com/opensearch-project/neural-search/pull/907)) - Added rescorer in hybrid query ([#917](https://github.com/opensearch-project/neural-search/pull/917)) From 923bcb61d6b617ec3c7e908f3470fe75f889e847 Mon Sep 17 00:00:00 2001 From: Isaac Johnson <114550967+Johnsonisaacn@users.noreply.github.com> Date: Fri, 18 Oct 2024 09:44:07 -0700 Subject: [PATCH 2/4] Reciprocal Rank Fusion (RRF) normalization technique in hybrid query (#874) * initial commit of RRF Signed-off-by: Isaac Johnson Co-authored-by: Varun Jain Signed-off-by: Martin Gaievski --- .github/workflows/CI.yml | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 5bcc517bc..fe847d63d 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -91,6 +91,11 @@ jobs: run: | ./gradlew check + - name: Upload Coverage Report + uses: codecov/codecov-action@v3 + with: + token: ${{ secrets.CODECOV_TOKEN }} + Precommit-neural-search-linux: needs: Get-CI-Image-Tag strategy: @@ -131,8 +136,7 @@ jobs: su `id -un 1000` -c "./gradlew precommit --parallel" - name: Upload Coverage Report - if: ${{ !cancelled() }} - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v3 with: token: ${{ secrets.CODECOV_TOKEN }} @@ -164,3 +168,7 @@ jobs: run: | ./gradlew precommit --parallel + - name: Upload Coverage Report + uses: codecov/codecov-action@v3 + with: + token: ${{ secrets.CODECOV_TOKEN }} From 2cebb0e9b9ee7dafe325780dcbeed08d0ebead3a Mon Sep 17 00:00:00 2001 From: Ryan Bogan Date: Thu, 14 Nov 2024 14:01:49 -0800 Subject: [PATCH 3/4] Add RRF integ test Signed-off-by: Ryan Bogan --- .../neuralsearch/processor/RRFSearchIT.java | 171 ++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 src/test/java/org/opensearch/neuralsearch/processor/RRFSearchIT.java diff --git a/src/test/java/org/opensearch/neuralsearch/processor/RRFSearchIT.java b/src/test/java/org/opensearch/neuralsearch/processor/RRFSearchIT.java new file mode 100644 index 000000000..5cf0f0031 --- /dev/null +++ b/src/test/java/org/opensearch/neuralsearch/processor/RRFSearchIT.java @@ -0,0 +1,171 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ +package org.opensearch.neuralsearch.processor; + +import com.google.common.collect.ImmutableList; +import lombok.SneakyThrows; +import org.apache.hc.core5.http.HttpHeaders; +import org.apache.hc.core5.http.io.entity.EntityUtils; +import 
org.apache.hc.core5.http.message.BasicHeader; +import org.opensearch.client.Request; +import org.opensearch.client.Response; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.common.xcontent.XContentHelper; +import org.opensearch.common.xcontent.XContentType; +import org.opensearch.core.rest.RestStatus; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.neuralsearch.BaseNeuralSearchIT; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; + +import static org.opensearch.neuralsearch.util.TestUtils.DEFAULT_USER_AGENT; +import static org.opensearch.neuralsearch.util.TestUtils.DELTA_FOR_SCORE_ASSERTION; + +public class RRFSearchIT extends BaseNeuralSearchIT { + + private int currentDoc = 1; + private static final String RRF_INDEX_NAME = "rrf-index"; + private static final String RRF_SEARCH_PIPELINE = "rrf-search-pipeline"; + + @SneakyThrows + public void testRRF() { + String modelId = prepareModel(); + String ingestPipelineName = "rrf-ingest-pipeline"; + createPipelineProcessor(modelId, ingestPipelineName, ProcessorType.TEXT_EMBEDDING); + Settings indexSettings = Settings.builder().put("index.knn", true).put("default_pipeline", ingestPipelineName).build(); + String indexMappings = XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject("id") + .field("type", "text") + .endObject() + .startObject("passage_embedding") + .field("type", "knn_vector") + .field("dimension", "768") + .startObject("method") + .field("engine", "lucene") + .field("space_type", "l2") + .field("name", "hnsw") + .endObject() + .endObject() + .startObject("text") + .field("type", "text") + .endObject() + .endObject() + .endObject() + .toString(); + // Removes the {} around the string, since they are already included with createIndex + indexMappings = indexMappings.substring(1, indexMappings.length() - 1); + String indexName = "rrf-index"; + createIndex(indexName, indexSettings, indexMappings, null); + addRRFDocuments(); + createDefaultRRFSearchPipeline(); + + Map results = searchRRF(modelId); + Map hits = (Map) results.get("hits"); + ArrayList> hitsList = (ArrayList>) hits.get("hits"); + assertEquals(3, hitsList.size()); + assertEquals(0.016393442, (Double) hitsList.getFirst().get("_score"), DELTA_FOR_SCORE_ASSERTION); + assertEquals(0.016129032, (Double) hitsList.get(1).get("_score"), DELTA_FOR_SCORE_ASSERTION); + assertEquals(0.015873017, (Double) hitsList.getLast().get("_score"), DELTA_FOR_SCORE_ASSERTION); + } + + @SneakyThrows + private void addRRFDocuments() { + addRRFDocument( + "A West Virginia university women 's basketball team , officials , and a small gathering of fans are in a West Virginia arena .", + "4319130149.jpg" + ); + addRRFDocument("A wild animal races across an uncut field with a minimal amount of trees .", "1775029934.jpg"); + addRRFDocument( + "People line the stands which advertise Freemont 's orthopedics , a cowboy rides a light brown bucking bronco .", + "2664027527.jpg" + ); + addRRFDocument("A man who is riding a wild horse in the rodeo is very near to falling off .", "4427058951.jpg"); + addRRFDocument("A rodeo cowboy , wearing a cowboy hat , is being thrown off of a wild white horse .", "2691147709.jpg"); + } + + @SneakyThrows + private void addRRFDocument(String description, String imageText) { + addDocument(RRF_INDEX_NAME, String.valueOf(currentDoc++), "text", description, "image_text", imageText); + } + + @SneakyThrows + 
private void createDefaultRRFSearchPipeline() { + String requestBody = XContentFactory.jsonBuilder() + .startObject() + .field("description", "Post processor for hybrid search") + .startArray("phase_results_processors") + .startObject() + .startObject("score-ranker-processor") + .startObject("combination") + .field("technique", "rrf") + .startObject("parameters") + .field("rank_constant", 60) + .endObject() + .endObject() + .endObject() + .endObject() + .endArray() + .endObject() + .toString(); + + makeRequest( + client(), + "PUT", + String.format(LOCALE, "/_search/pipeline/%s", RRF_SEARCH_PIPELINE), + null, + toHttpEntity(String.format(LOCALE, requestBody)), + ImmutableList.of(new BasicHeader(HttpHeaders.USER_AGENT, DEFAULT_USER_AGENT)) + ); + } + + @SneakyThrows + private Map searchRRF(String modelId) { + XContentBuilder builder = XContentFactory.jsonBuilder() + .startObject() + .startObject("_source") + .startArray("exclude") + .value("passage_embedding") + .endArray() + .endObject() + .startObject("query") + .startObject("hybrid") + .startArray("queries") + .startObject() + .startObject("match") + .startObject("text") + .field("query", "cowboy rodeo bronco") + .endObject() + .endObject() + .endObject() + .startObject() + .startObject("neural") + .startObject("passage_embedding") + .field("query_text", "wild west") + .field("model_id", modelId) + .field("k", 5) + .endObject() + .endObject() + .endObject() + .endArray() + .endObject() + .endObject() + .endObject(); + + Request request = new Request("GET", "/" + RRF_INDEX_NAME + "/_search?timeout=1000s&search_pipeline=" + RRF_SEARCH_PIPELINE); + logger.info("Sorting request " + builder); + request.setJsonEntity(builder.toString()); + Response response = client().performRequest(request); + assertEquals(request.getEndpoint() + ": failed", RestStatus.OK, RestStatus.fromCode(response.getStatusLine().getStatusCode())); + + String responseBody = EntityUtils.toString(response.getEntity()); + logger.info("Response " + responseBody); + return XContentHelper.convertToMap(XContentType.JSON.xContent(), responseBody, false); + } +} From 569040584511aea5e145c24f2015223a9ce249d1 Mon Sep 17 00:00:00 2001 From: Ryan Bogan Date: Tue, 19 Nov 2024 12:33:55 -0800 Subject: [PATCH 4/4] Initial unit test implementation Signed-off-by: Ryan Bogan --- .../neuralsearch/processor/RRFProcessor.java | 15 +- .../processor/RRFProcessorTests.java | 230 ++++++++++++++++++ 2 files changed, 239 insertions(+), 6 deletions(-) create mode 100644 src/test/java/org/opensearch/neuralsearch/processor/RRFProcessorTests.java diff --git a/src/main/java/org/opensearch/neuralsearch/processor/RRFProcessor.java b/src/main/java/org/opensearch/neuralsearch/processor/RRFProcessor.java index 207af156c..a083fa0b7 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/RRFProcessor.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/RRFProcessor.java @@ -13,6 +13,7 @@ import java.util.Optional; import lombok.Getter; +import org.opensearch.ml.repackage.com.google.common.annotations.VisibleForTesting; import org.opensearch.neuralsearch.processor.combination.ScoreCombinationTechnique; import org.opensearch.neuralsearch.processor.normalization.ScoreNormalizationTechnique; import org.opensearch.search.fetch.FetchSearchResult; @@ -98,7 +99,8 @@ public boolean isIgnoreFailure() { return false; } - private boolean shouldSkipProcessor(SearchPhaseResults searchPhaseResult) { + @VisibleForTesting + boolean shouldSkipProcessor(SearchPhaseResults searchPhaseResult) { if 
(Objects.isNull(searchPhaseResult) || !(searchPhaseResult instanceof QueryPhaseResultConsumer queryPhaseResultConsumer)) { return true; } @@ -111,7 +113,8 @@ private boolean shouldSkipProcessor(SearchPha * @param searchPhaseResult * @return true if results are from hybrid query */ - private boolean isHybridQuery(final SearchPhaseResult searchPhaseResult) { + @VisibleForTesting + boolean isHybridQuery(final SearchPhaseResult searchPhaseResult) { // check for delimiter at the end of the score docs. return Objects.nonNull(searchPhaseResult.queryResult()) && Objects.nonNull(searchPhaseResult.queryResult().topDocs()) @@ -120,9 +123,8 @@ private boolean isHybridQuery(final SearchPhaseResult searchPhaseResult) { && isHybridQueryStartStopElement(searchPhaseResult.queryResult().topDocs().topDocs.scoreDocs[0]); } - private List getQueryPhaseSearchResults( - final SearchPhaseResults results - ) { + @VisibleForTesting + List getQueryPhaseSearchResults(final SearchPhaseResults results) { return results.getAtomicArray() .asList() .stream() @@ -130,7 +132,8 @@ private List getQueryPhase .collect(Collectors.toList()); } - private Optional getFetchSearchResults( + @VisibleForTesting + Optional getFetchSearchResults( final SearchPhaseResults searchPhaseResults ) { Optional optionalFirstSearchPhaseResult = searchPhaseResults.getAtomicArray().asList().stream().findFirst(); diff --git a/src/test/java/org/opensearch/neuralsearch/processor/RRFProcessorTests.java b/src/test/java/org/opensearch/neuralsearch/processor/RRFProcessorTests.java new file mode 100644 index 000000000..70dcb7aee --- /dev/null +++ b/src/test/java/org/opensearch/neuralsearch/processor/RRFProcessorTests.java @@ -0,0 +1,230 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ +package org.opensearch.neuralsearch.processor; + +import lombok.SneakyThrows; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TotalHits; +import org.junit.Before; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.opensearch.action.OriginalIndices; +import org.opensearch.action.search.QueryPhaseResultConsumer; +import org.opensearch.action.search.SearchPhaseContext; +import org.opensearch.action.search.SearchPhaseName; +import org.opensearch.action.search.SearchPhaseResults; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.support.IndicesOptions; +import org.opensearch.common.lucene.search.TopDocsAndMaxScore; +import org.opensearch.common.util.concurrent.AtomicArray; +import org.opensearch.core.common.Strings; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.neuralsearch.processor.combination.ScoreCombinationTechnique; +import org.opensearch.neuralsearch.processor.normalization.ScoreNormalizationTechnique; +import org.opensearch.neuralsearch.search.util.HybridSearchResultFormatUtil; +import org.opensearch.search.DocValueFormat; +import org.opensearch.search.SearchPhaseResult; +import org.opensearch.search.SearchShardTarget; +import org.opensearch.search.builder.SearchSourceBuilder; +import org.opensearch.search.fetch.FetchSearchResult; +import org.opensearch.search.internal.AliasFilter; +import org.opensearch.search.internal.ShardSearchContextId; +import org.opensearch.search.internal.ShardSearchRequest; +import org.opensearch.search.query.QuerySearchResult; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.List; +import java.util.Optional; + +import static 
org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.never; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +public class RRFProcessorTests extends OpenSearchTestCase { + + @Mock + private ScoreNormalizationTechnique mockNormalizationTechnique; + @Mock + private ScoreCombinationTechnique mockCombinationTechnique; + @Mock + private NormalizationProcessorWorkflow mockNormalizationWorkflow; + @Mock + private SearchPhaseResults mockSearchPhaseResults; + @Mock + private SearchPhaseContext mockSearchPhaseContext; + @Mock + private QueryPhaseResultConsumer mockQueryPhaseResultConsumer; + + private RRFProcessor rrfProcessor; + + @Before + @SneakyThrows + public void setUp() { + super.setUp(); + MockitoAnnotations.openMocks(this); + rrfProcessor = new RRFProcessor( + "tag", + "description", + mockNormalizationTechnique, + mockCombinationTechnique, + mockNormalizationWorkflow + ); + } + + @SneakyThrows + public void testGetType() { + assertEquals("score-ranker-processor", rrfProcessor.getType()); + } + + @SneakyThrows + public void testGetBeforePhase() { + assertEquals(SearchPhaseName.QUERY, rrfProcessor.getBeforePhase()); + } + + @SneakyThrows + public void testGetAfterPhase() { + assertEquals(SearchPhaseName.FETCH, rrfProcessor.getAfterPhase()); + } + + @SneakyThrows + public void testIsIgnoreFailure() { + assertFalse(rrfProcessor.isIgnoreFailure()); + } + + @SneakyThrows + public void testProcessWithNullSearchPhaseResult() { + rrfProcessor.process(null, mockSearchPhaseContext); + verify(mockNormalizationWorkflow, never()).execute(any()); + } + + @SneakyThrows + public void testProcessWithNonQueryPhaseResultConsumer() { + rrfProcessor.process(mockSearchPhaseResults, mockSearchPhaseContext); + verify(mockNormalizationWorkflow, never()).execute(any()); + } + + @SneakyThrows + public void testProcessWithValidHybridInput() { + QuerySearchResult result = createQuerySearchResult(true); + AtomicArray atomicArray = new AtomicArray<>(1); + atomicArray.set(0, result); + + when(mockQueryPhaseResultConsumer.getAtomicArray()).thenReturn(atomicArray); + + rrfProcessor.process(mockQueryPhaseResultConsumer, mockSearchPhaseContext); + + verify(mockNormalizationWorkflow).execute(any(NormalizationExecuteDTO.class)); + } + + @SneakyThrows + public void testProcessWithValidNonHybridInput() { + QuerySearchResult result = createQuerySearchResult(false); + AtomicArray atomicArray = new AtomicArray<>(1); + atomicArray.set(0, result); + + when(mockQueryPhaseResultConsumer.getAtomicArray()).thenReturn(atomicArray); + + rrfProcessor.process(mockQueryPhaseResultConsumer, mockSearchPhaseContext); + + verify(mockNormalizationWorkflow, never()).execute(any(NormalizationExecuteDTO.class)); + } + + @SneakyThrows + public void testGetTag() { + assertEquals("tag", rrfProcessor.getTag()); + } + + @SneakyThrows + public void testGetDescription() { + assertEquals("description", rrfProcessor.getDescription()); + } + + @SneakyThrows + public void testShouldSkipProcessor() { + assertTrue(rrfProcessor.shouldSkipProcessor(null)); + assertTrue(rrfProcessor.shouldSkipProcessor(mockSearchPhaseResults)); + + AtomicArray atomicArray = new AtomicArray<>(1); + atomicArray.set(0, createQuerySearchResult(false)); + when(mockQueryPhaseResultConsumer.getAtomicArray()).thenReturn(atomicArray); + + assertTrue(rrfProcessor.shouldSkipProcessor(mockQueryPhaseResultConsumer)); + + atomicArray.set(0, createQuerySearchResult(true)); + 
assertFalse(rrfProcessor.shouldSkipProcessor(mockQueryPhaseResultConsumer)); + } + + @SneakyThrows + public void testGetQueryPhaseSearchResults() { + AtomicArray atomicArray = new AtomicArray<>(2); + atomicArray.set(0, createQuerySearchResult(true)); + atomicArray.set(1, createQuerySearchResult(false)); + when(mockQueryPhaseResultConsumer.getAtomicArray()).thenReturn(atomicArray); + + List results = rrfProcessor.getQueryPhaseSearchResults(mockQueryPhaseResultConsumer); + assertEquals(2, results.size()); + assertNotNull(results.get(0)); + assertNotNull(results.get(1)); + } + + @SneakyThrows + public void testGetFetchSearchResults() { + AtomicArray atomicArray = new AtomicArray<>(1); + atomicArray.set(0, createQuerySearchResult(true)); + when(mockQueryPhaseResultConsumer.getAtomicArray()).thenReturn(atomicArray); + + Optional result = rrfProcessor.getFetchSearchResults(mockQueryPhaseResultConsumer); + assertFalse(result.isPresent()); + } + + private QuerySearchResult createQuerySearchResult(boolean isHybrid) { + ShardId shardId = new ShardId("index", "uuid", 0); + OriginalIndices originalIndices = new OriginalIndices(new String[] { "index" }, IndicesOptions.strictExpandOpenAndForbidClosed()); + SearchRequest searchRequest = new SearchRequest("index"); + searchRequest.source(new SearchSourceBuilder()); + searchRequest.allowPartialSearchResults(true); + + int numberOfShards = 1; + AliasFilter aliasFilter = new AliasFilter(null, Strings.EMPTY_ARRAY); + float indexBoost = 1.0f; + long nowInMillis = System.currentTimeMillis(); + String clusterAlias = null; + String[] indexRoutings = Strings.EMPTY_ARRAY; + + ShardSearchRequest shardSearchRequest = new ShardSearchRequest( + originalIndices, + searchRequest, + shardId, + numberOfShards, + aliasFilter, + indexBoost, + nowInMillis, + clusterAlias, + indexRoutings + ); + + QuerySearchResult result = new QuerySearchResult( + new ShardSearchContextId("test", 1), + new SearchShardTarget("node1", shardId, clusterAlias, originalIndices), + shardSearchRequest + ); + result.from(0).size(10); + + ScoreDoc[] scoreDocs; + if (isHybrid) { + scoreDocs = new ScoreDoc[] { HybridSearchResultFormatUtil.createStartStopElementForHybridSearchResults(0) }; + } else { + scoreDocs = new ScoreDoc[] { new ScoreDoc(0, 1.0f) }; + } + + TopDocs topDocs = new TopDocs(new TotalHits(1, TotalHits.Relation.EQUAL_TO), scoreDocs); + TopDocsAndMaxScore topDocsAndMaxScore = new TopDocsAndMaxScore(topDocs, 1.0f); + result.topDocs(topDocsAndMaxScore, new DocValueFormat[0]); + + return result; + } +}
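
Note on the expected scores asserted in RRFSearchIT.testRRF (0.016393442, 0.016129032, 0.015873017): these follow directly from reciprocal rank fusion with the rank_constant of 60 configured in createDefaultRRFSearchPipeline. RRF combines per-sub-query rankings by giving each document a score of the sum, over sub-queries that return it, of 1 / (rank_constant + rank); 1/61, 1/62, and 1/63 are exactly those values for documents contributed at ranks 1, 2, and 3 by a single sub-query. The sketch below recomputes them. It is a stand-alone illustration under the assumption that this is the combination rule the score-ranker-processor applies, not the plugin's actual implementation; the class and method names are invented for the example.

// Minimal, self-contained sketch of reciprocal rank fusion scoring.
// Hypothetical helper for illustration only; not part of the neural-search plugin.
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class RrfScoreSketch {

    // score(doc) = sum over sub-queries of 1 / (rankConstant + rank(doc)); ranks are 1-based.
    static Map<String, Double> rrfCombine(List<List<String>> rankingsPerQuery, int rankConstant) {
        Map<String, Double> combined = new HashMap<>();
        for (List<String> ranking : rankingsPerQuery) {
            for (int position = 0; position < ranking.size(); position++) {
                int rank = position + 1;
                combined.merge(ranking.get(position), 1.0 / (rankConstant + rank), Double::sum);
            }
        }
        return combined;
    }

    public static void main(String[] args) {
        // A document returned by only one sub-query at rank 1, 2, or 3 with rank_constant = 60
        // scores 1/61 = 0.016393..., 1/62 = 0.016129..., 1/63 = 0.015873... — the values
        // asserted in RRFSearchIT. A document returned by both sub-queries would sum both terms.
        Map<String, Double> scores = rrfCombine(List.of(List.of("doc1", "doc2", "doc3")), 60);
        scores.forEach((doc, score) -> System.out.println(doc + " -> " + score));
    }
}

The rank_constant of 60 used by the test matches the constant proposed in the original RRF paper and commonly used as a default.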