Optimizing integ tests for less model upload calls (opensearch-project#683)

* Optimizing tests for less model upload calls

Signed-off-by: Martin Gaievski <[email protected]>
martin-gaievski authored Apr 12, 2024
1 parent ff95604 commit 7106d20
Showing 8 changed files with 139 additions and 289 deletions.
11 changes: 7 additions & 4 deletions CONTRIBUTING.md
@@ -31,10 +31,13 @@ To send us a pull request, please:

1. Fork the repository.
2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change.
3. Ensure local tests pass.
4. Commit to your fork using clear commit messages.
5. Send us a pull request, answering any default questions in the pull request interface.
6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.
3. Include tests that check your new feature or bug fix. Ideally, we're looking for unit, integration, and BWC tests, but the right mix depends on how big and critical your change is.
If you're adding an integration test that uses local ML models, please keep the number of model deployments to a minimum and use the smallest model possible (see the sketch below).
Each model deployment consumes resources, and deploying too many models may cause unexpected test failures.
4. Ensure local tests pass.
5. Commit to your fork using clear commit messages.
6. Send us a pull request, answering any default questions in the pull request interface.
7. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation.

GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and
[creating a pull request](https://help.github.com/articles/creating-a-pull-request/).
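For illustration only (this sketch is not part of the committed diff): one way to follow the guideline above is to consolidate related scenarios into a single test method that uploads and deploys one small model, reuses it for every assertion, and releases everything in a `finally` block. The sketch assumes the helper methods that appear in this commit's diffs (`uploadModel`, `loadModel`, `createPipelineProcessor`, `wipeOfTestResources`, and the ingest helpers); the class, index, and pipeline names are hypothetical.

```java
// Hypothetical consolidated integration test; helper methods are assumed to come
// from BaseNeuralSearchIT and the test classes touched by this commit.
public class ExampleEmbeddingProcessorIT extends BaseNeuralSearchIT {

    private static final String INDEX_NAME = "example-index";
    private static final String PIPELINE_NAME = "example-ingest-pipeline";

    public void testEmbeddingProcessor_whenSeveralIngestScenarios_thenSuccessful() throws Exception {
        String modelId = null;
        try {
            // upload and deploy a single small model once, then reuse it for all scenarios
            modelId = uploadModel();
            loadModel(modelId);
            createPipelineProcessor(modelId, PIPELINE_NAME, ProcessorType.TEXT_IMAGE_EMBEDDING);
            createTextImageEmbeddingIndex();

            // scenario 1: document whose source matches the embedding field mapping
            ingestDocumentWithTextMappedToEmbeddingField();
            assertEquals(1, getDocCount(INDEX_NAME));

            // scenario 2: document without mapped fields, reusing the same model and pipeline
            ingestDocumentWithoutMappedFields();
            assertEquals(2, getDocCount(INDEX_NAME));
        } finally {
            // always clean up the index, ingest pipeline, and model so later tests
            // don't run into memory pressure from leftover model deployments
            wipeOfTestResources(INDEX_NAME, PIPELINE_NAME, modelId, null);
        }
    }
}
```

This mirrors the consolidation applied in this commit to the text/image embedding and rerank tests: one model upload per test method instead of one per scenario.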
@@ -164,7 +164,7 @@ public void testResultProcessor_whenDefaultProcessorConfigAndQueryMatches_thenSu
}

@SneakyThrows
public void testResultProcessor_whenMultipleShardsAndQueryMatches_thenSuccessful() {
public void testQueryMatches_whenMultipleShards_thenSuccessful() {
String modelId = null;
try {
initializeIndexIfNotExist(TEST_MULTI_DOC_INDEX_THREE_SHARDS_NAME);
@@ -223,55 +223,37 @@ public void testResultProcessor_whenMultipleShardsAndQueryMatches_thenSuccessful

// verify that all ids are unique
assertEquals(Set.copyOf(ids).size(), ids.size());
} finally {
wipeOfTestResources(TEST_MULTI_DOC_INDEX_THREE_SHARDS_NAME, null, modelId, SEARCH_PIPELINE);
}
}

@SneakyThrows
public void testResultProcessor_whenMultipleShardsAndNoMatches_thenSuccessful() {
try {
initializeIndexIfNotExist(TEST_MULTI_DOC_INDEX_THREE_SHARDS_NAME);
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);

HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder();
hybridQueryBuilder.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT6));
hybridQueryBuilder.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT7));
// verify the case when there is a partial match
HybridQueryBuilder hybridQueryBuilderPartialMatch = new HybridQueryBuilder();
hybridQueryBuilderPartialMatch.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3));
hybridQueryBuilderPartialMatch.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT4));
hybridQueryBuilderPartialMatch.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT7));

Map<String, Object> searchResponseAsMap = search(
Map<String, Object> searchResponseAsMapPartialMatch = search(
TEST_MULTI_DOC_INDEX_THREE_SHARDS_NAME,
hybridQueryBuilder,
hybridQueryBuilderPartialMatch,
null,
5,
Map.of("search_pipeline", SEARCH_PIPELINE)
);
assertQueryResults(searchResponseAsMap, 0, true);
} finally {
wipeOfTestResources(TEST_MULTI_DOC_INDEX_THREE_SHARDS_NAME, null, null, SEARCH_PIPELINE);
}
}

@SneakyThrows
public void testResultProcessor_whenMultipleShardsAndPartialMatches_thenSuccessful() {
try {
initializeIndexIfNotExist(TEST_MULTI_DOC_INDEX_THREE_SHARDS_NAME);
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);
assertQueryResults(searchResponseAsMapPartialMatch, 4, true, Range.between(0.33f, 1.0f));

HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder();
hybridQueryBuilder.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3));
hybridQueryBuilder.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT4));
hybridQueryBuilder.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT7));
// verify the case when the query doesn't have a match
HybridQueryBuilder hybridQueryBuilderNoMatches = new HybridQueryBuilder();
hybridQueryBuilderNoMatches.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT6));
hybridQueryBuilderNoMatches.add(QueryBuilders.termQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT7));

Map<String, Object> searchResponseAsMap = search(
Map<String, Object> searchResponseAsMapNoMatches = search(
TEST_MULTI_DOC_INDEX_THREE_SHARDS_NAME,
hybridQueryBuilder,
hybridQueryBuilderNoMatches,
null,
5,
Map.of("search_pipeline", SEARCH_PIPELINE)
);
assertQueryResults(searchResponseAsMap, 4, true, Range.between(0.33f, 1.0f));
assertQueryResults(searchResponseAsMapNoMatches, 0, true);
} finally {
wipeOfTestResources(TEST_MULTI_DOC_INDEX_THREE_SHARDS_NAME, null, null, SEARCH_PIPELINE);
wipeOfTestResources(TEST_MULTI_DOC_INDEX_THREE_SHARDS_NAME, null, modelId, SEARCH_PIPELINE);
}
}

@@ -34,29 +34,19 @@ public void setUp() throws Exception {
updateClusterSettings();
}

public void testEmbeddingProcessor_whenIngestingDocumentWithSourceMatchingTextMapping_thenSuccessful() throws Exception {
public void testEmbeddingProcessor_whenIngestingDocumentWithOrWithoutSourceMatchingMapping_thenSuccessful() throws Exception {
String modelId = null;
try {
modelId = uploadModel();
loadModel(modelId);
createPipelineProcessor(modelId, PIPELINE_NAME, ProcessorType.TEXT_IMAGE_EMBEDDING);
createTextImageEmbeddingIndex();
// verify doc with mapping
ingestDocumentWithTextMappedToEmbeddingField();
assertEquals(1, getDocCount(INDEX_NAME));
} finally {
wipeOfTestResources(INDEX_NAME, PIPELINE_NAME, modelId, null);
}
}

public void testEmbeddingProcessor_whenIngestingDocumentWithSourceWithoutMatchingInMapping_thenSuccessful() throws Exception {
String modelId = null;
try {
modelId = uploadModel();
loadModel(modelId);
createPipelineProcessor(modelId, PIPELINE_NAME, ProcessorType.TEXT_IMAGE_EMBEDDING);
createTextImageEmbeddingIndex();
// verify doc without mapping
ingestDocumentWithoutMappedFields();
assertEquals(1, getDocCount(INDEX_NAME));
assertEquals(2, getDocCount(INDEX_NAME));
} finally {
wipeOfTestResources(INDEX_NAME, PIPELINE_NAME, modelId, null);
}
@@ -12,8 +12,6 @@
import org.apache.hc.core5.http.HttpHeaders;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.apache.hc.core5.http.message.BasicHeader;
import org.junit.After;
import org.junit.Before;
import org.opensearch.client.Request;
import org.opensearch.client.Response;
import org.opensearch.common.xcontent.XContentHelper;
@@ -36,33 +34,19 @@ public class MLOpenSearchRerankProcessorIT extends BaseNeuralSearchIT {
private final static String TEXT_REP_1 = "Jacques loves fish. Fish make Jacques happy";
private final static String TEXT_REP_2 = "Fish like to eat plankton";
private final static String INDEX_CONFIG = "{\"mappings\": {\"properties\": {\"text_representation\": {\"type\": \"text\"}}}}";
private String modelId;

@After
@SneakyThrows
public void tearDown() {
super.tearDown();
/* this is required to minimize the chance of a model failing to deploy due to an open memory circuit breaker,
* which happens when a model is left over from a previous test case. We use a new model for every test, so the old model
* can be undeployed and deleted to free resources after each test case execution.
*/
deleteModel(modelId);
deleteSearchPipeline(PIPELINE_NAME);
deleteIndex(INDEX_NAME);
}

@Before
@SneakyThrows
public void setup() {
modelId = uploadTextSimilarityModel();
loadModel(modelId);
}

@SneakyThrows
public void testCrossEncoderRerankProcessor() {
createSearchPipelineViaConfig(modelId, PIPELINE_NAME, "processor/RerankMLOpenSearchPipelineConfiguration.json");
setupIndex();
runQueries();
String modelId = null;
try {
modelId = uploadTextSimilarityModel();
loadModel(modelId);
createSearchPipelineViaConfig(modelId, PIPELINE_NAME, "processor/RerankMLOpenSearchPipelineConfiguration.json");
setupIndex();
runQueries();
} finally {
wipeOfTestResources(INDEX_NAME, null, modelId, PIPELINE_NAME);
}
}

private String uploadTextSimilarityModel() throws Exception {
40 changes: 13 additions & 27 deletions src/test/java/org/opensearch/neuralsearch/query/HybridQueryIT.java
@@ -566,28 +566,17 @@ public void testRequestCache_whenMultipleShardsQueryReturnResults_thenSuccessful

@SneakyThrows
public void testWrappedQueryWithFilter_whenIndexAliasHasFilterAndIndexWithNestedFields_thenSuccess() {
String modelId = null;
String alias = "alias_with_filter";
try {
initializeIndexIfNotExist(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME);
modelId = prepareModel();
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);
// create alias for index
QueryBuilder aliasFilter = QueryBuilders.boolQuery()
.mustNot(QueryBuilders.matchQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3));
createIndexAlias(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME, alias, aliasFilter);

NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder(
TEST_KNN_VECTOR_FIELD_NAME_1,
TEST_QUERY_TEXT,
"",
modelId,
5,
null,
null
);
HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder();
hybridQueryBuilder.add(neuralQueryBuilder);
hybridQueryBuilder.add(QueryBuilders.existsQuery(TEST_TEXT_FIELD_NAME_1));

Map<String, Object> searchResponseAsMap = search(
alias,
@@ -608,34 +597,23 @@ public void testWrappedQueryWithFilter_whenIndexAliasHasFilterAndIndexWithNested
assertEquals(RELATION_EQUAL_TO, total.get("relation"));
} finally {
deleteIndexAlias(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME, alias);
wipeOfTestResources(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME, null, modelId, SEARCH_PIPELINE);
wipeOfTestResources(TEST_MULTI_DOC_WITH_NESTED_FIELDS_INDEX_NAME, null, null, SEARCH_PIPELINE);
}
}

@SneakyThrows
public void testWrappedQueryWithFilter_whenIndexAliasHasFilters_thenSuccess() {
String modelId = null;
String alias = "alias_with_filter";
try {
initializeIndexIfNotExist(TEST_MULTI_DOC_INDEX_NAME);
modelId = prepareModel();
createSearchPipelineWithResultsPostProcessor(SEARCH_PIPELINE);
// create alias for index
QueryBuilder aliasFilter = QueryBuilders.boolQuery()
.mustNot(QueryBuilders.matchQuery(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT3));
createIndexAlias(TEST_MULTI_DOC_INDEX_NAME, alias, aliasFilter);

NeuralQueryBuilder neuralQueryBuilder = new NeuralQueryBuilder(
TEST_KNN_VECTOR_FIELD_NAME_1,
TEST_QUERY_TEXT,
"",
modelId,
5,
null,
null
);
HybridQueryBuilder hybridQueryBuilder = new HybridQueryBuilder();
hybridQueryBuilder.add(neuralQueryBuilder);
hybridQueryBuilder.add(QueryBuilders.existsQuery(TEST_TEXT_FIELD_NAME_1));

Map<String, Object> searchResponseAsMap = search(
alias,
@@ -656,7 +634,7 @@ public void testWrappedQueryWithFilter_whenIndexAliasHasFilters_thenSuccess() {
assertEquals(RELATION_EQUAL_TO, total.get("relation"));
} finally {
deleteIndexAlias(TEST_MULTI_DOC_INDEX_NAME, alias);
wipeOfTestResources(TEST_MULTI_DOC_INDEX_NAME, null, modelId, SEARCH_PIPELINE);
wipeOfTestResources(TEST_MULTI_DOC_INDEX_NAME, null, null, SEARCH_PIPELINE);
}
}

@@ -892,7 +870,15 @@ private void addDocsToIndex(final String testMultiDocIndexName) {
Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
Collections.singletonList(TEST_DOC_TEXT2)
);
assertEquals(3, getDocCount(testMultiDocIndexName));
addKnnDoc(
testMultiDocIndexName,
"4",
Collections.emptyList(),
Collections.emptyList(),
Collections.singletonList(TEST_TEXT_FIELD_NAME_1),
Collections.singletonList(TEST_DOC_TEXT3)
);
assertEquals(4, getDocCount(testMultiDocIndexName));
}

private List<Map<String, Object>> getNestedHits(Map<String, Object> searchResponseAsMap) {