From 7b1c2c78c16d566da93001d0f6a8224b2de9e8d0 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Tue, 2 Jan 2024 16:04:40 -0800 Subject: [PATCH 01/13] Implementation for match_only_text field (#11039) * Implementation for match_only_text field Signed-off-by: Rishabh Maurya * Fix build failures Signed-off-by: Rishabh Maurya * Fix bugs Signed-off-by: Rishabh Maurya * Added mapper tests, still failing on prefix and phrase tests Signed-off-by: Rishabh Maurya * Disable index prefix and phrase mapper Signed-off-by: Rishabh Maurya * Added unit tests for phrase and multiphrase query validation Signed-off-by: Rishabh Maurya * Add unit tests for prefix and prefix phrase queries Signed-off-by: Rishabh Maurya * Add a test to cover 3 word with synonym match phrase prefix query Signed-off-by: Rishabh Maurya * Add unit test for SourceFieldMatchQuery Signed-off-by: Rishabh Maurya * Added test for _source disabled case Signed-off-by: Rishabh Maurya * Add unit test for missing field Signed-off-by: Rishabh Maurya * more validation tests and changelog update Signed-off-by: Rishabh Maurya * Added integration tests for match_only_text replicating text field integ tests Signed-off-by: Rishabh Maurya * Added skip section in integ test to fix mixed cluster failures Signed-off-by: Rishabh Maurya * remove unused import Signed-off-by: Rishabh Maurya * Address PR comments Signed-off-by: Rishabh Maurya * fix integ tests Signed-off-by: Rishabh Maurya * Fix flaky test due to random indexwriter Signed-off-by: Rishabh Maurya * pr comment: header modification Signed-off-by: Rishabh Maurya * Address PR comments Signed-off-by: Rishabh Maurya * added change to the right section of CHANGELOG Signed-off-by: Rishabh Maurya * overriding the textFieldType before every test Signed-off-by: Rishabh Maurya * rename @Before method Signed-off-by: Rishabh Maurya * update changelog description Signed-off-by: Rishabh Maurya --------- Signed-off-by: Rishabh Maurya --- CHANGELOG.md | 1 + 
.../11_match_field_match_only_text.yml | 70 +++ .../20_ngram_search_field_match_only_text.yml | 144 ++++++ ...ram_highligthing_field_match_only_text.yml | 137 ++++++ .../40_query_string_field_match_only_text.yml | 59 +++ ...default_analyzer_field_match_only_text.yml | 42 ++ ...es_with_synonyms_field_match_only_text.yml | 348 ++++++++++++++ ...60_synonym_graph_field_match_only_text.yml | 209 ++++++++ .../70_intervals_field_match_only_text.yml | 67 +++ .../20_phrase_field_match_only_text.yml | 238 +++++++++ .../20_highlighting_field_match_only_text.yml | 201 ++++++++ .../20_query_string_field_match_only_text.yml | 53 +++ .../30_sig_terms_field_match_only_text.yml | 76 +++ .../90_sig_text_field_match_only_text.yml | 155 ++++++ .../20_highlighting_field_match_only_text.yml | 137 ++++++ .../160_exists_query_match_only_text.yml | 119 +++++ ...00_phrase_search_field_match_only_text.yml | 67 +++ ...atch_bool_prefix_field_match_only_text.yml | 282 +++++++++++ ...disallow_queries_field_match_only_text.yml | 141 ++++++ .../10_basic_field_match_only_field.yml | 92 ++++ .../index/mapper/MappedFieldType.java | 13 + .../mapper/MatchOnlyTextFieldMapper.java | 312 ++++++++++++ .../index/mapper/TextFieldMapper.java | 35 +- .../index/query/SourceFieldMatchQuery.java | 160 +++++++ .../opensearch/index/search/MatchQuery.java | 10 +- .../index/search/MultiMatchQuery.java | 4 +- .../org/opensearch/indices/IndicesModule.java | 2 + .../MatchOnlyTextFieldAnalyzerModeTests.java | 16 + .../mapper/MatchOnlyTextFieldMapperTests.java | 450 ++++++++++++++++++ .../mapper/MatchOnlyTextFieldTypeTests.java | 31 ++ .../mapper/TextFieldAnalyzerModeTests.java | 22 +- .../index/mapper/TextFieldMapperTests.java | 222 ++++----- .../index/mapper/TextFieldTypeTests.java | 32 +- .../query/SourceFieldMatchQueryTests.java | 173 +++++++ .../index/mapper/MapperServiceTestCase.java | 4 +- .../aggregations/AggregatorTestCase.java | 4 +- 36 files changed, 3959 insertions(+), 169 deletions(-) create mode 100644 
modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml create mode 100644 modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml create mode 100644 
rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml create mode 100644 server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java create mode 100644 server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java create mode 100644 server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldAnalyzerModeTests.java create mode 100644 server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java create mode 100644 server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java create mode 100644 server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index a167127a7d795..026606ff57d65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -119,6 +119,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Create separate transport action for render search template action ([#11170](https://github.com/opensearch-project/OpenSearch/pull/11170)) - Add additional handling in SearchTemplateRequest when simulate is set to true ([#11591](https://github.com/opensearch-project/OpenSearch/pull/11591)) - Introduce cluster level setting `cluster.index.restrict.replication.type` to prevent replication type setting override during index creations([#11583](https://github.com/opensearch-project/OpenSearch/pull/11583)) +- Add match_only_text field that is optimized for storage by trading off 
positional queries performance ([#11039](https://github.com/opensearch-project/OpenSearch/pull/11039)) ### Dependencies - Bumps jetty version to 9.4.52.v20230823 to fix GMS-2023-1857 ([#9822](https://github.com/opensearch-project/OpenSearch/pull/9822)) diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml new file mode 100644 index 0000000000000..40ff2c2f4cdbe --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml @@ -0,0 +1,70 @@ +# integration tests for queries with specific analysis chains + +"match query with stacked stems": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + # Tests the match query stemmed tokens are "stacked" on top of the unstemmed + # versions in the same position. 
+ - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 1 + analysis: + analyzer: + index: + tokenizer: standard + filter: [lowercase] + search: + rest_total_hits_as_int: true + tokenizer: standard + filter: [lowercase, keyword_repeat, porter_stem, unique_stem] + filter: + unique_stem: + type: unique + only_on_same_position: true + mappings: + properties: + text: + type: match_only_text + analyzer: index + search_analyzer: search + + - do: + index: + index: test + id: 1 + body: { "text": "the fox runs across the street" } + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: fox runs + operator: AND + - match: {hits.total: 1} + + - do: + index: + index: test + id: 2 + body: { "text": "run fox run" } + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: fox runs + operator: AND + - match: {hits.total: 2} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml new file mode 100644 index 0000000000000..95b648dee47c8 --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml @@ -0,0 +1,144 @@ +"ngram search": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + analysis: + analyzer: + my_analyzer: + tokenizer: standard + filter: [my_ngram] + filter: + my_ngram: + type: ngram + min: 2, + max: 2 + mappings: + properties: + text: + type: match_only_text + analyzer: my_analyzer + + - do: + index: + index: test + id: 1 + body: { "text": "foo bar baz" } + refresh: true + + - do: + search: + 
rest_total_hits_as_int: true + body: + query: + match: + text: + query: foa + - match: {hits.total: 1} + +--- +"testNGramCopyField": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + max_ngram_diff: 9 + analysis: + analyzer: + my_ngram_analyzer: + tokenizer: my_ngram_tokenizer + tokenizer: + my_ngram_tokenizer: + type: ngram + min: 1, + max: 10 + token_chars: [] + mappings: + properties: + origin: + type: match_only_text + copy_to: meta + meta: + type: match_only_text + analyzer: my_ngram_analyzer + + - do: + index: + index: test + id: 1 + body: { "origin": "C.A1234.5678" } + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: 1234 + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: 1234.56 + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: A1234 + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + term: + meta: + value: a1234 + - match: {hits.total: 0} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: A1234 + analyzer: my_ngram_analyzer + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: a1234 + analyzer: my_ngram_analyzer + - match: {hits.total: 1} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml new file mode 100644 index 0000000000000..597f55679a2c6 --- /dev/null +++ 
b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml @@ -0,0 +1,137 @@ +"ngram highlighting": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + index.max_ngram_diff: 19 + analysis: + tokenizer: + my_ngramt: + type: ngram + min_gram: 1 + max_gram: 20 + token_chars: letter,digit + filter: + my_ngram: + type: ngram + min_gram: 1 + max_gram: 20 + analyzer: + name2_index_analyzer: + tokenizer: whitespace + filter: [my_ngram] + name_index_analyzer: + tokenizer: my_ngramt + name_search_analyzer: + tokenizer: whitespace + mappings: + properties: + name: + type: match_only_text + term_vector: with_positions_offsets + analyzer: name_index_analyzer + search_analyzer: name_search_analyzer + name2: + type: match_only_text + term_vector: with_positions_offsets + analyzer: name2_index_analyzer + search_analyzer: name_search_analyzer + + - do: + index: + index: test + id: 1 + refresh: true + body: + name: logicacmg ehemals avinci - the know how company + name2: logicacmg ehemals avinci - the know how company + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name: + query: logica m + highlight: + fields: + - name: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name: + query: logica ma + highlight: + fields: + - name: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name: + query: logica + highlight: + fields: + - name: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name.0: "logicacmg ehemals avinci - the know how 
company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name2: + query: logica m + highlight: + fields: + - name2: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name2.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name2: + query: logica ma + highlight: + fields: + - name2: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name2.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name2: + query: logica + highlight: + fields: + - name2: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name2.0: "logicacmg ehemals avinci - the know how company"} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml new file mode 100644 index 0000000000000..ddebb1d76acbc --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml @@ -0,0 +1,59 @@ +--- +"Test query string with snowball": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + mappings: + properties: + field: + type: match_only_text + number: + type: integer + + - do: + index: + index: test + id: 1 + body: { field: foo bar} + + - do: + indices.refresh: + index: [test] + + - do: + indices.validate_query: + index: test + q: field:bars + analyzer: snowball + + - is_true: valid + + - do: + search: + rest_total_hits_as_int: true + index: test + q: field:bars + analyzer: snowball + + - match: {hits.total: 1} + + - do: + explain: + index: test + id: 1 + q: field:bars + analyzer: snowball + + - is_true: matched + + - do: + 
count: + index: test + q: field:bars + analyzer: snowball + + - match: {count : 1} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml new file mode 100644 index 0000000000000..97f3fb65e94a2 --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml @@ -0,0 +1,42 @@ +--- +"Test default search analyzer is applied": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + settings: + index.analysis.analyzer.default.type: simple + index.analysis.analyzer.default_search.type: german + mappings: + properties: + body: + type: match_only_text + + - do: + index: + index: test + id: 1 + body: + body: Ich lese die Bücher + + - do: + indices.refresh: + index: [ test ] + + - do: + search: + index: test + q: "body:Bücher" + + - match: { hits.total.value: 0 } + + - do: + search: + index: test + q: "body:Bücher" + analyzer: simple + + - match: { hits.total.value: 1 } diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only_text.yml new file mode 100644 index 0000000000000..0c263a47a38e6 --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only_text.yml @@ -0,0 +1,348 @@ +--- +"Test common terms query with stacked tokens": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + features: "allowed_warnings" + + - do: + 
indices.create: + index: test + body: + settings: + analysis: + filter: + syns: + type: synonym + synonyms: [ "quick,fast" ] + analyzer: + syns: + tokenizer: standard + filter: [ "syns" ] + mappings: + properties: + field1: + type: match_only_text + analyzer: syns + field2: + type: match_only_text + analyzer: syns + + - do: + index: + index: test + id: 3 + body: + field1: quick lazy huge brown pidgin + field2: the quick lazy huge brown fox jumps over the tree + + - do: + index: + index: test + id: 1 + body: + field1: the quick brown fox + + - do: + index: + index: test + id: 2 + body: + field1: the quick lazy huge brown fox jumps over the tree + refresh: true + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast brown + cutoff_frequency: 3 + low_freq_operator: or + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast brown + cutoff_frequency: 3 + low_freq_operator: and + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast brown + cutoff_frequency: 3 + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { 
hits.hits.2._id: "3" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast huge fox + minimum_should_match: + low_freq: 3 + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast lazy fox brown + cutoff_frequency: 1 + minimum_should_match: + high_freq: 5 + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "1" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast lazy fox brown + cutoff_frequency: 1 + minimum_should_match: + high_freq: 6 + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast lazy fox brown + cutoff_frequency: 1 + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the quick brown + cutoff_frequency: 3 + - 
match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + allowed_warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + match: + field1: + query: the fast brown + cutoff_frequency: 3 + operator: and + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + match: + field1: + query: the fast brown + cutoff_frequency: 3 + operator: or + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + allowed_warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + match: + field1: + query: the fast brown + cutoff_frequency: 3 + minimum_should_match: 3 + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [multi_match] query can skip block of documents efficiently if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + multi_match: + query: the fast brown + fields: [ "field1", "field2" ] + cutoff_frequency: 3 + operator: and + - match: { hits.total: 3 } + - match: { 
hits.hits.0._id: "3" } + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.2._id: "2" } + +--- +"Test match query with synonyms - see #3881 for extensive description of the issue": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + settings: + analysis: + filter: + synonym: + type: synonym + synonyms: [ "quick,fast" ] + analyzer: + index: + type: custom + tokenizer: standard + filter: lowercase + search: + rest_total_hits_as_int: true + type: custom + tokenizer: standard + filter: [ lowercase, synonym ] + mappings: + properties: + text: + type: match_only_text + analyzer: index + search_analyzer: search + + - do: + index: + index: test + id: 1 + body: + text: quick brown fox + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: quick + operator: and + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: quick brown + operator: and + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: fast + operator: and + - match: { hits.total: 1 } + + - do: + index: + index: test + id: 2 + body: + text: fast brown fox + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: quick + operator: and + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: quick brown + operator: and + - match: { hits.total: 2 } diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only_text.yml new file mode 100644 index 0000000000000..91a8b1509517e --- /dev/null +++ 
b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only_text.yml @@ -0,0 +1,209 @@ +setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + settings: + index: + number_of_shards: 1 # keep scoring stable + analysis: + filter: + syns: + type: synonym + synonyms: [ "wtf, what the fudge", "foo, bar baz" ] + graph_syns: + type: synonym_graph + synonyms: [ "wtf, what the fudge", "foo, bar baz" ] + analyzer: + lower_syns: + type: custom + tokenizer: standard + filter: [ lowercase, syns ] + lower_graph_syns: + type: custom + tokenizer: standard + filter: [ lowercase, graph_syns ] + mappings: + properties: + field: + type: match_only_text + + - do: + index: + index: test + id: 1 + body: + text: say wtf happened foo + - do: + index: + index: test + id: 2 + body: + text: bar baz what the fudge man + + - do: + index: + index: test + id: 3 + body: + text: wtf + + - do: + index: + index: test + id: 4 + body: + text: what is the name for fudge + + - do: + index: + index: test + id: 5 + body: + text: bar two three + + - do: + index: + index: test + id: 6 + body: + text: bar baz two three + refresh: true + +--- +"simple multiterm phrase": + - do: + search: + rest_total_hits_as_int: true + body: + query: + match_phrase: + text: + query: foo two three + analyzer: lower_syns + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "5" } # incorrect match because we're not using graph synonyms + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match_phrase: + text: + query: foo two three + analyzer: lower_graph_syns + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "6" } # correct match because we're using graph synonyms + +--- +"simple multiterm and": + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: say what the fudge + analyzer: lower_syns + operator: and + - match: { 
hits.total: 1 } + - match: { hits.hits.0._id: "1" } # non-graph synonyms coincidentally give us the correct answer here + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: say what the fudge + analyzer: lower_graph_syns + operator: and + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } + +--- +"minimum should match": + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: three what the fudge foo + operator: or + analyzer: lower_graph_syns + auto_generate_synonyms_phrase_query: false + - match: { hits.total: 6 } + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: three what the fudge foo + operator: or + analyzer: lower_graph_syns + minimum_should_match: 80% + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "6" } + - match: { hits.hits.2._id: "1" } + +--- +"multiterm synonyms phrase": + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: wtf + operator: and + analyzer: lower_graph_syns + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.2._id: "1" } + +--- +"phrase prefix": + - do: + index: + index: test + id: 7 + body: + text: "WTFD!" 
+ + - do: + index: + index: test + id: 8 + body: + text: "Weird Al's WHAT THE FUDGESICLE" + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match_phrase_prefix: + text: + query: wtf + analyzer: lower_graph_syns + - match: { hits.total: 5 } + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.1._id: "7" } + - match: { hits.hits.2._id: "1" } + - match: { hits.hits.3._id: "8" } + - match: { hits.hits.4._id: "2" } diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml new file mode 100644 index 0000000000000..9792c9d2695ea --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml @@ -0,0 +1,67 @@ +# integration tests for intervals queries using analyzers +setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + mappings: + properties: + text: + type: match_only_text + analyzer: standard + text_en: + type: match_only_text + analyzer: english + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "test", "_id": "4"}}' + - '{"text" : "Outside it is cold and wet and raining cats and dogs", + "text_en" : "Outside it is cold and wet and raining cats and dogs"}' + +--- +"Test use_field": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + catch: bad_request + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - match: + query: cats + - match: + query: dog + max_gaps: 1 + - match: { status: 400 } + - match: { error.type: "search_phase_execution_exception"} + - match: { error.reason: "all shards failed"} + - do: + catch: bad_request + search: + index: test + body: + query: + intervals: + text: + 
all_of: + intervals: + - match: + query: cats + - match: + query: dog + use_field: text_en + max_gaps: 1 + - match: { status: 400 } + - match: { error.type: "search_phase_execution_exception"} + - match: { error.reason: "all shards failed"} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml new file mode 100644 index 0000000000000..aff2b3f11101c --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml @@ -0,0 +1,238 @@ +# Integration tests for the phrase suggester with a few analyzers + +setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 1 + analysis: + analyzer: + body: + tokenizer: standard + filter: [lowercase] + bigram: + tokenizer: standard + filter: [lowercase, bigram] + ngram: + tokenizer: standard + filter: [lowercase, ngram] + reverse: + tokenizer: standard + filter: [lowercase, reverse] + filter: + bigram: + type: shingle + output_unigrams: false + min_shingle_size: 2 + max_shingle_size: 2 + ngram: + type: shingle + output_unigrams: true + min_shingle_size: 2 + max_shingle_size: 2 + mappings: + properties: + body: + type: match_only_text + analyzer: body + fields: + bigram: + type: match_only_text + analyzer: bigram + ngram: + type: match_only_text + analyzer: ngram + reverse: + type: match_only_text + analyzer: reverse + + - do: + bulk: + index: test + refresh: true + body: | + { "index": {} } + { "body": "Xorr the God-Jewel" } + { "index": {} } + { "body": "Xorn" } + { "index": {} } + { "body": "Arthur, King of the Britons" } + { "index": {} } + { "body": "Sir Lancelot the Brave" } + { "index": {} } + { "body": "Patsy, Arthur's 
Servant" } + { "index": {} } + { "body": "Sir Robin the Not-Quite-So-Brave-as-Sir-Lancelot" } + { "index": {} } + { "body": "Sir Bedevere the Wise" } + { "index": {} } + { "body": "Sir Galahad the Pure" } + { "index": {} } + { "body": "Miss Islington, the Witch" } + { "index": {} } + { "body": "Zoot" } + { "index": {} } + { "body": "Leader of Robin's Minstrels" } + { "index": {} } + { "body": "Old Crone" } + { "index": {} } + { "body": "Frank, the Historian" } + { "index": {} } + { "body": "Frank's Wife" } + { "index": {} } + { "body": "Dr. Piglet" } + { "index": {} } + { "body": "Dr. Winston" } + { "index": {} } + { "body": "Sir Robin (Stand-in)" } + { "index": {} } + { "body": "Knight Who Says Ni" } + { "index": {} } + { "body": "Police sergeant who stops the film" } + +--- +"sorts by score": + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.ngram + force_unigrams: true + max_errors: 0.5 + direct_generator: + - field: body.ngram + min_word_length: 1 + suggest_mode: always + + - match: {suggest.test.0.options.0.text: xorr the god jewel} + - match: {suggest.test.0.options.1.text: xorn the god jewel} + +--- +"breaks ties by sorting terms": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + # This runs the suggester without bigrams so we can be sure of the sort order + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body + analyzer: body + force_unigrams: true + max_errors: 0.5 + direct_generator: + - field: body + min_word_length: 1 + suggest_mode: always + + # The scores are identical but xorn comes first because it sorts first + - match: {suggest.test.0.options.0.text: xorn the god jewel} + - match: {suggest.test.0.options.1.text: xorr the god jewel} + - match: {suggest.test.0.options.0.score: $body.suggest.test.0.options.0.score} + +--- 
+"fails when asked to run on a field without unigrams": + - do: + catch: /since it doesn't emit unigrams/ + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.bigram + + - do: + catch: /since it doesn't emit unigrams/ + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.bigram + analyzer: bigram + +--- +"doesn't fail when asked to run on a field without unigrams when force_unigrams=false": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.bigram + force_unigrams: false + + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.bigram + analyzer: bigram + force_unigrams: false + +--- +"reverse suggestions": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: Artur, Ging of the Britons + test: + phrase: + field: body.ngram + force_unigrams: true + max_errors: 0.5 + direct_generator: + - field: body.reverse + min_word_length: 1 + suggest_mode: always + pre_filter: reverse + post_filter: reverse + + - match: {suggest.test.0.options.0.text: arthur king of the britons} diff --git a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml new file mode 100644 index 0000000000000..3cb8e09c70aed --- /dev/null +++ 
b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml @@ -0,0 +1,201 @@ +setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + + - do: + indices.create: + index: test + body: + settings: + number_of_replicas: 0 + mappings: + properties: + a_field: + type: search_as_you_type + analyzer: simple + max_shingle_size: 4 + text_field: + type: match_only_text + analyzer: simple + + - do: + index: + index: test + id: 1 + body: + a_field: "quick brown fox jump lazy dog" + text_field: "quick brown fox jump lazy dog" + + - do: + indices.refresh: {} + +--- +"phrase query": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "brown" + highlight: + fields: + a_field: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field.0: "quick brown fox jump lazy dog" } + +--- +"bool prefix query": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "brown fo" + highlight: + fields: + a_field: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field.0: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query 1 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fo" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: 
unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: null } + - match: { hits.hits.0.highlight.a_field\._3gram: null } + - match: { hits.hits.0.highlight.a_field\._4gram: null } + +--- +"multi match bool prefix query 2 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox ju" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._3gram: null } + - match: { hits.hits.0.highlight.a_field\._4gram: null } + +--- +"multi match bool prefix query 3 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump la" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { 
hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._3gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._4gram: null } + +--- +"multi match bool prefix query 4 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump lazy d" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._3gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._4gram: ["quick brown fox jump lazy dog"] } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml new file mode 100644 index 0000000000000..085c5633ac72b --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml @@ -0,0 +1,53 @@ +--- +"validate_query with query_string parameters": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + mappings: + properties: + field: + type: match_only_text + number: + type: integer + + 
- do: + indices.validate_query: + index: test + q: bar + df: field + + - is_true: valid + + - do: + indices.validate_query: + index: test + q: field:foo field:xyz + + - is_true: valid + + - do: + indices.validate_query: + index: test + q: field:foo field:xyz + default_operator: AND + + - is_true: valid + + - do: + indices.validate_query: + index: test + q: field:BA* + + - is_true: valid + + - do: + indices.validate_query: + index: test + q: number:foo + lenient: true + + - is_true: valid diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml new file mode 100644 index 0000000000000..7a96536a2e261 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml @@ -0,0 +1,76 @@ +--- +"Default index": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: goodbad + body: + settings: + number_of_shards: "1" + mappings: + properties: + text: + type: match_only_text + fielddata: true + class: + type: keyword + + - do: + index: + index: goodbad + id: 1 + body: { text: "good", class: "good" } + - do: + index: + index: goodbad + id: 2 + body: { text: "good", class: "good" } + - do: + index: + index: goodbad + id: 3 + body: { text: "bad", class: "bad" } + - do: + index: + index: goodbad + id: 4 + body: { text: "bad", class: "bad" } + - do: + index: + index: goodbad + id: 5 + body: { text: "good bad", class: "good" } + - do: + index: + index: goodbad + id: 6 + body: { text: "good bad", class: "bad" } + - do: + index: + index: goodbad + id: 7 + body: { text: "bad", class: "bad" } + + + + - do: + indices.refresh: + index: [goodbad] + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + + - match: {hits.total: 7} + + - do: + search: + rest_total_hits_as_int: true + 
index: goodbad + body: {"aggs": {"class": {"terms": {"field": "class"},"aggs": {"sig_terms": {"significant_terms": {"field": "text"}}}}}} + + - match: {aggregations.class.buckets.0.sig_terms.buckets.0.key: "bad"} + - match: {aggregations.class.buckets.1.sig_terms.buckets.0.key: "good"} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only_text.yml new file mode 100644 index 0000000000000..bc41f157dfdc4 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only_text.yml @@ -0,0 +1,155 @@ +--- +"Default index": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: goodbad + body: + settings: + number_of_shards: "1" + mappings: + properties: + text: + type: match_only_text + fielddata: false + class: + type: keyword + + - do: + index: + index: goodbad + id: 1 + body: { text: "good", class: "good" } + - do: + index: + index: goodbad + id: 2 + body: { text: "good", class: "good" } + - do: + index: + index: goodbad + id: 3 + body: { text: "bad", class: "bad" } + - do: + index: + index: goodbad + id: 4 + body: { text: "bad", class: "bad" } + - do: + index: + index: goodbad + id: 5 + body: { text: "good bad", class: "good" } + - do: + index: + index: goodbad + id: 6 + body: { text: "good bad", class: "bad" } + - do: + index: + index: goodbad + id: 7 + body: { text: "bad", class: "bad" } + + + + - do: + indices.refresh: + index: [goodbad] + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + + - match: {hits.total: 7} + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + body: {"aggs": {"class": {"terms": {"field": "class"},"aggs": {"sig_text": {"significant_text": {"field": "text"}}}}}} + + - match: {aggregations.class.buckets.0.sig_text.buckets.0.key: 
"bad"} + - match: {aggregations.class.buckets.1.sig_text.buckets.0.key: "good"} + +--- +"Dedup noise": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: goodbad + body: + settings: + number_of_shards: "1" + mappings: + properties: + text: + type: match_only_text + fielddata: false + class: + type: keyword + + - do: + index: + index: goodbad + id: 1 + body: { text: "good noisewords1 g1 g2 g3 g4 g5 g6", class: "good" } + - do: + index: + index: goodbad + id: 2 + body: { text: "good noisewords2 g1 g2 g3 g4 g5 g6", class: "good" } + - do: + index: + index: goodbad + id: 3 + body: { text: "bad noisewords3 b1 b2 b3 b4 b5 b6", class: "bad" } + - do: + index: + index: goodbad + id: 4 + body: { text: "bad noisewords4 b1 b2 b3 b4 b5 b6", class: "bad" } + - do: + index: + index: goodbad + id: 5 + body: { text: "good bad noisewords5 gb1 gb2 gb3 gb4 gb5 gb6", class: "good" } + - do: + index: + index: goodbad + id: 6 + body: { text: "good bad noisewords6 gb1 gb2 gb3 gb4 gb5 gb6", class: "bad" } + - do: + index: + index: goodbad + id: 7 + body: { text: "bad noisewords7 b1 b2 b3 b4 b5 b6", class: "bad" } + + + + - do: + indices.refresh: + index: [goodbad] + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + + - match: {hits.total: 7} + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + body: {"aggs": {"class": {"terms": {"field": "class"},"aggs": {"sig_text": {"significant_text": {"field": "text", "filter_duplicate_text": true}}}}}} + + - match: {aggregations.class.buckets.0.sig_text.buckets.0.key: "bad"} + - length: { aggregations.class.buckets.0.sig_text.buckets: 1 } + - match: {aggregations.class.buckets.1.sig_text.buckets.0.key: "good"} + - length: { aggregations.class.buckets.1.sig_text.buckets: 1 } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml 
b/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml new file mode 100644 index 0000000000000..7100d620bf19e --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml @@ -0,0 +1,137 @@ +setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + - do: + indices.create: + index: test + body: + mappings: + _source: + excludes: ["nested.stored_only"] + properties: + nested: + type: nested + properties: + field: + type: text + fields: + vectors: + type: text + term_vector: "with_positions_offsets" + postings: + type: text + index_options: "offsets" + stored: + type: match_only_text + store: true + stored_only: + type: match_only_text + store: true + - do: + index: + index: test + id: 1 + refresh: true + body: + nested: + field : "The quick brown fox is brown." + stored : "The quick brown fox is brown." + stored_only : "The quick brown fox is brown." + +--- +"Unified highlighter": + - do: + search: + index: test + body: + query: + nested: + path: "nested" + query: + multi_match: + query: "quick brown fox" + fields: [ "nested.field", "nested.field.vectors", "nested.field.postings" ] + inner_hits: + highlight: + type: "unified" + fields: + nested.field: {} + nested.field.vectors: {} + nested.field.postings: {} + + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field.0: "The quick brown fox is brown." } + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field\.vectors.0: "The quick brown fox is brown." } + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field\.postings.0: "The quick brown fox is brown." 
} + +--- +"Unified highlighter with stored fields": + - do: + search: + index: test + body: + query: + nested: + path: "nested" + query: + multi_match: + query: "quick brown fox" + fields: [ "nested.stored", "nested.stored_only" ] + inner_hits: + highlight: + type: "unified" + fields: + nested.stored: {} + nested.stored_only: {} + + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.stored.0: "The quick brown fox is brown." } + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.stored_only.0: "The quick brown fox is brown." } + +--- +"Unified highlighter with stored fields and disabled source": + - do: + indices.create: + index: disabled_source + body: + mappings: + _source: + enabled: false + properties: + nested: + type: nested + properties: + field: + type: match_only_text + stored_only: + type: match_only_text + store: true + - do: + index: + index: disabled_source + id: 1 + refresh: true + body: + nested: + field: "The quick brown fox is brown." + stored_only: "The quick brown fox is brown." 
+ + - do: + search: + index: disabled_source + body: + query: + nested: + path: "nested" + query: + multi_match: + query: "quick brown fox" + fields: ["nested.field", "nested.stored_only"] + inner_hits: + highlight: + type: "unified" + fields: + nested.field: {} + nested.stored_only: {} + + - is_false: hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.stored_only.0: "The quick brown fox is brown."} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only_text.yml new file mode 100644 index 0000000000000..03626236604a1 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only_text.yml @@ -0,0 +1,119 @@ +setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + features: ["headers"] + + - do: + indices.create: + index: test + body: + mappings: + dynamic: false + properties: + match_only_text: + type: match_only_text + + - do: + headers: + Content-Type: application/json + index: + index: "test" + id: 1 + body: + match_only_text: "foo bar" + + - do: + headers: + Content-Type: application/json + index: + index: "test" + id: 2 + body: + match_only_text: "foo bar" + + - do: + headers: + Content-Type: application/json + index: + index: "test" + id: 3 + routing: "route_me" + body: + match_only_text: "foo bar" + + - do: + index: + index: "test" + id: 4 + body: {} + + - do: + indices.create: + index: test-unmapped + body: + mappings: + dynamic: false + properties: + unrelated: + type: keyword + + - do: + index: + index: "test-unmapped" + id: 1 + body: + unrelated: "foo" + + - do: + indices.create: + index: test-empty + body: + mappings: + dynamic: false + properties: + match_only_text: + type: match_only_text + + - do: + indices.refresh: + index: [test, test-unmapped, 
test-empty] + +--- +"Test exists query on mapped match_only_text field": + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + exists: + field: match_only_text + + - match: {hits.total: 3} + +--- +"Test exists query on unmapped match_only_text field": + - do: + search: + rest_total_hits_as_int: true + index: test-unmapped + body: + query: + exists: + field: match_only_text + + - match: {hits.total: 0} + +--- +"Test exists query on match_only_text field in empty index": + - do: + search: + rest_total_hits_as_int: true + index: test-empty + body: + query: + exists: + field: match_only_text + + - match: {hits.total: 0} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml new file mode 100644 index 0000000000000..a41b8d353e3e9 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml @@ -0,0 +1,67 @@ +--- +"search with indexed phrases": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + + - do: + indices.create: + index: test + body: + mappings: + properties: + text: + type: match_only_text + + - do: + index: + index: test + id: 1 + body: { text: "peter piper picked a peck of pickled peppers" } + + - do: + indices.refresh: + index: [test] + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + text: + query: "peter piper" + + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + index: test + q: '"peter piper"~1' + df: text + + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + text: "peter piper picked" + + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + text: "piper" 
+ + - match: {hits.total: 1} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml new file mode 100644 index 0000000000000..fc4e9f9de0f38 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml @@ -0,0 +1,282 @@ +setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + + - do: + indices.create: + index: test + body: + mappings: + properties: + my_field1: + type: match_only_text + my_field2: + type: match_only_text + + - do: + index: + index: test + id: 1 + body: + my_field1: "brown fox jump" + my_field2: "xylophone" + + - do: + index: + index: test + id: 2 + body: + my_field1: "brown emu jump" + my_field2: "xylophone" + + - do: + index: + index: test + id: 3 + body: + my_field1: "jumparound" + my_field2: "emu" + + - do: + index: + index: test + id: 4 + body: + my_field1: "dog" + my_field2: "brown fox jump lazy" + + - do: + indices.refresh: {} + +--- +"minimum should match": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: + query: "brown fox jump" + minimum_should_match: 3 + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + +--- +"analyzer": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: + query: "BROWN dog" + analyzer: whitespace # this analyzer doesn't lowercase terms + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + +--- +"operator": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: + query: "brown fox jump" + operator: AND + + - match: { 
hits.total: 1 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + +--- +"fuzziness": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field2: + query: "xylophoen foo" + fuzziness: 1 + prefix_length: 1 + max_expansions: 10 + fuzzy_transpositions: true + fuzzy_rewrite: constant_score + + - match: { hits.total: 2 } + - match: { hits.hits.0._source.my_field2: "xylophone" } + - match: { hits.hits.1._source.my_field2: "xylophone" } + +--- +"multi_match single field complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump" + type: bool_prefix + fields: [ "my_field1" ] + + - match: { hits.total: 3 } + +--- +"multi_match single field partial term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox ju" + type: bool_prefix + fields: [ "my_field1" ] + + - match: { hits.total: 3 } + +--- +"multi_match multiple fields complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump lazy" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + + - match: { hits.total: 3 } + +--- +"multi_match multiple fields partial term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump laz" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + + - match: { hits.total: 3 } + +--- +"multi_match multiple fields with analyzer": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "BROWN FOX JUMP dog" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + analyzer: whitespace # this analyzer doesn't lowercase terms + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + 
- match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + +--- +"multi_match multiple fields with minimum_should_match": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump la" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + minimum_should_match: 4 + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + +--- +"multi_match multiple fields with fuzziness": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "dob nomatch" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + fuzziness: 1 + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + +--- +"multi_match multiple fields with slop throws exception": + + - do: + catch: /\[slop\] not allowed for type \[bool_prefix\]/ + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + slop: 1 + +--- +"multi_match multiple fields with cutoff_frequency throws exception": + + - do: + catch: /\[cutoff_frequency\] not allowed for type \[bool_prefix\]/ + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + cutoff_frequency: 0.001 diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only_text.yml new file mode 100644 index 0000000000000..f4faf87eb83cc --- /dev/null +++ 
b/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only_text.yml @@ -0,0 +1,141 @@ +--- +setup: + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + + - do: + indices.create: + index: test + body: + mappings: + properties: + text: + type: match_only_text + analyzer: standard + fields: + raw: + type: keyword + nested1: + type: nested + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "test", "_id": "1"}}' + - '{"text" : "Some like it hot, some like it cold", "nested1": [{"foo": "bar1"}]}' + - '{"index": {"_index": "test", "_id": "2"}}' + - '{"text" : "Its cold outside, theres no kind of atmosphere", "nested1": [{"foo": "bar2"}]}' + - '{"index": {"_index": "test", "_id": "3"}}' + - '{"text" : "Baby its cold there outside", "nested1": [{"foo": "bar3"}]}' + - '{"index": {"_index": "test", "_id": "4"}}' + - '{"text" : "Outside it is cold and wet", "nested1": [{"foo": "bar4"}]}' + +--- +teardown: + + - do: + cluster.put_settings: + body: + transient: + search.allow_expensive_queries: null + +--- +"Test disallow expensive queries": + + ### Check for initial setting = null -> false + - do: + cluster.get_settings: + flat_settings: true + + - is_false: search.allow_expensive_queries + + ### Update setting to false + - do: + cluster.put_settings: + body: + transient: + search.allow_expensive_queries: "false" + flat_settings: true + + - match: {transient: {search.allow_expensive_queries: "false"}} + + ### Prefix + - do: + catch: /\[prefix\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false. 
For optimised prefix queries on text fields please enable \[index_prefixes\]./ + search: + index: test + body: + query: + prefix: + text: + value: out + + ### Fuzzy + - do: + catch: /\[fuzzy\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + fuzzy: + text: + value: outwide + + ### Regexp + - do: + catch: /\[regexp\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + regexp: + text: + value: .*ou.*id.* + + ### Wildcard + - do: + catch: /\[wildcard\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + wildcard: + text: + value: out?ide + + ### Range on text + - do: + catch: /\[range\] queries on \[text\] or \[keyword\] fields cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + range: + text: + gte: "theres" + + ### Range on keyword + - do: + catch: /\[range\] queries on \[text\] or \[keyword\] fields cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + range: + text.raw: + gte : "Outside it is cold and wet" + + ### Nested + - do: + catch: /\[joining\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + nested: + path: "nested1" + query: + bool: + must: [{"match" : {"nested1.foo" : "bar2"}}] diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml new file mode 100644 index 0000000000000..cc15796e4697f --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml @@ -0,0 +1,92 @@ +--- +"Search shards aliases with 
and without filters": + - skip: + version: " - 2.99.99" + reason: "match_only_text was added in 3.0" + + - do: + indices.create: + index: test_index + body: + settings: + index: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + field: + type: match_only_text + aliases: + test_alias_no_filter: {} + test_alias_filter_1: + filter: + term: + field : value1 + test_alias_filter_2: + filter: + term: + field : value2 + + - do: + search_shards: + index: test_alias_no_filter + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - is_true: indices.test_index + - is_false: indices.test_index.filter + - match: { indices.test_index.aliases: [test_alias_no_filter]} + + - do: + search_shards: + index: test_alias_filter_1 + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_filter_1] } + - match: { indices.test_index.filter.term.field.value: value1 } + - lte: { indices.test_index.filter.term.field.boost: 1.0 } + - gte: { indices.test_index.filter.term.field.boost: 1.0 } + + - do: + search_shards: + index: ["test_alias_filter_1","test_alias_filter_2"] + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_filter_1, test_alias_filter_2]} + - length: { indices.test_index.filter.bool.should: 2 } + - lte: { indices.test_index.filter.bool.should.0.term.field.boost: 1.0 } + - gte: { indices.test_index.filter.bool.should.0.term.field.boost: 1.0 } + - lte: { indices.test_index.filter.bool.should.1.term.field.boost: 1.0 } + - gte: { indices.test_index.filter.bool.should.1.term.field.boost: 1.0 } + - match: { indices.test_index.filter.bool.adjust_pure_negative: true} + - lte: { indices.test_index.filter.bool.boost: 1.0 } + - gte: { indices.test_index.filter.bool.boost: 1.0 } + + - do: + search_shards: + index: "test*" + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { 
indices.test_index.aliases: [test_alias_filter_1, test_alias_filter_2, test_alias_no_filter]} + - is_false: indices.test_index.filter + + - do: + search_shards: + index: ["test_alias_filter_1","test_alias_no_filter"] + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_filter_1, test_alias_no_filter]} + - is_false: indices.test_index.filter + + - do: + search_shards: + index: ["test_alias_no_filter"] + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_no_filter]} + - is_false: indices.test_index.filter diff --git a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java index da62ddfd7017d..66d4654e543a2 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java @@ -359,18 +359,31 @@ public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionInc ); } + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException { + return phraseQuery(stream, slop, enablePositionIncrements); + } + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { throw new IllegalArgumentException( "Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" ); } + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) + throws IOException { + return multiPhraseQuery(stream, slop, enablePositionIncrements); + } + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { throw new IllegalArgumentException( "Can only use phrase prefix queries on text fields - not on [" 
+ name + "] which is of type [" + typeName() + "]" ); } + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { + return phrasePrefixQuery(stream, slop, maxExpansions); + } + public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) { throw new IllegalArgumentException( "Can only use span prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]" diff --git a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java new file mode 100644 index 0000000000000..fb97f8c309a70 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java @@ -0,0 +1,312 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.opensearch.Version; +import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.opensearch.index.analysis.IndexAnalyzers; +import org.opensearch.index.analysis.NamedAnalyzer; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.query.SourceFieldMatchQuery; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.function.Function; + +/** + * A specialized type of TextFieldMapper which disables the positions and norms to save on storage and executes phrase queries, which requires + * positional data, in a slightly less efficient manner using the {@link org.opensearch.index.query.SourceFieldMatchQuery}. 
+ */ +public class MatchOnlyTextFieldMapper extends TextFieldMapper { + + public static final FieldType FIELD_TYPE = new FieldType(); + public static final String CONTENT_TYPE = "match_only_text"; + private final String indexOptions = FieldMapper.indexOptionToString(FIELD_TYPE.indexOptions()); + private final boolean norms = FIELD_TYPE.omitNorms() == false; + + @Override + protected String contentType() { + return CONTENT_TYPE; + } + + static { + FIELD_TYPE.setTokenized(true); + FIELD_TYPE.setStored(false); + FIELD_TYPE.setStoreTermVectors(false); + FIELD_TYPE.setOmitNorms(true); + FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); + FIELD_TYPE.freeze(); + } + + public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers())); + + protected MatchOnlyTextFieldMapper( + String simpleName, + FieldType fieldType, + MatchOnlyTextFieldType mappedFieldType, + TextFieldMapper.PrefixFieldMapper prefixFieldMapper, + TextFieldMapper.PhraseFieldMapper phraseFieldMapper, + MultiFields multiFields, + CopyTo copyTo, + Builder builder + ) { + + super(simpleName, fieldType, mappedFieldType, prefixFieldMapper, phraseFieldMapper, multiFields, copyTo, builder); + } + + @Override + public ParametrizedFieldMapper.Builder getMergeBuilder() { + return new Builder(simpleName(), this.indexCreatedVersion, this.indexAnalyzers).init(this); + } + + /** + * Builder class for constructing the MatchOnlyTextFieldMapper. 
+ */ + public static class Builder extends TextFieldMapper.Builder { + final Parameter indexOptions = indexOptions(m -> ((MatchOnlyTextFieldMapper) m).indexOptions); + + private static Parameter indexOptions(Function initializer) { + return Parameter.restrictedStringParam("index_options", false, initializer, "docs"); + } + + final Parameter norms = norms(m -> ((MatchOnlyTextFieldMapper) m).norms); + final Parameter indexPhrases = Parameter.boolParam( + "index_phrases", + false, + m -> ((MatchOnlyTextFieldType) m.mappedFieldType).indexPhrases, + false + ).setValidator(v -> { + if (v == true) { + throw new MapperParsingException("Index phrases cannot be enabled on for match_only_text field. Use text field instead"); + } + }); + + final Parameter indexPrefixes = new Parameter<>( + "index_prefixes", + false, + () -> null, + TextFieldMapper::parsePrefixConfig, + m -> Optional.ofNullable(((MatchOnlyTextFieldType) m.mappedFieldType).prefixFieldType) + .map(p -> new PrefixConfig(p.minChars, p.maxChars)) + .orElse(null) + ).acceptsNull().setValidator(v -> { + if (v != null) { + throw new MapperParsingException("Index prefixes cannot be enabled on for match_only_text field. 
Use text field instead"); + } + }); + + private static Parameter norms(Function initializer) { + return Parameter.boolParam("norms", false, initializer, false) + .setMergeValidator((o, n) -> o == n || (o && n == false)) + .setValidator(v -> { + if (v == true) { + throw new MapperParsingException("Norms cannot be enabled on for match_only_text field"); + } + }); + } + + public Builder(String name, IndexAnalyzers indexAnalyzers) { + super(name, indexAnalyzers); + } + + public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAnalyzers) { + super(name, indexCreatedVersion, indexAnalyzers); + } + + @Override + public MatchOnlyTextFieldMapper build(BuilderContext context) { + FieldType fieldType = TextParams.buildFieldType(index, store, indexOptions, norms, termVectors); + MatchOnlyTextFieldType tft = buildFieldType(fieldType, context); + return new MatchOnlyTextFieldMapper( + name, + fieldType, + tft, + buildPrefixMapper(context, fieldType, tft), + buildPhraseMapper(fieldType, tft), + multiFieldsBuilder.build(this, context), + copyTo.build(), + this + ); + } + + @Override + protected MatchOnlyTextFieldType buildFieldType(FieldType fieldType, BuilderContext context) { + NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer(); + NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer(); + NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer(); + + if (fieldType.indexOptions().compareTo(IndexOptions.DOCS) > 0) { + throw new IllegalArgumentException("Cannot set position_increment_gap on field [" + name + "] without positions enabled"); + } + if (positionIncrementGap.get() != POSITION_INCREMENT_GAP_USE_ANALYZER) { + if (fieldType.indexOptions().compareTo(IndexOptions.DOCS) < 0) { + throw new IllegalArgumentException( + "Cannot set position_increment_gap on field [" + name + "] without indexing enabled" + ); + } + indexAnalyzer = new NamedAnalyzer(indexAnalyzer, positionIncrementGap.get()); + searchAnalyzer = new 
NamedAnalyzer(searchAnalyzer, positionIncrementGap.get()); + searchQuoteAnalyzer = new NamedAnalyzer(searchQuoteAnalyzer, positionIncrementGap.get()); + } + TextSearchInfo tsi = new TextSearchInfo(fieldType, similarity.getValue(), searchAnalyzer, searchQuoteAnalyzer); + MatchOnlyTextFieldType ft = new MatchOnlyTextFieldType( + buildFullName(context), + index.getValue(), + fieldType.stored(), + tsi, + meta.getValue() + ); + ft.setIndexAnalyzer(indexAnalyzer); + ft.setEagerGlobalOrdinals(eagerGlobalOrdinals.getValue()); + ft.setBoost(boost.getValue()); + if (fieldData.getValue()) { + ft.setFielddata(true, freqFilter.getValue()); + } + return ft; + } + + @Override + protected List> getParameters() { + return Arrays.asList( + index, + store, + indexOptions, + norms, + termVectors, + analyzers.indexAnalyzer, + analyzers.searchAnalyzer, + analyzers.searchQuoteAnalyzer, + similarity, + positionIncrementGap, + fieldData, + freqFilter, + eagerGlobalOrdinals, + indexPhrases, + indexPrefixes, + boost, + meta + ); + } + } + + /** + * The specific field type for MatchOnlyTextFieldMapper + * + * @opensearch.internal + */ + public static final class MatchOnlyTextFieldType extends TextFieldType { + private final boolean indexPhrases = false; + + private PrefixFieldType prefixFieldType; + + @Override + public String typeName() { + return CONTENT_TYPE; + } + + public MatchOnlyTextFieldType(String name, boolean indexed, boolean stored, TextSearchInfo tsi, Map meta) { + super(name, indexed, stored, tsi, meta); + } + + @Override + public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, QueryShardContext context) throws IOException { + PhraseQuery phraseQuery = (PhraseQuery) super.phraseQuery(stream, slop, enablePosIncrements); + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (Term term : phraseQuery.getTerms()) { + builder.add(new TermQuery(term), BooleanClause.Occur.FILTER); + } + return new SourceFieldMatchQuery(builder.build(), 
phraseQuery, this, context); + } + + @Override + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) + throws IOException { + MultiPhraseQuery multiPhraseQuery = (MultiPhraseQuery) super.multiPhraseQuery(stream, slop, enablePositionIncrements); + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (Term[] terms : multiPhraseQuery.getTermArrays()) { + if (terms.length > 1) { + // Multiple terms in the same position, creating a disjunction query for it and + // adding it to conjunction query + BooleanQuery.Builder disjunctions = new BooleanQuery.Builder(); + for (Term term : terms) { + disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD); + } + builder.add(disjunctions.build(), BooleanClause.Occur.FILTER); + } else { + builder.add(new TermQuery(terms[0]), BooleanClause.Occur.FILTER); + } + } + return new SourceFieldMatchQuery(builder.build(), multiPhraseQuery, this, context); + } + + @Override + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { + Query phrasePrefixQuery = super.phrasePrefixQuery(stream, slop, maxExpansions); + List> termArray = getTermsFromTokenStream(stream); + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + for (int i = 0; i < termArray.size(); i++) { + if (i == termArray.size() - 1) { + // last element of the term Array is a prefix, thus creating a prefix query for it and adding it to + // conjunction query + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery(name()); + mqb.add(termArray.get(i).toArray(new Term[0])); + builder.add(mqb, BooleanClause.Occur.FILTER); + } else { + if (termArray.get(i).size() > 1) { + // multiple terms in the same position, creating a disjunction query for it and + // adding it to conjunction query + BooleanQuery.Builder disjunctions = new BooleanQuery.Builder(); + for (Term term : termArray.get(i)) { + disjunctions.add(new 
TermQuery(term), BooleanClause.Occur.SHOULD); + } + builder.add(disjunctions.build(), BooleanClause.Occur.FILTER); + } else { + builder.add(new TermQuery(termArray.get(i).get(0)), BooleanClause.Occur.FILTER); + } + } + } + return new SourceFieldMatchQuery(builder.build(), phrasePrefixQuery, this, context); + } + + private List> getTermsFromTokenStream(TokenStream stream) throws IOException { + final List> termArray = new ArrayList<>(); + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); + List currentTerms = new ArrayList<>(); + stream.reset(); + while (stream.incrementToken()) { + if (posIncrAtt.getPositionIncrement() != 0) { + if (currentTerms.isEmpty() == false) { + termArray.add(List.copyOf(currentTerms)); + } + currentTerms.clear(); + } + currentTerms.add(new Term(name(), termAtt.getBytesRef())); + } + termArray.add(List.copyOf(currentTerms)); + return termArray; + } + } +} diff --git a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java index 1d0d1ae2bd899..d0e041e68a81d 100644 --- a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java @@ -110,7 +110,7 @@ public class TextFieldMapper extends ParametrizedFieldMapper { public static final String CONTENT_TYPE = "text"; - private static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1; + protected static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1; private static final String FAST_PHRASE_SUFFIX = "._index_phrase"; /** @@ -152,11 +152,11 @@ private static TextFieldMapper toType(FieldMapper in) { * * @opensearch.internal */ - private static final class PrefixConfig implements ToXContent { + protected static final class PrefixConfig implements ToXContent { final int minChars; final int maxChars; - 
private PrefixConfig(int minChars, int maxChars) { + PrefixConfig(int minChars, int maxChars) { this.minChars = minChars; this.maxChars = maxChars; if (minChars > maxChars) { @@ -198,7 +198,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws } } - private static PrefixConfig parsePrefixConfig(String propName, ParserContext parserContext, Object propNode) { + static PrefixConfig parsePrefixConfig(String propName, ParserContext parserContext, Object propNode) { if (propNode == null) { return null; } @@ -214,7 +214,7 @@ private static PrefixConfig parsePrefixConfig(String propName, ParserContext par * * @opensearch.internal */ - private static final class FielddataFrequencyFilter implements ToXContent { + protected static final class FielddataFrequencyFilter implements ToXContent { final double minFreq; final double maxFreq; final int minSegmentSize; @@ -280,15 +280,14 @@ public static class Builder extends ParametrizedFieldMapper.Builder { private final Version indexCreatedVersion; - private final Parameter index = Parameter.indexParam(m -> toType(m).mappedFieldType.isSearchable(), true); - private final Parameter store = Parameter.storeParam(m -> toType(m).fieldType.stored(), false); + protected final Parameter index = Parameter.indexParam(m -> toType(m).mappedFieldType.isSearchable(), true); + protected final Parameter store = Parameter.storeParam(m -> toType(m).fieldType.stored(), false); final Parameter similarity = TextParams.similarity(m -> toType(m).similarity); final Parameter indexOptions = TextParams.indexOptions(m -> toType(m).indexOptions); final Parameter norms = TextParams.norms(true, m -> toType(m).fieldType.omitNorms() == false); final Parameter termVectors = TextParams.termVectors(m -> toType(m).termVectors); - final Parameter positionIncrementGap = Parameter.intParam( "position_increment_gap", false, @@ -332,8 +331,8 @@ public static class Builder extends ParametrizedFieldMapper.Builder { .orElse(null) 
).acceptsNull(); - private final Parameter boost = Parameter.boostParam(); - private final Parameter> meta = Parameter.metaParam(); + protected final Parameter boost = Parameter.boostParam(); + protected final Parameter> meta = Parameter.metaParam(); final TextParams.Analyzers analyzers; @@ -395,7 +394,7 @@ protected List> getParameters() { ); } - private TextFieldType buildFieldType(FieldType fieldType, BuilderContext context) { + protected TextFieldType buildFieldType(FieldType fieldType, BuilderContext context) { NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer(); NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer(); NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer(); @@ -420,7 +419,7 @@ private TextFieldType buildFieldType(FieldType fieldType, BuilderContext context return ft; } - private PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType fieldType, TextFieldType tft) { + protected PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType fieldType, TextFieldType tft) { if (indexPrefixes.get() == null) { return null; } @@ -454,7 +453,7 @@ private PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType fi return new PrefixFieldMapper(pft, prefixFieldType); } - private PhraseFieldMapper buildPhraseMapper(FieldType fieldType, TextFieldType parent) { + protected PhraseFieldMapper buildPhraseMapper(FieldType fieldType, TextFieldType parent) { if (indexPhrases.get() == false) { return null; } @@ -683,7 +682,7 @@ public Query existsQuery(QueryShardContext context) { * * @opensearch.internal */ - private static final class PhraseFieldMapper extends FieldMapper { + protected static final class PhraseFieldMapper extends FieldMapper { PhraseFieldMapper(FieldType fieldType, PhraseFieldType mappedFieldType) { super(mappedFieldType.name(), fieldType, mappedFieldType, MultiFields.empty(), CopyTo.empty()); @@ -710,7 +709,7 @@ protected String contentType() { * * @opensearch.internal */ - private 
static final class PrefixFieldMapper extends FieldMapper { + protected static final class PrefixFieldMapper extends FieldMapper { protected PrefixFieldMapper(FieldType fieldType, PrefixFieldType mappedFieldType) { super(mappedFieldType.name(), fieldType, mappedFieldType, MultiFields.empty(), CopyTo.empty()); @@ -968,15 +967,15 @@ public IndexFieldData.Builder fielddataBuilder(String fullyQualifiedIndexName, S } - private final FieldType fieldType; + protected final FieldType fieldType; private final PrefixFieldMapper prefixFieldMapper; private final PhraseFieldMapper phraseFieldMapper; private final SimilarityProvider similarity; private final String indexOptions; private final String termVectors; private final int positionIncrementGap; - private final Version indexCreatedVersion; - private final IndexAnalyzers indexAnalyzers; + protected final Version indexCreatedVersion; + protected final IndexAnalyzers indexAnalyzers; private final FielddataFrequencyFilter freqFilter; protected TextFieldMapper( diff --git a/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java new file mode 100644 index 0000000000000..b0be20e417efe --- /dev/null +++ b/server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java @@ -0,0 +1,160 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.query; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.memory.MemoryIndex; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.SourceValueFetcher; +import org.opensearch.search.lookup.LeafSearchLookup; +import org.opensearch.search.lookup.SearchLookup; + +import java.io.IOException; +import java.util.List; +import java.util.Objects; + +/** + * A query that matches against each document from the parent query by filtering using the source field values. + * Useful to query against field type which doesn't store positional data and field is not stored/computed dynamically. + */ +public class SourceFieldMatchQuery extends Query { + private final Query delegateQuery; + private final Query filter; + private final SearchLookup lookup; + private final MappedFieldType fieldType; + private final SourceValueFetcher valueFetcher; + private final QueryShardContext context; + + /** + * Constructs a SourceFieldMatchQuery. + * + * @param delegateQuery The parent query to use to find matches. + * @param filter The query used to filter further by running against field value fetched using _source field. + * @param fieldType The mapped field type. 
+ * @param context The QueryShardContext to get lookup and valueFetcher + */ + public SourceFieldMatchQuery(Query delegateQuery, Query filter, MappedFieldType fieldType, QueryShardContext context) { + this.delegateQuery = delegateQuery; + this.filter = filter; + this.fieldType = fieldType; + this.context = context; + this.lookup = context.lookup(); + if (!context.documentMapper("").sourceMapper().enabled()) { + throw new IllegalArgumentException( + "SourceFieldMatchQuery error: unable to fetch fields from _source field: _source is disabled in the mappings " + + "for index [" + + context.index().getName() + + "]" + ); + } + this.valueFetcher = (SourceValueFetcher) fieldType.valueFetcher(context, lookup, null); + } + + @Override + public void visit(QueryVisitor visitor) { + delegateQuery.visit(visitor); + } + + @Override + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + Query rewritten = indexSearcher.rewrite(delegateQuery); + if (rewritten == delegateQuery) { + return this; + } + return new SourceFieldMatchQuery(rewritten, filter, fieldType, context); + } + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + + Weight weight = delegateQuery.createWeight(searcher, ScoreMode.TOP_DOCS, boost); + + return new ConstantScoreWeight(this, boost) { + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + + Scorer scorer = weight.scorer(context); + if (scorer == null) { + // none of the docs are matching + return null; + } + DocIdSetIterator approximation = scorer.iterator(); + LeafSearchLookup leafSearchLookup = lookup.getLeafSearchLookup(context); + TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) { + + @Override + public boolean matches() { + leafSearchLookup.setDocument(approximation.docID()); + List values = valueFetcher.fetchValues(leafSearchLookup.source()); + // Missing fields won't count as match. 
Can we use a default value for missing field? + if (values.isEmpty()) { + return false; + } + MemoryIndex memoryIndex = new MemoryIndex(); + for (Object value : values) { + memoryIndex.addField(fieldType.name(), (String) value, fieldType.indexAnalyzer()); + } + float score = memoryIndex.search(filter); + return score > 0.0f; + } + + @Override + public float matchCost() { + // arbitrary cost + return 1000f; + } + }; + return new ConstantScoreScorer(this, score(), ScoreMode.TOP_DOCS, twoPhase); + } + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + // It is fine to cache if delegate query weight is cacheable since additional logic here + // is just a filter on top of delegate query matches + return weight.isCacheable(ctx); + } + }; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (sameClassAs(o) == false) { + return false; + } + SourceFieldMatchQuery other = (SourceFieldMatchQuery) o; + return Objects.equals(this.delegateQuery, other.delegateQuery) + && Objects.equals(this.filter, other.filter) + && Objects.equals(this.fieldType, other.fieldType) + && Objects.equals(this.context, other.context); + } + + @Override + public int hashCode() { + return Objects.hash(classHash(), delegateQuery, filter, fieldType, context); + } + + @Override + public String toString(String f) { + return "SourceFieldMatchQuery (delegate query: [ " + delegateQuery.toString(f) + " ], filter query: [ " + filter.toString(f) + "])"; + } +} diff --git a/server/src/main/java/org/opensearch/index/search/MatchQuery.java b/server/src/main/java/org/opensearch/index/search/MatchQuery.java index 9e2b79971369d..ec6755ea25703 100644 --- a/server/src/main/java/org/opensearch/index/search/MatchQuery.java +++ b/server/src/main/java/org/opensearch/index/search/MatchQuery.java @@ -67,6 +67,7 @@ import org.opensearch.core.common.io.stream.Writeable; import org.opensearch.index.mapper.KeywordFieldMapper; import 
org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.MatchOnlyTextFieldMapper; import org.opensearch.index.mapper.TextFieldMapper; import org.opensearch.index.query.QueryShardContext; import org.opensearch.index.query.support.QueryParsers; @@ -701,7 +702,7 @@ private Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClaus protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { try { checkForPositions(field); - return fieldType.phraseQuery(stream, slop, enablePositionIncrements); + return fieldType.phraseQuery(stream, slop, enablePositionIncrements, context); } catch (IllegalArgumentException | IllegalStateException e) { if (lenient) { return newLenientFieldQuery(field, e); @@ -714,7 +715,7 @@ protected Query analyzePhrase(String field, TokenStream stream, int slop) throws protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { try { checkForPositions(field); - return fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements); + return fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, context); } catch (IllegalArgumentException | IllegalStateException e) { if (lenient) { return newLenientFieldQuery(field, e); @@ -728,7 +729,7 @@ private Query analyzePhrasePrefix(String field, TokenStream stream, int slop, in if (positionCount > 1) { checkForPositions(field); } - return fieldType.phrasePrefixQuery(stream, slop, maxExpansions); + return fieldType.phrasePrefixQuery(stream, slop, maxExpansions, context); } catch (IllegalArgumentException | IllegalStateException e) { if (lenient) { return newLenientFieldQuery(field, e); @@ -887,6 +888,9 @@ private Query analyzeGraphPhrase(TokenStream source, String field, Type type, in private void checkForPositions(String field) { if (fieldType.getTextSearchInfo().hasPositions() == false) { + /* match_only_text is exempt from the position-data requirement: return instead of throwing so that the callers go on to invoke the context-aware phraseQuery / multiPhraseQuery / phrasePrefixQuery on the field type. NOTE(review): the field type is presumably responsible for building a query that works without positions - confirm in MatchOnlyTextFieldType. */ + if (fieldType instanceof MatchOnlyTextFieldMapper.MatchOnlyTextFieldType) { + return; + } throw
new IllegalStateException("field:[" + field + "] was indexed without position data; cannot run PhraseQuery"); } } diff --git a/server/src/main/java/org/opensearch/index/search/MultiMatchQuery.java b/server/src/main/java/org/opensearch/index/search/MultiMatchQuery.java index 241f05af2c512..8c0c87e8c9d0c 100644 --- a/server/src/main/java/org/opensearch/index/search/MultiMatchQuery.java +++ b/server/src/main/java/org/opensearch/index/search/MultiMatchQuery.java @@ -248,7 +248,7 @@ protected Query newPrefixQuery(Term term) { protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { List disjunctions = new ArrayList<>(); for (FieldAndBoost fieldType : blendedFields) { - Query query = fieldType.fieldType.phraseQuery(stream, slop, enablePositionIncrements); + Query query = fieldType.fieldType.phraseQuery(stream, slop, enablePositionIncrements, context); if (fieldType.boost != 1f) { query = new BoostQuery(query, fieldType.boost); } @@ -261,7 +261,7 @@ protected Query analyzePhrase(String field, TokenStream stream, int slop) throws protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { List disjunctions = new ArrayList<>(); for (FieldAndBoost fieldType : blendedFields) { - Query query = fieldType.fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements); + Query query = fieldType.fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, context); if (fieldType.boost != 1f) { query = new BoostQuery(query, fieldType.boost); } diff --git a/server/src/main/java/org/opensearch/indices/IndicesModule.java b/server/src/main/java/org/opensearch/indices/IndicesModule.java index 5c2137ec742a4..eea5dbbf57f6c 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesModule.java +++ b/server/src/main/java/org/opensearch/indices/IndicesModule.java @@ -59,6 +59,7 @@ import org.opensearch.index.mapper.IpFieldMapper; import org.opensearch.index.mapper.KeywordFieldMapper; import
org.opensearch.index.mapper.Mapper; +import org.opensearch.index.mapper.MatchOnlyTextFieldMapper; import org.opensearch.index.mapper.MetadataFieldMapper; import org.opensearch.index.mapper.NestedPathFieldMapper; import org.opensearch.index.mapper.NumberFieldMapper; @@ -158,6 +159,7 @@ public static Map getMappers(List mappe mappers.put(nanoseconds.type(), DateFieldMapper.NANOS_PARSER); mappers.put(IpFieldMapper.CONTENT_TYPE, IpFieldMapper.PARSER); mappers.put(TextFieldMapper.CONTENT_TYPE, TextFieldMapper.PARSER); + mappers.put(MatchOnlyTextFieldMapper.CONTENT_TYPE, MatchOnlyTextFieldMapper.PARSER); mappers.put(KeywordFieldMapper.CONTENT_TYPE, KeywordFieldMapper.PARSER); mappers.put(ObjectMapper.CONTENT_TYPE, new ObjectMapper.TypeParser()); mappers.put(ObjectMapper.NESTED_CONTENT_TYPE, new ObjectMapper.TypeParser()); diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldAnalyzerModeTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldAnalyzerModeTests.java new file mode 100644 index 0000000000000..13cb279418fa8 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldAnalyzerModeTests.java @@ -0,0 +1,16 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.mapper; + +/** + * Re-runs the tests inherited from {@link TextFieldAnalyzerModeTests} with the match_only_text type parser in place of the parent's parser. + */ +public class MatchOnlyTextFieldAnalyzerModeTests extends TextFieldAnalyzerModeTests { + /** Returns {@link MatchOnlyTextFieldMapper#PARSER} as the parser under test. */ + @Override + ParametrizedFieldMapper.TypeParser getTypeParser() { + return MatchOnlyTextFieldMapper.PARSER; + } +} diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java new file mode 100644 index 0000000000000..580f8cccc9af5 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapperTests.java @@ -0,0 +1,450 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.mapper; + +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.IndexableFieldType; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.tests.analysis.MockSynonymAnalyzer; +import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.opensearch.core.common.Strings; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.index.query.MatchPhrasePrefixQueryBuilder; +import org.opensearch.index.query.MatchPhraseQueryBuilder; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.query.SourceFieldMatchQuery; +import org.opensearch.index.search.MatchQuery; +import org.junit.Before; + +import java.io.IOException;
+import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.core.Is.is; + +/** + * Runs the {@link TextFieldMapperTests} suite against the match_only_text mapping type and overrides the tests whose expectations differ: postings are docs-only, index_prefixes and index_phrases are rejected, and multi-term phrase / phrase-prefix queries are expected to be wrapped in a {@link SourceFieldMatchQuery}. + */ +public class MatchOnlyTextFieldMapperTests extends TextFieldMapperTests { + + /** Sets textFieldName to the match_only_text type before every test. */ + @Before + public void setupMatchOnlyTextFieldMapper() { + textFieldName = "match_only_text"; + } + + /** Default mapping: tokenized, not stored, norms omitted, docs-only index options, no term vectors and no doc values. */ + @Override + public void testDefaults() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(this::minimalMapping)); + assertEquals(fieldMapping(this::minimalMapping).toString(), mapper.mappingSource().toString()); + + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(1, fields.length); + assertEquals("1234", fields[0].stringValue()); + IndexableFieldType fieldType = fields[0].fieldType(); + assertThat(fieldType.omitNorms(), equalTo(true)); + assertTrue(fieldType.tokenized()); + assertFalse(fieldType.stored()); + assertThat(fieldType.indexOptions(), equalTo(IndexOptions.DOCS)); + assertThat(fieldType.storeTermVectors(), equalTo(false)); + assertThat(fieldType.storeTermVectorOffsets(), equalTo(false)); + assertThat(fieldType.storeTermVectorPositions(), equalTo(false)); + assertThat(fieldType.storeTermVectorPayloads(), equalTo(false)); + assertEquals(DocValuesType.NONE, fieldType.docValuesType()); + } + + /** store=true must produce exactly one field and that field must be stored. */ + @Override + public void testEnableStore() throws IOException { + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", textFieldName).field("store", true))); + ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertEquals(1, fields.length); + assertTrue(fields[0].fieldType().stored()); + } + + /** Only index_options=docs is accepted; freqs, positions and offsets must fail mapping parsing with the asserted message. */ + @Override + public void testIndexOptions() throws IOException { + Map supportedOptions = new HashMap<>(); + supportedOptions.put("docs",
IndexOptions.DOCS); + + Map unsupportedOptions = new HashMap<>(); + unsupportedOptions.put("freqs", IndexOptions.DOCS_AND_FREQS); + unsupportedOptions.put("positions", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + unsupportedOptions.put("offsets", IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); + + for (String option : supportedOptions.keySet()) { + XContentBuilder mapping = MediaTypeRegistry.JSON.contentBuilder().startObject().startObject("_doc").startObject("properties"); + mapping.startObject(option).field("type", textFieldName).field("index_options", option).endObject(); + mapping.endObject().endObject().endObject(); + + DocumentMapper mapper = createDocumentMapper(mapping); + String serialized = Strings.toString(MediaTypeRegistry.JSON, mapper); + /* the field is named after the option, so the expected fragment is a "docs" field serialized with only its type (index_options is not echoed back) */ + assertThat(serialized, containsString("\"docs\":{\"type\":\"match_only_text\"}")); + + ParsedDocument doc = mapper.parse(source(b -> { b.field(option, "1234"); })); + + IndexOptions options = supportedOptions.get(option); + IndexableField[] fields = doc.rootDoc().getFields(option); + assertEquals(1, fields.length); + assertEquals(options, fields[0].fieldType().indexOptions()); + } + + for (String option : unsupportedOptions.keySet()) { + XContentBuilder mapping = MediaTypeRegistry.JSON.contentBuilder().startObject().startObject("_doc").startObject("properties"); + mapping.startObject(option).field("type", textFieldName).field("index_options", option).endObject(); + mapping.endObject().endObject().endObject(); + MapperParsingException e = expectThrows(MapperParsingException.class, () -> createDocumentMapper(mapping)); + assertThat( + e.getMessage(), + containsString( + "Failed to parse mapping [_doc]: Unknown value [" + option + "] for field [index_options] - accepted values are [docs]" + ) + ); + } + } + + /** Combining position_increment_gap with index_options=docs must parse without throwing for this type. */ + @Override + public void testAnalyzedFieldPositionIncrementWithoutPositions() { + for (String indexOptions : List.of("docs")) { + try { + createDocumentMapper( + fieldMapping( + b -> b.field("type",
textFieldName).field("index_options", indexOptions).field("position_increment_gap", 10) + ) + ); + } catch (IOException e) { + /* this method declares no checked exceptions, so the IOException is rethrown unchecked with its cause preserved */ + throw new RuntimeException(e); + } + } + } + + /* The next four inherited tests are overridden with empty bodies, which turns them into no-ops for match_only_text. NOTE(review): presumably because the inherited scenarios do not apply to this type - confirm. */ + @Override + public void testBWCSerialization() throws IOException {} + + @Override + public void testPositionIncrementGap() throws IOException {} + + @Override + public void testDefaultPositionIncrementGap() throws IOException {} + + @Override + public void testMinimalToMaximal() throws IOException {} + + /** index_prefixes must be rejected with a MapperParsingException carrying the asserted message. */ + @Override + public void testIndexPrefixMapping() throws IOException { + MapperParsingException e = expectThrows( + MapperParsingException.class, + () -> createDocumentMapper( + fieldMapping( + b -> b.field("type", textFieldName) + .field("analyzer", "standard") + .startObject("index_prefixes") + .field("min_chars", 2) + .field("max_chars", 10) + .endObject() + ) + ) + ); + assertEquals( + "Failed to parse mapping [_doc]: Index prefixes cannot be enabled on for match_only_text field. Use text field instead", + e.getMessage() + ); + } + + @Override + public void testIndexPrefixIndexTypes() throws IOException { + // index_prefixes is not supported; the rejection is asserted in testIndexPrefixMapping + } + + @Override + public void testFastPhrasePrefixes() throws IOException { + // index_prefixes is not supported; the rejection is asserted in testIndexPrefixMapping + } + + /** + * match_phrase_prefix on match_only_text: every case expects a {@link SourceFieldMatchQuery} whose first argument is a BooleanQuery of FILTER clauses (one clause per leading position plus a prefix query on the last position) and whose second argument is the complete MultiPhrasePrefixQuery. NOTE(review): presumably the first argument selects candidates from the index and the second is re-checked against _source - confirm in SourceFieldMatchQuery. + */ + public void testPhrasePrefixes() throws IOException { + MapperService mapperService = createMapperService(mapping(b -> { + b.startObject("field"); + { + b.field("type", textFieldName); + b.field("analyzer", "my_stop_analyzer"); // stop-word analyzer: "a" is dropped in the "sparkle a stopword" case below + } + b.endObject(); + b.startObject("synfield"); + { + b.field("type", textFieldName); + b.field("analyzer", "standard"); // "standard" will be replaced with MockSynonymAnalyzer + } + b.endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + + { + /* two terms: the leading term becomes a FILTER TermQuery, the last term a single-term prefix query */ + Query q = new MatchPhrasePrefixQueryBuilder("field", "two
words").toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "words")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("field"); + mqbFilter.add(new Term("field", "two")); + mqbFilter.add(new Term("field", "words")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + /* three terms: two FILTER TermQuery clauses followed by the prefix query on the last term */ + Query q = new MatchPhrasePrefixQueryBuilder("field", "three words here").toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "here")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("field"); + mqbFilter.add(new Term("field", "three")); + mqbFilter.add(new Term("field", "words")); + mqbFilter.add(new Term("field", "here")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "three")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "words")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + /* slop: expected only on the second argument (mqbFilter); the BooleanQuery side is unchanged */ + Query q = new MatchPhrasePrefixQueryBuilder("field", "two words").slop(1).toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "words")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("field"); + mqbFilter.setSlop(1); + mqbFilter.add(new Term("field", "two")); + mqbFilter.add(new Term("field", "words")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.FILTER) + .add(mqb,
BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + /* single term: the same prefix query is expected as the only FILTER clause and as the second argument */ + Query q = new MatchPhrasePrefixQueryBuilder("field", "singleton").toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "singleton")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(mqb, BooleanClause.Occur.FILTER).build(), + mqb, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, is(expected)); + } + + { + /* removed stop word: "a" yields no clause and leaves a gap, so "stopword" is added at position 2 */ + Query q = new MatchPhrasePrefixQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("field"); + mqb.add(new Term("field", "stopword")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("field"); + mqbFilter.add(new Term("field", "sparkle")); + mqbFilter.add(new Term[] { new Term("field", "stopword") }, 2); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "sparkle")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + /* synonyms on the last position: both variants ("dogs", "dog") go into the prefix query */ + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q = matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "synfield", "motor dogs"); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("synfield"); + mqb.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("synfield"); + mqbFilter.add(new Term("synfield", "motor")); + mqbFilter.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new
Term("synfield", "motor")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("synfield"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + /* synonyms on the last position combined with a phrase slop of 1 */ + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setPhraseSlop(1); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q = matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "synfield", "two dogs"); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("synfield"); + mqb.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("synfield"); + mqbFilter.add(new Term("synfield", "two")); + mqbFilter.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + mqbFilter.setSlop(1); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "two")), BooleanClause.Occur.FILTER) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("synfield"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + + { + /* synonyms on a middle position: expected as a nested BooleanQuery of SHOULD clauses added with FILTER */ + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q = matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "synfield", "three dogs word"); + MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery("synfield"); + mqb.add(new Term("synfield", "word")); + MultiPhrasePrefixQuery mqbFilter = new MultiPhrasePrefixQuery("synfield"); + mqbFilter.add(new Term("synfield", "three")); + mqbFilter.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + mqbFilter.add(new Term("synfield", "word")); + Query expected = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "three")), BooleanClause.Occur.FILTER) + .add( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield",
"dogs")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("synfield", "dog")), BooleanClause.Occur.SHOULD) + .build(), + BooleanClause.Occur.FILTER + ) + .add(mqb, BooleanClause.Occur.FILTER) + .build(), + mqbFilter, + mapperService.fieldType("synfield"), + queryShardContext + ); + assertThat(q, equalTo(expected)); + } + } + + /** index_phrases must be rejected with a MapperParsingException carrying the asserted message. */ + @Override + public void testFastPhraseMapping() throws IOException { + MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(mapping(b -> { + b.startObject("field") + .field("type", textFieldName) + .field("analyzer", "my_stop_analyzer") + .field("index_phrases", true) + .endObject(); + // "standard" will be replaced with MockSynonymAnalyzer + b.startObject("synfield").field("type", textFieldName).field("analyzer", "standard").field("index_phrases", true).endObject(); + }))); + assertEquals( + "Failed to parse mapping [_doc]: Index phrases cannot be enabled on for match_only_text field. Use text field instead", + e.getMessage() + ); + } + + /* Inherited test overridden with an empty body, which turns it into a no-op for match_only_text. */ + @Override + public void testSimpleMerge() throws IOException {} + + /** + * match_phrase on match_only_text: multi-term phrases are expected as a {@link SourceFieldMatchQuery} whose first argument is a BooleanQuery with one FILTER clause per position and whose second argument is the PhraseQuery / MultiPhraseQuery itself (carrying slop and position gaps); a single-term phrase is expected as a plain TermQuery. + */ + public void testPhraseQuery() throws IOException { + MapperService mapperService = createMapperService(mapping(b -> { + b.startObject("field").field("type", textFieldName).field("analyzer", "my_stop_analyzer").endObject(); + // "standard" will be replaced with MockSynonymAnalyzer + b.startObject("synfield").field("type", textFieldName).field("analyzer", "standard").endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + + Query q = new MatchPhraseQueryBuilder("field", "two words").toQuery(queryShardContext); + Query expectedQuery = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "words")), BooleanClause.Occur.FILTER) + .build(), + new PhraseQuery("field", "two", "words"), + mapperService.fieldType("field"), +
queryShardContext + ); + + assertThat(q, is(expectedQuery)); + /* single term: no SourceFieldMatchQuery wrapper is expected */ + Query q4 = new MatchPhraseQueryBuilder("field", "singleton").toQuery(queryShardContext); + assertThat(q4, is(new TermQuery(new Term("field", "singleton")))); + + Query q2 = new MatchPhraseQueryBuilder("field", "three words here").toQuery(queryShardContext); + expectedQuery = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "three")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "words")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "here")), BooleanClause.Occur.FILTER) + .build(), + new PhraseQuery("field", "three", "words", "here"), + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q2, is(expectedQuery)); + + /* slop: expected only on the PhraseQuery passed as the second argument */ + Query q3 = new MatchPhraseQueryBuilder("field", "two words").slop(2).toQuery(queryShardContext); + expectedQuery = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "two")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "words")), BooleanClause.Occur.FILTER) + .build(), + new PhraseQuery(2, "field", "two", "words"), + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q3, is(expectedQuery)); + + /* removed stop word: "a" yields no FILTER clause and "stopword" sits at position 2 in the PhraseQuery */ + Query q5 = new MatchPhraseQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext); + expectedQuery = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("field", "sparkle")), BooleanClause.Occur.FILTER) + .add(new TermQuery(new Term("field", "stopword")), BooleanClause.Occur.FILTER) + .build(), + new PhraseQuery.Builder().add(new Term("field", "sparkle")).add(new Term("field", "stopword"), 2).build(), + mapperService.fieldType("field"), + queryShardContext + ); + assertThat(q5, is(expectedQuery)); + + /* synonyms: a nested SHOULD BooleanQuery on the first argument and a MultiPhraseQuery as the second */ + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q6 = matchQuery.parse(MatchQuery.Type.PHRASE,
"synfield", "motor dogs"); + expectedQuery = new SourceFieldMatchQuery( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "motor")), BooleanClause.Occur.FILTER) + .add( + new BooleanQuery.Builder().add(new TermQuery(new Term("synfield", "dogs")), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term("synfield", "dog")), BooleanClause.Occur.SHOULD) + .build(), + BooleanClause.Occur.FILTER + ) + .build(), + new MultiPhraseQuery.Builder().add(new Term("synfield", "motor")) + .add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }, 1) + .build(), + mapperService.fieldType("synfield"), + queryShardContext + ); + assertThat(q6, is(expectedQuery)); + } +} diff --git a/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java new file mode 100644 index 0000000000000..51234fa04ddc2 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/mapper/MatchOnlyTextFieldTypeTests.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license.
+ */ + +package org.opensearch.index.mapper; + +import org.opensearch.common.lucene.Lucene; + +/** + * Runs the tests inherited from {@link TextFieldTypeTests} against a match_only_text field type by overriding the field type factory. + */ +public class MatchOnlyTextFieldTypeTests extends TextFieldTypeTests { + + /** + * Builds the field type under test: a MatchOnlyTextFieldType named "field" whose TextSearchInfo uses the text field's default FieldType and the standard analyzer for both analyzer arguments. NOTE(review): the literal {@code false} is presumably the "stored" flag - confirm against the constructor. + */ + @Override + TextFieldMapper.TextFieldType createFieldType(boolean searchable) { + TextSearchInfo tsi = new TextSearchInfo( + TextFieldMapper.Defaults.FIELD_TYPE, + null, + Lucene.STANDARD_ANALYZER, + Lucene.STANDARD_ANALYZER + ); + return new MatchOnlyTextFieldMapper.MatchOnlyTextFieldType( + "field", + searchable, + false, + tsi, + ParametrizedFieldMapper.Parameter.metaParam().get() + ); + } +} diff --git a/server/src/test/java/org/opensearch/index/mapper/TextFieldAnalyzerModeTests.java b/server/src/test/java/org/opensearch/index/mapper/TextFieldAnalyzerModeTests.java index 93bed729f0974..83a3bdc580ae6 100644 --- a/server/src/test/java/org/opensearch/index/mapper/TextFieldAnalyzerModeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/TextFieldAnalyzerModeTests.java @@ -59,6 +59,9 @@ import static org.mockito.Mockito.when; public class TextFieldAnalyzerModeTests extends OpenSearchTestCase { + /** Parser exercised by the tests in this class; package-private and non-final so a subclass can substitute another field type's parser. */ + ParametrizedFieldMapper.TypeParser getTypeParser() { + return TextFieldMapper.PARSER; + } private static Map defaultAnalyzers() { Map analyzers = new HashMap<>(); @@ -101,7 +104,7 @@ public void testParseTextFieldCheckAnalyzerAnalysisMode() { IndexAnalyzers indexAnalyzers = new IndexAnalyzers(analyzers, Collections.emptyMap(), Collections.emptyMap()); when(parserContext.getIndexAnalyzers()).thenReturn(indexAnalyzers); - TextFieldMapper.PARSER.parse("field", fieldNode, parserContext); + getTypeParser().parse("field", fieldNode, parserContext); // check that "analyzer" set to something that only supports AnalysisMode.SEARCH_TIME or AnalysisMode.INDEX_TIME is blocked AnalysisMode mode = randomFrom(AnalysisMode.SEARCH_TIME, AnalysisMode.INDEX_TIME); @@ -110,7 +113,7 @@ public void testParseTextFieldCheckAnalyzerAnalysisMode() { indexAnalyzers = new IndexAnalyzers(analyzers, Collections.emptyMap(),
Collections.emptyMap()); when(parserContext.getIndexAnalyzers()).thenReturn(indexAnalyzers); fieldNode.put("analyzer", "my_analyzer"); - MapperException ex = expectThrows(MapperException.class, () -> { TextFieldMapper.PARSER.parse("name", fieldNode, parserContext); }); + MapperException ex = expectThrows(MapperException.class, () -> { getTypeParser().parse("name", fieldNode, parserContext); }); assertThat( ex.getMessage(), containsString("analyzer [my_named_analyzer] contains filters [my_analyzer] that are not allowed to run") @@ -136,7 +139,7 @@ public void testParseTextFieldCheckSearchAnalyzerAnalysisMode() { IndexAnalyzers indexAnalyzers = new IndexAnalyzers(analyzers, Collections.emptyMap(), Collections.emptyMap()); when(parserContext.getIndexAnalyzers()).thenReturn(indexAnalyzers); - TextFieldMapper.PARSER.parse("textField", fieldNode, parserContext); + getTypeParser().parse("textField", fieldNode, parserContext); // check that "analyzer" set to AnalysisMode.INDEX_TIME is blocked mode = AnalysisMode.INDEX_TIME; @@ -151,10 +154,7 @@ public void testParseTextFieldCheckSearchAnalyzerAnalysisMode() { if (settingToTest.equals("search_quote_analyzer")) { fieldNode.put("search_analyzer", "standard"); } - MapperException ex = expectThrows( - MapperException.class, - () -> { TextFieldMapper.PARSER.parse("field", fieldNode, parserContext); } - ); + MapperException ex = expectThrows(MapperException.class, () -> { getTypeParser().parse("field", fieldNode, parserContext); }); assertEquals( "analyzer [my_named_analyzer] contains filters [my_analyzer] that are not allowed to run in search time mode.", ex.getMessage() @@ -174,10 +174,7 @@ public void testParseTextFieldCheckAnalyzerWithSearchAnalyzerAnalysisMode() { analyzers.put("my_analyzer", new NamedAnalyzer("my_named_analyzer", AnalyzerScope.INDEX, createAnalyzerWithMode(mode))); IndexAnalyzers indexAnalyzers = new IndexAnalyzers(analyzers, Collections.emptyMap(), Collections.emptyMap()); 
when(parserContext.getIndexAnalyzers()).thenReturn(indexAnalyzers); - MapperException ex = expectThrows( - MapperException.class, - () -> { TextFieldMapper.PARSER.parse("field", fieldNode, parserContext); } - ); + MapperException ex = expectThrows(MapperException.class, () -> { getTypeParser().parse("field", fieldNode, parserContext); }); assertThat( ex.getMessage(), containsString("analyzer [my_named_analyzer] contains filters [my_analyzer] that are not allowed to run") @@ -193,7 +190,6 @@ public void testParseTextFieldCheckAnalyzerWithSearchAnalyzerAnalysisMode() { indexAnalyzers = new IndexAnalyzers(analyzers, Collections.emptyMap(), Collections.emptyMap()); when(parserContext.getIndexAnalyzers()).thenReturn(indexAnalyzers); - TextFieldMapper.PARSER.parse("field", fieldNode, parserContext); + getTypeParser().parse("field", fieldNode, parserContext); } - } diff --git a/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java index a9b902e121bda..a22bfa5e845b1 100644 --- a/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java @@ -81,6 +81,7 @@ import org.opensearch.index.query.MatchPhraseQueryBuilder; import org.opensearch.index.query.QueryShardContext; import org.opensearch.index.search.MatchQuery; +import org.junit.Before; import java.io.IOException; import java.util.Arrays; @@ -95,6 +96,13 @@ public class TextFieldMapperTests extends MapperTestCase { + /** Value written to the mapping "type" key by the tests in this class; "text" here. NOTE(review): presumably reassigned by subclasses that reuse this suite for another type - confirm. */ + public String textFieldName = "text"; + + /** Resets textFieldName to "text" before every test. */ + @Before + public void setup() { + textFieldName = "text"; + } + @Override protected void writeFieldValue(XContentBuilder builder) throws IOException { builder.value(1234); @@ -169,30 +177,34 @@ protected void registerParameters(ParameterChecker checker) throws IOException { checker.registerConflictCheck("index", b -> b.field("index", false));
checker.registerConflictCheck("store", b -> b.field("store", true)); - checker.registerConflictCheck("index_phrases", b -> b.field("index_phrases", true)); - checker.registerConflictCheck("index_prefixes", b -> b.startObject("index_prefixes").endObject()); - checker.registerConflictCheck("index_options", b -> b.field("index_options", "docs")); + if (!textFieldName.equals("match_only_text")) { + checker.registerConflictCheck("index_phrases", b -> b.field("index_phrases", true)); + checker.registerConflictCheck("index_prefixes", b -> b.startObject("index_prefixes").endObject()); + checker.registerConflictCheck("index_options", b -> b.field("index_options", "docs")); + } checker.registerConflictCheck("similarity", b -> b.field("similarity", "boolean")); checker.registerConflictCheck("analyzer", b -> b.field("analyzer", "keyword")); checker.registerConflictCheck("term_vector", b -> b.field("term_vector", "yes")); checker.registerConflictCheck("position_increment_gap", b -> b.field("position_increment_gap", 10)); - // norms can be set from true to false, but not vice versa - checker.registerConflictCheck("norms", fieldMapping(b -> { - b.field("type", "text"); - b.field("norms", false); - }), fieldMapping(b -> { - b.field("type", "text"); - b.field("norms", true); - })); - checker.registerUpdateCheck(b -> { - b.field("type", "text"); - b.field("norms", true); - }, b -> { - b.field("type", "text"); - b.field("norms", false); - }, m -> assertFalse(m.fieldType().getTextSearchInfo().hasNorms())); + if (!textFieldName.equals(MatchOnlyTextFieldMapper.CONTENT_TYPE)) { + // norms can be set from true to false, but not vice versa + checker.registerConflictCheck("norms", fieldMapping(b -> { + b.field("type", textFieldName); + b.field("norms", false); + }), fieldMapping(b -> { + b.field("type", textFieldName); + b.field("norms", true); + })); + checker.registerUpdateCheck(b -> { + b.field("type", textFieldName); + b.field("norms", true); + }, b -> { + b.field("type", 
textFieldName); + b.field("norms", false); + }, m -> assertFalse(m.fieldType().getTextSearchInfo().hasNorms())); + } checker.registerUpdateCheck(b -> b.field("boost", 2.0), m -> assertEquals(m.fieldType().boost(), 2.0, 0)); @@ -237,7 +249,7 @@ public TokenStream create(TokenStream tokenStream) { @Override protected void minimalMapping(XContentBuilder b) throws IOException { - b.field("type", "text"); + b.field("type", textFieldName); } public void testDefaults() throws IOException { @@ -262,7 +274,7 @@ public void testDefaults() throws IOException { public void testBWCSerialization() throws IOException { MapperService mapperService = createMapperService(fieldMapping(b -> { - b.field("type", "text"); + b.field("type", textFieldName); b.field("fielddata", true); b.startObject("fields"); { @@ -312,7 +324,7 @@ public void testBWCSerialization() throws IOException { } public void testEnableStore() throws IOException { - DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "text").field("store", true))); + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", textFieldName).field("store", true))); ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(1, fields.length); @@ -320,14 +332,14 @@ public void testEnableStore() throws IOException { } public void testDisableIndex() throws IOException { - DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "text").field("index", false))); + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", textFieldName).field("index", false))); ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(0, fields.length); } public void testDisableNorms() throws IOException { - DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", 
"text").field("norms", false))); + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", textFieldName).field("norms", false))); ParsedDocument doc = mapper.parse(source(b -> b.field("field", "1234"))); IndexableField[] fields = doc.rootDoc().getFields("field"); assertEquals(1, fields.length); @@ -343,7 +355,7 @@ public void testIndexOptions() throws IOException { XContentBuilder mapping = MediaTypeRegistry.JSON.contentBuilder().startObject().startObject("_doc").startObject("properties"); for (String option : supportedOptions.keySet()) { - mapping.startObject(option).field("type", "text").field("index_options", option).endObject(); + mapping.startObject(option).field("type", textFieldName).field("index_options", option).endObject(); } mapping.endObject().endObject().endObject(); @@ -389,7 +401,7 @@ public void testDefaultPositionIncrementGap() throws IOException { public void testPositionIncrementGap() throws IOException { final int positionIncrementGap = randomIntBetween(1, 1000); MapperService mapperService = createMapperService( - fieldMapping(b -> b.field("type", "text").field("position_increment_gap", positionIncrementGap)) + fieldMapping(b -> b.field("type", textFieldName).field("position_increment_gap", positionIncrementGap)) ); ParsedDocument doc = mapperService.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b" }))); @@ -409,16 +421,16 @@ public void testPositionIncrementGap() throws IOException { public void testSearchAnalyzerSerialization() throws IOException { XContentBuilder mapping = fieldMapping( - b -> b.field("type", "text").field("analyzer", "standard").field("search_analyzer", "keyword") + b -> b.field("type", textFieldName).field("analyzer", "standard").field("search_analyzer", "keyword") ); assertEquals(mapping.toString(), createDocumentMapper(mapping).mappingSource().toString()); // special case: default index analyzer - mapping = fieldMapping(b -> b.field("type", "text").field("analyzer", 
"default").field("search_analyzer", "keyword")); + mapping = fieldMapping(b -> b.field("type", textFieldName).field("analyzer", "default").field("search_analyzer", "keyword")); assertEquals(mapping.toString(), createDocumentMapper(mapping).mappingSource().toString()); // special case: default search analyzer - mapping = fieldMapping(b -> b.field("type", "text").field("analyzer", "keyword").field("search_analyzer", "default")); + mapping = fieldMapping(b -> b.field("type", textFieldName).field("analyzer", "keyword").field("search_analyzer", "default")); assertEquals(mapping.toString(), createDocumentMapper(mapping).mappingSource().toString()); XContentBuilder builder = MediaTypeRegistry.JSON.contentBuilder(); @@ -436,7 +448,7 @@ public void testSearchAnalyzerSerialization() throws IOException { public void testSearchQuoteAnalyzerSerialization() throws IOException { XContentBuilder mapping = fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .field("search_analyzer", "standard") .field("search_quote_analyzer", "keyword") @@ -445,7 +457,7 @@ public void testSearchQuoteAnalyzerSerialization() throws IOException { // special case: default index/search analyzer mapping = fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "default") .field("search_analyzer", "default") .field("search_quote_analyzer", "keyword") @@ -456,27 +468,27 @@ public void testSearchQuoteAnalyzerSerialization() throws IOException { public void testTermVectors() throws IOException { XContentBuilder mapping = mapping( b -> b.startObject("field1") - .field("type", "text") + .field("type", textFieldName) .field("term_vector", "no") .endObject() .startObject("field2") - .field("type", "text") + .field("type", textFieldName) .field("term_vector", "yes") .endObject() .startObject("field3") - .field("type", "text") + .field("type", textFieldName) .field("term_vector", "with_offsets") .endObject() 
.startObject("field4") - .field("type", "text") + .field("type", textFieldName) .field("term_vector", "with_positions") .endObject() .startObject("field5") - .field("type", "text") + .field("type", textFieldName) .field("term_vector", "with_positions_offsets") .endObject() .startObject("field6") - .field("type", "text") + .field("type", textFieldName) .field("term_vector", "with_positions_offsets_payloads") .endObject() ); @@ -526,7 +538,9 @@ public void testTermVectors() throws IOException { } public void testEagerGlobalOrdinals() throws IOException { - DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "text").field("eager_global_ordinals", true))); + DocumentMapper mapper = createDocumentMapper( + fieldMapping(b -> b.field("type", textFieldName).field("eager_global_ordinals", true)) + ); FieldMapper fieldMapper = (FieldMapper) mapper.mappers().getMapper("field"); assertTrue(fieldMapper.fieldType().eagerGlobalOrdinals()); @@ -539,13 +553,13 @@ public void testFielddata() throws IOException { })); assertThat(e.getMessage(), containsString("Text fields are not optimised for operations that require per-document field data")); - MapperService enabledMapper = createMapperService(fieldMapping(b -> b.field("type", "text").field("fielddata", true))); + MapperService enabledMapper = createMapperService(fieldMapping(b -> b.field("type", textFieldName).field("fielddata", true))); enabledMapper.fieldType("field").fielddataBuilder("test", () -> { throw new UnsupportedOperationException(); }); // no exception // this time e = expectThrows( MapperParsingException.class, - () -> createMapperService(fieldMapping(b -> b.field("type", "text").field("index", false).field("fielddata", true))) + () -> createMapperService(fieldMapping(b -> b.field("type", textFieldName).field("index", false).field("fielddata", true))) ); assertThat(e.getMessage(), containsString("Cannot enable fielddata on a [text] field that is not indexed")); } @@ -553,7 +567,7 @@ public 
void testFielddata() throws IOException { public void testFrequencyFilter() throws IOException { MapperService mapperService = createMapperService( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("fielddata", true) .startObject("fielddata_frequency_filter") .field("min", 2d) @@ -571,17 +585,22 @@ public void testFrequencyFilter() throws IOException { public void testNullConfigValuesFail() throws MapperParsingException { Exception e = expectThrows( MapperParsingException.class, - () -> createDocumentMapper(fieldMapping(b -> b.field("type", "text").field("analyzer", (String) null))) + () -> createDocumentMapper(fieldMapping(b -> b.field("type", textFieldName).field("analyzer", (String) null))) + ); + assertThat( + e.getMessage(), + containsString("[analyzer] on mapper [field] of type [" + textFieldName + "] must not have a [null] value") ); - assertThat(e.getMessage(), containsString("[analyzer] on mapper [field] of type [text] must not have a [null] value")); } public void testNotIndexedFieldPositionIncrement() { Exception e = expectThrows( MapperParsingException.class, - () -> createDocumentMapper(fieldMapping(b -> b.field("type", "text").field("index", false).field("position_increment_gap", 10))) + () -> createDocumentMapper( + fieldMapping(b -> b.field("type", textFieldName).field("index", false).field("position_increment_gap", 10)) + ) ); - assertThat(e.getMessage(), containsString("Cannot set position_increment_gap on field [field] without positions enabled")); + assertThat(e.getMessage(), containsString("Cannot set position_increment_gap on field [field]")); } public void testAnalyzedFieldPositionIncrementWithoutPositions() { @@ -589,7 +608,9 @@ public void testAnalyzedFieldPositionIncrementWithoutPositions() { Exception e = expectThrows( MapperParsingException.class, () -> createDocumentMapper( - fieldMapping(b -> b.field("type", "text").field("index_options", indexOptions).field("position_increment_gap", 10)) + 
fieldMapping( + b -> b.field("type", textFieldName).field("index_options", indexOptions).field("position_increment_gap", 10) + ) ) ); assertThat(e.getMessage(), containsString("Cannot set position_increment_gap on field [field] without positions enabled")); @@ -600,7 +621,7 @@ public void testIndexPrefixIndexTypes() throws IOException { { DocumentMapper mapper = createDocumentMapper( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .startObject("index_prefixes") .endObject() @@ -615,7 +636,7 @@ public void testIndexPrefixIndexTypes() throws IOException { { DocumentMapper mapper = createDocumentMapper( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .startObject("index_prefixes") .endObject() @@ -632,7 +653,7 @@ public void testIndexPrefixIndexTypes() throws IOException { { DocumentMapper mapper = createDocumentMapper( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .startObject("index_prefixes") .endObject() @@ -649,7 +670,7 @@ public void testIndexPrefixIndexTypes() throws IOException { { DocumentMapper mapper = createDocumentMapper( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .startObject("index_prefixes") .endObject() @@ -666,7 +687,7 @@ public void testIndexPrefixIndexTypes() throws IOException { { DocumentMapper mapper = createDocumentMapper( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .startObject("index_prefixes") .endObject() @@ -682,62 +703,18 @@ public void testIndexPrefixIndexTypes() throws IOException { } public void testNestedIndexPrefixes() throws IOException { - { - MapperService mapperService = createMapperService( - mapping( - b -> b.startObject("object") - .field("type", "object") - .startObject("properties") - 
.startObject("field") - .field("type", "text") - .startObject("index_prefixes") - .endObject() - .endObject() - .endObject() - .endObject() - ) - ); - MappedFieldType textField = mapperService.fieldType("object.field"); - assertNotNull(textField); - assertThat(textField, instanceOf(TextFieldType.class)); - MappedFieldType prefix = ((TextFieldType) textField).getPrefixFieldType(); - assertEquals(prefix.name(), "object.field._index_prefix"); - FieldMapper mapper = (FieldMapper) mapperService.documentMapper().mappers().getMapper("object.field._index_prefix"); - assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, mapper.fieldType.indexOptions()); - assertFalse(mapper.fieldType.storeTermVectorOffsets()); - } - { - MapperService mapperService = createMapperService( - mapping( - b -> b.startObject("body") - .field("type", "text") - .startObject("fields") - .startObject("with_prefix") - .field("type", "text") - .startObject("index_prefixes") - .endObject() - .endObject() - .endObject() - .endObject() - ) - ); - MappedFieldType textField = mapperService.fieldType("body.with_prefix"); - assertNotNull(textField); - assertThat(textField, instanceOf(TextFieldType.class)); - MappedFieldType prefix = ((TextFieldType) textField).getPrefixFieldType(); - assertEquals(prefix.name(), "body.with_prefix._index_prefix"); - FieldMapper mapper = (FieldMapper) mapperService.documentMapper().mappers().getMapper("body.with_prefix._index_prefix"); - assertEquals(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, mapper.fieldType.indexOptions()); - assertFalse(mapper.fieldType.storeTermVectorOffsets()); - } } public void testFastPhraseMapping() throws IOException { MapperService mapperService = createMapperService(mapping(b -> { - b.startObject("field").field("type", "text").field("analyzer", "my_stop_analyzer").field("index_phrases", true).endObject(); + b.startObject("field") + .field("type", textFieldName) + .field("analyzer", "my_stop_analyzer") + .field("index_phrases", true) + .endObject(); 
// "standard" will be replaced with MockSynonymAnalyzer - b.startObject("synfield").field("type", "text").field("analyzer", "standard").field("index_phrases", true).endObject(); + b.startObject("synfield").field("type", textFieldName).field("analyzer", "standard").field("index_phrases", true).endObject(); })); QueryShardContext queryShardContext = createQueryShardContext(mapperService); @@ -808,14 +785,16 @@ protected TokenStreamComponents createComponents(String fieldName) { Exception e = expectThrows( MapperParsingException.class, - () -> createMapperService(fieldMapping(b -> b.field("type", "text").field("index", "false").field("index_phrases", true))) + () -> createMapperService( + fieldMapping(b -> b.field("type", textFieldName).field("index", "false").field("index_phrases", true)) + ) ); assertThat(e.getMessage(), containsString("Cannot set index_phrases on unindexed field [field]")); e = expectThrows( MapperParsingException.class, () -> createMapperService( - fieldMapping(b -> b.field("type", "text").field("index_options", "freqs").field("index_phrases", true)) + fieldMapping(b -> b.field("type", textFieldName).field("index_options", "freqs").field("index_phrases", true)) ) ); assertThat(e.getMessage(), containsString("Cannot set index_phrases on field [field] if positions are not enabled")); @@ -826,7 +805,7 @@ public void testIndexPrefixMapping() throws IOException { { DocumentMapper mapper = createDocumentMapper( fieldMapping( - b -> b.field("type", "text") + b -> b.field("type", textFieldName) .field("analyzer", "standard") .startObject("index_prefixes") .field("min_chars", 2) @@ -844,29 +823,29 @@ public void testIndexPrefixMapping() throws IOException { { DocumentMapper mapper = createDocumentMapper( - fieldMapping(b -> b.field("type", "text").field("analyzer", "standard").startObject("index_prefixes").endObject()) + fieldMapping(b -> b.field("type", textFieldName).field("analyzer", "standard").startObject("index_prefixes").endObject()) ); 
assertThat(mapper.mappers().getMapper("field._index_prefix").toString(), containsString("prefixChars=2:5")); } { - DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", "text").nullField("index_prefixes"))); + DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> b.field("type", textFieldName).nullField("index_prefixes"))); assertNull(mapper.mappers().getMapper("field._index_prefix")); } { MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { - b.field("type", "text").field("analyzer", "standard"); + b.field("type", textFieldName).field("analyzer", "standard"); b.startObject("index_prefixes").field("min_chars", 1).field("max_chars", 10).endObject(); - b.startObject("fields").startObject("_index_prefix").field("type", "text").endObject().endObject(); + b.startObject("fields").startObject("_index_prefix").field("type", textFieldName).endObject().endObject(); }))); assertThat(e.getMessage(), containsString("Field [field._index_prefix] is defined more than once")); } { MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { - b.field("type", "text").field("analyzer", "standard"); + b.field("type", textFieldName).field("analyzer", "standard"); b.startObject("index_prefixes").field("min_chars", 11).field("max_chars", 10).endObject(); }))); assertThat(e.getMessage(), containsString("min_chars [11] must be less than max_chars [10]")); @@ -874,7 +853,7 @@ public void testIndexPrefixMapping() throws IOException { { MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { - b.field("type", "text").field("analyzer", "standard"); + b.field("type", textFieldName).field("analyzer", "standard"); b.startObject("index_prefixes").field("min_chars", 0).field("max_chars", 10).endObject(); }))); assertThat(e.getMessage(), containsString("min_chars [0] must be greater than 
zero")); @@ -882,7 +861,7 @@ public void testIndexPrefixMapping() throws IOException { { MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { - b.field("type", "text").field("analyzer", "standard"); + b.field("type", textFieldName).field("analyzer", "standard"); b.startObject("index_prefixes").field("min_chars", 1).field("max_chars", 25).endObject(); }))); assertThat(e.getMessage(), containsString("max_chars [25] must be less than 20")); @@ -890,7 +869,7 @@ public void testIndexPrefixMapping() throws IOException { { MapperParsingException e = expectThrows(MapperParsingException.class, () -> createMapperService(fieldMapping(b -> { - b.field("type", "text").field("analyzer", "standard").field("index", false); + b.field("type", textFieldName).field("analyzer", "standard").field("index", false); b.startObject("index_prefixes").endObject(); }))); assertThat(e.getMessage(), containsString("Cannot set index_prefixes on unindexed field [field]")); @@ -901,14 +880,14 @@ public void testFastPhrasePrefixes() throws IOException { MapperService mapperService = createMapperService(mapping(b -> { b.startObject("field"); { - b.field("type", "text"); + b.field("type", textFieldName); b.field("analyzer", "my_stop_analyzer"); b.startObject("index_prefixes").field("min_chars", 2).field("max_chars", 10).endObject(); } b.endObject(); b.startObject("synfield"); { - b.field("type", "text"); + b.field("type", textFieldName); b.field("analyzer", "standard"); // "standard" will be replaced with MockSynonymAnalyzer b.field("index_phrases", true); b.startObject("index_prefixes").field("min_chars", 2).field("max_chars", 10).endObject(); @@ -999,7 +978,7 @@ public void testFastPhrasePrefixes() throws IOException { public void testSimpleMerge() throws IOException { XContentBuilder startingMapping = fieldMapping( - b -> b.field("type", "text").startObject("index_prefixes").endObject().field("index_phrases", true) + b -> b.field("type", 
textFieldName).startObject("index_prefixes").endObject().field("index_phrases", true) ); MapperService mapperService = createMapperService(startingMapping); assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(TextFieldMapper.class)); @@ -1008,19 +987,28 @@ public void testSimpleMerge() throws IOException { assertThat(mapperService.documentMapper().mappers().getMapper("field"), instanceOf(TextFieldMapper.class)); XContentBuilder differentPrefix = fieldMapping( - b -> b.field("type", "text").startObject("index_prefixes").field("min_chars", "3").endObject().field("index_phrases", true) + b -> b.field("type", textFieldName) + .startObject("index_prefixes") + .field("min_chars", "3") + .endObject() + .field("index_phrases", true) ); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> merge(mapperService, differentPrefix)); assertThat(e.getMessage(), containsString("Cannot update parameter [index_prefixes]")); XContentBuilder differentPhrases = fieldMapping( - b -> b.field("type", "text").startObject("index_prefixes").endObject().field("index_phrases", false) + b -> b.field("type", textFieldName).startObject("index_prefixes").endObject().field("index_phrases", false) ); e = expectThrows(IllegalArgumentException.class, () -> merge(mapperService, differentPhrases)); assertThat(e.getMessage(), containsString("Cannot update parameter [index_phrases]")); XContentBuilder newField = mapping(b -> { - b.startObject("field").field("type", "text").startObject("index_prefixes").endObject().field("index_phrases", true).endObject(); + b.startObject("field") + .field("type", textFieldName) + .startObject("index_prefixes") + .endObject() + .field("index_phrases", true) + .endObject(); b.startObject("other_field").field("type", "keyword").endObject(); }); merge(mapperService, newField); diff --git a/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java 
b/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java index 0592a972db5e9..9c177bbec61fd 100644 --- a/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java @@ -66,35 +66,39 @@ public class TextFieldTypeTests extends FieldTypeTestCase { - private static TextFieldType createFieldType() { - return new TextFieldType("field"); + TextFieldType createFieldType(boolean searchabe) { + if (searchabe) { + return new TextFieldType("field"); + } else { + return new TextFieldType("field", false, false, Collections.emptyMap()); + } } public void testIsAggregatableDependsOnFieldData() { - TextFieldType ft = createFieldType(); + TextFieldType ft = createFieldType(true); assertFalse(ft.isAggregatable()); ft.setFielddata(true); assertTrue(ft.isAggregatable()); } public void testTermQuery() { - MappedFieldType ft = createFieldType(); + MappedFieldType ft = createFieldType(true); assertEquals(new TermQuery(new Term("field", "foo")), ft.termQuery("foo", null)); assertEquals(AutomatonQueries.caseInsensitiveTermQuery(new Term("field", "fOo")), ft.termQueryCaseInsensitive("fOo", null)); - MappedFieldType unsearchable = new TextFieldType("field", false, false, Collections.emptyMap()); + MappedFieldType unsearchable = createFieldType(false); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery("bar", null)); assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); } public void testTermsQuery() { - MappedFieldType ft = createFieldType(); + MappedFieldType ft = createFieldType(true); List terms = new ArrayList<>(); terms.add(new BytesRef("foo")); terms.add(new BytesRef("bar")); assertEquals(new TermInSetQuery("field", terms), ft.termsQuery(Arrays.asList("foo", "bar"), null)); - MappedFieldType unsearchable = new TextFieldType("field", false, false, Collections.emptyMap()); + MappedFieldType 
unsearchable = createFieldType(false); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, () -> unsearchable.termsQuery(Arrays.asList("foo", "bar"), null) @@ -103,7 +107,7 @@ public void testTermsQuery() { } public void testRangeQuery() { - MappedFieldType ft = createFieldType(); + MappedFieldType ft = createFieldType(true); assertEquals( new TermRangeQuery("field", BytesRefs.toBytesRef("foo"), BytesRefs.toBytesRef("bar"), true, false), ft.rangeQuery("foo", "bar", true, false, null, null, null, MOCK_QSC) @@ -120,13 +124,13 @@ public void testRangeQuery() { } public void testRegexpQuery() { - MappedFieldType ft = createFieldType(); + MappedFieldType ft = createFieldType(true); assertEquals( new RegexpQuery(new Term("field", "foo.*")), ft.regexpQuery("foo.*", 0, 0, 10, CONSTANT_SCORE_BLENDED_REWRITE, MOCK_QSC) ); - MappedFieldType unsearchable = new TextFieldType("field", false, false, Collections.emptyMap()); + MappedFieldType unsearchable = createFieldType(false); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, () -> unsearchable.regexpQuery("foo.*", 0, 0, 10, null, MOCK_QSC) @@ -141,13 +145,13 @@ public void testRegexpQuery() { } public void testFuzzyQuery() { - MappedFieldType ft = createFieldType(); + MappedFieldType ft = createFieldType(true); assertEquals( new FuzzyQuery(new Term("field", "foo"), 2, 1, 50, true), ft.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_QSC) ); - MappedFieldType unsearchable = new TextFieldType("field", false, false, Collections.emptyMap()); + MappedFieldType unsearchable = createFieldType(false); IllegalArgumentException e = expectThrows( IllegalArgumentException.class, () -> unsearchable.fuzzyQuery("foo", Fuzziness.fromEdits(2), 1, 50, true, MOCK_QSC) @@ -162,7 +166,7 @@ public void testFuzzyQuery() { } public void testIndexPrefixes() { - TextFieldType ft = createFieldType(); + TextFieldType ft = createFieldType(true); ft.setPrefixFieldType(new 
TextFieldMapper.PrefixFieldType(ft, "field._index_prefix", 2, 10)); Query q = ft.prefixQuery("goin", CONSTANT_SCORE_REWRITE, false, randomMockShardContext()); @@ -222,7 +226,7 @@ public void testIndexPrefixes() { } public void testFetchSourceValue() throws IOException { - TextFieldType fieldType = createFieldType(); + TextFieldType fieldType = createFieldType(true); fieldType.setIndexAnalyzer(Lucene.STANDARD_ANALYZER); assertEquals(List.of("value"), fetchSourceValue(fieldType, "value")); diff --git a/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java new file mode 100644 index 0000000000000..6af717a97b328 --- /dev/null +++ b/server/src/test/java/org/opensearch/index/query/SourceFieldMatchQueryTests.java @@ -0,0 +1,173 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.query; + +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.opensearch.core.index.Index; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.MapperServiceTestCase; +import org.opensearch.index.mapper.ParsedDocument; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Set; + +import static org.mockito.Mockito.when; + +public class SourceFieldMatchQueryTests extends MapperServiceTestCase { + + public void testAllPossibleScenarios() throws IOException { + MapperService mapperService = createMapperService(mapping(b -> { + b.startObject("dessert"); + { + b.field("type", "match_only_text"); + } + b.endObject(); + })); + + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.sourcePath("dessert")).thenReturn(Set.of("dessert")); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + + String[] desserts = new String[] { "apple pie pie", "banana split pie", "chocolate cake" }; + List docs = new ArrayList<>(); + for (String dessert : desserts) { + docs.add(mapperService.documentMapper().parse(source(b -> b.field("dessert", dessert)))); + } + SourceFieldMatchQuery matchBoth = new SourceFieldMatchQuery( + QueryBuilders.matchQuery("dessert", "apple").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("dessert", "pie").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("dessert"), + queryShardContext + ); + + SourceFieldMatchQuery matchDelegate = new SourceFieldMatchQuery( + QueryBuilders.matchQuery("dessert", 
"apple").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("dessert", "juice").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("dessert"), + queryShardContext + ); + + SourceFieldMatchQuery matchFilter = new SourceFieldMatchQuery( + QueryBuilders.matchQuery("dessert", "tart").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("dessert", "pie").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("dessert"), + queryShardContext + ); + + SourceFieldMatchQuery matchNone = new SourceFieldMatchQuery( + QueryBuilders.matchQuery("dessert", "gulab").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("dessert", "jamun").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("dessert"), + queryShardContext + ); + + SourceFieldMatchQuery matchMultipleDocs = new SourceFieldMatchQuery( + QueryBuilders.matchAllQuery().toQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("dessert", "pie").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("dessert"), + queryShardContext + ); + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(mapperService.indexAnalyzer())); + for (ParsedDocument d : docs) { + iw.addDocument(d.rootDoc()); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + IndexSearcher searcher = new IndexSearcher(reader); + TopDocs topDocs = searcher.search(matchBoth, 10); + assertEquals(topDocs.totalHits.value, 1); + assertEquals(topDocs.scoreDocs[0].doc, 0); + + topDocs = searcher.search(matchDelegate, 10); + assertEquals(topDocs.totalHits.value, 0); + + topDocs = searcher.search(matchFilter, 10); + assertEquals(topDocs.totalHits.value, 0); + + topDocs = searcher.search(matchNone, 10); + assertEquals(topDocs.totalHits.value, 0); + + topDocs = searcher.search(matchMultipleDocs, 10); + 
assertEquals(topDocs.totalHits.value, 2); + // assert constant score + for (ScoreDoc scoreDoc : topDocs.scoreDocs) { + assertEquals(scoreDoc.score, 1.0, 0.00000000001); + } + } + } + } + + public void testSourceDisabled() throws IOException { + MapperService mapperService = createMapperService(topMapping(b -> b.startObject("_source").field("enabled", false).endObject())); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.sourcePath("dessert")).thenReturn(Set.of("dessert")); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> new SourceFieldMatchQuery( + QueryBuilders.matchQuery("dessert", "apple").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("dessert", "pie").doToQuery(queryShardContext), // Filter query + queryShardContext.getFieldType("dessert"), + queryShardContext + ) + ); + assertEquals( + "SourceFieldMatchQuery error: unable to fetch fields from _source field: " + + "_source is disabled in the mappings for index [test_index]", + e.getMessage() + ); + } + + public void testMissingField() throws IOException { + MapperService mapperService = createMapperService(mapping(b -> { + b.startObject("dessert"); + { + b.field("type", "match_only_text"); + } + b.endObject(); + })); + QueryShardContext queryShardContext = createQueryShardContext(mapperService); + when(queryShardContext.sourcePath("dessert")).thenReturn(Set.of("dessert")); + when(queryShardContext.index()).thenReturn(new Index("test_index", "uuid")); + + String[] desserts = new String[] { "apple pie pie", "banana split pie", "chocolate cake" }; + List docs = new ArrayList<>(); + for (String dessert : desserts) { + docs.add(mapperService.documentMapper().parse(source(b -> b.field("dessert", dessert)))); + } + SourceFieldMatchQuery matchDelegate = new SourceFieldMatchQuery( + QueryBuilders.matchQuery("dessert", 
"apple").doToQuery(queryShardContext), // Delegate query + QueryBuilders.matchQuery("username", "pie").doToQuery(queryShardContext), // Filter query missing field + queryShardContext.getFieldType("dessert"), + queryShardContext + ); + try (Directory dir = newDirectory()) { + IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(mapperService.indexAnalyzer())); + for (ParsedDocument d : docs) { + iw.addDocument(d.rootDoc()); + } + try (IndexReader reader = DirectoryReader.open(iw)) { + iw.close(); + IndexSearcher searcher = new IndexSearcher(reader); + TopDocs topDocs = searcher.search(matchDelegate, 10); + assertEquals(topDocs.totalHits.value, 0); + } + } + } +} diff --git a/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java index 94c2e4ef7da62..ac78a0d1936ea 100644 --- a/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/mapper/MapperServiceTestCase.java @@ -239,7 +239,7 @@ protected final XContentBuilder fieldMapping(CheckedConsumer mapperService.fieldType(inv.getArguments()[0].toString())); @@ -254,6 +254,8 @@ QueryShardContext createQueryShardContext(MapperService mapperService) { when(queryShardContext.lookup()).thenReturn(new SearchLookup(mapperService, (ft, s) -> { throw new UnsupportedOperationException("search lookup not available"); })); + when(queryShardContext.getFieldType(any())).thenAnswer(inv -> mapperService.fieldType(inv.getArguments()[0].toString())); + when(queryShardContext.documentMapper(anyString())).thenReturn(mapperService.documentMapper()); return queryShardContext; } } diff --git a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java index 82f15a590bea6..ac0447dbebf7e 100644 --- 
a/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/opensearch/search/aggregations/AggregatorTestCase.java @@ -103,6 +103,7 @@ import org.opensearch.index.mapper.Mapper; import org.opensearch.index.mapper.Mapper.BuilderContext; import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.MatchOnlyTextFieldMapper; import org.opensearch.index.mapper.NumberFieldMapper; import org.opensearch.index.mapper.ObjectMapper; import org.opensearch.index.mapper.ObjectMapper.Nested; @@ -760,7 +761,8 @@ public void testSupportedFieldTypes() throws IOException { source.put("type", mappedType.getKey()); // Text is the only field that doesn't support DVs, instead FD - if (mappedType.getKey().equals(TextFieldMapper.CONTENT_TYPE) == false) { + if (mappedType.getKey().equals(TextFieldMapper.CONTENT_TYPE) == false + && mappedType.getKey().equals(MatchOnlyTextFieldMapper.CONTENT_TYPE) == false) { source.put("doc_values", "true"); } From bb3959d75c729c9213374db4109c628d1774491c Mon Sep 17 00:00:00 2001 From: Ticheng Lin <51488860+ticheng-aws@users.noreply.github.com> Date: Wed, 3 Jan 2024 04:23:11 -0800 Subject: [PATCH 02/13] Fix flaky testTerminateAfterEarlyTermination (#11683) Signed-off-by: Ticheng Lin --- .../search/query/QueryProfilePhaseTests.java | 74 +++++++++++++------ 1 file changed, 52 insertions(+), 22 deletions(-) diff --git a/server/src/test/java/org/opensearch/search/query/QueryProfilePhaseTests.java b/server/src/test/java/org/opensearch/search/query/QueryProfilePhaseTests.java index ba1600e6eb651..48ac2d3b5a804 100644 --- a/server/src/test/java/org/opensearch/search/query/QueryProfilePhaseTests.java +++ b/server/src/test/java/org/opensearch/search/query/QueryProfilePhaseTests.java @@ -615,12 +615,22 @@ public void testTerminateAfterEarlyTermination() throws Exception { assertThat(query.getTimeBreakdown().get("score"), greaterThan(0L)); 
assertThat(query.getTimeBreakdown().get("score_count"), greaterThan(0L)); if (executor != null) { - assertThat(query.getTimeBreakdown().get("max_score"), greaterThan(0L)); - assertThat(query.getTimeBreakdown().get("min_score"), greaterThan(0L)); - assertThat(query.getTimeBreakdown().get("avg_score"), greaterThan(0L)); - assertThat(query.getTimeBreakdown().get("max_score_count"), greaterThan(0L)); - assertThat(query.getTimeBreakdown().get("min_score_count"), greaterThan(0L)); - assertThat(query.getTimeBreakdown().get("avg_score_count"), greaterThan(0L)); + long maxScore = query.getTimeBreakdown().get("max_score"); + long minScore = query.getTimeBreakdown().get("min_score"); + long avgScore = query.getTimeBreakdown().get("avg_score"); + long maxScoreCount = query.getTimeBreakdown().get("max_score_count"); + long minScoreCount = query.getTimeBreakdown().get("min_score_count"); + long avgScoreCount = query.getTimeBreakdown().get("avg_score_count"); + assertThat(maxScore, greaterThan(0L)); + assertThat(minScore, greaterThanOrEqualTo(0L)); + assertThat(avgScore, greaterThanOrEqualTo(0L)); + assertThat(maxScore, greaterThanOrEqualTo(avgScore)); + assertThat(avgScore, greaterThanOrEqualTo(minScore)); + assertThat(maxScoreCount, greaterThan(0L)); + assertThat(minScoreCount, greaterThanOrEqualTo(0L)); + assertThat(avgScoreCount, greaterThanOrEqualTo(0L)); + assertThat(maxScoreCount, greaterThanOrEqualTo(avgScoreCount)); + assertThat(avgScoreCount, greaterThanOrEqualTo(minScoreCount)); } assertThat(query.getTimeBreakdown().get("create_weight"), greaterThan(0L)); assertThat(query.getTimeBreakdown().get("create_weight_count"), equalTo(1L)); @@ -744,30 +754,50 @@ public void testTerminateAfterEarlyTermination() throws Exception { assertThat(query.getProfiledChildren().get(0).getTime(), greaterThan(0L)); assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("create_weight"), greaterThan(0L)); 
assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("create_weight_count"), equalTo(1L)); - assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("score"), greaterThan(0L)); - assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("score_count"), greaterThan(0L)); + assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("score"), greaterThanOrEqualTo(0L)); + assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("score_count"), greaterThanOrEqualTo(0L)); if (executor != null) { - assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("max_score"), greaterThan(0L)); - assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("min_score"), greaterThan(0L)); - assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("avg_score"), greaterThan(0L)); - assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("max_score_count"), greaterThan(0L)); - assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("min_score_count"), greaterThan(0L)); - assertThat(query.getProfiledChildren().get(0).getTimeBreakdown().get("avg_score_count"), greaterThan(0L)); + long maxScore = query.getProfiledChildren().get(0).getTimeBreakdown().get("max_score"); + long minScore = query.getProfiledChildren().get(0).getTimeBreakdown().get("min_score"); + long avgScore = query.getProfiledChildren().get(0).getTimeBreakdown().get("avg_score"); + long maxScoreCount = query.getProfiledChildren().get(0).getTimeBreakdown().get("max_score_count"); + long minScoreCount = query.getProfiledChildren().get(0).getTimeBreakdown().get("min_score_count"); + long avgScoreCount = query.getProfiledChildren().get(0).getTimeBreakdown().get("avg_score_count"); + assertThat(maxScore, greaterThanOrEqualTo(0L)); + assertThat(minScore, greaterThanOrEqualTo(0L)); + assertThat(avgScore, greaterThanOrEqualTo(0L)); + assertThat(maxScore, greaterThanOrEqualTo(avgScore)); + assertThat(avgScore, 
greaterThanOrEqualTo(minScore)); + assertThat(maxScoreCount, greaterThanOrEqualTo(0L)); + assertThat(minScoreCount, greaterThanOrEqualTo(0L)); + assertThat(avgScoreCount, greaterThanOrEqualTo(0L)); + assertThat(maxScoreCount, greaterThanOrEqualTo(avgScoreCount)); + assertThat(avgScoreCount, greaterThanOrEqualTo(minScoreCount)); } assertThat(query.getProfiledChildren().get(1).getQueryName(), equalTo("TermQuery")); assertThat(query.getProfiledChildren().get(1).getTime(), greaterThan(0L)); assertThat(query.getProfiledChildren().get(1).getTimeBreakdown().get("create_weight"), greaterThan(0L)); assertThat(query.getProfiledChildren().get(1).getTimeBreakdown().get("create_weight_count"), equalTo(1L)); - assertThat(query.getProfiledChildren().get(1).getTimeBreakdown().get("score"), greaterThan(0L)); - assertThat(query.getProfiledChildren().get(1).getTimeBreakdown().get("score_count"), greaterThan(0L)); + assertThat(query.getProfiledChildren().get(1).getTimeBreakdown().get("score"), greaterThanOrEqualTo(0L)); + assertThat(query.getProfiledChildren().get(1).getTimeBreakdown().get("score_count"), greaterThanOrEqualTo(0L)); if (executor != null) { - assertThat(query.getProfiledChildren().get(1).getTimeBreakdown().get("max_score"), greaterThan(0L)); - assertThat(query.getProfiledChildren().get(1).getTimeBreakdown().get("min_score"), greaterThan(0L)); - assertThat(query.getProfiledChildren().get(1).getTimeBreakdown().get("avg_score"), greaterThan(0L)); - assertThat(query.getProfiledChildren().get(1).getTimeBreakdown().get("max_score_count"), greaterThan(0L)); - assertThat(query.getProfiledChildren().get(1).getTimeBreakdown().get("min_score_count"), greaterThan(0L)); - assertThat(query.getProfiledChildren().get(1).getTimeBreakdown().get("avg_score_count"), greaterThan(0L)); + long maxScore = query.getProfiledChildren().get(1).getTimeBreakdown().get("max_score"); + long minScore = query.getProfiledChildren().get(1).getTimeBreakdown().get("min_score"); + long avgScore = 
query.getProfiledChildren().get(1).getTimeBreakdown().get("avg_score"); + long maxScoreCount = query.getProfiledChildren().get(1).getTimeBreakdown().get("max_score_count"); + long minScoreCount = query.getProfiledChildren().get(1).getTimeBreakdown().get("min_score_count"); + long avgScoreCount = query.getProfiledChildren().get(1).getTimeBreakdown().get("avg_score_count"); + assertThat(maxScore, greaterThanOrEqualTo(0L)); + assertThat(minScore, greaterThanOrEqualTo(0L)); + assertThat(avgScore, greaterThanOrEqualTo(0L)); + assertThat(maxScore, greaterThanOrEqualTo(avgScore)); + assertThat(avgScore, greaterThanOrEqualTo(minScore)); + assertThat(maxScoreCount, greaterThanOrEqualTo(0L)); + assertThat(minScoreCount, greaterThanOrEqualTo(0L)); + assertThat(avgScoreCount, greaterThanOrEqualTo(0L)); + assertThat(maxScoreCount, greaterThanOrEqualTo(avgScoreCount)); + assertThat(avgScoreCount, greaterThanOrEqualTo(minScoreCount)); } }, collector -> { assertThat(collector.getReason(), equalTo("search_terminate_after_count")); From 84404689d6259b317c3eaaf94895f369e9920434 Mon Sep 17 00:00:00 2001 From: Suraj Singh Date: Wed, 3 Jan 2024 11:14:52 -0800 Subject: [PATCH 03/13] Fix testIndexDeletionDuringSnapshotCreationInQueue flaky test (#11726) * Fix testIndexDeletionDuringSnapshotCreationInQueue flaky test Signed-off-by: Suraj Singh * Update comment Signed-off-by: Suraj Singh --------- Signed-off-by: Suraj Singh --- .../snapshots/DedicatedClusterSnapshotRestoreIT.java | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/DedicatedClusterSnapshotRestoreIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/DedicatedClusterSnapshotRestoreIT.java index bc591de45dd86..7a52c8aa5018e 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/DedicatedClusterSnapshotRestoreIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/DedicatedClusterSnapshotRestoreIT.java @@ 
-1457,6 +1457,13 @@ public void testIndexDeletionDuringSnapshotCreationInQueue() throws Exception { clusterAdmin().prepareRestoreSnapshot("test-repo", "test-snap").get(); ensureGreen("test-idx"); + + // Wait for snapshot process to complete to prevent conflict with repository clean up + assertBusy(() -> { + SnapshotInfo snapshotInfo = getSnapshot("test-repo", "test-snap-2"); + assertTrue(snapshotInfo.state().completed()); + assertEquals(SnapshotState.PARTIAL, snapshotInfo.state()); + }, 1, TimeUnit.MINUTES); } private long calculateTotalFilesSize(List files) { From 16d457d3f018893f6268a9ffa52d3d3f73f1a87a Mon Sep 17 00:00:00 2001 From: Peter Nied Date: Wed, 3 Jan 2024 15:28:45 -0600 Subject: [PATCH 04/13] Switch to more reliable OpenSearch Lucene snapshot location (#11728) * Switched to more reliable OpenSearch Lucene snapshot location - Related https://github.com/opensearch-project/opensearch-build/issues/3874 Signed-off-by: Peter Nied Signed-off-by: Peter Nied * Changelog entry Signed-off-by: Peter Nied Signed-off-by: Peter Nied --------- Signed-off-by: Peter Nied Signed-off-by: Peter Nied --- CHANGELOG.md | 2 +- .../java/org/opensearch/gradle/RepositoriesSetupPlugin.java | 2 +- gradle/code-coverage.gradle | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 026606ff57d65..f5191c5e04a41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -57,7 +57,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Return 409 Conflict HTTP status instead of 503 on failure to concurrently execute snapshots ([#8986](https://github.com/opensearch-project/OpenSearch/pull/5855)) - Add task completion count in search backpressure stats API ([#10028](https://github.com/opensearch-project/OpenSearch/pull/10028/)) - Deprecate CamelCase `PathHierarchy` tokenizer name in favor to lowercase `path_hierarchy` ([#10894](https://github.com/opensearch-project/OpenSearch/pull/10894)) - +- Switched to more reliable 
OpenSearch Lucene snapshot location([#11728](https://github.com/opensearch-project/OpenSearch/pull/11728)) ### Deprecated diff --git a/buildSrc/src/main/java/org/opensearch/gradle/RepositoriesSetupPlugin.java b/buildSrc/src/main/java/org/opensearch/gradle/RepositoriesSetupPlugin.java index 63b88f671c84c..8ecfbf40b6c62 100644 --- a/buildSrc/src/main/java/org/opensearch/gradle/RepositoriesSetupPlugin.java +++ b/buildSrc/src/main/java/org/opensearch/gradle/RepositoriesSetupPlugin.java @@ -94,7 +94,7 @@ public static void configureRepositories(Project project) { String revision = matcher.group(1); MavenArtifactRepository luceneRepo = repos.maven(repo -> { repo.setName("lucene-snapshots"); - repo.setUrl("https://d1nvenhzbhpy0q.cloudfront.net/snapshots/lucene/"); + repo.setUrl("https://artifacts.opensearch.org/snapshots/lucene/"); }); repos.exclusiveContent(exclusiveRepo -> { exclusiveRepo.filter( diff --git a/gradle/code-coverage.gradle b/gradle/code-coverage.gradle index dfb4ddba24113..822b471e2e034 100644 --- a/gradle/code-coverage.gradle +++ b/gradle/code-coverage.gradle @@ -13,7 +13,7 @@ repositories { gradlePluginPortal() // TODO: Find the way to use the repositories from RepositoriesSetupPlugin maven { - url = "https://d1nvenhzbhpy0q.cloudfront.net/snapshots/lucene/" + url = "https://artifacts.opensearch.org/snapshots/lucene/" } } From 36bd67465a12bbc56631f49a617429529a8365f9 Mon Sep 17 00:00:00 2001 From: rayshrey <121871912+rayshrey@users.noreply.github.com> Date: Thu, 4 Jan 2024 18:59:18 +0530 Subject: [PATCH 05/13] Add deleted doc count in _cat/shards (#11678) Signed-off-by: Shreyansh Ray --- CHANGELOG.md | 1 + .../test/cat.shards/10_basic.yml | 94 ++++++++++++++++++- .../rest/action/cat/RestShardsAction.java | 2 + .../action/cat/RestShardsActionTests.java | 10 +- 4 files changed, 104 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5191c5e04a41..aa2086646fbac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -181,6 +181,7 @@ The 
format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Interpret byte array as primitive using VarHandles ([#11362](https://github.com/opensearch-project/OpenSearch/pull/11362)) - Automatically add scheme to discovery.ec2.endpoint ([#11512](https://github.com/opensearch-project/OpenSearch/pull/11512)) - Restore support for Java 8 for RestClient ([#11562](https://github.com/opensearch-project/OpenSearch/pull/11562)) +- Add deleted doc count in _cat/shards ([#11678](https://github.com/opensearch-project/OpenSearch/pull/11678)) ### Deprecated diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml index f80c9f9c0bc80..b572ed9e62ea9 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/cat.shards/10_basic.yml @@ -1,13 +1,103 @@ "Help": - skip: - version: " - 2.3.99" + version: " - 2.11.99" + reason: deleted docs added in 2.12.0 + features: node_selector + - do: + cat.shards: + help: true + node_selector: + version: "2.12.0 - " + + - match: + $body: | + /^ index .+ \n + shard .+ \n + prirep .+ \n + state .+ \n + docs .+ \n + store .+ \n + ip .+ \n + id .+ \n + node .+ \n + sync_id .+ \n + unassigned.reason .+ \n + unassigned.at .+ \n + unassigned.for .+ \n + unassigned.details .+ \n + recoverysource.type .+ \n + completion.size .+ \n + fielddata.memory_size .+ \n + fielddata.evictions .+ \n + query_cache.memory_size .+ \n + query_cache.evictions .+ \n + flush.total .+ \n + flush.total_time .+ \n + get.current .+ \n + get.time .+ \n + get.total .+ \n + get.exists_time .+ \n + get.exists_total .+ \n + get.missing_time .+ \n + get.missing_total .+ \n + indexing.delete_current .+ \n + indexing.delete_time .+ \n + indexing.delete_total .+ \n + indexing.index_current .+ \n + indexing.index_time .+ \n + indexing.index_total .+ \n + indexing.index_failed 
.+ \n + merges.current .+ \n + merges.current_docs .+ \n + merges.current_size .+ \n + merges.total .+ \n + merges.total_docs .+ \n + merges.total_size .+ \n + merges.total_time .+ \n + refresh.total .+ \n + refresh.time .+ \n + refresh.external_total .+ \n + refresh.external_time .+ \n + refresh.listeners .+ \n + search.fetch_current .+ \n + search.fetch_time .+ \n + search.fetch_total .+ \n + search.open_contexts .+ \n + search.query_current .+ \n + search.query_time .+ \n + search.query_total .+ \n + search.scroll_current .+ \n + search.scroll_time .+ \n + search.scroll_total .+ \n + search.point_in_time_current .+ \n + search.point_in_time_time .+ \n + search.point_in_time_total .+ \n + segments.count .+ \n + segments.memory .+ \n + segments.index_writer_memory .+ \n + segments.version_map_memory .+ \n + segments.fixed_bitset_memory .+ \n + seq_no.max .+ \n + seq_no.local_checkpoint .+ \n + seq_no.global_checkpoint .+ \n + warmer.current .+ \n + warmer.total .+ \n + warmer.total_time .+ \n + path.data .+ \n + path.state .+ \n + docs.deleted .+ \n + $/ +--- +"Help from 2.4.0 to 2.11.0": + - skip: + version: " - 2.3.99 , 2.12.0 - " reason: point in time stats were added in 2.4.0 features: node_selector - do: cat.shards: help: true node_selector: - version: "2.4.0 - " + version: "2.4.0 - 2.11.99" - match: $body: | diff --git a/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java b/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java index 5d5f55c7f4639..d0d00e4c4596a 100644 --- a/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java +++ b/server/src/main/java/org/opensearch/rest/action/cat/RestShardsAction.java @@ -283,6 +283,7 @@ protected Table getTableWithHeader(final RestRequest request) { table.addCell("path.data", "alias:pd,dataPath;default:false;text-align:right;desc:shard data path"); table.addCell("path.state", "alias:ps,statsPath;default:false;text-align:right;desc:shard state path"); + 
table.addCell("docs.deleted", "alias:dd,docsDeleted;default:false;text-align:right;desc:number of deleted docs in shard"); table.endHeaders(); return table; @@ -448,6 +449,7 @@ Table buildTable(RestRequest request, ClusterStateResponse state, IndicesStatsRe table.addCell(getOrNull(shardStats, ShardStats::getDataPath, s -> s)); table.addCell(getOrNull(shardStats, ShardStats::getStatePath, s -> s)); + table.addCell(getOrNull(commonStats, CommonStats::getDocs, DocsStats::getDeleted)); table.endRow(); } diff --git a/server/src/test/java/org/opensearch/rest/action/cat/RestShardsActionTests.java b/server/src/test/java/org/opensearch/rest/action/cat/RestShardsActionTests.java index a8679a087216d..73f83a5642bb4 100644 --- a/server/src/test/java/org/opensearch/rest/action/cat/RestShardsActionTests.java +++ b/server/src/test/java/org/opensearch/rest/action/cat/RestShardsActionTests.java @@ -46,6 +46,7 @@ import org.opensearch.cluster.routing.ShardRoutingState; import org.opensearch.cluster.routing.TestShardRouting; import org.opensearch.common.Table; +import org.opensearch.index.shard.DocsStats; import org.opensearch.index.shard.ShardPath; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.test.rest.FakeRestRequest; @@ -65,6 +66,8 @@ public class RestShardsActionTests extends OpenSearchTestCase { public void testBuildTable() { final int numShards = randomIntBetween(1, 5); + long numDocs = randomLongBetween(0, 10000); + long numDeletedDocs = randomLongBetween(0, 100); DiscoveryNode localNode = new DiscoveryNode("local", buildNewFakeTransportAddress(), Version.CURRENT); List shardRoutings = new ArrayList<>(numShards); @@ -76,10 +79,12 @@ public void testBuildTable() { Path path = createTempDir().resolve("indices") .resolve(shardRouting.shardId().getIndex().getUUID()) .resolve(String.valueOf(shardRouting.shardId().id())); + CommonStats commonStats = new CommonStats(); + commonStats.docs = new DocsStats(numDocs, numDeletedDocs, 0); ShardStats shardStats = new 
ShardStats( shardRouting, new ShardPath(false, path, path, shardRouting.shardId()), - null, + commonStats, null, null, null @@ -120,6 +125,7 @@ public void testBuildTable() { assertThat(headers.get(6).value, equalTo("ip")); assertThat(headers.get(7).value, equalTo("id")); assertThat(headers.get(8).value, equalTo("node")); + assertThat(headers.get(74).value, equalTo("docs.deleted")); final List> rows = table.getRows(); assertThat(rows.size(), equalTo(numShards)); @@ -132,10 +138,12 @@ public void testBuildTable() { assertThat(row.get(1).value, equalTo(shardRouting.getId())); assertThat(row.get(2).value, equalTo(shardRouting.primary() ? "p" : "r")); assertThat(row.get(3).value, equalTo(shardRouting.state())); + assertThat(row.get(4).value, equalTo(shardStats.getStats().getDocs().getCount())); assertThat(row.get(6).value, equalTo(localNode.getHostAddress())); assertThat(row.get(7).value, equalTo(localNode.getId())); assertThat(row.get(72).value, equalTo(shardStats.getDataPath())); assertThat(row.get(73).value, equalTo(shardStats.getStatePath())); + assertThat(row.get(74).value, equalTo(shardStats.getStats().getDocs().getDeleted())); } } } From f6475152aece1083add15ac6cac31758eb711d9e Mon Sep 17 00:00:00 2001 From: gaobinlong Date: Thu, 4 Jan 2024 21:48:32 +0800 Subject: [PATCH 06/13] Fix simulate remove ingest processor throwing illegal_argument_exception (#11607) * Fix simulate remove ingest processor throwing illegal_argument_exception Signed-off-by: Gao Binlong * modify change log Signed-off-by: Gao Binlong * Create a new test mothod Signed-off-by: Gao Binlong * Use old method to get field value Signed-off-by: Gao Binlong --------- Signed-off-by: Gao Binlong --- CHANGELOG.md | 2 +- .../ingest/common/RemoveProcessor.java | 20 ++-- .../ingest/common/RemoveProcessorTests.java | 97 +++++++++++++++++++ .../test/ingest/290_remove_processor.yml | 92 ++++++++++++++++++ 4 files changed, 201 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 
aa2086646fbac..9b5f2dc5a16f2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -170,7 +170,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add telemetry tracer/metric enable flag and integ test. ([#10395](https://github.com/opensearch-project/OpenSearch/pull/10395)) - Performance improvement for Datetime field caching ([#4558](https://github.com/opensearch-project/OpenSearch/issues/4558)) - Add instrumentation for indexing in transport bulk action and transport shard bulk action. ([#10273](https://github.com/opensearch-project/OpenSearch/pull/10273)) -- Disallow removing some metadata fields by remove ingest processor ([#10895](https://github.com/opensearch-project/OpenSearch/pull/10895)) +- Disallow removing some metadata fields by remove ingest processor ([#10895](https://github.com/opensearch-project/OpenSearch/pull/10895), [#11607](https://github.com/opensearch-project/OpenSearch/pull/11607)) - Performance improvement for MultiTerm Queries on Keyword fields ([#7057](https://github.com/opensearch-project/OpenSearch/issues/7057)) - Refactor common parts from the Rounding class into a separate 'round' package ([#11023](https://github.com/opensearch-project/OpenSearch/issues/11023)) - Performance improvement for date histogram aggregations without sub-aggregations ([#11083](https://github.com/opensearch-project/OpenSearch/pull/11083)) diff --git a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java index bb3d4bca47859..a48cfd87b78c3 100644 --- a/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java +++ b/modules/ingest-common/src/main/java/org/opensearch/ingest/common/RemoveProcessor.java @@ -88,16 +88,18 @@ public IngestDocument execute(IngestDocument document) { throw new IllegalArgumentException("cannot remove metadata field [" + path + "]"); } // removing _id is disallowed 
when there's an external version specified in the request - String versionType = document.getFieldValue(IngestDocument.Metadata.VERSION_TYPE.getFieldName(), String.class); if (path.equals(IngestDocument.Metadata.ID.getFieldName()) - && !Objects.equals(versionType, VersionType.toString(VersionType.INTERNAL))) { - Long version = document.getFieldValue(IngestDocument.Metadata.VERSION.getFieldName(), Long.class); - throw new IllegalArgumentException( - "cannot remove metadata field [_id] when specifying external version for the document, version: " - + version - + ", version_type: " - + versionType - ); + && document.hasField(IngestDocument.Metadata.VERSION_TYPE.getFieldName())) { + String versionType = document.getFieldValue(IngestDocument.Metadata.VERSION_TYPE.getFieldName(), String.class); + if (!Objects.equals(versionType, VersionType.toString(VersionType.INTERNAL))) { + Long version = document.getFieldValue(IngestDocument.Metadata.VERSION.getFieldName(), Long.class, true); + throw new IllegalArgumentException( + "cannot remove metadata field [_id] when specifying external version for the document, version: " + + version + + ", version_type: " + + versionType + ); + } } document.removeField(path); }); diff --git a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java index 1a5630a4730f2..c138ad606d2e5 100644 --- a/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java +++ b/modules/ingest-common/src/test/java/org/opensearch/ingest/common/RemoveProcessorTests.java @@ -32,6 +32,7 @@ package org.opensearch.ingest.common; +import org.opensearch.common.lucene.uid.Versions; import org.opensearch.index.VersionType; import org.opensearch.ingest.IngestDocument; import org.opensearch.ingest.Processor; @@ -181,4 +182,100 @@ public void testRemoveMetadataField() throws Exception { } } } + + public void 
testRemoveDocumentId() throws Exception { + Map config = new HashMap<>(); + config.put("field", IngestDocument.Metadata.ID.getFieldName()); + String processorTag = randomAlphaOfLength(10); + + // test remove _id when _version_type is external + IngestDocument ingestDocumentWithExternalVersionType = new IngestDocument( + RandomDocumentPicks.randomString(random()), + RandomDocumentPicks.randomString(random()), + RandomDocumentPicks.randomString(random()), + 1L, + VersionType.EXTERNAL, + RandomDocumentPicks.randomSource(random()) + ); + + Processor processorForExternalVersionType = new RemoveProcessor.Factory(TestTemplateService.instance()).create( + null, + processorTag, + null, + config + ); + assertThrows( + "cannot remove metadata field [_id] when specifying external version for the document, version: " + + 1 + + ", version_type: " + + VersionType.EXTERNAL, + IllegalArgumentException.class, + () -> processorForExternalVersionType.execute(ingestDocumentWithExternalVersionType) + ); + + // test remove _id when _version_type is external_gte + config.put("field", IngestDocument.Metadata.ID.getFieldName()); + IngestDocument ingestDocumentWithExternalGTEVersionType = new IngestDocument( + RandomDocumentPicks.randomString(random()), + RandomDocumentPicks.randomString(random()), + RandomDocumentPicks.randomString(random()), + 1L, + VersionType.EXTERNAL_GTE, + RandomDocumentPicks.randomSource(random()) + ); + + Processor processorForExternalGTEVersionType = new RemoveProcessor.Factory(TestTemplateService.instance()).create( + null, + processorTag, + null, + config + ); + assertThrows( + "cannot remove metadata field [_id] when specifying external version for the document, version: " + + 1 + + ", version_type: " + + VersionType.EXTERNAL_GTE, + IllegalArgumentException.class, + () -> processorForExternalGTEVersionType.execute(ingestDocumentWithExternalGTEVersionType) + ); + + // test remove _id when _version_type is internal + config.put("field", 
IngestDocument.Metadata.ID.getFieldName()); + IngestDocument ingestDocumentWithInternalVersionType = new IngestDocument( + RandomDocumentPicks.randomString(random()), + RandomDocumentPicks.randomString(random()), + RandomDocumentPicks.randomString(random()), + Versions.MATCH_ANY, + VersionType.INTERNAL, + RandomDocumentPicks.randomSource(random()) + ); + + Processor processorForInternalVersionType = new RemoveProcessor.Factory(TestTemplateService.instance()).create( + null, + processorTag, + null, + config + ); + processorForInternalVersionType.execute(ingestDocumentWithInternalVersionType); + assertThat(ingestDocumentWithInternalVersionType.hasField(IngestDocument.Metadata.ID.getFieldName()), equalTo(false)); + + // test remove _id when _version_type is null + config.put("field", IngestDocument.Metadata.ID.getFieldName()); + IngestDocument ingestDocumentWithNoVersionType = new IngestDocument( + RandomDocumentPicks.randomString(random()), + RandomDocumentPicks.randomString(random()), + RandomDocumentPicks.randomString(random()), + null, + null, + RandomDocumentPicks.randomSource(random()) + ); + Processor processorForNullVersionType = new RemoveProcessor.Factory(TestTemplateService.instance()).create( + null, + processorTag, + null, + config + ); + processorForNullVersionType.execute(ingestDocumentWithNoVersionType); + assertThat(ingestDocumentWithNoVersionType.hasField(IngestDocument.Metadata.ID.getFieldName()), equalTo(false)); + } } diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml index 4811769d04f0e..6668b468f8edc 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/290_remove_processor.yml @@ -5,6 +5,69 @@ teardown: id: "my_pipeline" ignore: 404 +--- +"Test 
simulate API works well with remove processor": + - do: + ingest.put_pipeline: + id: "my_pipeline" + body: > + { + "description": "_description", + "processors": [ + { + "remove" : { + "field" : "{{foo}}" + } + } + ] + } + - match: { acknowledged: true } + + # test simulating existing pipeline works well + - do: + ingest.simulate: + id: "my_pipeline" + body: > + { + "docs": [ + { + "_source": { + "foo": "bar", + "bar": "zoo" + } + } + ] + } + - length: { docs: 1 } + - match: { docs.0.doc._source: { "foo": "bar" } } + + # test simulating inflight pipeline works well + - do: + ingest.simulate: + body: > + { + "pipeline": { + "description": "_description", + "processors": [ + { + "remove" : { + "field" : "{{foo}}" + } + } + ] + }, + "docs": [ + { + "_source": { + "foo": "bar", + "bar": "zoo" + } + } + ] + } + - length: { docs: 1 } + - match: { docs.0.doc._source: { "foo": "bar" } } + --- "Test remove processor with non-existing field and without ignore_missing": - do: @@ -227,3 +290,32 @@ teardown: version: 1 version_type: "external" body: { message: "foo bar baz" } + + # test simulating pipeline with removing _id + - do: + ingest.simulate: + body: > + { + "pipeline": { + "description": "_description", + "processors": [ + { + "remove" : { + "field" : "_id" + } + } + ] + }, + "docs": [ + { + "_version_type": "external_gte", + "_version": 1, + "_source": { + "foo": "bar", + "bar": "zoo" + } + } + ] + } + - match: { docs.0.error.type: "illegal_argument_exception" } + - match: { docs.0.error.reason: "cannot remove metadata field [_id] when specifying external version for the document, version: 1, version_type: external_gte" } From 178a7a073e1236d552f697e7f67d32f04a7fe8ec Mon Sep 17 00:00:00 2001 From: Harsha Vamsi Kalluri Date: Thu, 4 Jan 2024 10:12:35 -0800 Subject: [PATCH 07/13] Fixes Numeric exact match queries to use range queries internally (#11209) * Updating numeric term and terms queries to use IODVQ Signed-off-by: Harsha Vamsi Kalluri * Addressing comments 
Signed-off-by: Harsha Vamsi Kalluri * Fix formatting Signed-off-by: Harsha Vamsi Kalluri * Fix changelog Signed-off-by: Harsha Vamsi Kalluri * Addressing more comments + adding tests Signed-off-by: Harsha Vamsi Kalluri * renaming yaml test Signed-off-by: Harsha Vamsi Kalluri * Adding skip for bwc Signed-off-by: Harsha Vamsi Kalluri * Adding new SortedUnsignedLongDocValuesSetQuery to allow for BitInteger Terms query Signed-off-by: Harsha Vamsi Kalluri * Fixing some tests Signed-off-by: Harsha Vamsi Kalluri * Remove duplicate skip Signed-off-by: Harsha Vamsi Kalluri * Remove unused points declaration Signed-off-by: Harsha Vamsi Kalluri * Change unsigned exact query to be consistent Signed-off-by: Harsha Vamsi Kalluri * Use slowExactQuery from Unsigned Set Query Signed-off-by: Harsha Vamsi Kalluri * Merging different yaml tests into a single test Signed-off-by: Harsha Vamsi Kalluri * Updating test case for main Signed-off-by: Harsha Vamsi Kalluri * Fix changelog Signed-off-by: Harsha Vamsi Kalluri --------- Signed-off-by: Harsha Vamsi Kalluri --- CHANGELOG.md | 1 + .../index/mapper/ScaledFloatFieldMapper.java | 17 +- .../mapper/ScaledFloatFieldTypeTests.java | 19 +- .../test/search/340_doc_values_field.yml | 1147 +++++++++++++++++ .../test/search/340_keyword_doc_values.yml | 46 - .../org/apache/lucene/util/LongHashSet.java | 136 ++ .../SortedUnsignedLongDocValuesSetQuery.java | 176 +++ .../index/mapper/NumberFieldMapper.java | 290 ++++- .../index/mapper/NumberFieldTypeTests.java | 90 +- .../index/query/TermQueryBuilderTests.java | 2 + .../index/query/TermsQueryBuilderTests.java | 2 + 11 files changed, 1795 insertions(+), 131 deletions(-) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/340_doc_values_field.yml delete mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/340_keyword_doc_values.yml create mode 100644 server/src/main/java/org/apache/lucene/util/LongHashSet.java create mode 100644 
server/src/main/java/org/opensearch/index/document/SortedUnsignedLongDocValuesSetQuery.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b5f2dc5a16f2..ac15176af56f5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -176,6 +176,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Performance improvement for date histogram aggregations without sub-aggregations ([#11083](https://github.com/opensearch-project/OpenSearch/pull/11083)) - Disable concurrent aggs for Diversified Sampler and Sampler aggs ([#11087](https://github.com/opensearch-project/OpenSearch/issues/11087)) - Made leader/follower check timeout setting dynamic ([#10528](https://github.com/opensearch-project/OpenSearch/pull/10528)) +- Improved performance of numeric exact-match queries ([#11209](https://github.com/opensearch-project/OpenSearch/pull/11209)) - Change error message when per shard document limit is breached ([#11312](https://github.com/opensearch-project/OpenSearch/pull/11312)) - Improve boolean parsing performance ([#11308](https://github.com/opensearch-project/OpenSearch/pull/11308)) - Interpret byte array as primitive using VarHandles ([#11362](https://github.com/opensearch-project/OpenSearch/pull/11362)) diff --git a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java index 7be241017f683..400d867296e5f 100644 --- a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java @@ -199,9 +199,9 @@ public String typeName() { @Override public Query termQuery(Object value, QueryShardContext context) { - failIfNotIndexed(); + failIfNotIndexedAndNoDocValues(); long scaledValue = Math.round(scale(value)); - Query query = NumberFieldMapper.NumberType.LONG.termQuery(name(), scaledValue); + Query query = 
NumberFieldMapper.NumberType.LONG.termQuery(name(), scaledValue, hasDocValues(), isSearchable()); if (boost() != 1f) { query = new BoostQuery(query, boost()); } @@ -210,13 +210,18 @@ public Query termQuery(Object value, QueryShardContext context) { @Override public Query termsQuery(List values, QueryShardContext context) { - failIfNotIndexed(); + failIfNotIndexedAndNoDocValues(); List scaledValues = new ArrayList<>(values.size()); for (Object value : values) { long scaledValue = Math.round(scale(value)); scaledValues.add(scaledValue); } - Query query = NumberFieldMapper.NumberType.LONG.termsQuery(name(), Collections.unmodifiableList(scaledValues)); + Query query = NumberFieldMapper.NumberType.LONG.termsQuery( + name(), + Collections.unmodifiableList(scaledValues), + hasDocValues(), + isSearchable() + ); if (boost() != 1f) { query = new BoostQuery(query, boost()); } @@ -225,7 +230,7 @@ public Query termsQuery(List values, QueryShardContext context) { @Override public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, QueryShardContext context) { - failIfNotIndexed(); + failIfNotIndexedAndNoDocValues(); Long lo = null; if (lowerTerm != null) { double dValue = scale(lowerTerm); @@ -242,7 +247,7 @@ public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower } hi = Math.round(Math.floor(dValue)); } - Query query = NumberFieldMapper.NumberType.LONG.rangeQuery(name(), lo, hi, true, true, hasDocValues(), context); + Query query = NumberFieldMapper.NumberType.LONG.rangeQuery(name(), lo, hi, true, true, hasDocValues(), isSearchable(), context); if (boost() != 1f) { query = new BoostQuery(query, boost()); } diff --git a/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldTypeTests.java b/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldTypeTests.java index be12c49321b87..d83811e6668eb 100644 --- 
a/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldTypeTests.java +++ b/modules/mapper-extras/src/test/java/org/opensearch/index/mapper/ScaledFloatFieldTypeTests.java @@ -34,11 +34,13 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.DoublePoint; +import org.apache.lucene.document.LongField; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; @@ -63,7 +65,9 @@ public void testTermQuery() { ); double value = (randomDouble() * 2 - 1) * 10000; long scaledValue = Math.round(value * ft.getScalingFactor()); - assertEquals(LongPoint.newExactQuery("scaled_float", scaledValue), ft.termQuery(value, null)); + Query dvQuery = SortedNumericDocValuesField.newSlowExactQuery("scaled_float", scaledValue); + Query query = new IndexOrDocValuesQuery(LongPoint.newExactQuery("scaled_float", scaledValue), dvQuery); + assertEquals(query, ft.termQuery(value, null)); } public void testTermsQuery() { @@ -75,7 +79,7 @@ public void testTermsQuery() { long scaledValue1 = Math.round(value1 * ft.getScalingFactor()); double value2 = (randomDouble() * 2 - 1) * 10000; long scaledValue2 = Math.round(value2 * ft.getScalingFactor()); - assertEquals(LongPoint.newSetQuery("scaled_float", scaledValue1, scaledValue2), ft.termsQuery(Arrays.asList(value1, value2), null)); + assertEquals(LongField.newSetQuery("scaled_float", scaledValue1, scaledValue2), ft.termsQuery(Arrays.asList(value1, value2), null)); } public void testRangeQuery() throws IOException { @@ -112,7 +116,16 @@ public void testRangeQuery() throws IOException { Double u = randomBoolean() ? 
null : (randomDouble() * 2 - 1) * 10000; boolean includeLower = randomBoolean(); boolean includeUpper = randomBoolean(); - Query doubleQ = NumberFieldMapper.NumberType.DOUBLE.rangeQuery("double", l, u, includeLower, includeUpper, false, MOCK_QSC); + Query doubleQ = NumberFieldMapper.NumberType.DOUBLE.rangeQuery( + "double", + l, + u, + includeLower, + includeUpper, + false, + true, + MOCK_QSC + ); Query scaledFloatQ = ft.rangeQuery(l, u, includeLower, includeUpper, MOCK_QSC); assertEquals(searcher.count(doubleQ), searcher.count(scaledFloatQ)); } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_doc_values_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_doc_values_field.yml new file mode 100644 index 0000000000000..f3281e35ac8e6 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_doc_values_field.yml @@ -0,0 +1,1147 @@ +setup: + - skip: + features: [ "headers" ] + version: " - 2.99.99" + reason: "searching with only doc_values was added in 3.0.0" +--- +"search on fields with both index and doc_values enabled": + - do: + indices.create: + index: test-iodvq + body: + mappings: + properties: + some_keyword: + type: keyword + index: true + doc_values: true + byte: + type: byte + index: true + doc_values: true + double: + type: double + index: true + doc_values: true + float: + type: float + index: true + doc_values: true + half_float: + type: half_float + index: true + doc_values: true + integer: + type: integer + index: true + doc_values: true + long: + type: long + index: true + doc_values: true + short: + type: short + index: true + doc_values: true + unsigned_long: + type: unsigned_long + index: true + doc_values: true + + - do: + bulk: + index: test-iodvq + refresh: true + body: + - '{"index": {"_index": "test-iodvq", "_id": "1" }}' + - '{ "some_keyword": "ingesting some random keyword data", "byte": 120, "double": 100.0, "float": "800.0", "half_float": "400.0", "integer": 1290, 
"long": 13456, "short": 150, "unsigned_long": 10223372036854775800 }' + - '{ "index": { "_index": "test-iodvq", "_id": "2" }}' + - '{ "some_keyword": "400", "byte": 121, "double": 101.0, "float": "801.0", "half_float": "401.0", "integer": 1291, "long": 13457, "short": 151, "unsigned_long": 10223372036854775801 }' + - '{ "index": { "_index": "test-iodvq", "_id": "3" } }' + - '{ "some_keyword": "5", "byte": 122, "double": 102.0, "float": "802.0", "half_float": "402.0", "integer": 1292, "long": 13458, "short": 152, "unsigned_long": 10223372036854775802 }' + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + prefix: + some_keyword: "ing" + + - match: { hits.hits.0._source.some_keyword: "ingesting some random keyword data" } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + "some_keyword": { + "lt": 500 + } } + + - match: { hits.total: 2 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + half_float: 400.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + float: 800.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + double: 100.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + byte: 120 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + short: 150 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + integer: 1291 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + term: + long: 13456 + + - match: { hits.total: 1 } + + + - do: + search: + rest_total_hits_as_int: true + index: 
test-iodvq + body: + query: + term: + unsigned_long: 10223372036854775800 + + - match: { hits.total: 1 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + half_float: [ 400.0, 401.0 ] + + - match: { hits.total: 2 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + float: [ 800.0, 801.0 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + byte: [ 120, 121 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + double: [ 100.0, 101.0 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + short: [ 150, 151 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + integer: [ 1290, 1291 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + long: [ 13456, 13457 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + terms: + unsigned_long: [ 10223372036854775800, 10223372036854775801 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + half_float: { + gte: 401.0, + lte: 402.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + float: { + gte: 801.0, + lte: 802.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + byte: { + gte: 120, + lte: 121 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + 
range: { + double: { + gte: 101.0, + lte: 102.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + short: { + gte: 151, + lte: 152 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + integer: { + gte: 1291, + lte: 1292 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + long: { + gte: 13457, + lte: 13458 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-iodvq + body: + query: + range: { + unsigned_long: { + gte: 10223372036854775801, + lte: 10223372036854775802 + }, + } + + - match: { hits.total: 2 } + +--- +"search on fields with only index enabled": + - do: + indices.create: + index: test-index + body: + mappings: + properties: + some_keyword: + type: keyword + index: true + doc_values: false + byte: + type: byte + index: true + doc_values: false + double: + type: double + index: true + doc_values: false + float: + type: float + index: true + doc_values: false + half_float: + type: half_float + index: true + doc_values: false + integer: + type: integer + index: true + doc_values: false + long: + type: long + index: true + doc_values: false + short: + type: short + index: true + doc_values: false + unsigned_long: + type: unsigned_long + index: true + doc_values: false + + - do: + bulk: + index: test-index + refresh: true + body: + - '{"index": {"_index": "test-index", "_id": "1" }}' + - '{ "some_keyword": "ingesting some random keyword data", "byte": 120, "double": 100.0, "float": "800.0", "half_float": "400.0", "integer": 1290, "long": 13456, "short": 150, "unsigned_long": 10223372036854775800 }' + - '{ "index": { "_index": "test-index", "_id": "2" }}' + - '{ "some_keyword": "400", "byte": 121, "double": 101.0, "float": "801.0", "half_float": 
"401.0", "integer": 1291, "long": 13457, "short": 151, "unsigned_long": 10223372036854775801 }' + - '{ "index": { "_index": "test-index", "_id": "3" } }' + - '{ "some_keyword": "5", "byte": 122, "double": 102.0, "float": "802.0", "half_float": "402.0", "integer": 1292, "long": 13458, "short": 152, "unsigned_long": 10223372036854775802 }' + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + prefix: + some_keyword: "ing" + + - match: { hits.hits.0._source.some_keyword: "ingesting some random keyword data" } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + "some_keyword": { + "lt": 500 + } } + + - match: { hits.total: 2 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + half_float: 400.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + float: 800.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + double: 100.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + byte: 120 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + short: 150 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + integer: 1291 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + long: 13456 + + - match: { hits.total: 1 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + term: + unsigned_long: 10223372036854775800 + + - match: { hits.total: 1 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + half_float: [ 
400.0, 401.0 ] + + - match: { hits.total: 2 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + float: [ 800.0, 801.0 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + byte: [ 120, 121 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + double: [ 100.0, 101.0 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + short: [ 150, 151 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + integer: [ 1290, 1291 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + long: [ 13456, 13457 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + terms: + unsigned_long: [ 10223372036854775800, 10223372036854775801 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + half_float: { + gte: 401.0, + lte: 402.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + float: { + gte: 801.0, + lte: 802.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + byte: { + gte: 120, + lte: 121 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + double: { + gte: 101.0, + lte: 102.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + short: { + gte: 151, + lte: 152 + 
}, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + integer: { + gte: 1291, + lte: 1292 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + long: { + gte: 13457, + lte: 13458 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-index + body: + query: + range: { + unsigned_long: { + gte: 10223372036854775801, + lte: 10223372036854775802 + }, + } + + - match: { hits.total: 2 } + +--- +"search on fields with only doc_values enabled": + - do: + indices.create: + index: test-doc-values + body: + mappings: + properties: + some_keyword: + type: keyword + index: false + doc_values: true + byte: + type: byte + index: false + doc_values: true + double: + type: double + index: false + doc_values: true + float: + type: float + index: false + doc_values: true + half_float: + type: half_float + index: false + doc_values: true + integer: + type: integer + index: false + doc_values: true + long: + type: long + index: false + doc_values: true + short: + type: short + index: false + doc_values: true + unsigned_long: + type: unsigned_long + index: false + doc_values: true + + - do: + bulk: + index: test-doc-values + refresh: true + body: + - '{"index": {"_index": "test-doc-values", "_id": "1" }}' + - '{ "some_keyword": "ingesting some random keyword data", "byte": 120, "double": 100.0, "float": "800.0", "half_float": "400.0", "integer": 1290, "long": 13456, "short": 150, "unsigned_long": 10223372036854775800 }' + - '{ "index": { "_index": "test-doc-values", "_id": "2" }}' + - '{ "some_keyword": "400", "byte": 121, "double": 101.0, "float": "801.0", "half_float": "401.0", "integer": 1291, "long": 13457, "short": 151, "unsigned_long": 10223372036854775801 }' + - '{ "index": { "_index": "test-doc-values", "_id": "3" } }' + - '{ "some_keyword": "5", "byte": 122, 
"double": 102.0, "float": "802.0", "half_float": "402.0", "integer": 1292, "long": 13458, "short": 152, "unsigned_long": 10223372036854775802 }' + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + prefix: + some_keyword: "ing" + + - match: { hits.hits.0._source.some_keyword: "ingesting some random keyword data" } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + "some_keyword": { + "lt": 500 + } } + + - match: { hits.total: 2 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + half_float: 400.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + float: 800.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + double: 100.0 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + byte: 120 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + short: 150 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + integer: 1291 + + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + long: 13456 + + - match: { hits.total: 1 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + term: + unsigned_long: 10223372036854775800 + + - match: { hits.total: 1 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + half_float: [ 400.0, 401.0 ] + + - match: { hits.total: 2 } + + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + 
terms: + float: [ 800.0, 801.0 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + byte: [ 120, 121 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + double: [ 100.0, 101.0 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + short: [ 150, 151 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + integer: [ 1290, 1291 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + long: [ 13456, 13457 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + terms: + unsigned_long: [ 10223372036854775800, 10223372036854775801 ] + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + half_float: { + gte: 401.0, + lte: 402.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + float: { + gte: 801.0, + lte: 802.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + byte: { + gte: 120, + lte: 121 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + double: { + gte: 101.0, + lte: 102.0 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + short: { + gte: 151, + lte: 152 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: 
true + index: test-doc-values + body: + query: + range: { + integer: { + gte: 1291, + lte: 1292 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + long: { + gte: 13457, + lte: 13458 + }, + } + + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + index: test-doc-values + body: + query: + range: { + unsigned_long: { + gte: 10223372036854775801, + lte: 10223372036854775802 + }, + } + + - match: { hits.total: 2 } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_keyword_doc_values.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_keyword_doc_values.yml deleted file mode 100644 index 8829e7b100fdd..0000000000000 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/340_keyword_doc_values.yml +++ /dev/null @@ -1,46 +0,0 @@ ---- -"search on keyword fields with doc_values enabled": - - do: - indices.create: - index: test - body: - mappings: - properties: - "some_keyword": - type: "keyword" - index: true - doc_values: true - - - do: - bulk: - index: test - refresh: true - body: - - '{"index": {"_index": "test", "_id": "1" }}' - - '{ "some_keyword": "ingesting some random keyword data" }' - - '{ "index": { "_index": "test", "_id": "2" }}' - - '{ "some_keyword": "400" }' - - '{ "index": { "_index": "test", "_id": "3" } }' - - '{ "some_keyword": "5" }' - - - do: - search: - index: test - body: - query: - prefix: - some_keyword: "ing" - - - match: { hits.hits.0._source.some_keyword: "ingesting some random keyword data" } - - - do: - search: - index: test - body: - query: - range: { - "some_keyword": { - "lt": 500 - } } - - - match: { hits.total.value: 2 } diff --git a/server/src/main/java/org/apache/lucene/util/LongHashSet.java b/server/src/main/java/org/apache/lucene/util/LongHashSet.java new file mode 100644 index 0000000000000..a463e8a189585 --- /dev/null +++ 
b/server/src/main/java/org/apache/lucene/util/LongHashSet.java @@ -0,0 +1,136 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.apache.lucene.util; + +import org.apache.lucene.util.packed.PackedInts; + +import java.util.Arrays; +import java.util.Objects; +import java.util.stream.Collectors; +import java.util.stream.LongStream; + +/** Set of longs, optimized for docvalues usage */ +public final class LongHashSet implements Accountable { + private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(LongHashSet.class); + + private static final long MISSING = Long.MIN_VALUE; + + final long[] table; + final int mask; + final boolean hasMissingValue; + final int size; + /** minimum value in the set, or Long.MAX_VALUE for an empty set */ + public final long minValue; + /** maximum value in the set, or Long.MIN_VALUE for an empty set */ + public final long maxValue; + + /** Construct a set. Values must be in sorted order. */ + public LongHashSet(long[] values) { + int tableSize = Math.toIntExact(values.length * 3L / 2); + tableSize = 1 << PackedInts.bitsRequired(tableSize); // make it a power of 2 + assert tableSize >= values.length * 3L / 2; + table = new long[tableSize]; + Arrays.fill(table, MISSING); + mask = tableSize - 1; + boolean hasMissingValue = false; + int size = 0; + long previousValue = Long.MIN_VALUE; // for assert + for (long value : values) { + if (value == MISSING) { + size += hasMissingValue ? 0 : 1; + hasMissingValue = true; + } else if (add(value)) { + ++size; + } + assert value >= previousValue : "values must be provided in sorted order"; + previousValue = value; + } + this.hasMissingValue = hasMissingValue; + this.size = size; + this.minValue = values.length == 0 ? Long.MAX_VALUE : values[0]; + this.maxValue = values.length == 0 ? 
Long.MIN_VALUE : values[values.length - 1]; + } + + private boolean add(long l) { + assert l != MISSING; + final int slot = Long.hashCode(l) & mask; + for (int i = slot;; i = (i + 1) & mask) { + if (table[i] == MISSING) { + table[i] = l; + return true; + } else if (table[i] == l) { + // already added + return false; + } + } + } + + /** + * check for membership in the set. + * + * <p>
You should use {@link #minValue} and {@link #maxValue} to guide/terminate iteration before + * calling this. + */ + public boolean contains(long l) { + if (l == MISSING) { + return hasMissingValue; + } + final int slot = Long.hashCode(l) & mask; + for (int i = slot;; i = (i + 1) & mask) { + if (table[i] == MISSING) { + return false; + } else if (table[i] == l) { + return true; + } + } + } + + /** returns a stream of all values contained in this set */ + LongStream stream() { + LongStream stream = Arrays.stream(table).filter(v -> v != MISSING); + if (hasMissingValue) { + stream = LongStream.concat(LongStream.of(MISSING), stream); + } + return stream; + } + + @Override + public int hashCode() { + return Objects.hash(size, minValue, maxValue, mask, hasMissingValue, Arrays.hashCode(table)); + } + + @Override + public boolean equals(Object obj) { + if (obj != null && obj instanceof LongHashSet) { + LongHashSet that = (LongHashSet) obj; + return size == that.size + && minValue == that.minValue + && maxValue == that.maxValue + && mask == that.mask + && hasMissingValue == that.hasMissingValue + && Arrays.equals(table, that.table); + } + return false; + } + + @Override + public String toString() { + return stream().mapToObj(String::valueOf).collect(Collectors.joining(", ", "[", "]")); + } + + /** number of elements in the set */ + public int size() { + return size; + } + + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES + RamUsageEstimator.sizeOfObject(table); + } +} diff --git a/server/src/main/java/org/opensearch/index/document/SortedUnsignedLongDocValuesSetQuery.java b/server/src/main/java/org/opensearch/index/document/SortedUnsignedLongDocValuesSetQuery.java new file mode 100644 index 0000000000000..669dbb1e1bfc7 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/document/SortedUnsignedLongDocValuesSetQuery.java @@ -0,0 +1,176 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * 
this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.document; + +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.search.ConstantScoreScorer; +import org.apache.lucene.search.ConstantScoreWeight; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.TwoPhaseIterator; +import org.apache.lucene.search.Weight; +import org.apache.lucene.util.LongHashSet; + +import java.io.IOException; +import java.math.BigInteger; +import java.util.Arrays; +import java.util.Objects; + +/** + * The {@link org.apache.lucene.document.SortedNumericDocValuesSetQuery} implementation for unsigned long numeric data type. 
+ * + * @opensearch.internal + */ +public abstract class SortedUnsignedLongDocValuesSetQuery extends Query { + + private final String field; + private final LongHashSet numbers; + + SortedUnsignedLongDocValuesSetQuery(String field, BigInteger[] numbers) { + this.field = Objects.requireNonNull(field); + Arrays.sort(numbers); + this.numbers = new LongHashSet(Arrays.stream(numbers).mapToLong(n -> n.longValue()).toArray()); + } + + @Override + public String toString(String field) { + return new StringBuilder().append(field).append(": ").append(numbers.toString()).toString(); + } + + @Override + public void visit(QueryVisitor visitor) { + if (visitor.acceptField(field)) { + visitor.visitLeaf(this); + } + } + + @Override + public Query rewrite(IndexSearcher indexSearcher) throws IOException { + if (numbers.size() == 0) { + return new MatchNoDocsQuery(); + } + return super.rewrite(indexSearcher); + } + + @Override + public boolean equals(Object other) { + if (sameClassAs(other) == false) { + return false; + } + SortedUnsignedLongDocValuesSetQuery that = (SortedUnsignedLongDocValuesSetQuery) other; + return field.equals(that.field) && numbers.equals(that.numbers); + } + + @Override + public int hashCode() { + return Objects.hash(classHash(), field, numbers); + } + + abstract SortedNumericDocValues getValues(LeafReader reader, String field) throws IOException; + + @Override + public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { + return new ConstantScoreWeight(this, boost) { + + @Override + public boolean isCacheable(LeafReaderContext ctx) { + return DocValues.isCacheable(ctx, field); + } + + @Override + public Scorer scorer(LeafReaderContext context) throws IOException { + SortedNumericDocValues values = getValues(context.reader(), field); + if (values == null) { + return null; + } + final NumericDocValues singleton = DocValues.unwrapSingleton(values); + final TwoPhaseIterator iterator; + if (singleton != null) { + 
iterator = new TwoPhaseIterator(singleton) { + @Override + public boolean matches() throws IOException { + long value = singleton.longValue(); + return Long.compareUnsigned(value, numbers.minValue) >= 0 + && Long.compareUnsigned(value, numbers.maxValue) <= 0 + && numbers.contains(value); + } + + @Override + public float matchCost() { + return 5; // 2 comparisions, possible lookup in the set + } + }; + } else { + iterator = new TwoPhaseIterator(values) { + @Override + public boolean matches() throws IOException { + int count = values.docValueCount(); + for (int i = 0; i < count; i++) { + final long value = values.nextValue(); + if (Long.compareUnsigned(value, numbers.minValue) < 0) { + continue; + } else if (Long.compareUnsigned(value, numbers.maxValue) > 0) { + return false; // values are sorted, terminate + } else if (numbers.contains(value)) { + return true; + } + } + return false; + } + + @Override + public float matchCost() { + return 5; // 2 comparisons, possible lookup in the set + } + }; + } + return new ConstantScoreScorer(this, score(), scoreMode, iterator); + } + }; + } + + public static Query newSlowSetQuery(String field, BigInteger... 
values) { + return new SortedUnsignedLongDocValuesSetQuery(field, values) { + @Override + SortedNumericDocValues getValues(LeafReader reader, String field) throws IOException { + FieldInfo info = reader.getFieldInfos().fieldInfo(field); + if (info == null) { + // Queries have some optimizations when one sub scorer returns null rather + // than a scorer that does not match any documents + return null; + } + return DocValues.getSortedNumeric(reader, field); + } + }; + } + + public static Query newSlowExactQuery(String field, BigInteger value) { + return new SortedUnsignedLongDocValuesRangeQuery(field, value, value) { + @Override + SortedNumericDocValues getValues(LeafReader reader, String field) throws IOException { + FieldInfo info = reader.getFieldInfos().fieldInfo(field); + if (info == null) { + // Queries have some optimizations when one sub scorer returns null rather + // than a scorer that does not match any documents + return null; + } + return DocValues.getSortedNumeric(reader, field); + } + }; + } +} diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index 204e7bc4c16ab..524d2b0e0dd38 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -37,6 +37,7 @@ import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatField; import org.apache.lucene.document.FloatPoint; import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.LongPoint; @@ -61,6 +62,7 @@ import org.opensearch.core.xcontent.XContentParser; import org.opensearch.core.xcontent.XContentParser.Token; import org.opensearch.index.document.SortedUnsignedLongDocValuesRangeQuery; +import org.opensearch.index.document.SortedUnsignedLongDocValuesSetQuery; import 
org.opensearch.index.fielddata.IndexFieldData; import org.opensearch.index.fielddata.IndexNumericFieldData.NumericType; import org.opensearch.index.fielddata.plain.SortedNumericIndexFieldData; @@ -201,18 +203,39 @@ public Float parse(XContentParser parser, boolean coerce) throws IOException { } @Override - public Query termQuery(String field, Object value) { + public Query termQuery(String field, Object value, boolean hasDocValues, boolean isSearchable) { float v = parse(value, false); + if (isSearchable && hasDocValues) { + Query query = HalfFloatPoint.newExactQuery(field, v); + Query dvQuery = SortedNumericDocValuesField.newSlowExactQuery(field, HalfFloatPoint.halfFloatToSortableShort(v)); + return new IndexOrDocValuesQuery(query, dvQuery); + } + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowExactQuery(field, HalfFloatPoint.halfFloatToSortableShort(v)); + } return HalfFloatPoint.newExactQuery(field, v); } @Override - public Query termsQuery(String field, List values) { + public Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable) { float[] v = new float[values.size()]; + long points[] = new long[v.length]; for (int i = 0; i < values.size(); ++i) { v[i] = parse(values.get(i), false); + if (hasDocValues) { + points[i] = HalfFloatPoint.halfFloatToSortableShort(v[i]); + } + } + if (isSearchable && hasDocValues) { + Query query = HalfFloatPoint.newSetQuery(field, v); + Query dvQuery = SortedNumericDocValuesField.newSlowSetQuery(field, points); + return new IndexOrDocValuesQuery(query, dvQuery); + } + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowSetQuery(field, points); } return HalfFloatPoint.newSetQuery(field, v); + } @Override @@ -223,6 +246,7 @@ public Query rangeQuery( boolean includeLower, boolean includeUpper, boolean hasDocValues, + boolean isSearchable, QueryShardContext context ) { float l = Float.NEGATIVE_INFINITY; @@ -241,16 +265,23 @@ public Query rangeQuery( } u = 
HalfFloatPoint.nextDown(u); } - Query query = HalfFloatPoint.newRangeQuery(field, l, u); - if (hasDocValues) { + if (isSearchable && hasDocValues) { + Query query = HalfFloatPoint.newRangeQuery(field, l, u); Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery( field, HalfFloatPoint.halfFloatToSortableShort(l), HalfFloatPoint.halfFloatToSortableShort(u) ); - query = new IndexOrDocValuesQuery(query, dvQuery); + return new IndexOrDocValuesQuery(query, dvQuery); + } + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowRangeQuery( + field, + HalfFloatPoint.halfFloatToSortableShort(l), + HalfFloatPoint.halfFloatToSortableShort(u) + ); } - return query; + return HalfFloatPoint.newRangeQuery(field, l, u); } @Override @@ -309,18 +340,39 @@ public Float parse(XContentParser parser, boolean coerce) throws IOException { } @Override - public Query termQuery(String field, Object value) { + public Query termQuery(String field, Object value, boolean hasDocValues, boolean isSearchable) { float v = parse(value, false); + if (isSearchable && hasDocValues) { + Query query = FloatPoint.newExactQuery(field, v); + Query dvQuery = SortedNumericDocValuesField.newSlowExactQuery(field, NumericUtils.floatToSortableInt(v)); + return new IndexOrDocValuesQuery(query, dvQuery); + } + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowExactQuery(field, NumericUtils.floatToSortableInt(v)); + } return FloatPoint.newExactQuery(field, v); } @Override - public Query termsQuery(String field, List values) { + public Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable) { float[] v = new float[values.size()]; + long points[] = new long[v.length]; for (int i = 0; i < values.size(); ++i) { v[i] = parse(values.get(i), false); + if (hasDocValues) { + points[i] = NumericUtils.floatToSortableInt(v[i]); + } + } + if (isSearchable && hasDocValues) { + return new IndexOrDocValuesQuery( + FloatPoint.newSetQuery(field, v), + 
SortedNumericDocValuesField.newSlowSetQuery(field, points) + ); + } + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowSetQuery(field, points); } - return FloatPoint.newSetQuery(field, v); + return FloatField.newSetQuery(field, v); } @Override @@ -331,6 +383,7 @@ public Query rangeQuery( boolean includeLower, boolean includeUpper, boolean hasDocValues, + boolean isSearchable, QueryShardContext context ) { float l = Float.NEGATIVE_INFINITY; @@ -347,16 +400,23 @@ public Query rangeQuery( u = FloatPoint.nextDown(u); } } - Query query = FloatPoint.newRangeQuery(field, l, u); - if (hasDocValues) { + if (isSearchable && hasDocValues) { + Query query = FloatPoint.newRangeQuery(field, l, u); Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery( field, NumericUtils.floatToSortableInt(l), NumericUtils.floatToSortableInt(u) ); - query = new IndexOrDocValuesQuery(query, dvQuery); + return new IndexOrDocValuesQuery(query, dvQuery); } - return query; + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowRangeQuery( + field, + NumericUtils.floatToSortableInt(l), + NumericUtils.floatToSortableInt(u) + ); + } + return FloatPoint.newRangeQuery(field, l, u); } @Override @@ -406,16 +466,37 @@ public Double parse(XContentParser parser, boolean coerce) throws IOException { } @Override - public Query termQuery(String field, Object value) { + public Query termQuery(String field, Object value, boolean hasDocValues, boolean isSearchable) { double v = parse(value, false); + if (isSearchable && hasDocValues) { + Query query = DoublePoint.newExactQuery(field, v); + Query dvQuery = SortedNumericDocValuesField.newSlowExactQuery(field, NumericUtils.doubleToSortableLong(v)); + return new IndexOrDocValuesQuery(query, dvQuery); + } + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowExactQuery(field, NumericUtils.doubleToSortableLong(v)); + } return DoublePoint.newExactQuery(field, v); } @Override - public Query termsQuery(String field, List values) 
{ + public Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable) { double[] v = new double[values.size()]; + long points[] = new long[v.length]; for (int i = 0; i < values.size(); ++i) { v[i] = parse(values.get(i), false); + if (hasDocValues) { + points[i] = NumericUtils.doubleToSortableLong(v[i]); + } + } + if (isSearchable && hasDocValues) { + return new IndexOrDocValuesQuery( + DoublePoint.newSetQuery(field, v), + SortedNumericDocValuesField.newSlowSetQuery(field, points) + ); + } + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowSetQuery(field, points); } return DoublePoint.newSetQuery(field, v); } @@ -428,19 +509,27 @@ public Query rangeQuery( boolean includeLower, boolean includeUpper, boolean hasDocValues, + boolean isSearchable, QueryShardContext context ) { return doubleRangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, (l, u) -> { - Query query = DoublePoint.newRangeQuery(field, l, u); - if (hasDocValues) { + if (isSearchable && hasDocValues) { + Query query = DoublePoint.newRangeQuery(field, l, u); Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery( field, NumericUtils.doubleToSortableLong(l), NumericUtils.doubleToSortableLong(u) ); - query = new IndexOrDocValuesQuery(query, dvQuery); + return new IndexOrDocValuesQuery(query, dvQuery); } - return query; + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowRangeQuery( + field, + NumericUtils.doubleToSortableLong(l), + NumericUtils.doubleToSortableLong(u) + ); + } + return DoublePoint.newRangeQuery(field, l, u); }); } @@ -504,13 +593,13 @@ public Short parse(XContentParser parser, boolean coerce) throws IOException { } @Override - public Query termQuery(String field, Object value) { - return INTEGER.termQuery(field, value); + public Query termQuery(String field, Object value, boolean hasDocValues, boolean isSearchable) { + return INTEGER.termQuery(field, value, hasDocValues, isSearchable); } @Override - public Query 
termsQuery(String field, List values) { - return INTEGER.termsQuery(field, values); + public Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable) { + return INTEGER.termsQuery(field, values, hasDocValues, isSearchable); } @Override @@ -521,9 +610,10 @@ public Query rangeQuery( boolean includeLower, boolean includeUpper, boolean hasDocValues, + boolean isSearchable, QueryShardContext context ) { - return INTEGER.rangeQuery(field, lowerTerm, upperTerm, includeLower, includeUpper, hasDocValues, context); + return INTEGER.rangeQuery(field, lowerTerm, upperTerm, includeLower, includeUpper, hasDocValues, isSearchable, context); } @Override @@ -571,13 +661,13 @@ public Short parse(XContentParser parser, boolean coerce) throws IOException { } @Override - public Query termQuery(String field, Object value) { - return INTEGER.termQuery(field, value); + public Query termQuery(String field, Object value, boolean hasDocValues, boolean isSearchable) { + return INTEGER.termQuery(field, value, hasDocValues, isSearchable); } @Override - public Query termsQuery(String field, List values) { - return INTEGER.termsQuery(field, values); + public Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable) { + return INTEGER.termsQuery(field, values, hasDocValues, isSearchable); } @Override @@ -588,9 +678,10 @@ public Query rangeQuery( boolean includeLower, boolean includeUpper, boolean hasDocValues, + boolean isSearchable, QueryShardContext context ) { - return INTEGER.rangeQuery(field, lowerTerm, upperTerm, includeLower, includeUpper, hasDocValues, context); + return INTEGER.rangeQuery(field, lowerTerm, upperTerm, includeLower, includeUpper, hasDocValues, isSearchable, context); } @Override @@ -638,16 +729,24 @@ public Integer parse(XContentParser parser, boolean coerce) throws IOException { } @Override - public Query termQuery(String field, Object value) { + public Query termQuery(String field, Object value, boolean 
hasDocValues, boolean isSearchable) { if (hasDecimalPart(value)) { return Queries.newMatchNoDocsQuery("Value [" + value + "] has a decimal part"); } int v = parse(value, true); + if (isSearchable && hasDocValues) { + Query query = IntPoint.newExactQuery(field, v); + Query dvQuery = SortedNumericDocValuesField.newSlowExactQuery(field, v); + return new IndexOrDocValuesQuery(query, dvQuery); + } + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowExactQuery(field, v); + } return IntPoint.newExactQuery(field, v); } @Override - public Query termsQuery(String field, List values) { + public Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable) { int[] v = new int[values.size()]; int upTo = 0; @@ -664,6 +763,21 @@ public Query termsQuery(String field, List values) { if (upTo != v.length) { v = Arrays.copyOf(v, upTo); } + long points[] = new long[v.length]; + if (hasDocValues) { + for (int i = 0; i < v.length; i++) { + points[i] = v[i]; + } + } + if (isSearchable && hasDocValues) { + return new IndexOrDocValuesQuery( + IntPoint.newSetQuery(field, v), + SortedNumericDocValuesField.newSlowSetQuery(field, points) + ); + } + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowSetQuery(field, points); + } return IntPoint.newSetQuery(field, v); } @@ -675,6 +789,7 @@ public Query rangeQuery( boolean includeLower, boolean includeUpper, boolean hasDocValues, + boolean isSearchable, QueryShardContext context ) { int l = Integer.MIN_VALUE; @@ -704,15 +819,23 @@ public Query rangeQuery( --u; } } - Query query = IntPoint.newRangeQuery(field, l, u); - if (hasDocValues) { + if (isSearchable && hasDocValues) { + Query query = IntPoint.newRangeQuery(field, l, u); Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(field, l, u); query = new IndexOrDocValuesQuery(query, dvQuery); if (context.indexSortedOnField(field)) { query = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, query); } + return query; } - return 
query; + if (hasDocValues) { + Query query = SortedNumericDocValuesField.newSlowRangeQuery(field, l, u); + if (context.indexSortedOnField(field)) { + query = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, query); + } + return query; + } + return IntPoint.newRangeQuery(field, l, u); } @Override @@ -752,17 +875,28 @@ public Long parse(XContentParser parser, boolean coerce) throws IOException { } @Override - public Query termQuery(String field, Object value) { + public Query termQuery(String field, Object value, boolean hasDocValues, boolean isSearchable) { if (hasDecimalPart(value)) { return Queries.newMatchNoDocsQuery("Value [" + value + "] has a decimal part"); } long v = parse(value, true); + if (isSearchable && hasDocValues) { + Query query = LongPoint.newExactQuery(field, v); + Query dvQuery = SortedNumericDocValuesField.newSlowExactQuery(field, v); + return new IndexOrDocValuesQuery(query, dvQuery); + + } + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowExactQuery(field, v); + + } return LongPoint.newExactQuery(field, v); } @Override - public Query termsQuery(String field, List values) { + public Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable) { long[] v = new long[values.size()]; + int upTo = 0; for (int i = 0; i < values.size(); i++) { @@ -778,6 +912,16 @@ public Query termsQuery(String field, List values) { if (upTo != v.length) { v = Arrays.copyOf(v, upTo); } + if (isSearchable && hasDocValues) { + return new IndexOrDocValuesQuery( + LongPoint.newSetQuery(field, v), + SortedNumericDocValuesField.newSlowSetQuery(field, v) + ); + } + if (hasDocValues) { + return SortedNumericDocValuesField.newSlowSetQuery(field, v); + + } return LongPoint.newSetQuery(field, v); } @@ -789,18 +933,28 @@ public Query rangeQuery( boolean includeLower, boolean includeUpper, boolean hasDocValues, + boolean isSearchable, QueryShardContext context ) { return longRangeQuery(lowerTerm, upperTerm, includeLower, 
includeUpper, (l, u) -> { - Query query = LongPoint.newRangeQuery(field, l, u); - if (hasDocValues) { + if (isSearchable && hasDocValues) { + Query query = LongPoint.newRangeQuery(field, l, u); Query dvQuery = SortedNumericDocValuesField.newSlowRangeQuery(field, l, u); query = new IndexOrDocValuesQuery(query, dvQuery); if (context.indexSortedOnField(field)) { query = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, query); } + return query; } - return query; + if (hasDocValues) { + Query query = SortedNumericDocValuesField.newSlowRangeQuery(field, l, u); + if (context.indexSortedOnField(field)) { + query = new IndexSortSortedNumericDocValuesRangeQuery(field, l, u, query); + } + return query; + } + return LongPoint.newRangeQuery(field, l, u); + }); } @@ -841,16 +995,24 @@ public BigInteger parse(XContentParser parser, boolean coerce) throws IOExceptio } @Override - public Query termQuery(String field, Object value) { + public Query termQuery(String field, Object value, boolean hasDocValues, boolean isSearchable) { if (hasDecimalPart(value)) { return Queries.newMatchNoDocsQuery("Value [" + value + "] has a decimal part"); } BigInteger v = parse(value, true); + if (isSearchable && hasDocValues) { + Query query = BigIntegerPoint.newExactQuery(field, v); + Query dvQuery = SortedUnsignedLongDocValuesSetQuery.newSlowExactQuery(field, v); + return new IndexOrDocValuesQuery(query, dvQuery); + } + if (hasDocValues) { + return SortedUnsignedLongDocValuesSetQuery.newSlowExactQuery(field, v); + } return BigIntegerPoint.newExactQuery(field, v); } @Override - public Query termsQuery(String field, List values) { + public Query termsQuery(String field, List values, boolean hasDocvalues, boolean isSearchable) { BigInteger[] v = new BigInteger[values.size()]; int upTo = 0; @@ -868,6 +1030,14 @@ public Query termsQuery(String field, List values) { v = Arrays.copyOf(v, upTo); } + if (isSearchable && hasDocvalues) { + Query query = BigIntegerPoint.newSetQuery(field, v); + 
Query dvQuery = SortedUnsignedLongDocValuesSetQuery.newSlowSetQuery(field, v); + return new IndexOrDocValuesQuery(query, dvQuery); + } + if (hasDocvalues) { + return SortedUnsignedLongDocValuesSetQuery.newSlowSetQuery(field, v); + } return BigIntegerPoint.newSetQuery(field, v); } @@ -879,15 +1049,19 @@ public Query rangeQuery( boolean includeLower, boolean includeUpper, boolean hasDocValues, + boolean isSearchable, QueryShardContext context ) { return unsignedLongRangeQuery(lowerTerm, upperTerm, includeLower, includeUpper, (l, u) -> { - Query query = BigIntegerPoint.newRangeQuery(field, l, u); - if (hasDocValues) { + if (isSearchable && hasDocValues) { + Query query = BigIntegerPoint.newRangeQuery(field, l, u); Query dvQuery = SortedUnsignedLongDocValuesRangeQuery.newSlowRangeQuery(field, l, u); - query = new IndexOrDocValuesQuery(query, dvQuery); + return new IndexOrDocValuesQuery(query, dvQuery); } - return query; + if (hasDocValues) { + return SortedUnsignedLongDocValuesRangeQuery.newSlowRangeQuery(field, l, u); + } + return BigIntegerPoint.newRangeQuery(field, l, u); }); } @@ -941,9 +1115,9 @@ public final TypeParser parser() { return parser; } - public abstract Query termQuery(String field, Object value); + public abstract Query termQuery(String field, Object value, boolean hasDocValues, boolean isSearchable); - public abstract Query termsQuery(String field, List values); + public abstract Query termsQuery(String field, List values, boolean hasDocValues, boolean isSearchable); public abstract Query rangeQuery( String field, @@ -952,6 +1126,7 @@ public abstract Query rangeQuery( boolean includeLower, boolean includeUpper, boolean hasDocValues, + boolean isSearchable, QueryShardContext context ); @@ -1226,8 +1401,8 @@ public NumericType numericType() { @Override public Query termQuery(Object value, QueryShardContext context) { - failIfNotIndexed(); - Query query = type.termQuery(name(), value); + failIfNotIndexedAndNoDocValues(); + Query query = 
type.termQuery(name(), value, hasDocValues(), isSearchable()); if (boost() != 1f) { query = new BoostQuery(query, boost()); } @@ -1236,8 +1411,8 @@ public Query termQuery(Object value, QueryShardContext context) { @Override public Query termsQuery(List values, QueryShardContext context) { - failIfNotIndexed(); - Query query = type.termsQuery(name(), values); + failIfNotIndexedAndNoDocValues(); + Query query = type.termsQuery(name(), values, hasDocValues(), isSearchable()); if (boost() != 1f) { query = new BoostQuery(query, boost()); } @@ -1246,8 +1421,17 @@ public Query termsQuery(List values, QueryShardContext context) { @Override public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower, boolean includeUpper, QueryShardContext context) { - failIfNotIndexed(); - Query query = type.rangeQuery(name(), lowerTerm, upperTerm, includeLower, includeUpper, hasDocValues(), context); + failIfNotIndexedAndNoDocValues(); + Query query = type.rangeQuery( + name(), + lowerTerm, + upperTerm, + includeLower, + includeUpper, + hasDocValues(), + isSearchable(), + context + ); if (boost() != 1f) { query = new BoostQuery(query, boost()); } diff --git a/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java index 3c30bb81a9a32..af852b12e7a30 100644 --- a/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/NumberFieldTypeTests.java @@ -66,6 +66,7 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.index.IndexSettings; import org.opensearch.index.document.SortedUnsignedLongDocValuesRangeQuery; +import org.opensearch.index.document.SortedUnsignedLongDocValuesSetQuery; import org.opensearch.index.fielddata.IndexNumericFieldData; import org.opensearch.index.mapper.MappedFieldType.Relation; import org.opensearch.index.mapper.NumberFieldMapper.NumberFieldType; @@ -118,15 
+119,27 @@ public void testIsFieldWithinQuery() throws IOException { public void testIntegerTermsQueryWithDecimalPart() { MappedFieldType ft = new NumberFieldMapper.NumberFieldType("field", NumberType.INTEGER); - assertEquals(IntPoint.newSetQuery("field", 1), ft.termsQuery(Arrays.asList(1, 2.1), null)); - assertEquals(IntPoint.newSetQuery("field", 1), ft.termsQuery(Arrays.asList(1.0, 2.1), null)); + assertEquals( + new IndexOrDocValuesQuery(IntPoint.newSetQuery("field", 1), SortedNumericDocValuesField.newSlowSetQuery("field", 1)), + ft.termsQuery(Arrays.asList(1, 2.1), null) + ); + assertEquals( + new IndexOrDocValuesQuery(IntPoint.newSetQuery("field", 1), SortedNumericDocValuesField.newSlowSetQuery("field", 1)), + ft.termsQuery(Arrays.asList(1.0, 2.1), null) + ); assertTrue(ft.termsQuery(Arrays.asList(1.1, 2.1), null) instanceof MatchNoDocsQuery); } public void testLongTermsQueryWithDecimalPart() { MappedFieldType ft = new NumberFieldMapper.NumberFieldType("field", NumberType.LONG); - assertEquals(LongPoint.newSetQuery("field", 1), ft.termsQuery(Arrays.asList(1, 2.1), null)); - assertEquals(LongPoint.newSetQuery("field", 1), ft.termsQuery(Arrays.asList(1.0, 2.1), null)); + assertEquals( + new IndexOrDocValuesQuery(LongPoint.newSetQuery("field", 1), SortedNumericDocValuesField.newSlowSetQuery("field", 1)), + ft.termsQuery(Arrays.asList(1, 2.1), null) + ); + assertEquals( + new IndexOrDocValuesQuery(LongPoint.newSetQuery("field", 1), SortedNumericDocValuesField.newSlowSetQuery("field", 1)), + ft.termsQuery(Arrays.asList(1.0, 2.1), null) + ); assertTrue(ft.termsQuery(Arrays.asList(1.1, 2.1), null) instanceof MatchNoDocsQuery); } @@ -151,16 +164,18 @@ public void testLongTermQueryWithDecimalPart() { } private static MappedFieldType unsearchable() { - return new NumberFieldType("field", NumberType.LONG, false, false, true, true, null, Collections.emptyMap()); + return new NumberFieldType("field", NumberType.LONG, false, false, false, true, null, 
Collections.emptyMap()); } public void testTermQuery() { MappedFieldType ft = new NumberFieldMapper.NumberFieldType("field", NumberFieldMapper.NumberType.LONG); - assertEquals(LongPoint.newExactQuery("field", 42), ft.termQuery("42", null)); + Query dvQuery = SortedNumericDocValuesField.newSlowExactQuery("field", 42); + Query query = new IndexOrDocValuesQuery(LongPoint.newExactQuery("field", 42), dvQuery); + assertEquals(query, ft.termQuery("42", null)); MappedFieldType unsearchable = unsearchable(); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> unsearchable.termQuery("42", null)); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + assertEquals("Cannot search on field [field] since it is both not indexed, and does not have doc_values enabled.", e.getMessage()); } public void testRangeQueryWithNegativeBounds() { @@ -380,7 +395,7 @@ public void testLongRangeQuery() { IllegalArgumentException.class, () -> unsearchable.rangeQuery("1", "3", true, true, null, null, null, MOCK_QSC) ); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + assertEquals("Cannot search on field [field] since it is both not indexed, and does not have doc_values enabled.", e.getMessage()); } public void testUnsignedLongRangeQuery() { @@ -396,7 +411,23 @@ public void testUnsignedLongRangeQuery() { IllegalArgumentException.class, () -> unsearchable.rangeQuery("1", "3", true, true, null, null, null, MOCK_QSC) ); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + assertEquals("Cannot search on field [field] since it is both not indexed, and does not have doc_values enabled.", e.getMessage()); + } + + public void testUnsignedLongTermsQuery() { + MappedFieldType ft = new NumberFieldMapper.NumberFieldType("field", NumberFieldMapper.NumberType.UNSIGNED_LONG); + Query expected = new IndexOrDocValuesQuery( + BigIntegerPoint.newSetQuery("field", 
BigInteger.valueOf(1), BigInteger.valueOf(3)), + SortedUnsignedLongDocValuesSetQuery.newSlowSetQuery("field", BigInteger.valueOf(1), BigInteger.valueOf(3)) + ); + assertEquals(expected, ft.termsQuery(List.of("1", "3"), MOCK_QSC)); + + MappedFieldType unsearchable = unsearchable(); + IllegalArgumentException e = expectThrows( + IllegalArgumentException.class, + () -> unsearchable.termsQuery(List.of("1", "3"), MOCK_QSC) + ); + assertEquals("Cannot search on field [field] since it is both not indexed, and does not have doc_values enabled.", e.getMessage()); } public void testDoubleRangeQuery() { @@ -416,7 +447,7 @@ public void testDoubleRangeQuery() { IllegalArgumentException.class, () -> unsearchable.rangeQuery("1", "3", true, true, null, null, null, MOCK_QSC) ); - assertEquals("Cannot search on field [field] since it is not indexed.", e.getMessage()); + assertEquals("Cannot search on field [field] since it is both not indexed, and does not have doc_values enabled.", e.getMessage()); } public void testConversions() { @@ -518,8 +549,8 @@ public void testHalfFloatRange() throws IOException { float u = (randomFloat() * 2 - 1) * 65504; boolean includeLower = randomBoolean(); boolean includeUpper = randomBoolean(); - Query floatQ = NumberType.FLOAT.rangeQuery("float", l, u, includeLower, includeUpper, false, MOCK_QSC); - Query halfFloatQ = NumberType.HALF_FLOAT.rangeQuery("half_float", l, u, includeLower, includeUpper, false, MOCK_QSC); + Query floatQ = NumberType.FLOAT.rangeQuery("float", l, u, includeLower, includeUpper, false, true, MOCK_QSC); + Query halfFloatQ = NumberType.HALF_FLOAT.rangeQuery("half_float", l, u, includeLower, includeUpper, false, true, MOCK_QSC); assertEquals(searcher.count(floatQ), searcher.count(halfFloatQ)); } IOUtils.close(reader, dir); @@ -549,8 +580,17 @@ public void testUnsignedLongRange() throws IOException { BigInteger u = randomUnsignedLong(); boolean includeLower = randomBoolean(); boolean includeUpper = randomBoolean(); - Query 
unsignedLongQ = NumberType.UNSIGNED_LONG.rangeQuery("unsigned_long", l, u, includeLower, includeUpper, false, MOCK_QSC); - Query doubleQ = NumberType.DOUBLE.rangeQuery("double", l, u, includeLower, includeUpper, false, MOCK_QSC); + Query unsignedLongQ = NumberType.UNSIGNED_LONG.rangeQuery( + "unsigned_long", + l, + u, + includeLower, + includeUpper, + false, + true, + MOCK_QSC + ); + Query doubleQ = NumberType.DOUBLE.rangeQuery("double", l, u, includeLower, includeUpper, false, true, MOCK_QSC); assertEquals(searcher.count(doubleQ), searcher.count(unsignedLongQ)); } IOUtils.close(reader, dir); @@ -558,21 +598,23 @@ public void testUnsignedLongRange() throws IOException { public void testNegativeZero() { assertEquals( - NumberType.DOUBLE.rangeQuery("field", null, -0d, true, true, false, MOCK_QSC), - NumberType.DOUBLE.rangeQuery("field", null, +0d, true, false, false, MOCK_QSC) + NumberType.DOUBLE.rangeQuery("field", null, -0d, true, true, false, true, MOCK_QSC), + NumberType.DOUBLE.rangeQuery("field", null, +0d, true, false, false, true, MOCK_QSC) ); assertEquals( - NumberType.FLOAT.rangeQuery("field", null, -0f, true, true, false, MOCK_QSC), - NumberType.FLOAT.rangeQuery("field", null, +0f, true, false, false, MOCK_QSC) + NumberType.FLOAT.rangeQuery("field", null, -0f, true, true, false, true, MOCK_QSC), + NumberType.FLOAT.rangeQuery("field", null, +0f, true, false, false, true, MOCK_QSC) ); assertEquals( - NumberType.HALF_FLOAT.rangeQuery("field", null, -0f, true, true, false, MOCK_QSC), - NumberType.HALF_FLOAT.rangeQuery("field", null, +0f, true, false, false, MOCK_QSC) + NumberType.HALF_FLOAT.rangeQuery("field", null, -0f, true, true, false, true, MOCK_QSC), + NumberType.HALF_FLOAT.rangeQuery("field", null, +0f, true, false, false, true, MOCK_QSC) ); - assertFalse(NumberType.DOUBLE.termQuery("field", -0d).equals(NumberType.DOUBLE.termQuery("field", +0d))); - assertFalse(NumberType.FLOAT.termQuery("field", -0f).equals(NumberType.FLOAT.termQuery("field", +0f))); - 
assertFalse(NumberType.HALF_FLOAT.termQuery("field", -0f).equals(NumberType.HALF_FLOAT.termQuery("field", +0f))); + assertFalse(NumberType.DOUBLE.termQuery("field", -0d, true, true).equals(NumberType.DOUBLE.termQuery("field", +0d, true, true))); + assertFalse(NumberType.FLOAT.termQuery("field", -0f, true, true).equals(NumberType.FLOAT.termQuery("field", +0f, true, true))); + assertFalse( + NumberType.HALF_FLOAT.termQuery("field", -0f, true, true).equals(NumberType.HALF_FLOAT.termQuery("field", +0f, true, true)) + ); } // Make sure we construct the IndexOrDocValuesQuery objects with queries that match @@ -628,6 +670,7 @@ public void doTestDocValueRangeQueries(NumberType type, Supplier valueSu randomBoolean(), randomBoolean(), true, + true, MOCK_QSC ); assertThat(query, instanceOf(IndexOrDocValuesQuery.class)); @@ -708,6 +751,7 @@ public void doTestIndexSortRangeQueries(NumberType type, Supplier valueS randomBoolean(), randomBoolean(), true, + true, context ); assertThat(query, instanceOf(IndexSortSortedNumericDocValuesRangeQuery.class)); diff --git a/server/src/test/java/org/opensearch/index/query/TermQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/TermQueryBuilderTests.java index 3ac9bce840a20..c5bdf9b586df1 100644 --- a/server/src/test/java/org/opensearch/index/query/TermQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/TermQueryBuilderTests.java @@ -36,6 +36,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.AutomatonQuery; +import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; @@ -112,6 +113,7 @@ protected void doAssertLuceneQuery(TermQueryBuilder queryBuilder, Query query, Q either(instanceOf(TermQuery.class)).or(instanceOf(PointRangeQuery.class)) .or(instanceOf(MatchNoDocsQuery.class)) .or(instanceOf(AutomatonQuery.class)) + 
.or(instanceOf(IndexOrDocValuesQuery.class)) ); MappedFieldType mapper = context.fieldMapper(queryBuilder.fieldName()); if (query instanceof TermQuery) { diff --git a/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java index b587bec2d5343..32bf290627b63 100644 --- a/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/TermsQueryBuilderTests.java @@ -34,6 +34,7 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PointInSetQuery; import org.apache.lucene.search.Query; @@ -135,6 +136,7 @@ protected void doAssertLuceneQuery(TermsQueryBuilder queryBuilder, Query query, either(instanceOf(TermInSetQuery.class)).or(instanceOf(PointInSetQuery.class)) .or(instanceOf(ConstantScoreQuery.class)) .or(instanceOf(MatchNoDocsQuery.class)) + .or(instanceOf(IndexOrDocValuesQuery.class)) ); if (query instanceof ConstantScoreQuery) { assertThat(((ConstantScoreQuery) query).getQuery(), instanceOf(BooleanQuery.class)); From 808ed56bcbc6ed547f64895cd02283df07fb5c48 Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Thu, 4 Jan 2024 15:14:29 -0500 Subject: [PATCH 08/13] Add the means to extract the contextual underlying channel from HttpChannel without excessive typecasting (#11751) Signed-off-by: Andriy Redko --- CHANGELOG.md | 2 +- .../http/netty4/Netty4HttpChannel.java | 5 ++ .../http/netty4/Netty4HttpChannelTests.java | 52 +++++++++++++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 modules/transport-netty4/src/test/java/org/opensearch/http/netty4/Netty4HttpChannelTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index ac15176af56f5..51276133af7d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ 
-162,7 +162,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Changed - Mute the query profile IT with concurrent execution ([#9840](https://github.com/opensearch-project/OpenSearch/pull/9840)) - Force merge with `only_expunge_deletes` honors max segment size ([#10036](https://github.com/opensearch-project/OpenSearch/pull/10036)) -- Add the means to extract the contextual properties from HttpChannel, TcpCChannel and TrasportChannel without excessive typecasting ([#10562](https://github.com/opensearch-project/OpenSearch/pull/10562)) +- Add the means to extract the contextual properties from HttpChannel, TcpCChannel and TrasportChannel without excessive typecasting ([#10562](https://github.com/opensearch-project/OpenSearch/pull/10562)), ([#11751](https://github.com/opensearch-project/OpenSearch/pull/11751)) - Introduce new dynamic cluster setting to control slice computation for concurrent segment search ([#9107](https://github.com/opensearch-project/OpenSearch/pull/9107)) - Search pipelines now support asynchronous request and response processors to avoid blocking on a transport thread ([#10598](https://github.com/opensearch-project/OpenSearch/pull/10598)) - [Remote Store] Add Remote Store backpressure rejection stats to `_nodes/stats` ([#10524](https://github.com/opensearch-project/OpenSearch/pull/10524)) diff --git a/modules/transport-netty4/src/main/java/org/opensearch/http/netty4/Netty4HttpChannel.java b/modules/transport-netty4/src/main/java/org/opensearch/http/netty4/Netty4HttpChannel.java index 6475a0b744c60..75d30aa9797c0 100644 --- a/modules/transport-netty4/src/main/java/org/opensearch/http/netty4/Netty4HttpChannel.java +++ b/modules/transport-netty4/src/main/java/org/opensearch/http/netty4/Netty4HttpChannel.java @@ -46,6 +46,7 @@ import io.netty.channel.ChannelPipeline; public class Netty4HttpChannel implements HttpChannel { + private static final String CHANNEL_PROPERTY = "channel"; private final Channel channel; 
private final CompletableContext closeContext = new CompletableContext<>(); @@ -102,6 +103,10 @@ public Channel getNettyChannel() { @SuppressWarnings("unchecked") @Override public Optional get(String name, Class clazz) { + if (CHANNEL_PROPERTY.equalsIgnoreCase(name) && clazz.isAssignableFrom(Channel.class)) { + return (Optional) Optional.of(getNettyChannel()); + } + Object handler = getNettyChannel().pipeline().get(name); if (handler == null && inboundPipeline() != null) { diff --git a/modules/transport-netty4/src/test/java/org/opensearch/http/netty4/Netty4HttpChannelTests.java b/modules/transport-netty4/src/test/java/org/opensearch/http/netty4/Netty4HttpChannelTests.java new file mode 100644 index 0000000000000..c49166a51c24a --- /dev/null +++ b/modules/transport-netty4/src/test/java/org/opensearch/http/netty4/Netty4HttpChannelTests.java @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.http.netty4; + +import org.opensearch.test.OpenSearchTestCase; +import org.opensearch.transport.Netty4NioSocketChannel; +import org.junit.Before; + +import java.util.Optional; + +import io.netty.channel.Channel; +import io.netty.channel.ChannelOutboundInvoker; +import io.netty.channel.ServerChannel; + +import static org.hamcrest.CoreMatchers.is; +import static org.hamcrest.CoreMatchers.sameInstance; + +public class Netty4HttpChannelTests extends OpenSearchTestCase { + private Netty4HttpChannel netty4HttpChannel; + private Channel channel; + + @Before + @Override + public void setUp() throws Exception { + super.setUp(); + channel = new Netty4NioSocketChannel(); + netty4HttpChannel = new Netty4HttpChannel(channel); + } + + public void testChannelAttributeMatchesChannel() { + final Optional channelOpt = netty4HttpChannel.get("channel", Channel.class); + assertThat(channelOpt.isPresent(), is(true)); + assertThat(channelOpt.get(), sameInstance(channel)); + } + + public void testChannelAttributeMatchesChannelOutboundInvoker() { + final Optional channelOpt = netty4HttpChannel.get("channel", ChannelOutboundInvoker.class); + assertThat(channelOpt.isPresent(), is(true)); + assertThat(channelOpt.get(), sameInstance(channel)); + } + + public void testChannelAttributeIsEmpty() { + final Optional channelOpt = netty4HttpChannel.get("channel", ServerChannel.class); + assertThat(channelOpt.isEmpty(), is(true)); + } +} From 22b628bacaab56c145538898ca190ffce9778c6e Mon Sep 17 00:00:00 2001 From: Michael Froh Date: Thu, 4 Jan 2024 21:13:28 +0000 Subject: [PATCH 09/13] Fix parsing of flat object fields with dots in keys (#11425) We have a bug where a flat object field with inner fields that contain dots will "push" the dotted name onto the dot-path from the root, but then would just "pop" off the last part of the dotted name. This change adds more robust support for flat object keys and subkeys that contain dots (i.e. 
it pops off the entirety of the latest key, regardless of how many dots it contains). Fixes https://github.com/opensearch-project/OpenSearch/issues/11402 Signed-off-by: Michael Froh --- CHANGELOG.md | 3 +- .../xcontent/JsonToStringXContentParser.java | 31 +++-- .../index/mapper/FlatObjectFieldMapper.java | 2 +- .../JsonToStringXContentParserTests.java | 113 ++++++++++++++++++ 4 files changed, 137 insertions(+), 12 deletions(-) create mode 100644 server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 51276133af7d2..0958dd41d5a84 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -191,7 +191,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Fixed - Fix failure in dissect ingest processor parsing empty brackets ([#9225](https://github.com/opensearch-project/OpenSearch/pull/9255)) -- Fix class_cast_exception when passing int to _version and other metadata fields in ingest simulate API ([#10101](https://github.com/opensearch-project/OpenSearch/pull/10101)) +- Fix `class_cast_exception` when passing int to `_version` and other metadata fields in ingest simulate API ([#10101](https://github.com/opensearch-project/OpenSearch/pull/10101)) - Fix Segment Replication ShardLockObtainFailedException bug during index corruption ([10370](https://github.com/opensearch-project/OpenSearch/pull/10370)) - Fix some test methods in SimulatePipelineRequestParsingTests never run and fix test failure ([#10496](https://github.com/opensearch-project/OpenSearch/pull/10496)) - Fix passing wrong parameter when calling newConfigurationException() in DotExpanderProcessor ([#10737](https://github.com/opensearch-project/OpenSearch/pull/10737)) @@ -203,6 +203,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix the issue with DefaultSpanScope restoring wrong span in the TracerContextStorage upon detach 
([#11316](https://github.com/opensearch-project/OpenSearch/issues/11316)) - Remove shadowJar from `lang-painless` module publication ([#11369](https://github.com/opensearch-project/OpenSearch/issues/11369)) - Fix remote shards balancer and remove unused variables ([#11167](https://github.com/opensearch-project/OpenSearch/pull/11167)) +- Fix parsing of flat object fields with dots in keys ([#11425](https://github.com/opensearch-project/OpenSearch/pull/11425)) - Fix bug where replication lag grows post primary relocation ([#11238](https://github.com/opensearch-project/OpenSearch/pull/11238)) - Fix for stuck update action in a bulk with `retry_on_conflict` property ([#11152](https://github.com/opensearch-project/OpenSearch/issues/11152)) - Fix template setting override for replication type ([#11417](https://github.com/opensearch-project/OpenSearch/pull/11417)) diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java index 9e81d8a7af078..9b2bd06a88e2e 100644 --- a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -18,7 +18,6 @@ import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentLocation; import org.opensearch.core.xcontent.XContentParser; -import org.opensearch.index.mapper.ParseContext; import java.io.IOException; import java.math.BigInteger; @@ -40,7 +39,6 @@ public class JsonToStringXContentParser extends AbstractXContentParser { private ArrayList keyList = new ArrayList<>(); private XContentBuilder builder = XContentBuilder.builder(JsonXContent.jsonXContent); - private ParseContext parseContext; private NamedXContentRegistry xContentRegistry; @@ -54,14 +52,13 @@ public class JsonToStringXContentParser extends AbstractXContentParser { public JsonToStringXContentParser( 
NamedXContentRegistry xContentRegistry, DeprecationHandler deprecationHandler, - ParseContext parseContext, + XContentParser parser, String fieldTypeName ) throws IOException { super(xContentRegistry, deprecationHandler); - this.parseContext = parseContext; this.deprecationHandler = deprecationHandler; this.xContentRegistry = xContentRegistry; - this.parser = parseContext.parser(); + this.parser = parser; this.fieldTypeName = fieldTypeName; } @@ -86,8 +83,22 @@ private void parseToken(StringBuilder path, String currentFieldName) throws IOEx StringBuilder parsedFields = new StringBuilder(); if (this.parser.currentToken() == Token.FIELD_NAME) { - path.append(DOT_SYMBOL + currentFieldName); - this.keyList.add(currentFieldName); + path.append(DOT_SYMBOL).append(currentFieldName); + int dotIndex = currentFieldName.indexOf(DOT_SYMBOL); + String fieldNameSuffix = currentFieldName; + // The field name may be of the form foo.bar.baz + // If that's the case, each "part" is a key. + while (dotIndex >= 0) { + String fieldNamePrefix = fieldNameSuffix.substring(0, dotIndex); + if (!fieldNamePrefix.isEmpty()) { + this.keyList.add(fieldNamePrefix); + } + fieldNameSuffix = fieldNameSuffix.substring(dotIndex + 1); + dotIndex = fieldNameSuffix.indexOf(DOT_SYMBOL); + } + if (!fieldNameSuffix.isEmpty()) { + this.keyList.add(fieldNameSuffix); + } } else if (this.parser.currentToken() == Token.START_ARRAY) { parseToken(path, currentFieldName); break; @@ -97,18 +108,18 @@ private void parseToken(StringBuilder path, String currentFieldName) throws IOEx parseToken(path, currentFieldName); int dotIndex = path.lastIndexOf(DOT_SYMBOL); if (dotIndex != -1) { - path.delete(dotIndex, path.length()); + path.setLength(path.length() - currentFieldName.length() - 1); } } else { if (!path.toString().contains(currentFieldName)) { - path.append(DOT_SYMBOL + currentFieldName); + path.append(DOT_SYMBOL).append(currentFieldName); } parseValue(parsedFields); this.valueList.add(parsedFields.toString()); 
this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields); int dotIndex = path.lastIndexOf(DOT_SYMBOL); if (dotIndex != -1) { - path.delete(dotIndex, path.length()); + path.setLength(path.length() - currentFieldName.length() - 1); } } diff --git a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java index 00b623dddac23..9a3f2595a7c9e 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java @@ -572,7 +572,7 @@ protected void parseCreateField(ParseContext context) throws IOException { JsonToStringXContentParser JsonToStringParser = new JsonToStringXContentParser( NamedXContentRegistry.EMPTY, DeprecationHandler.IGNORE_DEPRECATIONS, - context, + context.parser(), fieldType().name() ); /* diff --git a/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java b/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java new file mode 100644 index 0000000000000..0feb7bcd1ceec --- /dev/null +++ b/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java @@ -0,0 +1,113 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.common.xcontent; + +import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.core.xcontent.DeprecationHandler; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; + +public class JsonToStringXContentParserTests extends OpenSearchTestCase { + + private String flattenJsonString(String fieldName, String in) throws IOException { + String transformed; + try ( + XContentParser parser = JsonXContent.jsonXContent.createParser( + xContentRegistry(), + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + in + ) + ) { + JsonToStringXContentParser jsonToStringXContentParser = new JsonToStringXContentParser( + xContentRegistry(), + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + parser, + fieldName + ); + // Skip the START_OBJECT token: + jsonToStringXContentParser.nextToken(); + + XContentParser transformedParser = jsonToStringXContentParser.parseObject(); + try (XContentBuilder jsonBuilder = XContentFactory.jsonBuilder()) { + jsonBuilder.copyCurrentStructure(transformedParser); + return jsonBuilder.toString(); + } + } + } + + public void testNestedObjects() throws IOException { + String jsonExample = "{" + "\"first\" : \"1\"," + "\"second\" : {" + " \"inner\": \"2.0\"" + "}," + "\"third\": \"three\"" + "}"; + + assertEquals( + "{" + + "\"flat\":[\"first\",\"second\",\"inner\",\"third\"]," + + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner=2.0\",\"flat.third=three\"]" + + "}", + flattenJsonString("flat", jsonExample) + ); + } + + public void testChildHasDots() throws IOException { + // This should be exactly the same as testNestedObjects. We're just using the "flat" notation for the inner + // object. 
+ String jsonExample = "{" + "\"first\" : \"1\"," + "\"second.inner\" : \"2.0\"," + "\"third\": \"three\"" + "}"; + + assertEquals( + "{" + + "\"flat\":[\"first\",\"second\",\"inner\",\"third\"]," + + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner=2.0\",\"flat.third=three\"]" + + "}", + flattenJsonString("flat", jsonExample) + ); + } + + public void testNestChildObjectWithDots() throws IOException { + String jsonExample = "{" + + "\"first\" : \"1\"," + + "\"second.inner\" : {" + + " \"really_inner\" : \"2.0\"" + + "}," + + "\"third\": \"three\"" + + "}"; + + assertEquals( + "{" + + "\"flat\":[\"first\",\"second\",\"inner\",\"really_inner\",\"third\"]," + + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.really_inner=2.0\",\"flat.third=three\"]" + + "}", + flattenJsonString("flat", jsonExample) + ); + } + + public void testNestChildObjectWithDotsAndFieldWithDots() throws IOException { + String jsonExample = "{" + + "\"first\" : \"1\"," + + "\"second.inner\" : {" + + " \"totally.absolutely.inner\" : \"2.0\"" + + "}," + + "\"third\": \"three\"" + + "}"; + + assertEquals( + "{" + + "\"flat\":[\"first\",\"second\",\"inner\",\"totally\",\"absolutely\",\"inner\",\"third\"]," + + "\"flat._value\":[\"1\",\"2.0\",\"three\"]," + + "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.totally.absolutely.inner=2.0\",\"flat.third=three\"]" + + "}", + flattenJsonString("flat", jsonExample) + ); + } + +} From 2860805e894c49a04ebf72121d96a24d353b6194 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Thu, 4 Jan 2024 13:17:25 -0800 Subject: [PATCH 10/13] Update the skip version for match_only_text field Integ tests to 2.11.99 (#11752) * update the skip version for integ tests to 2.11.99 Signed-off-by: Rishabh Maurya * fix the version in skip section reason Signed-off-by: Rishabh Maurya --------- Signed-off-by: Rishabh Maurya --- 
.../11_match_field_match_only_text.yml | 4 ++-- .../20_ngram_search_field_match_only_text.yml | 8 ++++---- ..._ngram_highligthing_field_match_only_text.yml | 4 ++-- .../40_query_string_field_match_only_text.yml | 4 ++-- ...th_default_analyzer_field_match_only_text.yml | 4 ++-- ...eries_with_synonyms_field_match_only_text.yml | 8 ++++---- .../60_synonym_graph_field_match_only_text.yml | 4 ++-- .../70_intervals_field_match_only_text.yml | 8 ++++---- .../20_phrase_field_match_only_text.yml | 16 ++++++++-------- .../20_highlighting_field_match_only_text.yml | 4 ++-- .../20_query_string_field_match_only_text.yml | 4 ++-- .../30_sig_terms_field_match_only_text.yml | 4 ++-- .../90_sig_text_field_match_only_text.yml | 8 ++++---- .../20_highlighting_field_match_only_text.yml | 4 ++-- .../search/160_exists_query_match_only_text.yml | 4 ++-- .../200_phrase_search_field_match_only_text.yml | 4 ++-- ...0_match_bool_prefix_field_match_only_text.yml | 4 ++-- ...20_disallow_queries_field_match_only_text.yml | 4 ++-- .../10_basic_field_match_only_field.yml | 4 ++-- 19 files changed, 52 insertions(+), 52 deletions(-) diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml index 40ff2c2f4cdbe..140d70414a4a7 100644 --- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml @@ -2,8 +2,8 @@ "match query with stacked stems": - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" # Tests the match query stemmed tokens are "stacked" on top of the unstemmed # versions in the same position. 
- do: diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml index 95b648dee47c8..a5da3043f19b5 100644 --- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml @@ -1,7 +1,7 @@ "ngram search": - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: index: test @@ -45,8 +45,8 @@ --- "testNGramCopyField": - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: index: test diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml index 597f55679a2c6..accf5d975d57f 100644 --- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml @@ -1,7 +1,7 @@ "ngram highlighting": - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: index: test diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml 
b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml index ddebb1d76acbc..717d3a7dd8a3e 100644 --- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml @@ -1,8 +1,8 @@ --- "Test query string with snowball": - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: index: test diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml index 97f3fb65e94a2..cd2d2e42c6a17 100644 --- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml @@ -1,8 +1,8 @@ --- "Test default search analyzer is applied": - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: index: test diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only_text.yml index 0c263a47a38e6..0c537dd42d583 100644 --- 
a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only_text.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only_text.yml @@ -1,8 +1,8 @@ --- "Test common terms query with stacked tokens": - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" features: "allowed_warnings" - do: @@ -247,8 +247,8 @@ --- "Test match query with synonyms - see #3881 for extensive description of the issue": - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: index: test diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only_text.yml index 91a8b1509517e..d3f5d0fe4f8b4 100644 --- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only_text.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only_text.yml @@ -1,7 +1,7 @@ setup: - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: index: test diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml index 9792c9d2695ea..8334ca27ff274 100644 --- 
a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml @@ -1,8 +1,8 @@ # integration tests for intervals queries using analyzers setup: - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: index: test @@ -26,8 +26,8 @@ setup: --- "Test use_field": - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: catch: bad_request search: diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml index aff2b3f11101c..90596ca04205c 100644 --- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml @@ -2,8 +2,8 @@ setup: - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: index: test @@ -122,8 +122,8 @@ setup: --- "breaks ties by sorting terms": - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" # This runs the suggester without bigrams so we can be sure of the sort order - do: search: @@ -181,8 +181,8 @@ setup: --- "doesn't fail when asked to run on a field without unigrams when force_unigrams=false": - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + 
version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: search: rest_total_hits_as_int: true @@ -213,8 +213,8 @@ setup: --- "reverse suggestions": - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: search: rest_total_hits_as_int: true diff --git a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml index 3cb8e09c70aed..1d6a938675e39 100644 --- a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml +++ b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml @@ -1,7 +1,7 @@ setup: - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml index 085c5633ac72b..044ae5dd6a94d 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml @@ -1,8 +1,8 @@ --- "validate_query with query_string parameters": - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: index: test diff --git 
a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml index 7a96536a2e261..d1cc6c8295bd9 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml @@ -1,8 +1,8 @@ --- "Default index": - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: index: goodbad diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only_text.yml index bc41f157dfdc4..e21c4fb946d85 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only_text.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only_text.yml @@ -1,8 +1,8 @@ --- "Default index": - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: index: goodbad @@ -78,8 +78,8 @@ --- "Dedup noise": - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: index: goodbad diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml index 7100d620bf19e..9e60d69bfedd7 100644 --- 
a/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml @@ -1,7 +1,7 @@ setup: - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: index: test diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only_text.yml index 03626236604a1..69c639a8f506a 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only_text.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only_text.yml @@ -1,7 +1,7 @@ setup: - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" features: ["headers"] - do: diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml index a41b8d353e3e9..13fd6b3858948 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml @@ -1,8 +1,8 @@ --- "search with indexed phrases": - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml 
b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml index fc4e9f9de0f38..682a7dded1e9b 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml @@ -1,7 +1,7 @@ setup: - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only_text.yml index f4faf87eb83cc..00e54e43d6f04 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only_text.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only_text.yml @@ -1,8 +1,8 @@ --- setup: - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml index cc15796e4697f..44adb48c8765e 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml @@ -1,8 +1,8 @@ --- "Search shards aliases with and without filters": - skip: - version: " - 2.99.99" - reason: "match_only_text was added in 3.0" + version: " - 2.11.99" + reason: "match_only_text was added in 2.12" - do: indices.create: From 
714fa73c4bc4c0cbaa624629c724e57be8405209 Mon Sep 17 00:00:00 2001 From: Poojita Raj Date: Thu, 4 Jan 2024 15:13:10 -0800 Subject: [PATCH 11/13] Add logging for tests in RemoteStoreStatsIT to catch assertion failure cause (#11734) * Add logging for tests in RemoteStoreStatsIT to catch assertion failure cause Signed-off-by: Poojita Raj * Add test logging annotation for trace logs Signed-off-by: Poojita Raj --------- Signed-off-by: Poojita Raj --- .../remotestore/RemoteStoreStatsIT.java | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java index 2d3ab135d0377..4a0af206b9d89 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreStatsIT.java @@ -29,6 +29,7 @@ import org.opensearch.test.InternalTestCluster; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.test.disruption.NetworkDisruption; +import org.opensearch.test.junit.annotations.TestLogging; import org.opensearch.test.transport.MockTransportService; import java.io.IOException; @@ -249,6 +250,7 @@ public void testStatsResponseFromLocalNode() { } } + @TestLogging(reason = "Getting trace logs from remote store package", value = "org.opensearch.remotestore:TRACE") public void testDownloadStatsCorrectnessSinglePrimarySingleReplica() throws Exception { setup(); // Scenario: @@ -277,6 +279,13 @@ public void testDownloadStatsCorrectnessSinglePrimarySingleReplica() throws Exce .collect(Collectors.toList()) .get(0) .getSegmentStats(); + logger.info( + "Zero state primary stats: {}ms refresh time lag, {}b bytes lag, {}b upload bytes started and {}b upload bytes failed.", + zeroStatePrimaryStats.refreshTimeLagMs, + zeroStatePrimaryStats.bytesLag, + zeroStatePrimaryStats.uploadBytesStarted, + 
zeroStatePrimaryStats.uploadBytesFailed + ); assertTrue( zeroStatePrimaryStats.totalUploadsStarted == zeroStatePrimaryStats.totalUploadsSucceeded && zeroStatePrimaryStats.totalUploadsSucceeded == 1 @@ -339,6 +348,7 @@ public void testDownloadStatsCorrectnessSinglePrimarySingleReplica() throws Exce } } + @TestLogging(reason = "Getting trace logs from remote store package", value = "org.opensearch.remotestore:TRACE") public void testDownloadStatsCorrectnessSinglePrimaryMultipleReplicaShards() throws Exception { setup(); // Scenario: @@ -371,6 +381,13 @@ public void testDownloadStatsCorrectnessSinglePrimaryMultipleReplicaShards() thr .collect(Collectors.toList()) .get(0) .getSegmentStats(); + logger.info( + "Zero state primary stats: {}ms refresh time lag, {}b bytes lag, {}b upload bytes started and {}b upload bytes failed.", + zeroStatePrimaryStats.refreshTimeLagMs, + zeroStatePrimaryStats.bytesLag, + zeroStatePrimaryStats.uploadBytesStarted, + zeroStatePrimaryStats.uploadBytesFailed + ); assertTrue( zeroStatePrimaryStats.totalUploadsStarted == zeroStatePrimaryStats.totalUploadsSucceeded && zeroStatePrimaryStats.totalUploadsSucceeded == 1 From 3a3da4fe353500e6d5e46785a594db33280fb56c Mon Sep 17 00:00:00 2001 From: Rishikesh Pasham <62345295+Rishikesh1159@users.noreply.github.com> Date: Thu, 4 Jan 2024 16:25:36 -0800 Subject: [PATCH 12/13] [Segment Replication] [Remote Store] Replace overriding mockInternalEngine() in test classes with NRTReplicationEngine. (#11716) * Replace overriding mockInternalEngine() in test classes with NRTReplicationEngine. Signed-off-by: Rishikesh1159 * remove unused comment. Signed-off-by: Rishikesh1159 * Add comment for explaining the conditional logic. Signed-off-by: Rishikesh1159 * Update comment with exact reason for conditional logic. 
Signed-off-by: Rishikesh1159 --------- Signed-off-by: Rishikesh1159 --- .../indices/replication/SegmentReplicationBaseIT.java | 5 ----- .../replication/SegmentReplicationClusterSettingIT.java | 5 ----- .../remotestore/RemoteIndexPrimaryRelocationIT.java | 5 ----- .../remotestore/RemoteStoreBaseIntegTestCase.java | 5 ----- .../SegmentReplicationUsingRemoteStoreDisruptionIT.java | 5 ----- .../snapshots/SegmentReplicationSnapshotIT.java | 5 ----- .../org/opensearch/test/engine/MockEngineFactory.java | 9 ++++++++- 7 files changed, 8 insertions(+), 31 deletions(-) diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationBaseIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationBaseIT.java index 1d93eecd6b245..641f714d33414 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationBaseIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationBaseIT.java @@ -60,11 +60,6 @@ protected Collection> nodePlugins() { return asList(MockTransportService.TestPlugin.class); } - @Override - protected boolean addMockInternalEngine() { - return false; - } - @Override public Settings indexSettings() { return Settings.builder() diff --git a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationClusterSettingIT.java b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationClusterSettingIT.java index c4e8ccfc0ecec..f2cb7c9c6bfc8 100644 --- a/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationClusterSettingIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/indices/replication/SegmentReplicationClusterSettingIT.java @@ -52,11 +52,6 @@ public Settings indexSettings() { .build(); } - @Override - protected boolean addMockInternalEngine() { - return false; - } - public void 
testIndexReplicationSettingOverridesSegRepClusterSetting() throws Exception { Settings settings = Settings.builder().put(CLUSTER_SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT).build(); final String ANOTHER_INDEX = "test-index"; diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteIndexPrimaryRelocationIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteIndexPrimaryRelocationIT.java index d8b7718a55377..67316ed0e6e6b 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteIndexPrimaryRelocationIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteIndexPrimaryRelocationIT.java @@ -35,11 +35,6 @@ protected Settings nodeSettings(int nodeOrdinal) { .build(); } - @Override - protected boolean addMockInternalEngine() { - return false; - } - public Settings indexSettings() { return Settings.builder() .put(super.indexSettings()) diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java index 8c15ebd0505d9..d23e634bb3368 100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/RemoteStoreBaseIntegTestCase.java @@ -121,11 +121,6 @@ protected Map indexData(int numberOfIterations, boolean invokeFlus return indexingStats; } - @Override - protected boolean addMockInternalEngine() { - return false; - } - @Override protected Settings nodeSettings(int nodeOrdinal) { if (segmentRepoPath == null || translogRepoPath == null) { diff --git a/server/src/internalClusterTest/java/org/opensearch/remotestore/SegmentReplicationUsingRemoteStoreDisruptionIT.java b/server/src/internalClusterTest/java/org/opensearch/remotestore/SegmentReplicationUsingRemoteStoreDisruptionIT.java index d5cdc22a15478..8372135fc55c4 
100644 --- a/server/src/internalClusterTest/java/org/opensearch/remotestore/SegmentReplicationUsingRemoteStoreDisruptionIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/remotestore/SegmentReplicationUsingRemoteStoreDisruptionIT.java @@ -44,11 +44,6 @@ public Settings indexSettings() { return remoteStoreIndexSettings(1); } - @Override - protected boolean addMockInternalEngine() { - return false; - } - public void testCancelReplicationWhileSyncingSegments() throws Exception { Path location = randomRepoPath().toAbsolutePath(); setup(location, 0d, "metadata", Long.MAX_VALUE, 1); diff --git a/server/src/internalClusterTest/java/org/opensearch/snapshots/SegmentReplicationSnapshotIT.java b/server/src/internalClusterTest/java/org/opensearch/snapshots/SegmentReplicationSnapshotIT.java index 2c12c0abb202b..c649c4ab13e7e 100644 --- a/server/src/internalClusterTest/java/org/opensearch/snapshots/SegmentReplicationSnapshotIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/snapshots/SegmentReplicationSnapshotIT.java @@ -74,11 +74,6 @@ public Settings restoreIndexDocRepSettings() { return Settings.builder().put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.DOCUMENT).build(); } - @Override - protected boolean addMockInternalEngine() { - return false; - } - public void ingestData(int docCount, String indexName) throws Exception { for (int i = 0; i < docCount; i++) { client().prepareIndex(indexName).setId(Integer.toString(i)).setSource("field", "value" + i).execute().actionGet(); diff --git a/test/framework/src/main/java/org/opensearch/test/engine/MockEngineFactory.java b/test/framework/src/main/java/org/opensearch/test/engine/MockEngineFactory.java index 30cc48c588be1..102c641746b01 100644 --- a/test/framework/src/main/java/org/opensearch/test/engine/MockEngineFactory.java +++ b/test/framework/src/main/java/org/opensearch/test/engine/MockEngineFactory.java @@ -35,6 +35,7 @@ import org.opensearch.index.engine.Engine; import 
org.opensearch.index.engine.EngineConfig; import org.opensearch.index.engine.EngineFactory; +import org.opensearch.index.engine.NRTReplicationEngine; public final class MockEngineFactory implements EngineFactory { @@ -46,6 +47,12 @@ public MockEngineFactory(Class wrapper) { @Override public Engine newReadWriteEngine(EngineConfig config) { - return new MockInternalEngine(config, wrapper); + + /** + * Segment replication enabled replicas (i.e. read only replicas) do not use an InternalEngine so a MockInternalEngine + * will not work and an NRTReplicationEngine must be used instead. The primary shards for these indexes will + * still use a MockInternalEngine. + */ + return config.isReadOnlyReplica() ? new NRTReplicationEngine(config) : new MockInternalEngine(config, wrapper); } } From ab0f70eef70d9c75eabc4abfc8e7f1073d739ced Mon Sep 17 00:00:00 2001 From: Rishikesh Pasham <62345295+Rishikesh1159@users.noreply.github.com> Date: Fri, 5 Jan 2024 09:20:56 -0800 Subject: [PATCH 13/13] Introduce a new feature flag "WRITEABLE_REMOTE_INDEX" to gate the writeable remote index functionality (#11717) * Introduce a new feature flag to gate the writeable remote index functionality. Signed-off-by: Rishikesh1159 * Add changelog entry. Signed-off-by: Rishikesh1159 * Update changelog entry. 
Signed-off-by: Rishikesh1159 --------- Signed-off-by: Rishikesh1159 --- CHANGELOG.md | 1 + .../common/settings/FeatureFlagSettings.java | 3 ++- .../org/opensearch/common/util/FeatureFlags.java | 12 ++++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0958dd41d5a84..f7aa083eb2bcb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -120,6 +120,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Add additional handling in SearchTemplateRequest when simulate is set to true ([#11591](https://github.com/opensearch-project/OpenSearch/pull/11591)) - Introduce cluster level setting `cluster.index.restrict.replication.type` to prevent replication type setting override during index creations([#11583](https://github.com/opensearch-project/OpenSearch/pull/11583)) - Add match_only_text field that is optimized for storage by trading off positional queries performance ([#6836](https://github.com/opensearch-project/OpenSearch/pull/11039)) +- Introduce new feature flag "WRITEABLE_REMOTE_INDEX" to gate the writeable remote index functionality ([#11717](https://github.com/opensearch-project/OpenSearch/pull/11717)) ### Dependencies - Bumps jetty version to 9.4.52.v20230823 to fix GMS-2023-1857 ([#9822](https://github.com/opensearch-project/OpenSearch/pull/9822)) diff --git a/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java b/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java index 387b0c9753574..d3285c379bcc4 100644 --- a/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java +++ b/server/src/main/java/org/opensearch/common/settings/FeatureFlagSettings.java @@ -40,7 +40,8 @@ protected FeatureFlagSettings( FeatureFlags.IDENTITY_SETTING, FeatureFlags.CONCURRENT_SEGMENT_SEARCH_SETTING, FeatureFlags.TELEMETRY_SETTING, - FeatureFlags.DATETIME_FORMATTER_CACHING_SETTING + FeatureFlags.DATETIME_FORMATTER_CACHING_SETTING, +
FeatureFlags.WRITEABLE_REMOTE_INDEX_SETTING ) ) ); diff --git a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java index 4e9b417e3433b..c54772caa574b 100644 --- a/server/src/main/java/org/opensearch/common/util/FeatureFlags.java +++ b/server/src/main/java/org/opensearch/common/util/FeatureFlags.java @@ -60,6 +60,12 @@ public class FeatureFlags { */ public static final String DATETIME_FORMATTER_CACHING = "opensearch.experimental.optimization.datetime_formatter_caching.enabled"; + /** + * Gates the functionality of writeable remote index + * Once the feature is ready for release, this feature flag can be removed. + */ + public static final String WRITEABLE_REMOTE_INDEX = "opensearch.experimental.feature.writeable_remote_index.enabled"; + /** * Should store the settings from opensearch.yml. */ @@ -122,4 +128,10 @@ public static boolean isEnabled(Setting featureFlag) { true, Property.NodeScope ); + + public static final Setting WRITEABLE_REMOTE_INDEX_SETTING = Setting.boolSetting( + WRITEABLE_REMOTE_INDEX, + false, + Property.NodeScope + ); }