From 40e205e076dda8e3e7101df97d2f0d398f4236e3 Mon Sep 17 00:00:00 2001 From: Rishabh Maurya Date: Sun, 19 Nov 2023 15:21:05 -0800 Subject: [PATCH] Added integration tests for match_only_text replicating text field integ tests Signed-off-by: Rishabh Maurya --- .../11_match_field_match_only_text.yml | 67 ++++ .../20_ngram_search_field_match_only.yml | 138 +++++++ ...ram_highligthing_field_match_only_text.yml | 134 +++++++ .../40_query_string_field_match_only_text.yml | 56 +++ ...default_analyzer_field_match_only_text.yml | 39 ++ ...queries_with_synonyms_field_match_only.yml | 343 +++++++++++++++++ .../60_synonym_graph_field_match_only.yml | 206 +++++++++++ .../70_intervals_field_match_only_text.yml | 64 ++++ .../20_phrase_field_match_only_text.yml | 226 ++++++++++++ .../20_highlighting_field_match_only_text.yml | 201 ++++++++++ .../20_query_string_field_match_only_text.yml | 50 +++ .../30_sig_terms_field_match_only_text.yml | 154 ++++++++ .../90_sig_text_field_match_only.yml | 151 ++++++++ .../20_highlighting_field_match_only_text.yml | 137 +++++++ .../test/search/160_exists_query.yml | 61 ++++ ...ex_phrase_search_field_match_only_text.yml | 64 ++++ ...atch_bool_prefix_field_match_only_text.yml | 345 ++++++++++++++++++ .../320_disallow_queries_field_match_only.yml | 147 ++++++++ .../10_basic_field_match_only_field.yml | 101 +++++ 19 files changed, 2684 insertions(+) create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml create mode 100644 modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml create mode 100644 modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/200_index_phrase_search_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml new file mode 100644 index 0000000000000..a93890f2b3865 --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml @@ -0,0 +1,67 @@ +# integration tests for queries with specific analysis chains + +"match query with stacked stems": + # Tests the match query stemmed tokens are "stacked" on top of the unstemmed + # versions in the same position. + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 1 + analysis: + analyzer: + index: + tokenizer: standard + filter: [lowercase] + search: + rest_total_hits_as_int: true + tokenizer: standard + filter: [lowercase, keyword_repeat, porter_stem, unique_stem] + filter: + unique_stem: + type: unique + only_on_same_position: true + mappings: + properties: + text: + type: match_only_text + analyzer: index + search_analyzer: search + + - do: + index: + index: test + id: 1 + body: { "text": "the fox runs across the street" } + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: fox runs + operator: AND + - match: {hits.total: 1} + + - do: + index: + index: test + id: 2 + body: { "text": "run fox run" } + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: fox runs + operator: AND + - match: {hits.total: 2} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only.yml new file mode 100644 index 0000000000000..9d536d346f6f1 --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only.yml @@ -0,0 +1,138 @@ +"ngram search": + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + analysis: + analyzer: + my_analyzer: + tokenizer: standard + filter: [my_ngram] + filter: + my_ngram: + type: ngram + min: 2, + max: 2 + mappings: + properties: + text: + type: match_only_text + analyzer: my_analyzer + + - do: + index: + index: test + id: 1 + body: { "text": "foo bar baz" } + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: foa + - match: {hits.total: 1} + +--- +"testNGramCopyField": + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + max_ngram_diff: 9 + analysis: + analyzer: + my_ngram_analyzer: + tokenizer: my_ngram_tokenizer + tokenizer: + my_ngram_tokenizer: + type: ngram + min: 1, + max: 10 + token_chars: [] + mappings: + properties: + origin: + type: match_only_text + copy_to: meta + meta: + type: match_only_text + analyzer: my_ngram_analyzer + + - do: + index: + index: test + id: 1 + body: { "origin": "C.A1234.5678" } + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: 1234 + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: 1234.56 + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: A1234 + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + term: + meta: + value: a1234 + - match: {hits.total: 0} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: A1234 + analyzer: my_ngram_analyzer + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + meta: + query: a1234 + analyzer: my_ngram_analyzer + - match: {hits.total: 1} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml new file mode 100644 index 0000000000000..9c7d47827a27c --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml @@ -0,0 +1,134 @@ +"ngram highlighting": + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + index.max_ngram_diff: 19 + analysis: + tokenizer: + my_ngramt: + type: ngram + min_gram: 1 + max_gram: 20 + token_chars: letter,digit + filter: + my_ngram: + type: ngram + min_gram: 1 + max_gram: 20 + analyzer: + name2_index_analyzer: + tokenizer: whitespace + filter: [my_ngram] + name_index_analyzer: + tokenizer: my_ngramt + name_search_analyzer: + tokenizer: whitespace + mappings: + properties: + name: + type: match_only_text + term_vector: with_positions_offsets + analyzer: name_index_analyzer + search_analyzer: name_search_analyzer + name2: + type: match_only_text + term_vector: with_positions_offsets + analyzer: name2_index_analyzer + search_analyzer: name_search_analyzer + + - do: + index: + index: test + id: 1 + refresh: true + body: + name: logicacmg ehemals avinci - the know how company + name2: logicacmg ehemals avinci - the know how company + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name: + query: logica m + highlight: + fields: + - name: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name: + query: logica ma + highlight: + fields: + - name: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name: + query: logica + highlight: + fields: + - name: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name2: + query: logica m + highlight: + fields: + - name2: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name2.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name2: + query: logica ma + highlight: + fields: + - name2: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name2.0: "logicacmg ehemals avinci - the know how company"} + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + name2: + query: logica + highlight: + fields: + - name2: {} + - match: {hits.total: 1} + - match: {hits.hits.0.highlight.name2.0: "logicacmg ehemals avinci - the know how company"} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml new file mode 100644 index 0000000000000..d3dc85c87ed46 --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml @@ -0,0 +1,56 @@ +--- +"Test query string with snowball": + - do: + indices.create: + index: test + body: + mappings: + properties: + field: + type: match_only_text + number: + type: integer + + - do: + index: + index: test + id: 1 + body: { field: foo bar} + + - do: + indices.refresh: + index: [test] + + - do: + indices.validate_query: + index: test + q: field:bars + analyzer: snowball + + - is_true: valid + + - do: + search: + rest_total_hits_as_int: true + index: test + q: field:bars + analyzer: snowball + + - match: {hits.total: 1} + + - do: + explain: + index: test + id: 1 + q: field:bars + analyzer: snowball + + - is_true: matched + + - do: + count: + index: test + q: field:bars + analyzer: snowball + + - match: {count : 1} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml new file mode 100644 index 0000000000000..eb884644eac38 --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml @@ -0,0 +1,39 @@ +--- +"Test default search analyzer is applied": + - do: + indices.create: + index: test + body: + settings: + index.analysis.analyzer.default.type: simple + index.analysis.analyzer.default_search.type: german + mappings: + properties: + body: + type: match_only_text + + - do: + index: + index: test + id: 1 + body: + body: Ich lese die Bücher + + - do: + indices.refresh: + index: [ test ] + + - do: + search: + index: test + q: "body:Bücher" + + - match: { hits.total.value: 0 } + + - do: + search: + index: test + q: "body:Bücher" + analyzer: simple + + - match: { hits.total.value: 1 } diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only.yml new file mode 100644 index 0000000000000..b79511d5c4dfd --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only.yml @@ -0,0 +1,343 @@ +--- +"Test common terms query with stacked tokens": + - skip: + features: "allowed_warnings" + + - do: + indices.create: + index: test + body: + settings: + analysis: + filter: + syns: + type: synonym + synonyms: [ "quick,fast" ] + analyzer: + syns: + tokenizer: standard + filter: [ "syns" ] + mappings: + properties: + field1: + type: match_only_text + analyzer: syns + field2: + type: match_only_text + analyzer: syns + + - do: + index: + index: test + id: 3 + body: + field1: quick lazy huge brown pidgin + field2: the quick lazy huge brown fox jumps over the tree + + - do: + index: + index: test + id: 1 + body: + field1: the quick brown fox + + - do: + index: + index: test + id: 2 + body: + field1: the quick lazy huge brown fox jumps over the tree + refresh: true + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast brown + cutoff_frequency: 3 + low_freq_operator: or + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast brown + cutoff_frequency: 3 + low_freq_operator: and + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast brown + cutoff_frequency: 3 + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast huge fox + minimum_should_match: + low_freq: 3 + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast lazy fox brown + cutoff_frequency: 1 + minimum_should_match: + high_freq: 5 + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "1" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast lazy fox brown + cutoff_frequency: 1 + minimum_should_match: + high_freq: 6 + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the fast lazy fox brown + cutoff_frequency: 1 + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + common: + field1: + query: the quick brown + cutoff_frequency: 3 + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + allowed_warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + match: + field1: + query: the fast brown + cutoff_frequency: 3 + operator: and + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + match: + field1: + query: the fast brown + cutoff_frequency: 3 + operator: or + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.2._id: "3" } + + - do: + allowed_warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + match: + field1: + query: the fast brown + cutoff_frequency: 3 + minimum_should_match: 3 + - match: { hits.total: 2 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.1._id: "2" } + + - do: + allowed_warnings: + - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [multi_match] query can skip block of documents efficiently if the total number of hits is not tracked]' + search: + rest_total_hits_as_int: true + body: + query: + multi_match: + query: the fast brown + fields: [ "field1", "field2" ] + cutoff_frequency: 3 + operator: and + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.2._id: "2" } + +--- +"Test match query with synonyms - see #3881 for extensive description of the issue": + - do: + indices.create: + index: test + body: + settings: + analysis: + filter: + synonym: + type: synonym + synonyms: [ "quick,fast" ] + analyzer: + index: + type: custom + tokenizer: standard + filter: lowercase + search: + rest_total_hits_as_int: true + type: custom + tokenizer: standard + filter: [ lowercase, synonym ] + mappings: + properties: + text: + type: match_only_text + analyzer: index + search_analyzer: search + + - do: + index: + index: test + id: 1 + body: + text: quick brown fox + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: quick + operator: and + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: quick brown + operator: and + - match: { hits.total: 1 } + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: fast + operator: and + - match: { hits.total: 1 } + + - do: + index: + index: test + id: 2 + body: + text: fast brown fox + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: quick + operator: and + - match: { hits.total: 2 } + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: quick brown + operator: and + - match: { hits.total: 2 } diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only.yml new file mode 100644 index 0000000000000..4b8bc4857250f --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only.yml @@ -0,0 +1,206 @@ +setup: + - do: + indices.create: + index: test + body: + settings: + index: + number_of_shards: 1 # keep scoring stable + analysis: + filter: + syns: + type: synonym + synonyms: [ "wtf, what the fudge", "foo, bar baz" ] + graph_syns: + type: synonym_graph + synonyms: [ "wtf, what the fudge", "foo, bar baz" ] + analyzer: + lower_syns: + type: custom + tokenizer: standard + filter: [ lowercase, syns ] + lower_graph_syns: + type: custom + tokenizer: standard + filter: [ lowercase, graph_syns ] + mappings: + properties: + field: + type: match_only_text + + - do: + index: + index: test + id: 1 + body: + text: say wtf happened foo + - do: + index: + index: test + id: 2 + body: + text: bar baz what the fudge man + + - do: + index: + index: test + id: 3 + body: + text: wtf + + - do: + index: + index: test + id: 4 + body: + text: what is the name for fudge + + - do: + index: + index: test + id: 5 + body: + text: bar two three + + - do: + index: + index: test + id: 6 + body: + text: bar baz two three + refresh: true + +--- +"simple multiterm phrase": + - do: + search: + rest_total_hits_as_int: true + body: + query: + match_phrase: + text: + query: foo two three + analyzer: lower_syns + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "5" } # incorrect match because we're not using graph synonyms + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match_phrase: + text: + query: foo two three + analyzer: lower_graph_syns + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "6" } # correct match because we're using graph synonyms + +--- +"simple multiterm and": + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: say what the fudge + analyzer: lower_syns + operator: and + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } # non-graph synonyms coincidentally give us the correct answer here + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: say what the fudge + analyzer: lower_graph_syns + operator: and + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } + +--- +"minimum should match": + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: three what the fudge foo + operator: or + analyzer: lower_graph_syns + auto_generate_synonyms_phrase_query: false + - match: { hits.total: 6 } + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: three what the fudge foo + operator: or + analyzer: lower_graph_syns + minimum_should_match: 80% + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "6" } + - match: { hits.hits.2._id: "1" } + +--- +"multiterm synonyms phrase": + - do: + search: + rest_total_hits_as_int: true + body: + query: + match: + text: + query: wtf + operator: and + analyzer: lower_graph_syns + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.2._id: "1" } + +--- +"phrase prefix": + - do: + index: + index: test + id: 7 + body: + text: "WTFD!" + + - do: + index: + index: test + id: 8 + body: + text: "Weird Al's WHAT THE FUDGESICLE" + refresh: true + + - do: + search: + rest_total_hits_as_int: true + body: + query: + match_phrase_prefix: + text: + query: wtf + analyzer: lower_graph_syns + - match: { hits.total: 5 } + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.1._id: "7" } + - match: { hits.hits.2._id: "1" } + - match: { hits.hits.3._id: "8" } + - match: { hits.hits.4._id: "2" } diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml new file mode 100644 index 0000000000000..6691621fbdf22 --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml @@ -0,0 +1,64 @@ +# integration tests for intervals queries using analyzers +setup: + - do: + indices.create: + index: test + body: + mappings: + properties: + text: + type: match_only_text + analyzer: standard + text_en: + type: match_only_text + analyzer: english + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "test", "_id": "4"}}' + - '{"text" : "Outside it is cold and wet and raining cats and dogs", + "text_en" : "Outside it is cold and wet and raining cats and dogs"}' + +--- +"Test use_field": + - skip: + version: " - 7.1.99" + reason: "Implemented in 7.2" + - do: + catch: bad_request + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - match: + query: cats + - match: + query: dog + max_gaps: 1 + - match: { status: 400 } + - match: { error.type: "search_phase_execution_exception"} + - match: { error.reason: "all shards failed"} + - do: + catch: bad_request + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - match: + query: cats + - match: + query: dog + use_field: text_en + max_gaps: 1 + - match: { status: 400 } + - match: { error.type: "search_phase_execution_exception"} + - match: { error.reason: "all shards failed"} diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml new file mode 100644 index 0000000000000..38afe7db89efd --- /dev/null +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml @@ -0,0 +1,226 @@ +# Integration tests for the phrase suggester with a few analyzers + +setup: + - do: + indices.create: + index: test + body: + settings: + number_of_shards: 1 + number_of_replicas: 1 + analysis: + analyzer: + body: + tokenizer: standard + filter: [lowercase] + bigram: + tokenizer: standard + filter: [lowercase, bigram] + ngram: + tokenizer: standard + filter: [lowercase, ngram] + reverse: + tokenizer: standard + filter: [lowercase, reverse] + filter: + bigram: + type: shingle + output_unigrams: false + min_shingle_size: 2 + max_shingle_size: 2 + ngram: + type: shingle + output_unigrams: true + min_shingle_size: 2 + max_shingle_size: 2 + mappings: + properties: + body: + type: match_only_text + analyzer: body + fields: + bigram: + type: match_only_text + analyzer: bigram + ngram: + type: match_only_text + analyzer: ngram + reverse: + type: match_only_text + analyzer: reverse + + - do: + bulk: + index: test + refresh: true + body: | + { "index": {} } + { "body": "Xorr the God-Jewel" } + { "index": {} } + { "body": "Xorn" } + { "index": {} } + { "body": "Arthur, King of the Britons" } + { "index": {} } + { "body": "Sir Lancelot the Brave" } + { "index": {} } + { "body": "Patsy, Arthur's Servant" } + { "index": {} } + { "body": "Sir Robin the Not-Quite-So-Brave-as-Sir-Lancelot" } + { "index": {} } + { "body": "Sir Bedevere the Wise" } + { "index": {} } + { "body": "Sir Galahad the Pure" } + { "index": {} } + { "body": "Miss Islington, the Witch" } + { "index": {} } + { "body": "Zoot" } + { "index": {} } + { "body": "Leader of Robin's Minstrels" } + { "index": {} } + { "body": "Old Crone" } + { "index": {} } + { "body": "Frank, the Historian" } + { "index": {} } + { "body": "Frank's Wife" } + { "index": {} } + { "body": "Dr. Piglet" } + { "index": {} } + { "body": "Dr. Winston" } + { "index": {} } + { "body": "Sir Robin (Stand-in)" } + { "index": {} } + { "body": "Knight Who Says Ni" } + { "index": {} } + { "body": "Police sergeant who stops the film" } + +--- +"sorts by score": + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.ngram + force_unigrams: true + max_errors: 0.5 + direct_generator: + - field: body.ngram + min_word_length: 1 + suggest_mode: always + + - match: {suggest.test.0.options.0.text: xorr the god jewel} + - match: {suggest.test.0.options.1.text: xorn the god jewel} + +--- +"breaks ties by sorting terms": + # This runs the suggester without bigrams so we can be sure of the sort order + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body + analyzer: body + force_unigrams: true + max_errors: 0.5 + direct_generator: + - field: body + min_word_length: 1 + suggest_mode: always + + # The scores are identical but xorn comes first because it sorts first + - match: {suggest.test.0.options.0.text: xorn the god jewel} + - match: {suggest.test.0.options.1.text: xorr the god jewel} + - match: {suggest.test.0.options.0.score: $body.suggest.test.0.options.0.score} + +--- +"fails when asked to run on a field without unigrams": + - do: + catch: /since it doesn't emit unigrams/ + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.bigram + + - do: + catch: /since it doesn't emit unigrams/ + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.bigram + analyzer: bigram + +--- +"doesn't fail when asked to run on a field without unigrams when force_unigrams=false": + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.bigram + force_unigrams: false + + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: xor the got-jewel + test: + phrase: + field: body.bigram + analyzer: bigram + force_unigrams: false + +--- +"reverse suggestions": + - do: + search: + rest_total_hits_as_int: true + size: 0 + index: test + body: + suggest: + text: Artur, Ging of the Britons + test: + phrase: + field: body.ngram + force_unigrams: true + max_errors: 0.5 + direct_generator: + - field: body.reverse + min_word_length: 1 + suggest_mode: always + pre_filter: reverse + post_filter: reverse + + - match: {suggest.test.0.options.0.text: arthur king of the britons} diff --git a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml new file mode 100644 index 0000000000000..19c50b9157f32 --- /dev/null +++ b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml @@ -0,0 +1,201 @@ +setup: + - skip: + version: " - 7.1.99" + reason: "added in 7.2.0" + + - do: + indices.create: + index: test + body: + settings: + number_of_replicas: 0 + mappings: + properties: + a_field: + type: search_as_you_type + analyzer: simple + max_shingle_size: 4 + text_field: + type: match_only_text + analyzer: simple + + - do: + index: + index: test + id: 1 + body: + a_field: "quick brown fox jump lazy dog" + text_field: "quick brown fox jump lazy dog" + + - do: + indices.refresh: {} + +--- +"phrase query": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + a_field: "brown" + highlight: + fields: + a_field: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field.0: "quick brown fox jump lazy dog" } + +--- +"bool prefix query": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + a_field: "brown fo" + highlight: + fields: + a_field: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field.0: "quick brown fox jump lazy dog" } + +--- +"multi match bool prefix query 1 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fo" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: null } + - match: { hits.hits.0.highlight.a_field\._3gram: null } + - match: { hits.hits.0.highlight.a_field\._4gram: null } + +--- +"multi match bool prefix query 2 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox ju" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._3gram: null } + - match: { hits.hits.0.highlight.a_field\._4gram: null } + +--- +"multi match bool prefix query 3 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump la" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._3gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._4gram: null } + +--- +"multi match bool prefix query 4 complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump lazy d" + type: "bool_prefix" + fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ] + highlight: + fields: + a_field: + type: unified + a_field._2gram: + type: unified + a_field._3gram: + type: unified + a_field._4gram: + type: unified + + - match: { hits.total: 1 } + - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" } + - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._3gram: ["quick brown fox jump lazy dog"] } + - match: { hits.hits.0.highlight.a_field\._4gram: ["quick brown fox jump lazy dog"] } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml new file mode 100644 index 0000000000000..18129e1d11861 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml @@ -0,0 +1,50 @@ +--- +"validate_query with query_string parameters": + - do: + indices.create: + index: test + body: + mappings: + properties: + field: + type: match_only_text + number: + type: integer + + - do: + indices.validate_query: + index: test + q: bar + df: field + + - is_true: valid + + - do: + indices.validate_query: + index: test + q: field:foo field:xyz + + - is_true: valid + + - do: + indices.validate_query: + index: test + q: field:foo field:xyz + default_operator: AND + + - is_true: valid + + - do: + indices.validate_query: + index: test + q: field:BA* + + - is_true: valid + + - do: + indices.validate_query: + index: test + q: number:foo + lenient: true + + - is_true: valid diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml new file mode 100644 index 0000000000000..c75dee019a351 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml @@ -0,0 +1,154 @@ +--- +"Default index": + - do: + indices.create: + index: goodbad + body: + settings: + number_of_shards: "1" + mappings: + properties: + text: + type: match_only_text + fielddata: true + class: + type: keyword + + - do: + index: + index: goodbad + id: 1 + body: { text: "good", class: "good" } + - do: + index: + index: goodbad + id: 2 + body: { text: "good", class: "good" } + - do: + index: + index: goodbad + id: 3 + body: { text: "bad", class: "bad" } + - do: + index: + index: goodbad + id: 4 + body: { text: "bad", class: "bad" } + - do: + index: + index: goodbad + id: 5 + body: { text: "good bad", class: "good" } + - do: + index: + index: goodbad + id: 6 + body: { text: "good bad", class: "bad" } + - do: + index: + index: goodbad + id: 7 + body: { text: "bad", class: "bad" } + + + + - do: + indices.refresh: + index: [goodbad] + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + + - match: {hits.total: 7} + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + body: {"aggs": {"class": {"terms": {"field": "class"},"aggs": {"sig_terms": {"significant_terms": {"field": "text"}}}}}} + + - match: {aggregations.class.buckets.0.sig_terms.buckets.0.key: "bad"} + - match: {aggregations.class.buckets.1.sig_terms.buckets.0.key: "good"} + +--- +"IP test": + - do: + indices.create: + index: ip_index + body: + mappings: + properties: + ip: + type: ip + + - do: + index: + index: ip_index + id: 1 + body: { ip: "::1" } + - do: + index: + index: ip_index + id: 2 + body: { } + + - do: + indices.refresh: {} + + - do: + search: + rest_total_hits_as_int: true + body: { "query" : { "exists" : { "field" : "ip" } }, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "min_doc_count" : 1 } } } } + + - match: { hits.total: 1 } + + - length: { aggregations.ip_terms.buckets: 1 } + + - match: { aggregations.ip_terms.buckets.0.key: "::1" } + + - is_false: aggregations.ip_terms.buckets.0.key_as_string + + - match: { aggregations.ip_terms.buckets.0.doc_count: 1 } + + - do: + search: + rest_total_hits_as_int: true + body: { "query" : { "exists" : { "field" : "ip" } }, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "min_doc_count" : 1, "include" : [ "::1" ] } } } } + + - match: { hits.total: 1 } + + - length: { aggregations.ip_terms.buckets: 1 } + + - match: { aggregations.ip_terms.buckets.0.key: "::1" } + + - do: + search: + rest_total_hits_as_int: true + body: { "query" : { "exists" : { "field" : "ip" } }, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "min_doc_count" : 1, "exclude" : [ "::1" ] } } } } + + - match: { hits.total: 1 } + + - length: { aggregations.ip_terms.buckets: 0 } + + - do: + catch: /Aggregation \[ip_terms\] cannot support regular expression style include\/exclude settings as they can only be applied to string fields\. Use an array of values for include\/exclude clauses/ + search: + rest_total_hits_as_int: true + body: { "size" : 0, "aggs" : { "ip_terms" : { "significant_terms" : { "field" : "ip", "exclude" : "127.*" } } } } + +--- +'Misspelled fields get "did you mean"': + - skip: + version: " - 7.6.99" + reason: Implemented in 8.0 (to be backported to 7.7) + - do: + catch: /\[significant_terms\] unknown field \[jlp\] did you mean \[jlh\]\?/ + search: + body: + aggs: + foo: + significant_terms: + field: foo + jlp: {} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only.yml new file mode 100644 index 0000000000000..873d59907982a --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only.yml @@ -0,0 +1,151 @@ +--- +"Default index": + + - do: + indices.create: + index: goodbad + body: + settings: + number_of_shards: "1" + mappings: + properties: + text: + type: match_only_text + fielddata: false + class: + type: keyword + + - do: + index: + index: goodbad + id: 1 + body: { text: "good", class: "good" } + - do: + index: + index: goodbad + id: 2 + body: { text: "good", class: "good" } + - do: + index: + index: goodbad + id: 3 + body: { text: "bad", class: "bad" } + - do: + index: + index: goodbad + id: 4 + body: { text: "bad", class: "bad" } + - do: + index: + index: goodbad + id: 5 + body: { text: "good bad", class: "good" } + - do: + index: + index: goodbad + id: 6 + body: { text: "good bad", class: "bad" } + - do: + index: + index: goodbad + id: 7 + body: { text: "bad", class: "bad" } + + + + - do: + indices.refresh: + index: [goodbad] + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + + - match: {hits.total: 7} + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + body: {"aggs": {"class": {"terms": {"field": "class"},"aggs": {"sig_text": {"significant_text": {"field": "text"}}}}}} + + - match: {aggregations.class.buckets.0.sig_text.buckets.0.key: "bad"} + - match: {aggregations.class.buckets.1.sig_text.buckets.0.key: "good"} + +--- +"Dedup noise": + + - do: + indices.create: + index: goodbad + body: + settings: + number_of_shards: "1" + mappings: + properties: + text: + type: match_only_text + fielddata: false + class: + type: keyword + + - do: + index: + index: goodbad + id: 1 + body: { text: "good noisewords1 g1 g2 g3 g4 g5 g6", class: "good" } + - do: + index: + index: goodbad + id: 2 + body: { text: "good noisewords2 g1 g2 g3 g4 g5 g6", class: "good" } + - do: + index: + index: goodbad + id: 3 + body: { text: "bad noisewords3 b1 b2 b3 b4 b5 b6", class: "bad" } + - do: + index: + index: goodbad + id: 4 + body: { text: "bad noisewords4 b1 b2 b3 b4 b5 b6", class: "bad" } + - do: + index: + index: goodbad + id: 5 + body: { text: "good bad noisewords5 gb1 gb2 gb3 gb4 gb5 gb6", class: "good" } + - do: + index: + index: goodbad + id: 6 + body: { text: "good bad noisewords6 gb1 gb2 gb3 gb4 gb5 gb6", class: "bad" } + - do: + index: + index: goodbad + id: 7 + body: { text: "bad noisewords7 b1 b2 b3 b4 b5 b6", class: "bad" } + + + + - do: + indices.refresh: + index: [goodbad] + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + + - match: {hits.total: 7} + + - do: + search: + rest_total_hits_as_int: true + index: goodbad + body: {"aggs": {"class": {"terms": {"field": "class"},"aggs": {"sig_text": {"significant_text": {"field": "text", "filter_duplicate_text": true}}}}}} + + - match: {aggregations.class.buckets.0.sig_text.buckets.0.key: "bad"} + - length: { aggregations.class.buckets.0.sig_text.buckets: 1 } + - match: {aggregations.class.buckets.1.sig_text.buckets.0.key: "good"} + - length: { aggregations.class.buckets.1.sig_text.buckets: 1 } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml new file mode 100644 index 0000000000000..a1b9e7a5506e2 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml @@ -0,0 +1,137 @@ +setup: + - do: + indices.create: + index: test + body: + mappings: + _source: + excludes: ["nested.stored_only"] + properties: + nested: + type: nested + properties: + field: + type: text + fields: + vectors: + type: text + term_vector: "with_positions_offsets" + postings: + type: text + index_options: "offsets" + stored: + type: match_only_text + store: true + stored_only: + type: match_only_text + store: true + - do: + index: + index: test + id: 1 + refresh: true + body: + nested: + field : "The quick brown fox is brown." + stored : "The quick brown fox is brown." + stored_only : "The quick brown fox is brown." + +--- +"Unified highlighter": + - do: + search: + index: test + body: + query: + nested: + path: "nested" + query: + multi_match: + query: "quick brown fox" + fields: [ "nested.field", "nested.field.vectors", "nested.field.postings" ] + inner_hits: + highlight: + type: "unified" + fields: + nested.field: {} + nested.field.vectors: {} + nested.field.postings: {} + + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field.0: "The quick brown fox is brown." } + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field\.vectors.0: "The quick brown fox is brown." } + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field\.postings.0: "The quick brown fox is brown." } + +--- +"Unified highlighter with stored fields": + - do: + search: + index: test + body: + query: + nested: + path: "nested" + query: + multi_match: + query: "quick brown fox" + fields: [ "nested.stored", "nested.stored_only" ] + inner_hits: + highlight: + type: "unified" + fields: + nested.stored: {} + nested.stored_only: {} + + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.stored.0: "The quick brown fox is brown." } + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.stored_only.0: "The quick brown fox is brown." } + +--- +"Unified highlighter with stored fields and disabled source": + - skip: + version: "- 7.10.1" + reason: "bug fix introduced in 7.10.2" + - do: + indices.create: + index: disabled_source + body: + mappings: + _source: + enabled: false + properties: + nested: + type: nested + properties: + field: + type: match_only_text + stored_only: + type: match_only_text + store: true + - do: + index: + index: disabled_source + id: 1 + refresh: true + body: + nested: + field: "The quick brown fox is brown." + stored_only: "The quick brown fox is brown." + + - do: + search: + index: disabled_source + body: + query: + nested: + path: "nested" + query: + multi_match: + query: "quick brown fox" + fields: ["nested.field", "nested.stored_only"] + inner_hits: + highlight: + type: "unified" + fields: + nested.field: {} + nested.stored_only: {} + + - is_false: hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field + - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.stored_only.0: "The quick brown fox is brown."} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query.yml index be97930d41eb9..582880b0748c9 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query.yml @@ -45,6 +45,8 @@ setup: type: keyword text: type: text + match_only_text: + type: match_only_text - do: headers: @@ -70,6 +72,7 @@ setup: inner1: "foo" inner2: "bar" text: "foo bar" + match_only_text: "foo bar" - do: headers: @@ -94,6 +97,7 @@ setup: object: inner1: "foo" text: "foo bar" + match_only_text: "foo bar" - do: headers: @@ -119,6 +123,7 @@ setup: object: inner2: "bar" text: "foo bar" + match_only_text: "foo bar" - do: index: @@ -184,6 +189,8 @@ setup: doc_values: false text: type: text + match_only_text: + type: match_only_text - do: headers: @@ -209,6 +216,7 @@ setup: inner1: "foo" inner2: "bar" text: "foo bar" + match_only_text: "foo bar" - do: headers: @@ -233,6 +241,7 @@ setup: object: inner1: "foo" text: "foo bar" + match_only_text: "foo bar" - do: headers: @@ -258,6 +267,7 @@ setup: object: inner2: "bar" text: "foo bar" + match_only_text: "foo bar" - do: index: @@ -322,6 +332,8 @@ setup: type: keyword text: type: text + match_only_text: + type: match_only_text - do: indices.refresh: @@ -534,7 +546,18 @@ setup: field: text - match: {hits.total: 3} +--- +"Test exists query on mapped match_only_text field": + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + exists: + field: match_only_text + - match: {hits.total: 3} --- "Test exists query on _id field": - do: @@ -821,6 +844,18 @@ setup: - match: {hits.total: 0} --- +"Test exists query on unmapped match_only_text field": + - do: + search: + rest_total_hits_as_int: true + index: test-unmapped + body: + query: + exists: + field: match_only_text + + - match: {hits.total: 0} +--- "Test exists query on binary field in empty index": - do: search: @@ -1028,6 +1063,19 @@ setup: - match: {hits.total: 0} +--- +"Test exists query on match_only_text field in empty index": + - do: + search: + rest_total_hits_as_int: true + index: test-empty + body: + query: + exists: + field: match_only_text + + - match: {hits.total: 0} + --- "Test exists query on mapped binary field with no doc values": - do: @@ -1236,3 +1284,16 @@ setup: field: text - match: {hits.total: 3} + +--- +"Test exists query on mapped match_only_text field with no doc values": + - do: + search: + rest_total_hits_as_int: true + index: test-no-dv + body: + query: + exists: + field: match_only_text + + - match: {hits.total: 3} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_index_phrase_search_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_index_phrase_search_field_match_only_text.yml new file mode 100644 index 0000000000000..7c0499de538cf --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_index_phrase_search_field_match_only_text.yml @@ -0,0 +1,64 @@ +--- +"search with indexed phrases": + - do: + indices.create: + index: test + body: + mappings: + properties: + text: + type: match_only_text + index_phrases: true + + - do: + index: + index: test + id: 1 + body: { text: "peter piper picked a peck of pickled peppers" } + + - do: + indices.refresh: + index: [test] + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + text: + query: "peter piper" + + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + index: test + q: '"peter piper"~1' + df: text + + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + text: "peter piper picked" + + - match: {hits.total: 1} + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase: + text: "piper" + + - match: {hits.total: 1} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml new file mode 100644 index 0000000000000..3b380bb808245 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml @@ -0,0 +1,345 @@ +setup: + - skip: + version: " - 7.1.99" + reason: "added in 7.2.0" + + - do: + indices.create: + index: test + body: + mappings: + properties: + my_field1: + type: match_only_text + my_field2: + type: match_only_text + + - do: + index: + index: test + id: 1 + body: + my_field1: "brown fox jump" + my_field2: "xylophone" + + - do: + index: + index: test + id: 2 + body: + my_field1: "brown emu jump" + my_field2: "xylophone" + + - do: + index: + index: test + id: 3 + body: + my_field1: "jumparound" + my_field2: "emu" + + - do: + index: + index: test + id: 4 + body: + my_field1: "dog" + my_field2: "brown fox jump lazy" + + - do: + indices.refresh: {} + +--- +"scoring complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: "brown fox jump" + + - match: { hits.total: 3 } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + - match: { hits.hits.1._source.my_field1: "brown emu jump" } + - match: { hits.hits.2._source.my_field1: "jumparound" } + +--- +"scoring partial term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: "brown fox ju" + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1._source.my_field1: "brown emu jump" } + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2._source.my_field1: "jumparound" } + +--- +"minimum should match": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: + query: "brown fox jump" + minimum_should_match: 3 + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + +--- +"analyzer": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: + query: "BROWN dog" + analyzer: whitespace # this analyzer doesn't lowercase terms + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + +--- +"operator": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field1: + query: "brown fox jump" + operator: AND + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + +--- +"fuzziness": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_bool_prefix: + my_field2: + query: "xylophoen foo" + fuzziness: 1 + prefix_length: 1 + max_expansions: 10 + fuzzy_transpositions: true + fuzzy_rewrite: constant_score + + - match: { hits.total: 2 } + - match: { hits.hits.0._source.my_field2: "xylophone" } + - match: { hits.hits.1._source.my_field2: "xylophone" } + +--- +"multi_match single field complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump" + type: bool_prefix + fields: [ "my_field1" ] + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1._source.my_field1: "brown emu jump" } + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2._source.my_field1: "jumparound" } + +--- +"multi_match single field partial term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox ju" + type: bool_prefix + fields: [ "my_field1" ] + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0._source.my_field1: "brown fox jump" } + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1._source.my_field1: "brown emu jump" } + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2._source.my_field1: "jumparound" } + +--- +"multi_match multiple fields complete term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump lazy" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.1._source.my_field1: "brown fox jump" } + - match: { hits.hits.2._id: "2" } + - match: { hits.hits.2._source.my_field1: "brown emu jump" } + +--- +"multi_match multiple fields partial term": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump laz" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + + - match: { hits.total: 3 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.1._source.my_field1: "brown fox jump" } + - match: { hits.hits.2._id: "2" } + - match: { hits.hits.2._source.my_field1: "brown emu jump" } + +--- +"multi_match multiple fields with analyzer": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "BROWN FOX JUMP dog" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + analyzer: whitespace # this analyzer doesn't lowercase terms + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + +--- +"multi_match multiple fields with minimum_should_match": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown fox jump la" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + minimum_should_match: 4 + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + +--- +"multi_match multiple fields with fuzziness": + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "dob nomatch" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + fuzziness: 1 + + - match: { hits.total: 1 } + - match: { hits.hits.0._id: "4" } + - match: { hits.hits.0._source.my_field1: "dog" } + - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" } + +--- +"multi_match multiple fields with slop throws exception": + + - do: + catch: /\[slop\] not allowed for type \[bool_prefix\]/ + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + slop: 1 + +--- +"multi_match multiple fields with cutoff_frequency throws exception": + + - do: + catch: /\[cutoff_frequency\] not allowed for type \[bool_prefix\]/ + search: + rest_total_hits_as_int: true + index: test + body: + query: + multi_match: + query: "brown" + type: bool_prefix + fields: [ "my_field1", "my_field2" ] + cutoff_frequency: 0.001 diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only.yml new file mode 100644 index 0000000000000..40989de61810b --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only.yml @@ -0,0 +1,147 @@ +--- +setup: + - skip: + version: " - 7.6.99" + reason: "implemented in 7.7.0" + + - do: + indices.create: + index: test + body: + mappings: + properties: + text: + type: match_only_text + analyzer: standard + fields: + raw: + type: keyword + nested1: + type: nested + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "test", "_id": "1"}}' + - '{"text" : "Some like it hot, some like it cold", "nested1": [{"foo": "bar1"}]}' + - '{"index": {"_index": "test", "_id": "2"}}' + - '{"text" : "Its cold outside, theres no kind of atmosphere", "nested1": [{"foo": "bar2"}]}' + - '{"index": {"_index": "test", "_id": "3"}}' + - '{"text" : "Baby its cold there outside", "nested1": [{"foo": "bar3"}]}' + - '{"index": {"_index": "test", "_id": "4"}}' + - '{"text" : "Outside it is cold and wet", "nested1": [{"foo": "bar4"}]}' + +--- +teardown: + - skip: + version: " - 7.6.99" + reason: "implemented in 7.7.0" + + - do: + cluster.put_settings: + body: + transient: + search.allow_expensive_queries: null + +--- +"Test disallow expensive queries": + - skip: + version: " - 7.6.99" + reason: "implemented in 7.7.0" + + ### Check for initial setting = null -> false + - do: + cluster.get_settings: + flat_settings: true + + - is_false: search.allow_expensive_queries + + ### Update setting to false + - do: + cluster.put_settings: + body: + transient: + search.allow_expensive_queries: "false" + flat_settings: true + + - match: {transient: {search.allow_expensive_queries: "false"}} + + ### Prefix + - do: + catch: /\[prefix\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false. For optimised prefix queries on text fields please enable \[index_prefixes\]./ + search: + index: test + body: + query: + prefix: + text: + value: out + + ### Fuzzy + - do: + catch: /\[fuzzy\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + fuzzy: + text: + value: outwide + + ### Regexp + - do: + catch: /\[regexp\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + regexp: + text: + value: .*ou.*id.* + + ### Wildcard + - do: + catch: /\[wildcard\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + wildcard: + text: + value: out?ide + + ### Range on text + - do: + catch: /\[range\] queries on \[text\] or \[keyword\] fields cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + range: + text: + gte: "theres" + + ### Range on keyword + - do: + catch: /\[range\] queries on \[text\] or \[keyword\] fields cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + range: + text.raw: + gte : "Outside it is cold and wet" + + ### Nested + - do: + catch: /\[joining\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./ + search: + index: test + body: + query: + nested: + path: "nested1" + query: + bool: + must: [{"match" : {"nested1.foo" : "bar2"}}] diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml new file mode 100644 index 0000000000000..142293bc33c5c --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml @@ -0,0 +1,101 @@ +--- +"Basic /_search_shards test": + - do: + indices.create: + index: test_1 + + - do: + search_shards: + index: test_1 + routing: foo + + - match: { shards.0.0.index: test_1 } + +--- +"Search shards aliases with and without filters": + - do: + indices.create: + index: test_index + body: + settings: + index: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + field: + type: match_only_text + aliases: + test_alias_no_filter: {} + test_alias_filter_1: + filter: + term: + field : value1 + test_alias_filter_2: + filter: + term: + field : value2 + + - do: + search_shards: + index: test_alias_no_filter + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - is_true: indices.test_index + - is_false: indices.test_index.filter + - match: { indices.test_index.aliases: [test_alias_no_filter]} + + - do: + search_shards: + index: test_alias_filter_1 + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_filter_1] } + - match: { indices.test_index.filter.term.field.value: value1 } + - lte: { indices.test_index.filter.term.field.boost: 1.0 } + - gte: { indices.test_index.filter.term.field.boost: 1.0 } + + - do: + search_shards: + index: ["test_alias_filter_1","test_alias_filter_2"] + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_filter_1, test_alias_filter_2]} + - length: { indices.test_index.filter.bool.should: 2 } + - lte: { indices.test_index.filter.bool.should.0.term.field.boost: 1.0 } + - gte: { indices.test_index.filter.bool.should.0.term.field.boost: 1.0 } + - lte: { indices.test_index.filter.bool.should.1.term.field.boost: 1.0 } + - gte: { indices.test_index.filter.bool.should.1.term.field.boost: 1.0 } + - match: { indices.test_index.filter.bool.adjust_pure_negative: true} + - lte: { indices.test_index.filter.bool.boost: 1.0 } + - gte: { indices.test_index.filter.bool.boost: 1.0 } + + - do: + search_shards: + index: "test*" + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_filter_1, test_alias_filter_2, test_alias_no_filter]} + - is_false: indices.test_index.filter + + - do: + search_shards: + index: ["test_alias_filter_1","test_alias_no_filter"] + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_filter_1, test_alias_no_filter]} + - is_false: indices.test_index.filter + + - do: + search_shards: + index: ["test_alias_no_filter"] + + - length: { shards: 1 } + - match: { shards.0.0.index: test_index } + - match: { indices.test_index.aliases: [test_alias_no_filter]} + - is_false: indices.test_index.filter