diff --git a/CHANGELOG.md b/CHANGELOG.md
index f864f3d5a42e7..1b5e359b0836a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -36,6 +36,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Create separate transport action for render search template action ([#11170](https://github.com/opensearch-project/OpenSearch/pull/11170))
- Add additional handling in SearchTemplateRequest when simulate is set to true ([#11591](https://github.com/opensearch-project/OpenSearch/pull/11591))
- Introduce cluster level setting `cluster.index.restrict.replication.type` to prevent replication type setting override during index creations([#11583](https://github.com/opensearch-project/OpenSearch/pull/11583))
+- Add match_only_text field that is optimized for storage by trading off positional queries performance ([#11039](https://github.com/opensearch-project/OpenSearch/pull/11039))
### Dependencies
- Bumps jetty version to 9.4.52.v20230823 to fix GMS-2023-1857 ([#9822](https://github.com/opensearch-project/OpenSearch/pull/9822))
diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml
new file mode 100644
index 0000000000000..40ff2c2f4cdbe
--- /dev/null
+++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/11_match_field_match_only_text.yml
@@ -0,0 +1,70 @@
+# integration tests for queries with specific analysis chains
+
+"match query with stacked stems":
+ - skip:
+ version: " - 2.99.99"
+ reason: "match_only_text was added in 3.0"
+ # Tests the match query stemmed tokens are "stacked" on top of the unstemmed
+ # versions in the same position.
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ number_of_shards: 1
+ number_of_replicas: 1
+ analysis:
+ analyzer:
+ index:
+ tokenizer: standard
+ filter: [lowercase]
+ search:
+ rest_total_hits_as_int: true
+ tokenizer: standard
+ filter: [lowercase, keyword_repeat, porter_stem, unique_stem]
+ filter:
+ unique_stem:
+ type: unique
+ only_on_same_position: true
+ mappings:
+ properties:
+ text:
+ type: match_only_text
+ analyzer: index
+ search_analyzer: search
+
+ - do:
+ index:
+ index: test
+ id: 1
+ body: { "text": "the fox runs across the street" }
+ refresh: true
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ text:
+ query: fox runs
+ operator: AND
+ - match: {hits.total: 1}
+
+ - do:
+ index:
+ index: test
+ id: 2
+ body: { "text": "run fox run" }
+ refresh: true
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ text:
+ query: fox runs
+ operator: AND
+ - match: {hits.total: 2}
diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml
new file mode 100644
index 0000000000000..95b648dee47c8
--- /dev/null
+++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/20_ngram_search_field_match_only_text.yml
@@ -0,0 +1,144 @@
+"ngram search":
+ - skip:
+ version: " - 2.99.99"
+ reason: "match_only_text was added in 3.0"
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ number_of_shards: 1
+ number_of_replicas: 0
+ analysis:
+ analyzer:
+ my_analyzer:
+ tokenizer: standard
+ filter: [my_ngram]
+ filter:
+ my_ngram:
+ type: ngram
+ min_gram: 2
+ max_gram: 2
+ mappings:
+ properties:
+ text:
+ type: match_only_text
+ analyzer: my_analyzer
+
+ - do:
+ index:
+ index: test
+ id: 1
+ body: { "text": "foo bar baz" }
+ refresh: true
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ text:
+ query: foa
+ - match: {hits.total: 1}
+
+---
+"testNGramCopyField":
+ - skip:
+ version: " - 2.99.99"
+ reason: "match_only_text was added in 3.0"
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ number_of_shards: 1
+ number_of_replicas: 0
+ max_ngram_diff: 9
+ analysis:
+ analyzer:
+ my_ngram_analyzer:
+ tokenizer: my_ngram_tokenizer
+ tokenizer:
+ my_ngram_tokenizer:
+ type: ngram
+ min_gram: 1
+ max_gram: 10
+ token_chars: []
+ mappings:
+ properties:
+ origin:
+ type: match_only_text
+ copy_to: meta
+ meta:
+ type: match_only_text
+ analyzer: my_ngram_analyzer
+
+ - do:
+ index:
+ index: test
+ id: 1
+ body: { "origin": "C.A1234.5678" }
+ refresh: true
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ meta:
+ query: 1234
+ - match: {hits.total: 1}
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ meta:
+ query: 1234.56
+ - match: {hits.total: 1}
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ meta:
+ query: A1234
+ - match: {hits.total: 1}
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ term:
+ meta:
+ value: a1234
+ - match: {hits.total: 0}
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ meta:
+ query: A1234
+ analyzer: my_ngram_analyzer
+ - match: {hits.total: 1}
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ meta:
+ query: a1234
+ analyzer: my_ngram_analyzer
+ - match: {hits.total: 1}
diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml
new file mode 100644
index 0000000000000..597f55679a2c6
--- /dev/null
+++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/30_ngram_highligthing_field_match_only_text.yml
@@ -0,0 +1,137 @@
+"ngram highlighting":
+ - skip:
+ version: " - 2.99.99"
+ reason: "match_only_text was added in 3.0"
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ number_of_shards: 1
+ number_of_replicas: 0
+ index.max_ngram_diff: 19
+ analysis:
+ tokenizer:
+ my_ngramt:
+ type: ngram
+ min_gram: 1
+ max_gram: 20
+ token_chars: letter,digit
+ filter:
+ my_ngram:
+ type: ngram
+ min_gram: 1
+ max_gram: 20
+ analyzer:
+ name2_index_analyzer:
+ tokenizer: whitespace
+ filter: [my_ngram]
+ name_index_analyzer:
+ tokenizer: my_ngramt
+ name_search_analyzer:
+ tokenizer: whitespace
+ mappings:
+ properties:
+ name:
+ type: match_only_text
+ term_vector: with_positions_offsets
+ analyzer: name_index_analyzer
+ search_analyzer: name_search_analyzer
+ name2:
+ type: match_only_text
+ term_vector: with_positions_offsets
+ analyzer: name2_index_analyzer
+ search_analyzer: name_search_analyzer
+
+ - do:
+ index:
+ index: test
+ id: 1
+ refresh: true
+ body:
+ name: logicacmg ehemals avinci - the know how company
+ name2: logicacmg ehemals avinci - the know how company
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ name:
+ query: logica m
+ highlight:
+ fields:
+ - name: {}
+ - match: {hits.total: 1}
+ - match: {hits.hits.0.highlight.name.0: "logicacmg ehemals avinci - the know how company"}
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ name:
+ query: logica ma
+ highlight:
+ fields:
+ - name: {}
+ - match: {hits.total: 1}
+ - match: {hits.hits.0.highlight.name.0: "logicacmg ehemals avinci - the know how company"}
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ name:
+ query: logica
+ highlight:
+ fields:
+ - name: {}
+ - match: {hits.total: 1}
+ - match: {hits.hits.0.highlight.name.0: "logicacmg ehemals avinci - the know how company"}
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ name2:
+ query: logica m
+ highlight:
+ fields:
+ - name2: {}
+ - match: {hits.total: 1}
+ - match: {hits.hits.0.highlight.name2.0: "logicacmg ehemals avinci - the know how company"}
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ name2:
+ query: logica ma
+ highlight:
+ fields:
+ - name2: {}
+ - match: {hits.total: 1}
+ - match: {hits.hits.0.highlight.name2.0: "logicacmg ehemals avinci - the know how company"}
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ name2:
+ query: logica
+ highlight:
+ fields:
+ - name2: {}
+ - match: {hits.total: 1}
+ - match: {hits.hits.0.highlight.name2.0: "logicacmg ehemals avinci - the know how company"}
diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml
new file mode 100644
index 0000000000000..ddebb1d76acbc
--- /dev/null
+++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/40_query_string_field_match_only_text.yml
@@ -0,0 +1,59 @@
+---
+"Test query string with snowball":
+ - skip:
+ version: " - 2.99.99"
+ reason: "match_only_text was added in 3.0"
+ - do:
+ indices.create:
+ index: test
+ body:
+ mappings:
+ properties:
+ field:
+ type: match_only_text
+ number:
+ type: integer
+
+ - do:
+ index:
+ index: test
+ id: 1
+ body: { field: foo bar}
+
+ - do:
+ indices.refresh:
+ index: [test]
+
+ - do:
+ indices.validate_query:
+ index: test
+ q: field:bars
+ analyzer: snowball
+
+ - is_true: valid
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ index: test
+ q: field:bars
+ analyzer: snowball
+
+ - match: {hits.total: 1}
+
+ - do:
+ explain:
+ index: test
+ id: 1
+ q: field:bars
+ analyzer: snowball
+
+ - is_true: matched
+
+ - do:
+ count:
+ index: test
+ q: field:bars
+ analyzer: snowball
+
+ - match: {count : 1}
diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml
new file mode 100644
index 0000000000000..97f3fb65e94a2
--- /dev/null
+++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/41_query_string_with_default_analyzer_field_match_only_text.yml
@@ -0,0 +1,42 @@
+---
+"Test default search analyzer is applied":
+ - skip:
+ version: " - 2.99.99"
+ reason: "match_only_text was added in 3.0"
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ index.analysis.analyzer.default.type: simple
+ index.analysis.analyzer.default_search.type: german
+ mappings:
+ properties:
+ body:
+ type: match_only_text
+
+ - do:
+ index:
+ index: test
+ id: 1
+ body:
+ body: Ich lese die Bücher
+
+ - do:
+ indices.refresh:
+ index: [ test ]
+
+ - do:
+ search:
+ index: test
+ q: "body:Bücher"
+
+ - match: { hits.total.value: 0 }
+
+ - do:
+ search:
+ index: test
+ q: "body:Bücher"
+ analyzer: simple
+
+ - match: { hits.total.value: 1 }
diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only_text.yml
new file mode 100644
index 0000000000000..0c263a47a38e6
--- /dev/null
+++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/50_queries_with_synonyms_field_match_only_text.yml
@@ -0,0 +1,348 @@
+---
+"Test common terms query with stacked tokens":
+ - skip:
+ version: " - 2.99.99"
+ reason: "match_only_text was added in 3.0"
+ features: "allowed_warnings"
+
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ analysis:
+ filter:
+ syns:
+ type: synonym
+ synonyms: [ "quick,fast" ]
+ analyzer:
+ syns:
+ tokenizer: standard
+ filter: [ "syns" ]
+ mappings:
+ properties:
+ field1:
+ type: match_only_text
+ analyzer: syns
+ field2:
+ type: match_only_text
+ analyzer: syns
+
+ - do:
+ index:
+ index: test
+ id: 3
+ body:
+ field1: quick lazy huge brown pidgin
+ field2: the quick lazy huge brown fox jumps over the tree
+
+ - do:
+ index:
+ index: test
+ id: 1
+ body:
+ field1: the quick brown fox
+
+ - do:
+ index:
+ index: test
+ id: 2
+ body:
+ field1: the quick lazy huge brown fox jumps over the tree
+ refresh: true
+
+ - do:
+ allowed_warnings:
+ - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]'
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ common:
+ field1:
+ query: the fast brown
+ cutoff_frequency: 3
+ low_freq_operator: or
+ - match: { hits.total: 3 }
+ - match: { hits.hits.0._id: "1" }
+ - match: { hits.hits.1._id: "2" }
+ - match: { hits.hits.2._id: "3" }
+
+ - do:
+ allowed_warnings:
+ - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]'
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ common:
+ field1:
+ query: the fast brown
+ cutoff_frequency: 3
+ low_freq_operator: and
+ - match: { hits.total: 2 }
+ - match: { hits.hits.0._id: "1" }
+ - match: { hits.hits.1._id: "2" }
+
+ - do:
+ allowed_warnings:
+ - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]'
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ common:
+ field1:
+ query: the fast brown
+ cutoff_frequency: 3
+ - match: { hits.total: 3 }
+ - match: { hits.hits.0._id: "1" }
+ - match: { hits.hits.1._id: "2" }
+ - match: { hits.hits.2._id: "3" }
+
+ - do:
+ allowed_warnings:
+ - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]'
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ common:
+ field1:
+ query: the fast huge fox
+ minimum_should_match:
+ low_freq: 3
+ - match: { hits.total: 1 }
+ - match: { hits.hits.0._id: "2" }
+
+ - do:
+ allowed_warnings:
+ - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]'
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ common:
+ field1:
+ query: the fast lazy fox brown
+ cutoff_frequency: 1
+ minimum_should_match:
+ high_freq: 5
+ - match: { hits.total: 2 }
+ - match: { hits.hits.0._id: "2" }
+ - match: { hits.hits.1._id: "1" }
+
+ - do:
+ allowed_warnings:
+ - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]'
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ common:
+ field1:
+ query: the fast lazy fox brown
+ cutoff_frequency: 1
+ minimum_should_match:
+ high_freq: 6
+ - match: { hits.total: 1 }
+ - match: { hits.hits.0._id: "2" }
+
+ - do:
+ allowed_warnings:
+ - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]'
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ common:
+ field1:
+ query: the fast lazy fox brown
+ cutoff_frequency: 1
+ - match: { hits.total: 1 }
+ - match: { hits.hits.0._id: "2" }
+
+ - do:
+ allowed_warnings:
+ - 'Deprecated field [common] used, replaced by [[match] query which can efficiently skip blocks of documents if the total number of hits is not tracked]'
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ common:
+ field1:
+ query: the quick brown
+ cutoff_frequency: 3
+ - match: { hits.total: 3 }
+ - match: { hits.hits.0._id: "1" }
+ - match: { hits.hits.1._id: "2" }
+ - match: { hits.hits.2._id: "3" }
+
+ - do:
+ allowed_warnings:
+ - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]'
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ field1:
+ query: the fast brown
+ cutoff_frequency: 3
+ operator: and
+ - match: { hits.total: 2 }
+ - match: { hits.hits.0._id: "1" }
+ - match: { hits.hits.1._id: "2" }
+
+ - do:
+ allowed_warnings:
+ - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]'
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ field1:
+ query: the fast brown
+ cutoff_frequency: 3
+ operator: or
+ - match: { hits.total: 3 }
+ - match: { hits.hits.0._id: "1" }
+ - match: { hits.hits.1._id: "2" }
+ - match: { hits.hits.2._id: "3" }
+
+ - do:
+ allowed_warnings:
+ - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [match] query can skip block of documents efficiently if the total number of hits is not tracked]'
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ field1:
+ query: the fast brown
+ cutoff_frequency: 3
+ minimum_should_match: 3
+ - match: { hits.total: 2 }
+ - match: { hits.hits.0._id: "1" }
+ - match: { hits.hits.1._id: "2" }
+
+ - do:
+ allowed_warnings:
+ - 'Deprecated field [cutoff_frequency] used, replaced by [you can omit this option, the [multi_match] query can skip block of documents efficiently if the total number of hits is not tracked]'
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ multi_match:
+ query: the fast brown
+ fields: [ "field1", "field2" ]
+ cutoff_frequency: 3
+ operator: and
+ - match: { hits.total: 3 }
+ - match: { hits.hits.0._id: "3" }
+ - match: { hits.hits.1._id: "1" }
+ - match: { hits.hits.2._id: "2" }
+
+---
+"Test match query with synonyms - see #3881 for extensive description of the issue":
+ - skip:
+ version: " - 2.99.99"
+ reason: "match_only_text was added in 3.0"
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ analysis:
+ filter:
+ synonym:
+ type: synonym
+ synonyms: [ "quick,fast" ]
+ analyzer:
+ index:
+ type: custom
+ tokenizer: standard
+ filter: lowercase
+ search:
+ rest_total_hits_as_int: true
+ type: custom
+ tokenizer: standard
+ filter: [ lowercase, synonym ]
+ mappings:
+ properties:
+ text:
+ type: match_only_text
+ analyzer: index
+ search_analyzer: search
+
+ - do:
+ index:
+ index: test
+ id: 1
+ body:
+ text: quick brown fox
+ refresh: true
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ text:
+ query: quick
+ operator: and
+ - match: { hits.total: 1 }
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ text:
+ query: quick brown
+ operator: and
+ - match: { hits.total: 1 }
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ text:
+ query: fast
+ operator: and
+ - match: { hits.total: 1 }
+
+ - do:
+ index:
+ index: test
+ id: 2
+ body:
+ text: fast brown fox
+ refresh: true
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ text:
+ query: quick
+ operator: and
+ - match: { hits.total: 2 }
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ text:
+ query: quick brown
+ operator: and
+ - match: { hits.total: 2 }
diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only_text.yml
new file mode 100644
index 0000000000000..91a8b1509517e
--- /dev/null
+++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/60_synonym_graph_field_match_only_text.yml
@@ -0,0 +1,209 @@
+setup:
+ - skip:
+ version: " - 2.99.99"
+ reason: "match_only_text was added in 3.0"
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ index:
+ number_of_shards: 1 # keep scoring stable
+ analysis:
+ filter:
+ syns:
+ type: synonym
+ synonyms: [ "wtf, what the fudge", "foo, bar baz" ]
+ graph_syns:
+ type: synonym_graph
+ synonyms: [ "wtf, what the fudge", "foo, bar baz" ]
+ analyzer:
+ lower_syns:
+ type: custom
+ tokenizer: standard
+ filter: [ lowercase, syns ]
+ lower_graph_syns:
+ type: custom
+ tokenizer: standard
+ filter: [ lowercase, graph_syns ]
+ mappings:
+ properties:
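+ field:  # NOTE(review): documents and queries in this file use "text", not "field", so this match_only_text mapping is never exercised - confirm whether this key should be "text"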
+ type: match_only_text
+
+ - do:
+ index:
+ index: test
+ id: 1
+ body:
+ text: say wtf happened foo
+ - do:
+ index:
+ index: test
+ id: 2
+ body:
+ text: bar baz what the fudge man
+
+ - do:
+ index:
+ index: test
+ id: 3
+ body:
+ text: wtf
+
+ - do:
+ index:
+ index: test
+ id: 4
+ body:
+ text: what is the name for fudge
+
+ - do:
+ index:
+ index: test
+ id: 5
+ body:
+ text: bar two three
+
+ - do:
+ index:
+ index: test
+ id: 6
+ body:
+ text: bar baz two three
+ refresh: true
+
+---
+"simple multiterm phrase":
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match_phrase:
+ text:
+ query: foo two three
+ analyzer: lower_syns
+ - match: { hits.total: 1 }
+ - match: { hits.hits.0._id: "5" } # incorrect match because we're not using graph synonyms
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match_phrase:
+ text:
+ query: foo two three
+ analyzer: lower_graph_syns
+ - match: { hits.total: 1 }
+ - match: { hits.hits.0._id: "6" } # correct match because we're using graph synonyms
+
+---
+"simple multiterm and":
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ text:
+ query: say what the fudge
+ analyzer: lower_syns
+ operator: and
+ - match: { hits.total: 1 }
+ - match: { hits.hits.0._id: "1" } # non-graph synonyms coincidentally give us the correct answer here
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ text:
+ query: say what the fudge
+ analyzer: lower_graph_syns
+ operator: and
+ - match: { hits.total: 1 }
+ - match: { hits.hits.0._id: "1" }
+
+---
+"minimum should match":
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ text:
+ query: three what the fudge foo
+ operator: or
+ analyzer: lower_graph_syns
+ auto_generate_synonyms_phrase_query: false
+ - match: { hits.total: 6 }
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ text:
+ query: three what the fudge foo
+ operator: or
+ analyzer: lower_graph_syns
+ minimum_should_match: 80%
+ - match: { hits.total: 3 }
+ - match: { hits.hits.0._id: "2" }
+ - match: { hits.hits.1._id: "6" }
+ - match: { hits.hits.2._id: "1" }
+
+---
+"multiterm synonyms phrase":
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match:
+ text:
+ query: wtf
+ operator: and
+ analyzer: lower_graph_syns
+ - match: { hits.total: 3 }
+ - match: { hits.hits.0._id: "2" }
+ - match: { hits.hits.1._id: "3" }
+ - match: { hits.hits.2._id: "1" }
+
+---
+"phrase prefix":
+ - do:
+ index:
+ index: test
+ id: 7
+ body:
+ text: "WTFD!"
+
+ - do:
+ index:
+ index: test
+ id: 8
+ body:
+ text: "Weird Al's WHAT THE FUDGESICLE"
+ refresh: true
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ body:
+ query:
+ match_phrase_prefix:
+ text:
+ query: wtf
+ analyzer: lower_graph_syns
+ - match: { hits.total: 5 }
+ - match: { hits.hits.0._id: "3" }
+ - match: { hits.hits.1._id: "7" }
+ - match: { hits.hits.2._id: "1" }
+ - match: { hits.hits.3._id: "8" }
+ - match: { hits.hits.4._id: "2" }
diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml
new file mode 100644
index 0000000000000..9792c9d2695ea
--- /dev/null
+++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.query/70_intervals_field_match_only_text.yml
@@ -0,0 +1,67 @@
+# integration tests for intervals queries using analyzers
+setup:
+ - skip:
+ version: " - 2.99.99"
+ reason: "match_only_text was added in 3.0"
+ - do:
+ indices.create:
+ index: test
+ body:
+ mappings:
+ properties:
+ text:
+ type: match_only_text
+ analyzer: standard
+ text_en:
+ type: match_only_text
+ analyzer: english
+ - do:
+ bulk:
+ refresh: true
+ body:
+ - '{"index": {"_index": "test", "_id": "4"}}'
+ - '{"text" : "Outside it is cold and wet and raining cats and dogs",
+ "text_en" : "Outside it is cold and wet and raining cats and dogs"}'
+
+---
+"Test use_field":
+ - skip:
+ version: " - 2.99.99"
+ reason: "match_only_text was added in 3.0"
+ - do:
+ catch: bad_request
+ search:
+ index: test
+ body:
+ query:
+ intervals:
+ text:
+ all_of:
+ intervals:
+ - match:
+ query: cats
+ - match:
+ query: dog
+ max_gaps: 1
+ - match: { status: 400 }
+ - match: { error.type: "search_phase_execution_exception"}
+ - match: { error.reason: "all shards failed"}
+ - do:
+ catch: bad_request
+ search:
+ index: test
+ body:
+ query:
+ intervals:
+ text:
+ all_of:
+ intervals:
+ - match:
+ query: cats
+ - match:
+ query: dog
+ use_field: text_en
+ max_gaps: 1
+ - match: { status: 400 }
+ - match: { error.type: "search_phase_execution_exception"}
+ - match: { error.reason: "all shards failed"}
diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml
new file mode 100644
index 0000000000000..aff2b3f11101c
--- /dev/null
+++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/search.suggest/20_phrase_field_match_only_text.yml
@@ -0,0 +1,238 @@
+# Integration tests for the phrase suggester with a few analyzers
+
+setup:
+ - skip:
+ version: " - 2.99.99"
+ reason: "match_only_text was added in 3.0"
+ - do:
+ indices.create:
+ index: test
+ body:
+ settings:
+ number_of_shards: 1
+ number_of_replicas: 1
+ analysis:
+ analyzer:
+ body:
+ tokenizer: standard
+ filter: [lowercase]
+ bigram:
+ tokenizer: standard
+ filter: [lowercase, bigram]
+ ngram:
+ tokenizer: standard
+ filter: [lowercase, ngram]
+ reverse:
+ tokenizer: standard
+ filter: [lowercase, reverse]
+ filter:
+ bigram:
+ type: shingle
+ output_unigrams: false
+ min_shingle_size: 2
+ max_shingle_size: 2
+ ngram:
+ type: shingle
+ output_unigrams: true
+ min_shingle_size: 2
+ max_shingle_size: 2
+ mappings:
+ properties:
+ body:
+ type: match_only_text
+ analyzer: body
+ fields:
+ bigram:
+ type: match_only_text
+ analyzer: bigram
+ ngram:
+ type: match_only_text
+ analyzer: ngram
+ reverse:
+ type: match_only_text
+ analyzer: reverse
+
+ - do:
+ bulk:
+ index: test
+ refresh: true
+ body: |
+ { "index": {} }
+ { "body": "Xorr the God-Jewel" }
+ { "index": {} }
+ { "body": "Xorn" }
+ { "index": {} }
+ { "body": "Arthur, King of the Britons" }
+ { "index": {} }
+ { "body": "Sir Lancelot the Brave" }
+ { "index": {} }
+ { "body": "Patsy, Arthur's Servant" }
+ { "index": {} }
+ { "body": "Sir Robin the Not-Quite-So-Brave-as-Sir-Lancelot" }
+ { "index": {} }
+ { "body": "Sir Bedevere the Wise" }
+ { "index": {} }
+ { "body": "Sir Galahad the Pure" }
+ { "index": {} }
+ { "body": "Miss Islington, the Witch" }
+ { "index": {} }
+ { "body": "Zoot" }
+ { "index": {} }
+ { "body": "Leader of Robin's Minstrels" }
+ { "index": {} }
+ { "body": "Old Crone" }
+ { "index": {} }
+ { "body": "Frank, the Historian" }
+ { "index": {} }
+ { "body": "Frank's Wife" }
+ { "index": {} }
+ { "body": "Dr. Piglet" }
+ { "index": {} }
+ { "body": "Dr. Winston" }
+ { "index": {} }
+ { "body": "Sir Robin (Stand-in)" }
+ { "index": {} }
+ { "body": "Knight Who Says Ni" }
+ { "index": {} }
+ { "body": "Police sergeant who stops the film" }
+
+---
+"sorts by score":
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ size: 0
+ index: test
+ body:
+ suggest:
+ text: xor the got-jewel
+ test:
+ phrase:
+ field: body.ngram
+ force_unigrams: true
+ max_errors: 0.5
+ direct_generator:
+ - field: body.ngram
+ min_word_length: 1
+ suggest_mode: always
+
+ - match: {suggest.test.0.options.0.text: xorr the god jewel}
+ - match: {suggest.test.0.options.1.text: xorn the god jewel}
+
+---
+"breaks ties by sorting terms":
+ - skip:
+ version: " - 2.99.99"
+ reason: "match_only_text was added in 3.0"
+ # This runs the suggester without bigrams so we can be sure of the sort order
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ size: 0
+ index: test
+ body:
+ suggest:
+ text: xor the got-jewel
+ test:
+ phrase:
+ field: body
+ analyzer: body
+ force_unigrams: true
+ max_errors: 0.5
+ direct_generator:
+ - field: body
+ min_word_length: 1
+ suggest_mode: always
+
+ # The scores are identical but xorn comes first because it sorts first
+ - match: {suggest.test.0.options.0.text: xorn the god jewel}
+ - match: {suggest.test.0.options.1.text: xorr the god jewel}
+ - match: {suggest.test.0.options.0.score: $body.suggest.test.0.options.1.score}
+
+---
+"fails when asked to run on a field without unigrams":
+ - do:
+ catch: /since it doesn't emit unigrams/
+ search:
+ rest_total_hits_as_int: true
+ size: 0
+ index: test
+ body:
+ suggest:
+ text: xor the got-jewel
+ test:
+ phrase:
+ field: body.bigram
+
+ - do:
+ catch: /since it doesn't emit unigrams/
+ search:
+ rest_total_hits_as_int: true
+ size: 0
+ index: test
+ body:
+ suggest:
+ text: xor the got-jewel
+ test:
+ phrase:
+ field: body.bigram
+ analyzer: bigram
+
+---
+"doesn't fail when asked to run on a field without unigrams when force_unigrams=false":
+ - skip:
+ version: " - 2.99.99"
+ reason: "match_only_text was added in 3.0"
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ size: 0
+ index: test
+ body:
+ suggest:
+ text: xor the got-jewel
+ test:
+ phrase:
+ field: body.bigram
+ force_unigrams: false
+
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ size: 0
+ index: test
+ body:
+ suggest:
+ text: xor the got-jewel
+ test:
+ phrase:
+ field: body.bigram
+ analyzer: bigram
+ force_unigrams: false
+
+---
+"reverse suggestions":
+ - skip:
+ version: " - 2.99.99"
+ reason: "match_only_text was added in 3.0"
+ - do:
+ search:
+ rest_total_hits_as_int: true
+ size: 0
+ index: test
+ body:
+ suggest:
+ text: Artur, Ging of the Britons
+ test:
+ phrase:
+ field: body.ngram
+ force_unigrams: true
+ max_errors: 0.5
+ direct_generator:
+ - field: body.reverse
+ min_word_length: 1
+ suggest_mode: always
+ pre_filter: reverse
+ post_filter: reverse
+
+ - match: {suggest.test.0.options.0.text: arthur king of the britons}
diff --git a/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml
new file mode 100644
index 0000000000000..3cb8e09c70aed
--- /dev/null
+++ b/modules/mapper-extras/src/yamlRestTest/resources/rest-api-spec/test/search-as-you-type/20_highlighting_field_match_only_text.yml
@@ -0,0 +1,201 @@
+setup:  # one index with a search_as_you_type field and a match_only_text field, holding a single document
+  - skip:
+      version: " - 2.99.99"
+      reason: "match_only_text was added in 3.0"
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          settings:
+            number_of_replicas: 0
+          mappings:
+            properties:
+              a_field:
+                type: search_as_you_type
+                analyzer: simple
+                max_shingle_size: 4
+              text_field:
+                type: match_only_text
+                analyzer: simple
+
+  - do:
+      index:
+        index: test
+        id: 1
+        body:
+          a_field: "quick brown fox jump lazy dog"
+          text_field: "quick brown fox jump lazy dog"
+
+  - do:
+      indices.refresh: {}
+
+---
+"phrase query":  # match_phrase on a_field with unified highlighting; text_field is only checked through _source
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            match_phrase:
+              a_field: "brown"
+          highlight:
+            fields:
+              a_field:
+                type: unified
+
+  - match: { hits.total: 1 }
+  - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" }
+  - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" }
+  - match: { hits.hits.0.highlight.a_field.0: "quick brown fox jump lazy dog" }  # NOTE(review): fragment has no highlight tags (e.g. em) - confirm none were lost
+
+---
+"bool prefix query":  # match_bool_prefix on a_field; the last query term "fo" is an incomplete prefix
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            match_bool_prefix:
+              a_field: "brown fo"
+          highlight:
+            fields:
+              a_field:
+                type: unified
+
+  - match: { hits.total: 1 }
+  - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" }
+  - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" }
+  - match: { hits.hits.0.highlight.a_field.0: "quick brown fox jump lazy dog" }  # NOTE(review): fragment has no highlight tags (e.g. em) - confirm none were lost
+
+---
+"multi match bool prefix query 1 complete term":  # bool_prefix multi_match over a_field and its _2gram.._4gram subfields
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            multi_match:
+              query: "brown fo"
+              type: "bool_prefix"
+              fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ]
+          highlight:
+            fields:
+              a_field:
+                type: unified
+              a_field._2gram:
+                type: unified
+              a_field._3gram:
+                type: unified
+              a_field._4gram:
+                type: unified
+
+  - match: { hits.total: 1 }
+  - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" }
+  - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" }
+  - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] }  # NOTE(review): fragment has no highlight tags (e.g. em) - confirm none were lost
+  - match: { hits.hits.0.highlight.a_field\._2gram: null }  # with one complete term no shingle subfield is expected to highlight
+  - match: { hits.hits.0.highlight.a_field\._3gram: null }
+  - match: { hits.hits.0.highlight.a_field\._4gram: null }
+
+---
+"multi match bool prefix query 2 complete term":  # two complete terms: a highlight is now expected on _2gram as well
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            multi_match:
+              query: "brown fox ju"
+              type: "bool_prefix"
+              fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ]
+          highlight:
+            fields:
+              a_field:
+                type: unified
+              a_field._2gram:
+                type: unified
+              a_field._3gram:
+                type: unified
+              a_field._4gram:
+                type: unified
+
+  - match: { hits.total: 1 }
+  - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" }
+  - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" }
+  - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] }  # NOTE(review): fragment has no highlight tags (e.g. em) - confirm none were lost
+  - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] }
+  - match: { hits.hits.0.highlight.a_field\._3gram: null }
+  - match: { hits.hits.0.highlight.a_field\._4gram: null }
+
+---
+"multi match bool prefix query 3 complete term":  # three complete terms: highlights expected up to _3gram
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            multi_match:
+              query: "brown fox jump la"
+              type: "bool_prefix"
+              fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ]
+          highlight:
+            fields:
+              a_field:
+                type: unified
+              a_field._2gram:
+                type: unified
+              a_field._3gram:
+                type: unified
+              a_field._4gram:
+                type: unified
+
+  - match: { hits.total: 1 }
+  - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" }
+  - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" }
+  - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] }  # NOTE(review): fragment has no highlight tags (e.g. em) - confirm none were lost
+  - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] }
+  - match: { hits.hits.0.highlight.a_field\._3gram: ["quick brown fox jump lazy dog"] }
+  - match: { hits.hits.0.highlight.a_field\._4gram: null }
+
+---
+"multi match bool prefix query 4 complete term":  # four complete terms: every listed field is expected to highlight
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            multi_match:
+              query: "brown fox jump lazy d"
+              type: "bool_prefix"
+              fields: [ "a_field", "a_field._2gram", "a_field._3gram", "a_field._4gram" ]
+          highlight:
+            fields:
+              a_field:
+                type: unified
+              a_field._2gram:
+                type: unified
+              a_field._3gram:
+                type: unified
+              a_field._4gram:
+                type: unified
+
+  - match: { hits.total: 1 }
+  - match: { hits.hits.0._source.a_field: "quick brown fox jump lazy dog" }
+  - match: { hits.hits.0._source.text_field: "quick brown fox jump lazy dog" }
+  - match: { hits.hits.0.highlight.a_field: ["quick brown fox jump lazy dog"] }  # NOTE(review): fragment has no highlight tags (e.g. em) - confirm none were lost
+  - match: { hits.hits.0.highlight.a_field\._2gram: ["quick brown fox jump lazy dog"] }
+  - match: { hits.hits.0.highlight.a_field\._3gram: ["quick brown fox jump lazy dog"] }
+  - match: { hits.hits.0.highlight.a_field\._4gram: ["quick brown fox jump lazy dog"] }
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml
new file mode 100644
index 0000000000000..085c5633ac72b
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.validate_query/20_query_string_field_match_only_text.yml
@@ -0,0 +1,53 @@
+---
+"validate_query with query_string parameters":  # every query_string variant below must be reported as valid
+  - skip:
+      version: " - 2.99.99"
+      reason: "match_only_text was added in 3.0"
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            properties:
+              field:
+                type: match_only_text
+              number:
+                type: integer
+
+  - do:
+      indices.validate_query:
+        index: test
+        q: bar
+        df: field  # default field for the unqualified term
+
+  - is_true: valid
+
+  - do:
+      indices.validate_query:
+        index: test
+        q: field:foo field:xyz
+
+  - is_true: valid
+
+  - do:
+      indices.validate_query:
+        index: test
+        q: field:foo field:xyz
+        default_operator: AND
+
+  - is_true: valid
+
+  - do:
+      indices.validate_query:
+        index: test
+        q: field:BA*
+
+  - is_true: valid
+
+  - do:
+      indices.validate_query:
+        index: test
+        q: number:foo
+        lenient: true  # non-numeric term on the integer field; with lenient set the query is still expected to be valid
+
+  - is_true: valid
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml
new file mode 100644
index 0000000000000..7a96536a2e261
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/30_sig_terms_field_match_only_text.yml
@@ -0,0 +1,76 @@
+---
+"Default index":  # significant_terms on a match_only_text field, nested under a terms aggregation on class
+  - skip:
+      version: " - 2.99.99"
+      reason: "match_only_text was added in 3.0"
+  - do:
+      indices.create:
+        index: goodbad
+        body:
+          settings:
+            number_of_shards: "1"
+          mappings:
+            properties:
+              text:
+                type: match_only_text
+                fielddata: true  # this variant of the test enables fielddata on the aggregated field
+              class:
+                type: keyword
+
+  - do:
+      index:
+        index: goodbad
+        id: 1
+        body: { text: "good", class: "good" }
+  - do:
+      index:
+        index: goodbad
+        id: 2
+        body: { text: "good", class: "good" }
+  - do:
+      index:
+        index: goodbad
+        id: 3
+        body: { text: "bad", class: "bad" }
+  - do:
+      index:
+        index: goodbad
+        id: 4
+        body: { text: "bad", class: "bad" }
+  - do:
+      index:
+        index: goodbad
+        id: 5
+        body: { text: "good bad", class: "good" }
+  - do:
+      index:
+        index: goodbad
+        id: 6
+        body: { text: "good bad", class: "bad" }
+  - do:
+      index:
+        index: goodbad
+        id: 7
+        body: { text: "bad", class: "bad" }
+
+
+
+  - do:
+      indices.refresh:
+        index: [goodbad]
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: goodbad
+
+  - match: {hits.total: 7}
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: goodbad
+        body: {"aggs": {"class": {"terms": {"field": "class"},"aggs": {"sig_terms": {"significant_terms": {"field": "text"}}}}}}
+
+  - match: {aggregations.class.buckets.0.sig_terms.buckets.0.key: "bad"}
+  - match: {aggregations.class.buckets.1.sig_terms.buckets.0.key: "good"}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only_text.yml
new file mode 100644
index 0000000000000..bc41f157dfdc4
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/90_sig_text_field_match_only_text.yml
@@ -0,0 +1,155 @@
+---
+"Default index":  # significant_text on a match_only_text field, nested under a terms aggregation on class
+  - skip:
+      version: " - 2.99.99"
+      reason: "match_only_text was added in 3.0"
+  - do:
+      indices.create:
+        index: goodbad
+        body:
+          settings:
+            number_of_shards: "1"
+          mappings:
+            properties:
+              text:
+                type: match_only_text
+                fielddata: false  # fielddata is explicitly off for this aggregation test
+              class:
+                type: keyword
+
+  - do:
+      index:
+        index: goodbad
+        id: 1
+        body: { text: "good", class: "good" }
+  - do:
+      index:
+        index: goodbad
+        id: 2
+        body: { text: "good", class: "good" }
+  - do:
+      index:
+        index: goodbad
+        id: 3
+        body: { text: "bad", class: "bad" }
+  - do:
+      index:
+        index: goodbad
+        id: 4
+        body: { text: "bad", class: "bad" }
+  - do:
+      index:
+        index: goodbad
+        id: 5
+        body: { text: "good bad", class: "good" }
+  - do:
+      index:
+        index: goodbad
+        id: 6
+        body: { text: "good bad", class: "bad" }
+  - do:
+      index:
+        index: goodbad
+        id: 7
+        body: { text: "bad", class: "bad" }
+
+
+
+  - do:
+      indices.refresh:
+        index: [goodbad]
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: goodbad
+
+  - match: {hits.total: 7}
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: goodbad
+        body: {"aggs": {"class": {"terms": {"field": "class"},"aggs": {"sig_text": {"significant_text": {"field": "text"}}}}}}
+
+  - match: {aggregations.class.buckets.0.sig_text.buckets.0.key: "bad"}
+  - match: {aggregations.class.buckets.1.sig_text.buckets.0.key: "good"}
+
+---
+"Dedup noise":  # with filter_duplicate_text each class bucket must keep exactly one significant term
+  - skip:
+      version: " - 2.99.99"
+      reason: "match_only_text was added in 3.0"
+  - do:
+      indices.create:
+        index: goodbad
+        body:
+          settings:
+            number_of_shards: "1"
+          mappings:
+            properties:
+              text:
+                type: match_only_text
+                fielddata: false
+              class:
+                type: keyword
+
+  - do:
+      index:
+        index: goodbad
+        id: 1
+        body: { text: "good noisewords1 g1 g2 g3 g4 g5 g6", class: "good" }
+  - do:
+      index:
+        index: goodbad
+        id: 2
+        body: { text: "good noisewords2 g1 g2 g3 g4 g5 g6", class: "good" }
+  - do:
+      index:
+        index: goodbad
+        id: 3
+        body: { text: "bad noisewords3 b1 b2 b3 b4 b5 b6", class: "bad" }
+  - do:
+      index:
+        index: goodbad
+        id: 4
+        body: { text: "bad noisewords4 b1 b2 b3 b4 b5 b6", class: "bad" }
+  - do:
+      index:
+        index: goodbad
+        id: 5
+        body: { text: "good bad noisewords5 gb1 gb2 gb3 gb4 gb5 gb6", class: "good" }
+  - do:
+      index:
+        index: goodbad
+        id: 6
+        body: { text: "good bad noisewords6 gb1 gb2 gb3 gb4 gb5 gb6", class: "bad" }
+  - do:
+      index:
+        index: goodbad
+        id: 7
+        body: { text: "bad noisewords7 b1 b2 b3 b4 b5 b6", class: "bad" }
+
+
+
+  - do:
+      indices.refresh:
+        index: [goodbad]
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: goodbad
+
+  - match: {hits.total: 7}
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: goodbad
+        body: {"aggs": {"class": {"terms": {"field": "class"},"aggs": {"sig_text": {"significant_text": {"field": "text", "filter_duplicate_text": true}}}}}}
+
+  - match: {aggregations.class.buckets.0.sig_text.buckets.0.key: "bad"}
+  - length: { aggregations.class.buckets.0.sig_text.buckets: 1 }  # the repeated g*/b*/gb* tokens must not surface as extra terms
+  - match: {aggregations.class.buckets.1.sig_text.buckets.0.key: "good"}
+  - length: { aggregations.class.buckets.1.sig_text.buckets: 1 }
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml
new file mode 100644
index 0000000000000..7100d620bf19e
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.inner_hits/20_highlighting_field_match_only_text.yml
@@ -0,0 +1,137 @@
+setup:  # nested mapping: a text field with two subfields plus two stored match_only_text fields
+  - skip:
+      version: " - 2.99.99"
+      reason: "match_only_text was added in 3.0"
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            _source:
+              excludes: ["nested.stored_only"]  # this field is kept out of _source and is only available as a stored field
+            properties:
+              nested:
+                type: nested
+                properties:
+                  field:
+                    type: text
+                    fields:
+                      vectors:
+                        type: text
+                        term_vector: "with_positions_offsets"
+                      postings:
+                        type: text
+                        index_options: "offsets"
+                  stored:
+                    type: match_only_text
+                    store: true
+                  stored_only:
+                    type: match_only_text
+                    store: true
+  - do:
+      index:
+        index: test
+        id: 1
+        refresh: true
+        body:
+          nested:
+            field : "The quick brown fox is brown."
+            stored : "The quick brown fox is brown."
+            stored_only : "The quick brown fox is brown."
+
+---
+"Unified highlighter":  # highlights nested.field and its vectors/postings subfields inside inner_hits
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            nested:
+              path: "nested"
+              query:
+                multi_match:
+                  query: "quick brown fox"
+                  fields: [ "nested.field", "nested.field.vectors", "nested.field.postings" ]
+              inner_hits:
+                highlight:
+                  type: "unified"
+                  fields:
+                    nested.field: {}
+                    nested.field.vectors: {}
+                    nested.field.postings: {}
+
+  - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field.0: "The quick brown fox is brown." }  # NOTE(review): fragment has no highlight tags (e.g. em) - confirm none were lost
+  - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field\.vectors.0: "The quick brown fox is brown." }
+  - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field\.postings.0: "The quick brown fox is brown." }
+
+---
+"Unified highlighter with stored fields":  # highlights both stored match_only_text fields inside inner_hits
+  - do:
+      search:
+        index: test
+        body:
+          query:
+            nested:
+              path: "nested"
+              query:
+                multi_match:
+                  query: "quick brown fox"
+                  fields: [ "nested.stored", "nested.stored_only" ]
+              inner_hits:
+                highlight:
+                  type: "unified"
+                  fields:
+                    nested.stored: {}
+                    nested.stored_only: {}
+
+  - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.stored.0: "The quick brown fox is brown." }  # NOTE(review): fragment has no highlight tags (e.g. em) - confirm none were lost
+  - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.stored_only.0: "The quick brown fox is brown." }
+
+---
+"Unified highlighter with stored fields and disabled source":  # with _source disabled only the stored field is expected to highlight
+  - do:
+      indices.create:
+        index: disabled_source
+        body:
+          mappings:
+            _source:
+              enabled: false
+            properties:
+              nested:
+                type: nested
+                properties:
+                  field:
+                    type: match_only_text
+                  stored_only:
+                    type: match_only_text
+                    store: true
+  - do:
+      index:
+        index: disabled_source
+        id: 1
+        refresh: true
+        body:
+          nested:
+            field: "The quick brown fox is brown."
+            stored_only: "The quick brown fox is brown."
+
+  - do:
+      search:
+        index: disabled_source
+        body:
+          query:
+            nested:
+              path: "nested"
+              query:
+                multi_match:
+                  query: "quick brown fox"
+                  fields: ["nested.field", "nested.stored_only"]
+              inner_hits:
+                highlight:
+                  type: "unified"
+                  fields:
+                    nested.field: {}
+                    nested.stored_only: {}
+
+  - is_false: hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.field  # not stored and no _source, so no highlight is expected
+  - match: { hits.hits.0.inner_hits.nested.hits.hits.0.highlight.nested\.stored_only.0: "The quick brown fox is brown."}  # NOTE(review): fragment has no highlight tags (e.g. em) - confirm none were lost
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only_text.yml
new file mode 100644
index 0000000000000..03626236604a1
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/160_exists_query_match_only_text.yml
@@ -0,0 +1,119 @@
+setup:  # three indices: test (3 docs with the field + 1 empty doc), test-unmapped (field not mapped), test-empty (mapped, no docs)
+  - skip:
+      version: " - 2.99.99"
+      reason: "match_only_text was added in 3.0"
+      features: ["headers"]
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            dynamic: false
+            properties:
+              match_only_text:
+                type: match_only_text
+
+  - do:
+      headers:
+        Content-Type: application/json
+      index:
+        index: "test"
+        id: 1
+        body:
+          match_only_text: "foo bar"
+
+  - do:
+      headers:
+        Content-Type: application/json
+      index:
+        index: "test"
+        id: 2
+        body:
+          match_only_text: "foo bar"
+
+  - do:
+      headers:
+        Content-Type: application/json
+      index:
+        index: "test"
+        id: 3
+        routing: "route_me"
+        body:
+          match_only_text: "foo bar"
+
+  - do:
+      index:
+        index: "test"
+        id: 4
+        body: {}  # empty document: must not be counted by the exists query
+
+  - do:
+      indices.create:
+        index: test-unmapped
+        body:
+          mappings:
+            dynamic: false
+            properties:
+              unrelated:
+                type: keyword
+
+  - do:
+      index:
+        index: "test-unmapped"
+        id: 1
+        body:
+          unrelated: "foo"
+
+  - do:
+      indices.create:
+        index: test-empty
+        body:
+          mappings:
+            dynamic: false
+            properties:
+              match_only_text:
+                type: match_only_text
+
+  - do:
+      indices.refresh:
+        index: [test, test-unmapped, test-empty]
+
+---
+"Test exists query on mapped match_only_text field":  # expects exactly the three hits asserted below
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            exists:
+              field: match_only_text
+
+  - match: {hits.total: 3}
+
+---
+"Test exists query on unmapped match_only_text field":  # an exists query on this index must return no hits
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test-unmapped
+        body:
+          query:
+            exists:
+              field: match_only_text
+
+  - match: {hits.total: 0}
+
+---
+"Test exists query on match_only_text field in empty index":  # an exists query on this index must return no hits
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test-empty
+        body:
+          query:
+            exists:
+              field: match_only_text
+
+  - match: {hits.total: 0}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml
new file mode 100644
index 0000000000000..a41b8d353e3e9
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/200_phrase_search_field_match_only_text.yml
@@ -0,0 +1,67 @@
+---
+"search with indexed phrases":  # phrase queries of one, two and three terms must all find the single document
+  - skip:
+      version: " - 2.99.99"
+      reason: "match_only_text was added in 3.0"
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            properties:
+              text:
+                type: match_only_text
+
+  - do:
+      index:
+        index: test
+        id: 1
+        body: { text: "peter piper picked a peck of pickled peppers" }
+
+  - do:
+      indices.refresh:
+        index: [test]
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            match_phrase:
+              text:
+                query: "peter piper"
+
+  - match: {hits.total: 1}
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        q: '"peter piper"~1'  # query_string phrase with a slop of 1
+        df: text
+
+  - match: {hits.total: 1}
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            match_phrase:
+              text: "peter piper picked"
+
+  - match: {hits.total: 1}
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            match_phrase:
+              text: "piper"  # single-term phrase
+
+  - match: {hits.total: 1}
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml
new file mode 100644
index 0000000000000..fc4e9f9de0f38
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/310_match_bool_prefix_field_match_only_text.yml
@@ -0,0 +1,282 @@
+setup:  # four documents spread over two match_only_text fields
+  - skip:
+      version: " - 2.99.99"
+      reason: "match_only_text was added in 3.0"
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            properties:
+              my_field1:
+                type: match_only_text
+              my_field2:
+                type: match_only_text
+
+  - do:
+      index:
+        index: test
+        id: 1
+        body:
+          my_field1: "brown fox jump"
+          my_field2: "xylophone"
+
+  - do:
+      index:
+        index: test
+        id: 2
+        body:
+          my_field1: "brown emu jump"
+          my_field2: "xylophone"
+
+  - do:
+      index:
+        index: test
+        id: 3
+        body:
+          my_field1: "jumparound"
+          my_field2: "emu"
+
+  - do:
+      index:
+        index: test
+        id: 4
+        body:
+          my_field1: "dog"
+          my_field2: "brown fox jump lazy"
+
+  - do:
+      indices.refresh: {}
+
+---
+"minimum should match":  # all three clauses are required, so only doc 1 is expected
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            match_bool_prefix:
+              my_field1:
+                query: "brown fox jump"
+                minimum_should_match: 3
+
+  - match: { hits.total: 1 }
+  - match: { hits.hits.0._id: "1" }
+  - match: { hits.hits.0._source.my_field1: "brown fox jump" }
+
+---
+"analyzer":  # BROWN stays uppercase and matches nothing; only "dog" can match, giving doc 4
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            match_bool_prefix:
+              my_field1:
+                query: "BROWN dog"
+                analyzer: whitespace # this analyzer doesn't lowercase terms
+
+  - match: { hits.total: 1 }
+  - match: { hits.hits.0._id: "4" }
+  - match: { hits.hits.0._source.my_field1: "dog" }
+
+---
+"operator":  # AND requires every term; only doc 1 is expected
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            match_bool_prefix:
+              my_field1:
+                query: "brown fox jump"
+                operator: AND
+
+  - match: { hits.total: 1 }
+  - match: { hits.hits.0._id: "1" }
+  - match: { hits.hits.0._source.my_field1: "brown fox jump" }
+
+---
+"fuzziness":  # the misspelling "xylophoen" must still reach both "xylophone" documents
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            match_bool_prefix:
+              my_field2:
+                query: "xylophoen foo"
+                fuzziness: 1
+                prefix_length: 1
+                max_expansions: 10
+                fuzzy_transpositions: true
+                fuzzy_rewrite: constant_score
+
+  - match: { hits.total: 2 }
+  - match: { hits.hits.0._source.my_field2: "xylophone" }
+  - match: { hits.hits.1._source.my_field2: "xylophone" }
+
+---
+"multi_match single field complete term":  # bool_prefix multi_match restricted to my_field1
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            multi_match:
+              query: "brown fox jump"
+              type: bool_prefix
+              fields: [ "my_field1" ]
+
+  - match: { hits.total: 3 }
+
+---
+"multi_match single field partial term":  # last term "ju" is an incomplete prefix; same hit count is expected
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            multi_match:
+              query: "brown fox ju"
+              type: bool_prefix
+              fields: [ "my_field1" ]
+
+  - match: { hits.total: 3 }
+
+---
+"multi_match multiple fields complete term":  # the same bool_prefix query is evaluated against both fields
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            multi_match:
+              query: "brown fox jump lazy"
+              type: bool_prefix
+              fields: [ "my_field1", "my_field2" ]
+
+  - match: { hits.total: 3 }
+
+---
+"multi_match multiple fields partial term":  # last term "laz" is an incomplete prefix; same hit count is expected
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            multi_match:
+              query: "brown fox jump laz"
+              type: bool_prefix
+              fields: [ "my_field1", "my_field2" ]
+
+  - match: { hits.total: 3 }
+
+---
+"multi_match multiple fields with analyzer":  # uppercase terms match nothing; only "dog" can match, giving doc 4
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            multi_match:
+              query: "BROWN FOX JUMP dog"
+              type: bool_prefix
+              fields: [ "my_field1", "my_field2" ]
+              analyzer: whitespace # this analyzer doesn't lowercase terms
+
+  - match: { hits.total: 1 }
+  - match: { hits.hits.0._id: "4" }
+  - match: { hits.hits.0._source.my_field1: "dog" }
+  - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" }
+
+---
+"multi_match multiple fields with minimum_should_match":  # all four clauses are required, so only doc 4 is expected
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            multi_match:
+              query: "brown fox jump la"
+              type: bool_prefix
+              fields: [ "my_field1", "my_field2" ]
+              minimum_should_match: 4
+
+  - match: { hits.total: 1 }
+  - match: { hits.hits.0._id: "4" }
+  - match: { hits.hits.0._source.my_field1: "dog" }
+  - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" }
+
+---
+"multi_match multiple fields with fuzziness":  # "dob" is one edit away from "dog", so doc 4 is expected
+
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            multi_match:
+              query: "dob nomatch"
+              type: bool_prefix
+              fields: [ "my_field1", "my_field2" ]
+              fuzziness: 1
+
+  - match: { hits.total: 1 }
+  - match: { hits.hits.0._id: "4" }
+  - match: { hits.hits.0._source.my_field1: "dog" }
+  - match: { hits.hits.0._source.my_field2: "brown fox jump lazy" }
+
+---
+"multi_match multiple fields with slop throws exception":  # slop must be rejected for type bool_prefix
+
+  - do:
+      catch: /\[slop\] not allowed for type \[bool_prefix\]/
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            multi_match:
+              query: "brown"
+              type: bool_prefix
+              fields: [ "my_field1", "my_field2" ]
+              slop: 1
+
+---
+"multi_match multiple fields with cutoff_frequency throws exception":  # cutoff_frequency must be rejected for type bool_prefix
+
+  - do:
+      catch: /\[cutoff_frequency\] not allowed for type \[bool_prefix\]/
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            multi_match:
+              query: "brown"
+              type: bool_prefix
+              fields: [ "my_field1", "my_field2" ]
+              cutoff_frequency: 0.001
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only_text.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only_text.yml
new file mode 100644
index 0000000000000..f4faf87eb83cc
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/320_disallow_queries_field_match_only_text.yml
@@ -0,0 +1,141 @@
+---
+setup:  # four documents with a match_only_text field (keyword subfield raw) and a nested field
+  - skip:
+      version: " - 2.99.99"
+      reason: "match_only_text was added in 3.0"
+
+  - do:
+      indices.create:
+        index: test
+        body:
+          mappings:
+            properties:
+              text:
+                type: match_only_text
+                analyzer: standard
+                fields:
+                  raw:
+                    type: keyword
+              nested1:
+                type: nested
+
+  - do:
+      bulk:
+        refresh: true
+        body:
+          - '{"index": {"_index": "test", "_id": "1"}}'
+          - '{"text" : "Some like it hot, some like it cold", "nested1": [{"foo": "bar1"}]}'
+          - '{"index": {"_index": "test", "_id": "2"}}'
+          - '{"text" : "Its cold outside, theres no kind of atmosphere", "nested1": [{"foo": "bar2"}]}'
+          - '{"index": {"_index": "test", "_id": "3"}}'
+          - '{"text" : "Baby its cold there outside", "nested1": [{"foo": "bar3"}]}'
+          - '{"index": {"_index": "test", "_id": "4"}}'
+          - '{"text" : "Outside it is cold and wet", "nested1": [{"foo": "bar4"}]}'
+
+---
+teardown:  # sets the transient search.allow_expensive_queries back to null after the test
+
+  - do:
+      cluster.put_settings:
+        body:
+          transient:
+            search.allow_expensive_queries: null
+
+---
+"Test disallow expensive queries":  # every query type below must be rejected once the setting is false
+
+  ### Check for initial setting = null -> false
+  - do:
+      cluster.get_settings:
+        flat_settings: true
+
+  - is_false: search.allow_expensive_queries
+
+  ### Update setting to false
+  - do:
+      cluster.put_settings:
+        body:
+          transient:
+            search.allow_expensive_queries: "false"
+        flat_settings: true
+
+  - match: {transient: {search.allow_expensive_queries: "false"}}
+
+  ### Prefix
+  - do:
+      catch: /\[prefix\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false. For optimised prefix queries on text fields please enable \[index_prefixes\]./
+      search:
+        index: test
+        body:
+          query:
+            prefix:
+              text:
+                value: out
+
+  ### Fuzzy
+  - do:
+      catch: /\[fuzzy\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./
+      search:
+        index: test
+        body:
+          query:
+            fuzzy:
+              text:
+                value: outwide
+
+  ### Regexp
+  - do:
+      catch: /\[regexp\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./
+      search:
+        index: test
+        body:
+          query:
+            regexp:
+              text:
+                value: .*ou.*id.*
+
+  ### Wildcard
+  - do:
+      catch: /\[wildcard\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./
+      search:
+        index: test
+        body:
+          query:
+            wildcard:
+              text:
+                value: out?ide
+
+  ### Range on text
+  - do:
+      catch: /\[range\] queries on \[text\] or \[keyword\] fields cannot be executed when \'search.allow_expensive_queries\' is set to false./
+      search:
+        index: test
+        body:
+          query:
+            range:
+              text:
+                gte: "theres"
+
+  ### Range on keyword
+  - do:
+      catch: /\[range\] queries on \[text\] or \[keyword\] fields cannot be executed when \'search.allow_expensive_queries\' is set to false./
+      search:
+        index: test
+        body:
+          query:
+            range:
+              text.raw:
+                gte : "Outside it is cold and wet"
+
+  ### Nested
+  - do:
+      catch: /\[joining\] queries cannot be executed when \'search.allow_expensive_queries\' is set to false./
+      search:
+        index: test
+        body:
+          query:
+            nested:
+              path: "nested1"
+              query:
+                bool:
+                  must: [{"match" : {"nested1.foo" : "bar2"}}]
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml
new file mode 100644
index 0000000000000..cc15796e4697f
--- /dev/null
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search_shards/10_basic_field_match_only_field.yml
@@ -0,0 +1,92 @@
+---
+"Search shards aliases with and without filters":  # one index, three aliases: one unfiltered, two with term filters on the match_only_text field
+  - skip:
+      version: " - 2.99.99"
+      reason: "match_only_text was added in 3.0"
+
+  - do:
+      indices.create:
+        index: test_index
+        body:
+          settings:
+            index:
+              number_of_shards: 1
+              number_of_replicas: 0
+          mappings:
+            properties:
+              field:
+                type: match_only_text
+          aliases:
+            test_alias_no_filter: {}
+            test_alias_filter_1:
+              filter:
+                term:
+                  field : value1
+            test_alias_filter_2:
+              filter:
+                term:
+                  field : value2
+
+  - do:
+      search_shards:
+        index: test_alias_no_filter
+
+  - length: { shards: 1 }
+  - match: { shards.0.0.index: test_index }
+  - is_true: indices.test_index
+  - is_false: indices.test_index.filter
+  - match: { indices.test_index.aliases: [test_alias_no_filter]}
+
+  - do:
+      search_shards:
+        index: test_alias_filter_1
+
+  - length: { shards: 1 }
+  - match: { shards.0.0.index: test_index }
+  - match: { indices.test_index.aliases: [test_alias_filter_1] }
+  - match: { indices.test_index.filter.term.field.value: value1 }
+  - lte: { indices.test_index.filter.term.field.boost: 1.0 }  # lte and gte together pin the boost to 1.0
+  - gte: { indices.test_index.filter.term.field.boost: 1.0 }
+
+  - do:
+      search_shards:
+        index: ["test_alias_filter_1","test_alias_filter_2"]
+
+  - length: { shards: 1 }
+  - match: { shards.0.0.index: test_index }
+  - match: { indices.test_index.aliases: [test_alias_filter_1, test_alias_filter_2]}
+  - length: { indices.test_index.filter.bool.should: 2 }  # the two alias filters are expected as two should clauses
+  - lte: { indices.test_index.filter.bool.should.0.term.field.boost: 1.0 }
+  - gte: { indices.test_index.filter.bool.should.0.term.field.boost: 1.0 }
+  - lte: { indices.test_index.filter.bool.should.1.term.field.boost: 1.0 }
+  - gte: { indices.test_index.filter.bool.should.1.term.field.boost: 1.0 }
+  - match: { indices.test_index.filter.bool.adjust_pure_negative: true}
+  - lte: { indices.test_index.filter.bool.boost: 1.0 }
+  - gte: { indices.test_index.filter.bool.boost: 1.0 }
+
+  - do:
+      search_shards:
+        index: "test*"
+
+  - length: { shards: 1 }
+  - match: { shards.0.0.index: test_index }
+  - match: { indices.test_index.aliases: [test_alias_filter_1, test_alias_filter_2, test_alias_no_filter]}
+  - is_false: indices.test_index.filter  # once the unfiltered alias is included no filter is expected
+
+  - do:
+      search_shards:
+        index: ["test_alias_filter_1","test_alias_no_filter"]
+
+  - length: { shards: 1 }
+  - match: { shards.0.0.index: test_index }
+  - match: { indices.test_index.aliases: [test_alias_filter_1, test_alias_no_filter]}
+  - is_false: indices.test_index.filter
+
+  - do:
+      search_shards:
+        index: ["test_alias_no_filter"]
+
+  - length: { shards: 1 }
+  - match: { shards.0.0.index: test_index }
+  - match: { indices.test_index.aliases: [test_alias_no_filter]}
+  - is_false: indices.test_index.filter
diff --git a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java
index da62ddfd7017d..66d4654e543a2 100644
--- a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java
+++ b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java
@@ -359,18 +359,31 @@ public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionInc
);
}
+ public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException { // overload that additionally receives the QueryShardContext
+ return phraseQuery(stream, slop, enablePositionIncrements); // base behaviour: context is unused, delegate to the three-argument overload (presumably meant to be overridden - confirm)
+ }
+
public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
throw new IllegalArgumentException(
"Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"
);
}
+ public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) // overload that additionally receives the QueryShardContext
+ throws IOException {
+ return multiPhraseQuery(stream, slop, enablePositionIncrements); // base behaviour: context is unused, delegate to the three-argument overload (presumably meant to be overridden - confirm)
+ }
+
public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException {
throw new IllegalArgumentException(
"Can only use phrase prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"
);
}
+    /**
+     * Variant of {@link #phrasePrefixQuery(TokenStream, int, int)} that also receives the shard context.
+     * This implementation does not use {@code context}; it forwards the other arguments to the
+     * three-argument overload and returns its result.
+     */
+    public Query phrasePrefixQuery(
+        TokenStream stream,
+        int slop,
+        int maxExpansions,
+        QueryShardContext context
+    ) throws IOException {
+        return phrasePrefixQuery(stream, slop, maxExpansions);
+    }
+
public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) {
throw new IllegalArgumentException(
"Can only use span prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"
diff --git a/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java
new file mode 100644
index 0000000000000..fb97f8c309a70
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java
@@ -0,0 +1,312 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.mapper;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.MultiPhraseQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
+import org.opensearch.Version;
+import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery;
+import org.opensearch.index.analysis.IndexAnalyzers;
+import org.opensearch.index.analysis.NamedAnalyzer;
+import org.opensearch.index.query.QueryShardContext;
+import org.opensearch.index.query.SourceFieldMatchQuery;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.function.Function;
+
+/**
+ * A specialized type of TextFieldMapper which disables positions and norms to save on storage and executes phrase queries, which require
+ * positional data, in a slightly less efficient manner using the {@link org.opensearch.index.query.SourceFieldMatchQuery}.
+ */
+public class MatchOnlyTextFieldMapper extends TextFieldMapper {
+
+ // Lucene field configuration for this mapper; populated and frozen by the static initializer of this class.
+ public static final FieldType FIELD_TYPE = new FieldType();
+ // Mapping type name, returned by contentType() and by MatchOnlyTextFieldType#typeName().
+ public static final String CONTENT_TYPE = "match_only_text";
+ // String form of FIELD_TYPE's index options; read by the Builder's index_options parameter initializer.
+ private final String indexOptions = FieldMapper.indexOptionToString(FIELD_TYPE.indexOptions());
+ // True only if FIELD_TYPE keeps norms; read by the Builder's norms parameter initializer.
+ private final boolean norms = FIELD_TYPE.omitNorms() == false;
+
+    /** Returns the mapping type name of this mapper, i.e. {@link #CONTENT_TYPE}. */
+    @Override
+    protected String contentType() {
+        return MatchOnlyTextFieldMapper.CONTENT_TYPE;
+    }
+
+    // One-time configuration of the shared FIELD_TYPE constant.
+    static {
+        // Nothing besides the postings is kept: no stored value, no term vectors, no norms.
+        FIELD_TYPE.setStored(false);
+        FIELD_TYPE.setStoreTermVectors(false);
+        FIELD_TYPE.setOmitNorms(true);
+        // Tokenized text, indexed with IndexOptions.DOCS (document ids only).
+        FIELD_TYPE.setTokenized(true);
+        FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
+        // Must stay last: freezing prevents any further change to the configuration.
+        FIELD_TYPE.freeze();
+    }
+
+    /**
+     * Type parser for this field type: creates a {@link Builder} from the field name, the version the
+     * index was created with, and the index analyzers taken from the parser context.
+     */
+    public static final TypeParser PARSER = new TypeParser(
+        (fieldName, parserContext) -> new Builder(fieldName, parserContext.indexVersionCreated(), parserContext.getIndexAnalyzers())
+    );
+
+    /**
+     * Creates the mapper. All arguments are handed unchanged, and in the same order, to the
+     * {@link TextFieldMapper} constructor; this class adds no construction logic of its own.
+     */
+    protected MatchOnlyTextFieldMapper(
+        String simpleName,
+        FieldType fieldType,
+        MatchOnlyTextFieldType mappedFieldType,
+        TextFieldMapper.PrefixFieldMapper prefixFieldMapper,
+        TextFieldMapper.PhraseFieldMapper phraseFieldMapper,
+        MultiFields multiFields,
+        CopyTo copyTo,
+        Builder builder
+    ) {
+        super(
+            simpleName,
+            fieldType,
+            mappedFieldType,
+            prefixFieldMapper,
+            phraseFieldMapper,
+            multiFields,
+            copyTo,
+            builder
+        );
+    }
+
+    /**
+     * Returns a new {@link Builder} created from this mapper's simple name, index creation version and
+     * index analyzers, then initialized from this mapper through {@code init(this)}.
+     */
+    @Override
+    public ParametrizedFieldMapper.Builder getMergeBuilder() {
+        final Builder mergeBuilder = new Builder(simpleName(), this.indexCreatedVersion, this.indexAnalyzers);
+        return mergeBuilder.init(this);
+    }
+
+ /**
+ * Builder class for constructing the MatchOnlyTextFieldMapper.
+ */
+ public static class Builder extends TextFieldMapper.Builder {
+        /** {@code index_options} parameter, initialized from the mapper's {@code indexOptions} field. */
+        final Parameter<String> indexOptions = indexOptions(m -> ((MatchOnlyTextFieldMapper) m).indexOptions);
+
+        /**
+         * Creates the {@code index_options} parameter as a restricted string parameter whose only listed
+         * value is {@code "docs"}.
+         *
+         * @param initializer reads the current value from an existing mapper
+         * @return the parameter definition
+         */
+        private static Parameter<String> indexOptions(Function<FieldMapper, String> initializer) {
+            return Parameter.restrictedStringParam("index_options", false, initializer, "docs");
+        }
+
+        /** {@code norms} parameter, initialized from the mapper's {@code norms} field. */
+        final Parameter<Boolean> norms = norms(m -> ((MatchOnlyTextFieldMapper) m).norms);
+
+        /**
+         * {@code index_phrases} parameter. It defaults to {@code false} and its validator rejects
+         * {@code true}, so phrase indexing cannot be turned on for this field type.
+         */
+        final Parameter<Boolean> indexPhrases = Parameter.boolParam(
+            "index_phrases",
+            false,
+            m -> ((MatchOnlyTextFieldType) m.mappedFieldType).indexPhrases,
+            false
+        ).setValidator(v -> {
+            if (v == true) {
+                // NOTE(review): "enabled on for" reads like a typo; the text is kept verbatim because callers or tests may match on it.
+                throw new MapperParsingException("Index phrases cannot be enabled on for match_only_text field. Use text field instead");
+            }
+        });
+
+        /**
+         * {@code index_prefixes} parameter. It defaults to {@code null}, accepts {@code null}, and its
+         * validator rejects every non-null configuration, so prefix indexing cannot be turned on for this
+         * field type. The initializer maps the field type's prefix field type, when present, back to a
+         * {@link PrefixConfig}.
+         */
+        final Parameter<PrefixConfig> indexPrefixes = new Parameter<>(
+            "index_prefixes",
+            false,
+            () -> null,
+            TextFieldMapper::parsePrefixConfig,
+            m -> Optional.ofNullable(((MatchOnlyTextFieldType) m.mappedFieldType).prefixFieldType)
+                .map(p -> new PrefixConfig(p.minChars, p.maxChars))
+                .orElse(null)
+        ).acceptsNull().setValidator(v -> {
+            if (v != null) {
+                // NOTE(review): "enabled on for" reads like a typo; the text is kept verbatim because callers or tests may match on it.
+                throw new MapperParsingException("Index prefixes cannot be enabled on for match_only_text field. Use text field instead");
+            }
+        });
+
+        /**
+         * Creates the {@code norms} boolean parameter with a default of {@code false}.
+         * The merge validator accepts an unchanged value or a change from {@code true} to {@code false};
+         * the value validator rejects {@code true}.
+         *
+         * @param initializer reads the current value from an existing mapper
+         * @return the parameter definition
+         */
+        private static Parameter<Boolean> norms(Function<FieldMapper, Boolean> initializer) {
+            return Parameter.boolParam("norms", false, initializer, false)
+                .setMergeValidator((o, n) -> o == n || (o && n == false))
+                .setValidator(v -> {
+                    if (v == true) {
+                        // NOTE(review): "enabled on for" reads like a typo; the text is kept verbatim because callers or tests may match on it.
+                        throw new MapperParsingException("Norms cannot be enabled on for match_only_text field");
+                    }
+                });
+        }
+
+        /** Creates a builder by delegating to the two-argument {@link TextFieldMapper.Builder} constructor. */
+        public Builder(
+            String name,
+            IndexAnalyzers indexAnalyzers
+        ) {
+            super(name, indexAnalyzers);
+        }
+
+        /** Creates a builder by delegating to the three-argument {@link TextFieldMapper.Builder} constructor. */
+        public Builder(
+            String name,
+            Version indexCreatedVersion,
+            IndexAnalyzers indexAnalyzers
+        ) {
+            super(name, indexCreatedVersion, indexAnalyzers);
+        }
+
+        /**
+         * Builds the mapper. The Lucene field type is created from this builder's index, store,
+         * index_options, norms and term_vector parameters; the mapped field type is derived from it via
+         * {@link #buildFieldType}; both are then passed to the {@link MatchOnlyTextFieldMapper}
+         * constructor together with the prefix and phrase sub-mappers, the multi-fields and the copy-to
+         * settings.
+         */
+        @Override
+        public MatchOnlyTextFieldMapper build(BuilderContext context) {
+            final FieldType luceneFieldType = TextParams.buildFieldType(index, store, indexOptions, norms, termVectors);
+            final MatchOnlyTextFieldType mappedType = buildFieldType(luceneFieldType, context);
+            return new MatchOnlyTextFieldMapper(
+                name,
+                luceneFieldType,
+                mappedType,
+                buildPrefixMapper(context, luceneFieldType, mappedType),
+                buildPhraseMapper(luceneFieldType, mappedType),
+                multiFieldsBuilder.build(this, context),
+                copyTo.build(),
+                this
+            );
+        }
+
+        /**
+         * Creates the {@link MatchOnlyTextFieldType} for this field from the Lucene field type and this
+         * builder's analyzer, similarity, boost, fielddata and meta parameters.
+         *
+         * @param fieldType Lucene field type built from this builder's parameters
+         * @param context   builder context used to compute the full field name
+         * @return the configured mapped field type
+         * @throws IllegalArgumentException if {@code fieldType} uses index options above
+         *         {@link IndexOptions#DOCS}, or if {@code position_increment_gap} is set while the index
+         *         options are below {@link IndexOptions#DOCS}
+         */
+        @Override
+        protected MatchOnlyTextFieldType buildFieldType(FieldType fieldType, BuilderContext context) {
+            NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer();
+            NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer();
+            NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer();
+
+            // This guard is about index_options, not position_increment_gap: anything above DOCS would
+            // index term frequencies and/or positions, which this field type does not keep.
+            if (fieldType.indexOptions().compareTo(IndexOptions.DOCS) > 0) {
+                throw new IllegalArgumentException(
+                    "Cannot index term frequencies or positions on field ["
+                        + name
+                        + "] of type ["
+                        + MatchOnlyTextFieldMapper.CONTENT_TYPE
+                        + "]; index_options must be [docs]"
+                );
+            }
+            if (positionIncrementGap.get() != POSITION_INCREMENT_GAP_USE_ANALYZER) {
+                if (fieldType.indexOptions().compareTo(IndexOptions.DOCS) < 0) {
+                    throw new IllegalArgumentException(
+                        "Cannot set position_increment_gap on field [" + name + "] without indexing enabled"
+                    );
+                }
+                // An explicit gap was configured: wrap all three analyzers so they apply it.
+                indexAnalyzer = new NamedAnalyzer(indexAnalyzer, positionIncrementGap.get());
+                searchAnalyzer = new NamedAnalyzer(searchAnalyzer, positionIncrementGap.get());
+                searchQuoteAnalyzer = new NamedAnalyzer(searchQuoteAnalyzer, positionIncrementGap.get());
+            }
+            TextSearchInfo tsi = new TextSearchInfo(fieldType, similarity.getValue(), searchAnalyzer, searchQuoteAnalyzer);
+            MatchOnlyTextFieldType ft = new MatchOnlyTextFieldType(
+                buildFullName(context),
+                index.getValue(),
+                fieldType.stored(),
+                tsi,
+                meta.getValue()
+            );
+            ft.setIndexAnalyzer(indexAnalyzer);
+            ft.setEagerGlobalOrdinals(eagerGlobalOrdinals.getValue());
+            ft.setBoost(boost.getValue());
+            // Fielddata is only switched on when requested, together with its frequency filter.
+            if (fieldData.getValue()) {
+                ft.setFielddata(true, freqFilter.getValue());
+            }
+            return ft;
+        }
+
+        /**
+         * Lists the parameters of this field type. {@code indexOptions}, {@code norms},
+         * {@code indexPhrases} and {@code indexPrefixes} are the fields declared on this builder class;
+         * the remaining entries are not declared here.
+         */
+        @Override
+        protected List<Parameter<?>> getParameters() {
+            return Arrays.asList(
+                index,
+                store,
+                indexOptions,
+                norms,
+                termVectors,
+                analyzers.indexAnalyzer,
+                analyzers.searchAnalyzer,
+                analyzers.searchQuoteAnalyzer,
+                similarity,
+                positionIncrementGap,
+                fieldData,
+                freqFilter,
+                eagerGlobalOrdinals,
+                indexPhrases,
+                indexPrefixes,
+                boost,
+                meta
+            );
+        }
+ }
+
+ /**
+ * The specific field type for MatchOnlyTextFieldMapper
+ *
+ * @opensearch.internal
+ */
+ public static final class MatchOnlyTextFieldType extends TextFieldType {
+ // Constant false; read by the Builder's index_phrases initializer, whose validator rejects true.
+ private final boolean indexPhrases = false;
+
+ // Never assigned in this class, so it stays null; read by the Builder's index_prefixes initializer.
+ private PrefixFieldType prefixFieldType;
+
+        /** Returns the type name of this field type, i.e. {@link MatchOnlyTextFieldMapper#CONTENT_TYPE}. */
+        @Override
+        public String typeName() {
+            return MatchOnlyTextFieldMapper.CONTENT_TYPE;
+        }
+
+        /**
+         * Creates the field type by handing all arguments unchanged to the {@link TextFieldType}
+         * constructor.
+         */
+        public MatchOnlyTextFieldType(String name, boolean indexed, boolean stored, TextSearchInfo tsi, Map<String, String> meta) {
+            super(name, indexed, stored, tsi, meta);
+        }
+
+        /**
+         * Builds the phrase query for this field. The {@link PhraseQuery} produced by the parent
+         * implementation is paired with a conjunction that requires every phrase term as a FILTER clause,
+         * and both are wrapped in a {@link SourceFieldMatchQuery}.
+         * NOTE(review): presumably the conjunction selects candidates and the phrase query is then checked
+         * against the source; confirm against SourceFieldMatchQuery.
+         */
+        @Override
+        public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, QueryShardContext context) throws IOException {
+            final PhraseQuery exactPhrase = (PhraseQuery) super.phraseQuery(stream, slop, enablePosIncrements);
+            final BooleanQuery.Builder allTerms = new BooleanQuery.Builder();
+            // One FILTER clause per phrase term, in phrase order.
+            Arrays.stream(exactPhrase.getTerms())
+                .map(TermQuery::new)
+                .forEach(termQuery -> allTerms.add(termQuery, BooleanClause.Occur.FILTER));
+            return new SourceFieldMatchQuery(allTerms.build(), exactPhrase, this, context);
+        }
+
+        /**
+         * Builds the multi-phrase query for this field. The {@link MultiPhraseQuery} produced by the
+         * parent implementation is paired with a conjunction holding one FILTER clause per position: a
+         * single term query, or a disjunction when several terms share the position. Both are wrapped in
+         * a {@link SourceFieldMatchQuery}.
+         */
+        @Override
+        public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context)
+            throws IOException {
+            final MultiPhraseQuery phrase = (MultiPhraseQuery) super.multiPhraseQuery(stream, slop, enablePositionIncrements);
+            final BooleanQuery.Builder conjunction = new BooleanQuery.Builder();
+            for (Term[] samePosition : phrase.getTermArrays()) {
+                if (samePosition.length <= 1) {
+                    // Only one term at this position: require it directly.
+                    conjunction.add(new TermQuery(samePosition[0]), BooleanClause.Occur.FILTER);
+                    continue;
+                }
+                // Several terms share this position: any one of them satisfies the position.
+                final BooleanQuery.Builder anyOf = new BooleanQuery.Builder();
+                for (Term alternative : samePosition) {
+                    anyOf.add(new TermQuery(alternative), BooleanClause.Occur.SHOULD);
+                }
+                conjunction.add(anyOf.build(), BooleanClause.Occur.FILTER);
+            }
+            return new SourceFieldMatchQuery(conjunction.build(), phrase, this, context);
+        }
+
+        /**
+         * Builds the phrase-prefix query for this field. The query produced by the parent implementation
+         * is paired with a conjunction built from the token stream: every position except the last adds a
+         * term query (or a disjunction when several terms share the position), and the last position adds
+         * a {@link MultiPhrasePrefixQuery} over its terms. Both are wrapped in a
+         * {@link SourceFieldMatchQuery}.
+         *
+         * The stream is consumed twice: once by the parent call and once by
+         * {@code getTermsFromTokenStream}, which resets it first.
+         */
+        @Override
+        public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException {
+            Query phrasePrefixQuery = super.phrasePrefixQuery(stream, slop, maxExpansions);
+            List<List<Term>> termArray = getTermsFromTokenStream(stream);
+            BooleanQuery.Builder builder = new BooleanQuery.Builder();
+            final int lastIndex = termArray.size() - 1;
+            for (int i = 0; i <= lastIndex; i++) {
+                final List<Term> positionTerms = termArray.get(i);
+                if (i == lastIndex) {
+                    // last element of the term array is a prefix, thus creating a prefix query for it and adding it to
+                    // the conjunction query
+                    MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery(name());
+                    mqb.add(positionTerms.toArray(new Term[0]));
+                    builder.add(mqb, BooleanClause.Occur.FILTER);
+                } else if (positionTerms.size() > 1) {
+                    // multiple terms in the same position, creating a disjunction query for it and
+                    // adding it to the conjunction query
+                    BooleanQuery.Builder disjunctions = new BooleanQuery.Builder();
+                    for (Term term : positionTerms) {
+                        disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
+                    }
+                    builder.add(disjunctions.build(), BooleanClause.Occur.FILTER);
+                } else {
+                    builder.add(new TermQuery(positionTerms.get(0)), BooleanClause.Occur.FILTER);
+                }
+            }
+            return new SourceFieldMatchQuery(builder.build(), phrasePrefixQuery, this, context);
+        }
+
+        /**
+         * Reads the token stream and groups its terms by position. Tokens whose position increment is 0
+         * join the current group; a non-zero increment starts a new group. Empty groups are never added,
+         * so a stream without tokens yields an empty list.
+         *
+         * @param stream token stream to read; it is reset before reading
+         * @return one immutable list of terms per position, in stream order
+         * @throws IOException if reading the stream fails
+         */
+        private List<List<Term>> getTermsFromTokenStream(TokenStream stream) throws IOException {
+            final List<List<Term>> termArray = new ArrayList<>();
+            TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
+            PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
+            List<Term> currentTerms = new ArrayList<>();
+            stream.reset();
+            while (stream.incrementToken()) {
+                // A non-zero increment starts a new position: flush the group collected so far.
+                if (posIncrAtt.getPositionIncrement() != 0 && currentTerms.isEmpty() == false) {
+                    termArray.add(List.copyOf(currentTerms));
+                    currentTerms.clear();
+                }
+                // NOTE(review): relies on Term(String, BytesRef) copying the bytes, since the attribute's
+                // BytesRef may be reused between tokens; confirm for the Lucene version in use.
+                currentTerms.add(new Term(name(), termAtt.getBytesRef()));
+            }
+            // Flush the trailing group, unless the stream produced no tokens at all.
+            if (currentTerms.isEmpty() == false) {
+                termArray.add(List.copyOf(currentTerms));
+            }
+            return termArray;
+        }
+ }
+}
diff --git a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java
index a772b363936f3..ef0f7b33e826e 100644
--- a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java
@@ -111,7 +111,7 @@
public class TextFieldMapper extends ParametrizedFieldMapper {
public static final String CONTENT_TYPE = "text";
- private static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1;
+ protected static final int POSITION_INCREMENT_GAP_USE_ANALYZER = -1;
private static final String FAST_PHRASE_SUFFIX = "._index_phrase";
/**
@@ -153,11 +153,11 @@ private static TextFieldMapper toType(FieldMapper in) {
*
* @opensearch.internal
*/
- private static final class PrefixConfig implements ToXContent {
+ protected static final class PrefixConfig implements ToXContent {
final int minChars;
final int maxChars;
- private PrefixConfig(int minChars, int maxChars) {
+ PrefixConfig(int minChars, int maxChars) {
this.minChars = minChars;
this.maxChars = maxChars;
if (minChars > maxChars) {
@@ -199,7 +199,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
}
}
- private static PrefixConfig parsePrefixConfig(String propName, ParserContext parserContext, Object propNode) {
+ static PrefixConfig parsePrefixConfig(String propName, ParserContext parserContext, Object propNode) {
if (propNode == null) {
return null;
}
@@ -215,7 +215,7 @@ private static PrefixConfig parsePrefixConfig(String propName, ParserContext par
*
* @opensearch.internal
*/
- private static final class FielddataFrequencyFilter implements ToXContent {
+ protected static final class FielddataFrequencyFilter implements ToXContent {
final double minFreq;
final double maxFreq;
final int minSegmentSize;
@@ -281,15 +281,14 @@ public static class Builder extends ParametrizedFieldMapper.Builder {
private final Version indexCreatedVersion;
- private final Parameter index = Parameter.indexParam(m -> toType(m).mappedFieldType.isSearchable(), true);
- private final Parameter store = Parameter.storeParam(m -> toType(m).fieldType.stored(), false);
+ protected final Parameter index = Parameter.indexParam(m -> toType(m).mappedFieldType.isSearchable(), true);
+ protected final Parameter store = Parameter.storeParam(m -> toType(m).fieldType.stored(), false);
final Parameter similarity = TextParams.similarity(m -> toType(m).similarity);
final Parameter indexOptions = TextParams.indexOptions(m -> toType(m).indexOptions);
final Parameter norms = TextParams.norms(true, m -> toType(m).fieldType.omitNorms() == false);
final Parameter termVectors = TextParams.termVectors(m -> toType(m).termVectors);
-
final Parameter positionIncrementGap = Parameter.intParam(
"position_increment_gap",
false,
@@ -333,8 +332,8 @@ public static class Builder extends ParametrizedFieldMapper.Builder {
.orElse(null)
).acceptsNull();
- private final Parameter boost = Parameter.boostParam();
- private final Parameter