From 5dafb2fc5960e0a1edff70a089a5390519fc909c Mon Sep 17 00:00:00 2001 From: Norman Jordan Date: Thu, 24 Oct 2024 15:23:20 -0700 Subject: [PATCH 1/5] Added documentation for the plugins.query.field_type_tolerance setting Signed-off-by: Norman Jordan --- docs/user/admin/settings.rst | 52 ++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/docs/user/admin/settings.rst b/docs/user/admin/settings.rst index 71718d1726..c6e2dccf27 100644 --- a/docs/user/admin/settings.rst +++ b/docs/user/admin/settings.rst @@ -824,3 +824,55 @@ To Re-enable Data Sources::: } } +plugins.query.field_type_tolerance +================================== + +Description +----------- + +This setting controls whether to preserve arrays. If this setting is set to false, then an array is reduced +to the first non-array value of any level of nesting. + +If you have an index with the following value for a field:: + + [ + [ + {"name": "one", "value": 1}, + {"name": "two", "value": 2}, + 3 + ] + ] + +With plugins.query.field_type_tolerance set to true, the array is returned in full:: + + [ + [ + {"name": "one", "value": 1}, + {"name": "two", "value": 2}, + 3 + ] + ] + +With plugins.query.field_type_tolerance set to false, the array is reduced:: + + {"name": "one", "value": 1} + +1. The default value is true (preserve arrays) +2. This setting is node scope +3. This setting can be updated dynamically + +Update Settings Request:: + + sh$ curl -sS -H 'Content-Type: application/json' -X PUT 'localhost:9200/_cluster/settings?pretty' \ + ... 
-d '{"transient":{"plugins.query.field_type_tolerance":"false"}}' + { + "acknowledged": true, + "persistent": {}, + "transient": { + "plugins": { + "query": { + "field_type_tolerance": "false" + } + } + } + } From ffa1bc6dc350f622feda15b951d0735e7d05bf26 Mon Sep 17 00:00:00 2001 From: Norman Jordan Date: Mon, 28 Oct 2024 13:52:14 -0700 Subject: [PATCH 2/5] Updated documentation for plugins.query.field_type_tolerance Signed-off-by: Norman Jordan --- docs/user/admin/settings.rst | 66 +++++++++++++++++------------------- 1 file changed, 31 insertions(+), 35 deletions(-) diff --git a/docs/user/admin/settings.rst b/docs/user/admin/settings.rst index c6e2dccf27..67b000ee46 100644 --- a/docs/user/admin/settings.rst +++ b/docs/user/admin/settings.rst @@ -833,46 +833,42 @@ Description This setting controls whether to preserve arrays. If this setting is set to false, then an array is reduced to the first non-array value of any level of nesting. -If you have an index with the following value for a field:: +1. The default value is true (preserve arrays) +2. This setting is node scope +3. 
This setting can be updated dynamically - [ - [ - {"name": "one", "value": 1}, - {"name": "two", "value": 2}, - 3 - ] - ] +Querying a field containing array values will return the full array values:: -With plugins.query.field_type_tolerance set to true, the array is returned in full:: + os> SELECT accounts FROM people; + fetched rows / total rows = 1/1 + +-----------------------+ + | accounts | + +-----------------------+ + | [{'id': 1},{'id': 2}] | + +-----------------------+ - [ - [ - {"name": "one", "value": 1}, - {"name": "two", "value": 2}, - 3 - ] - ] +Disable field type tolerance:: -With plugins.query.field_type_tolerance set to false, the array is reduced:: + >> curl -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings -d '{ + "transient" : { + "plugins.query.field_type_tolerance" : false + } + }' - {"name": "one", "value": 1} +When field type tolerance is disabled, arrays are collapsed to the first non-array value:: -1. The default value is true (preserve arrays) -2. This setting is node scope -3. This setting can be updated dynamically + os> SELECT accounts FROM people; + fetched rows / total rows = 1/1 + +-----------+ + | accounts | + +-----------+ + | {'id': 1} | + +-----------+ -Update Settings Request:: +Reenable field type tolerance:: - sh$ curl -sS -H 'Content-Type: application/json' -X PUT 'localhost:9200/_cluster/settings?pretty' \ - ... 
-d '{"transient":{"plugins.query.field_type_tolerance":"false"}}' - { - "acknowledged": true, - "persistent": {}, - "transient": { - "plugins": { - "query": { - "field_type_tolerance": "false" - } - } - } - } + >> curl -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings -d '{ + "transient" : { + "plugins.query.field_type_tolerance" : true + } + }' From f7fe680a67350b1db61d3e6c0efd4d2a9f5adbc5 Mon Sep 17 00:00:00 2001 From: Norman Jordan Date: Mon, 28 Oct 2024 14:02:30 -0700 Subject: [PATCH 3/5] Added limitations for field type tolerance Signed-off-by: Norman Jordan --- docs/user/admin/settings.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/user/admin/settings.rst b/docs/user/admin/settings.rst index 67b000ee46..2bd94d72fc 100644 --- a/docs/user/admin/settings.rst +++ b/docs/user/admin/settings.rst @@ -872,3 +872,10 @@ Reenable field type tolerance:: "plugins.query.field_type_tolerance" : true } }' + +Limitations: +------------ +* Fields with array values should only be used in the projection list +* Array values are not supported by SQL or PPL functions +* Array values in expressions will cause the query to fail or produce incorrect results +* PPL commands do not support examining or altering array values \ No newline at end of file From 0ec510a37b8ae82aee661bd65ce5c9b0411bcb80 Mon Sep 17 00:00:00 2001 From: Norman Jordan Date: Tue, 29 Oct 2024 14:26:01 -0700 Subject: [PATCH 4/5] Reworded the limitations and added an example error Signed-off-by: Norman Jordan --- docs/user/admin/settings.rst | 11 +++++++---- docs/user/limitations/limitations.rst | 21 +++++++++++++++++++++ doctest/test_data/multi_value_long.json | 5 +++++ 3 files changed, 33 insertions(+), 4 deletions(-) create mode 100644 doctest/test_data/multi_value_long.json diff --git a/docs/user/admin/settings.rst b/docs/user/admin/settings.rst index 2bd94d72fc..cbcb4f329d 100644 --- a/docs/user/admin/settings.rst +++ b/docs/user/admin/settings.rst @@ -875,7 
+875,10 @@ Reenable field type tolerance:: Limitations: ------------ -* Fields with array values should only be used in the projection list -* Array values are not supported by SQL or PPL functions -* Array values in expressions will cause the query to fail or produce incorrect results -* PPL commands do not support examining or altering array values \ No newline at end of file +OpenSearch does not natively support the ARRAY data type but does allow multi-value fields implicitly. The +SQL/PPL plugin adheres strictly to the data type semantics defined in index mappings. When parsing OpenSearch +responses, it expects data to match the declared type and does not account for data in array format. If the +plugins.query.field_type_tolerance setting is enabled, the SQL/PPL plugin will handle array datasets by returning +scalar data types, allowing basic queries (e.g., SELECT * FROM tbl WHERE condition). However, using multi-value +fields in expressions or functions will result in exceptions. If this setting is disabled or absent, only the +first element of an array is returned, preserving the default behavior. \ No newline at end of file diff --git a/docs/user/limitations/limitations.rst b/docs/user/limitations/limitations.rst index 8ce75a0e25..a7f728df2e 100644 --- a/docs/user/limitations/limitations.rst +++ b/docs/user/limitations/limitations.rst @@ -101,3 +101,24 @@ The response in JDBC format with cursor id:: } The query with `aggregation` and `join` does not support pagination for now. + +Limitations on Using Multi-valued Fields +======================================== + +Using a multi-valued field as an argument of a SQL or PPL function/operator will cause the query to fail. 
For +example, the following query fails:: + + POST _plugins/_sql/ + { + "query": "SELECT id, ABS(long_array) FROM multi_value_long" + } +The response in JSON format is:: + + { + "error": { + "reason": "Invalid SQL query", + "details": "invalid to get longValue from value of type ARRAY", + "type": "ExpressionEvaluationException" + }, + "status": 400 + } diff --git a/doctest/test_data/multi_value_long.json b/doctest/test_data/multi_value_long.json new file mode 100644 index 0000000000..3c139630f6 --- /dev/null +++ b/doctest/test_data/multi_value_long.json @@ -0,0 +1,5 @@ +{"id": 1, "long_array": [1, 2]} +{"id": 2, "long_array": [3, 4]} +{"id": 3, "long_array": [1, 5]} +{"id": 4, "long_array": [1, 2]} +{"id": 5, "long_array": [2, 3]} \ No newline at end of file From 4064eac2ee9cb5d868dc96b3442c8744991c8036 Mon Sep 17 00:00:00 2001 From: Norman Jordan Date: Wed, 30 Oct 2024 09:18:11 -0700 Subject: [PATCH 5/5] Copied the description to the limitations.rst file Signed-off-by: Norman Jordan --- docs/user/limitations/limitations.rst | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/docs/user/limitations/limitations.rst b/docs/user/limitations/limitations.rst index a7f728df2e..22ad3c2a17 100644 --- a/docs/user/limitations/limitations.rst +++ b/docs/user/limitations/limitations.rst @@ -105,8 +105,16 @@ The query with `aggregation` and `join` does not support pagination for now. Limitations on Using Multi-valued Fields ======================================== -Using a multi-valued field as an argument of a SQL or PPL function/operator will cause the query to fail. For -example, the following query fails:: +OpenSearch does not natively support the ARRAY data type but does allow multi-value fields implicitly. The +SQL/PPL plugin adheres strictly to the data type semantics defined in index mappings. When parsing OpenSearch +responses, it expects data to match the declared type and does not account for data in array format. 
If the +plugins.query.field_type_tolerance setting is enabled, the SQL/PPL plugin will handle array datasets by returning +scalar data types, allowing basic queries (e.g., SELECT * FROM tbl WHERE condition). However, using multi-value +fields in expressions or functions will result in exceptions. If this setting is disabled or absent, only the +first element of an array is returned, preserving the default behavior. + +For example, the following query tries to calculate the absolute value of a field that contains arrays of +longs:: POST _plugins/_sql/ {