Skip to content

Commit

Permalink
Fix parsing of flat object fields with dots in keys (opensearch-proje…
Browse files Browse the repository at this point in the history
…ct#11425)

We have a bug where a flat object field with inner fields that contain
dots will "push" the dotted name onto the dot-path from the root, but
then would just "pop" off the last part of the dotted name.

This change adds more robust support for flat object keys and subkeys
that contain dots (i.e. it pops off the entirety of the latest key,
regardless of how many dots it contains).

Fixes opensearch-project#11402

Signed-off-by: Michael Froh <[email protected]>
(cherry picked from commit 22b628b)
  • Loading branch information
msfroh committed Jan 9, 2024
1 parent c76df2d commit b43ce24
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 12 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Remove deprecated classes for Rounding ([#10956](https://github.com/opensearch-project/OpenSearch/issues/10956))

### Fixed
- Fix class_cast_exception when passing int to _version and other metadata fields in ingest simulate API ([#10101](https://github.com/opensearch-project/OpenSearch/pull/10101))
- Fix `class_cast_exception` when passing int to `_version` and other metadata fields in ingest simulate API ([#10101](https://github.com/opensearch-project/OpenSearch/pull/10101))
- Fix passing wrong parameter when calling newConfigurationException() in DotExpanderProcessor ([#10737](https://github.com/opensearch-project/OpenSearch/pull/10737))
- Delegating CachingWeightWrapper#count to internal weight object ([#10543](https://github.com/opensearch-project/OpenSearch/pull/10543))
- Fix per request latency last phase not tracked ([#10934](https://github.com/opensearch-project/OpenSearch/pull/10934))
Expand All @@ -119,6 +119,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Fix the issue with DefaultSpanScope restoring wrong span in the TracerContextStorage upon detach ([#11316](https://github.com/opensearch-project/OpenSearch/issues/11316))
- Remove shadowJar from `lang-painless` module publication ([#11369](https://github.com/opensearch-project/OpenSearch/issues/11369))
- Fix remote shards balancer and remove unused variables ([#11167](https://github.com/opensearch-project/OpenSearch/pull/11167))
- Fix parsing of flat object fields with dots in keys ([#11425](https://github.com/opensearch-project/OpenSearch/pull/11425))
- Fix bug where replication lag grows post primary relocation ([#11238](https://github.com/opensearch-project/OpenSearch/pull/11238))
- Fix for stuck update action in a bulk with `retry_on_conflict` property ([#11152](https://github.com/opensearch-project/OpenSearch/issues/11152))
- Fix template setting override for replication type ([#11417](https://github.com/opensearch-project/OpenSearch/pull/11417))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.core.xcontent.XContentLocation;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.index.mapper.ParseContext;

import java.io.IOException;
import java.math.BigInteger;
Expand All @@ -40,7 +39,6 @@ public class JsonToStringXContentParser extends AbstractXContentParser {
private ArrayList<String> keyList = new ArrayList<>();

private XContentBuilder builder = XContentBuilder.builder(JsonXContent.jsonXContent);
private ParseContext parseContext;

private NamedXContentRegistry xContentRegistry;

Expand All @@ -54,14 +52,13 @@ public class JsonToStringXContentParser extends AbstractXContentParser {
public JsonToStringXContentParser(
NamedXContentRegistry xContentRegistry,
DeprecationHandler deprecationHandler,
ParseContext parseContext,
XContentParser parser,
String fieldTypeName
) throws IOException {
super(xContentRegistry, deprecationHandler);
this.parseContext = parseContext;
this.deprecationHandler = deprecationHandler;
this.xContentRegistry = xContentRegistry;
this.parser = parseContext.parser();
this.parser = parser;
this.fieldTypeName = fieldTypeName;
}

Expand All @@ -86,8 +83,22 @@ private void parseToken(StringBuilder path, String currentFieldName) throws IOEx
StringBuilder parsedFields = new StringBuilder();

if (this.parser.currentToken() == Token.FIELD_NAME) {
path.append(DOT_SYMBOL + currentFieldName);
this.keyList.add(currentFieldName);
path.append(DOT_SYMBOL).append(currentFieldName);
int dotIndex = currentFieldName.indexOf(DOT_SYMBOL);
String fieldNameSuffix = currentFieldName;
// The field name may be of the form foo.bar.baz
// If that's the case, each "part" is a key.
while (dotIndex >= 0) {
String fieldNamePrefix = fieldNameSuffix.substring(0, dotIndex);
if (!fieldNamePrefix.isEmpty()) {
this.keyList.add(fieldNamePrefix);
}
fieldNameSuffix = fieldNameSuffix.substring(dotIndex + 1);
dotIndex = fieldNameSuffix.indexOf(DOT_SYMBOL);
}
if (!fieldNameSuffix.isEmpty()) {
this.keyList.add(fieldNameSuffix);
}
} else if (this.parser.currentToken() == Token.START_ARRAY) {
parseToken(path, currentFieldName);
break;
Expand All @@ -97,18 +108,18 @@ private void parseToken(StringBuilder path, String currentFieldName) throws IOEx
parseToken(path, currentFieldName);
int dotIndex = path.lastIndexOf(DOT_SYMBOL);
if (dotIndex != -1) {
path.delete(dotIndex, path.length());
path.setLength(path.length() - currentFieldName.length() - 1);
}
} else {
if (!path.toString().contains(currentFieldName)) {
path.append(DOT_SYMBOL + currentFieldName);
path.append(DOT_SYMBOL).append(currentFieldName);
}
parseValue(parsedFields);
this.valueList.add(parsedFields.toString());
this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields);
int dotIndex = path.lastIndexOf(DOT_SYMBOL);
if (dotIndex != -1) {
path.delete(dotIndex, path.length());
path.setLength(path.length() - currentFieldName.length() - 1);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,7 @@ protected void parseCreateField(ParseContext context) throws IOException {
JsonToStringXContentParser JsonToStringParser = new JsonToStringXContentParser(
NamedXContentRegistry.EMPTY,
DeprecationHandler.IGNORE_DEPRECATIONS,
context,
context.parser(),
fieldType().name()
);
/*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.common.xcontent;

import org.opensearch.common.xcontent.json.JsonXContent;
import org.opensearch.core.xcontent.DeprecationHandler;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.test.OpenSearchTestCase;

import java.io.IOException;

public class JsonToStringXContentParserTests extends OpenSearchTestCase {

private String flattenJsonString(String fieldName, String in) throws IOException {
String transformed;
try (
XContentParser parser = JsonXContent.jsonXContent.createParser(
xContentRegistry(),
DeprecationHandler.THROW_UNSUPPORTED_OPERATION,
in
)
) {
JsonToStringXContentParser jsonToStringXContentParser = new JsonToStringXContentParser(
xContentRegistry(),
DeprecationHandler.THROW_UNSUPPORTED_OPERATION,
parser,
fieldName
);
// Skip the START_OBJECT token:
jsonToStringXContentParser.nextToken();

XContentParser transformedParser = jsonToStringXContentParser.parseObject();
try (XContentBuilder jsonBuilder = XContentFactory.jsonBuilder()) {
jsonBuilder.copyCurrentStructure(transformedParser);
return jsonBuilder.toString();
}
}
}

public void testNestedObjects() throws IOException {
String jsonExample = "{" + "\"first\" : \"1\"," + "\"second\" : {" + " \"inner\": \"2.0\"" + "}," + "\"third\": \"three\"" + "}";

assertEquals(
"{"
+ "\"flat\":[\"first\",\"second\",\"inner\",\"third\"],"
+ "\"flat._value\":[\"1\",\"2.0\",\"three\"],"
+ "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner=2.0\",\"flat.third=three\"]"
+ "}",
flattenJsonString("flat", jsonExample)
);
}

public void testChildHasDots() throws IOException {
// This should be exactly the same as testNestedObjects. We're just using the "flat" notation for the inner
// object.
String jsonExample = "{" + "\"first\" : \"1\"," + "\"second.inner\" : \"2.0\"," + "\"third\": \"three\"" + "}";

assertEquals(
"{"
+ "\"flat\":[\"first\",\"second\",\"inner\",\"third\"],"
+ "\"flat._value\":[\"1\",\"2.0\",\"three\"],"
+ "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner=2.0\",\"flat.third=three\"]"
+ "}",
flattenJsonString("flat", jsonExample)
);
}

public void testNestChildObjectWithDots() throws IOException {
String jsonExample = "{"
+ "\"first\" : \"1\","
+ "\"second.inner\" : {"
+ " \"really_inner\" : \"2.0\""
+ "},"
+ "\"third\": \"three\""
+ "}";

assertEquals(
"{"
+ "\"flat\":[\"first\",\"second\",\"inner\",\"really_inner\",\"third\"],"
+ "\"flat._value\":[\"1\",\"2.0\",\"three\"],"
+ "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.really_inner=2.0\",\"flat.third=three\"]"
+ "}",
flattenJsonString("flat", jsonExample)
);
}

public void testNestChildObjectWithDotsAndFieldWithDots() throws IOException {
String jsonExample = "{"
+ "\"first\" : \"1\","
+ "\"second.inner\" : {"
+ " \"totally.absolutely.inner\" : \"2.0\""
+ "},"
+ "\"third\": \"three\""
+ "}";

assertEquals(
"{"
+ "\"flat\":[\"first\",\"second\",\"inner\",\"totally\",\"absolutely\",\"inner\",\"third\"],"
+ "\"flat._value\":[\"1\",\"2.0\",\"three\"],"
+ "\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.totally.absolutely.inner=2.0\",\"flat.third=three\"]"
+ "}",
flattenJsonString("flat", jsonExample)
);
}

}

0 comments on commit b43ce24

Please sign in to comment.