Skip to content

Commit

Permalink
Fix parsing of flat object fields with dots in keys
Browse files Browse the repository at this point in the history
We have a bug where a flat object field with inner fields that contain
dots will "push" the dotted name onto the dot-path from the root, but
then would just "pop" off the last part of the dotted name.

This change adds more robust support for flat object keys and subkeys
that contain dots (i.e. it pops off the entirety of the latest key,
regardless of how many dots it contains).

Fixes opensearch-project#11402

Signed-off-by: Michael Froh <[email protected]>
  • Loading branch information
msfroh committed Nov 30, 2023
1 parent 21c0597 commit 7dbfa8c
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ public class JsonToStringXContentParser extends AbstractXContentParser {
private ArrayList<String> keyList = new ArrayList<>();

private XContentBuilder builder = XContentBuilder.builder(JsonXContent.jsonXContent);
private ParseContext parseContext;

private NamedXContentRegistry xContentRegistry;

Expand All @@ -54,14 +53,13 @@ public class JsonToStringXContentParser extends AbstractXContentParser {
public JsonToStringXContentParser(
NamedXContentRegistry xContentRegistry,
DeprecationHandler deprecationHandler,
ParseContext parseContext,
XContentParser parser,
String fieldTypeName
) throws IOException {
super(xContentRegistry, deprecationHandler);
this.parseContext = parseContext;
this.deprecationHandler = deprecationHandler;
this.xContentRegistry = xContentRegistry;
this.parser = parseContext.parser();
this.parser = parser;
this.fieldTypeName = fieldTypeName;
}

Expand All @@ -86,8 +84,22 @@ private void parseToken(StringBuilder path, String currentFieldName) throws IOEx
StringBuilder parsedFields = new StringBuilder();

if (this.parser.currentToken() == Token.FIELD_NAME) {
path.append(DOT_SYMBOL + currentFieldName);
this.keyList.add(currentFieldName);
path.append(DOT_SYMBOL).append(currentFieldName);
int dotIndex = currentFieldName.indexOf(DOT_SYMBOL);
String fieldNameSuffix = currentFieldName;
// The field name may be of the form foo.bar.baz
// If that's the case, each "part" is a key.
while (dotIndex >= 0) {
String fieldNamePrefix = fieldNameSuffix.substring(0, dotIndex);
if (!fieldNamePrefix.isEmpty()) {
this.keyList.add(fieldNamePrefix);
}
fieldNameSuffix = fieldNameSuffix.substring(dotIndex + 1);
dotIndex = fieldNameSuffix.indexOf(DOT_SYMBOL);
}
if (!fieldNameSuffix.isEmpty()) {
this.keyList.add(fieldNameSuffix);
}
} else if (this.parser.currentToken() == Token.START_ARRAY) {
parseToken(path, currentFieldName);
break;
Expand All @@ -97,18 +109,18 @@ private void parseToken(StringBuilder path, String currentFieldName) throws IOEx
parseToken(path, currentFieldName);
int dotIndex = path.lastIndexOf(DOT_SYMBOL);
if (dotIndex != -1) {
path.delete(dotIndex, path.length());
path.setLength(path.length() - currentFieldName.length() - 1);
}
} else {
if (!path.toString().contains(currentFieldName)) {
path.append(DOT_SYMBOL + currentFieldName);
path.append(DOT_SYMBOL).append(currentFieldName);
}
parseValue(parsedFields);
this.valueList.add(parsedFields.toString());
this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields);
int dotIndex = path.lastIndexOf(DOT_SYMBOL);
if (dotIndex != -1) {
path.delete(dotIndex, path.length());
path.setLength(path.length() - currentFieldName.length() - 1);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -572,7 +572,7 @@ protected void parseCreateField(ParseContext context) throws IOException {
JsonToStringXContentParser JsonToStringParser = new JsonToStringXContentParser(
NamedXContentRegistry.EMPTY,
DeprecationHandler.IGNORE_DEPRECATIONS,
context,
context.parser(),
fieldType().name()
);
/*
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.common.xcontent;

import org.opensearch.common.xcontent.json.JsonXContent;
import org.opensearch.core.xcontent.DeprecationHandler;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.test.OpenSearchTestCase;

import java.io.IOException;

public class JsonToStringXContentParserTests extends OpenSearchTestCase {

private String flattenJsonString(String fieldName, String in) throws IOException {
String transformed;
try (XContentParser parser =
JsonXContent.jsonXContent.createParser(xContentRegistry(), DeprecationHandler.THROW_UNSUPPORTED_OPERATION, in)) {
JsonToStringXContentParser jsonToStringXContentParser = new JsonToStringXContentParser(xContentRegistry(),
DeprecationHandler.THROW_UNSUPPORTED_OPERATION, parser, fieldName);
// Skip the START_OBJECT token:
jsonToStringXContentParser.nextToken();

XContentParser transformedParser = jsonToStringXContentParser.parseObject();
try (XContentBuilder jsonBuilder = XContentFactory.jsonBuilder()) {
jsonBuilder.copyCurrentStructure(transformedParser);
return jsonBuilder.toString();
}
}
}


public void testNestedObjects() throws IOException {
String jsonExample = "{" +
"\"first\" : \"1\"," +
"\"second\" : {" +
" \"inner\": \"2.0\"" +
"}," +
"\"third\": \"three\"" +
"}";

assertEquals("{" +
"\"flat\":[\"first\",\"second\",\"inner\",\"third\"]," +
"\"flat._value\":[\"1\",\"2.0\",\"three\"]," +
"\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner=2.0\",\"flat.third=three\"]" +
"}", flattenJsonString("flat", jsonExample));
}

public void testChildHasDots() throws IOException {
// This should be exactly the same as testNestedObjects. We're just using the "flat" notation for the inner
// object.
String jsonExample = "{" +
"\"first\" : \"1\"," +
"\"second.inner\" : \"2.0\"," +
"\"third\": \"three\"" +
"}";

assertEquals("{" +
"\"flat\":[\"first\",\"second\",\"inner\",\"third\"]," +
"\"flat._value\":[\"1\",\"2.0\",\"three\"]," +
"\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner=2.0\",\"flat.third=three\"]" +
"}", flattenJsonString("flat", jsonExample));
}

public void testNestChildObjectWithDots() throws IOException {
String jsonExample = "{" +
"\"first\" : \"1\"," +
"\"second.inner\" : {" +
" \"really_inner\" : \"2.0\"" +
"}," +
"\"third\": \"three\"" +
"}";

assertEquals("{" +
"\"flat\":[\"first\",\"second\",\"inner\",\"really_inner\",\"third\"]," +
"\"flat._value\":[\"1\",\"2.0\",\"three\"]," +
"\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.really_inner=2.0\",\"flat.third=three\"]" +
"}", flattenJsonString("flat", jsonExample));
}

public void testNestChildObjectWithDotsAndFieldWithDots() throws IOException {
String jsonExample = "{" +
"\"first\" : \"1\"," +
"\"second.inner\" : {" +
" \"totally.absolutely.inner\" : \"2.0\"" +
"}," +
"\"third\": \"three\"" +
"}";

assertEquals("{" +
"\"flat\":[\"first\",\"second\",\"inner\",\"totally\",\"absolutely\",\"inner\",\"third\"]," +
"\"flat._value\":[\"1\",\"2.0\",\"three\"]," +
"\"flat._valueAndPath\":[\"flat.first=1\",\"flat.second.inner.totally.absolutely.inner=2.0\",\"flat.third=three\"]" +
"}", flattenJsonString("flat", jsonExample));
}

}

0 comments on commit 7dbfa8c

Please sign in to comment.