diff --git a/CHANGELOG.md b/CHANGELOG.md index aeb25e5aa2947..581c9334cab29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,6 +43,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix missing value of FieldSort for unsigned_long ([#14963](https://github.com/opensearch-project/OpenSearch/pull/14963)) - Fix delete index template failed when the index template matches a data stream but is unused ([#15080](https://github.com/opensearch-project/OpenSearch/pull/15080)) - Fix array_index_out_of_bounds_exception when indexing documents with field name containing only dot ([#15126](https://github.com/opensearch-project/OpenSearch/pull/15126)) +- Fixed array field name omission in flat_object function for nested JSON ([#13620](https://github.com/opensearch-project/OpenSearch/pull/13620)) ### Security diff --git a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java index 998122d9e5c43..d24571fc5778d 100644 --- a/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java +++ b/server/src/main/java/org/opensearch/common/xcontent/JsonToStringXContentParser.java @@ -9,6 +9,7 @@ package org.opensearch.common.xcontent; import org.opensearch.common.xcontent.json.JsonXContent; +import org.opensearch.core.common.Strings; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.xcontent.AbstractXContentParser; import org.opensearch.core.xcontent.DeprecationHandler; @@ -23,6 +24,9 @@ import java.math.BigInteger; import java.nio.CharBuffer; import java.util.ArrayList; +import java.util.Collections; +import java.util.Deque; +import java.util.LinkedList; /** * JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser @@ -32,21 +36,20 @@ */ public class JsonToStringXContentParser extends AbstractXContentParser { private final String fieldTypeName; - private XContentParser parser; + private final XContentParser parser; - private ArrayList valueList = new ArrayList<>(); - private ArrayList valueAndPathList = new ArrayList<>(); - private ArrayList keyList = new ArrayList<>(); + private final ArrayList valueList = new ArrayList<>(); + private final ArrayList valueAndPathList = new ArrayList<>(); + private final ArrayList keyList = new ArrayList<>(); - private XContentBuilder builder = XContentBuilder.builder(JsonXContent.jsonXContent); + private final XContentBuilder builder = XContentBuilder.builder(JsonXContent.jsonXContent); - private NamedXContentRegistry xContentRegistry; + private final NamedXContentRegistry xContentRegistry; - private DeprecationHandler deprecationHandler; + private final DeprecationHandler deprecationHandler; private static final String VALUE_AND_PATH_SUFFIX = "._valueAndPath"; private static final String VALUE_SUFFIX = "._value"; - private static final String DOT_SYMBOL = "."; private static final String EQUAL_SYMBOL = "="; public JsonToStringXContentParser( @@ -63,9 +66,14 @@ public JsonToStringXContentParser( } public XContentParser parseObject() throws IOException { + assert currentToken() == Token.START_OBJECT; + parser.nextToken(); // Skip the outer START_OBJECT. Need to return on END_OBJECT. + builder.startObject(); - StringBuilder path = new StringBuilder(fieldTypeName); - parseToken(path, null); + LinkedList path = new LinkedList<>(Collections.singleton(fieldTypeName)); + while (currentToken() != Token.END_OBJECT) { + parseToken(path); + } builder.field(this.fieldTypeName, keyList); builder.field(this.fieldTypeName + VALUE_SUFFIX, valueList); builder.field(this.fieldTypeName + VALUE_AND_PATH_SUFFIX, valueAndPathList); @@ -74,75 +82,55 @@ public XContentParser parseObject() throws IOException { return JsonXContent.jsonXContent.createParser(this.xContentRegistry, this.deprecationHandler, String.valueOf(jString)); } - private void parseToken(StringBuilder path, String currentFieldName) throws IOException { - - while (this.parser.nextToken() != Token.END_OBJECT) { - if (this.parser.currentName() != null) { - currentFieldName = this.parser.currentName(); + private void parseToken(Deque path) throws IOException { + if (this.parser.currentToken() == Token.FIELD_NAME) { + String fieldName = this.parser.currentName(); + path.addLast(fieldName); // Pushing onto the stack *must* be matched by pop + String parts = fieldName; + while (parts.contains(".")) { // Extract the intermediate keys maybe present in fieldName + int dotPos = parts.indexOf('.'); + String part = parts.substring(0, dotPos); + this.keyList.add(part); + parts = parts.substring(dotPos + 1); } - StringBuilder parsedFields = new StringBuilder(); - - if (this.parser.currentToken() == Token.FIELD_NAME) { - path.append(DOT_SYMBOL).append(currentFieldName); - int dotIndex = currentFieldName.indexOf(DOT_SYMBOL); - String fieldNameSuffix = currentFieldName; - // The field name may be of the form foo.bar.baz - // If that's the case, each "part" is a key. - while (dotIndex >= 0) { - String fieldNamePrefix = fieldNameSuffix.substring(0, dotIndex); - if (!fieldNamePrefix.isEmpty()) { - this.keyList.add(fieldNamePrefix); - } - fieldNameSuffix = fieldNameSuffix.substring(dotIndex + 1); - dotIndex = fieldNameSuffix.indexOf(DOT_SYMBOL); - } - if (!fieldNameSuffix.isEmpty()) { - this.keyList.add(fieldNameSuffix); - } - } else if (this.parser.currentToken() == Token.START_ARRAY) { - parseToken(path, currentFieldName); - break; - } else if (this.parser.currentToken() == Token.END_ARRAY) { - // skip - } else if (this.parser.currentToken() == Token.START_OBJECT) { - parseToken(path, currentFieldName); - int dotIndex = path.lastIndexOf(DOT_SYMBOL, path.length()); - - if (dotIndex != -1 && path.length() > currentFieldName.length()) { - path.setLength(path.length() - currentFieldName.length() - 1); - } - } else { - if (!path.toString().contains(currentFieldName)) { - path.append(DOT_SYMBOL).append(currentFieldName); - } - parseValue(parsedFields); - this.valueList.add(parsedFields.toString()); - this.valueAndPathList.add(path + EQUAL_SYMBOL + parsedFields); - int dotIndex = path.lastIndexOf(DOT_SYMBOL, path.length()); - if (dotIndex != -1 && path.length() > currentFieldName.length()) { - path.setLength(path.length() - currentFieldName.length() - 1); - } + this.keyList.add(parts); // parts has no dot, so either it's the original fieldName or it's the last part + this.parser.nextToken(); // advance to the value of fieldName + parseToken(path); // parse the value for fieldName (which will be an array, an object, or a primitive value) + path.removeLast(); // Here is where we pop fieldName from the stack (since we're done with the value of fieldName) + // Note that whichever other branch we just passed through has already ended with nextToken(), so we + // don't need to call it. + } else if (this.parser.currentToken() == Token.START_ARRAY) { + parser.nextToken(); + while (this.parser.currentToken() != Token.END_ARRAY) { + parseToken(path); } - + this.parser.nextToken(); + } else if (this.parser.currentToken() == Token.START_OBJECT) { + parser.nextToken(); + while (this.parser.currentToken() != Token.END_OBJECT) { + parseToken(path); + } + this.parser.nextToken(); + } else if (this.parser.currentToken().isValue()) { + String parsedValue = parseValue(); + if (parsedValue != null) { + this.valueList.add(parsedValue); + this.valueAndPathList.add(Strings.collectionToDelimitedString(path, ".") + EQUAL_SYMBOL + parsedValue); + } + this.parser.nextToken(); } } - private void parseValue(StringBuilder parsedFields) throws IOException { + private String parseValue() throws IOException { switch (this.parser.currentToken()) { case VALUE_BOOLEAN: case VALUE_NUMBER: case VALUE_STRING: case VALUE_NULL: - parsedFields.append(this.parser.textOrNull()); - break; + return this.parser.textOrNull(); // Handle other token types as needed - case FIELD_NAME: - case VALUE_EMBEDDED_OBJECT: - case END_ARRAY: - case START_ARRAY: - break; default: - throw new IOException("Unsupported token type [" + parser.currentToken() + "]"); + throw new IOException("Unsupported value token type [" + parser.currentToken() + "]"); } } diff --git a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java index 9a3f2595a7c9e..b82fa3999612a 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FlatObjectFieldMapper.java @@ -568,8 +568,12 @@ protected void parseCreateField(ParseContext context) throws IOException { if (context.externalValueSet()) { String value = context.externalValue().toString(); parseValueAddFields(context, value, fieldType().name()); + } else if (context.parser().currentToken() == XContentParser.Token.VALUE_NULL) { + context.parser().nextToken(); // This triggers an exception in DocumentParser. + // We could remove the above nextToken() call to skip the null value, but the existing + // behavior (since 2.7) throws the exception. } else { - JsonToStringXContentParser JsonToStringParser = new JsonToStringXContentParser( + JsonToStringXContentParser jsonToStringParser = new JsonToStringXContentParser( NamedXContentRegistry.EMPTY, DeprecationHandler.IGNORE_DEPRECATIONS, context.parser(), @@ -579,7 +583,7 @@ protected void parseCreateField(ParseContext context) throws IOException { JsonToStringParser is the main parser class to transform JSON into stringFields in a XContentParser It reads the JSON object and parsed to a list of string */ - XContentParser parser = JsonToStringParser.parseObject(); + XContentParser parser = jsonToStringParser.parseObject(); XContentParser.Token currentToken; while ((currentToken = parser.nextToken()) != XContentParser.Token.END_OBJECT) { @@ -594,7 +598,6 @@ protected void parseCreateField(ParseContext context) throws IOException { } } - } } diff --git a/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java b/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java index 0feb7bcd1ceec..a0f5150981a08 100644 --- a/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java +++ b/server/src/test/java/org/opensearch/common/xcontent/JsonToStringXContentParserTests.java @@ -19,7 +19,6 @@ public class JsonToStringXContentParserTests extends OpenSearchTestCase { private String flattenJsonString(String fieldName, String in) throws IOException { - String transformed; try ( XContentParser parser = JsonXContent.jsonXContent.createParser( xContentRegistry(), @@ -33,10 +32,11 @@ private String flattenJsonString(String fieldName, String in) throws IOException parser, fieldName ); - // Skip the START_OBJECT token: + // Point to the first token (should be START_OBJECT) jsonToStringXContentParser.nextToken(); XContentParser transformedParser = jsonToStringXContentParser.parseObject(); + assertSame(XContentParser.Token.END_OBJECT, jsonToStringXContentParser.currentToken()); try (XContentBuilder jsonBuilder = XContentFactory.jsonBuilder()) { jsonBuilder.copyCurrentStructure(transformedParser); return jsonBuilder.toString(); @@ -110,4 +110,57 @@ public void testNestChildObjectWithDotsAndFieldWithDots() throws IOException { ); } + public void testArrayOfObjects() throws IOException { + String jsonExample = "{" + + "\"field\": {" + + " \"detail\": {" + + " \"foooooooooooo\": [" + + " {\"name\":\"baz\"}," + + " {\"name\":\"baz\"}" + + " ]" + + " }" + + "}}"; + + assertEquals( + "{" + + "\"flat\":[\"field\",\"detail\",\"foooooooooooo\",\"name\",\"name\"]," + + "\"flat._value\":[\"baz\",\"baz\"]," + + "\"flat._valueAndPath\":[" + + "\"flat.field.detail.foooooooooooo.name=baz\"," + + "\"flat.field.detail.foooooooooooo.name=baz\"" + + "]}", + flattenJsonString("flat", jsonExample) + ); + } + + public void testArraysOfObjectsAndValues() throws IOException { + String jsonExample = "{" + + "\"field\": {" + + " \"detail\": {" + + " \"foooooooooooo\": [" + + " {\"name\":\"baz\"}," + + " {\"name\":\"baz\"}" + + " ]" + + " }," + + " \"numbers\" : [" + + " 1," + + " 2," + + " 3" + + " ]" + + "}}"; + + assertEquals( + "{" + + "\"flat\":[\"field\",\"detail\",\"foooooooooooo\",\"name\",\"name\",\"numbers\"]," + + "\"flat._value\":[\"baz\",\"baz\",\"1\",\"2\",\"3\"]," + + "\"flat._valueAndPath\":[" + + "\"flat.field.detail.foooooooooooo.name=baz\"," + + "\"flat.field.detail.foooooooooooo.name=baz\"," + + "\"flat.field.numbers=1\"," + + "\"flat.field.numbers=2\"," + + "\"flat.field.numbers=3\"" + + "]}", + flattenJsonString("flat", jsonExample) + ); + } }