diff --git a/core/src/main/java/org/apache/druid/data/input/impl/JsonReader.java b/core/src/main/java/org/apache/druid/data/input/impl/JsonReader.java index 8d0f6678e34..8dee12dc30f 100644 --- a/core/src/main/java/org/apache/druid/data/input/impl/JsonReader.java +++ b/core/src/main/java/org/apache/druid/data/input/impl/JsonReader.java @@ -47,7 +47,7 @@ import java.util.List; import java.util.Map; /** - * In constract to {@link JsonLineReader} which processes input text line by line independently, + * In contrast to {@link JsonLineReader} which processes input text line by line independently, * this class tries to parse the input text as a whole to an array of objects. * * The input text can be: diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlattener.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlattener.java index 36e7ca34acc..19b702d83e8 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlattener.java +++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlattener.java @@ -23,7 +23,22 @@ import java.util.Map; public interface ObjectFlattener { + /** + * Transforms an input row object into a {@link Map}, likely based on the instructions in some {@link JSONPathSpec}. + * + * This method is used in normal ingestion to extract values into a map to translate into an + * {@link org.apache.druid.data.input.InputRow} + */ Map flatten(T obj); + /** + * Completely transforms an input row into a {@link Map}, including translating all nested structure into plain java + * objects such as {@link Map} and {@link java.util.List}. This method should translate everything as much as + * possible, ignoring any instructions in {@link JSONPathSpec} which might otherwise limit the amount of + * transformation done. + * + * This method is used by the ingestion "sampler" to provide a "raw" JSON form of the original input data, regardless + * of actual format, so that it can use "inline" JSON datasources and reduce sampling overhead. + */ Map toMap(T obj); } diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlatteners.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlatteners.java index 78fd1148718..77ae467e855 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlatteners.java +++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlatteners.java @@ -251,7 +251,9 @@ public class ObjectFlatteners Map actualMap = new HashMap<>(); for (String key : jsonProvider.getPropertyKeys(o)) { Object field = jsonProvider.getMapValue(o, key); - if (jsonProvider.isMap(field) || jsonProvider.isArray(field)) { + if (field == null) { + actualMap.put(key, null); + } else if (jsonProvider.isMap(field) || jsonProvider.isArray(field)) { actualMap.put(key, toMapHelper(finalizeConversionForMap(field))); } else { actualMap.put(key, finalizeConversionForMap(field)); diff --git a/core/src/test/java/org/apache/druid/java/util/common/parsers/ObjectFlattenersTest.java b/core/src/test/java/org/apache/druid/java/util/common/parsers/ObjectFlattenersTest.java new file mode 100644 index 00000000000..e48c4dafe88 --- /dev/null +++ b/core/src/test/java/org/apache/druid/java/util/common/parsers/ObjectFlattenersTest.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.java.util.common.parsers; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableList; +import org.junit.Assert; +import org.junit.Test; + +import java.util.Map; + +public class ObjectFlattenersTest +{ + private static final String SOME_JSON = "{\"foo\": null, \"bar\": 1}"; + private static final ObjectFlattener FLATTENER = ObjectFlatteners.create( + new JSONPathSpec( + true, + ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.PATH, "extract", "$.bar")) + ), + new JSONFlattenerMaker(true) + ); + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + + @Test + public void testFlatten() throws JsonProcessingException + { + JsonNode node = OBJECT_MAPPER.readTree(SOME_JSON); + Map flat = FLATTENER.flatten(node); + Assert.assertNull(flat.get("foo")); + Assert.assertEquals(1L, flat.get("bar")); + Assert.assertEquals(1L, flat.get("extract")); + } + + @Test + public void testToMap() throws JsonProcessingException + { + JsonNode node = OBJECT_MAPPER.readTree(SOME_JSON); + Map flat = FLATTENER.toMap(node); + Assert.assertNull(flat.get("foo")); + Assert.assertEquals(1, flat.get("bar")); + } +} diff --git a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/CompatParquetReaderTest.java b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/CompatParquetReaderTest.java index 60173212b53..f94949d6bd2 100644 --- a/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/CompatParquetReaderTest.java +++ b/extensions-core/parquet-extensions/src/test/java/org/apache/druid/data/input/parquet/CompatParquetReaderTest.java @@ -248,30 +248,30 @@ public class CompatParquetReaderTest extends BaseParquetReaderTest List sampled = sampleAllRows(reader); final String expectedJson = "{\n" + " \"enumColumn\" : \"SPADES\",\n" - + " \"maybeStringColumn\" : { },\n" - + " \"maybeBinaryColumn\" : { },\n" + + " \"maybeStringColumn\" : null,\n" + + " \"maybeBinaryColumn\" : null,\n" + " \"shortColumn\" : 1,\n" + " \"byteColumn\" : 0,\n" - + " \"maybeBoolColumn\" : { },\n" + + " \"maybeBoolColumn\" : null,\n" + " \"intColumn\" : 2,\n" + " \"doubleColumn\" : 0.2,\n" - + " \"maybeByteColumn\" : { },\n" + + " \"maybeByteColumn\" : null,\n" + " \"intSetColumn\" : [ 0 ],\n" + " \"boolColumn\" : true,\n" + " \"binaryColumn\" : \"val_0\",\n" - + " \"maybeIntColumn\" : { },\n" + + " \"maybeIntColumn\" : null,\n" + " \"intToStringColumn\" : {\n" + " \"0\" : \"val_0\",\n" + " \"1\" : \"val_1\",\n" + " \"2\" : \"val_2\"\n" + " },\n" - + " \"maybeDoubleColumn\" : { },\n" - + " \"maybeEnumColumn\" : { },\n" - + " \"maybeLongColumn\" : { },\n" + + " \"maybeDoubleColumn\" : null,\n" + + " \"maybeEnumColumn\" : null,\n" + + " \"maybeLongColumn\" : null,\n" + " \"stringsColumn\" : [ \"arr_0\", \"arr_1\", \"arr_2\" ],\n" + " \"longColumn\" : 0,\n" + " \"stringColumn\" : \"val_0\",\n" - + " \"maybeShortColumn\" : { },\n" + + " \"maybeShortColumn\" : null,\n" + " \"complexColumn\" : {\n" + " \"0\" : [ {\n" + " \"nestedStringColumn\" : \"val_0\",\n" @@ -426,9 +426,9 @@ public class CompatParquetReaderTest extends BaseParquetReaderTest ); List sampled = sampleAllRows(reader); final String expectedJson = "{\n" - + " \"optionalMessage\" : { },\n" + + " \"optionalMessage\" : null,\n" + " \"requiredPrimitive\" : 9,\n" - + " \"repeatedPrimitive\" : { },\n" + + " \"repeatedPrimitive\" : null,\n" + " \"repeatedMessage\" : [ 9, 10 ],\n" + " \"optionalPrimitive\" : 10,\n" + " \"requiredMessage\" : {\n"