From 37b8d4861c928a9b9e5cbde74df7f8a8a5562e45 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Mon, 28 Nov 2022 12:29:43 -0800 Subject: [PATCH] fix issues with nested data conversion (#13407) --- .../util/common/parsers/ObjectFlatteners.java | 13 +++++- .../common/parsers/ObjectFlattenersTest.java | 14 ++++++- .../orc-extensions/example/test_simple.orc | Bin 0 -> 506 bytes .../druid/data/input/orc/OrcReaderTest.java | 39 ++++++++++++++++++ 4 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 extensions-core/orc-extensions/example/test_simple.orc diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlatteners.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlatteners.java index 4e955a4d50b..98f087333a5 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlatteners.java +++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/ObjectFlatteners.java @@ -29,6 +29,7 @@ import javax.annotation.Nullable; import java.util.AbstractMap; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.LinkedHashSet; @@ -250,15 +251,23 @@ public class ObjectFlatteners */ default Map toMap(T obj) { - return (Map) toPlainJavaType(obj); + final Object mapOrNull = toPlainJavaType(obj); + if (mapOrNull == null) { + return Collections.emptyMap(); + } + return (Map) mapOrNull; } /** * Recursively traverse "json" object using a {@link JsonProvider}, converting to Java {@link Map} and {@link List}, * potentially transforming via {@link #finalizeConversionForMap} as we go */ + @Nullable default Object toPlainJavaType(Object o) { + if (o == null) { + return null; + } final JsonProvider jsonProvider = getJsonProvider(); if (jsonProvider.isMap(o)) { Map actualMap = new HashMap<>(); @@ -287,7 +296,7 @@ public class ObjectFlatteners return finalizeConversionForMap(actualList); } // unknown, just pass it through - return o; + return finalizeConversionForMap(o); } /** diff --git a/core/src/test/java/org/apache/druid/java/util/common/parsers/ObjectFlattenersTest.java b/core/src/test/java/org/apache/druid/java/util/common/parsers/ObjectFlattenersTest.java index 2b610690db0..e0b0fbcc510 100644 --- a/core/src/test/java/org/apache/druid/java/util/common/parsers/ObjectFlattenersTest.java +++ b/core/src/test/java/org/apache/druid/java/util/common/parsers/ObjectFlattenersTest.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import org.junit.Assert; import org.junit.Test; @@ -32,12 +33,14 @@ import java.util.Map; public class ObjectFlattenersTest { private static final String SOME_JSON = "{\"foo\": null, \"bar\": 1}"; + + private static final ObjectFlatteners.FlattenerMaker FLATTENER_MAKER = new JSONFlattenerMaker(true); private static final ObjectFlattener FLATTENER = ObjectFlatteners.create( new JSONPathSpec( true, ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.PATH, "extract", "$.bar")) ), - new JSONFlattenerMaker(true) + FLATTENER_MAKER ); private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); @@ -62,4 +65,13 @@ public class ObjectFlattenersTest Assert.assertNull(flat.get("foo")); Assert.assertEquals(1, flat.get("bar")); } + + @Test + public void testToMapNull() throws JsonProcessingException + { + JsonNode node = OBJECT_MAPPER.readTree("null"); + Map flat = FLATTENER.toMap(node); + Assert.assertNull(FLATTENER_MAKER.toPlainJavaType(node)); + Assert.assertEquals(ImmutableMap.of(), flat); + } } diff --git a/extensions-core/orc-extensions/example/test_simple.orc b/extensions-core/orc-extensions/example/test_simple.orc new file mode 100644 index 0000000000000000000000000000000000000000..31c5391fb687961c1837637c7954841c96235b02 GIT binary patch literal 506 zcmeYda+YOa;Nsz8VE_Ul77nHWh5!bJ$D5u%N@-wJROVAtmDNjBW>!{?Wp-BJGBnu0 zq^#JoKrklYka`0ngBnn&(9=f=4NQuDd~UjFdTz%U+!)M&qCro<1_<)GsoLpHdo)SO zS881S%MxY5S4l!>dv>mXClL9Pw|wPN^O9;eT>H}$Xd zi^#fO;Bj1y+1htb|D`3DoqC;iYHvS(WOup#gsJSOtiITM=}=w^&P({Qq@wOO;_;Hi#gPtSJm8z&B&@VI_Jpu>rWXUzc*4#f-> z@hKk|Up;)1a3$$kQVLJP7NLg;A&(v>Y3gYji7-tTl40!O6aHf(F*$zS`-7&-bG b1sEk78d#K=714cMB#Ff;oHIZFTlJD96o literal 0 HcmV?d00001 diff --git a/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java b/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java index d0f012245b0..03083f61ebc 100644 --- a/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java +++ b/extensions-core/orc-extensions/src/test/java/org/apache/druid/data/input/orc/OrcReaderTest.java @@ -29,6 +29,7 @@ import org.apache.druid.data.input.InputRow; import org.apache.druid.data.input.InputRowSchema; import org.apache.druid.data.input.impl.DimensionsSpec; import org.apache.druid.data.input.impl.FileEntity; +import org.apache.druid.data.input.impl.StringDimensionSchema; import org.apache.druid.data.input.impl.TimestampSpec; import org.apache.druid.java.util.common.DateTimes; import org.apache.druid.java.util.common.parsers.CloseableIterator; @@ -569,6 +570,44 @@ public class OrcReaderTest extends InitializedNullHandlingTest } } + @Test + public void testSimpleNullValues() throws IOException + { + final InputFormat inputFormat = new OrcInputFormat( + new JSONPathSpec( + true, + ImmutableList.of() + ), + null, + new Configuration() + ); + final InputEntityReader reader = createReader( + new TimestampSpec("timestamp", "auto", null), + new DimensionsSpec( + ImmutableList.of( + new StringDimensionSchema("c1"), + new StringDimensionSchema("c2") + ) + ), + inputFormat, + "example/test_simple.orc" + ); + try (CloseableIterator iterator = reader.read()) { + Assert.assertTrue(iterator.hasNext()); + InputRow row = iterator.next(); + + Assert.assertEquals(DateTimes.of("2022-01-01T00:00:00.000Z"), row.getTimestamp()); + Assert.assertEquals("true", Iterables.getOnlyElement(row.getDimension("c1"))); + Assert.assertEquals("str1", Iterables.getOnlyElement(row.getDimension("c2"))); + + row = iterator.next(); + Assert.assertEquals(DateTimes.of("2022-01-02T00:00:00.000Z"), row.getTimestamp()); + Assert.assertEquals(ImmutableList.of(), row.getDimension("c1")); + Assert.assertEquals(ImmutableList.of(), row.getDimension("c2")); + Assert.assertFalse(iterator.hasNext()); + } + } + private InputEntityReader createReader( TimestampSpec timestampSpec, DimensionsSpec dimensionsSpec,