diff --git a/core/src/main/java/org/apache/druid/data/input/impl/FastJacksonJsonNodeJsonProvider.java b/core/src/main/java/org/apache/druid/data/input/impl/FastJacksonJsonNodeJsonProvider.java new file mode 100644 index 00000000000..a98999a1b72 --- /dev/null +++ b/core/src/main/java/org/apache/druid/data/input/impl/FastJacksonJsonNodeJsonProvider.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.druid.data.input.impl; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import com.jayway.jsonpath.spi.json.JacksonJsonNodeJsonProvider; + +import java.util.Collection; +import java.util.Collections; + +// +// + +/** + * Custom json-path JsonProvider override to circumvent slow performance when encountering null paths as described in + * https://github.com/json-path/JsonPath/issues/396 + * + * Note that this only avoids errors for map properties, avoiding the exception on array paths is not possible without + * patching json-path itself + */ +public class FastJacksonJsonNodeJsonProvider extends JacksonJsonNodeJsonProvider +{ + @Override + public boolean isMap(Object obj) + { + return obj == null || super.isMap(obj); + } + + @Override + public Object getMapValue(Object obj, String key) + { + if (obj == null) { + return null; + } else { + ObjectNode jsonObject = (ObjectNode) obj; + Object o = jsonObject.get(key); + if (!jsonObject.has(key)) { + return null; + } else { + return unwrap(o); + } + } + } + + @Override + public Collection getPropertyKeys(final Object o) + { + if (o == null) { + return Collections.emptySet(); + } + return super.getPropertyKeys(o); + } +} diff --git a/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java b/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java index d16280a8831..8d58451f34b 100644 --- a/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java +++ b/core/src/main/java/org/apache/druid/java/util/common/parsers/JSONFlattenerMaker.java @@ -24,10 +24,10 @@ import com.google.common.collect.FluentIterable; import com.jayway.jsonpath.Configuration; import com.jayway.jsonpath.JsonPath; import com.jayway.jsonpath.Option; -import com.jayway.jsonpath.spi.json.JacksonJsonNodeJsonProvider; import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider; import net.thisptr.jackson.jq.JsonQuery; import net.thisptr.jackson.jq.exception.JsonQueryException; +import org.apache.druid.data.input.impl.FastJacksonJsonNodeJsonProvider; import org.apache.druid.java.util.common.StringUtils; import javax.annotation.Nullable; @@ -45,7 +45,7 @@ public class JSONFlattenerMaker implements ObjectFlatteners.FlattenerMaker newList = new ArrayList<>(); for (JsonNode entry : val) { - newList.add(valueConversionFunction(entry)); + if (!entry.isNull()) { + newList.add(valueConversionFunction(entry)); + } } return newList; } diff --git a/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java b/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java index 7168076b265..4dd13b601a6 100644 --- a/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java +++ b/core/src/test/java/org/apache/druid/data/input/impl/JSONParseSpecTest.java @@ -91,6 +91,7 @@ public class JSONParseSpecTest true, ImmutableList.of( new JSONPathFieldSpec(JSONPathFieldType.PATH, "foo", "$.[?(@.maybe_object)].maybe_object.foo.test"), + new JSONPathFieldSpec(JSONPathFieldType.PATH, "baz", "$.maybe_object_2.foo.test"), new JSONPathFieldSpec(JSONPathFieldType.PATH, "bar", "$.[?(@.something_else)].something_else.foo") ) ), @@ -99,6 +100,7 @@ public class JSONParseSpecTest final Map expected = new HashMap<>(); expected.put("foo", new ArrayList()); + expected.put("baz", null); expected.put("bar", Collections.singletonList("test")); final Parser parser = parseSpec.makeParser(); diff --git a/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java b/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java index facdaa043a7..0b55d762f00 100644 --- a/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java +++ b/extensions-core/avro-extensions/src/main/java/org/apache/druid/data/input/avro/AvroFlattenerMaker.java @@ -34,6 +34,7 @@ import org.apache.druid.java.util.common.parsers.ObjectFlatteners; import java.nio.ByteBuffer; import java.util.EnumSet; import java.util.List; +import java.util.Objects; import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; @@ -140,6 +141,9 @@ public class AvroFlattenerMaker implements ObjectFlatteners.FlattenerMaker getPropertyKeys(final Object o) { - if (o instanceof Map) { + if (o == null) { + return Collections.emptySet(); + } else if (o instanceof Map) { return ((Map) o).keySet().stream().map(String::valueOf).collect(Collectors.toSet()); } else if (o instanceof GenericRecord) { return ((GenericRecord) o).getSchema().getFields().stream().map(Schema.Field::name).collect(Collectors.toSet()); @@ -138,7 +141,9 @@ public class GenericAvroJsonProvider implements JsonProvider @Override public Object getMapValue(final Object o, final String s) { - if (o instanceof GenericRecord) { + if (o == null) { + return null; + } else if (o instanceof GenericRecord) { return ((GenericRecord) o).get(s); } else if (o instanceof Map) { return ((Map) o).get(s); @@ -172,7 +177,7 @@ public class GenericAvroJsonProvider implements JsonProvider @Override public boolean isMap(final Object o) { - return o instanceof Map || o instanceof GenericRecord; + return o == null || o instanceof Map || o instanceof GenericRecord; } @Override diff --git a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupFlattenerMaker.java b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupFlattenerMaker.java index cd2612ebd01..2ba939b162a 100644 --- a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupFlattenerMaker.java +++ b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupFlattenerMaker.java @@ -30,6 +30,7 @@ import org.apache.parquet.schema.Type; import javax.annotation.Nullable; import java.util.EnumSet; import java.util.List; +import java.util.Objects; import java.util.Set; import java.util.function.Function; import java.util.stream.Collectors; @@ -100,7 +101,7 @@ public class ParquetGroupFlattenerMaker implements ObjectFlatteners.FlattenerMak if (ParquetGroupConverter.isWrappedListPrimitive(o)) { return converter.unwrapListPrimitive(o); } else if (o instanceof List) { - List asList = (List) o; + List asList = ((List) o).stream().filter(Objects::nonNull).collect(Collectors.toList()); if (asList.stream().allMatch(ParquetGroupConverter::isWrappedListPrimitive)) { return asList.stream().map(Group.class::cast).map(converter::unwrapListPrimitive).collect(Collectors.toList()); } diff --git a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java index 78e3f3355f7..b422e97d7ed 100644 --- a/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java +++ b/extensions-core/parquet-extensions/src/main/java/org/apache/druid/data/input/parquet/simple/ParquetGroupJsonProvider.java @@ -26,6 +26,7 @@ import org.apache.parquet.example.data.Group; import java.io.InputStream; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -64,7 +65,7 @@ public class ParquetGroupJsonProvider implements JsonProvider @Override public boolean isMap(final Object o) { - return o instanceof Map || o instanceof Group; + return o == null || o instanceof Map || o instanceof Group; } @Override @@ -93,7 +94,9 @@ public class ParquetGroupJsonProvider implements JsonProvider @Override public Collection getPropertyKeys(final Object o) { - if (o instanceof Map) { + if (o == null) { + return Collections.emptySet(); + } else if (o instanceof Map) { return ((Map) o).keySet().stream().map(String::valueOf).collect(Collectors.toSet()); } else if (o instanceof Group) { return ((Group) o).getType().getFields().stream().map(f -> f.getName()).collect(Collectors.toSet()); @@ -105,7 +108,9 @@ public class ParquetGroupJsonProvider implements JsonProvider @Override public Object getMapValue(final Object o, final String s) { - if (o instanceof Map) { + if (o == null) { + return null; + } else if (o instanceof Map) { return ((Map) o).get(s); } else if (o instanceof Group) { Group g = (Group) o;