faster flattening for non-existent paths (#6654)

* faster flattening for non-existent properties to circumvent upstream json-path issue

* fix json provider

* revert to using null instead of undefined
This commit is contained in:
Clint Wylie 2018-11-27 14:14:11 -08:00 committed by Gian Merlino
parent b4a4669128
commit 8f8a569aa2
7 changed files with 99 additions and 10 deletions

View File

@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.data.input.impl;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.jayway.jsonpath.spi.json.JacksonJsonNodeJsonProvider;
import java.util.Collection;
import java.util.Collections;
//
//
/**
 * json-path JsonProvider built on top of {@link JacksonJsonNodeJsonProvider} that accepts {@code null} wherever a
 * JSON object is expected. This sidesteps the slow handling of null paths described in
 * https://github.com/json-path/JsonPath/issues/396
 *
 * Only map (object) property access is covered here. Avoiding the exception on array paths is not possible
 * without patching json-path itself.
 */
public class FastJacksonJsonNodeJsonProvider extends JacksonJsonNodeJsonProvider
{
  /**
   * Treats {@code null} as a map, and otherwise defers to the parent provider.
   *
   * @param obj candidate node, may be {@code null}
   * @return {@code true} if {@code obj} is {@code null} or the parent provider reports it as a map
   */
  @Override
  public boolean isMap(Object obj)
  {
    if (obj == null) {
      return true;
    }
    return super.isMap(obj);
  }

  /**
   * Reads a single property from an object node.
   *
   * @param obj node to read from; {@code null} is allowed, anything else is cast to {@link ObjectNode}
   * @param key name of the property to look up
   * @return {@code null} when {@code obj} is {@code null} or has no property named {@code key}, otherwise the
   *         result of {@code unwrap} applied to the stored value
   */
  @Override
  public Object getMapValue(Object obj, String key)
  {
    if (obj == null) {
      return null;
    }
    final ObjectNode node = (ObjectNode) obj;
    // a missing property yields null here, rather than being passed on to unwrap
    return node.has(key) ? unwrap(node.get(key)) : null;
  }

  /**
   * Lists the property names of a node.
   *
   * @param o node to inspect, may be {@code null}
   * @return an empty set when {@code o} is {@code null}, otherwise whatever the parent provider returns
   */
  @Override
  public Collection<String> getPropertyKeys(final Object o)
  {
    return o == null ? Collections.emptySet() : super.getPropertyKeys(o);
  }
}

View File

@ -24,10 +24,10 @@ import com.google.common.collect.FluentIterable;
import com.jayway.jsonpath.Configuration;
import com.jayway.jsonpath.JsonPath;
import com.jayway.jsonpath.Option;
import com.jayway.jsonpath.spi.json.JacksonJsonNodeJsonProvider;
import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
import net.thisptr.jackson.jq.JsonQuery;
import net.thisptr.jackson.jq.exception.JsonQueryException;
import org.apache.druid.data.input.impl.FastJacksonJsonNodeJsonProvider;
import org.apache.druid.java.util.common.StringUtils;
import javax.annotation.Nullable;
@ -45,7 +45,7 @@ public class JSONFlattenerMaker implements ObjectFlatteners.FlattenerMaker<JsonN
{
private static final Configuration JSONPATH_CONFIGURATION =
Configuration.builder()
.jsonProvider(new JacksonJsonNodeJsonProvider())
.jsonProvider(new FastJacksonJsonNodeJsonProvider())
.mappingProvider(new JacksonMappingProvider())
.options(EnumSet.of(Option.SUPPRESS_EXCEPTIONS))
.build();
@ -119,7 +119,9 @@ public class JSONFlattenerMaker implements ObjectFlatteners.FlattenerMaker<JsonN
if (val.isArray()) {
List<Object> newList = new ArrayList<>();
for (JsonNode entry : val) {
newList.add(valueConversionFunction(entry));
if (!entry.isNull()) {
newList.add(valueConversionFunction(entry));
}
}
return newList;
}

View File

@ -91,6 +91,7 @@ public class JSONParseSpecTest
true,
ImmutableList.of(
new JSONPathFieldSpec(JSONPathFieldType.PATH, "foo", "$.[?(@.maybe_object)].maybe_object.foo.test"),
new JSONPathFieldSpec(JSONPathFieldType.PATH, "baz", "$.maybe_object_2.foo.test"),
new JSONPathFieldSpec(JSONPathFieldType.PATH, "bar", "$.[?(@.something_else)].something_else.foo")
)
),
@ -99,6 +100,7 @@ public class JSONParseSpecTest
final Map<String, Object> expected = new HashMap<>();
expected.put("foo", new ArrayList());
expected.put("baz", null);
expected.put("bar", Collections.singletonList("test"));
final Parser<String, Object> parser = parseSpec.makeParser();

View File

@ -34,6 +34,7 @@ import org.apache.druid.java.util.common.parsers.ObjectFlatteners;
import java.nio.ByteBuffer;
import java.util.EnumSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
@ -140,6 +141,9 @@ public class AvroFlattenerMaker implements ObjectFlatteners.FlattenerMaker<Gener
if (field instanceof Utf8) {
return field.toString();
}
if (field instanceof List) {
return ((List) field).stream().filter(Objects::nonNull).collect(Collectors.toList());
}
return field;
}
}

View File

@ -27,6 +27,7 @@ import org.apache.avro.generic.GenericRecord;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -98,7 +99,9 @@ public class GenericAvroJsonProvider implements JsonProvider
@Override
public Collection<String> getPropertyKeys(final Object o)
{
if (o instanceof Map) {
if (o == null) {
return Collections.emptySet();
} else if (o instanceof Map) {
return ((Map<Object, Object>) o).keySet().stream().map(String::valueOf).collect(Collectors.toSet());
} else if (o instanceof GenericRecord) {
return ((GenericRecord) o).getSchema().getFields().stream().map(Schema.Field::name).collect(Collectors.toSet());
@ -138,7 +141,9 @@ public class GenericAvroJsonProvider implements JsonProvider
@Override
public Object getMapValue(final Object o, final String s)
{
if (o instanceof GenericRecord) {
if (o == null) {
return null;
} else if (o instanceof GenericRecord) {
return ((GenericRecord) o).get(s);
} else if (o instanceof Map) {
return ((Map) o).get(s);
@ -172,7 +177,7 @@ public class GenericAvroJsonProvider implements JsonProvider
@Override
public boolean isMap(final Object o)
{
return o instanceof Map || o instanceof GenericRecord;
return o == null || o instanceof Map || o instanceof GenericRecord;
}
@Override

View File

@ -30,6 +30,7 @@ import org.apache.parquet.schema.Type;
import javax.annotation.Nullable;
import java.util.EnumSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
@ -100,7 +101,7 @@ public class ParquetGroupFlattenerMaker implements ObjectFlatteners.FlattenerMak
if (ParquetGroupConverter.isWrappedListPrimitive(o)) {
return converter.unwrapListPrimitive(o);
} else if (o instanceof List) {
List<Object> asList = (List<Object>) o;
List<Object> asList = ((List<Object>) o).stream().filter(Objects::nonNull).collect(Collectors.toList());
if (asList.stream().allMatch(ParquetGroupConverter::isWrappedListPrimitive)) {
return asList.stream().map(Group.class::cast).map(converter::unwrapListPrimitive).collect(Collectors.toList());
}

View File

@ -26,6 +26,7 @@ import org.apache.parquet.example.data.Group;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -64,7 +65,7 @@ public class ParquetGroupJsonProvider implements JsonProvider
@Override
public boolean isMap(final Object o)
{
return o instanceof Map || o instanceof Group;
return o == null || o instanceof Map || o instanceof Group;
}
@Override
@ -93,7 +94,9 @@ public class ParquetGroupJsonProvider implements JsonProvider
@Override
public Collection<String> getPropertyKeys(final Object o)
{
if (o instanceof Map) {
if (o == null) {
return Collections.emptySet();
} else if (o instanceof Map) {
return ((Map<Object, Object>) o).keySet().stream().map(String::valueOf).collect(Collectors.toSet());
} else if (o instanceof Group) {
return ((Group) o).getType().getFields().stream().map(f -> f.getName()).collect(Collectors.toSet());
@ -105,7 +108,9 @@ public class ParquetGroupJsonProvider implements JsonProvider
@Override
public Object getMapValue(final Object o, final String s)
{
if (o instanceof Map) {
if (o == null) {
return null;
} else if (o instanceof Map) {
return ((Map) o).get(s);
} else if (o instanceof Group) {
Group g = (Group) o;