mirror of
https://github.com/apache/druid.git
synced 2025-02-17 07:25:02 +00:00
faster flattening for non-existent paths (#6654)
* faster flattening for non-existent properties to circumvent upstream json-path issue * fix json provider * revert to using null instead of undefined
This commit is contained in:
parent
b4a4669128
commit
8f8a569aa2
@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.data.input.impl;
|
||||
|
||||
import com.fasterxml.jackson.databind.node.ObjectNode;
|
||||
import com.jayway.jsonpath.spi.json.JacksonJsonNodeJsonProvider;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
|
||||
//
|
||||
//
|
||||
|
||||
/**
|
||||
* Custom json-path JsonProvider override to circumvent slow performance when encountering null paths as described in
|
||||
* https://github.com/json-path/JsonPath/issues/396
|
||||
*
|
||||
* Note that this only avoids errors for map properties, avoiding the exception on array paths is not possible without
|
||||
* patching json-path itself
|
||||
*/
|
||||
public class FastJacksonJsonNodeJsonProvider extends JacksonJsonNodeJsonProvider
|
||||
{
|
||||
@Override
|
||||
public boolean isMap(Object obj)
|
||||
{
|
||||
return obj == null || super.isMap(obj);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getMapValue(Object obj, String key)
|
||||
{
|
||||
if (obj == null) {
|
||||
return null;
|
||||
} else {
|
||||
ObjectNode jsonObject = (ObjectNode) obj;
|
||||
Object o = jsonObject.get(key);
|
||||
if (!jsonObject.has(key)) {
|
||||
return null;
|
||||
} else {
|
||||
return unwrap(o);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<String> getPropertyKeys(final Object o)
|
||||
{
|
||||
if (o == null) {
|
||||
return Collections.emptySet();
|
||||
}
|
||||
return super.getPropertyKeys(o);
|
||||
}
|
||||
}
|
@ -24,10 +24,10 @@ import com.google.common.collect.FluentIterable;
|
||||
import com.jayway.jsonpath.Configuration;
|
||||
import com.jayway.jsonpath.JsonPath;
|
||||
import com.jayway.jsonpath.Option;
|
||||
import com.jayway.jsonpath.spi.json.JacksonJsonNodeJsonProvider;
|
||||
import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
|
||||
import net.thisptr.jackson.jq.JsonQuery;
|
||||
import net.thisptr.jackson.jq.exception.JsonQueryException;
|
||||
import org.apache.druid.data.input.impl.FastJacksonJsonNodeJsonProvider;
|
||||
import org.apache.druid.java.util.common.StringUtils;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
@ -45,7 +45,7 @@ public class JSONFlattenerMaker implements ObjectFlatteners.FlattenerMaker<JsonN
|
||||
{
|
||||
private static final Configuration JSONPATH_CONFIGURATION =
|
||||
Configuration.builder()
|
||||
.jsonProvider(new JacksonJsonNodeJsonProvider())
|
||||
.jsonProvider(new FastJacksonJsonNodeJsonProvider())
|
||||
.mappingProvider(new JacksonMappingProvider())
|
||||
.options(EnumSet.of(Option.SUPPRESS_EXCEPTIONS))
|
||||
.build();
|
||||
@ -119,7 +119,9 @@ public class JSONFlattenerMaker implements ObjectFlatteners.FlattenerMaker<JsonN
|
||||
if (val.isArray()) {
|
||||
List<Object> newList = new ArrayList<>();
|
||||
for (JsonNode entry : val) {
|
||||
newList.add(valueConversionFunction(entry));
|
||||
if (!entry.isNull()) {
|
||||
newList.add(valueConversionFunction(entry));
|
||||
}
|
||||
}
|
||||
return newList;
|
||||
}
|
||||
|
@ -91,6 +91,7 @@ public class JSONParseSpecTest
|
||||
true,
|
||||
ImmutableList.of(
|
||||
new JSONPathFieldSpec(JSONPathFieldType.PATH, "foo", "$.[?(@.maybe_object)].maybe_object.foo.test"),
|
||||
new JSONPathFieldSpec(JSONPathFieldType.PATH, "baz", "$.maybe_object_2.foo.test"),
|
||||
new JSONPathFieldSpec(JSONPathFieldType.PATH, "bar", "$.[?(@.something_else)].something_else.foo")
|
||||
)
|
||||
),
|
||||
@ -99,6 +100,7 @@ public class JSONParseSpecTest
|
||||
|
||||
final Map<String, Object> expected = new HashMap<>();
|
||||
expected.put("foo", new ArrayList());
|
||||
expected.put("baz", null);
|
||||
expected.put("bar", Collections.singletonList("test"));
|
||||
|
||||
final Parser<String, Object> parser = parseSpec.makeParser();
|
||||
|
@ -34,6 +34,7 @@ import org.apache.druid.java.util.common.parsers.ObjectFlatteners;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.EnumSet;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
@ -140,6 +141,9 @@ public class AvroFlattenerMaker implements ObjectFlatteners.FlattenerMaker<Gener
|
||||
if (field instanceof Utf8) {
|
||||
return field.toString();
|
||||
}
|
||||
if (field instanceof List) {
|
||||
return ((List) field).stream().filter(Objects::nonNull).collect(Collectors.toList());
|
||||
}
|
||||
return field;
|
||||
}
|
||||
}
|
||||
|
@ -27,6 +27,7 @@ import org.apache.avro.generic.GenericRecord;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@ -98,7 +99,9 @@ public class GenericAvroJsonProvider implements JsonProvider
|
||||
@Override
|
||||
public Collection<String> getPropertyKeys(final Object o)
|
||||
{
|
||||
if (o instanceof Map) {
|
||||
if (o == null) {
|
||||
return Collections.emptySet();
|
||||
} else if (o instanceof Map) {
|
||||
return ((Map<Object, Object>) o).keySet().stream().map(String::valueOf).collect(Collectors.toSet());
|
||||
} else if (o instanceof GenericRecord) {
|
||||
return ((GenericRecord) o).getSchema().getFields().stream().map(Schema.Field::name).collect(Collectors.toSet());
|
||||
@ -138,7 +141,9 @@ public class GenericAvroJsonProvider implements JsonProvider
|
||||
@Override
|
||||
public Object getMapValue(final Object o, final String s)
|
||||
{
|
||||
if (o instanceof GenericRecord) {
|
||||
if (o == null) {
|
||||
return null;
|
||||
} else if (o instanceof GenericRecord) {
|
||||
return ((GenericRecord) o).get(s);
|
||||
} else if (o instanceof Map) {
|
||||
return ((Map) o).get(s);
|
||||
@ -172,7 +177,7 @@ public class GenericAvroJsonProvider implements JsonProvider
|
||||
@Override
|
||||
public boolean isMap(final Object o)
|
||||
{
|
||||
return o instanceof Map || o instanceof GenericRecord;
|
||||
return o == null || o instanceof Map || o instanceof GenericRecord;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -30,6 +30,7 @@ import org.apache.parquet.schema.Type;
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.EnumSet;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
@ -100,7 +101,7 @@ public class ParquetGroupFlattenerMaker implements ObjectFlatteners.FlattenerMak
|
||||
if (ParquetGroupConverter.isWrappedListPrimitive(o)) {
|
||||
return converter.unwrapListPrimitive(o);
|
||||
} else if (o instanceof List) {
|
||||
List<Object> asList = (List<Object>) o;
|
||||
List<Object> asList = ((List<Object>) o).stream().filter(Objects::nonNull).collect(Collectors.toList());
|
||||
if (asList.stream().allMatch(ParquetGroupConverter::isWrappedListPrimitive)) {
|
||||
return asList.stream().map(Group.class::cast).map(converter::unwrapListPrimitive).collect(Collectors.toList());
|
||||
}
|
||||
|
@ -26,6 +26,7 @@ import org.apache.parquet.example.data.Group;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@ -64,7 +65,7 @@ public class ParquetGroupJsonProvider implements JsonProvider
|
||||
@Override
|
||||
public boolean isMap(final Object o)
|
||||
{
|
||||
return o instanceof Map || o instanceof Group;
|
||||
return o == null || o instanceof Map || o instanceof Group;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -93,7 +94,9 @@ public class ParquetGroupJsonProvider implements JsonProvider
|
||||
@Override
|
||||
public Collection<String> getPropertyKeys(final Object o)
|
||||
{
|
||||
if (o instanceof Map) {
|
||||
if (o == null) {
|
||||
return Collections.emptySet();
|
||||
} else if (o instanceof Map) {
|
||||
return ((Map<Object, Object>) o).keySet().stream().map(String::valueOf).collect(Collectors.toSet());
|
||||
} else if (o instanceof Group) {
|
||||
return ((Group) o).getType().getFields().stream().map(f -> f.getName()).collect(Collectors.toSet());
|
||||
@ -105,7 +108,9 @@ public class ParquetGroupJsonProvider implements JsonProvider
|
||||
@Override
|
||||
public Object getMapValue(final Object o, final String s)
|
||||
{
|
||||
if (o instanceof Map) {
|
||||
if (o == null) {
|
||||
return null;
|
||||
} else if (o instanceof Map) {
|
||||
return ((Map) o).get(s);
|
||||
} else if (o instanceof Group) {
|
||||
Group g = (Group) o;
|
||||
|
Loading…
x
Reference in New Issue
Block a user