mirror of https://github.com/apache/druid.git
fix bug in ObjectFlatteners.toMap which caused null values in avro-stream/avro-ocf/parquet/orc to be converted to {} instead of null in web-console sampler UI (#12785)
* fix bug in ObjectFlatteners.toMap which caused null values in avro-stream/avro-ocf/parquet/orc to be converted to {} instead of null * fix parquet test that expected wrong behavior, my bad heh
This commit is contained in:
parent
05b2e967ed
commit
e25ba00470
|
@ -47,7 +47,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* In constract to {@link JsonLineReader} which processes input text line by line independently,
|
||||
* In contrast to {@link JsonLineReader} which processes input text line by line independently,
|
||||
* this class tries to parse the input text as a whole to an array of objects.
|
||||
*
|
||||
* The input text can be:
|
||||
|
|
|
@ -23,7 +23,22 @@ import java.util.Map;
|
|||
|
||||
public interface ObjectFlattener<T>
|
||||
{
|
||||
/**
|
||||
* Transforms an input row object into a {@link Map}, likely based on the instructions in some {@link JSONPathSpec}.
|
||||
*
|
||||
* This method is used in normal ingestion to extract values into a map to translate into an
|
||||
* {@link org.apache.druid.data.input.InputRow}
|
||||
*/
|
||||
Map<String, Object> flatten(T obj);
|
||||
|
||||
/**
|
||||
* Completely transforms an input row into a {@link Map}, including translating all nested structure into plain java
|
||||
* objects such as {@link Map} and {@link java.util.List}. This method should translate everything as much as
|
||||
* possible, ignoring any instructions in {@link JSONPathSpec} which might otherwise limit the amount of
|
||||
* transformation done.
|
||||
*
|
||||
* This method is used by the ingestion "sampler" to provide a "raw" JSON form of the original input data, regardless
|
||||
* of actual format, so that it can use "inline" JSON datasources and reduce sampling overhead.
|
||||
*/
|
||||
Map<String, Object> toMap(T obj);
|
||||
}
|
||||
|
|
|
@ -251,7 +251,9 @@ public class ObjectFlatteners
|
|||
Map<String, Object> actualMap = new HashMap<>();
|
||||
for (String key : jsonProvider.getPropertyKeys(o)) {
|
||||
Object field = jsonProvider.getMapValue(o, key);
|
||||
if (jsonProvider.isMap(field) || jsonProvider.isArray(field)) {
|
||||
if (field == null) {
|
||||
actualMap.put(key, null);
|
||||
} else if (jsonProvider.isMap(field) || jsonProvider.isArray(field)) {
|
||||
actualMap.put(key, toMapHelper(finalizeConversionForMap(field)));
|
||||
} else {
|
||||
actualMap.put(key, finalizeConversionForMap(field));
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.druid.java.util.common.parsers;
|
||||
|
||||
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
public class ObjectFlattenersTest
|
||||
{
|
||||
private static final String SOME_JSON = "{\"foo\": null, \"bar\": 1}";
|
||||
private static final ObjectFlattener FLATTENER = ObjectFlatteners.create(
|
||||
new JSONPathSpec(
|
||||
true,
|
||||
ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.PATH, "extract", "$.bar"))
|
||||
),
|
||||
new JSONFlattenerMaker(true)
|
||||
);
|
||||
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||
|
||||
@Test
|
||||
public void testFlatten() throws JsonProcessingException
|
||||
{
|
||||
JsonNode node = OBJECT_MAPPER.readTree(SOME_JSON);
|
||||
Map<String, Object> flat = FLATTENER.flatten(node);
|
||||
Assert.assertNull(flat.get("foo"));
|
||||
Assert.assertEquals(1L, flat.get("bar"));
|
||||
Assert.assertEquals(1L, flat.get("extract"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testToMap() throws JsonProcessingException
|
||||
{
|
||||
JsonNode node = OBJECT_MAPPER.readTree(SOME_JSON);
|
||||
Map<String, Object> flat = FLATTENER.toMap(node);
|
||||
Assert.assertNull(flat.get("foo"));
|
||||
Assert.assertEquals(1, flat.get("bar"));
|
||||
}
|
||||
}
|
|
@ -248,30 +248,30 @@ public class CompatParquetReaderTest extends BaseParquetReaderTest
|
|||
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
|
||||
final String expectedJson = "{\n"
|
||||
+ " \"enumColumn\" : \"SPADES\",\n"
|
||||
+ " \"maybeStringColumn\" : { },\n"
|
||||
+ " \"maybeBinaryColumn\" : { },\n"
|
||||
+ " \"maybeStringColumn\" : null,\n"
|
||||
+ " \"maybeBinaryColumn\" : null,\n"
|
||||
+ " \"shortColumn\" : 1,\n"
|
||||
+ " \"byteColumn\" : 0,\n"
|
||||
+ " \"maybeBoolColumn\" : { },\n"
|
||||
+ " \"maybeBoolColumn\" : null,\n"
|
||||
+ " \"intColumn\" : 2,\n"
|
||||
+ " \"doubleColumn\" : 0.2,\n"
|
||||
+ " \"maybeByteColumn\" : { },\n"
|
||||
+ " \"maybeByteColumn\" : null,\n"
|
||||
+ " \"intSetColumn\" : [ 0 ],\n"
|
||||
+ " \"boolColumn\" : true,\n"
|
||||
+ " \"binaryColumn\" : \"val_0\",\n"
|
||||
+ " \"maybeIntColumn\" : { },\n"
|
||||
+ " \"maybeIntColumn\" : null,\n"
|
||||
+ " \"intToStringColumn\" : {\n"
|
||||
+ " \"0\" : \"val_0\",\n"
|
||||
+ " \"1\" : \"val_1\",\n"
|
||||
+ " \"2\" : \"val_2\"\n"
|
||||
+ " },\n"
|
||||
+ " \"maybeDoubleColumn\" : { },\n"
|
||||
+ " \"maybeEnumColumn\" : { },\n"
|
||||
+ " \"maybeLongColumn\" : { },\n"
|
||||
+ " \"maybeDoubleColumn\" : null,\n"
|
||||
+ " \"maybeEnumColumn\" : null,\n"
|
||||
+ " \"maybeLongColumn\" : null,\n"
|
||||
+ " \"stringsColumn\" : [ \"arr_0\", \"arr_1\", \"arr_2\" ],\n"
|
||||
+ " \"longColumn\" : 0,\n"
|
||||
+ " \"stringColumn\" : \"val_0\",\n"
|
||||
+ " \"maybeShortColumn\" : { },\n"
|
||||
+ " \"maybeShortColumn\" : null,\n"
|
||||
+ " \"complexColumn\" : {\n"
|
||||
+ " \"0\" : [ {\n"
|
||||
+ " \"nestedStringColumn\" : \"val_0\",\n"
|
||||
|
@ -426,9 +426,9 @@ public class CompatParquetReaderTest extends BaseParquetReaderTest
|
|||
);
|
||||
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
|
||||
final String expectedJson = "{\n"
|
||||
+ " \"optionalMessage\" : { },\n"
|
||||
+ " \"optionalMessage\" : null,\n"
|
||||
+ " \"requiredPrimitive\" : 9,\n"
|
||||
+ " \"repeatedPrimitive\" : { },\n"
|
||||
+ " \"repeatedPrimitive\" : null,\n"
|
||||
+ " \"repeatedMessage\" : [ 9, 10 ],\n"
|
||||
+ " \"optionalPrimitive\" : 10,\n"
|
||||
+ " \"requiredMessage\" : {\n"
|
||||
|
|
Loading…
Reference in New Issue