mirror of
https://github.com/apache/druid.git
synced 2025-02-09 19:44:57 +00:00
fix bug in ObjectFlatteners.toMap which caused null values in avro-stream/avro-ocf/parquet/orc to be converted to {} instead of null in web-console sampler UI (#12785)
* fix bug in ObjectFlatteners.toMap which caused null values in avro-stream/avro-ocf/parquet/orc to be converted to {} instead of null
* fix parquet test that expected wrong behavior, my bad heh
This commit is contained in:
parent
05b2e967ed
commit
e25ba00470
@ -47,7 +47,7 @@ import java.util.List;
|
|||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* In constract to {@link JsonLineReader} which processes input text line by line independently,
|
* In contrast to {@link JsonLineReader} which processes input text line by line independently,
|
||||||
* this class tries to parse the input text as a whole to an array of objects.
|
* this class tries to parse the input text as a whole to an array of objects.
|
||||||
*
|
*
|
||||||
* The input text can be:
|
* The input text can be:
|
||||||
|
@ -23,7 +23,22 @@ import java.util.Map;
|
|||||||
|
|
||||||
public interface ObjectFlattener<T>
|
public interface ObjectFlattener<T>
|
||||||
{
|
{
|
||||||
|
/**
|
||||||
|
* Transforms an input row object into a {@link Map}, likely based on the instructions in some {@link JSONPathSpec}.
|
||||||
|
*
|
||||||
|
* This method is used in normal ingestion to extract values into a map to translate into an
|
||||||
|
* {@link org.apache.druid.data.input.InputRow}
|
||||||
|
*/
|
||||||
Map<String, Object> flatten(T obj);
|
Map<String, Object> flatten(T obj);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Completely transforms an input row into a {@link Map}, including translating all nested structures into plain Java
|
||||||
|
* objects such as {@link Map} and {@link java.util.List}. This method should translate everything as much as
|
||||||
|
* possible, ignoring any instructions in {@link JSONPathSpec} which might otherwise limit the amount of
|
||||||
|
* transformation done.
|
||||||
|
*
|
||||||
|
* This method is used by the ingestion "sampler" to provide a "raw" JSON form of the original input data, regardless
|
||||||
|
* of actual format, so that it can use "inline" JSON datasources and reduce sampling overhead.
|
||||||
|
*/
|
||||||
Map<String, Object> toMap(T obj);
|
Map<String, Object> toMap(T obj);
|
||||||
}
|
}
|
||||||
|
@ -251,7 +251,9 @@ public class ObjectFlatteners
|
|||||||
Map<String, Object> actualMap = new HashMap<>();
|
Map<String, Object> actualMap = new HashMap<>();
|
||||||
for (String key : jsonProvider.getPropertyKeys(o)) {
|
for (String key : jsonProvider.getPropertyKeys(o)) {
|
||||||
Object field = jsonProvider.getMapValue(o, key);
|
Object field = jsonProvider.getMapValue(o, key);
|
||||||
if (jsonProvider.isMap(field) || jsonProvider.isArray(field)) {
|
if (field == null) {
|
||||||
|
actualMap.put(key, null);
|
||||||
|
} else if (jsonProvider.isMap(field) || jsonProvider.isArray(field)) {
|
||||||
actualMap.put(key, toMapHelper(finalizeConversionForMap(field)));
|
actualMap.put(key, toMapHelper(finalizeConversionForMap(field)));
|
||||||
} else {
|
} else {
|
||||||
actualMap.put(key, finalizeConversionForMap(field));
|
actualMap.put(key, finalizeConversionForMap(field));
|
||||||
|
@ -0,0 +1,61 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.druid.java.util.common.parsers;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
import com.fasterxml.jackson.databind.JsonNode;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.google.common.collect.ImmutableList;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
public class ObjectFlattenersTest
|
||||||
|
{
|
||||||
|
private static final String SOME_JSON = "{\"foo\": null, \"bar\": 1}";
|
||||||
|
private static final ObjectFlattener FLATTENER = ObjectFlatteners.create(
|
||||||
|
new JSONPathSpec(
|
||||||
|
true,
|
||||||
|
ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.PATH, "extract", "$.bar"))
|
||||||
|
),
|
||||||
|
new JSONFlattenerMaker(true)
|
||||||
|
);
|
||||||
|
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFlatten() throws JsonProcessingException
|
||||||
|
{
|
||||||
|
JsonNode node = OBJECT_MAPPER.readTree(SOME_JSON);
|
||||||
|
Map<String, Object> flat = FLATTENER.flatten(node);
|
||||||
|
Assert.assertNull(flat.get("foo"));
|
||||||
|
Assert.assertEquals(1L, flat.get("bar"));
|
||||||
|
Assert.assertEquals(1L, flat.get("extract"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testToMap() throws JsonProcessingException
|
||||||
|
{
|
||||||
|
JsonNode node = OBJECT_MAPPER.readTree(SOME_JSON);
|
||||||
|
Map<String, Object> flat = FLATTENER.toMap(node);
|
||||||
|
Assert.assertNull(flat.get("foo"));
|
||||||
|
Assert.assertEquals(1, flat.get("bar"));
|
||||||
|
}
|
||||||
|
}
|
@ -248,30 +248,30 @@ public class CompatParquetReaderTest extends BaseParquetReaderTest
|
|||||||
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
|
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
|
||||||
final String expectedJson = "{\n"
|
final String expectedJson = "{\n"
|
||||||
+ " \"enumColumn\" : \"SPADES\",\n"
|
+ " \"enumColumn\" : \"SPADES\",\n"
|
||||||
+ " \"maybeStringColumn\" : { },\n"
|
+ " \"maybeStringColumn\" : null,\n"
|
||||||
+ " \"maybeBinaryColumn\" : { },\n"
|
+ " \"maybeBinaryColumn\" : null,\n"
|
||||||
+ " \"shortColumn\" : 1,\n"
|
+ " \"shortColumn\" : 1,\n"
|
||||||
+ " \"byteColumn\" : 0,\n"
|
+ " \"byteColumn\" : 0,\n"
|
||||||
+ " \"maybeBoolColumn\" : { },\n"
|
+ " \"maybeBoolColumn\" : null,\n"
|
||||||
+ " \"intColumn\" : 2,\n"
|
+ " \"intColumn\" : 2,\n"
|
||||||
+ " \"doubleColumn\" : 0.2,\n"
|
+ " \"doubleColumn\" : 0.2,\n"
|
||||||
+ " \"maybeByteColumn\" : { },\n"
|
+ " \"maybeByteColumn\" : null,\n"
|
||||||
+ " \"intSetColumn\" : [ 0 ],\n"
|
+ " \"intSetColumn\" : [ 0 ],\n"
|
||||||
+ " \"boolColumn\" : true,\n"
|
+ " \"boolColumn\" : true,\n"
|
||||||
+ " \"binaryColumn\" : \"val_0\",\n"
|
+ " \"binaryColumn\" : \"val_0\",\n"
|
||||||
+ " \"maybeIntColumn\" : { },\n"
|
+ " \"maybeIntColumn\" : null,\n"
|
||||||
+ " \"intToStringColumn\" : {\n"
|
+ " \"intToStringColumn\" : {\n"
|
||||||
+ " \"0\" : \"val_0\",\n"
|
+ " \"0\" : \"val_0\",\n"
|
||||||
+ " \"1\" : \"val_1\",\n"
|
+ " \"1\" : \"val_1\",\n"
|
||||||
+ " \"2\" : \"val_2\"\n"
|
+ " \"2\" : \"val_2\"\n"
|
||||||
+ " },\n"
|
+ " },\n"
|
||||||
+ " \"maybeDoubleColumn\" : { },\n"
|
+ " \"maybeDoubleColumn\" : null,\n"
|
||||||
+ " \"maybeEnumColumn\" : { },\n"
|
+ " \"maybeEnumColumn\" : null,\n"
|
||||||
+ " \"maybeLongColumn\" : { },\n"
|
+ " \"maybeLongColumn\" : null,\n"
|
||||||
+ " \"stringsColumn\" : [ \"arr_0\", \"arr_1\", \"arr_2\" ],\n"
|
+ " \"stringsColumn\" : [ \"arr_0\", \"arr_1\", \"arr_2\" ],\n"
|
||||||
+ " \"longColumn\" : 0,\n"
|
+ " \"longColumn\" : 0,\n"
|
||||||
+ " \"stringColumn\" : \"val_0\",\n"
|
+ " \"stringColumn\" : \"val_0\",\n"
|
||||||
+ " \"maybeShortColumn\" : { },\n"
|
+ " \"maybeShortColumn\" : null,\n"
|
||||||
+ " \"complexColumn\" : {\n"
|
+ " \"complexColumn\" : {\n"
|
||||||
+ " \"0\" : [ {\n"
|
+ " \"0\" : [ {\n"
|
||||||
+ " \"nestedStringColumn\" : \"val_0\",\n"
|
+ " \"nestedStringColumn\" : \"val_0\",\n"
|
||||||
@ -426,9 +426,9 @@ public class CompatParquetReaderTest extends BaseParquetReaderTest
|
|||||||
);
|
);
|
||||||
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
|
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
|
||||||
final String expectedJson = "{\n"
|
final String expectedJson = "{\n"
|
||||||
+ " \"optionalMessage\" : { },\n"
|
+ " \"optionalMessage\" : null,\n"
|
||||||
+ " \"requiredPrimitive\" : 9,\n"
|
+ " \"requiredPrimitive\" : 9,\n"
|
||||||
+ " \"repeatedPrimitive\" : { },\n"
|
+ " \"repeatedPrimitive\" : null,\n"
|
||||||
+ " \"repeatedMessage\" : [ 9, 10 ],\n"
|
+ " \"repeatedMessage\" : [ 9, 10 ],\n"
|
||||||
+ " \"optionalPrimitive\" : 10,\n"
|
+ " \"optionalPrimitive\" : 10,\n"
|
||||||
+ " \"requiredMessage\" : {\n"
|
+ " \"requiredMessage\" : {\n"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user