fix bug in ObjectFlatteners.toMap which caused null values in avro-stream/avro-ocf/parquet/orc to be converted to {} instead of null in web-console sampler UI (#12785)

* fix bug in ObjectFlatteners.toMap which caused null values in avro-stream/avro-ocf/parquet/orc to be converted to {} instead of null
* fix parquet test that expected wrong behavior, my bad heh
This commit is contained in:
Clint Wylie 2022-07-14 16:52:01 -07:00 committed by GitHub
parent 05b2e967ed
commit e25ba00470
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 91 additions and 13 deletions

View File

@ -47,7 +47,7 @@ import java.util.List;
import java.util.Map;
/**
* In constract to {@link JsonLineReader} which processes input text line by line independently,
* In contrast to {@link JsonLineReader} which processes input text line by line independently,
* this class tries to parse the input text as a whole to an array of objects.
*
* The input text can be:

View File

@ -23,7 +23,22 @@ import java.util.Map;
/**
 * Converts a format-specific input row object of type {@code T} into a {@link Map} of field names to values.
 *
 * Two conversions are offered: {@link #flatten}, used for normal ingestion, and {@link #toMap}, used by the
 * ingestion "sampler".
 */
public interface ObjectFlattener<T>
{
  /**
   * Turns an input row object into a {@link Map}, most likely guided by the instructions of some
   * {@link JSONPathSpec}.
   *
   * Normal ingestion calls this method to pull values out into a map, which is then translated into an
   * {@link org.apache.druid.data.input.InputRow}.
   *
   * @param obj the input row object to flatten
   * @return the extracted values, keyed by field name
   */
  Map<String, Object> flatten(T obj);

  /**
   * Fully converts an input row into a {@link Map}, translating all nested structure into plain java objects such
   * as {@link Map} and {@link java.util.List}. Implementations should translate as much as possible and ignore any
   * instructions in {@link JSONPathSpec} that would otherwise limit how much transformation is done.
   *
   * The ingestion "sampler" calls this method to obtain a "raw" JSON form of the original input data, regardless of
   * the actual format, so that it can use "inline" JSON datasources and reduce sampling overhead.
   *
   * @param obj the input row object to convert
   * @return the complete row as plain java objects, keyed by field name
   */
  Map<String, Object> toMap(T obj);
}

View File

@ -251,7 +251,9 @@ public class ObjectFlatteners
Map<String, Object> actualMap = new HashMap<>();
for (String key : jsonProvider.getPropertyKeys(o)) {
Object field = jsonProvider.getMapValue(o, key);
if (jsonProvider.isMap(field) || jsonProvider.isArray(field)) {
if (field == null) {
actualMap.put(key, null);
} else if (jsonProvider.isMap(field) || jsonProvider.isArray(field)) {
actualMap.put(key, toMapHelper(finalizeConversionForMap(field)));
} else {
actualMap.put(key, finalizeConversionForMap(field));

View File

@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.java.util.common.parsers;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import org.junit.Assert;
import org.junit.Test;
import java.util.Map;
/**
 * Tests the {@link ObjectFlattener} produced by {@link ObjectFlatteners#create} for JSON input, checking that a
 * JSON {@code null} field is reported as {@code null} by both {@link ObjectFlattener#flatten} and
 * {@link ObjectFlattener#toMap}.
 */
public class ObjectFlattenersTest
{
  // One explicitly-null field and one numeric field.
  private static final String SOME_JSON = "{\"foo\": null, \"bar\": 1}";

  // Typed with JsonNode (rather than a raw ObjectFlattener) so the Map<String, Object> results below are
  // checked by the compiler instead of relying on unchecked conversions.
  private static final ObjectFlattener<JsonNode> FLATTENER = ObjectFlatteners.create(
      new JSONPathSpec(
          true,
          ImmutableList.of(new JSONPathFieldSpec(JSONPathFieldType.PATH, "extract", "$.bar"))
      ),
      new JSONFlattenerMaker(true)
  );

  private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

  /**
   * flatten() should report the null field as null, keep the top-level "bar" field, and add the "extract" field
   * defined by the "$.bar" path spec.
   */
  @Test
  public void testFlatten() throws JsonProcessingException
  {
    JsonNode node = OBJECT_MAPPER.readTree(SOME_JSON);
    Map<String, Object> flat = FLATTENER.flatten(node);
    Assert.assertNull(flat.get("foo"));
    Assert.assertEquals(1L, flat.get("bar"));
    Assert.assertEquals(1L, flat.get("extract"));
  }

  /**
   * toMap() should report the null field as null rather than as an empty map.
   */
  @Test
  public void testToMap() throws JsonProcessingException
  {
    JsonNode node = OBJECT_MAPPER.readTree(SOME_JSON);
    Map<String, Object> flat = FLATTENER.toMap(node);
    Assert.assertNull(flat.get("foo"));
    // Expected value is an int here (boxed to Integer), whereas testFlatten() expects a long for the same field.
    Assert.assertEquals(1, flat.get("bar"));
  }
}

View File

@ -248,30 +248,30 @@ public class CompatParquetReaderTest extends BaseParquetReaderTest
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
final String expectedJson = "{\n"
+ " \"enumColumn\" : \"SPADES\",\n"
+ " \"maybeStringColumn\" : { },\n"
+ " \"maybeBinaryColumn\" : { },\n"
+ " \"maybeStringColumn\" : null,\n"
+ " \"maybeBinaryColumn\" : null,\n"
+ " \"shortColumn\" : 1,\n"
+ " \"byteColumn\" : 0,\n"
+ " \"maybeBoolColumn\" : { },\n"
+ " \"maybeBoolColumn\" : null,\n"
+ " \"intColumn\" : 2,\n"
+ " \"doubleColumn\" : 0.2,\n"
+ " \"maybeByteColumn\" : { },\n"
+ " \"maybeByteColumn\" : null,\n"
+ " \"intSetColumn\" : [ 0 ],\n"
+ " \"boolColumn\" : true,\n"
+ " \"binaryColumn\" : \"val_0\",\n"
+ " \"maybeIntColumn\" : { },\n"
+ " \"maybeIntColumn\" : null,\n"
+ " \"intToStringColumn\" : {\n"
+ " \"0\" : \"val_0\",\n"
+ " \"1\" : \"val_1\",\n"
+ " \"2\" : \"val_2\"\n"
+ " },\n"
+ " \"maybeDoubleColumn\" : { },\n"
+ " \"maybeEnumColumn\" : { },\n"
+ " \"maybeLongColumn\" : { },\n"
+ " \"maybeDoubleColumn\" : null,\n"
+ " \"maybeEnumColumn\" : null,\n"
+ " \"maybeLongColumn\" : null,\n"
+ " \"stringsColumn\" : [ \"arr_0\", \"arr_1\", \"arr_2\" ],\n"
+ " \"longColumn\" : 0,\n"
+ " \"stringColumn\" : \"val_0\",\n"
+ " \"maybeShortColumn\" : { },\n"
+ " \"maybeShortColumn\" : null,\n"
+ " \"complexColumn\" : {\n"
+ " \"0\" : [ {\n"
+ " \"nestedStringColumn\" : \"val_0\",\n"
@ -426,9 +426,9 @@ public class CompatParquetReaderTest extends BaseParquetReaderTest
);
List<InputRowListPlusRawValues> sampled = sampleAllRows(reader);
final String expectedJson = "{\n"
+ " \"optionalMessage\" : { },\n"
+ " \"optionalMessage\" : null,\n"
+ " \"requiredPrimitive\" : 9,\n"
+ " \"repeatedPrimitive\" : { },\n"
+ " \"repeatedPrimitive\" : null,\n"
+ " \"repeatedMessage\" : [ 9, 10 ],\n"
+ " \"optionalPrimitive\" : 10,\n"
+ " \"requiredMessage\" : {\n"