mirror of https://github.com/apache/druid.git
Fixing incorrect filtering of nulls in an array when ingesting for JSON and Avro (#13712)
This commit is contained in:
parent
c95a26cae3
commit
74ff848ce5
|
@ -179,12 +179,11 @@ public class JSONFlattenerMaker implements ObjectFlatteners.FlattenerMaker<JsonN
|
|||
return ((BinaryNode) val).binaryValue();
|
||||
}
|
||||
|
||||
|
||||
if (val.isArray()) {
|
||||
List<Object> newList = new ArrayList<>();
|
||||
for (JsonNode entry : val) {
|
||||
if (!entry.isNull()) {
|
||||
newList.add(convertJsonNode(entry, enc));
|
||||
}
|
||||
newList.add(convertJsonNode(entry, enc));
|
||||
}
|
||||
return newList;
|
||||
}
|
||||
|
|
|
@ -102,7 +102,7 @@ public class JSONParseSpecTest
|
|||
);
|
||||
|
||||
final Map<String, Object> expected = new HashMap<>();
|
||||
expected.put("foo", new ArrayList());
|
||||
expected.put("foo", Collections.singletonList(null));
|
||||
expected.put("baz", null);
|
||||
expected.put("bar", Collections.singletonList("test"));
|
||||
|
||||
|
@ -113,6 +113,43 @@ public class JSONParseSpecTest
|
|||
Assert.assertEquals(expected, parsedRow);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseRowWithNullsInArrays()
|
||||
{
|
||||
final JSONParseSpec parseSpec = new JSONParseSpec(
|
||||
new TimestampSpec("timestamp", "iso", null),
|
||||
new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("foo"))),
|
||||
new JSONPathSpec(
|
||||
true,
|
||||
ImmutableList.of(
|
||||
// https://github.com/apache/druid/issues/6653 $.x.y.z where y is missing
|
||||
new JSONPathFieldSpec(JSONPathFieldType.PATH, "foo", "$.baz.[?(@.maybe_object)].maybe_object"),
|
||||
// https://github.com/apache/druid/issues/6653 $.x.y.z where y is from an array and is null
|
||||
new JSONPathFieldSpec(JSONPathFieldType.PATH, "nullFoo", "$.nullFoo.[?(@.value)][0].foo"),
|
||||
new JSONPathFieldSpec(JSONPathFieldType.PATH, "baz", "$.baz"),
|
||||
// $.x.y.z where x is from an array and is null
|
||||
new JSONPathFieldSpec(JSONPathFieldType.PATH, "nullBaz", "$.baz[1].foo.maybe_object"),
|
||||
new JSONPathFieldSpec(JSONPathFieldType.PATH, "bar", "$.[?(@.something_else)].something_else.foo")
|
||||
)
|
||||
),
|
||||
null,
|
||||
false
|
||||
);
|
||||
|
||||
final Map<String, Object> expected = new HashMap<>();
|
||||
expected.put("foo", new ArrayList<>());
|
||||
expected.put("baz", Arrays.asList("1", null, "2", null));
|
||||
expected.put("bar", Collections.singletonList("test"));
|
||||
expected.put("nullFoo", new ArrayList<>());
|
||||
expected.put("nullBaz", null);
|
||||
|
||||
final Parser<String, Object> parser = parseSpec.makeParser();
|
||||
final Map<String, Object> parsedRow = parser.parseToMap("{\"baz\":[\"1\",null,\"2\",null],\"nullFoo\":{\"value\":[null,null]},\"something_else\": {\"foo\": \"test\"}}");
|
||||
|
||||
Assert.assertNotNull(parsedRow);
|
||||
Assert.assertEquals(expected, parsedRow);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSerde() throws IOException
|
||||
{
|
||||
|
|
|
@ -141,7 +141,11 @@ public class JsonLineReaderTest
|
|||
while (iterator.hasNext()) {
|
||||
final InputRow row = iterator.next();
|
||||
Assert.assertEquals("test", Iterables.getOnlyElement(row.getDimension("bar")));
|
||||
Assert.assertEquals(Collections.emptyList(), row.getDimension("foo"));
|
||||
// Since foo is in the JSONPathSpec it comes as an array of [null]
|
||||
// row.getRaw("foo") comes out as an array of nulls but the
|
||||
// row.getDimension("foo") stringifies it as "null". A future developer should aim to relieve this
|
||||
Assert.assertEquals(Collections.singletonList(null), row.getRaw("foo"));
|
||||
Assert.assertEquals(Collections.singletonList("null"), row.getDimension("foo"));
|
||||
Assert.assertTrue(row.getDimension("baz").isEmpty());
|
||||
numActualIterations++;
|
||||
}
|
||||
|
|
|
@ -39,7 +39,6 @@ import java.util.EnumSet;
|
|||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
@ -194,7 +193,7 @@ public class AvroFlattenerMaker implements ObjectFlatteners.FlattenerMaker<Gener
|
|||
} else if (field instanceof Utf8) {
|
||||
return field.toString();
|
||||
} else if (field instanceof List) {
|
||||
return ((List<?>) field).stream().filter(Objects::nonNull).map(this::transformValue).collect(Collectors.toList());
|
||||
return ((List<?>) field).stream().map(this::transformValue).collect(Collectors.toList());
|
||||
} else if (field instanceof GenericEnumSymbol) {
|
||||
return field.toString();
|
||||
} else if (field instanceof GenericFixed) {
|
||||
|
|
|
@ -33,10 +33,14 @@
|
|||
},
|
||||
{
|
||||
"name": "someStringArray",
|
||||
"type": {
|
||||
"type": "array",
|
||||
"items": "string"
|
||||
}
|
||||
"type": [
|
||||
"null",
|
||||
{
|
||||
"type": "array",
|
||||
"items": ["null","string"]
|
||||
}
|
||||
],
|
||||
"default":null
|
||||
},
|
||||
{
|
||||
"name": "someIntValueMap",
|
||||
|
|
|
@ -81,7 +81,7 @@ import static org.apache.druid.data.input.AvroStreamInputRowParserTest.buildSome
|
|||
* "someOtherId": 6568719896,
|
||||
* "isValid": true,
|
||||
* "someIntArray": [1, 2, 4, 8],
|
||||
* "someStringArray": ["8", "4", "2", "1"],
|
||||
* "someStringArray": ["8", "4", "2", "1", null],
|
||||
* "someIntValueMap": {"8": 8, "1": 1, "2": 2, "4": 4},
|
||||
* "someStringValueMap": {"8": "8", "1": "1", "2": "2", "4": "4"},
|
||||
* "someUnion": "string as union",
|
||||
|
@ -108,14 +108,13 @@ public class AvroStreamInputFormatTest extends InitializedNullHandlingTest
|
|||
private static final List<String> DIMENSIONS_SCHEMALESS = Arrays.asList(
|
||||
"nested",
|
||||
SOME_OTHER_ID,
|
||||
"someStringArray",
|
||||
"someIntArray",
|
||||
"someFloat",
|
||||
"someUnion",
|
||||
EVENT_TYPE,
|
||||
ID,
|
||||
"someFixed",
|
||||
"someBytes",
|
||||
"someUnion",
|
||||
ID,
|
||||
"someEnum",
|
||||
"someLong",
|
||||
"someInt",
|
||||
|
|
|
@ -88,14 +88,13 @@ public class AvroStreamInputRowParserTest
|
|||
private static final List<String> DIMENSIONS_SCHEMALESS = Arrays.asList(
|
||||
"nested",
|
||||
SOME_OTHER_ID,
|
||||
"someStringArray",
|
||||
"someIntArray",
|
||||
"someFloat",
|
||||
"someUnion",
|
||||
EVENT_TYPE,
|
||||
ID,
|
||||
"someFixed",
|
||||
"someBytes",
|
||||
"someUnion",
|
||||
ID,
|
||||
"someEnum",
|
||||
"someLong",
|
||||
"someInt",
|
||||
|
@ -128,7 +127,7 @@ public class AvroStreamInputRowParserTest
|
|||
.setSubInt(SUB_INT_VALUE)
|
||||
.setSubLong(SUB_LONG_VALUE)
|
||||
.build();
|
||||
private static final List<CharSequence> SOME_STRING_ARRAY_VALUE = Arrays.asList("8", "4", "2", "1");
|
||||
private static final List<CharSequence> SOME_STRING_ARRAY_VALUE = Arrays.asList("8", "4", "2", "1", null);
|
||||
private static final List<Integer> SOME_INT_ARRAY_VALUE = Arrays.asList(1, 2, 4, 8);
|
||||
static final Map<CharSequence, Integer> SOME_INT_VALUE_MAP_VALUE = Maps.asMap(
|
||||
new HashSet<>(Arrays.asList("8", "2", "4", "1")), new Function<CharSequence, Integer>()
|
||||
|
|
|
@ -164,11 +164,12 @@ public class AvroFlattenerMakerTest
|
|||
final AvroFlattenerMaker flattenerNested = new AvroFlattenerMaker(false, false, true, true);
|
||||
|
||||
SomeAvroDatum input = AvroStreamInputRowParserTest.buildSomeAvroDatum();
|
||||
|
||||
// isFieldPrimitive on someStringArray is false
|
||||
// as it contains items as nulls and strings
|
||||
// so flattenerNested should only be able to discover it
|
||||
Assert.assertEquals(
|
||||
ImmutableSet.of(
|
||||
"someOtherId",
|
||||
"someStringArray",
|
||||
"someIntArray",
|
||||
"someFloat",
|
||||
"eventType",
|
||||
|
@ -210,6 +211,49 @@ public class AvroFlattenerMakerTest
|
|||
);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testNullsInStringArray()
|
||||
{
|
||||
final AvroFlattenerMaker flattenerNested = new AvroFlattenerMaker(false, false, true, true);
|
||||
|
||||
SomeAvroDatum input = AvroStreamInputRowParserTest.buildSomeAvroDatum();
|
||||
|
||||
Assert.assertEquals(
|
||||
ImmutableSet.of(
|
||||
"someStringValueMap",
|
||||
"someOtherId",
|
||||
"someStringArray",
|
||||
"someIntArray",
|
||||
"someFloat",
|
||||
"isValid",
|
||||
"someIntValueMap",
|
||||
"eventType",
|
||||
"someFixed",
|
||||
"someBytes",
|
||||
"someRecord",
|
||||
"someMultiMemberUnion",
|
||||
"someNull",
|
||||
"someRecordArray",
|
||||
"someUnion",
|
||||
"id",
|
||||
"someEnum",
|
||||
"someLong",
|
||||
"someInt",
|
||||
"timestamp"
|
||||
),
|
||||
ImmutableSet.copyOf(flattenerNested.discoverRootFields(input))
|
||||
);
|
||||
|
||||
ArrayList<Object> results = (ArrayList<Object>) flattenerNested.getRootField(input, "someStringArray");
|
||||
// 4 strings a 1 null for a total of 5
|
||||
Assert.assertEquals("8", results.get(0).toString());
|
||||
Assert.assertEquals("4", results.get(1).toString());
|
||||
Assert.assertEquals("2", results.get(2).toString());
|
||||
Assert.assertEquals("1", results.get(3).toString());
|
||||
Assert.assertEquals(null, results.get(4));
|
||||
}
|
||||
|
||||
private void getRootField_common(final SomeAvroDatum record, final AvroFlattenerMaker flattener)
|
||||
{
|
||||
Assert.assertEquals(
|
||||
|
|
Loading…
Reference in New Issue