From 7f3a0dae285c6736952e555abad62d22f914283b Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Tue, 11 Sep 2018 19:16:19 -0700 Subject: [PATCH] ParseSpec: Remove default setting. (#6310) * ParseSpec: Remove default setting. Having a default ParseSpec implementation is bad for users, because it masks problems specifying the format. Two common problems masked by this are specifying the "format" at the wrong level of the JSON, and specifying a format that Druid doesn't support. In both cases, having a default implementation means that users will get the delimited parser rather than an error, and then be confused when, later on, their data failed to parse. * Fix integration tests. --- .../druid/data/input/impl/ParseSpec.java | 2 +- .../druid/data/input/impl/ParseSpecTest.java | 65 +++++++++++++++++++ ...edia_realtime_appenderator_index_task.json | 1 + .../wikipedia_realtime_index_task.json | 1 + .../indexer/wikipedia_union_index_task.json | 1 + 5 files changed, 69 insertions(+), 1 deletion(-) diff --git a/api/src/main/java/org/apache/druid/data/input/impl/ParseSpec.java b/api/src/main/java/org/apache/druid/data/input/impl/ParseSpec.java index f98e9b14873..b872219e7c8 100644 --- a/api/src/main/java/org/apache/druid/data/input/impl/ParseSpec.java +++ b/api/src/main/java/org/apache/druid/data/input/impl/ParseSpec.java @@ -30,7 +30,7 @@ import org.apache.druid.java.util.common.parsers.Parser; import java.util.List; @ExtensionPoint -@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "format", defaultImpl = DelimitedParseSpec.class) +@JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "format") @JsonSubTypes(value = { @JsonSubTypes.Type(name = "json", value = JSONParseSpec.class), @JsonSubTypes.Type(name = "csv", value = CSVParseSpec.class), diff --git a/api/src/test/java/org/apache/druid/data/input/impl/ParseSpecTest.java b/api/src/test/java/org/apache/druid/data/input/impl/ParseSpecTest.java index 104e2ee9842..424dec57f13 100644 --- a/api/src/test/java/org/apache/druid/data/input/impl/ParseSpecTest.java +++ b/api/src/test/java/org/apache/druid/data/input/impl/ParseSpecTest.java @@ -19,12 +19,22 @@ package org.apache.druid.data.input.impl; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.JsonMappingException; +import com.fasterxml.jackson.databind.MapperFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; +import org.apache.druid.java.util.common.jackson.JacksonUtils; import org.apache.druid.java.util.common.parsers.ParseException; +import org.hamcrest.CoreMatchers; +import org.junit.Assert; +import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import java.io.IOException; import java.util.Arrays; import java.util.Collections; @@ -33,6 +43,21 @@ public class ParseSpecTest @Rule public ExpectedException expectedException = ExpectedException.none(); + private ObjectMapper mapper; + + @Before + public void setUp() + { + // Similar to configs from DefaultObjectMapper, which we cannot use here since we're in druid-api. + mapper = new ObjectMapper(); + mapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false); + mapper.configure(MapperFeature.AUTO_DETECT_GETTERS, false); + mapper.configure(MapperFeature.AUTO_DETECT_FIELDS, false); + mapper.configure(MapperFeature.AUTO_DETECT_IS_GETTERS, false); + mapper.configure(MapperFeature.AUTO_DETECT_SETTERS, false); + mapper.configure(MapperFeature.ALLOW_FINAL_FIELDS_AS_MUTATORS, false); + } + @Test(expected = ParseException.class) public void testDuplicateNames() { @@ -143,4 +168,44 @@ public class ParseSpecTest 0 ); } + + @Test + public void testSerde() throws IOException + { + final String json = "{" + + "\"format\":\"timeAndDims\", " + + "\"timestampSpec\": {\"column\":\"timestamp\"}, " + + "\"dimensionsSpec\":{}" + + "}"; + + final Object mapValue = mapper.readValue(json, JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT); + final ParseSpec parseSpec = mapper.convertValue(mapValue, ParseSpec.class); + + Assert.assertEquals(TimeAndDimsParseSpec.class, parseSpec.getClass()); + Assert.assertEquals("timestamp", parseSpec.getTimestampSpec().getTimestampColumn()); + Assert.assertEquals(ImmutableList.of(), parseSpec.getDimensionsSpec().getDimensionNames()); + + // Test round-trip. + Assert.assertEquals( + parseSpec, + mapper.readValue(mapper.writeValueAsString(parseSpec), ParseSpec.class) + ); + } + + @Test + public void testBadTypeSerde() throws IOException + { + final String json = "{" + + "\"format\":\"foo\", " + + "\"timestampSpec\": {\"column\":\"timestamp\"}, " + + "\"dimensionsSpec\":{}" + + "}"; + + final Object mapValue = mapper.readValue(json, JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT); + + expectedException.expect(IllegalArgumentException.class); + expectedException.expectCause(CoreMatchers.instanceOf(JsonMappingException.class)); + expectedException.expectMessage("Could not resolve type id 'foo' into a subtype"); + mapper.convertValue(mapValue, ParseSpec.class); + } } diff --git a/integration-tests/src/test/resources/indexer/wikipedia_realtime_appenderator_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_realtime_appenderator_index_task.json index 91f39221082..765914b62ad 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_realtime_appenderator_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_realtime_appenderator_index_task.json @@ -31,6 +31,7 @@ "parser": { "type": "map", "parseSpec": { + "format": "tsv", "columns": [ "timestamp", "page", diff --git a/integration-tests/src/test/resources/indexer/wikipedia_realtime_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_realtime_index_task.json index 8d75559b992..ecfff579716 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_realtime_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_realtime_index_task.json @@ -31,6 +31,7 @@ "parser": { "type": "map", "parseSpec": { + "format": "tsv", "columns": [ "timestamp", "page", diff --git a/integration-tests/src/test/resources/indexer/wikipedia_union_index_task.json b/integration-tests/src/test/resources/indexer/wikipedia_union_index_task.json index c70a896218c..09af36efe28 100644 --- a/integration-tests/src/test/resources/indexer/wikipedia_union_index_task.json +++ b/integration-tests/src/test/resources/indexer/wikipedia_union_index_task.json @@ -31,6 +31,7 @@ "parser": { "type": "map", "parseSpec": { + "format": "tsv", "columns": [ "timestamp", "page",