From 7416d1d02d5e88b92b728c627922e6293066e03a Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Fri, 2 Mar 2018 19:59:26 -0800 Subject: [PATCH] Add "joda" option to timeFormat extractionFn. (#5448) --- docs/content/querying/dimensionspecs.md | 11 +- .../io/druid/java/util/common/DateTimes.java | 6 + .../query/extraction/TimeDimExtractionFn.java | 128 +++++++++++------- .../extraction/TimeDimExtractionFnTest.java | 59 ++++++-- .../segment/filter/SelectorFilterTest.java | 8 +- 5 files changed, 150 insertions(+), 62 deletions(-) diff --git a/docs/content/querying/dimensionspecs.md b/docs/content/querying/dimensionspecs.md index 19d080493fe..b70fa68583a 100644 --- a/docs/content/querying/dimensionspecs.md +++ b/docs/content/querying/dimensionspecs.md @@ -263,13 +263,18 @@ Note, if you are working with the `__time` dimension, you should consider using [time extraction function instead](#time-format-extraction-function) instead, which works on time value directly as opposed to string values. -Time formats are described in the -[SimpleDateFormat documentation](http://icu-project.org/apiref/icu4j/com/ibm/icu/text/SimpleDateFormat.html) +If "joda" is true, time formats are described in the [Joda DateTimeFormat documentation](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html). +If "joda" is false (or unspecified) then formats are described in the [SimpleDateFormat documentation](http://icu-project.org/apiref/icu4j/com/ibm/icu/text/SimpleDateFormat.html). +In general, we recommend setting "joda" to true since Joda format strings are more common in Druid APIs and since Joda handles certain edge cases (like weeks and weekyears near +the start and end of calendar years) in a more ISO8601 compliant way. + +If a value cannot be parsed using the provided timeFormat, it will be returned as-is. ```json { "type" : "time", "timeFormat" : , - "resultFormat" : } + "resultFormat" : , + "joda" : } ``` diff --git a/java-util/src/main/java/io/druid/java/util/common/DateTimes.java b/java-util/src/main/java/io/druid/java/util/common/DateTimes.java index d1e525ed4d7..3635c04f776 100644 --- a/java-util/src/main/java/io/druid/java/util/common/DateTimes.java +++ b/java-util/src/main/java/io/druid/java/util/common/DateTimes.java @@ -70,10 +70,16 @@ public final class DateTimes { return innerFormatter.parseDateTime(instant); } + + public String print(final DateTime instant) + { + return innerFormatter.print(instant); + } } /** * Creates a {@link UtcFormatter} that wraps around a {@link DateTimeFormatter}. + * * @param formatter inner {@link DateTimeFormatter} used to parse {@link String} */ public static UtcFormatter wrapFormatter(final DateTimeFormatter formatter) diff --git a/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java index 0aa7fdeb8ac..e731b3f3d0d 100644 --- a/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/TimeDimExtractionFn.java @@ -24,43 +24,86 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.ibm.icu.text.SimpleDateFormat; +import io.druid.java.util.common.DateTimes; import io.druid.java.util.common.StringUtils; +import org.joda.time.DateTime; +import org.joda.time.format.DateTimeFormat; import javax.annotation.Nullable; import java.nio.ByteBuffer; import java.text.ParseException; import java.util.Date; +import java.util.Objects; +import java.util.function.Function; +import java.util.function.Supplier; /** */ public class TimeDimExtractionFn extends DimExtractionFn { private final String timeFormat; - private final ThreadLocal timeFormatter; private final String resultFormat; - private final ThreadLocal resultFormatter; + private final Supplier> fn; + private final boolean joda; @JsonCreator public TimeDimExtractionFn( @JsonProperty("timeFormat") String timeFormat, - @JsonProperty("resultFormat") String resultFormat + @JsonProperty("resultFormat") String resultFormat, + @JsonProperty("joda") boolean joda ) { Preconditions.checkNotNull(timeFormat, "timeFormat must not be null"); Preconditions.checkNotNull(resultFormat, "resultFormat must not be null"); + this.joda = joda; this.timeFormat = timeFormat; - this.timeFormatter = ThreadLocal.withInitial(() -> { - SimpleDateFormat formatter = new SimpleDateFormat(TimeDimExtractionFn.this.timeFormat); - formatter.setLenient(true); - return formatter; - }); - this.resultFormat = resultFormat; - this.resultFormatter = ThreadLocal.withInitial(() -> { - SimpleDateFormat formatter = new SimpleDateFormat(TimeDimExtractionFn.this.resultFormat); - return formatter; - }); + this.fn = makeFunctionSupplier(); + } + + private Supplier> makeFunctionSupplier() + { + if (joda) { + final DateTimes.UtcFormatter parser = DateTimes.wrapFormatter(DateTimeFormat.forPattern(timeFormat)); + final DateTimes.UtcFormatter formatter = DateTimes.wrapFormatter(DateTimeFormat.forPattern(resultFormat)); + + final Function fn = value -> { + DateTime date; + try { + date = parser.parse(value); + } + catch (IllegalArgumentException e) { + return value; + } + return formatter.print(date); + }; + + // Single shared function, since Joda formatters are thread-safe. + return () -> fn; + } else { + final ThreadLocal> threadLocal = ThreadLocal.withInitial( + () -> { + final SimpleDateFormat parser = new SimpleDateFormat(timeFormat); + final SimpleDateFormat formatter = new SimpleDateFormat(resultFormat); + parser.setLenient(true); + + return value -> { + Date date; + try { + date = parser.parse(value); + } + catch (ParseException e) { + return value; + } + return formatter.format(date); + }; + } + ); + + // Thread-local, since SimpleDateFormats are not thread-safe. + return threadLocal::get; + } } @Override @@ -81,28 +124,27 @@ public class TimeDimExtractionFn extends DimExtractionFn return null; } - Date date; - try { - date = timeFormatter.get().parse(dimValue); - } - catch (ParseException e) { - return dimValue; - } - return resultFormatter.get().format(date); + return fn.get().apply(dimValue); } - @JsonProperty("timeFormat") + @JsonProperty public String getTimeFormat() { return timeFormat; } - @JsonProperty("resultFormat") + @JsonProperty public String getResultFormat() { return resultFormat; } + @JsonProperty + public boolean isJoda() + { + return joda; + } + @Override public boolean preservesOrdering() { @@ -116,16 +158,7 @@ public class TimeDimExtractionFn extends DimExtractionFn } @Override - public String toString() - { - return "TimeDimExtractionFn{" + - "timeFormat='" + timeFormat + '\'' + - ", resultFormat='" + resultFormat + '\'' + - '}'; - } - - @Override - public boolean equals(Object o) + public boolean equals(final Object o) { if (this == o) { return true; @@ -133,24 +166,25 @@ public class TimeDimExtractionFn extends DimExtractionFn if (o == null || getClass() != o.getClass()) { return false; } - - TimeDimExtractionFn that = (TimeDimExtractionFn) o; - - if (!resultFormat.equals(that.resultFormat)) { - return false; - } - if (!timeFormat.equals(that.timeFormat)) { - return false; - } - - return true; + final TimeDimExtractionFn that = (TimeDimExtractionFn) o; + return joda == that.joda && + Objects.equals(timeFormat, that.timeFormat) && + Objects.equals(resultFormat, that.resultFormat); } @Override public int hashCode() { - int result = timeFormat.hashCode(); - result = 31 * result + resultFormat.hashCode(); - return result; + return Objects.hash(timeFormat, resultFormat, joda); + } + + @Override + public String toString() + { + return "TimeDimExtractionFn{" + + "timeFormat='" + timeFormat + '\'' + + ", resultFormat='" + resultFormat + '\'' + + ", joda=" + joda + + '}'; } } diff --git a/processing/src/test/java/io/druid/query/extraction/TimeDimExtractionFnTest.java b/processing/src/test/java/io/druid/query/extraction/TimeDimExtractionFnTest.java index 77fb05cdb2e..a8ea07a9ce5 100644 --- a/processing/src/test/java/io/druid/query/extraction/TimeDimExtractionFnTest.java +++ b/processing/src/test/java/io/druid/query/extraction/TimeDimExtractionFnTest.java @@ -25,6 +25,7 @@ import io.druid.jackson.DefaultObjectMapper; import org.junit.Assert; import org.junit.Test; +import java.util.Arrays; import java.util.Set; /** @@ -41,19 +42,39 @@ public class TimeDimExtractionFnTest }; @Test - public void testEmptyAndNullExtraction() + public void testEmptyNullAndUnparseableExtraction() { - ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy"); + for (boolean joda : Arrays.asList(true, false)) { + ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy", joda); - Assert.assertNull(extractionFn.apply(null)); - Assert.assertNull(extractionFn.apply("")); + Assert.assertNull(extractionFn.apply(null)); + Assert.assertNull(extractionFn.apply("")); + Assert.assertEquals("foo", extractionFn.apply("foo")); + } } @Test public void testMonthExtraction() { Set months = Sets.newHashSet(); - ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy"); + ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy", false); + + for (String dim : dims) { + months.add(extractionFn.apply(dim)); + } + + Assert.assertEquals(months.size(), 4); + Assert.assertTrue(months.contains("01/2012")); + Assert.assertTrue(months.contains("03/2012")); + Assert.assertTrue(months.contains("05/2012")); + Assert.assertTrue(months.contains("12/2012")); + } + + @Test + public void testMonthExtractionJoda() + { + Set months = Sets.newHashSet(); + ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy", true); for (String dim : dims) { months.add(extractionFn.apply(dim)); @@ -70,7 +91,7 @@ public class TimeDimExtractionFnTest public void testQuarterExtraction() { Set quarters = Sets.newHashSet(); - ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "QQQ/yyyy"); + ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "QQQ/yyyy", false); for (String dim : dims) { quarters.add(extractionFn.apply(dim)); @@ -82,15 +103,37 @@ public class TimeDimExtractionFnTest Assert.assertTrue(quarters.contains("Q4/2012")); } + @Test + public void testWeeks() + { + final TimeDimExtractionFn weekFn = new TimeDimExtractionFn("yyyy-MM-dd", "YYYY-ww", false); + Assert.assertEquals("2016-01", weekFn.apply("2015-12-31")); + Assert.assertEquals("2016-01", weekFn.apply("2016-01-01")); + Assert.assertEquals("2017-01", weekFn.apply("2017-01-01")); + Assert.assertEquals("2018-01", weekFn.apply("2017-12-31")); + Assert.assertEquals("2018-01", weekFn.apply("2018-01-01")); + } + + @Test + public void testWeeksJoda() + { + final TimeDimExtractionFn weekFn = new TimeDimExtractionFn("yyyy-MM-dd", "xxxx-ww", true); + Assert.assertEquals("2015-53", weekFn.apply("2015-12-31")); + Assert.assertEquals("2015-53", weekFn.apply("2016-01-01")); + Assert.assertEquals("2016-52", weekFn.apply("2017-01-01")); + Assert.assertEquals("2017-52", weekFn.apply("2017-12-31")); + Assert.assertEquals("2018-01", weekFn.apply("2018-01-01")); + } + @Test public void testSerde() throws Exception { final ObjectMapper objectMapper = new DefaultObjectMapper(); - final String json = "{ \"type\" : \"time\", \"timeFormat\" : \"MM/dd/yyyy\", \"resultFormat\" : \"QQQ/yyyy\" }"; + final String json = "{ \"type\" : \"time\", \"timeFormat\" : \"MM/dd/yyyy\", \"resultFormat\" : \"yyyy-MM-dd\", \"joda\" : true }"; TimeDimExtractionFn extractionFn = (TimeDimExtractionFn) objectMapper.readValue(json, ExtractionFn.class); Assert.assertEquals("MM/dd/yyyy", extractionFn.getTimeFormat()); - Assert.assertEquals("QQQ/yyyy", extractionFn.getResultFormat()); + Assert.assertEquals("yyyy-MM-dd", extractionFn.getResultFormat()); // round trip Assert.assertEquals( diff --git a/processing/src/test/java/io/druid/segment/filter/SelectorFilterTest.java b/processing/src/test/java/io/druid/segment/filter/SelectorFilterTest.java index 4feac1adff9..b37009f578e 100644 --- a/processing/src/test/java/io/druid/segment/filter/SelectorFilterTest.java +++ b/processing/src/test/java/io/druid/segment/filter/SelectorFilterTest.java @@ -106,10 +106,10 @@ public class SelectorFilterTest extends BaseFilterTest @Test public void testWithTimeExtractionFnNull() { - assertFilterMatches(new SelectorDimFilter("dim0", null, new TimeDimExtractionFn("yyyy-mm-dd", "yyyy-mm")), ImmutableList.of()); - assertFilterMatches(new SelectorDimFilter("dim6", null, new TimeDimExtractionFn("yyyy-mm-dd", "yyyy-mm")), ImmutableList.of("3", "4", "5")); - assertFilterMatches(new SelectorDimFilter("dim6", "2017-07", new TimeDimExtractionFn("yyyy-mm-dd", "yyyy-mm")), ImmutableList.of("0", "1")); - assertFilterMatches(new SelectorDimFilter("dim6", "2017-05", new TimeDimExtractionFn("yyyy-mm-dd", "yyyy-mm")), ImmutableList.of("2")); + assertFilterMatches(new SelectorDimFilter("dim0", null, new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of()); + assertFilterMatches(new SelectorDimFilter("dim6", null, new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of("3", "4", "5")); + assertFilterMatches(new SelectorDimFilter("dim6", "2017-07", new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of("0", "1")); + assertFilterMatches(new SelectorDimFilter("dim6", "2017-05", new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of("2")); } @Test