From e9050c2b4c533d5032fd3229ca5608cfcc111754 Mon Sep 17 00:00:00 2001 From: Gian Merlino Date: Wed, 31 Aug 2016 08:28:53 -0700 Subject: [PATCH] TimeFormatExtractionFn: Allow null formats (equivalent to ISO8601) and granular bucketing. (#3411) --- docs/content/querying/dimensionspecs.md | 8 ++- .../extraction/TimeFormatExtractionFn.java | 59 +++++++++++-------- .../TimeFormatExtractionFnTest.java | 52 +++++++++++----- .../query/groupby/GroupByQueryRunnerTest.java | 6 +- .../druid/query/topn/TopNQueryRunnerTest.java | 2 +- .../segment/filter/TimeFilteringTest.java | 2 +- 6 files changed, 83 insertions(+), 46 deletions(-) diff --git a/docs/content/querying/dimensionspecs.md b/docs/content/querying/dimensionspecs.md index cc90eec3630..3549c2cd8ca 100644 --- a/docs/content/querying/dimensionspecs.md +++ b/docs/content/querying/dimensionspecs.md @@ -111,15 +111,17 @@ For `__time` dimension values, this formats the time value bucketed by the For a regular dimension, it assumes the string is formatted in [ISO-8601 date and time format](https://en.wikipedia.org/wiki/ISO_8601). -* `format` : date time format for the resulting dimension value, in [Joda Time DateTimeFormat](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html). +* `format` : date time format for the resulting dimension value, in [Joda Time DateTimeFormat](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html), or null to use the default ISO8601 format. * `locale` : locale (language and country) to use, given as a [IETF BCP 47 language tag](http://www.oracle.com/technetwork/java/javase/java8locales-2095355.html#util-text), e.g. `en-US`, `en-GB`, `fr-FR`, `fr-CA`, etc. * `timeZone` : time zone to use in [IANA tz database format](http://en.wikipedia.org/wiki/List_of_tz_database_time_zones), e.g. 
`Europe/Berlin` (this can possibly be different than the aggregation time-zone) +* `granularity` : [granularity](granularities.html) to apply before formatting, or omit to not apply any granularity. ```json { "type" : "timeFormat", - "format" : <output_format>, + "format" : <output_format> (optional), "timeZone" : <time_zone> (optional), - "locale" : <locale> (optional) } + "locale" : <locale> (optional), + "granularity" : <granularity> (optional) } ``` For example, the following dimension spec returns the day of the week for Montréal in French: diff --git a/processing/src/main/java/io/druid/query/extraction/TimeFormatExtractionFn.java b/processing/src/main/java/io/druid/query/extraction/TimeFormatExtractionFn.java index 2ee2011e701..78ac17d61cb 100644 --- a/processing/src/main/java/io/druid/query/extraction/TimeFormatExtractionFn.java +++ b/processing/src/main/java/io/druid/query/extraction/TimeFormatExtractionFn.java @@ -20,12 +20,14 @@ package io.druid.query.extraction; import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.base.Preconditions; import com.metamx.common.StringUtils; +import io.druid.granularity.QueryGranularities; +import io.druid.granularity.QueryGranularity; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; +import org.joda.time.format.ISODateTimeFormat; import java.nio.ByteBuffer; import java.util.Locale; @@ -33,24 +35,25 @@ import java.util.Locale; public class TimeFormatExtractionFn implements ExtractionFn { private final DateTimeZone tz; - private final String pattern; + private final String format; private final Locale locale; + private final QueryGranularity granularity; private final DateTimeFormatter formatter; public TimeFormatExtractionFn( - @JsonProperty("format") String pattern, + @JsonProperty("format") String format, @JsonProperty("timeZone") DateTimeZone tz, - @JsonProperty("locale") String localeString + @JsonProperty("locale") String localeString, + 
@JsonProperty("granularity") QueryGranularity granularity ) { - Preconditions.checkArgument(pattern != null, "format cannot be null"); - - this.pattern = pattern; + this.format = format; this.tz = tz; this.locale = localeString == null ? null : Locale.forLanguageTag(localeString); - this.formatter = DateTimeFormat.forPattern(pattern) - .withZone(tz == null ? DateTimeZone.UTC : tz) - .withLocale(locale); + this.granularity = granularity == null ? QueryGranularities.NONE : granularity; + this.formatter = (format == null ? ISODateTimeFormat.dateTime() : DateTimeFormat.forPattern(format)) + .withZone(tz == null ? DateTimeZone.UTC : tz) + .withLocale(locale); } @JsonProperty @@ -62,7 +65,7 @@ public class TimeFormatExtractionFn implements ExtractionFn @JsonProperty public String getFormat() { - return pattern; + return format; } @JsonProperty @@ -75,26 +78,35 @@ public class TimeFormatExtractionFn implements ExtractionFn } } + @JsonProperty + public QueryGranularity getGranularity() + { + return granularity; + } + @Override public byte[] getCacheKey() { - byte[] exprBytes = StringUtils.toUtf8(pattern + "\u0001" + tz.getID() + "\u0001" + locale.toLanguageTag()); - return ByteBuffer.allocate(1 + exprBytes.length) + final byte[] exprBytes = StringUtils.toUtf8(format + "\u0001" + tz.getID() + "\u0001" + locale.toLanguageTag()); + final byte[] granularityCacheKey = granularity.cacheKey(); + return ByteBuffer.allocate(2 + exprBytes.length + granularityCacheKey.length) .put(ExtractionCacheHelper.CACHE_TYPE_ID_TIME_FORMAT) .put(exprBytes) + .put((byte) 0xFF) + .put(granularityCacheKey) .array(); } @Override public String apply(long value) { - return formatter.print(value); + return formatter.print(granularity.truncate(value)); } @Override public String apply(Object value) { - return formatter.print(new DateTime(value)); + return apply(new DateTime(value).getMillis()); } @Override @@ -127,25 +139,26 @@ public class TimeFormatExtractionFn implements ExtractionFn 
TimeFormatExtractionFn that = (TimeFormatExtractionFn) o; - if (locale != null ? !locale.equals(that.locale) : that.locale != null) { - return false; - } - if (!pattern.equals(that.pattern)) { - return false; - } if (tz != null ? !tz.equals(that.tz) : that.tz != null) { return false; } + if (format != null ? !format.equals(that.format) : that.format != null) { + return false; + } + if (locale != null ? !locale.equals(that.locale) : that.locale != null) { + return false; + } + return granularity.equals(that.granularity); - return true; } @Override public int hashCode() { int result = tz != null ? tz.hashCode() : 0; - result = 31 * result + pattern.hashCode(); + result = 31 * result + (format != null ? format.hashCode() : 0); result = 31 * result + (locale != null ? locale.hashCode() : 0); + result = 31 * result + granularity.hashCode(); return result; } } diff --git a/processing/src/test/java/io/druid/query/extraction/TimeFormatExtractionFnTest.java b/processing/src/test/java/io/druid/query/extraction/TimeFormatExtractionFnTest.java index 5b4ec42c002..a9c76a96877 100644 --- a/processing/src/test/java/io/druid/query/extraction/TimeFormatExtractionFnTest.java +++ b/processing/src/test/java/io/druid/query/extraction/TimeFormatExtractionFnTest.java @@ -20,6 +20,8 @@ package io.druid.query.extraction; import com.fasterxml.jackson.databind.ObjectMapper; +import io.druid.granularity.QueryGranularities; +import io.druid.granularity.QueryGranularity; import io.druid.jackson.DefaultObjectMapper; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; @@ -38,16 +40,10 @@ public class TimeFormatExtractionFnTest new DateTime("2015-12-21T23:00:00Z").getMillis() }; - @Test(expected = IllegalArgumentException.class) - public void testIAEForNullPattern() throws Exception - { - new TimeFormatExtractionFn(null, null, null); - } - @Test public void testDayOfWeekExtraction() throws Exception { - TimeFormatExtractionFn fn = new TimeFormatExtractionFn("EEEE", null, null); + 
TimeFormatExtractionFn fn = new TimeFormatExtractionFn("EEEE", null, null, null); Assert.assertEquals("Thursday", fn.apply(timestamps[0])); Assert.assertEquals("Friday", fn.apply(timestamps[1])); Assert.assertEquals("Tuesday", fn.apply(timestamps[2])); @@ -55,13 +51,13 @@ public class TimeFormatExtractionFnTest Assert.assertEquals("Saturday", fn.apply(timestamps[4])); Assert.assertEquals("Monday", fn.apply(timestamps[5])); - testSerde(fn, "EEEE", null, null); + testSerde(fn, "EEEE", null, null, QueryGranularities.NONE); } @Test public void testLocalizedExtraction() throws Exception { - TimeFormatExtractionFn fn = new TimeFormatExtractionFn("EEEE", null, "is"); + TimeFormatExtractionFn fn = new TimeFormatExtractionFn("EEEE", null, "is", null); Assert.assertEquals("fimmtudagur", fn.apply(timestamps[0])); Assert.assertEquals("föstudagur", fn.apply(timestamps[1])); Assert.assertEquals("þriðjudagur", fn.apply(timestamps[2])); @@ -69,13 +65,32 @@ public class TimeFormatExtractionFnTest Assert.assertEquals("laugardagur", fn.apply(timestamps[4])); Assert.assertEquals("mánudagur", fn.apply(timestamps[5])); - testSerde(fn, "EEEE", null, "is"); + testSerde(fn, "EEEE", null, "is", QueryGranularities.NONE); + } + + @Test + public void testGranularExtractionWithNullPattern() throws Exception + { + TimeFormatExtractionFn fn = new TimeFormatExtractionFn(null, null, null, QueryGranularities.DAY); + Assert.assertEquals("2015-01-01T00:00:00.000Z", fn.apply(timestamps[0])); + Assert.assertEquals("2015-01-02T00:00:00.000Z", fn.apply(timestamps[1])); + Assert.assertEquals("2015-03-03T00:00:00.000Z", fn.apply(timestamps[2])); + Assert.assertEquals("2015-03-04T00:00:00.000Z", fn.apply(timestamps[3])); + Assert.assertEquals("2015-05-02T00:00:00.000Z", fn.apply(timestamps[4])); + Assert.assertEquals("2015-12-21T00:00:00.000Z", fn.apply(timestamps[5])); + + testSerde(fn, null, null, null, QueryGranularities.DAY); } @Test public void testTimeZoneExtraction() throws Exception { - 
TimeFormatExtractionFn fn = new TimeFormatExtractionFn("'In Berlin ist es schon 'EEEE", DateTimeZone.forID("Europe/Berlin"), "de"); + TimeFormatExtractionFn fn = new TimeFormatExtractionFn( + "'In Berlin ist es schon 'EEEE", + DateTimeZone.forID("Europe/Berlin"), + "de", + null + ); Assert.assertEquals("In Berlin ist es schon Freitag", fn.apply(timestamps[0])); Assert.assertEquals("In Berlin ist es schon Samstag", fn.apply(timestamps[1])); Assert.assertEquals("In Berlin ist es schon Mittwoch", fn.apply(timestamps[2])); @@ -83,18 +98,25 @@ public class TimeFormatExtractionFnTest Assert.assertEquals("In Berlin ist es schon Sonntag", fn.apply(timestamps[4])); Assert.assertEquals("In Berlin ist es schon Dienstag", fn.apply(timestamps[5])); - testSerde(fn, "'In Berlin ist es schon 'EEEE", DateTimeZone.forID("Europe/Berlin"), "de"); + testSerde(fn, "'In Berlin ist es schon 'EEEE", DateTimeZone.forID("Europe/Berlin"), "de", QueryGranularities.NONE); } - public void testSerde(TimeFormatExtractionFn fn, String format, DateTimeZone tz, String locale) throws Exception { + public void testSerde( + final TimeFormatExtractionFn fn, + final String format, + final DateTimeZone tz, + final String locale, + final QueryGranularity granularity + ) throws Exception + { final ObjectMapper objectMapper = new DefaultObjectMapper(); - final String json = objectMapper.writeValueAsString(fn); + final String json = objectMapper.writeValueAsString(fn); TimeFormatExtractionFn deserialized = objectMapper.readValue(json, TimeFormatExtractionFn.class); Assert.assertEquals(format, deserialized.getFormat()); Assert.assertEquals(tz, deserialized.getTimeZone()); Assert.assertEquals(locale, deserialized.getLocale()); - + Assert.assertEquals(granularity, deserialized.getGranularity()); Assert.assertEquals(fn, deserialized); } diff --git a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java index 
2b9e99ba794..f503db4158e 100644 --- a/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/groupby/GroupByQueryRunnerTest.java @@ -4654,8 +4654,8 @@ public class GroupByQueryRunnerTest .setGranularity(QueryRunnerTestHelper.dayGran) .build(); - final DimFilter fridayFilter = new SelectorDimFilter(Column.TIME_COLUMN_NAME, "Friday", new TimeFormatExtractionFn("EEEE", null, null)); - final DimFilter firstDaysFilter = new InDimFilter(Column.TIME_COLUMN_NAME, ImmutableList.of("1", "2", "3"), new TimeFormatExtractionFn("d", null, null)); + final DimFilter fridayFilter = new SelectorDimFilter(Column.TIME_COLUMN_NAME, "Friday", new TimeFormatExtractionFn("EEEE", null, null, null)); + final DimFilter firstDaysFilter = new InDimFilter(Column.TIME_COLUMN_NAME, ImmutableList.of("1", "2", "3"), new TimeFormatExtractionFn("d", null, null, null)); final GroupByQuery query = GroupByQuery .builder() .setDataSource(subquery) @@ -5131,7 +5131,7 @@ public class GroupByQueryRunnerTest new ExtractionDimensionSpec( Column.TIME_COLUMN_NAME, "dayOfWeek", - new TimeFormatExtractionFn("EEEE", null, null), + new TimeFormatExtractionFn("EEEE", null, null, null), null ) ) diff --git a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java index f0570c85816..3160ad2ca57 100644 --- a/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java +++ b/processing/src/test/java/io/druid/query/topn/TopNQueryRunnerTest.java @@ -3041,7 +3041,7 @@ public class TopNQueryRunnerTest new ExtractionDimensionSpec( Column.TIME_COLUMN_NAME, "dayOfWeek", - new TimeFormatExtractionFn("EEEE", null, null), + new TimeFormatExtractionFn("EEEE", null, null, null), null ) ) diff --git a/processing/src/test/java/io/druid/segment/filter/TimeFilteringTest.java b/processing/src/test/java/io/druid/segment/filter/TimeFilteringTest.java index 
a7f58d0cb8c..d8bf6abd689 100644 --- a/processing/src/test/java/io/druid/segment/filter/TimeFilteringTest.java +++ b/processing/src/test/java/io/druid/segment/filter/TimeFilteringTest.java @@ -226,7 +226,7 @@ public class TimeFilteringTest extends BaseFilterTest @Test public void testTimeFilterWithTimeFormatExtractionFn() { - ExtractionFn exfn = new TimeFormatExtractionFn("EEEE", DateTimeZone.forID("America/New_York"), "en"); + ExtractionFn exfn = new TimeFormatExtractionFn("EEEE", DateTimeZone.forID("America/New_York"), "en", null); assertFilterMatches( new SelectorDimFilter(Column.TIME_COLUMN_NAME, "Wednesday", exfn), ImmutableList.of("0", "1", "2", "3", "4", "5")