mirror of https://github.com/apache/druid.git
TimeFormatExtractionFn: Allow null formats (equivalent to ISO8601) and granular bucketing. (#3411)
This commit is contained in:
parent
0076b5fc1a
commit
e9050c2b4c
|
@ -111,15 +111,17 @@ For `__time` dimension values, this formats the time value bucketed by the
|
||||||
For a regular dimension, it assumes the string is formatted in
|
For a regular dimension, it assumes the string is formatted in
|
||||||
[ISO-8601 date and time format](https://en.wikipedia.org/wiki/ISO_8601).
|
[ISO-8601 date and time format](https://en.wikipedia.org/wiki/ISO_8601).
|
||||||
|
|
||||||
* `format` : date time format for the resulting dimension value, in [Joda Time DateTimeFormat](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html).
|
* `format` : date time format for the resulting dimension value, in [Joda Time DateTimeFormat](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html), or null to use the default ISO8601 format.
|
||||||
* `locale` : locale (language and country) to use, given as an [IETF BCP 47 language tag](http://www.oracle.com/technetwork/java/javase/java8locales-2095355.html#util-text), e.g. `en-US`, `en-GB`, `fr-FR`, `fr-CA`, etc.
|
* `locale` : locale (language and country) to use, given as an [IETF BCP 47 language tag](http://www.oracle.com/technetwork/java/javase/java8locales-2095355.html#util-text), e.g. `en-US`, `en-GB`, `fr-FR`, `fr-CA`, etc.
|
||||||
* `timeZone` : time zone to use in [IANA tz database format](http://en.wikipedia.org/wiki/List_of_tz_database_time_zones), e.g. `Europe/Berlin` (this can possibly be different than the aggregation time-zone)
|
* `timeZone` : time zone to use in [IANA tz database format](http://en.wikipedia.org/wiki/List_of_tz_database_time_zones), e.g. `Europe/Berlin` (this can possibly be different than the aggregation time-zone)
|
||||||
|
* `granularity` : [granularity](granularities.html) to apply before formatting, or omit to not apply any granularity.
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{ "type" : "timeFormat",
|
{ "type" : "timeFormat",
|
||||||
"format" : <output_format>,
|
"format" : <output_format> (optional),
|
||||||
"timeZone" : <time_zone> (optional),
|
"timeZone" : <time_zone> (optional),
|
||||||
"locale" : <locale> (optional) }
|
"locale" : <locale> (optional),
|
||||||
|
"granularity" : <granularity> (optional) }
|
||||||
```
|
```
|
||||||
|
|
||||||
For example, the following dimension spec returns the day of the week for Montréal in French:
|
For example, the following dimension spec returns the day of the week for Montréal in French:
|
||||||
|
|
|
@ -20,12 +20,14 @@
|
||||||
package io.druid.query.extraction;
|
package io.druid.query.extraction;
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
import com.google.common.base.Preconditions;
|
|
||||||
import com.metamx.common.StringUtils;
|
import com.metamx.common.StringUtils;
|
||||||
|
import io.druid.granularity.QueryGranularities;
|
||||||
|
import io.druid.granularity.QueryGranularity;
|
||||||
import org.joda.time.DateTime;
|
import org.joda.time.DateTime;
|
||||||
import org.joda.time.DateTimeZone;
|
import org.joda.time.DateTimeZone;
|
||||||
import org.joda.time.format.DateTimeFormat;
|
import org.joda.time.format.DateTimeFormat;
|
||||||
import org.joda.time.format.DateTimeFormatter;
|
import org.joda.time.format.DateTimeFormatter;
|
||||||
|
import org.joda.time.format.ISODateTimeFormat;
|
||||||
|
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
@ -33,24 +35,25 @@ import java.util.Locale;
|
||||||
public class TimeFormatExtractionFn implements ExtractionFn
|
public class TimeFormatExtractionFn implements ExtractionFn
|
||||||
{
|
{
|
||||||
private final DateTimeZone tz;
|
private final DateTimeZone tz;
|
||||||
private final String pattern;
|
private final String format;
|
||||||
private final Locale locale;
|
private final Locale locale;
|
||||||
|
private final QueryGranularity granularity;
|
||||||
private final DateTimeFormatter formatter;
|
private final DateTimeFormatter formatter;
|
||||||
|
|
||||||
public TimeFormatExtractionFn(
|
public TimeFormatExtractionFn(
|
||||||
@JsonProperty("format") String pattern,
|
@JsonProperty("format") String format,
|
||||||
@JsonProperty("timeZone") DateTimeZone tz,
|
@JsonProperty("timeZone") DateTimeZone tz,
|
||||||
@JsonProperty("locale") String localeString
|
@JsonProperty("locale") String localeString,
|
||||||
|
@JsonProperty("granularity") QueryGranularity granularity
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
Preconditions.checkArgument(pattern != null, "format cannot be null");
|
this.format = format;
|
||||||
|
|
||||||
this.pattern = pattern;
|
|
||||||
this.tz = tz;
|
this.tz = tz;
|
||||||
this.locale = localeString == null ? null : Locale.forLanguageTag(localeString);
|
this.locale = localeString == null ? null : Locale.forLanguageTag(localeString);
|
||||||
this.formatter = DateTimeFormat.forPattern(pattern)
|
this.granularity = granularity == null ? QueryGranularities.NONE : granularity;
|
||||||
.withZone(tz == null ? DateTimeZone.UTC : tz)
|
this.formatter = (format == null ? ISODateTimeFormat.dateTime() : DateTimeFormat.forPattern(format))
|
||||||
.withLocale(locale);
|
.withZone(tz == null ? DateTimeZone.UTC : tz)
|
||||||
|
.withLocale(locale);
|
||||||
}
|
}
|
||||||
|
|
||||||
@JsonProperty
|
@JsonProperty
|
||||||
|
@ -62,7 +65,7 @@ public class TimeFormatExtractionFn implements ExtractionFn
|
||||||
@JsonProperty
|
@JsonProperty
|
||||||
public String getFormat()
|
public String getFormat()
|
||||||
{
|
{
|
||||||
return pattern;
|
return format;
|
||||||
}
|
}
|
||||||
|
|
||||||
@JsonProperty
|
@JsonProperty
|
||||||
|
@ -75,26 +78,35 @@ public class TimeFormatExtractionFn implements ExtractionFn
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@JsonProperty
|
||||||
|
public QueryGranularity getGranularity()
|
||||||
|
{
|
||||||
|
return granularity;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public byte[] getCacheKey()
|
public byte[] getCacheKey()
|
||||||
{
|
{
|
||||||
byte[] exprBytes = StringUtils.toUtf8(pattern + "\u0001" + tz.getID() + "\u0001" + locale.toLanguageTag());
|
final byte[] exprBytes = StringUtils.toUtf8(format + "\u0001" + tz.getID() + "\u0001" + locale.toLanguageTag());
|
||||||
return ByteBuffer.allocate(1 + exprBytes.length)
|
final byte[] granularityCacheKey = granularity.cacheKey();
|
||||||
|
return ByteBuffer.allocate(2 + exprBytes.length + granularityCacheKey.length)
|
||||||
.put(ExtractionCacheHelper.CACHE_TYPE_ID_TIME_FORMAT)
|
.put(ExtractionCacheHelper.CACHE_TYPE_ID_TIME_FORMAT)
|
||||||
.put(exprBytes)
|
.put(exprBytes)
|
||||||
|
.put((byte) 0xFF)
|
||||||
|
.put(granularityCacheKey)
|
||||||
.array();
|
.array();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String apply(long value)
|
public String apply(long value)
|
||||||
{
|
{
|
||||||
return formatter.print(value);
|
return formatter.print(granularity.truncate(value));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String apply(Object value)
|
public String apply(Object value)
|
||||||
{
|
{
|
||||||
return formatter.print(new DateTime(value));
|
return apply(new DateTime(value).getMillis());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -127,25 +139,26 @@ public class TimeFormatExtractionFn implements ExtractionFn
|
||||||
|
|
||||||
TimeFormatExtractionFn that = (TimeFormatExtractionFn) o;
|
TimeFormatExtractionFn that = (TimeFormatExtractionFn) o;
|
||||||
|
|
||||||
if (locale != null ? !locale.equals(that.locale) : that.locale != null) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!pattern.equals(that.pattern)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (tz != null ? !tz.equals(that.tz) : that.tz != null) {
|
if (tz != null ? !tz.equals(that.tz) : that.tz != null) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
if (format != null ? !format.equals(that.format) : that.format != null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (locale != null ? !locale.equals(that.locale) : that.locale != null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return granularity.equals(that.granularity);
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode()
|
public int hashCode()
|
||||||
{
|
{
|
||||||
int result = tz != null ? tz.hashCode() : 0;
|
int result = tz != null ? tz.hashCode() : 0;
|
||||||
result = 31 * result + pattern.hashCode();
|
result = 31 * result + (format != null ? format.hashCode() : 0);
|
||||||
result = 31 * result + (locale != null ? locale.hashCode() : 0);
|
result = 31 * result + (locale != null ? locale.hashCode() : 0);
|
||||||
|
result = 31 * result + granularity.hashCode();
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,6 +20,8 @@
|
||||||
package io.druid.query.extraction;
|
package io.druid.query.extraction;
|
||||||
|
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import io.druid.granularity.QueryGranularities;
|
||||||
|
import io.druid.granularity.QueryGranularity;
|
||||||
import io.druid.jackson.DefaultObjectMapper;
|
import io.druid.jackson.DefaultObjectMapper;
|
||||||
import org.joda.time.DateTime;
|
import org.joda.time.DateTime;
|
||||||
import org.joda.time.DateTimeZone;
|
import org.joda.time.DateTimeZone;
|
||||||
|
@ -38,16 +40,10 @@ public class TimeFormatExtractionFnTest
|
||||||
new DateTime("2015-12-21T23:00:00Z").getMillis()
|
new DateTime("2015-12-21T23:00:00Z").getMillis()
|
||||||
};
|
};
|
||||||
|
|
||||||
@Test(expected = IllegalArgumentException.class)
|
|
||||||
public void testIAEForNullPattern() throws Exception
|
|
||||||
{
|
|
||||||
new TimeFormatExtractionFn(null, null, null);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDayOfWeekExtraction() throws Exception
|
public void testDayOfWeekExtraction() throws Exception
|
||||||
{
|
{
|
||||||
TimeFormatExtractionFn fn = new TimeFormatExtractionFn("EEEE", null, null);
|
TimeFormatExtractionFn fn = new TimeFormatExtractionFn("EEEE", null, null, null);
|
||||||
Assert.assertEquals("Thursday", fn.apply(timestamps[0]));
|
Assert.assertEquals("Thursday", fn.apply(timestamps[0]));
|
||||||
Assert.assertEquals("Friday", fn.apply(timestamps[1]));
|
Assert.assertEquals("Friday", fn.apply(timestamps[1]));
|
||||||
Assert.assertEquals("Tuesday", fn.apply(timestamps[2]));
|
Assert.assertEquals("Tuesday", fn.apply(timestamps[2]));
|
||||||
|
@ -55,13 +51,13 @@ public class TimeFormatExtractionFnTest
|
||||||
Assert.assertEquals("Saturday", fn.apply(timestamps[4]));
|
Assert.assertEquals("Saturday", fn.apply(timestamps[4]));
|
||||||
Assert.assertEquals("Monday", fn.apply(timestamps[5]));
|
Assert.assertEquals("Monday", fn.apply(timestamps[5]));
|
||||||
|
|
||||||
testSerde(fn, "EEEE", null, null);
|
testSerde(fn, "EEEE", null, null, QueryGranularities.NONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testLocalizedExtraction() throws Exception
|
public void testLocalizedExtraction() throws Exception
|
||||||
{
|
{
|
||||||
TimeFormatExtractionFn fn = new TimeFormatExtractionFn("EEEE", null, "is");
|
TimeFormatExtractionFn fn = new TimeFormatExtractionFn("EEEE", null, "is", null);
|
||||||
Assert.assertEquals("fimmtudagur", fn.apply(timestamps[0]));
|
Assert.assertEquals("fimmtudagur", fn.apply(timestamps[0]));
|
||||||
Assert.assertEquals("föstudagur", fn.apply(timestamps[1]));
|
Assert.assertEquals("föstudagur", fn.apply(timestamps[1]));
|
||||||
Assert.assertEquals("þriðjudagur", fn.apply(timestamps[2]));
|
Assert.assertEquals("þriðjudagur", fn.apply(timestamps[2]));
|
||||||
|
@ -69,13 +65,32 @@ public class TimeFormatExtractionFnTest
|
||||||
Assert.assertEquals("laugardagur", fn.apply(timestamps[4]));
|
Assert.assertEquals("laugardagur", fn.apply(timestamps[4]));
|
||||||
Assert.assertEquals("mánudagur", fn.apply(timestamps[5]));
|
Assert.assertEquals("mánudagur", fn.apply(timestamps[5]));
|
||||||
|
|
||||||
testSerde(fn, "EEEE", null, "is");
|
testSerde(fn, "EEEE", null, "is", QueryGranularities.NONE);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGranularExtractionWithNullPattern() throws Exception
|
||||||
|
{
|
||||||
|
TimeFormatExtractionFn fn = new TimeFormatExtractionFn(null, null, null, QueryGranularities.DAY);
|
||||||
|
Assert.assertEquals("2015-01-01T00:00:00.000Z", fn.apply(timestamps[0]));
|
||||||
|
Assert.assertEquals("2015-01-02T00:00:00.000Z", fn.apply(timestamps[1]));
|
||||||
|
Assert.assertEquals("2015-03-03T00:00:00.000Z", fn.apply(timestamps[2]));
|
||||||
|
Assert.assertEquals("2015-03-04T00:00:00.000Z", fn.apply(timestamps[3]));
|
||||||
|
Assert.assertEquals("2015-05-02T00:00:00.000Z", fn.apply(timestamps[4]));
|
||||||
|
Assert.assertEquals("2015-12-21T00:00:00.000Z", fn.apply(timestamps[5]));
|
||||||
|
|
||||||
|
testSerde(fn, null, null, null, QueryGranularities.DAY);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testTimeZoneExtraction() throws Exception
|
public void testTimeZoneExtraction() throws Exception
|
||||||
{
|
{
|
||||||
TimeFormatExtractionFn fn = new TimeFormatExtractionFn("'In Berlin ist es schon 'EEEE", DateTimeZone.forID("Europe/Berlin"), "de");
|
TimeFormatExtractionFn fn = new TimeFormatExtractionFn(
|
||||||
|
"'In Berlin ist es schon 'EEEE",
|
||||||
|
DateTimeZone.forID("Europe/Berlin"),
|
||||||
|
"de",
|
||||||
|
null
|
||||||
|
);
|
||||||
Assert.assertEquals("In Berlin ist es schon Freitag", fn.apply(timestamps[0]));
|
Assert.assertEquals("In Berlin ist es schon Freitag", fn.apply(timestamps[0]));
|
||||||
Assert.assertEquals("In Berlin ist es schon Samstag", fn.apply(timestamps[1]));
|
Assert.assertEquals("In Berlin ist es schon Samstag", fn.apply(timestamps[1]));
|
||||||
Assert.assertEquals("In Berlin ist es schon Mittwoch", fn.apply(timestamps[2]));
|
Assert.assertEquals("In Berlin ist es schon Mittwoch", fn.apply(timestamps[2]));
|
||||||
|
@ -83,18 +98,25 @@ public class TimeFormatExtractionFnTest
|
||||||
Assert.assertEquals("In Berlin ist es schon Sonntag", fn.apply(timestamps[4]));
|
Assert.assertEquals("In Berlin ist es schon Sonntag", fn.apply(timestamps[4]));
|
||||||
Assert.assertEquals("In Berlin ist es schon Dienstag", fn.apply(timestamps[5]));
|
Assert.assertEquals("In Berlin ist es schon Dienstag", fn.apply(timestamps[5]));
|
||||||
|
|
||||||
testSerde(fn, "'In Berlin ist es schon 'EEEE", DateTimeZone.forID("Europe/Berlin"), "de");
|
testSerde(fn, "'In Berlin ist es schon 'EEEE", DateTimeZone.forID("Europe/Berlin"), "de", QueryGranularities.NONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testSerde(TimeFormatExtractionFn fn, String format, DateTimeZone tz, String locale) throws Exception {
|
public void testSerde(
|
||||||
|
final TimeFormatExtractionFn fn,
|
||||||
|
final String format,
|
||||||
|
final DateTimeZone tz,
|
||||||
|
final String locale,
|
||||||
|
final QueryGranularity granularity
|
||||||
|
) throws Exception
|
||||||
|
{
|
||||||
final ObjectMapper objectMapper = new DefaultObjectMapper();
|
final ObjectMapper objectMapper = new DefaultObjectMapper();
|
||||||
final String json = objectMapper.writeValueAsString(fn);
|
final String json = objectMapper.writeValueAsString(fn);
|
||||||
TimeFormatExtractionFn deserialized = objectMapper.readValue(json, TimeFormatExtractionFn.class);
|
TimeFormatExtractionFn deserialized = objectMapper.readValue(json, TimeFormatExtractionFn.class);
|
||||||
|
|
||||||
Assert.assertEquals(format, deserialized.getFormat());
|
Assert.assertEquals(format, deserialized.getFormat());
|
||||||
Assert.assertEquals(tz, deserialized.getTimeZone());
|
Assert.assertEquals(tz, deserialized.getTimeZone());
|
||||||
Assert.assertEquals(locale, deserialized.getLocale());
|
Assert.assertEquals(locale, deserialized.getLocale());
|
||||||
|
Assert.assertEquals(granularity, deserialized.getGranularity());
|
||||||
Assert.assertEquals(fn, deserialized);
|
Assert.assertEquals(fn, deserialized);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4654,8 +4654,8 @@ public class GroupByQueryRunnerTest
|
||||||
.setGranularity(QueryRunnerTestHelper.dayGran)
|
.setGranularity(QueryRunnerTestHelper.dayGran)
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
final DimFilter fridayFilter = new SelectorDimFilter(Column.TIME_COLUMN_NAME, "Friday", new TimeFormatExtractionFn("EEEE", null, null));
|
final DimFilter fridayFilter = new SelectorDimFilter(Column.TIME_COLUMN_NAME, "Friday", new TimeFormatExtractionFn("EEEE", null, null, null));
|
||||||
final DimFilter firstDaysFilter = new InDimFilter(Column.TIME_COLUMN_NAME, ImmutableList.of("1", "2", "3"), new TimeFormatExtractionFn("d", null, null));
|
final DimFilter firstDaysFilter = new InDimFilter(Column.TIME_COLUMN_NAME, ImmutableList.of("1", "2", "3"), new TimeFormatExtractionFn("d", null, null, null));
|
||||||
final GroupByQuery query = GroupByQuery
|
final GroupByQuery query = GroupByQuery
|
||||||
.builder()
|
.builder()
|
||||||
.setDataSource(subquery)
|
.setDataSource(subquery)
|
||||||
|
@ -5131,7 +5131,7 @@ public class GroupByQueryRunnerTest
|
||||||
new ExtractionDimensionSpec(
|
new ExtractionDimensionSpec(
|
||||||
Column.TIME_COLUMN_NAME,
|
Column.TIME_COLUMN_NAME,
|
||||||
"dayOfWeek",
|
"dayOfWeek",
|
||||||
new TimeFormatExtractionFn("EEEE", null, null),
|
new TimeFormatExtractionFn("EEEE", null, null, null),
|
||||||
null
|
null
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
|
@ -3041,7 +3041,7 @@ public class TopNQueryRunnerTest
|
||||||
new ExtractionDimensionSpec(
|
new ExtractionDimensionSpec(
|
||||||
Column.TIME_COLUMN_NAME,
|
Column.TIME_COLUMN_NAME,
|
||||||
"dayOfWeek",
|
"dayOfWeek",
|
||||||
new TimeFormatExtractionFn("EEEE", null, null),
|
new TimeFormatExtractionFn("EEEE", null, null, null),
|
||||||
null
|
null
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
|
@ -226,7 +226,7 @@ public class TimeFilteringTest extends BaseFilterTest
|
||||||
@Test
|
@Test
|
||||||
public void testTimeFilterWithTimeFormatExtractionFn()
|
public void testTimeFilterWithTimeFormatExtractionFn()
|
||||||
{
|
{
|
||||||
ExtractionFn exfn = new TimeFormatExtractionFn("EEEE", DateTimeZone.forID("America/New_York"), "en");
|
ExtractionFn exfn = new TimeFormatExtractionFn("EEEE", DateTimeZone.forID("America/New_York"), "en", null);
|
||||||
assertFilterMatches(
|
assertFilterMatches(
|
||||||
new SelectorDimFilter(Column.TIME_COLUMN_NAME, "Wednesday", exfn),
|
new SelectorDimFilter(Column.TIME_COLUMN_NAME, "Wednesday", exfn),
|
||||||
ImmutableList.<String>of("0", "1", "2", "3", "4", "5")
|
ImmutableList.<String>of("0", "1", "2", "3", "4", "5")
|
||||||
|
|
Loading…
Reference in New Issue