Add "joda" option to timeFormat extractionFn. (#5448)

This commit is contained in:
Gian Merlino 2018-03-02 19:59:26 -08:00 committed by Fangjin Yang
parent f948066710
commit 7416d1d02d
5 changed files with 150 additions and 62 deletions

View File

@ -263,13 +263,18 @@ Note, if you are working with the `__time` dimension, you should consider using
[time extraction function](#time-format-extraction-function) instead, [time extraction function](#time-format-extraction-function) instead,
which works on time value directly as opposed to string values. which works on time value directly as opposed to string values.
Time formats are described in the If "joda" is true, time formats are described in the [Joda DateTimeFormat documentation](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html).
[SimpleDateFormat documentation](http://icu-project.org/apiref/icu4j/com/ibm/icu/text/SimpleDateFormat.html) If "joda" is false (or unspecified) then formats are described in the [SimpleDateFormat documentation](http://icu-project.org/apiref/icu4j/com/ibm/icu/text/SimpleDateFormat.html).
In general, we recommend setting "joda" to true since Joda format strings are more common in Druid APIs and since Joda handles certain edge cases (like weeks and weekyears near
the start and end of calendar years) in a more ISO 8601-compliant way.
If a value cannot be parsed using the provided timeFormat, it will be returned as-is.
```json ```json
{ "type" : "time", { "type" : "time",
"timeFormat" : <input_format>, "timeFormat" : <input_format>,
"resultFormat" : <output_format> } "resultFormat" : <output_format>,
"joda" : <true, false> }
``` ```

View File

@ -70,10 +70,16 @@ public final class DateTimes
{ {
return innerFormatter.parseDateTime(instant); return innerFormatter.parseDateTime(instant);
} }
/**
 * Renders the given instant as text by delegating to {@code innerFormatter}.
 *
 * @param instant the date-time to format
 * @return whatever {@code innerFormatter.print(instant)} produces
 */
public String print(final DateTime instant)
{
  final String printed = innerFormatter.print(instant);
  return printed;
}
} }
/** /**
* Creates a {@link UtcFormatter} that wraps around a {@link DateTimeFormatter}. * Creates a {@link UtcFormatter} that wraps around a {@link DateTimeFormatter}.
*
* @param formatter inner {@link DateTimeFormatter} used to parse {@link String} * @param formatter inner {@link DateTimeFormatter} used to parse {@link String}
*/ */
public static UtcFormatter wrapFormatter(final DateTimeFormatter formatter) public static UtcFormatter wrapFormatter(final DateTimeFormatter formatter)

View File

@ -24,43 +24,86 @@ import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
import com.google.common.base.Strings; import com.google.common.base.Strings;
import com.ibm.icu.text.SimpleDateFormat; import com.ibm.icu.text.SimpleDateFormat;
import io.druid.java.util.common.DateTimes;
import io.druid.java.util.common.StringUtils; import io.druid.java.util.common.StringUtils;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.text.ParseException; import java.text.ParseException;
import java.util.Date; import java.util.Date;
import java.util.Objects;
import java.util.function.Function;
import java.util.function.Supplier;
/** /**
*/ */
public class TimeDimExtractionFn extends DimExtractionFn public class TimeDimExtractionFn extends DimExtractionFn
{ {
private final String timeFormat; private final String timeFormat;
private final ThreadLocal<SimpleDateFormat> timeFormatter;
private final String resultFormat; private final String resultFormat;
private final ThreadLocal<SimpleDateFormat> resultFormatter; private final Supplier<Function<String, String>> fn;
private final boolean joda;
@JsonCreator @JsonCreator
public TimeDimExtractionFn( public TimeDimExtractionFn(
@JsonProperty("timeFormat") String timeFormat, @JsonProperty("timeFormat") String timeFormat,
@JsonProperty("resultFormat") String resultFormat @JsonProperty("resultFormat") String resultFormat,
@JsonProperty("joda") boolean joda
) )
{ {
Preconditions.checkNotNull(timeFormat, "timeFormat must not be null"); Preconditions.checkNotNull(timeFormat, "timeFormat must not be null");
Preconditions.checkNotNull(resultFormat, "resultFormat must not be null"); Preconditions.checkNotNull(resultFormat, "resultFormat must not be null");
this.joda = joda;
this.timeFormat = timeFormat; this.timeFormat = timeFormat;
this.timeFormatter = ThreadLocal.withInitial(() -> {
SimpleDateFormat formatter = new SimpleDateFormat(TimeDimExtractionFn.this.timeFormat);
formatter.setLenient(true);
return formatter;
});
this.resultFormat = resultFormat; this.resultFormat = resultFormat;
this.resultFormatter = ThreadLocal.withInitial(() -> { this.fn = makeFunctionSupplier();
SimpleDateFormat formatter = new SimpleDateFormat(TimeDimExtractionFn.this.resultFormat); }
return formatter;
}); private Supplier<Function<String, String>> makeFunctionSupplier()
{
if (joda) {
final DateTimes.UtcFormatter parser = DateTimes.wrapFormatter(DateTimeFormat.forPattern(timeFormat));
final DateTimes.UtcFormatter formatter = DateTimes.wrapFormatter(DateTimeFormat.forPattern(resultFormat));
final Function<String, String> fn = value -> {
DateTime date;
try {
date = parser.parse(value);
}
catch (IllegalArgumentException e) {
return value;
}
return formatter.print(date);
};
// Single shared function, since Joda formatters are thread-safe.
return () -> fn;
} else {
final ThreadLocal<Function<String, String>> threadLocal = ThreadLocal.withInitial(
() -> {
final SimpleDateFormat parser = new SimpleDateFormat(timeFormat);
final SimpleDateFormat formatter = new SimpleDateFormat(resultFormat);
parser.setLenient(true);
return value -> {
Date date;
try {
date = parser.parse(value);
}
catch (ParseException e) {
return value;
}
return formatter.format(date);
};
}
);
// Thread-local, since SimpleDateFormats are not thread-safe.
return threadLocal::get;
}
} }
@Override @Override
@ -81,28 +124,27 @@ public class TimeDimExtractionFn extends DimExtractionFn
return null; return null;
} }
Date date; return fn.get().apply(dimValue);
try {
date = timeFormatter.get().parse(dimValue);
}
catch (ParseException e) {
return dimValue;
}
return resultFormatter.get().format(date);
} }
@JsonProperty("timeFormat") @JsonProperty
public String getTimeFormat() public String getTimeFormat()
{ {
return timeFormat; return timeFormat;
} }
@JsonProperty("resultFormat") @JsonProperty
public String getResultFormat() public String getResultFormat()
{ {
return resultFormat; return resultFormat;
} }
/**
 * Returns the {@code joda} flag this extraction function was constructed with.
 * When true, the format strings are handled with Joda's {@code DateTimeFormat};
 * when false, with ICU's {@code SimpleDateFormat}.
 *
 * @return the value of the {@code joda} field
 */
@JsonProperty
public boolean isJoda()
{
  return this.joda;
}
@Override @Override
public boolean preservesOrdering() public boolean preservesOrdering()
{ {
@ -116,16 +158,7 @@ public class TimeDimExtractionFn extends DimExtractionFn
} }
@Override @Override
public String toString() public boolean equals(final Object o)
{
return "TimeDimExtractionFn{" +
"timeFormat='" + timeFormat + '\'' +
", resultFormat='" + resultFormat + '\'' +
'}';
}
@Override
public boolean equals(Object o)
{ {
if (this == o) { if (this == o) {
return true; return true;
@ -133,24 +166,25 @@ public class TimeDimExtractionFn extends DimExtractionFn
if (o == null || getClass() != o.getClass()) { if (o == null || getClass() != o.getClass()) {
return false; return false;
} }
final TimeDimExtractionFn that = (TimeDimExtractionFn) o;
TimeDimExtractionFn that = (TimeDimExtractionFn) o; return joda == that.joda &&
Objects.equals(timeFormat, that.timeFormat) &&
if (!resultFormat.equals(that.resultFormat)) { Objects.equals(resultFormat, that.resultFormat);
return false;
}
if (!timeFormat.equals(that.timeFormat)) {
return false;
}
return true;
} }
@Override @Override
public int hashCode() public int hashCode()
{ {
int result = timeFormat.hashCode(); return Objects.hash(timeFormat, resultFormat, joda);
result = 31 * result + resultFormat.hashCode(); }
return result;
@Override
public String toString()
{
return "TimeDimExtractionFn{" +
"timeFormat='" + timeFormat + '\'' +
", resultFormat='" + resultFormat + '\'' +
", joda=" + joda +
'}';
} }
} }

View File

@ -25,6 +25,7 @@ import io.druid.jackson.DefaultObjectMapper;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Test; import org.junit.Test;
import java.util.Arrays;
import java.util.Set; import java.util.Set;
/** /**
@ -41,19 +42,39 @@ public class TimeDimExtractionFnTest
}; };
@Test @Test
public void testEmptyAndNullExtraction() public void testEmptyNullAndUnparseableExtraction()
{ {
ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy"); for (boolean joda : Arrays.asList(true, false)) {
ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy", joda);
Assert.assertNull(extractionFn.apply(null)); Assert.assertNull(extractionFn.apply(null));
Assert.assertNull(extractionFn.apply("")); Assert.assertNull(extractionFn.apply(""));
Assert.assertEquals("foo", extractionFn.apply("foo"));
}
} }
@Test @Test
public void testMonthExtraction() public void testMonthExtraction()
{ {
Set<String> months = Sets.newHashSet(); Set<String> months = Sets.newHashSet();
ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy"); ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy", false);
for (String dim : dims) {
months.add(extractionFn.apply(dim));
}
Assert.assertEquals(months.size(), 4);
Assert.assertTrue(months.contains("01/2012"));
Assert.assertTrue(months.contains("03/2012"));
Assert.assertTrue(months.contains("05/2012"));
Assert.assertTrue(months.contains("12/2012"));
}
@Test
public void testMonthExtractionJoda()
{
Set<String> months = Sets.newHashSet();
ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy", true);
for (String dim : dims) { for (String dim : dims) {
months.add(extractionFn.apply(dim)); months.add(extractionFn.apply(dim));
@ -70,7 +91,7 @@ public class TimeDimExtractionFnTest
public void testQuarterExtraction() public void testQuarterExtraction()
{ {
Set<String> quarters = Sets.newHashSet(); Set<String> quarters = Sets.newHashSet();
ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "QQQ/yyyy"); ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "QQQ/yyyy", false);
for (String dim : dims) { for (String dim : dims) {
quarters.add(extractionFn.apply(dim)); quarters.add(extractionFn.apply(dim));
@ -82,15 +103,37 @@ public class TimeDimExtractionFnTest
Assert.assertTrue(quarters.contains("Q4/2012")); Assert.assertTrue(quarters.contains("Q4/2012"));
} }
/**
 * Exercises the non-Joda (SimpleDateFormat) path with a "YYYY-ww" result format
 * on dates close to the start and end of calendar years.
 */
@Test
public void testWeeks()
{
  final TimeDimExtractionFn weekFn = new TimeDimExtractionFn("yyyy-MM-dd", "YYYY-ww", false);

  // Each row is {input, expected output}; rows are checked in the listed order.
  // NOTE(review): these expectations presumably depend on the default locale's
  // week-numbering rules used by SimpleDateFormat -- confirm before relying on
  // them in other environments.
  final String[][] inputAndExpected = {
      {"2015-12-31", "2016-01"},
      {"2016-01-01", "2016-01"},
      {"2017-01-01", "2017-01"},
      {"2017-12-31", "2018-01"},
      {"2018-01-01", "2018-01"}
  };

  for (final String[] row : inputAndExpected) {
    Assert.assertEquals(row[1], weekFn.apply(row[0]));
  }
}
/**
 * Exercises the Joda path with an "xxxx-ww" (weekyear-week) result format on
 * dates close to the start and end of calendar years.
 */
@Test
public void testWeeksJoda()
{
  final TimeDimExtractionFn weekFn = new TimeDimExtractionFn("yyyy-MM-dd", "xxxx-ww", true);

  // Each row is {input, expected output}; rows are checked in the listed order.
  final String[][] inputAndExpected = {
      {"2015-12-31", "2015-53"},
      {"2016-01-01", "2015-53"},
      {"2017-01-01", "2016-52"},
      {"2017-12-31", "2017-52"},
      {"2018-01-01", "2018-01"}
  };

  for (final String[] row : inputAndExpected) {
    Assert.assertEquals(row[1], weekFn.apply(row[0]));
  }
}
@Test @Test
public void testSerde() throws Exception public void testSerde() throws Exception
{ {
final ObjectMapper objectMapper = new DefaultObjectMapper(); final ObjectMapper objectMapper = new DefaultObjectMapper();
final String json = "{ \"type\" : \"time\", \"timeFormat\" : \"MM/dd/yyyy\", \"resultFormat\" : \"QQQ/yyyy\" }"; final String json = "{ \"type\" : \"time\", \"timeFormat\" : \"MM/dd/yyyy\", \"resultFormat\" : \"yyyy-MM-dd\", \"joda\" : true }";
TimeDimExtractionFn extractionFn = (TimeDimExtractionFn) objectMapper.readValue(json, ExtractionFn.class); TimeDimExtractionFn extractionFn = (TimeDimExtractionFn) objectMapper.readValue(json, ExtractionFn.class);
Assert.assertEquals("MM/dd/yyyy", extractionFn.getTimeFormat()); Assert.assertEquals("MM/dd/yyyy", extractionFn.getTimeFormat());
Assert.assertEquals("QQQ/yyyy", extractionFn.getResultFormat()); Assert.assertEquals("yyyy-MM-dd", extractionFn.getResultFormat());
// round trip // round trip
Assert.assertEquals( Assert.assertEquals(

View File

@ -106,10 +106,10 @@ public class SelectorFilterTest extends BaseFilterTest
@Test @Test
public void testWithTimeExtractionFnNull() public void testWithTimeExtractionFnNull()
{ {
assertFilterMatches(new SelectorDimFilter("dim0", null, new TimeDimExtractionFn("yyyy-mm-dd", "yyyy-mm")), ImmutableList.<String>of()); assertFilterMatches(new SelectorDimFilter("dim0", null, new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of());
assertFilterMatches(new SelectorDimFilter("dim6", null, new TimeDimExtractionFn("yyyy-mm-dd", "yyyy-mm")), ImmutableList.<String>of("3", "4", "5")); assertFilterMatches(new SelectorDimFilter("dim6", null, new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of("3", "4", "5"));
assertFilterMatches(new SelectorDimFilter("dim6", "2017-07", new TimeDimExtractionFn("yyyy-mm-dd", "yyyy-mm")), ImmutableList.<String>of("0", "1")); assertFilterMatches(new SelectorDimFilter("dim6", "2017-07", new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of("0", "1"));
assertFilterMatches(new SelectorDimFilter("dim6", "2017-05", new TimeDimExtractionFn("yyyy-mm-dd", "yyyy-mm")), ImmutableList.<String>of("2")); assertFilterMatches(new SelectorDimFilter("dim6", "2017-05", new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of("2"));
} }
@Test @Test