Add "joda" option to timeFormat extractionFn. (#5448)

This commit is contained in:
Gian Merlino 2018-03-02 19:59:26 -08:00 committed by Fangjin Yang
parent f948066710
commit 7416d1d02d
5 changed files with 150 additions and 62 deletions

View File

@ -263,13 +263,18 @@ Note, if you are working with the `__time` dimension, you should consider using
[time extraction function instead](#time-format-extraction-function) instead,
which works on time value directly as opposed to string values.
Time formats are described in the
[SimpleDateFormat documentation](http://icu-project.org/apiref/icu4j/com/ibm/icu/text/SimpleDateFormat.html)
If "joda" is true, time formats are described in the [Joda DateTimeFormat documentation](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html).
If "joda" is false (or unspecified) then formats are described in the [SimpleDateFormat documentation](http://icu-project.org/apiref/icu4j/com/ibm/icu/text/SimpleDateFormat.html).
In general, we recommend setting "joda" to true since Joda format strings are more common in Druid APIs and since Joda handles certain edge cases (like weeks and weekyears near
the start and end of calendar years) in a more ISO8601 compliant way.
If a value cannot be parsed using the provided timeFormat, it will be returned as-is.
```json
{ "type" : "time",
"timeFormat" : <input_format>,
"resultFormat" : <output_format> }
"resultFormat" : <output_format>,
"joda" : <true, false> }
```

View File

@ -70,10 +70,16 @@ public final class DateTimes
{
return innerFormatter.parseDateTime(instant);
}
public String print(final DateTime instant)
{
return innerFormatter.print(instant);
}
}
/**
* Creates a {@link UtcFormatter} that wraps around a {@link DateTimeFormatter}.
*
* @param formatter inner {@link DateTimeFormatter} used to parse {@link String}
*/
public static UtcFormatter wrapFormatter(final DateTimeFormatter formatter)

View File

@ -24,43 +24,86 @@ import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.ibm.icu.text.SimpleDateFormat;
import io.druid.java.util.common.DateTimes;
import io.druid.java.util.common.StringUtils;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import javax.annotation.Nullable;
import java.nio.ByteBuffer;
import java.text.ParseException;
import java.util.Date;
import java.util.Objects;
import java.util.function.Function;
import java.util.function.Supplier;
/**
*/
public class TimeDimExtractionFn extends DimExtractionFn
{
private final String timeFormat;
private final ThreadLocal<SimpleDateFormat> timeFormatter;
private final String resultFormat;
private final ThreadLocal<SimpleDateFormat> resultFormatter;
private final Supplier<Function<String, String>> fn;
private final boolean joda;
@JsonCreator
public TimeDimExtractionFn(
@JsonProperty("timeFormat") String timeFormat,
@JsonProperty("resultFormat") String resultFormat
@JsonProperty("resultFormat") String resultFormat,
@JsonProperty("joda") boolean joda
)
{
Preconditions.checkNotNull(timeFormat, "timeFormat must not be null");
Preconditions.checkNotNull(resultFormat, "resultFormat must not be null");
this.joda = joda;
this.timeFormat = timeFormat;
this.timeFormatter = ThreadLocal.withInitial(() -> {
SimpleDateFormat formatter = new SimpleDateFormat(TimeDimExtractionFn.this.timeFormat);
formatter.setLenient(true);
return formatter;
});
this.resultFormat = resultFormat;
this.resultFormatter = ThreadLocal.withInitial(() -> {
SimpleDateFormat formatter = new SimpleDateFormat(TimeDimExtractionFn.this.resultFormat);
return formatter;
});
this.fn = makeFunctionSupplier();
}
private Supplier<Function<String, String>> makeFunctionSupplier()
{
if (joda) {
final DateTimes.UtcFormatter parser = DateTimes.wrapFormatter(DateTimeFormat.forPattern(timeFormat));
final DateTimes.UtcFormatter formatter = DateTimes.wrapFormatter(DateTimeFormat.forPattern(resultFormat));
final Function<String, String> fn = value -> {
DateTime date;
try {
date = parser.parse(value);
}
catch (IllegalArgumentException e) {
return value;
}
return formatter.print(date);
};
// Single shared function, since Joda formatters are thread-safe.
return () -> fn;
} else {
final ThreadLocal<Function<String, String>> threadLocal = ThreadLocal.withInitial(
() -> {
final SimpleDateFormat parser = new SimpleDateFormat(timeFormat);
final SimpleDateFormat formatter = new SimpleDateFormat(resultFormat);
parser.setLenient(true);
return value -> {
Date date;
try {
date = parser.parse(value);
}
catch (ParseException e) {
return value;
}
return formatter.format(date);
};
}
);
// Thread-local, since SimpleDateFormats are not thread-safe.
return threadLocal::get;
}
}
@Override
@ -81,28 +124,27 @@ public class TimeDimExtractionFn extends DimExtractionFn
return null;
}
Date date;
try {
date = timeFormatter.get().parse(dimValue);
}
catch (ParseException e) {
return dimValue;
}
return resultFormatter.get().format(date);
return fn.get().apply(dimValue);
}
@JsonProperty("timeFormat")
@JsonProperty
public String getTimeFormat()
{
return timeFormat;
}
@JsonProperty("resultFormat")
@JsonProperty
public String getResultFormat()
{
return resultFormat;
}
@JsonProperty
public boolean isJoda()
{
return joda;
}
@Override
public boolean preservesOrdering()
{
@ -116,16 +158,7 @@ public class TimeDimExtractionFn extends DimExtractionFn
}
@Override
public String toString()
{
return "TimeDimExtractionFn{" +
"timeFormat='" + timeFormat + '\'' +
", resultFormat='" + resultFormat + '\'' +
'}';
}
@Override
public boolean equals(Object o)
public boolean equals(final Object o)
{
if (this == o) {
return true;
@ -133,24 +166,25 @@ public class TimeDimExtractionFn extends DimExtractionFn
if (o == null || getClass() != o.getClass()) {
return false;
}
TimeDimExtractionFn that = (TimeDimExtractionFn) o;
if (!resultFormat.equals(that.resultFormat)) {
return false;
}
if (!timeFormat.equals(that.timeFormat)) {
return false;
}
return true;
final TimeDimExtractionFn that = (TimeDimExtractionFn) o;
return joda == that.joda &&
Objects.equals(timeFormat, that.timeFormat) &&
Objects.equals(resultFormat, that.resultFormat);
}
@Override
public int hashCode()
{
int result = timeFormat.hashCode();
result = 31 * result + resultFormat.hashCode();
return result;
return Objects.hash(timeFormat, resultFormat, joda);
}
@Override
public String toString()
{
return "TimeDimExtractionFn{" +
"timeFormat='" + timeFormat + '\'' +
", resultFormat='" + resultFormat + '\'' +
", joda=" + joda +
'}';
}
}

View File

@ -25,6 +25,7 @@ import io.druid.jackson.DefaultObjectMapper;
import org.junit.Assert;
import org.junit.Test;
import java.util.Arrays;
import java.util.Set;
/**
@ -41,19 +42,39 @@ public class TimeDimExtractionFnTest
};
@Test
public void testEmptyAndNullExtraction()
public void testEmptyNullAndUnparseableExtraction()
{
ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy");
for (boolean joda : Arrays.asList(true, false)) {
ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy", joda);
Assert.assertNull(extractionFn.apply(null));
Assert.assertNull(extractionFn.apply(""));
Assert.assertNull(extractionFn.apply(null));
Assert.assertNull(extractionFn.apply(""));
Assert.assertEquals("foo", extractionFn.apply("foo"));
}
}
@Test
public void testMonthExtraction()
{
Set<String> months = Sets.newHashSet();
ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy");
ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy", false);
for (String dim : dims) {
months.add(extractionFn.apply(dim));
}
Assert.assertEquals(months.size(), 4);
Assert.assertTrue(months.contains("01/2012"));
Assert.assertTrue(months.contains("03/2012"));
Assert.assertTrue(months.contains("05/2012"));
Assert.assertTrue(months.contains("12/2012"));
}
@Test
public void testMonthExtractionJoda()
{
Set<String> months = Sets.newHashSet();
ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "MM/yyyy", true);
for (String dim : dims) {
months.add(extractionFn.apply(dim));
@ -70,7 +91,7 @@ public class TimeDimExtractionFnTest
public void testQuarterExtraction()
{
Set<String> quarters = Sets.newHashSet();
ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "QQQ/yyyy");
ExtractionFn extractionFn = new TimeDimExtractionFn("MM/dd/yyyy", "QQQ/yyyy", false);
for (String dim : dims) {
quarters.add(extractionFn.apply(dim));
@ -82,15 +103,37 @@ public class TimeDimExtractionFnTest
Assert.assertTrue(quarters.contains("Q4/2012"));
}
@Test
public void testWeeks()
{
final TimeDimExtractionFn weekFn = new TimeDimExtractionFn("yyyy-MM-dd", "YYYY-ww", false);
Assert.assertEquals("2016-01", weekFn.apply("2015-12-31"));
Assert.assertEquals("2016-01", weekFn.apply("2016-01-01"));
Assert.assertEquals("2017-01", weekFn.apply("2017-01-01"));
Assert.assertEquals("2018-01", weekFn.apply("2017-12-31"));
Assert.assertEquals("2018-01", weekFn.apply("2018-01-01"));
}
@Test
public void testWeeksJoda()
{
final TimeDimExtractionFn weekFn = new TimeDimExtractionFn("yyyy-MM-dd", "xxxx-ww", true);
Assert.assertEquals("2015-53", weekFn.apply("2015-12-31"));
Assert.assertEquals("2015-53", weekFn.apply("2016-01-01"));
Assert.assertEquals("2016-52", weekFn.apply("2017-01-01"));
Assert.assertEquals("2017-52", weekFn.apply("2017-12-31"));
Assert.assertEquals("2018-01", weekFn.apply("2018-01-01"));
}
@Test
public void testSerde() throws Exception
{
final ObjectMapper objectMapper = new DefaultObjectMapper();
final String json = "{ \"type\" : \"time\", \"timeFormat\" : \"MM/dd/yyyy\", \"resultFormat\" : \"QQQ/yyyy\" }";
final String json = "{ \"type\" : \"time\", \"timeFormat\" : \"MM/dd/yyyy\", \"resultFormat\" : \"yyyy-MM-dd\", \"joda\" : true }";
TimeDimExtractionFn extractionFn = (TimeDimExtractionFn) objectMapper.readValue(json, ExtractionFn.class);
Assert.assertEquals("MM/dd/yyyy", extractionFn.getTimeFormat());
Assert.assertEquals("QQQ/yyyy", extractionFn.getResultFormat());
Assert.assertEquals("yyyy-MM-dd", extractionFn.getResultFormat());
// round trip
Assert.assertEquals(

View File

@ -106,10 +106,10 @@ public class SelectorFilterTest extends BaseFilterTest
@Test
public void testWithTimeExtractionFnNull()
{
assertFilterMatches(new SelectorDimFilter("dim0", null, new TimeDimExtractionFn("yyyy-mm-dd", "yyyy-mm")), ImmutableList.<String>of());
assertFilterMatches(new SelectorDimFilter("dim6", null, new TimeDimExtractionFn("yyyy-mm-dd", "yyyy-mm")), ImmutableList.<String>of("3", "4", "5"));
assertFilterMatches(new SelectorDimFilter("dim6", "2017-07", new TimeDimExtractionFn("yyyy-mm-dd", "yyyy-mm")), ImmutableList.<String>of("0", "1"));
assertFilterMatches(new SelectorDimFilter("dim6", "2017-05", new TimeDimExtractionFn("yyyy-mm-dd", "yyyy-mm")), ImmutableList.<String>of("2"));
assertFilterMatches(new SelectorDimFilter("dim0", null, new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of());
assertFilterMatches(new SelectorDimFilter("dim6", null, new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of("3", "4", "5"));
assertFilterMatches(new SelectorDimFilter("dim6", "2017-07", new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of("0", "1"));
assertFilterMatches(new SelectorDimFilter("dim6", "2017-05", new TimeDimExtractionFn("yyyy-MM-dd", "yyyy-MM", true)), ImmutableList.of("2"));
}
@Test