Allow timezone info in timestamp column (#4727)

* Allow timezone info in timestamp column

* Address code review comments

* Incorporating code comments

* Add support for more valid timezone strings

* Incorporate review comments

* Incorporate code review comments
This commit is contained in:
Niketh Sabbineni 2017-09-13 13:19:39 -07:00 committed by Roman Leventov
parent 7919469de6
commit 4f6eb47e40
3 changed files with 89 additions and 82 deletions

View File

@ -24,9 +24,14 @@ import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.Sets;
import io.druid.java.util.common.StringUtils;
import org.joda.time.DateTimeZone;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.TimeZone;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
@ -34,6 +39,19 @@ public class ParserUtils
{
private static final String DEFAULT_COLUMN_NAME_PREFIX = "column_";
private static final Map<String, DateTimeZone> TIMEZONE_LOOKUP = new HashMap<>();
static {
for (String tz : TimeZone.getAvailableIDs()) {
try {
TIMEZONE_LOOKUP.put(tz, DateTimeZone.forTimeZone(TimeZone.getTimeZone(tz)));
}
catch (IllegalArgumentException e) {
// Ignore certain date time zone ids like SystemV/AST4. More here https://confluence.atlassian.com/confkb/the-datetime-zone-id-is-not-recognised-167183146.html
}
}
}
public static Function<String, Object> getMultiValueFunction(
final String listDelimiter,
final Splitter listSplitter
@ -92,6 +110,12 @@ public class ParserUtils
return input;
}
@Nullable
public static DateTimeZone getDateTimeZone(String timeZone)
{
return TIMEZONE_LOOKUP.get(timeZone);
}
/**
* Return a function to generate default column names.
* Note that the postfix for default column names starts from 1.

View File

@ -21,9 +21,11 @@ package io.druid.java.util.common.parsers;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import io.druid.java.util.common.DateTimes;
import io.druid.java.util.common.IAE;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.DateTimeFormatterBuilder;
@ -41,67 +43,54 @@ public class TimestampParser
if (format.equalsIgnoreCase("auto")) {
// Could be iso or millis
final DateTimeFormatter parser = createAutoParser();
return new Function<String, DateTime>()
{
@Override
public DateTime apply(String input)
{
Preconditions.checkArgument(input != null && !input.isEmpty(), "null timestamp");
for (int i = 0; i < input.length(); i++) {
if (input.charAt(i) < '0' || input.charAt(i) > '9') {
return parser.parseDateTime(ParserUtils.stripQuotes(input));
}
}
return (String input) -> {
Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp");
return DateTimes.utc(Long.parseLong(input));
for (int i = 0; i < input.length(); i++) {
if (input.charAt(i) < '0' || input.charAt(i) > '9') {
input = ParserUtils.stripQuotes(input);
int lastIndex = input.lastIndexOf(' ');
DateTimeZone timeZone = DateTimeZone.UTC;
if (lastIndex > 0) {
DateTimeZone timeZoneFromString = ParserUtils.getDateTimeZone(input.substring(lastIndex + 1));
if (timeZoneFromString != null) {
timeZone = timeZoneFromString;
input = input.substring(0, lastIndex);
}
}
return new DateTime(parser.parseDateTime(input), timeZone);
}
}
return DateTimes.utc(Long.parseLong(input));
};
} else if (format.equalsIgnoreCase("iso")) {
return new Function<String, DateTime>()
{
@Override
public DateTime apply(String input)
{
Preconditions.checkArgument(input != null && !input.isEmpty(), "null timestamp");
return DateTimes.of(ParserUtils.stripQuotes(input));
}
return input -> {
Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp");
return DateTimes.of(ParserUtils.stripQuotes(input));
};
} else if (format.equalsIgnoreCase("posix")
|| format.equalsIgnoreCase("millis")
|| format.equalsIgnoreCase("nano")) {
|| format.equalsIgnoreCase("millis")
|| format.equalsIgnoreCase("nano")) {
final Function<Number, DateTime> numericFun = createNumericTimestampParser(format);
return new Function<String, DateTime>()
{
@Override
public DateTime apply(String input)
{
Preconditions.checkArgument(input != null && !input.isEmpty(), "null timestamp");
return numericFun.apply(Long.parseLong(ParserUtils.stripQuotes(input)));
}
return input -> {
Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp");
return numericFun.apply(Long.parseLong(ParserUtils.stripQuotes(input)));
};
} else if (format.equalsIgnoreCase("ruby")) {
// Numeric parser ignores millis for ruby.
final Function<Number, DateTime> numericFun = createNumericTimestampParser(format);
return new Function<String, DateTime>()
{
@Override
public DateTime apply(String input)
{
Preconditions.checkArgument(input != null && !input.isEmpty(), "null timestamp");
return numericFun.apply(Double.parseDouble(ParserUtils.stripQuotes(input)));
}
return input -> {
Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp");
return numericFun.apply(Double.parseDouble(ParserUtils.stripQuotes(input)));
};
} else {
try {
final DateTimeFormatter formatter = DateTimeFormat.forPattern(format);
return new Function<String, DateTime>()
{
@Override
public DateTime apply(String input)
{
Preconditions.checkArgument(input != null && !input.isEmpty(), "null timestamp");
return formatter.parseDateTime(ParserUtils.stripQuotes(input));
}
return input -> {
Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp");
return formatter.parseDateTime(ParserUtils.stripQuotes(input));
};
}
catch (Exception e) {
@ -116,32 +105,11 @@ public class TimestampParser
{
// Ignore millis for ruby
if (format.equalsIgnoreCase("posix") || format.equalsIgnoreCase("ruby")) {
return new Function<Number, DateTime>()
{
@Override
public DateTime apply(Number input)
{
return DateTimes.utc(TimeUnit.SECONDS.toMillis(input.longValue()));
}
};
return input -> DateTimes.utc(TimeUnit.SECONDS.toMillis(input.longValue()));
} else if (format.equalsIgnoreCase("nano")) {
return new Function<Number, DateTime>()
{
@Override
public DateTime apply(Number input)
{
return DateTimes.utc(TimeUnit.NANOSECONDS.toMillis(input.longValue()));
}
};
return input -> DateTimes.utc(TimeUnit.NANOSECONDS.toMillis(input.longValue()));
} else {
return new Function<Number, DateTime>()
{
@Override
public DateTime apply(Number input)
{
return DateTimes.utc(input.longValue());
}
};
return input -> DateTimes.utc(input.longValue());
}
}
@ -152,18 +120,13 @@ public class TimestampParser
final Function<String, DateTime> stringFun = createTimestampParser(format);
final Function<Number, DateTime> numericFun = createNumericTimestampParser(format);
return new Function<Object, DateTime>()
{
@Override
public DateTime apply(Object o)
{
Preconditions.checkArgument(o != null, "null timestamp");
return o -> {
Preconditions.checkNotNull(o, "null timestamp");
if (o instanceof Number) {
return numericFun.apply((Number) o);
} else {
return stringFun.apply(o.toString());
}
if (o instanceof Number) {
return numericFun.apply((Number) o);
} else {
return stringFun.apply(o.toString());
}
};
}

View File

@ -28,6 +28,8 @@ import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import java.util.TimeZone;
public class TimestampParserTest
{
@Rule
@ -40,6 +42,19 @@ public class TimestampParserTest
Assert.assertEquals("hello world", ParserUtils.stripQuotes(" \" hello world \" "));
}
@Test
public void testExtractTimeZone() throws Exception
{
Assert.assertEquals(DateTimeZone.UTC, ParserUtils.getDateTimeZone("UTC"));
Assert.assertEquals(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST")), ParserUtils.getDateTimeZone("PST"));
Assert.assertNull(ParserUtils.getDateTimeZone("Hello"));
Assert.assertNull(ParserUtils.getDateTimeZone("AEST"));
Assert.assertEquals(DateTimeZone.forTimeZone(TimeZone.getTimeZone("Australia/Hobart")),
ParserUtils.getDateTimeZone("Australia/Hobart"));
Assert.assertNull(ParserUtils.getDateTimeZone(""));
Assert.assertNull(ParserUtils.getDateTimeZone(null));
}
@Test
public void testAuto() throws Exception
{
@ -53,6 +68,11 @@ public class TimestampParserTest
Assert.assertEquals(DateTimes.of("2009-02-13T00:00:00Z"), parser.apply("\"2009-02-13\""));
Assert.assertEquals(DateTimes.of("2009-02-13T23:31:30Z"), parser.apply("2009-02-13 23:31:30"));
Assert.assertEquals(DateTimes.of("2009-02-13T23:31:30Z"), parser.apply(1234567890000L));
Assert.assertEquals(DateTimes.of("2009-02-13T23:31:30Z"), parser.apply("2009-02-13 23:31:30 UTC"));
Assert.assertEquals(new DateTime("2009-02-13T23:31:30Z", DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))),
parser.apply("2009-02-13 23:31:30 PST"));
Assert.assertEquals(new DateTime("2009-02-13T23:31:30Z", DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))),
parser.apply("\"2009-02-13 23:31:30 PST\""));
}
@Test
@ -60,7 +80,7 @@ public class TimestampParserTest
{
final Function<Object, DateTime> parser = TimestampParser.createObjectTimestampParser("auto");
expectedException.expect(IllegalArgumentException.class);
expectedException.expect(NullPointerException.class);
parser.apply(null);
}