From 4f6eb47e4096e38b11861a59cb3e273b7943b989 Mon Sep 17 00:00:00 2001 From: Niketh Sabbineni Date: Wed, 13 Sep 2017 13:19:39 -0700 Subject: [PATCH] Allow timezone info in timestamp column (#4727) * Allow timezone info in timestamp column * Address code review comments * Incorporating code comments * Add support for more valid timezone strings * Incorporate review comments * Incorporate code review comments --- .../java/util/common/parsers/ParserUtils.java | 24 ++++ .../util/common/parsers/TimestampParser.java | 125 ++++++------------ .../common/parsers/TimestampParserTest.java | 22 ++- 3 files changed, 89 insertions(+), 82 deletions(-) diff --git a/java-util/src/main/java/io/druid/java/util/common/parsers/ParserUtils.java b/java-util/src/main/java/io/druid/java/util/common/parsers/ParserUtils.java index a62b723752f..e3975ca15d4 100644 --- a/java-util/src/main/java/io/druid/java/util/common/parsers/ParserUtils.java +++ b/java-util/src/main/java/io/druid/java/util/common/parsers/ParserUtils.java @@ -24,9 +24,14 @@ import com.google.common.base.Splitter; import com.google.common.base.Strings; import com.google.common.collect.Sets; import io.druid.java.util.common.StringUtils; +import org.joda.time.DateTimeZone; +import javax.annotation.Nullable; import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; import java.util.Set; +import java.util.TimeZone; import java.util.stream.Collectors; import java.util.stream.StreamSupport; @@ -34,6 +39,19 @@ public class ParserUtils { private static final String DEFAULT_COLUMN_NAME_PREFIX = "column_"; + private static final Map TIMEZONE_LOOKUP = new HashMap<>(); + + static { + for (String tz : TimeZone.getAvailableIDs()) { + try { + TIMEZONE_LOOKUP.put(tz, DateTimeZone.forTimeZone(TimeZone.getTimeZone(tz))); + } + catch (IllegalArgumentException e) { + // Ignore certain date time zone ids like SystemV/AST4. More here https://confluence.atlassian.com/confkb/the-datetime-zone-id-is-not-recognised-167183146.html + } + } + } + public static Function getMultiValueFunction( final String listDelimiter, final Splitter listSplitter @@ -92,6 +110,12 @@ public class ParserUtils return input; } + @Nullable + public static DateTimeZone getDateTimeZone(String timeZone) + { + return TIMEZONE_LOOKUP.get(timeZone); + } + /** * Return a function to generate default column names. * Note that the postfix for default column names starts from 1. diff --git a/java-util/src/main/java/io/druid/java/util/common/parsers/TimestampParser.java b/java-util/src/main/java/io/druid/java/util/common/parsers/TimestampParser.java index 427f7ba3694..58b5af67f3f 100644 --- a/java-util/src/main/java/io/druid/java/util/common/parsers/TimestampParser.java +++ b/java-util/src/main/java/io/druid/java/util/common/parsers/TimestampParser.java @@ -21,9 +21,11 @@ package io.druid.java.util.common.parsers; import com.google.common.base.Function; import com.google.common.base.Preconditions; +import com.google.common.base.Strings; import io.druid.java.util.common.DateTimes; import io.druid.java.util.common.IAE; import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; import org.joda.time.format.DateTimeFormatterBuilder; @@ -41,67 +43,54 @@ public class TimestampParser if (format.equalsIgnoreCase("auto")) { // Could be iso or millis final DateTimeFormatter parser = createAutoParser(); - return new Function() - { - @Override - public DateTime apply(String input) - { - Preconditions.checkArgument(input != null && !input.isEmpty(), "null timestamp"); - for (int i = 0; i < input.length(); i++) { - if (input.charAt(i) < '0' || input.charAt(i) > '9') { - return parser.parseDateTime(ParserUtils.stripQuotes(input)); - } - } + return (String input) -> { + Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp"); - return DateTimes.utc(Long.parseLong(input)); + for (int i = 0; i < input.length(); i++) { + if (input.charAt(i) < '0' || input.charAt(i) > '9') { + input = ParserUtils.stripQuotes(input); + int lastIndex = input.lastIndexOf(' '); + DateTimeZone timeZone = DateTimeZone.UTC; + if (lastIndex > 0) { + DateTimeZone timeZoneFromString = ParserUtils.getDateTimeZone(input.substring(lastIndex + 1)); + if (timeZoneFromString != null) { + timeZone = timeZoneFromString; + input = input.substring(0, lastIndex); + } + } + + return new DateTime(parser.parseDateTime(input), timeZone); + } } + + return DateTimes.utc(Long.parseLong(input)); }; } else if (format.equalsIgnoreCase("iso")) { - return new Function() - { - @Override - public DateTime apply(String input) - { - Preconditions.checkArgument(input != null && !input.isEmpty(), "null timestamp"); - return DateTimes.of(ParserUtils.stripQuotes(input)); - } + return input -> { + Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp"); + return DateTimes.of(ParserUtils.stripQuotes(input)); }; } else if (format.equalsIgnoreCase("posix") - || format.equalsIgnoreCase("millis") - || format.equalsIgnoreCase("nano")) { + || format.equalsIgnoreCase("millis") + || format.equalsIgnoreCase("nano")) { final Function numericFun = createNumericTimestampParser(format); - return new Function() - { - @Override - public DateTime apply(String input) - { - Preconditions.checkArgument(input != null && !input.isEmpty(), "null timestamp"); - return numericFun.apply(Long.parseLong(ParserUtils.stripQuotes(input))); - } + return input -> { + Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp"); + return numericFun.apply(Long.parseLong(ParserUtils.stripQuotes(input))); }; } else if (format.equalsIgnoreCase("ruby")) { // Numeric parser ignores millis for ruby. final Function numericFun = createNumericTimestampParser(format); - return new Function() - { - @Override - public DateTime apply(String input) - { - Preconditions.checkArgument(input != null && !input.isEmpty(), "null timestamp"); - return numericFun.apply(Double.parseDouble(ParserUtils.stripQuotes(input))); - } + return input -> { + Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp"); + return numericFun.apply(Double.parseDouble(ParserUtils.stripQuotes(input))); }; } else { try { final DateTimeFormatter formatter = DateTimeFormat.forPattern(format); - return new Function() - { - @Override - public DateTime apply(String input) - { - Preconditions.checkArgument(input != null && !input.isEmpty(), "null timestamp"); - return formatter.parseDateTime(ParserUtils.stripQuotes(input)); - } + return input -> { + Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp"); + return formatter.parseDateTime(ParserUtils.stripQuotes(input)); }; } catch (Exception e) { @@ -116,32 +105,11 @@ public class TimestampParser { // Ignore millis for ruby if (format.equalsIgnoreCase("posix") || format.equalsIgnoreCase("ruby")) { - return new Function() - { - @Override - public DateTime apply(Number input) - { - return DateTimes.utc(TimeUnit.SECONDS.toMillis(input.longValue())); - } - }; + return input -> DateTimes.utc(TimeUnit.SECONDS.toMillis(input.longValue())); } else if (format.equalsIgnoreCase("nano")) { - return new Function() - { - @Override - public DateTime apply(Number input) - { - return DateTimes.utc(TimeUnit.NANOSECONDS.toMillis(input.longValue())); - } - }; + return input -> DateTimes.utc(TimeUnit.NANOSECONDS.toMillis(input.longValue())); } else { - return new Function() - { - @Override - public DateTime apply(Number input) - { - return DateTimes.utc(input.longValue()); - } - }; + return input -> DateTimes.utc(input.longValue()); } } @@ -152,18 +120,13 @@ public class TimestampParser final Function stringFun = createTimestampParser(format); final Function numericFun = createNumericTimestampParser(format); - return new Function() - { - @Override - public DateTime apply(Object o) - { - Preconditions.checkArgument(o != null, "null timestamp"); + return o -> { + Preconditions.checkNotNull(o, "null timestamp"); - if (o instanceof Number) { - return numericFun.apply((Number) o); - } else { - return stringFun.apply(o.toString()); - } + if (o instanceof Number) { + return numericFun.apply((Number) o); + } else { + return stringFun.apply(o.toString()); } }; } diff --git a/java-util/src/test/java/io/druid/java/util/common/parsers/TimestampParserTest.java b/java-util/src/test/java/io/druid/java/util/common/parsers/TimestampParserTest.java index 2c0ae1eea24..f159efc0367 100644 --- a/java-util/src/test/java/io/druid/java/util/common/parsers/TimestampParserTest.java +++ b/java-util/src/test/java/io/druid/java/util/common/parsers/TimestampParserTest.java @@ -28,6 +28,8 @@ import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; +import java.util.TimeZone; + public class TimestampParserTest { @Rule @@ -40,6 +42,19 @@ public class TimestampParserTest Assert.assertEquals("hello world", ParserUtils.stripQuotes(" \" hello world \" ")); } + @Test + public void testExtractTimeZone() throws Exception + { + Assert.assertEquals(DateTimeZone.UTC, ParserUtils.getDateTimeZone("UTC")); + Assert.assertEquals(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST")), ParserUtils.getDateTimeZone("PST")); + Assert.assertNull(ParserUtils.getDateTimeZone("Hello")); + Assert.assertNull(ParserUtils.getDateTimeZone("AEST")); + Assert.assertEquals(DateTimeZone.forTimeZone(TimeZone.getTimeZone("Australia/Hobart")), + ParserUtils.getDateTimeZone("Australia/Hobart")); + Assert.assertNull(ParserUtils.getDateTimeZone("")); + Assert.assertNull(ParserUtils.getDateTimeZone(null)); + } + @Test public void testAuto() throws Exception { @@ -53,6 +68,11 @@ public class TimestampParserTest Assert.assertEquals(DateTimes.of("2009-02-13T00:00:00Z"), parser.apply("\"2009-02-13\"")); Assert.assertEquals(DateTimes.of("2009-02-13T23:31:30Z"), parser.apply("2009-02-13 23:31:30")); Assert.assertEquals(DateTimes.of("2009-02-13T23:31:30Z"), parser.apply(1234567890000L)); + Assert.assertEquals(DateTimes.of("2009-02-13T23:31:30Z"), parser.apply("2009-02-13 23:31:30 UTC")); + Assert.assertEquals(new DateTime("2009-02-13T23:31:30Z", DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))), + parser.apply("2009-02-13 23:31:30 PST")); + Assert.assertEquals(new DateTime("2009-02-13T23:31:30Z", DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))), + parser.apply("\"2009-02-13 23:31:30 PST\"")); } @Test @@ -60,7 +80,7 @@ public class TimestampParserTest { final Function parser = TimestampParser.createObjectTimestampParser("auto"); - expectedException.expect(IllegalArgumentException.class); + expectedException.expect(NullPointerException.class); parser.apply(null); }