mirror of https://github.com/apache/druid.git
Allow timezone info in timestamp column (#4727)
* Allow timezone info in timestamp column * Address code review comments * Incorporating code comments * Add support for more valid timezone strings * Incorporate review comments * Incorporate code review comments
This commit is contained in:
parent
7919469de6
commit
4f6eb47e40
|
@ -24,9 +24,14 @@ import com.google.common.base.Splitter;
|
|||
import com.google.common.base.Strings;
|
||||
import com.google.common.collect.Sets;
|
||||
import io.druid.java.util.common.StringUtils;
|
||||
import org.joda.time.DateTimeZone;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TimeZone;
|
||||
import java.util.stream.Collectors;
|
||||
import java.util.stream.StreamSupport;
|
||||
|
||||
|
@ -34,6 +39,19 @@ public class ParserUtils
|
|||
{
|
||||
private static final String DEFAULT_COLUMN_NAME_PREFIX = "column_";
|
||||
|
||||
/**
 * Lookup table from time zone id to Joda {@link DateTimeZone}, filled once at class load from the
 * ids returned by {@link TimeZone#getAvailableIDs()}. Ids whose conversion throws
 * {@link IllegalArgumentException} are left out of the table.
 */
private static final Map<String, DateTimeZone> TIMEZONE_LOOKUP = new HashMap<>();

static {
  final String[] availableIds = TimeZone.getAvailableIDs();
  for (int i = 0; i < availableIds.length; i++) {
    final String zoneId = availableIds[i];
    try {
      final TimeZone jdkZone = TimeZone.getTimeZone(zoneId);
      TIMEZONE_LOOKUP.put(zoneId, DateTimeZone.forTimeZone(jdkZone));
    }
    catch (IllegalArgumentException ignored) {
      // Deliberately skipped: certain ids such as SystemV/AST4 cannot be converted. More here:
      // https://confluence.atlassian.com/confkb/the-datetime-zone-id-is-not-recognised-167183146.html
    }
  }
}
|
||||
|
||||
public static Function<String, Object> getMultiValueFunction(
|
||||
final String listDelimiter,
|
||||
final Splitter listSplitter
|
||||
|
@ -92,6 +110,12 @@ public class ParserUtils
|
|||
return input;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
public static DateTimeZone getDateTimeZone(String timeZone)
|
||||
{
|
||||
return TIMEZONE_LOOKUP.get(timeZone);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a function to generate default column names.
|
||||
* Note that the postfix for default column names starts from 1.
|
||||
|
|
|
@ -21,9 +21,11 @@ package io.druid.java.util.common.parsers;
|
|||
|
||||
import com.google.common.base.Function;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.base.Strings;
|
||||
import io.druid.java.util.common.DateTimes;
|
||||
import io.druid.java.util.common.IAE;
|
||||
import org.joda.time.DateTime;
|
||||
import org.joda.time.DateTimeZone;
|
||||
import org.joda.time.format.DateTimeFormat;
|
||||
import org.joda.time.format.DateTimeFormatter;
|
||||
import org.joda.time.format.DateTimeFormatterBuilder;
|
||||
|
@ -41,67 +43,54 @@ public class TimestampParser
|
|||
if (format.equalsIgnoreCase("auto")) {
|
||||
// Could be iso or millis
|
||||
final DateTimeFormatter parser = createAutoParser();
|
||||
return new Function<String, DateTime>()
|
||||
{
|
||||
@Override
|
||||
public DateTime apply(String input)
|
||||
{
|
||||
Preconditions.checkArgument(input != null && !input.isEmpty(), "null timestamp");
|
||||
for (int i = 0; i < input.length(); i++) {
|
||||
if (input.charAt(i) < '0' || input.charAt(i) > '9') {
|
||||
return parser.parseDateTime(ParserUtils.stripQuotes(input));
|
||||
}
|
||||
}
|
||||
return (String input) -> {
|
||||
Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp");
|
||||
|
||||
return DateTimes.utc(Long.parseLong(input));
|
||||
for (int i = 0; i < input.length(); i++) {
|
||||
if (input.charAt(i) < '0' || input.charAt(i) > '9') {
|
||||
input = ParserUtils.stripQuotes(input);
|
||||
int lastIndex = input.lastIndexOf(' ');
|
||||
DateTimeZone timeZone = DateTimeZone.UTC;
|
||||
if (lastIndex > 0) {
|
||||
DateTimeZone timeZoneFromString = ParserUtils.getDateTimeZone(input.substring(lastIndex + 1));
|
||||
if (timeZoneFromString != null) {
|
||||
timeZone = timeZoneFromString;
|
||||
input = input.substring(0, lastIndex);
|
||||
}
|
||||
}
|
||||
|
||||
return new DateTime(parser.parseDateTime(input), timeZone);
|
||||
}
|
||||
}
|
||||
|
||||
return DateTimes.utc(Long.parseLong(input));
|
||||
};
|
||||
} else if (format.equalsIgnoreCase("iso")) {
|
||||
return new Function<String, DateTime>()
|
||||
{
|
||||
@Override
|
||||
public DateTime apply(String input)
|
||||
{
|
||||
Preconditions.checkArgument(input != null && !input.isEmpty(), "null timestamp");
|
||||
return DateTimes.of(ParserUtils.stripQuotes(input));
|
||||
}
|
||||
return input -> {
|
||||
Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp");
|
||||
return DateTimes.of(ParserUtils.stripQuotes(input));
|
||||
};
|
||||
} else if (format.equalsIgnoreCase("posix")
|
||||
|| format.equalsIgnoreCase("millis")
|
||||
|| format.equalsIgnoreCase("nano")) {
|
||||
|| format.equalsIgnoreCase("millis")
|
||||
|| format.equalsIgnoreCase("nano")) {
|
||||
final Function<Number, DateTime> numericFun = createNumericTimestampParser(format);
|
||||
return new Function<String, DateTime>()
|
||||
{
|
||||
@Override
|
||||
public DateTime apply(String input)
|
||||
{
|
||||
Preconditions.checkArgument(input != null && !input.isEmpty(), "null timestamp");
|
||||
return numericFun.apply(Long.parseLong(ParserUtils.stripQuotes(input)));
|
||||
}
|
||||
return input -> {
|
||||
Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp");
|
||||
return numericFun.apply(Long.parseLong(ParserUtils.stripQuotes(input)));
|
||||
};
|
||||
} else if (format.equalsIgnoreCase("ruby")) {
|
||||
// Numeric parser ignores millis for ruby.
|
||||
final Function<Number, DateTime> numericFun = createNumericTimestampParser(format);
|
||||
return new Function<String, DateTime>()
|
||||
{
|
||||
@Override
|
||||
public DateTime apply(String input)
|
||||
{
|
||||
Preconditions.checkArgument(input != null && !input.isEmpty(), "null timestamp");
|
||||
return numericFun.apply(Double.parseDouble(ParserUtils.stripQuotes(input)));
|
||||
}
|
||||
return input -> {
|
||||
Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp");
|
||||
return numericFun.apply(Double.parseDouble(ParserUtils.stripQuotes(input)));
|
||||
};
|
||||
} else {
|
||||
try {
|
||||
final DateTimeFormatter formatter = DateTimeFormat.forPattern(format);
|
||||
return new Function<String, DateTime>()
|
||||
{
|
||||
@Override
|
||||
public DateTime apply(String input)
|
||||
{
|
||||
Preconditions.checkArgument(input != null && !input.isEmpty(), "null timestamp");
|
||||
return formatter.parseDateTime(ParserUtils.stripQuotes(input));
|
||||
}
|
||||
return input -> {
|
||||
Preconditions.checkArgument(!Strings.isNullOrEmpty(input), "null timestamp");
|
||||
return formatter.parseDateTime(ParserUtils.stripQuotes(input));
|
||||
};
|
||||
}
|
||||
catch (Exception e) {
|
||||
|
@ -116,32 +105,11 @@ public class TimestampParser
|
|||
{
|
||||
// Ignore millis for ruby
|
||||
if (format.equalsIgnoreCase("posix") || format.equalsIgnoreCase("ruby")) {
|
||||
return new Function<Number, DateTime>()
|
||||
{
|
||||
@Override
|
||||
public DateTime apply(Number input)
|
||||
{
|
||||
return DateTimes.utc(TimeUnit.SECONDS.toMillis(input.longValue()));
|
||||
}
|
||||
};
|
||||
return input -> DateTimes.utc(TimeUnit.SECONDS.toMillis(input.longValue()));
|
||||
} else if (format.equalsIgnoreCase("nano")) {
|
||||
return new Function<Number, DateTime>()
|
||||
{
|
||||
@Override
|
||||
public DateTime apply(Number input)
|
||||
{
|
||||
return DateTimes.utc(TimeUnit.NANOSECONDS.toMillis(input.longValue()));
|
||||
}
|
||||
};
|
||||
return input -> DateTimes.utc(TimeUnit.NANOSECONDS.toMillis(input.longValue()));
|
||||
} else {
|
||||
return new Function<Number, DateTime>()
|
||||
{
|
||||
@Override
|
||||
public DateTime apply(Number input)
|
||||
{
|
||||
return DateTimes.utc(input.longValue());
|
||||
}
|
||||
};
|
||||
return input -> DateTimes.utc(input.longValue());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -152,18 +120,13 @@ public class TimestampParser
|
|||
final Function<String, DateTime> stringFun = createTimestampParser(format);
|
||||
final Function<Number, DateTime> numericFun = createNumericTimestampParser(format);
|
||||
|
||||
return new Function<Object, DateTime>()
|
||||
{
|
||||
@Override
|
||||
public DateTime apply(Object o)
|
||||
{
|
||||
Preconditions.checkArgument(o != null, "null timestamp");
|
||||
return o -> {
|
||||
Preconditions.checkNotNull(o, "null timestamp");
|
||||
|
||||
if (o instanceof Number) {
|
||||
return numericFun.apply((Number) o);
|
||||
} else {
|
||||
return stringFun.apply(o.toString());
|
||||
}
|
||||
if (o instanceof Number) {
|
||||
return numericFun.apply((Number) o);
|
||||
} else {
|
||||
return stringFun.apply(o.toString());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -28,6 +28,8 @@ import org.junit.Rule;
|
|||
import org.junit.Test;
|
||||
import org.junit.rules.ExpectedException;
|
||||
|
||||
import java.util.TimeZone;
|
||||
|
||||
public class TimestampParserTest
|
||||
{
|
||||
@Rule
|
||||
|
@ -40,6 +42,19 @@ public class TimestampParserTest
|
|||
Assert.assertEquals("hello world", ParserUtils.stripQuotes(" \" hello world \" "));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExtractTimeZone() throws Exception
|
||||
{
|
||||
Assert.assertEquals(DateTimeZone.UTC, ParserUtils.getDateTimeZone("UTC"));
|
||||
Assert.assertEquals(DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST")), ParserUtils.getDateTimeZone("PST"));
|
||||
Assert.assertNull(ParserUtils.getDateTimeZone("Hello"));
|
||||
Assert.assertNull(ParserUtils.getDateTimeZone("AEST"));
|
||||
Assert.assertEquals(DateTimeZone.forTimeZone(TimeZone.getTimeZone("Australia/Hobart")),
|
||||
ParserUtils.getDateTimeZone("Australia/Hobart"));
|
||||
Assert.assertNull(ParserUtils.getDateTimeZone(""));
|
||||
Assert.assertNull(ParserUtils.getDateTimeZone(null));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAuto() throws Exception
|
||||
{
|
||||
|
@ -53,6 +68,11 @@ public class TimestampParserTest
|
|||
Assert.assertEquals(DateTimes.of("2009-02-13T00:00:00Z"), parser.apply("\"2009-02-13\""));
|
||||
Assert.assertEquals(DateTimes.of("2009-02-13T23:31:30Z"), parser.apply("2009-02-13 23:31:30"));
|
||||
Assert.assertEquals(DateTimes.of("2009-02-13T23:31:30Z"), parser.apply(1234567890000L));
|
||||
Assert.assertEquals(DateTimes.of("2009-02-13T23:31:30Z"), parser.apply("2009-02-13 23:31:30 UTC"));
|
||||
Assert.assertEquals(new DateTime("2009-02-13T23:31:30Z", DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))),
|
||||
parser.apply("2009-02-13 23:31:30 PST"));
|
||||
Assert.assertEquals(new DateTime("2009-02-13T23:31:30Z", DateTimeZone.forTimeZone(TimeZone.getTimeZone("PST"))),
|
||||
parser.apply("\"2009-02-13 23:31:30 PST\""));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -60,7 +80,7 @@ public class TimestampParserTest
|
|||
{
|
||||
final Function<Object, DateTime> parser = TimestampParser.createObjectTimestampParser("auto");
|
||||
|
||||
expectedException.expect(IllegalArgumentException.class);
|
||||
expectedException.expect(NullPointerException.class);
|
||||
parser.apply(null);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue