Number based columns representing time in custom format cannot be used as timestamp column in Druid. (#9877)

* Number based columns representing time in custom format cannot be used as timestamp column in Druid.

Prior to this fix, if an integer column in Parquet stores a dateint in the format yyyyMMdd, it cannot be used as the timestamp column in Druid, because the timestamp parser interprets it as a number holding a UTC time instead of treating it as a number representing a time in yyyyMMdd format. Data formats like TSV or CSV don't suffer from this problem, as the timestamp is passed in as a string, which the timestamp parser is able to parse correctly.
This commit is contained in:
Samarth Jain 2020-05-18 11:17:28 -07:00 committed by GitHub
parent 2e9548d93d
commit 82e5b0573e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 56 additions and 1 deletions

View File

@ -122,11 +122,12 @@ public class TimestampParser
{ {
final Function<String, DateTime> stringFun = createTimestampParser(format); final Function<String, DateTime> stringFun = createTimestampParser(format);
final Function<Number, DateTime> numericFun = createNumericTimestampParser(format); final Function<Number, DateTime> numericFun = createNumericTimestampParser(format);
final boolean isNumericFormat = isNumericFormat(format);
return o -> { return o -> {
Preconditions.checkNotNull(o, "null timestamp"); Preconditions.checkNotNull(o, "null timestamp");
if (o instanceof Number) { if (o instanceof Number && isNumericFormat) {
return numericFun.apply((Number) o); return numericFun.apply((Number) o);
} else { } else {
return stringFun.apply(o.toString()); return stringFun.apply(o.toString());
@ -134,6 +135,16 @@ public class TimestampParser
}; };
} }
/**
 * Tells whether {@code format} is one of the format names in the fixed list below.
 * The comparison ignores case, and a {@code null} format matches none of them.
 *
 * @param format the timestamp format name to check; may be {@code null}
 * @return {@code true} if {@code format} equals one of the listed names, ignoring case
 */
private static boolean isNumericFormat(String format)
{
  final String[] numericFormats = {"auto", "millis", "posix", "micro", "nano", "ruby"};
  for (String candidate : numericFormats) {
    // Calling equalsIgnoreCase on the literal keeps a null format from throwing.
    if (candidate.equalsIgnoreCase(format)) {
      return true;
    }
  }
  return false;
}
private static DateTimeFormatter createAutoParser() private static DateTimeFormatter createAutoParser()
{ {
final DateTimeFormatter offsetElement = new DateTimeFormatterBuilder() final DateTimeFormatter offsetElement = new DateTimeFormatterBuilder()

View File

@ -181,4 +181,48 @@ public class TimestampParserTest
.getMillis() .getMillis()
); );
} }
/**
 * Checks that parsers built by {@code TimestampParser.createObjectTimestampParser} accept
 * {@link Number} inputs both for custom date patterns (yyyyMMdd, yyyy, yyyyMM), where the
 * digits of the number spell out the date, and for the named formats millis, auto, posix
 * and micro, where the number is an offset from the epoch.
 */
@Test
public void testFormatsForNumberBasedTimestamp()
{
  // Custom patterns: the number's digits are the formatted date.
  int yearMonthDate = 20200514;
  DateTime expectedDt = DateTimes.of("2020-05-14T00:00:00.000Z");
  Function<Object, DateTime> parser = TimestampParser.createObjectTimestampParser("yyyyMMdd");
  Assert.assertEquals("Timestamp of format yyyyMMdd not parsed correctly",
                      expectedDt, parser.apply(yearMonthDate));

  int year = 2020;
  expectedDt = DateTimes.of("2020-01-01T00:00:00.000Z");
  parser = TimestampParser.createObjectTimestampParser("yyyy");
  Assert.assertEquals("Timestamp of format yyyy not parsed correctly",
                      expectedDt, parser.apply(year));

  int yearMonth = 202010;
  expectedDt = DateTimes.of("2020-10-01T00:00:00.000Z");
  parser = TimestampParser.createObjectTimestampParser("yyyyMM");
  Assert.assertEquals("Timestamp of format yyyyMM not parsed correctly",
                      expectedDt, parser.apply(yearMonth));

  // Named formats: the number is an epoch offset.
  // Friday, May 15, 2020 8:20:40 PM GMT
  long millis = 1589574040000L;
  expectedDt = DateTimes.of("2020-05-15T20:20:40.000Z");
  parser = TimestampParser.createObjectTimestampParser("millis");
  Assert.assertEquals("Timestamp of format millis not parsed correctly",
                      expectedDt, parser.apply(millis));

  parser = TimestampParser.createObjectTimestampParser("auto");
  Assert.assertEquals("Timestamp of format auto not parsed correctly",
                      expectedDt, parser.apply(millis));

  // Same instant expressed in seconds.
  int posix = 1589574040;
  parser = TimestampParser.createObjectTimestampParser("posix");
  Assert.assertEquals("Timestamp of format posix not parsed correctly",
                      expectedDt, parser.apply(posix));

  // Same instant expressed in microseconds.
  long micro = 1589574040000000L;
  parser = TimestampParser.createObjectTimestampParser("micro");
  Assert.assertEquals("Timestamp of format micro not parsed correctly",
                      expectedDt, parser.apply(micro));
}
} }