mirror of https://github.com/apache/druid.git
Number based columns representing time in custom format cannot be used as timestamp column in Druid. (#9877)
* Number based columns representing time in custom format cannot be used as timestamp column in Druid. Prior to this fix, if an integer column in Parquet stored a dateint in the format yyyyMMdd, it could not be used as the timestamp column in Druid, because the timestamp parser interpreted it as a number storing UTC time instead of treating it as a number representing time in yyyyMMdd format. Data formats like TSV or CSV don't suffer from this problem, as the timestamp is passed in as a string, which the timestamp parser is able to parse correctly.
This commit is contained in:
parent
2e9548d93d
commit
82e5b0573e
|
@ -122,11 +122,12 @@ public class TimestampParser
|
||||||
{
|
{
|
||||||
final Function<String, DateTime> stringFun = createTimestampParser(format);
|
final Function<String, DateTime> stringFun = createTimestampParser(format);
|
||||||
final Function<Number, DateTime> numericFun = createNumericTimestampParser(format);
|
final Function<Number, DateTime> numericFun = createNumericTimestampParser(format);
|
||||||
|
final boolean isNumericFormat = isNumericFormat(format);
|
||||||
|
|
||||||
return o -> {
|
return o -> {
|
||||||
Preconditions.checkNotNull(o, "null timestamp");
|
Preconditions.checkNotNull(o, "null timestamp");
|
||||||
|
|
||||||
if (o instanceof Number) {
|
if (o instanceof Number && isNumericFormat) {
|
||||||
return numericFun.apply((Number) o);
|
return numericFun.apply((Number) o);
|
||||||
} else {
|
} else {
|
||||||
return stringFun.apply(o.toString());
|
return stringFun.apply(o.toString());
|
||||||
|
@ -134,6 +135,16 @@ public class TimestampParser
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static boolean isNumericFormat(String format)
|
||||||
|
{
|
||||||
|
return "auto".equalsIgnoreCase(format)
|
||||||
|
|| "millis".equalsIgnoreCase(format)
|
||||||
|
|| "posix".equalsIgnoreCase(format)
|
||||||
|
|| "micro".equalsIgnoreCase(format)
|
||||||
|
|| "nano".equalsIgnoreCase(format)
|
||||||
|
|| "ruby".equalsIgnoreCase(format);
|
||||||
|
}
|
||||||
|
|
||||||
private static DateTimeFormatter createAutoParser()
|
private static DateTimeFormatter createAutoParser()
|
||||||
{
|
{
|
||||||
final DateTimeFormatter offsetElement = new DateTimeFormatterBuilder()
|
final DateTimeFormatter offsetElement = new DateTimeFormatterBuilder()
|
||||||
|
|
|
@ -181,4 +181,48 @@ public class TimestampParserTest
|
||||||
.getMillis()
|
.getMillis()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testFormatsForNumberBasedTimestamp()
|
||||||
|
{
|
||||||
|
int yearMonthDate = 20200514;
|
||||||
|
DateTime expectedDt = DateTimes.of("2020-05-14T00:00:00.000Z");
|
||||||
|
Function<Object, DateTime> parser = TimestampParser.createObjectTimestampParser("yyyyMMdd");
|
||||||
|
Assert.assertEquals("Timestamp of format yyyyMMdd not parsed correctly",
|
||||||
|
expectedDt, parser.apply(yearMonthDate));
|
||||||
|
|
||||||
|
int year = 2020;
|
||||||
|
expectedDt = DateTimes.of("2020-01-01T00:00:00.000Z");
|
||||||
|
parser = TimestampParser.createObjectTimestampParser("yyyy");
|
||||||
|
Assert.assertEquals("Timestamp of format yyyy not parsed correctly",
|
||||||
|
expectedDt, parser.apply(year));
|
||||||
|
|
||||||
|
int yearMonth = 202010;
|
||||||
|
expectedDt = DateTimes.of("2020-10-01T00:00:00.000Z");
|
||||||
|
parser = TimestampParser.createObjectTimestampParser("yyyyMM");
|
||||||
|
Assert.assertEquals("Timestamp of format yyyy not parsed correctly",
|
||||||
|
expectedDt, parser.apply(yearMonth));
|
||||||
|
|
||||||
|
// Friday, May 15, 2020 8:20:40 PM GMT
|
||||||
|
long millis = 1589574040000l;
|
||||||
|
expectedDt = DateTimes.of("2020-05-15T20:20:40.000Z");
|
||||||
|
|
||||||
|
parser = TimestampParser.createObjectTimestampParser("millis");
|
||||||
|
Assert.assertEquals("Timestamp of format millis not parsed correctly",
|
||||||
|
expectedDt, parser.apply(millis));
|
||||||
|
parser = TimestampParser.createObjectTimestampParser("auto");
|
||||||
|
Assert.assertEquals("Timestamp of format auto not parsed correctly",
|
||||||
|
expectedDt, parser.apply(millis));
|
||||||
|
|
||||||
|
int posix = 1589574040;
|
||||||
|
parser = TimestampParser.createObjectTimestampParser("posix");
|
||||||
|
Assert.assertEquals("Timestamp of format posix not parsed correctly",
|
||||||
|
expectedDt, parser.apply(posix));
|
||||||
|
|
||||||
|
long micro = 1589574040000000l;
|
||||||
|
parser = TimestampParser.createObjectTimestampParser("micro");
|
||||||
|
Assert.assertEquals("Timestamp of format micro not parsed correctly",
|
||||||
|
expectedDt, parser.apply(micro));
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue