date formats: use a function instead of our own interface

Also turn the different date formats into an enum.
This commit is contained in:
javanna 2015-11-25 14:53:20 +01:00 committed by Luca Cavanna
parent 4759a6e50f
commit 5daa73b350
12 changed files with 199 additions and 374 deletions

View File

@ -0,0 +1,99 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.processor.date;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.ISODateTimeFormat;
import java.util.Locale;
import java.util.Optional;
import java.util.function.Function;
/**
 * Built-in date formats understood by the date processor. Each constant produces a
 * {@link Function} that turns a date string into a joda {@link DateTime} in a given timezone.
 * Formats that are not one of these constants are handled as joda patterns through
 * {@link #getJodaFunction(String, DateTimeZone, Locale)}.
 */
enum DateFormat {
    /** Parses with joda's ISO date-time parser, using the given timezone. */
    Iso8601 {
        @Override
        Function<String, DateTime> getFunction(DateTimeZone timezone) {
            return ISODateTimeFormat.dateTimeParser().withZone(timezone)::parseDateTime;
        }
    },
    /** Parses a decimal number of seconds since the epoch; the value is scaled to milliseconds. */
    Unix {
        @Override
        Function<String, DateTime> getFunction(DateTimeZone timezone) {
            // Parse as double rather than float: a float only has a 24-bit significand, so
            // present-day epoch seconds (~1.4e9) would be rounded to a multiple of 128 seconds.
            // The cast truncates any remaining sub-millisecond fraction.
            return (date) -> new DateTime((long) (Double.parseDouble(date) * 1000), timezone);
        }
    },
    /** Parses an integral number of milliseconds since the epoch. */
    UnixMs {
        @Override
        Function<String, DateTime> getFunction(DateTimeZone timezone) {
            return (date) -> new DateTime(Long.parseLong(date), timezone);
        }

        // The default toString() would yield "UNIXMS", which fromString does not recognize.
        @Override
        public String toString() {
            return "UNIX_MS";
        }
    },
    /** Parses a hexadecimal TAI64N label, with or without the leading '@'. */
    Tai64n {
        @Override
        Function<String, DateTime> getFunction(DateTimeZone timezone) {
            return (date) -> new DateTime(parseMillis(date), timezone);
        }

        /**
         * Converts a TAI64N label into epoch milliseconds.
         *
         * @param date the label, optionally prefixed with '@'
         * @return the computed number of milliseconds
         */
        private long parseMillis(String date) {
            if (date.startsWith("@")) {
                date = date.substring(1);
            }
            // Hex digits 1..15 hold the seconds; the first digit is skipped
            // (presumably the 2^62 marker nibble of TAI64 labels - TODO confirm).
            long base = Long.parseLong(date.substring(1, 16), 16);
            // Hex digits 16..23 are divided by 1,000,000 below, i.e. presumably nanoseconds.
            long rest = Long.parseLong(date.substring(16, 24), 16);
            // e.g. 4000000050d50648 -> 1356138046000; the 10000 ms subtracted is
            // presumably the fixed 10 second TAI offset - TODO confirm.
            return ((base * 1000) - 10000) + (rest / 1000000);
        }
    };

    /**
     * Returns the parsing function of this format.
     *
     * @param timezone the timezone given to the resulting {@link DateTime} objects
     * @return a function that parses a date string; it throws an unchecked exception on bad input
     */
    abstract Function<String, DateTime> getFunction(DateTimeZone timezone);

    /**
     * Looks up a built-in format by its exact, case-sensitive name.
     *
     * @param format one of "ISO8601", "UNIX", "UNIX_MS" or "TAI64N"
     * @return the matching format, or an empty optional for any other string
     */
    static Optional<DateFormat> fromString(String format) {
        switch (format) {
            case "ISO8601":
                return Optional.of(Iso8601);
            case "UNIX":
                return Optional.of(Unix);
            case "UNIX_MS":
                return Optional.of(UnixMs);
            case "TAI64N":
                return Optional.of(Tai64n);
            default:
                return Optional.empty();
        }
    }

    /**
     * Builds a parsing function out of a joda pattern.
     *
     * @param matchFormat the joda date pattern
     * @param timezone    the timezone set on the formatter
     * @param locale      the locale set on the formatter
     * @return a function that parses dates with the pattern; the default year is the
     *         current UTC year at the time this method is called
     */
    static Function<String, DateTime> getJodaFunction(String matchFormat, DateTimeZone timezone, Locale locale) {
        return DateTimeFormat.forPattern(matchFormat)
                .withDefaultYear((new DateTime(DateTimeZone.UTC)).getYear())
                .withZone(timezone).withLocale(locale)::parseDateTime;
    }

    /** Returns the upper-cased constant name (locale independent). */
    @Override
    public String toString() {
        return name().toUpperCase(Locale.ROOT);
    }
}

View File

@ -1,35 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.processor.date;
import org.joda.time.DateTime;
/**
 * Parser for dates provided as strings. Parses into a joda {@link DateTime} object.
 * We use our own joda wrapper as we support some formats that are not supported directly by joda.
 */
@FunctionalInterface
public interface DateParser {
    /**
     * Parses the date provided as a string argument into a joda {@link DateTime} object.
     *
     * @param date the textual representation of the date
     * @return the parsed date
     */
    DateTime parseDateTime(String date);
}

View File

@ -1,48 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.processor.date;
import org.joda.time.DateTimeZone;
import java.util.Locale;
/**
 * Picks the {@link DateParser} implementation that corresponds to a format name.
 */
public class DateParserFactory {

    public static final String ISO8601 = "ISO8601";
    public static final String UNIX = "UNIX";
    public static final String UNIX_MS = "UNIX_MS";
    public static final String TAI64N = "TAI64N";

    /**
     * Creates the parser for the given format.
     *
     * @param format   one of the constants above, or otherwise a joda pattern
     * @param timezone the timezone handed to the created parser
     * @param locale   the locale, only used when the format is a joda pattern
     * @return a new parser instance
     */
    public static DateParser createDateParser(String format, DateTimeZone timezone, Locale locale) {
        if (format.equals(ISO8601)) {
            // TODO(talevy): fallback solution for almost ISO8601
            return new ISO8601DateParser(timezone);
        }
        if (format.equals(UNIX)) {
            return new UnixDateParser(timezone);
        }
        if (format.equals(UNIX_MS)) {
            return new UnixMsDateParser(timezone);
        }
        if (format.equals(TAI64N)) {
            return new TAI64NDateParser(timezone);
        }
        // anything else is treated as a joda pattern
        return new JodaPatternDateParser(format, timezone, locale);
    }
}

View File

@ -26,10 +26,8 @@ import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.ISODateTimeFormat;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.*;
import java.util.function.Function;
public final class DateProcessor implements Processor {
@ -41,7 +39,7 @@ public final class DateProcessor implements Processor {
private final String matchField;
private final String targetField;
private final List<String> matchFormats;
private final List<DateParser> dateParsers;
private final List<Function<String, DateTime>> dateParsers;
DateProcessor(DateTimeZone timezone, Locale locale, String matchField, List<String> matchFormats, String targetField) {
this.timezone = timezone;
@ -51,7 +49,14 @@ public final class DateProcessor implements Processor {
this.matchFormats = matchFormats;
this.dateParsers = new ArrayList<>();
for (String matchFormat : matchFormats) {
dateParsers.add(DateParserFactory.createDateParser(matchFormat, timezone, locale));
Optional<DateFormat> dateFormat = DateFormat.fromString(matchFormat);
Function<String, DateTime> stringToDateFunction;
if (dateFormat.isPresent()) {
stringToDateFunction = dateFormat.get().getFunction(timezone);
} else {
stringToDateFunction = DateFormat.getJodaFunction(matchFormat, timezone, locale);
}
dateParsers.add(stringToDateFunction);
}
}
@ -62,9 +67,9 @@ public final class DateProcessor implements Processor {
DateTime dateTime = null;
Exception lastException = null;
for (DateParser dateParser : dateParsers) {
for (Function<String, DateTime> dateParser : dateParsers) {
try {
dateTime = dateParser.parseDateTime(value);
dateTime = dateParser.apply(value);
} catch(Exception e) {
//try the next parser and keep track of the last exception
lastException = e;

View File

@ -1,39 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.processor.date;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.ISODateTimeFormat;
/**
 * {@link DateParser} backed by joda's ISO date-time parser.
 */
public class ISO8601DateParser implements DateParser {

    private final DateTimeFormatter isoParser;

    /**
     * @param timezone the timezone set on the underlying formatter
     */
    public ISO8601DateParser(DateTimeZone timezone) {
        DateTimeFormatter base = ISODateTimeFormat.dateTimeParser();
        this.isoParser = base.withZone(timezone);
    }

    /** Delegates to the underlying formatter. */
    @Override
    public DateTime parseDateTime(String date) {
        return this.isoParser.parseDateTime(date);
    }
}

View File

@ -1,43 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.processor.date;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import java.util.Locale;
/**
 * {@link DateParser} that parses dates according to a joda pattern.
 */
public class JodaPatternDateParser implements DateParser {

    private final DateTimeFormatter patternFormatter;

    /**
     * @param format   the joda pattern
     * @param timezone the timezone set on the formatter
     * @param locale   the locale set on the formatter
     */
    public JodaPatternDateParser(String format, DateTimeZone timezone, Locale locale) {
        DateTimeFormatter fromPattern = DateTimeFormat.forPattern(format);
        // the default year is fixed once, when the parser gets created
        int currentUtcYear = (new DateTime(DateTimeZone.UTC)).getYear();
        this.patternFormatter = fromPattern
                .withDefaultYear(currentUtcYear)
                .withZone(timezone)
                .withLocale(locale);
    }

    /** Delegates to the pattern based formatter. */
    @Override
    public DateTime parseDateTime(String date) {
        return this.patternFormatter.parseDateTime(date);
    }
}

View File

@ -1,47 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.processor.date;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
/**
 * {@link DateParser} for hexadecimal TAI64N labels, with or without the leading '@'.
 */
public class TAI64NDateParser implements DateParser {

    // never reassigned; final like the timezone field of the other parsers
    private final DateTimeZone timezone;

    /**
     * @param timezone the timezone given to the parsed {@link DateTime} objects
     */
    public TAI64NDateParser(DateTimeZone timezone) {
        this.timezone = timezone;
    }

    /**
     * Parses a TAI64N label.
     *
     * @param date the label, optionally prefixed with '@'
     * @return the corresponding date in the configured timezone
     */
    @Override
    public DateTime parseDateTime(String date) {
        return new DateTime(parseMillis(date), timezone);
    }

    /** Converts a TAI64N label into epoch milliseconds. */
    private static long parseMillis(String date) {
        if (date.startsWith("@")) {
            date = date.substring(1);
        }
        // Hex digits 1..15 hold the seconds; the first digit is skipped
        // (presumably the 2^62 marker nibble of TAI64 labels - TODO confirm).
        long base = Long.parseLong(date.substring(1, 16), 16);
        // Hex digits 16..23 are divided by 1,000,000 below, i.e. presumably nanoseconds.
        long rest = Long.parseLong(date.substring(16, 24), 16);
        // e.g. 4000000050d50648 -> 1356138046000; the 10000 ms subtracted is
        // presumably the fixed 10 second TAI offset - TODO confirm.
        return ((base * 1000) - 10000) + (rest / 1000000);
    }
}

View File

@ -1,40 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.processor.date;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
/**
 * {@link DateParser} for dates expressed as a decimal number of seconds since the epoch.
 */
public class UnixDateParser implements DateParser {

    private final DateTimeZone timezone;

    /**
     * @param timezone the timezone given to the parsed {@link DateTime} objects
     */
    public UnixDateParser(DateTimeZone timezone) {
        this.timezone = timezone;
    }

    /**
     * Parses seconds since the epoch, e.g. "1000.5".
     *
     * @param date the number of seconds, optionally with a fractional part
     * @return the corresponding date in the configured timezone
     * @throws NumberFormatException if the string is not a parsable number
     */
    @Override
    public DateTime parseDateTime(String date) {
        return new DateTime(parseMillis(date), timezone);
    }

    /** Scales the parsed seconds to milliseconds; the cast truncates what is left of the fraction. */
    private static long parseMillis(String date) {
        // Parse as double rather than float: a float only has a 24-bit significand, so
        // present-day epoch seconds (~1.4e9) would be rounded to a multiple of 128 seconds.
        return (long) (Double.parseDouble(date) * 1000);
    }
}

View File

@ -1,36 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.processor.date;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
/**
 * {@link DateParser} for dates expressed as milliseconds since the epoch.
 */
public class UnixMsDateParser implements DateParser {

    private final DateTimeZone zone;

    /**
     * @param timezone the timezone given to the parsed {@link DateTime} objects
     */
    public UnixMsDateParser(DateTimeZone timezone) {
        this.zone = timezone;
    }

    /** Reads the string as a long and wraps it into a {@link DateTime}. */
    @Override
    public DateTime parseDateTime(String date) {
        long epochMillis = Long.parseLong(date);
        return new DateTime(epochMillis, zone);
    }
}

View File

@ -0,0 +1,84 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.processor.date;
import org.elasticsearch.test.ESTestCase;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Locale;
import java.util.Optional;
import java.util.function.Function;
import static org.hamcrest.core.IsEqual.equalTo;
public class DateFormatTests extends ESTestCase {
public void testParseJoda() {
Function<String, DateTime> jodaFunction = DateFormat.getJodaFunction("MMM dd HH:mm:ss Z", DateTimeZone.forOffsetHours(-8), Locale.ENGLISH);
assertThat(Instant.ofEpochMilli(jodaFunction.apply("Nov 24 01:29:01 -0800").getMillis())
.atZone(ZoneId.of("GMT-8"))
.format(DateTimeFormatter.ofPattern("MM dd HH:mm:ss", Locale.ENGLISH)),
equalTo("11 24 01:29:01"));
}
public void testParseUnixMs() {
assertThat(DateFormat.UnixMs.getFunction(DateTimeZone.UTC).apply("1000500").getMillis(), equalTo(1000500L));
}
public void testParseUnix() {
assertThat(DateFormat.Unix.getFunction(DateTimeZone.UTC).apply("1000.5").getMillis(), equalTo(1000500L));
}
public void testParseISO8601() {
assertThat(DateFormat.Iso8601.getFunction(DateTimeZone.UTC).apply("2001-01-01T00:00:00-0800").getMillis(), equalTo(978336000000L));
}
public void testParseISO8601Failure() {
Function<String, DateTime> function = DateFormat.Iso8601.getFunction(DateTimeZone.UTC);
try {
function.apply("2001-01-0:00-0800");
fail("parse should have failed");
} catch(IllegalArgumentException e) {
//all good
}
}
public void testTAI64NParse() {
String input = "4000000050d506482dbdf024";
String expected = "2012-12-22T03:00:46.767+02:00";
assertThat(DateFormat.Tai64n.getFunction(DateTimeZone.forOffsetHours(2)).apply((randomBoolean() ? "@" : "") + input).toString(), equalTo(expected));
}
public void testFromString() {
assertThat(DateFormat.fromString("UNIX_MS"), equalTo(Optional.of(DateFormat.UnixMs)));
assertThat(DateFormat.fromString("unix_ms"), equalTo(Optional.empty()));
assertThat(DateFormat.fromString("UNIX"), equalTo(Optional.of(DateFormat.Unix)));
assertThat(DateFormat.fromString("unix"), equalTo(Optional.empty()));
assertThat(DateFormat.fromString("ISO8601"), equalTo(Optional.of(DateFormat.Iso8601)));
assertThat(DateFormat.fromString("iso8601"), equalTo(Optional.empty()));
assertThat(DateFormat.fromString("TAI64N"), equalTo(Optional.of(DateFormat.Tai64n)));
assertThat(DateFormat.fromString("tai64n"), equalTo(Optional.empty()));
assertThat(DateFormat.fromString("prefix-" + randomAsciiOfLengthBetween(1, 10)), equalTo(Optional.empty()));
}
}

View File

@ -1,75 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.ingest.processor.date;
import org.elasticsearch.test.ESTestCase;
import org.joda.time.DateTimeZone;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Locale;
import static org.hamcrest.core.IsEqual.equalTo;
/**
 * Unit tests for the individual {@link DateParser} implementations.
 */
public class DateParserTests extends ESTestCase {

    public void testJodaPatternParse() {
        DateTimeZone minusEight = DateTimeZone.forOffsetHours(-8);
        JodaPatternDateParser patternParser = new JodaPatternDateParser("MMM dd HH:mm:ss Z", minusEight, Locale.ENGLISH);
        long parsedMillis = patternParser.parseDateTime("Nov 24 01:29:01 -0800").getMillis();
        // render the instant through java.time to check it independently from joda
        String rendered = Instant.ofEpochMilli(parsedMillis)
                .atZone(ZoneId.of("GMT-8"))
                .format(DateTimeFormatter.ofPattern("MM dd HH:mm:ss", Locale.ENGLISH));
        assertThat(rendered, equalTo("11 24 01:29:01"));
    }

    public void testParseUnixMs() {
        long millis = new UnixMsDateParser(DateTimeZone.UTC).parseDateTime("1000500").getMillis();
        assertThat(millis, equalTo(1000500L));
    }

    public void testUnixParse() {
        long millis = new UnixDateParser(DateTimeZone.UTC).parseDateTime("1000.5").getMillis();
        assertThat(millis, equalTo(1000500L));
    }

    public void testParseISO8601() {
        long millis = new ISO8601DateParser(DateTimeZone.UTC).parseDateTime("2001-01-01T00:00:00-0800").getMillis();
        assertThat(millis, equalTo(978336000000L));
    }

    public void testParseISO8601Failure() {
        ISO8601DateParser isoParser = new ISO8601DateParser(DateTimeZone.UTC);
        String malformed = "2001-01-0:00-0800";
        try {
            isoParser.parseDateTime(malformed);
            fail("parse should have failed");
        } catch (IllegalArgumentException expected) {
            // expected: the input is not a valid ISO8601 date
        }
    }

    public void testTAI64NParse() {
        TAI64NDateParser taiParser = new TAI64NDateParser(DateTimeZone.forOffsetHours(2));
        String label = "4000000050d506482dbdf024";
        // the leading '@' is optional
        String prefix = randomBoolean() ? "@" : "";
        String parsed = taiParser.parseDateTime(prefix + label).toString();
        assertThat(parsed, equalTo("2012-12-22T03:00:46.767+02:00"));
    }
}

View File

@ -100,7 +100,7 @@ public class DateProcessorTests extends ESTestCase {
public void testTAI64N() {
DateProcessor dateProcessor = new DateProcessor(DateTimeZone.forOffsetHours(2), randomLocale(random()),
"date_as_string", Collections.singletonList(DateParserFactory.TAI64N), "date_as_date");
"date_as_string", Collections.singletonList(DateFormat.Tai64n.toString()), "date_as_date");
Map<String, Object> document = new HashMap<>();
String dateAsString = (randomBoolean() ? "@" : "") + "4000000050d506482dbdf024";
document.put("date_as_string", dateAsString);
@ -111,7 +111,7 @@ public class DateProcessorTests extends ESTestCase {
public void testUnixMs() {
DateProcessor dateProcessor = new DateProcessor(DateTimeZone.UTC, randomLocale(random()),
"date_as_string", Collections.singletonList(DateParserFactory.UNIX_MS), "date_as_date");
"date_as_string", Collections.singletonList(DateFormat.UnixMs.toString()), "date_as_date");
Map<String, Object> document = new HashMap<>();
document.put("date_as_string", "1000500");
IngestDocument ingestDocument = new IngestDocument("index", "type", "id", document);
@ -121,7 +121,7 @@ public class DateProcessorTests extends ESTestCase {
public void testUnix() {
DateProcessor dateProcessor = new DateProcessor(DateTimeZone.UTC, randomLocale(random()),
"date_as_string", Collections.singletonList(DateParserFactory.UNIX), "date_as_date");
"date_as_string", Collections.singletonList(DateFormat.Unix.toString()), "date_as_date");
Map<String, Object> document = new HashMap<>();
document.put("date_as_string", "1000.5");
IngestDocument ingestDocument = new IngestDocument("index", "type", "id", document);