mirror of https://github.com/apache/lucene.git
SOLR-12591: ParseDateFieldUpdateProcessorFactory: Use "lenient" and strip surrounding quotes.
More tests, ported from "extract" contrib stuff.
This commit is contained in:
parent
0d89ff2e61
commit
ec01cc981c
|
@ -55,8 +55,8 @@ Other Changes
|
||||||
|
|
||||||
* SOLR-12614: Make "Nodes" view the default in AdminUI "Cloud" tab (janhoy)
|
* SOLR-12614: Make "Nodes" view the default in AdminUI "Cloud" tab (janhoy)
|
||||||
|
|
||||||
* SOLR-12586: Remove Joda Time dependency. Upgrade ParseDateFieldUpdateProcessorFactory (present in "schemaless mode")
|
* SOLR-12586, SOLR-12591: Upgrade ParseDateFieldUpdateProcessorFactory (present in "schemaless mode") to use Java 8's
|
||||||
to use Java 8's java.time.DateTimeFormatter instead (see upgrade notes).
|
java.time.DateTimeFormatter instead of Joda time (see upgrade notes). "Lenient" is enabled. Removed Joda Time dependency.
|
||||||
(David Smiley, Bar Rotstein)
|
(David Smiley, Bar Rotstein)
|
||||||
|
|
||||||
================== 7.5.0 ==================
|
================== 7.5.0 ==================
|
||||||
|
|
|
@ -26,6 +26,7 @@ import java.time.ZoneOffset;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
||||||
import java.time.format.DateTimeFormatterBuilder;
|
import java.time.format.DateTimeFormatterBuilder;
|
||||||
import java.time.format.DateTimeParseException;
|
import java.time.format.DateTimeParseException;
|
||||||
|
import java.time.format.ResolverStyle;
|
||||||
import java.time.temporal.TemporalAccessor;
|
import java.time.temporal.TemporalAccessor;
|
||||||
import java.time.temporal.TemporalQueries;
|
import java.time.temporal.TemporalQueries;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
@ -51,7 +52,7 @@ import org.slf4j.LoggerFactory;
|
||||||
* Attempts to mutate selected fields that have only CharSequence-typed values
|
* Attempts to mutate selected fields that have only CharSequence-typed values
|
||||||
* into Date values. Solr will continue to index date/times in the UTC time
|
* into Date values. Solr will continue to index date/times in the UTC time
|
||||||
* zone, but the input date/times may be expressed using other time zones,
|
* zone, but the input date/times may be expressed using other time zones,
|
||||||
* and will be converted to UTC when they are mutated.
|
* and will be converted to an unambiguous {@link Date} when they are mutated.
|
||||||
* </p>
|
* </p>
|
||||||
* <p>
|
* <p>
|
||||||
* The default selection behavior is to mutate both those fields that don't match
|
* The default selection behavior is to mutate both those fields that don't match
|
||||||
|
@ -67,6 +68,8 @@ import org.slf4j.LoggerFactory;
|
||||||
* One or more date "format" specifiers must be specified. See
|
* One or more date "format" specifiers must be specified. See
|
||||||
* <a href="https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html"
|
* <a href="https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html"
|
||||||
* >Java 8's DateTimeFormatter javadocs</a> for a description of format strings.
|
* >Java 8's DateTimeFormatter javadocs</a> for a description of format strings.
|
||||||
|
* Note that "lenient" parsing and case insensitivity are enabled.
|
||||||
|
* Furthermore, single quotes surrounding an input value will be removed if found.
|
||||||
* </p>
|
* </p>
|
||||||
* <p>
|
* <p>
|
||||||
* A default time zone name or offset may optionally be specified for those dates
|
* A default time zone name or offset may optionally be specified for those dates
|
||||||
|
@ -120,6 +123,16 @@ public class ParseDateFieldUpdateProcessorFactory extends FieldMutatingUpdatePro
|
||||||
protected Object mutateValue(Object srcVal) {
|
protected Object mutateValue(Object srcVal) {
|
||||||
if (srcVal instanceof CharSequence) {
|
if (srcVal instanceof CharSequence) {
|
||||||
String srcStringVal = srcVal.toString();
|
String srcStringVal = srcVal.toString();
|
||||||
|
// trim single quotes around date if present
|
||||||
|
// see issue #5279 (Apache HttpClient)
|
||||||
|
int stringValLen = srcStringVal.length();
|
||||||
|
if (stringValLen > 1
|
||||||
|
&& srcStringVal.startsWith("'")
|
||||||
|
&& srcStringVal.endsWith("'")
|
||||||
|
) {
|
||||||
|
srcStringVal = srcStringVal.substring(1, stringValLen - 1);
|
||||||
|
}
|
||||||
|
|
||||||
for (Map.Entry<String,DateTimeFormatter> format : formats.entrySet()) {
|
for (Map.Entry<String,DateTimeFormatter> format : formats.entrySet()) {
|
||||||
DateTimeFormatter parser = format.getValue();
|
DateTimeFormatter parser = format.getValue();
|
||||||
try {
|
try {
|
||||||
|
@ -159,8 +172,9 @@ public class ParseDateFieldUpdateProcessorFactory extends FieldMutatingUpdatePro
|
||||||
Collection<String> formatsParam = args.removeConfigArgs(FORMATS_PARAM);
|
Collection<String> formatsParam = args.removeConfigArgs(FORMATS_PARAM);
|
||||||
if (null != formatsParam) {
|
if (null != formatsParam) {
|
||||||
for (String value : formatsParam) {
|
for (String value : formatsParam) {
|
||||||
DateTimeFormatter formatter = new DateTimeFormatterBuilder().parseCaseInsensitive()
|
DateTimeFormatter formatter = new DateTimeFormatterBuilder().parseLenient().parseCaseInsensitive()
|
||||||
.appendPattern(value).toFormatter(locale).withZone(defaultTimeZone);
|
.appendPattern(value).toFormatter(locale)
|
||||||
|
.withResolverStyle(ResolverStyle.LENIENT).withZone(defaultTimeZone);
|
||||||
validateFormatter(formatter);
|
validateFormatter(formatter);
|
||||||
formats.put(value, formatter);
|
formats.put(value, formatter);
|
||||||
}
|
}
|
||||||
|
|
|
@ -109,6 +109,21 @@
|
||||||
</processor>
|
</processor>
|
||||||
</updateRequestProcessorChain>
|
</updateRequestProcessorChain>
|
||||||
|
|
||||||
|
<updateRequestProcessorChain name="parse-date-patterns-from-extract-contrib">
|
||||||
|
<processor class="solr.ParseDateFieldUpdateProcessorFactory">
|
||||||
|
<str name="defaultTimeZone">UTC</str>
|
||||||
|
<str name="locale">en</str>
|
||||||
|
<arr name="format">
|
||||||
|
<str>yyyy-MM-dd['T'[HH:mm:ss['.'SSS][z</str>
|
||||||
|
<str>yyyy-MM-dd HH:mm:ss</str>
|
||||||
|
<str>EEE MMM d HH:mm:ss [z ]yyyy</str>
|
||||||
|
<str>EEEE, dd-MMM-yy HH:mm:ss zzz</str>
|
||||||
|
<str>EEE, dd MMM yyyy HH:mm:ss zzz</str>
|
||||||
|
</arr>
|
||||||
|
</processor>
|
||||||
|
<processor class="solr.RunUpdateProcessorFactory" />
|
||||||
|
</updateRequestProcessorChain>
|
||||||
|
|
||||||
<updateRequestProcessorChain name="parse-int">
|
<updateRequestProcessorChain name="parse-int">
|
||||||
<processor class="solr.ParseIntFieldUpdateProcessorFactory"/>
|
<processor class="solr.ParseIntFieldUpdateProcessorFactory"/>
|
||||||
<processor class="solr.RunUpdateProcessorFactory"/>
|
<processor class="solr.RunUpdateProcessorFactory"/>
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.solr.update.processor;
|
package org.apache.solr.update.processor;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.time.Instant;
|
import java.time.Instant;
|
||||||
import java.time.LocalDate;
|
import java.time.LocalDate;
|
||||||
import java.time.LocalDateTime;
|
import java.time.LocalDateTime;
|
||||||
|
@ -36,7 +37,6 @@ import java.util.Set;
|
||||||
import org.apache.solr.common.SolrInputDocument;
|
import org.apache.solr.common.SolrInputDocument;
|
||||||
import org.apache.solr.schema.IndexSchema;
|
import org.apache.solr.schema.IndexSchema;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests for the field mutating update processors
|
* Tests for the field mutating update processors
|
||||||
* that parse Dates, Longs, Doubles, and Booleans.
|
* that parse Dates, Longs, Doubles, and Booleans.
|
||||||
|
@ -896,13 +896,124 @@ public class ParsingFieldUpdateProcessorsTest extends UpdateProcessorTestBase {
|
||||||
assertTrue(mixedDates.isEmpty());
|
assertTrue(mixedDates.isEmpty());
|
||||||
}
|
}
|
||||||
|
|
||||||
private Date parse(DateTimeFormatter dateTimeFormatter, String dateString) {
|
// tests that mimic the tests that were in TestExtractionDateUtil
|
||||||
|
public void testISO8601() throws IOException {
|
||||||
|
// dates with atypical years
|
||||||
|
// This test tries to mimic TestExtractionDateUtil#testISO8601
|
||||||
|
|
||||||
|
String[] dateStrings = {
|
||||||
|
"0001-01-01T01:01:01Z", "+12021-12-01T03:03:03Z",
|
||||||
|
"0000-04-04T04:04:04Z", "-0005-05-05T05:05:05Z",
|
||||||
|
"-2021-12-01T04:04:04Z", "-12021-12-01T02:02:02Z"
|
||||||
|
};
|
||||||
|
|
||||||
|
int id = 1;
|
||||||
|
|
||||||
|
// ensure strings are parsed
|
||||||
|
for(String notInFormatDateString: dateStrings) {
|
||||||
|
IndexSchema schema = h.getCore().getLatestSchema();
|
||||||
|
assertNotNull(schema.getFieldOrNull("date_dt")); // should match "*_dt" dynamic field
|
||||||
|
SolrInputDocument d = processAdd("parse-date-patterns-from-extract-contrib", doc(f("id", id), f("date_dt", notInFormatDateString)));
|
||||||
|
assertNotNull(d);
|
||||||
|
assertTrue("Date string: " + notInFormatDateString + " was not parsed as a date", d.getFieldValue("date_dt") instanceof Date);
|
||||||
|
assertEquals(notInFormatDateString, ((Date) d.getField("date_dt").getFirstValue()).toInstant().toString());
|
||||||
|
assertU(commit());
|
||||||
|
assertQ(req("id:" + id), "//date[@name='date_dt'][.='" + notInFormatDateString + "']");
|
||||||
|
++id;
|
||||||
|
}
|
||||||
|
|
||||||
|
// entries come in pairs: an input date string followed by its expected parsed string
|
||||||
|
String[] lenientDateStrings = {
|
||||||
|
"10995-12-31T23:59:59.990Z", "+10995-12-31T23:59:59.990Z",
|
||||||
|
"995-1-2T3:4:5Z", "0995-01-02T03:04:05Z",
|
||||||
|
"2021-01-01t03:04:05", "2021-01-01T03:04:05Z",
|
||||||
|
"2021-12-01 04:04:04", "2021-12-01T04:04:04Z"
|
||||||
|
};
|
||||||
|
|
||||||
|
// ensure strings that should be parsed using the lenient resolver are properly parsed
|
||||||
|
for(int i = 0; i < lenientDateStrings.length; ++i) {
|
||||||
|
String lenientDateString = lenientDateStrings[i];
|
||||||
|
String expectedString = lenientDateStrings[++i];
|
||||||
|
IndexSchema schema = h.getCore().getLatestSchema();
|
||||||
|
assertNotNull(schema.getFieldOrNull("date_dt")); // should match "*_dt" dynamic field
|
||||||
|
SolrInputDocument d = processAdd("parse-date-patterns-from-extract-contrib", doc(f("id", id), f("date_dt", lenientDateString)));
|
||||||
|
assertNotNull(d);
|
||||||
|
assertTrue("Date string: " + lenientDateString + " was not parsed as a date",
|
||||||
|
d.getFieldValue("date_dt") instanceof Date);
|
||||||
|
assertEquals(expectedString, ((Date) d.getField("date_dt").getFirstValue()).toInstant().toString());
|
||||||
|
++id;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// this test has had problems when the JDK timezone is America/Metlakatla
|
||||||
|
public void testAKSTZone() throws IOException {
|
||||||
|
final String inputString = "Thu Nov 13 04:35:51 AKST 2008";
|
||||||
|
|
||||||
|
final long expectTs = 1226583351000L;
|
||||||
|
assertEquals(expectTs,
|
||||||
|
DateTimeFormatter.ofPattern("EEE MMM d HH:mm:ss z yyyy", Locale.ENGLISH)
|
||||||
|
.withZone(ZoneId.of("UTC")).parse(inputString, Instant::from).toEpochMilli());
|
||||||
|
|
||||||
|
assertParsedDate(inputString, Date.from(Instant.ofEpochMilli(expectTs)), "parse-date-patterns-from-extract-contrib");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testNoTime() throws IOException {
|
||||||
|
Instant instant = instant(2005, 10, 7, 0, 0, 0);
|
||||||
|
String inputString = "2005-10-07";
|
||||||
|
assertParsedDate(inputString, Date.from(instant), "parse-date-patterns-from-extract-contrib");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRfc1123() throws IOException {
|
||||||
|
assertParsedDate("Fri, 07 Oct 2005 13:14:15 GMT", Date.from(inst20051007131415()), "parse-date-patterns-from-extract-contrib");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRfc1036() throws IOException {
|
||||||
|
assertParsedDate("Friday, 07-Oct-05 13:14:15 GMT", Date.from(inst20051007131415()), "parse-date-patterns-from-extract-contrib");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testAnsiC() throws IOException {
|
||||||
|
assertParsedDate(
|
||||||
|
"Fri Oct 7 13:14:15 2005", Date.from(inst20051007131415()), "parse-date-patterns-from-extract-contrib");
|
||||||
|
|
||||||
|
assertParsedDate("Fri Oct 7 05:14:15 AKDT 2005", Date.from(inst20051007131415()), "parse-date-patterns-from-extract-contrib"); // with timezone (not ANSI C) in DST
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testLenient() throws IOException {
|
||||||
|
// the ANSI C format, but the input here has a longer day of week
|
||||||
|
assertParsedDate("Friday Oct 7 13:14:15 2005", Date.from(inst20051007131415()), "parse-date-patterns-from-extract-contrib");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testParseQuotedDate() throws IOException {
|
||||||
|
// also using 2 digit day
|
||||||
|
assertParsedDate("'Fri, 14 Oct 2005 13:14:15 GMT'",
|
||||||
|
Date.from(instant(2005, 10, 14, 13, 14, 15)), "parse-date-patterns-from-extract-contrib");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Instant instant(final int year, final int month, final int day, int hour, int minute, int second) {
|
||||||
|
return LocalDate.of(year, month, day).atTime(hour, minute, second).toInstant(ZoneOffset.UTC);
|
||||||
|
}
|
||||||
|
|
||||||
|
private Instant inst20051007131415() {
|
||||||
|
return instant(2005, 10, 7, 13, 14, 15);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertParsedDate(String inputDateString, Date expectedDate, String chain) throws IOException {
|
||||||
|
IndexSchema schema = h.getCore().getLatestSchema();
|
||||||
|
assertNotNull(schema.getFieldOrNull("date_dt")); // should match "*_dt" dynamic field
|
||||||
|
SolrInputDocument d = processAdd(chain, doc(f("id", "1"), f("date_dt", inputDateString)));
|
||||||
|
assertNotNull(d);
|
||||||
|
assertTrue("Date string: " + inputDateString + " was not parsed as a date",
|
||||||
|
d.getFieldValue("date_dt") instanceof Date);
|
||||||
|
assertEquals(expectedDate, d.getField("date_dt").getFirstValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Date parse(DateTimeFormatter dateTimeFormatter, String dateString) {
|
||||||
final TemporalAccessor temporalAccessor = dateTimeFormatter.parseBest(dateString, OffsetDateTime::from,
|
final TemporalAccessor temporalAccessor = dateTimeFormatter.parseBest(dateString, OffsetDateTime::from,
|
||||||
ZonedDateTime::from, LocalDateTime::from, LocalDate::from, Instant::from);
|
ZonedDateTime::from, LocalDateTime::from, LocalDate::from, Instant::from);
|
||||||
return temporalToDate(temporalAccessor, dateTimeFormatter.getZone());
|
return temporalToDate(temporalAccessor, dateTimeFormatter.getZone());
|
||||||
}
|
}
|
||||||
|
|
||||||
private Date temporalToDate(TemporalAccessor in, ZoneId timeZoneId) {
|
private static Date temporalToDate(TemporalAccessor in, ZoneId timeZoneId) {
|
||||||
if (in instanceof OffsetDateTime) {
|
if (in instanceof OffsetDateTime) {
|
||||||
return Date.from(((OffsetDateTime) in).toInstant());
|
return Date.from(((OffsetDateTime) in).toInstant());
|
||||||
} else if (in instanceof ZonedDateTime) {
|
} else if (in instanceof ZonedDateTime) {
|
||||||
|
|
|
@ -317,7 +317,7 @@ These factories all provide functionality to _modify_ fields in a document as th
|
||||||
|
|
||||||
{solr-javadocs}/solr-core/org/apache/solr/update/processor/ParseBooleanFieldUpdateProcessorFactory.html[ParseBooleanFieldUpdateProcessorFactory]:: Attempts to mutate selected fields that have only CharSequence-typed values into Boolean values.
|
{solr-javadocs}/solr-core/org/apache/solr/update/processor/ParseBooleanFieldUpdateProcessorFactory.html[ParseBooleanFieldUpdateProcessorFactory]:: Attempts to mutate selected fields that have only CharSequence-typed values into Boolean values.
|
||||||
|
|
||||||
{solr-javadocs}/solr-core/org/apache/solr/update/processor/ParseDateFieldUpdateProcessorFactory.html[ParseDateFieldUpdateProcessorFactory]:: Attempts to mutate selected fields that have only CharSequence-typed values into Solr date values.
|
{solr-javadocs}/solr-core/org/apache/solr/update/processor/ParseDateFieldUpdateProcessorFactory.html[ParseDateFieldUpdateProcessorFactory]:: Attempts to mutate selected fields that have only CharSequence-typed values into Date values.
|
||||||
|
|
||||||
{solr-javadocs}/solr-core/org/apache/solr/update/processor/ParseNumericFieldUpdateProcessorFactory.html[ParseNumericFieldUpdateProcessorFactory] derived classes::
|
{solr-javadocs}/solr-core/org/apache/solr/update/processor/ParseNumericFieldUpdateProcessorFactory.html[ParseNumericFieldUpdateProcessorFactory] derived classes::
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue