mirror of https://github.com/apache/lucene.git
SOLR-8903: Move SolrJ DateUtil to contrib/extraction as ExtractionDateUtil.
And removed obsolete methods.
This commit is contained in:
parent
39932f5758
commit
5e5fd66257
|
@ -180,6 +180,9 @@ Upgrading from Solr 5.x
|
|||
When there is a non-zero number of milliseconds, it is padded with zeros to 3 digits. Negative year (BC) dates are
|
||||
now possible. Parsing: It is now an error to supply a portion of the date outside its valid range, like 67 seconds.
|
||||
|
||||
* SolrJ no longer includes DateUtil. If for some reason you need to format or parse dates, simply use Instant.toString()
|
||||
and Instant.parse().
|
||||
|
||||
Detailed Change List
|
||||
----------------------
|
||||
|
||||
|
@ -522,6 +525,9 @@ Other Changes
|
|||
now parse (and format) dates with a leading '+' or '-' (BC dates or dates with a year of more than 4 digits).
|
||||
[value] and ms() and contrib/analytics now parse with date math. (David Smiley)
|
||||
|
||||
* SOLR-8903: DateUtil in SolrJ moved to the extraction contrib as ExtractionDateUtil. Obsolete methods were removed.
|
||||
(David Smiley)
|
||||
|
||||
================== 5.5.1 ==================
|
||||
|
||||
Bug Fixes
|
||||
|
|
|
@ -17,23 +17,6 @@
|
|||
package org.apache.solr.handler.extraction;
|
||||
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.util.DateUtil;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.security.AuthorizationContext;
|
||||
import org.apache.solr.security.PermissionNameProvider;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||
import org.apache.solr.handler.ContentStreamHandlerBase;
|
||||
import org.apache.solr.handler.loader.ContentStreamLoader;
|
||||
import org.apache.tika.config.TikaConfig;
|
||||
import org.apache.tika.mime.MimeTypeException;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.invoke.MethodHandles;
|
||||
|
@ -42,6 +25,22 @@ import java.util.HashSet;
|
|||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.ContentStreamHandlerBase;
|
||||
import org.apache.solr.handler.loader.ContentStreamLoader;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.security.AuthorizationContext;
|
||||
import org.apache.solr.security.PermissionNameProvider;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||
import org.apache.solr.util.plugin.SolrCoreAware;
|
||||
import org.apache.tika.config.TikaConfig;
|
||||
import org.apache.tika.mime.MimeTypeException;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
||||
/**
|
||||
* Handler for rich documents like PDF or Word or any other file format that Tika handles that need the text to be extracted
|
||||
|
@ -59,7 +58,7 @@ public class ExtractingRequestHandler extends ContentStreamHandlerBase implement
|
|||
protected ParseContextConfig parseContextConfig;
|
||||
|
||||
|
||||
protected Collection<String> dateFormats = DateUtil.DEFAULT_DATE_FORMATS;
|
||||
protected Collection<String> dateFormats = ExtractionDateUtil.DEFAULT_DATE_FORMATS;
|
||||
protected SolrContentHandlerFactory factory;
|
||||
|
||||
|
||||
|
|
|
@ -14,11 +14,13 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.common.util;
|
||||
import java.io.IOException;
|
||||
import java.text.DateFormat;
|
||||
package org.apache.solr.handler.extraction;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.time.Instant;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.time.format.DateTimeFormatterBuilder;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Calendar;
|
||||
|
@ -32,7 +34,7 @@ import java.util.TimeZone;
|
|||
/**
|
||||
* This class has some code from HttpClient DateUtil.
|
||||
*/
|
||||
public class DateUtil {
|
||||
public class ExtractionDateUtil {
|
||||
//start HttpClient
|
||||
/**
|
||||
* Date format pattern used to parse HTTP date headers in RFC 1123 format.
|
||||
|
@ -67,6 +69,12 @@ public class DateUtil {
|
|||
|
||||
//---------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Differs from {@link DateTimeFormatter#ISO_INSTANT} in that it's lenient.
|
||||
*/
|
||||
public static final DateTimeFormatter ISO_8601_PARSER = new DateTimeFormatterBuilder()
|
||||
.parseCaseInsensitive().parseLenient().appendInstant().toFormatter(Locale.ROOT);
|
||||
|
||||
/**
|
||||
* A suite of default date formats that can be parsed, and thus transformed to the Solr specific format
|
||||
*/
|
||||
|
@ -95,9 +103,12 @@ public class DateUtil {
|
|||
}
|
||||
|
||||
public static Date parseDate(String d, Collection<String> fmts) throws ParseException {
|
||||
// 2007-04-26T08:05:04Z
|
||||
if (d.endsWith("Z") && d.length() > 20) {
|
||||
return getThreadLocalDateFormat().parse(d);
|
||||
if (d.length() > 0 && d.charAt(d.length() - 1) == 'Z') {
|
||||
try {
|
||||
return new Date(ISO_8601_PARSER.parse(d, Instant::from).toEpochMilli());
|
||||
} catch (Exception e) {
|
||||
//ignore; perhaps we can parse with one of the formats below...
|
||||
}
|
||||
}
|
||||
return parseDate(d, fmts, null);
|
||||
}
|
||||
|
@ -140,6 +151,7 @@ public class DateUtil {
|
|||
dateValue = dateValue.substring(1, dateValue.length() - 1);
|
||||
}
|
||||
|
||||
//TODO upgrade to Java 8 DateTimeFormatter. But how to deal with the GMT as a default?
|
||||
SimpleDateFormat dateParser = null;
|
||||
Iterator formatIter = dateFormats.iterator();
|
||||
|
||||
|
@ -163,97 +175,4 @@ public class DateUtil {
|
|||
throw new ParseException("Unable to parse the date " + dateValue, 0);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a formatter that can be use by the current thread if needed to
|
||||
* convert Date objects to the Internal representation.
|
||||
*
|
||||
* @return The {@link java.text.DateFormat} for the current thread
|
||||
*/
|
||||
public static DateFormat getThreadLocalDateFormat() {
|
||||
return fmtThreadLocal.get();
|
||||
}
|
||||
|
||||
public static TimeZone UTC = TimeZone.getTimeZone("UTC");
|
||||
private static ThreadLocalDateFormat fmtThreadLocal = new ThreadLocalDateFormat();
|
||||
|
||||
private static class ThreadLocalDateFormat extends ThreadLocal<DateFormat> {
|
||||
DateFormat proto;
|
||||
|
||||
public ThreadLocalDateFormat() {
|
||||
super();
|
||||
//2007-04-26T08:05:04Z
|
||||
SimpleDateFormat tmp = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ROOT);
|
||||
tmp.setTimeZone(UTC);
|
||||
proto = tmp;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected DateFormat initialValue() {
|
||||
return (DateFormat) proto.clone();
|
||||
}
|
||||
}
|
||||
|
||||
/** Formats the date and returns the calendar instance that was used (which may be reused) */
|
||||
public static Calendar formatDate(Date date, Calendar cal, Appendable out) throws IOException {
|
||||
// using a stringBuilder for numbers can be nice since
|
||||
// a temporary string isn't used (it's added directly to the
|
||||
// builder's buffer.
|
||||
|
||||
StringBuilder sb = out instanceof StringBuilder ? (StringBuilder)out : new StringBuilder();
|
||||
if (cal==null) cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT);
|
||||
cal.setTime(date);
|
||||
|
||||
int i = cal.get(Calendar.YEAR);
|
||||
sb.append(i);
|
||||
sb.append('-');
|
||||
i = cal.get(Calendar.MONTH) + 1; // 0 based, so add 1
|
||||
if (i<10) sb.append('0');
|
||||
sb.append(i);
|
||||
sb.append('-');
|
||||
i=cal.get(Calendar.DAY_OF_MONTH);
|
||||
if (i<10) sb.append('0');
|
||||
sb.append(i);
|
||||
sb.append('T');
|
||||
i=cal.get(Calendar.HOUR_OF_DAY); // 24 hour time format
|
||||
if (i<10) sb.append('0');
|
||||
sb.append(i);
|
||||
sb.append(':');
|
||||
i=cal.get(Calendar.MINUTE);
|
||||
if (i<10) sb.append('0');
|
||||
sb.append(i);
|
||||
sb.append(':');
|
||||
i=cal.get(Calendar.SECOND);
|
||||
if (i<10) sb.append('0');
|
||||
sb.append(i);
|
||||
i=cal.get(Calendar.MILLISECOND);
|
||||
if (i != 0) {
|
||||
sb.append('.');
|
||||
if (i<100) sb.append('0');
|
||||
if (i<10) sb.append('0');
|
||||
sb.append(i);
|
||||
|
||||
// handle canonical format specifying fractional
|
||||
// seconds shall not end in '0'. Given the slowness of
|
||||
// integer div/mod, simply checking the last character
|
||||
// is probably the fastest way to check.
|
||||
int lastIdx = sb.length()-1;
|
||||
if (sb.charAt(lastIdx)=='0') {
|
||||
lastIdx--;
|
||||
if (sb.charAt(lastIdx)=='0') {
|
||||
lastIdx--;
|
||||
}
|
||||
sb.setLength(lastIdx+1);
|
||||
}
|
||||
|
||||
}
|
||||
sb.append('Z');
|
||||
|
||||
if (out != sb)
|
||||
out.append(sb);
|
||||
|
||||
return cal;
|
||||
}
|
||||
|
||||
|
||||
}
|
|
@ -17,7 +17,6 @@
|
|||
package org.apache.solr.handler.extraction;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.text.DateFormat;
|
||||
import java.util.ArrayDeque;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
|
@ -31,7 +30,6 @@ import java.util.Set;
|
|||
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.DateUtil;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.schema.TrieDateField;
|
||||
|
@ -83,7 +81,7 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara
|
|||
private Set<String> literalFieldNames = null;
|
||||
|
||||
public SolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) {
|
||||
this(metadata, params, schema, DateUtil.DEFAULT_DATE_FORMATS);
|
||||
this(metadata, params, schema, ExtractionDateUtil.DEFAULT_DATE_FORMATS);
|
||||
}
|
||||
|
||||
|
||||
|
@ -317,7 +315,7 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara
|
|||
/**
|
||||
* Can be used to transform input values based on their {@link org.apache.solr.schema.SchemaField}
|
||||
* <p>
|
||||
* This implementation only formats dates using the {@link org.apache.solr.common.util.DateUtil}.
|
||||
* This implementation only formats dates using the {@link ExtractionDateUtil}.
|
||||
*
|
||||
* @param val The value to transform
|
||||
* @param schFld The {@link org.apache.solr.schema.SchemaField}
|
||||
|
@ -328,10 +326,8 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara
|
|||
if (schFld != null && schFld.getType() instanceof TrieDateField) {
|
||||
//try to transform the date
|
||||
try {
|
||||
Date date = DateUtil.parseDate(val, dateFormats);
|
||||
DateFormat df = DateUtil.getThreadLocalDateFormat();
|
||||
result = df.format(date);
|
||||
|
||||
Date date = ExtractionDateUtil.parseDate(val, dateFormats); // may throw
|
||||
result = date.toInstant().toString();//ISO format
|
||||
} catch (Exception e) {
|
||||
// Let the specific fieldType handle errors
|
||||
// throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid value: " + val + " for field: " + schFld, e);
|
||||
|
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.handler.extraction;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.util.Date;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestExtractionDateUtil extends LuceneTestCase {
|
||||
|
||||
public void testISO8601() throws Exception {
|
||||
// dates with atypical years
|
||||
assertParseFormatEquals("0001-01-01T01:01:01Z", null);
|
||||
assertParseFormatEquals("+12021-12-01T03:03:03Z", null);
|
||||
|
||||
assertParseFormatEquals("0000-04-04T04:04:04Z", null); // note: 0 AD is also known as 1 BC
|
||||
|
||||
// dates with negative years (BC)
|
||||
assertParseFormatEquals("-0005-05-05T05:05:05Z", null);
|
||||
assertParseFormatEquals("-2021-12-01T04:04:04Z", null);
|
||||
assertParseFormatEquals("-12021-12-01T02:02:02Z", null);
|
||||
|
||||
// dates that only parse thanks to lenient mode of DateTimeFormatter
|
||||
assertParseFormatEquals("10995-12-31T23:59:59.990Z", "+10995-12-31T23:59:59.990Z"); // missing '+' 5 digit year
|
||||
assertParseFormatEquals("995-1-2T3:4:5Z", "0995-01-02T03:04:05Z"); // wasn't 0 padded
|
||||
}
|
||||
|
||||
private static void assertParseFormatEquals(String inputStr, String expectedStr) throws ParseException {
|
||||
if (expectedStr == null) {
|
||||
expectedStr = inputStr;
|
||||
}
|
||||
Date inputDate = ExtractionDateUtil.parseDate(inputStr);
|
||||
String resultStr = inputDate.toInstant().toString();
|
||||
assertEquals("d:" + inputDate.getTime(), expectedStr, resultStr);
|
||||
}
|
||||
|
||||
public void testParseDate() throws ParseException {
|
||||
assertParsedDate(1226583351000L, "Thu Nov 13 04:35:51 AKST 2008");
|
||||
}
|
||||
|
||||
private static void assertParsedDate(long ts, String dateStr) throws ParseException {
|
||||
long parsed = ExtractionDateUtil.parseDate(dateStr).getTime();
|
||||
assertTrue(String.format(Locale.ENGLISH, "Incorrect parsed timestamp: %d != %d (%s)", ts, parsed, dateStr), Math.abs(ts - parsed) <= 1000L);
|
||||
}
|
||||
}
|
|
@ -29,13 +29,19 @@ import java.util.Map;
|
|||
import java.util.Map.Entry;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import com.google.common.base.Joiner;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.ArrayListMultimap;
|
||||
import com.google.common.collect.ListMultimap;
|
||||
import com.google.common.io.Closeables;
|
||||
import com.typesafe.config.Config;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
import org.apache.solr.common.params.MultiMapSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.DateUtil;
|
||||
import org.apache.solr.common.util.SuppressForbidden;
|
||||
import org.apache.solr.handler.extraction.ExtractingParams;
|
||||
import org.apache.solr.handler.extraction.ExtractionDateUtil;
|
||||
import org.apache.solr.handler.extraction.SolrContentHandler;
|
||||
import org.apache.solr.handler.extraction.SolrContentHandlerFactory;
|
||||
import org.apache.solr.morphlines.solr.SolrLocator;
|
||||
|
@ -50,7 +56,6 @@ import org.apache.tika.sax.XHTMLContentHandler;
|
|||
import org.apache.tika.sax.xpath.Matcher;
|
||||
import org.apache.tika.sax.xpath.MatchingContentHandler;
|
||||
import org.apache.tika.sax.xpath.XPathParser;
|
||||
|
||||
import org.kitesdk.morphline.api.Command;
|
||||
import org.kitesdk.morphline.api.CommandBuilder;
|
||||
import org.kitesdk.morphline.api.MorphlineCompilationException;
|
||||
|
@ -63,13 +68,6 @@ import org.kitesdk.morphline.stdio.AbstractParser;
|
|||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import com.google.common.base.Joiner;
|
||||
import com.google.common.base.Preconditions;
|
||||
import com.google.common.collect.ArrayListMultimap;
|
||||
import com.google.common.collect.ListMultimap;
|
||||
import com.google.common.io.Closeables;
|
||||
import com.typesafe.config.Config;
|
||||
|
||||
/**
|
||||
* Command that pipes the first attachment of a record into one of the given Tika parsers, then maps
|
||||
* the Tika output back to a record using SolrCell.
|
||||
|
@ -151,7 +149,7 @@ public final class SolrCellBuilder implements CommandBuilder {
|
|||
cellParams.put(ExtractingParams.XPATH_EXPRESSION, xpathExpr);
|
||||
}
|
||||
|
||||
this.dateFormats = getConfigs().getStringList(config, "dateFormats", new ArrayList<>(DateUtil.DEFAULT_DATE_FORMATS));
|
||||
this.dateFormats = getConfigs().getStringList(config, "dateFormats", new ArrayList<>(ExtractionDateUtil.DEFAULT_DATE_FORMATS));
|
||||
|
||||
String handlerStr = getConfigs().getString(config, "solrContentHandlerFactory", TrimSolrContentHandlerFactory.class.getName());
|
||||
Class<? extends SolrContentHandlerFactory> factoryClass;
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.commons.io.FileUtils;
|
|||
import org.apache.lucene.util.Constants;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.params.MapSolrParams;
|
||||
import org.apache.solr.common.util.DateUtil;
|
||||
import org.apache.solr.handler.extraction.ExtractionDateUtil;
|
||||
import org.apache.solr.handler.extraction.SolrContentHandler;
|
||||
import org.apache.solr.morphlines.solr.AbstractSolrMorphlineTestBase;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
|
@ -270,7 +270,7 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
|
|||
// which will cause the ContentHandler to be invoked.
|
||||
metadata.set(fieldName, getFoobarWithNonChars());
|
||||
StripNonCharSolrContentHandlerFactory contentHandlerFactory =
|
||||
new StripNonCharSolrContentHandlerFactory(DateUtil.DEFAULT_DATE_FORMATS);
|
||||
new StripNonCharSolrContentHandlerFactory(ExtractionDateUtil.DEFAULT_DATE_FORMATS);
|
||||
IndexSchema schema = h.getCore().getLatestSchema();
|
||||
SolrContentHandler contentHandler =
|
||||
contentHandlerFactory.createSolrContentHandler(metadata, new MapSolrParams(new HashMap()), schema);
|
||||
|
|
|
@ -1,35 +0,0 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.common.util;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestDateUtil extends LuceneTestCase {
|
||||
|
||||
public void testParseDate() throws ParseException {
|
||||
assertParsedDate(1226583351000L, "Thu Nov 13 04:35:51 AKST 2008");
|
||||
}
|
||||
|
||||
private static void assertParsedDate(long ts, String dateStr) throws ParseException {
|
||||
long parsed = DateUtil.parseDate(dateStr).getTime();
|
||||
assertTrue(String.format(Locale.ENGLISH, "Incorrect parsed timestamp: %d != %d (%s)", ts, parsed, dateStr), Math.abs(ts - parsed) <= 1000L);
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue