SOLR-8903: Move SolrJ DateUtil to contrib/extraction as ExtractionDateUtil.

And removed obsolete methods.
2016-03-30 15:00:29 -04:00 · 2016-03-30 15:00:29 -04:00 · 5e5fd66257
parent 39932f5758
commit 5e5fd66257
8 changed files with 117 additions and 173 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -180,6 +180,9 @@ Upgrading from Solr 5.x
  When there is a non-zero number of milliseconds, it is padded with zeros to 3 digits. Negative year (BC) dates are
  now possible.  Parsing: It is now an error to supply a portion of the date out of its, range, like 67 seconds.

+* SolrJ no longer includes DateUtil. If for some reason you need to format or parse dates, simply use Instant.format()
+  and Instant.parse().
+
 Detailed Change List
 ----------------------

@ -522,6 +525,9 @@ Other Changes
  now parse (and format) dates with a leading '+' or '-' (BC dates or dates > 4 digit year.
  [value] and ms() and contrib/analytics now parse with date math. (David Smiley)

+* SOLR-8904: DateUtil in SolrJ moved to the extraction contrib as ExtractionDateUtil.  Obsolete methods were removed.
+  (David Smiley)
+
 ==================  5.5.1 ==================

 Bug Fixes
--- a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java
+++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractingRequestHandler.java
@ -17,23 +17,6 @@
 package org.apache.solr.handler.extraction;


-import org.apache.solr.common.SolrException;
-import org.apache.solr.common.SolrException.ErrorCode;
-import org.apache.solr.common.util.DateUtil;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.security.AuthorizationContext;
-import org.apache.solr.security.PermissionNameProvider;
-import org.apache.solr.update.processor.UpdateRequestProcessor;
-import org.apache.solr.util.plugin.SolrCoreAware;
-import org.apache.solr.handler.ContentStreamHandlerBase;
-import org.apache.solr.handler.loader.ContentStreamLoader;
-import org.apache.tika.config.TikaConfig;
-import org.apache.tika.mime.MimeTypeException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
 import java.io.File;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
@ -42,6 +25,22 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.Map;

+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.handler.ContentStreamHandlerBase;
+import org.apache.solr.handler.loader.ContentStreamLoader;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.security.AuthorizationContext;
+import org.apache.solr.security.PermissionNameProvider;
+import org.apache.solr.update.processor.UpdateRequestProcessor;
+import org.apache.solr.util.plugin.SolrCoreAware;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.mime.MimeTypeException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+

 /**
 * Handler for rich documents like PDF or Word or any other file format that Tika handles that need the text to be extracted
@ -59,7 +58,7 @@ public class ExtractingRequestHandler extends ContentStreamHandlerBase implement
  protected ParseContextConfig parseContextConfig;


-  protected Collection<String> dateFormats = DateUtil.DEFAULT_DATE_FORMATS;
+  protected Collection<String> dateFormats = ExtractionDateUtil.DEFAULT_DATE_FORMATS;
  protected SolrContentHandlerFactory factory;


--- a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractionDateUtil.java
+++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/ExtractionDateUtil.java
@ -14,11 +14,13 @@
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
-package org.apache.solr.common.util;
-import java.io.IOException;
-import java.text.DateFormat;
+package org.apache.solr.handler.extraction;
+
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+import java.time.Instant;
+import java.time.format.DateTimeFormatter;
+import java.time.format.DateTimeFormatterBuilder;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Calendar;
@ -32,7 +34,7 @@ import java.util.TimeZone;
 /**
 * This class has some code from HttpClient DateUtil.
 */
-public class DateUtil {
+public class ExtractionDateUtil {
  //start HttpClient
  /**
   * Date format pattern used to parse HTTP date headers in RFC 1123 format.
@ -67,6 +69,12 @@ public class DateUtil {

  //---------------------------------------------------------------------------------------

+  /**
+   * Differs by {@link DateTimeFormatter#ISO_INSTANT} in that it's lenient.
+   */
+  public static final DateTimeFormatter ISO_8601_PARSER = new DateTimeFormatterBuilder()
+      .parseCaseInsensitive().parseLenient().appendInstant().toFormatter(Locale.ROOT);
+
  /**
   * A suite of default date formats that can be parsed, and thus transformed to the Solr specific format
   */
@ -95,9 +103,12 @@ public class DateUtil {
  }

  public static Date parseDate(String d, Collection<String> fmts) throws ParseException {
-    // 2007-04-26T08:05:04Z
-    if (d.endsWith("Z") && d.length() > 20) {
-      return getThreadLocalDateFormat().parse(d);
+    if (d.length() > 0 && d.charAt(d.length() - 1) == 'Z') {
+      try {
+        return new Date(ISO_8601_PARSER.parse(d, Instant::from).toEpochMilli());
+      } catch (Exception e) {
+        //ignore; perhaps we can parse with one of the formats below...
+      }
    }
    return parseDate(d, fmts, null);
  }
@ -140,6 +151,7 @@ public class DateUtil {
      dateValue = dateValue.substring(1, dateValue.length() - 1);
    }

+    //TODO upgrade to Java 8 DateTimeFormatter. But how to deal with the GMT as a default?
    SimpleDateFormat dateParser = null;
    Iterator formatIter = dateFormats.iterator();

@ -163,97 +175,4 @@ public class DateUtil {
    throw new ParseException("Unable to parse the date " + dateValue, 0);
  }

-
-  /**
-   * Returns a formatter that can be use by the current thread if needed to
-   * convert Date objects to the Internal representation.
-   *
-   * @return The {@link java.text.DateFormat} for the current thread
-   */
-  public static DateFormat getThreadLocalDateFormat() {
-    return fmtThreadLocal.get();
-  }
-
-  public static TimeZone UTC = TimeZone.getTimeZone("UTC");
-  private static ThreadLocalDateFormat fmtThreadLocal = new ThreadLocalDateFormat();
-
-  private static class ThreadLocalDateFormat extends ThreadLocal<DateFormat> {
-    DateFormat proto;
-
-    public ThreadLocalDateFormat() {
-      super();
-      //2007-04-26T08:05:04Z
-      SimpleDateFormat tmp = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'", Locale.ROOT);
-      tmp.setTimeZone(UTC);
-      proto = tmp;
-    }
-
-    @Override
-    protected DateFormat initialValue() {
-      return (DateFormat) proto.clone();
-    }
-  }
-
-  /** Formats the date and returns the calendar instance that was used (which may be reused) */
-  public static Calendar formatDate(Date date, Calendar cal, Appendable out) throws IOException {
-    // using a stringBuilder for numbers can be nice since
-    // a temporary string isn't used (it's added directly to the
-    // builder's buffer.
-
-    StringBuilder sb = out instanceof StringBuilder ? (StringBuilder)out : new StringBuilder();
-    if (cal==null) cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"), Locale.ROOT);
-    cal.setTime(date);
-
-    int i = cal.get(Calendar.YEAR);
-    sb.append(i);
-    sb.append('-');
-    i = cal.get(Calendar.MONTH) + 1;  // 0 based, so add 1
-    if (i<10) sb.append('0');
-    sb.append(i);
-    sb.append('-');
-    i=cal.get(Calendar.DAY_OF_MONTH);
-    if (i<10) sb.append('0');
-    sb.append(i);
-    sb.append('T');
-    i=cal.get(Calendar.HOUR_OF_DAY); // 24 hour time format
-    if (i<10) sb.append('0');
-    sb.append(i);
-    sb.append(':');
-    i=cal.get(Calendar.MINUTE);
-    if (i<10) sb.append('0');
-    sb.append(i);
-    sb.append(':');
-    i=cal.get(Calendar.SECOND);
-    if (i<10) sb.append('0');
-    sb.append(i);
-    i=cal.get(Calendar.MILLISECOND);
-    if (i != 0) {
-      sb.append('.');
-      if (i<100) sb.append('0');
-      if (i<10) sb.append('0');
-      sb.append(i);
-
-      // handle canonical format specifying fractional
-      // seconds shall not end in '0'.  Given the slowness of
-      // integer div/mod, simply checking the last character
-      // is probably the fastest way to check.
-      int lastIdx = sb.length()-1;
-      if (sb.charAt(lastIdx)=='0') {
-        lastIdx--;
-        if (sb.charAt(lastIdx)=='0') {
-          lastIdx--;
-        }
-        sb.setLength(lastIdx+1);
-      }
-
-    }
-    sb.append('Z');
-
-    if (out != sb)
-      out.append(sb);
-
-    return cal;
-  }
-
-
 }
--- a/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java
+++ b/solr/contrib/extraction/src/java/org/apache/solr/handler/extraction/SolrContentHandler.java
@ -17,7 +17,6 @@
 package org.apache.solr.handler.extraction;

 import java.lang.invoke.MethodHandles;
-import java.text.DateFormat;
 import java.util.ArrayDeque;
 import java.util.Collection;
 import java.util.Collections;
@ -31,7 +30,6 @@ import java.util.Set;

 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.DateUtil;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.schema.TrieDateField;
@ -83,7 +81,7 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara
  private Set<String> literalFieldNames = null;
  
  public SolrContentHandler(Metadata metadata, SolrParams params, IndexSchema schema) {
-    this(metadata, params, schema, DateUtil.DEFAULT_DATE_FORMATS);
+    this(metadata, params, schema, ExtractionDateUtil.DEFAULT_DATE_FORMATS);
  }


@ -317,7 +315,7 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara
  /**
   * Can be used to transform input values based on their {@link org.apache.solr.schema.SchemaField}
   * <p>
-   * This implementation only formats dates using the {@link org.apache.solr.common.util.DateUtil}.
+   * This implementation only formats dates using the {@link ExtractionDateUtil}.
   *
   * @param val    The value to transform
   * @param schFld The {@link org.apache.solr.schema.SchemaField}
@ -328,10 +326,8 @@ public class SolrContentHandler extends DefaultHandler implements ExtractingPara
    if (schFld != null && schFld.getType() instanceof TrieDateField) {
      //try to transform the date
      try {
-        Date date = DateUtil.parseDate(val, dateFormats);
-        DateFormat df = DateUtil.getThreadLocalDateFormat();
-        result = df.format(date);
-
+        Date date = ExtractionDateUtil.parseDate(val, dateFormats); // may throw
+        result = date.toInstant().toString();//ISO format
      } catch (Exception e) {
        // Let the specific fieldType handle errors
        // throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Invalid value: " + val + " for field: " + schFld, e);
--- a/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/TestExtractionDateUtil.java
+++ b/solr/contrib/extraction/src/test/org/apache/solr/handler/extraction/TestExtractionDateUtil.java
@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.handler.extraction;
+
+import java.text.ParseException;
+import java.util.Date;
+import java.util.Locale;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestExtractionDateUtil extends LuceneTestCase {
+
+  public void testISO8601() throws Exception {
+    // dates with atypical years
+    assertParseFormatEquals("0001-01-01T01:01:01Z", null);
+    assertParseFormatEquals("+12021-12-01T03:03:03Z", null);
+
+    assertParseFormatEquals("0000-04-04T04:04:04Z", null); // note: 0 AD is also known as 1 BC
+
+    // dates with negative years (BC)
+    assertParseFormatEquals("-0005-05-05T05:05:05Z", null);
+    assertParseFormatEquals("-2021-12-01T04:04:04Z", null);
+    assertParseFormatEquals("-12021-12-01T02:02:02Z", null);
+
+    // dates that only parse thanks to lenient mode of DateTimeFormatter
+    assertParseFormatEquals("10995-12-31T23:59:59.990Z", "+10995-12-31T23:59:59.990Z"); // missing '+' 5 digit year
+    assertParseFormatEquals("995-1-2T3:4:5Z", "0995-01-02T03:04:05Z"); // wasn't 0 padded
+  }
+
+  private static void assertParseFormatEquals(String inputStr, String expectedStr) throws ParseException {
+    if (expectedStr == null) {
+      expectedStr = inputStr;
+    }
+    Date inputDate = ExtractionDateUtil.parseDate(inputStr);
+    String resultStr = inputDate.toInstant().toString();
+    assertEquals("d:" + inputDate.getTime(), expectedStr, resultStr);
+  }
+
+  public void testParseDate() throws ParseException {
+    assertParsedDate(1226583351000L, "Thu Nov 13 04:35:51 AKST 2008");
+  }
+
+  private static void assertParsedDate(long ts, String dateStr) throws ParseException {
+    long parsed = ExtractionDateUtil.parseDate(dateStr).getTime();
+    assertTrue(String.format(Locale.ENGLISH, "Incorrect parsed timestamp: %d != %d (%s)", ts, parsed, dateStr), Math.abs(ts - parsed) <= 1000L);
+  }
+}
--- a/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
+++ b/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
@ -29,13 +29,19 @@ import java.util.Map;
 import java.util.Map.Entry;
 import java.util.TreeMap;

+import com.google.common.base.Joiner;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ListMultimap;
+import com.google.common.io.Closeables;
+import com.typesafe.config.Config;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.SolrInputField;
 import org.apache.solr.common.params.MultiMapSolrParams;
 import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.DateUtil;
 import org.apache.solr.common.util.SuppressForbidden;
 import org.apache.solr.handler.extraction.ExtractingParams;
+import org.apache.solr.handler.extraction.ExtractionDateUtil;
 import org.apache.solr.handler.extraction.SolrContentHandler;
 import org.apache.solr.handler.extraction.SolrContentHandlerFactory;
 import org.apache.solr.morphlines.solr.SolrLocator;
@ -50,7 +56,6 @@ import org.apache.tika.sax.XHTMLContentHandler;
 import org.apache.tika.sax.xpath.Matcher;
 import org.apache.tika.sax.xpath.MatchingContentHandler;
 import org.apache.tika.sax.xpath.XPathParser;
-
 import org.kitesdk.morphline.api.Command;
 import org.kitesdk.morphline.api.CommandBuilder;
 import org.kitesdk.morphline.api.MorphlineCompilationException;
@ -63,13 +68,6 @@ import org.kitesdk.morphline.stdio.AbstractParser;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;

-import com.google.common.base.Joiner;
-import com.google.common.base.Preconditions;
-import com.google.common.collect.ArrayListMultimap;
-import com.google.common.collect.ListMultimap;
-import com.google.common.io.Closeables;
-import com.typesafe.config.Config;
-
 /**
 * Command that pipes the first attachment of a record into one of the given Tika parsers, then maps
 * the Tika output back to a record using SolrCell.
@ -151,7 +149,7 @@ public final class SolrCellBuilder implements CommandBuilder {
        cellParams.put(ExtractingParams.XPATH_EXPRESSION, xpathExpr);
      }
      
-      this.dateFormats = getConfigs().getStringList(config, "dateFormats", new ArrayList<>(DateUtil.DEFAULT_DATE_FORMATS));
+      this.dateFormats = getConfigs().getStringList(config, "dateFormats", new ArrayList<>(ExtractionDateUtil.DEFAULT_DATE_FORMATS));
      
      String handlerStr = getConfigs().getString(config, "solrContentHandlerFactory", TrimSolrContentHandlerFactory.class.getName());
      Class<? extends SolrContentHandlerFactory> factoryClass;
--- a/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
+++ b/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
@ -25,7 +25,7 @@ import org.apache.commons.io.FileUtils;
 import org.apache.lucene.util.Constants;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.MapSolrParams;
-import org.apache.solr.common.util.DateUtil;
+import org.apache.solr.handler.extraction.ExtractionDateUtil;
 import org.apache.solr.handler.extraction.SolrContentHandler;
 import org.apache.solr.morphlines.solr.AbstractSolrMorphlineTestBase;
 import org.apache.solr.schema.IndexSchema;
@ -270,7 +270,7 @@ public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
    // which will cause the ContentHandler to be invoked.
    metadata.set(fieldName, getFoobarWithNonChars());
    StripNonCharSolrContentHandlerFactory contentHandlerFactory =
-      new StripNonCharSolrContentHandlerFactory(DateUtil.DEFAULT_DATE_FORMATS);
+      new StripNonCharSolrContentHandlerFactory(ExtractionDateUtil.DEFAULT_DATE_FORMATS);
    IndexSchema schema = h.getCore().getLatestSchema();
    SolrContentHandler contentHandler =
      contentHandlerFactory.createSolrContentHandler(metadata, new MapSolrParams(new HashMap()), schema);
--- a/solr/solrj/src/test/org/apache/solr/common/util/TestDateUtil.java
+++ b/solr/solrj/src/test/org/apache/solr/common/util/TestDateUtil.java
@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.common.util;
-
-import java.text.ParseException;
-import java.util.Locale;
-
-import org.apache.lucene.util.LuceneTestCase;
-
-public class TestDateUtil extends LuceneTestCase {
-
-  public void testParseDate() throws ParseException {
-    assertParsedDate(1226583351000L, "Thu Nov 13 04:35:51 AKST 2008");
-  }
-    
-  private static void assertParsedDate(long ts, String dateStr) throws ParseException {
-    long parsed = DateUtil.parseDate(dateStr).getTime();
-    assertTrue(String.format(Locale.ENGLISH, "Incorrect parsed timestamp: %d != %d (%s)", ts, parsed, dateStr), Math.abs(ts - parsed) <= 1000L);
-  }
-
-}