diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestIndexSortBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestIndexSortBackwardsCompatibility.java index b8708c3b7cb..e1f061ec3d3 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestIndexSortBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/backward_index/TestIndexSortBackwardsCompatibility.java @@ -18,13 +18,10 @@ package org.apache.lucene.backward_index; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; import java.io.IOException; -import java.text.ParsePosition; -import java.text.SimpleDateFormat; +import java.time.LocalDateTime; +import java.time.ZoneOffset; import java.util.Arrays; -import java.util.Date; -import java.util.Locale; import java.util.Random; -import java.util.TimeZone; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.IntPoint; @@ -164,26 +161,17 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT conf.setIndexSort(new Sort(new SortField("dateDV", SortField.Type.LONG, true))); IndexWriter writer = new IndexWriter(directory, conf); LineFileDocs docs = new LineFileDocs(new Random(0)); - SimpleDateFormat parser = new SimpleDateFormat("yyyy-MM-dd", Locale.ROOT); - parser.setTimeZone(TimeZone.getTimeZone("UTC")); - ParsePosition position = new ParsePosition(0); + for (int i = 0; i < 50; i++) { Document doc = TestUtil.cloneDocument(docs.nextDoc()); String dateString = doc.get("date"); - position.setIndex(0); - Date date = parser.parse(dateString, position); - if (position.getErrorIndex() != -1) { - throw new AssertionError("failed to parse \"" + dateString + "\" as date"); - } - if (position.getIndex() != dateString.length()) { - throw new AssertionError("failed to parse \"" + dateString + "\" as date"); - } + LocalDateTime date = 
LineFileDocs.DATE_FIELD_VALUE_TO_LOCALDATETIME.apply(dateString); doc.add( new NumericDocValuesField( "docid_intDV", doc.getField("docid_int").numericValue().longValue())); doc.add( new SortedDocValuesField("titleDV", new BytesRef(doc.getField("title").stringValue()))); - doc.add(new NumericDocValuesField("dateDV", date.getTime())); + doc.add(new NumericDocValuesField("dateDV", date.toInstant(ZoneOffset.UTC).toEpochMilli())); if (i % 10 == 0) { // commit every 10 documents writer.commit(); } @@ -206,9 +194,6 @@ public class TestIndexSortBackwardsCompatibility extends BackwardsCompatibilityT topDocs = searcher.search(new FieldExistsQuery("titleDV"), 10); assertEquals(50, topDocs.totalHits.value); - topDocs = searcher.search(new TermQuery(new Term("body", "ja")), 10); - assertTrue(topDocs.totalHits.value > 0); - topDocs = searcher.search( IntPoint.newRangeQuery("docid_int", 42, 44), diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/util/LineFileDocs.java b/lucene/test-framework/src/java/org/apache/lucene/tests/util/LineFileDocs.java index 9d02549c7de..e0158c4c542 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/util/LineFileDocs.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/util/LineFileDocs.java @@ -29,10 +29,16 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.time.LocalDate; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeFormatterBuilder; import java.util.ArrayList; import java.util.List; +import java.util.Locale; import java.util.Random; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Function; import java.util.zip.GZIPInputStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -53,6 +59,35 @@ import org.apache.lucene.util.IOUtils; * created by benchmark's WriteLineDoc task */ public 
class LineFileDocs implements Closeable { + /** + * Converts date formats for europarl ("2023-02-23") and enwiki ("12-JAN-2010 12:32:45.000") into + * {@link LocalDateTime}. Date-only values are mapped to the start of that day; any other value is + * parsed with the enwiki pattern, whose fractional-second part is optional. + */ + public static final Function<String, LocalDateTime> DATE_FIELD_VALUE_TO_LOCALDATETIME = + new Function<>() { + final DateTimeFormatter euroParl = + new DateTimeFormatterBuilder() + .parseStrict() + .parseCaseInsensitive() + .appendPattern("uuuu-MM-dd") + .toFormatter(Locale.ROOT); + + final DateTimeFormatter enwiki = + new DateTimeFormatterBuilder() + .parseStrict() + .parseCaseInsensitive() + .appendPattern("dd-MMM-uuuu HH:mm:ss['.'SSS]") + .toFormatter(Locale.ROOT); + + @Override + public LocalDateTime apply(String s) { + if (s.matches("^[0-9]{4}-[0-9]{2}-[0-9]{2}$")) { + return euroParl.parse(s, LocalDate::from).atStartOfDay(); + } else { + return enwiki.parse(s, LocalDateTime::from); + } + } + }; private BufferedReader reader; private static final int BUFFER_SIZE = 1 << 16; // 64K diff --git a/lucene/test-framework/src/test/org/apache/lucene/tests/util/TestLineFileDocs.java b/lucene/test-framework/src/test/org/apache/lucene/tests/util/TestLineFileDocs.java new file mode 100644 index 00000000000..75a6a9d0522 --- /dev/null +++ b/lucene/test-framework/src/test/org/apache/lucene/tests/util/TestLineFileDocs.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.tests.util; + +import java.time.LocalDateTime; + +public class TestLineFileDocs extends LuceneTestCase { + /** + * Tests that {@link LineFileDocs#DATE_FIELD_VALUE_TO_LOCALDATETIME} converts both the date-only + * (europarl) and the date-time (enwiki) string formats into the expected {@link LocalDateTime}. + */ + public void testDateFieldNormalization() { + // europarl corpus uses this date format. + assertEquals( + LocalDateTime.of(2023, 2, 23, 0, 0), + LineFileDocs.DATE_FIELD_VALUE_TO_LOCALDATETIME.apply("2023-02-23")); + // enwiki uses this date format. + assertEquals( + LocalDateTime.of(2010, 1, 12, 12, 32, 45), + LineFileDocs.DATE_FIELD_VALUE_TO_LOCALDATETIME.apply("12-JAN-2010 12:32:45.000")); + } +}