diff --git a/CHANGES.txt b/CHANGES.txt index 22d7c123d89..1aa3f10a4fa 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -23,6 +23,11 @@ Changes in runtime behavior (Mike McCandless) + 2. LUCENE-1045: SortField.AUTO didn't work with long. When detecting + the field type for sorting automatically, numbers used to be + interpreted as int, then as float, if parsing the number as an int + failed. Now the detection checks for int, then for long, + then for float. (Daniel Naber) API Changes diff --git a/src/java/org/apache/lucene/search/FieldCacheImpl.java b/src/java/org/apache/lucene/search/FieldCacheImpl.java index 16ed6939904..65ca40ef6ea 100644 --- a/src/java/org/apache/lucene/search/FieldCacheImpl.java +++ b/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermDocs; import org.apache.lucene.index.TermEnum; +import org.apache.lucene.search.ExtendedFieldCache.LongParser; import java.io.IOException; import java.util.Locale; @@ -149,6 +150,12 @@ implements FieldCache { } }; + private static final LongParser LONG_PARSER = new LongParser() { + public long parseLong(String value) { + return Long.parseLong(value); + } + }; + private static final FloatParser FLOAT_PARSER = new FloatParser() { public float parseFloat(String value) { return Float.parseFloat(value); @@ -272,6 +279,45 @@ implements FieldCache { } }; + // inherit javadocs + public long[] getLongs (IndexReader reader, String field) throws IOException { + return getLongs(reader, field, LONG_PARSER); + } + + // inherit javadocs + public long[] getLongs(IndexReader reader, String field, LongParser parser) + throws IOException { + return (long[]) longsCache.get(reader, new Entry(field, parser)); + } + + Cache longsCache = new Cache() { + + protected Object createValue(IndexReader reader, Object entryKey) + throws IOException { + Entry entry = (Entry) entryKey; + String field = entry.field; + LongParser parser = (LongParser) entry.custom; + final long[] retArray = new long[reader.maxDoc()]; + TermDocs termDocs = reader.termDocs(); + TermEnum termEnum = reader.terms (new Term (field, "")); + try { + do { + Term term = termEnum.term(); + if (term==null || term.field() != field) break; + long termval = parser.parseLong(term.text()); + termDocs.seek (termEnum); + while (termDocs.next()) { + retArray[termDocs.doc()] = termval; + } + } while (termEnum.next()); + } finally { + termDocs.close(); + termEnum.close(); + } + return retArray; + } + }; + // inherit javadocs public float[] getFloats (IndexReader reader, String field) throws IOException { @@ -455,10 +501,15 @@ implements FieldCache { ret = getInts (reader, field); } catch (NumberFormatException nfe1) { try { - Float.parseFloat (termtext); - ret = getFloats (reader, field); + Long.parseLong(termtext); + ret = getLongs (reader, field); } catch (NumberFormatException nfe2) { - ret = getStringIndex (reader, field); + try { + Float.parseFloat (termtext); + ret = getFloats (reader, field); + } catch (NumberFormatException nfe3) { + ret = getStringIndex (reader, field); + } } } } else { diff --git a/src/java/org/apache/lucene/search/FieldSortedHitQueue.java b/src/java/org/apache/lucene/search/FieldSortedHitQueue.java index c51f1cc9871..af4f6b0d0dc 100644 --- a/src/java/org/apache/lucene/search/FieldSortedHitQueue.java +++ b/src/java/org/apache/lucene/search/FieldSortedHitQueue.java @@ -409,6 +409,8 @@ extends PriorityQueue { return comparatorString (reader, field); } else if (lookupArray instanceof int[]) { return comparatorInt (reader, field); + } else if (lookupArray instanceof long[]) { + return comparatorLong (reader, field); } else if (lookupArray instanceof float[]) { return comparatorFloat (reader, field); } else if (lookupArray instanceof String[]) { diff --git a/src/java/org/apache/lucene/search/Sort.java b/src/java/org/apache/lucene/search/Sort.java index b432c879504..6e925e9d864 100644 --- a/src/java/org/apache/lucene/search/Sort.java +++ b/src/java/org/apache/lucene/search/Sort.java @@ -30,24 +30,30 @@ import java.io.Serializable; * and does not need to be stored (unless you happen to want it back with the * rest of your document data). In other words: * - *

document.add (new Fieldable ("byNumber", Integer.toString(x), Fieldable.Store.NO, Fieldable.Index.UN_TOKENIZED));

+ *

document.add (new Field ("byNumber", Integer.toString(x), Field.Store.NO, Field.Index.UN_TOKENIZED));

* * *

Valid Types of Values

* - *

There are three possible kinds of term values which may be put into - * sorting fields: Integers, Floats, or Strings. Unless + *

There are four possible kinds of term values which may be put into + * sorting fields: Integers, Longs, Floats, or Strings. Unless * {@link SortField SortField} objects are specified, the type of value * in the field is determined by parsing the first term in the field. * *

Integer term values should contain only digits and an optional - * preceeding negative sign. Values must be base 10 and in the range + * preceding negative sign. Values must be base 10 and in the range * Integer.MIN_VALUE and Integer.MAX_VALUE inclusive. * Documents which should appear first in the sort * should have low value integers, later documents high values * (i.e. the documents should be numbered 1..n where * 1 is the first and n the last). * + *

Long term values should contain only digits and an optional + * preceding negative sign. Values must be base 10 and in the range + * Long.MIN_VALUE and Long.MAX_VALUE inclusive. + * Documents which should appear first in the sort + * should have low value integers, later documents high values. + * *

Float term values should conform to values accepted by * {@link Float Float.valueOf(String)} (except that NaN * and Infinity are not supported). diff --git a/src/test/org/apache/lucene/search/TestDateSort.java b/src/test/org/apache/lucene/search/TestDateSort.java new file mode 100644 index 00000000000..0fe012f8ee0 --- /dev/null +++ b/src/test/org/apache/lucene/search/TestDateSort.java @@ -0,0 +1,118 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Arrays; + +import junit.framework.TestCase; + +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.document.DateTools; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.Hits; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; + +/** + * Test date sorting, i.e. auto-sorting of fields with type "long". + * See http://issues.apache.org/jira/browse/LUCENE-1045 + */ +public class TestDateSort extends TestCase { + + private static final String TEXT_FIELD = "text"; + private static final String DATE_TIME_FIELD = "dateTime"; + + private static Directory directory; + + public void setUp() throws Exception { + // Create an index writer. + directory = new RAMDirectory(); + IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true); + + // oldest doc: + // Add the first document. text = "Document 1" dateTime = Oct 10 03:25:22 EDT 2007 + writer.addDocument(createDocument("Document 1", 1192001122000L)); + // Add the second document. text = "Document 2" dateTime = Oct 10 03:25:26 EDT 2007 + writer.addDocument(createDocument("Document 2", 1192001126000L)); + // Add the third document. text = "Document 3" dateTime = Oct 11 07:12:13 EDT 2007 + writer.addDocument(createDocument("Document 3", 1192101133000L)); + // Add the fourth document. text = "Document 4" dateTime = Oct 11 08:02:09 EDT 2007 + writer.addDocument(createDocument("Document 4", 1192104129000L)); + // latest doc: + // Add the fifth document. text = "Document 5" dateTime = Oct 12 13:25:43 EDT 2007 + writer.addDocument(createDocument("Document 5", 1192209943000L)); + + writer.optimize(); + writer.close(); + } + + public void testReverseDateSort() throws Exception { + IndexSearcher searcher = new IndexSearcher(directory); + + // Create a Sort object. reverse is set to true. + // problem occurs only with SortField.AUTO: + Sort sort = new Sort(new SortField(DATE_TIME_FIELD, SortField.AUTO, true)); + + QueryParser queryParser = new QueryParser(TEXT_FIELD, new WhitespaceAnalyzer()); + Query query = queryParser.parse("Document"); + + // Execute the search and process the search results. + String[] actualOrder = new String[5]; + Hits hits = searcher.search(query, sort); + for (int i = 0; i < hits.length(); i++) { + Document document = hits.doc(i); + String text = document.get(TEXT_FIELD); + actualOrder[i] = text; + } + searcher.close(); + + // Set up the expected order (i.e. Document 5, 4, 3, 2, 1). + String[] expectedOrder = new String[5]; + expectedOrder[0] = "Document 5"; + expectedOrder[1] = "Document 4"; + expectedOrder[2] = "Document 3"; + expectedOrder[3] = "Document 2"; + expectedOrder[4] = "Document 1"; + + assertEquals(Arrays.asList(expectedOrder), Arrays.asList(actualOrder)); + } + + private static Document createDocument(String text, long time) { + Document document = new Document(); + + // Add the text field. + Field textField = new Field(TEXT_FIELD, text, Field.Store.YES, Field.Index.TOKENIZED); + document.add(textField); + + // Add the date/time field. + String dateTimeString = DateTools.timeToString(time, DateTools.Resolution.SECOND); + Field dateTimeField = new Field(DATE_TIME_FIELD, dateTimeString, Field.Store.YES, + Field.Index.UN_TOKENIZED); + document.add(dateTimeField); + + return document; + } + +}