LUCENE-1045: make SortField.AUTO work with long

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@598376 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Daniel Naber 2007-11-26 18:45:39 +00:00
parent 498d582423
commit 5882ec0659
5 changed files with 189 additions and 7 deletions

View File

@ -23,6 +23,11 @@ Changes in runtime behavior
(Mike McCandless)
2. LUCENE-1045: SortField.AUTO didn't work with long. When detecting
the field type for sorting automatically, numbers used to be
interpreted as int, then as float, if parsing the number as an int
failed. Now the detection checks for int, then for long,
then for float. (Daniel Naber)
API Changes

View File

@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.search.ExtendedFieldCache.LongParser;
import java.io.IOException;
import java.util.Locale;
@ -149,6 +150,12 @@ implements FieldCache {
}
};
private static final LongParser LONG_PARSER = new LongParser() {
public long parseLong(String value) {
return Long.parseLong(value);
}
};
private static final FloatParser FLOAT_PARSER = new FloatParser() {
public float parseFloat(String value) {
return Float.parseFloat(value);
@ -272,6 +279,45 @@ implements FieldCache {
}
};
// inherit javadocs
public long[] getLongs (IndexReader reader, String field) throws IOException {
return getLongs(reader, field, LONG_PARSER);
}
// inherit javadocs
public long[] getLongs(IndexReader reader, String field, LongParser parser)
throws IOException {
return (long[]) longsCache.get(reader, new Entry(field, parser));
}
Cache longsCache = new Cache() {
protected Object createValue(IndexReader reader, Object entryKey)
throws IOException {
Entry entry = (Entry) entryKey;
String field = entry.field;
LongParser parser = (LongParser) entry.custom;
final long[] retArray = new long[reader.maxDoc()];
TermDocs termDocs = reader.termDocs();
TermEnum termEnum = reader.terms (new Term (field, ""));
try {
do {
Term term = termEnum.term();
if (term==null || term.field() != field) break;
long termval = parser.parseLong(term.text());
termDocs.seek (termEnum);
while (termDocs.next()) {
retArray[termDocs.doc()] = termval;
}
} while (termEnum.next());
} finally {
termDocs.close();
termEnum.close();
}
return retArray;
}
};
// inherit javadocs
public float[] getFloats (IndexReader reader, String field)
throws IOException {
@ -455,10 +501,15 @@ implements FieldCache {
ret = getInts (reader, field);
} catch (NumberFormatException nfe1) {
try {
Float.parseFloat (termtext);
ret = getFloats (reader, field);
Long.parseLong(termtext);
ret = getLongs (reader, field);
} catch (NumberFormatException nfe2) {
ret = getStringIndex (reader, field);
try {
Float.parseFloat (termtext);
ret = getFloats (reader, field);
} catch (NumberFormatException nfe3) {
ret = getStringIndex (reader, field);
}
}
}
} else {

View File

@ -409,6 +409,8 @@ extends PriorityQueue {
return comparatorString (reader, field);
} else if (lookupArray instanceof int[]) {
return comparatorInt (reader, field);
} else if (lookupArray instanceof long[]) {
return comparatorLong (reader, field);
} else if (lookupArray instanceof float[]) {
return comparatorFloat (reader, field);
} else if (lookupArray instanceof String[]) {

View File

@ -30,24 +30,30 @@ import java.io.Serializable;
* and does not need to be stored (unless you happen to want it back with the
* rest of your document data). In other words:
*
* <p><code>document.add (new Fieldable ("byNumber", Integer.toString(x), Fieldable.Store.NO, Fieldable.Index.UN_TOKENIZED));</code></p>
* <p><code>document.add (new Field ("byNumber", Integer.toString(x), Field.Store.NO, Field.Index.UN_TOKENIZED));</code></p>
*
*
* <p><h3>Valid Types of Values</h3>
*
* <p>There are three possible kinds of term values which may be put into
* sorting fields: Integers, Floats, or Strings. Unless
* <p>There are four possible kinds of term values which may be put into
* sorting fields: Integers, Longs, Floats, or Strings. Unless
* {@link SortField SortField} objects are specified, the type of value
* in the field is determined by parsing the first term in the field.
*
* <p>Integer term values should contain only digits and an optional
* preceeding negative sign. Values must be base 10 and in the range
* preceding negative sign. Values must be base 10 and in the range
* <code>Integer.MIN_VALUE</code> and <code>Integer.MAX_VALUE</code> inclusive.
* Documents which should appear first in the sort
* should have low value integers, later documents high values
* (i.e. the documents should be numbered <code>1..n</code> where
* <code>1</code> is the first and <code>n</code> the last).
*
* <p>Long term values should contain only digits and an optional
* preceding negative sign. Values must be base 10 and in the range
* <code>Long.MIN_VALUE</code> and <code>Long.MAX_VALUE</code> inclusive.
* Documents which should appear first in the sort
* should have low value integers, later documents high values.
*
* <p>Float term values should conform to values accepted by
* {@link Float Float.valueOf(String)} (except that <code>NaN</code>
* and <code>Infinity</code> are not supported).

View File

@ -0,0 +1,118 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Arrays;
import junit.framework.TestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
/**
* Test date sorting, i.e. auto-sorting of fields with type "long".
* See http://issues.apache.org/jira/browse/LUCENE-1045
*/
public class TestDateSort extends TestCase {
private static final String TEXT_FIELD = "text";
private static final String DATE_TIME_FIELD = "dateTime";
private static Directory directory;
public void setUp() throws Exception {
// Create an index writer.
directory = new RAMDirectory();
IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
// oldest doc:
// Add the first document. text = "Document 1" dateTime = Oct 10 03:25:22 EDT 2007
writer.addDocument(createDocument("Document 1", 1192001122000L));
// Add the second document. text = "Document 2" dateTime = Oct 10 03:25:26 EDT 2007
writer.addDocument(createDocument("Document 2", 1192001126000L));
// Add the third document. text = "Document 3" dateTime = Oct 11 07:12:13 EDT 2007
writer.addDocument(createDocument("Document 3", 1192101133000L));
// Add the fourth document. text = "Document 4" dateTime = Oct 11 08:02:09 EDT 2007
writer.addDocument(createDocument("Document 4", 1192104129000L));
// latest doc:
// Add the fifth document. text = "Document 5" dateTime = Oct 12 13:25:43 EDT 2007
writer.addDocument(createDocument("Document 5", 1192209943000L));
writer.optimize();
writer.close();
}
public void testReverseDateSort() throws Exception {
IndexSearcher searcher = new IndexSearcher(directory);
// Create a Sort object. reverse is set to true.
// problem occurs only with SortField.AUTO:
Sort sort = new Sort(new SortField(DATE_TIME_FIELD, SortField.AUTO, true));
QueryParser queryParser = new QueryParser(TEXT_FIELD, new WhitespaceAnalyzer());
Query query = queryParser.parse("Document");
// Execute the search and process the search results.
String[] actualOrder = new String[5];
Hits hits = searcher.search(query, sort);
for (int i = 0; i < hits.length(); i++) {
Document document = hits.doc(i);
String text = document.get(TEXT_FIELD);
actualOrder[i] = text;
}
searcher.close();
// Set up the expected order (i.e. Document 5, 4, 3, 2, 1).
String[] expectedOrder = new String[5];
expectedOrder[0] = "Document 5";
expectedOrder[1] = "Document 4";
expectedOrder[2] = "Document 3";
expectedOrder[3] = "Document 2";
expectedOrder[4] = "Document 1";
assertEquals(Arrays.asList(expectedOrder), Arrays.asList(actualOrder));
}
private static Document createDocument(String text, long time) {
Document document = new Document();
// Add the text field.
Field textField = new Field(TEXT_FIELD, text, Field.Store.YES, Field.Index.TOKENIZED);
document.add(textField);
// Add the date/time field.
String dateTimeString = DateTools.timeToString(time, DateTools.Resolution.SECOND);
Field dateTimeField = new Field(DATE_TIME_FIELD, dateTimeString, Field.Store.YES,
Field.Index.UN_TOKENIZED);
document.add(dateTimeField);
return document;
}
}