expose term cache as a public object

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150348 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tim Jones 2004-05-19 23:05:27 +00:00
parent 1e2715d18d
commit 4e486c1434
16 changed files with 878 additions and 976 deletions

View File

@ -0,0 +1,135 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import java.io.IOException;
/**
 * Expert: Maintains caches of term values.
 *
 * <p>Every accessor first consults the cache and only reads the index
 * when no matching entry exists.
 *
 * <p>Created: May 19, 2004 11:13:14 AM
 *
 * @author  Tim Jones (Nacimiento Software)
 * @since   lucene 1.4
 * @version $Id$
 */
public interface FieldCache {

  /** Indicator for StringIndex values in the cache. */
  // NOTE: the value assigned to this constant must not be
  // the same as any of those in SortField!!
  int STRING_INDEX = -1;

  /** Expert: Stores term text values and document ordering data. */
  class StringIndex {

    /** All the term values, in natural order. */
    public final String[] lookup;

    /** For each document, an index into the lookup array. */
    public final int[] order;

    /**
     * Creates one of these objects.
     * @param values  Per-document indexes into <code>lookup</code>; kept as {@link #order}.
     * @param lookup  The term values; kept as {@link #lookup}.
     */
    public StringIndex (int[] values, String[] lookup) {
      this.lookup = lookup;
      this.order = values;
    }
  }

  /** Expert: The cache used internally by sorting and range query classes. */
  FieldCache DEFAULT = new FieldCacheImpl();

  /**
   * Returns the value each document has in <code>field</code>, with the
   * terms of that field read as integers. The internal cache is checked
   * first; the index is only read when no appropriate entry is found.
   * The returned array is of size <code>reader.maxDoc()</code>.
   * @param reader  Used to get field values.
   * @param field   Which field contains the integers.
   * @return The values in the given field for each document.
   * @throws IOException  If any error occurs.
   */
  int[] getInts (IndexReader reader, String field)
  throws IOException;

  /**
   * Returns the value each document has in <code>field</code>, with the
   * terms of that field read as floats. The internal cache is checked
   * first; the index is only read when no appropriate entry is found.
   * The returned array is of size <code>reader.maxDoc()</code>.
   * @param reader  Used to get field values.
   * @param field   Which field contains the floats.
   * @return The values in the given field for each document.
   * @throws IOException  If any error occurs.
   */
  float[] getFloats (IndexReader reader, String field)
  throws IOException;

  /**
   * Returns the term value each document has in <code>field</code>.
   * The internal cache is checked first; the index is only read when
   * no appropriate entry is found. The returned array is of size
   * <code>reader.maxDoc()</code>.
   * @param reader  Used to get field values.
   * @param field   Which field contains the strings.
   * @return The values in the given field for each document.
   * @throws IOException  If any error occurs.
   */
  String[] getStrings (IndexReader reader, String field)
  throws IOException;

  /**
   * Returns the term values of <code>field</code> as an array in natural
   * order, together with an array telling which element of the term
   * array each document uses. The internal cache is checked first; the
   * index is only read when no appropriate entry is found.
   * @param reader  Used to get field values.
   * @param field   Which field contains the strings.
   * @return Array of terms and index into the array for each document.
   * @throws IOException  If any error occurs.
   */
  StringIndex getStringIndex (IndexReader reader, String field)
  throws IOException;

  /**
   * Inspects <code>field</code> to see whether it contains integers,
   * floats or strings, then delegates to the matching accessor of this
   * interface to get the values. For string values, a StringIndex is
   * returned. The internal cache is checked first. After calling this
   * method, there is an entry in the cache for both type
   * <code>AUTO</code> and the actual found type.
   * @param reader  Used to get field values.
   * @param field   Which field contains the values.
   * @return int[], float[] or StringIndex.
   * @throws IOException  If any error occurs.
   */
  Object getAuto (IndexReader reader, String field)
  throws IOException;

  /**
   * Reads the terms out of <code>field</code> and calls the given
   * SortComparator to get the sort values. The internal cache is checked
   * first: a hit happens if <code>reader</code>, <code>field</code>, and
   * <code>comparator</code> are the same (using <code>equals()</code>)
   * as in a previous call to this method.
   * @param reader  Used to get field values.
   * @param field   Which field contains the values.
   * @param comparator Used to convert terms into something to sort by.
   * @return Array of sort objects, one for each document.
   * @throws IOException  If any error occurs.
   */
  Comparable[] getCustom (IndexReader reader, String field, SortComparator comparator)
  throws IOException;
}

View File

@ -0,0 +1,378 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import java.io.IOException;
import java.util.Map;
import java.util.WeakHashMap;
/**
 * Expert: The default cache implementation, storing all values in memory.
 * A WeakHashMap is used for storage.
 *
 * <p>Access to the map is synchronized on this object, but a lookup and the
 * store that follows a miss are two separate critical sections, so two
 * threads that miss at the same time may both read the index and the later
 * store replaces the earlier one.
 *
 * <p>Created: May 19, 2004 4:40:36 PM
 *
 * @author  Tim Jones (Nacimiento Software)
 * @since   lucene 1.4
 * @version $Id$
 */
class FieldCacheImpl
implements FieldCache {

  /** Expert: Every key in the internal cache is of this type. */
  static class Entry {
    final IndexReader reader;  // which Reader
    final String field;        // which Field (interned, so it can be compared with ==)
    final int type;            // which SortField type
    final Object custom;       // which custom comparator; null for the built-in types
    final int hashcode;        // precomputed from reader, field, type and custom

    /** Creates one of these objects. */
    Entry (IndexReader reader, String field, int type) {
      this.reader = reader;
      this.field = field.intern();
      this.type = type;
      this.custom = null;
      this.hashcode = reader.hashCode() ^ field.hashCode() ^ type;
    }

    /**
     * Creates one of these objects for a custom comparator. The type is
     * always <code>SortField.CUSTOM</code>.
     */
    Entry (IndexReader reader, String field, Object custom) {
      this.reader = reader;
      this.field = field.intern();
      this.type = SortField.CUSTOM;
      this.custom = custom;
      this.hashcode = reader.hashCode() ^ field.hashCode() ^ type ^ custom.hashCode();
    }

    /**
     * Two of these are equal iff they reference the same reader, field and
     * type (all compared by identity/value) and their custom objects are
     * either both <code>null</code> or equal according to <code>equals()</code>.
     */
    public boolean equals (Object o) {
      if (!(o instanceof Entry)) {
        return false;
      }
      Entry other = (Entry) o;
      if (other.reader != reader || other.field != field || other.type != type) {
        return false;
      }
      if (other.custom == null) {
        return custom == null;
      }
      return other.custom.equals (custom);
    }

    /** Returns the hashcode composed in the constructor. */
    public int hashCode() {
      return hashcode;
    }
  }

  /** The internal cache. Maps Entry to array of interpreted term values. **/
  // NOTE(review): the keys are Entry objects created inside lookup()/store()
  // and not referenced from anywhere else in this class, so a WeakHashMap is
  // free to drop an entry as soon as its key is garbage collected. Confirm
  // that this is the intended lifetime for cached values.
  final Map cache = new WeakHashMap();

  /** See if an object is in the cache. */
  Object lookup (IndexReader reader, String field, int type) {
    Entry entry = new Entry (reader, field, type);
    synchronized (this) {
      return cache.get (entry);
    }
  }

  /** See if a custom object is in the cache. */
  Object lookup (IndexReader reader, String field, Object comparer) {
    Entry entry = new Entry (reader, field, comparer);
    synchronized (this) {
      return cache.get (entry);
    }
  }

  /** Put an object into the cache. Returns the value previously stored, if any. */
  Object store (IndexReader reader, String field, int type, Object value) {
    Entry entry = new Entry (reader, field, type);
    synchronized (this) {
      return cache.put (entry, value);
    }
  }

  /** Put a custom object into the cache. Returns the value previously stored, if any. */
  Object store (IndexReader reader, String field, Object comparer, Object value) {
    Entry entry = new Entry (reader, field, comparer);
    synchronized (this) {
      return cache.put (entry, value);
    }
  }

  // inherit javadocs
  public int[] getInts (IndexReader reader, String field)
  throws IOException {
    field = field.intern();
    Object ret = lookup (reader, field, SortField.INT);
    if (ret == null) {
      final int[] retArray = new int[reader.maxDoc()];
      if (retArray.length > 0) {
        TermDocs termDocs = reader.termDocs();
        TermEnum termEnum = reader.terms (new Term (field, ""));
        try {
          if (termEnum.term() == null) {
            throw new RuntimeException ("no terms in field " + field);
          }
          do {
            Term term = termEnum.term();
            // identity comparison against the interned name; presumably
            // Term also interns its field name - confirm
            if (term.field() != field) break;
            int termval = Integer.parseInt (term.text());
            termDocs.seek (termEnum);
            while (termDocs.next()) {
              retArray[termDocs.doc()] = termval;
            }
          } while (termEnum.next());
        } finally {
          // close the enumerator even if closing termDocs fails
          try {
            termDocs.close();
          } finally {
            termEnum.close();
          }
        }
      }
      store (reader, field, SortField.INT, retArray);
      return retArray;
    }
    return (int[]) ret;
  }

  // inherit javadocs
  public float[] getFloats (IndexReader reader, String field)
  throws IOException {
    field = field.intern();
    Object ret = lookup (reader, field, SortField.FLOAT);
    if (ret == null) {
      final float[] retArray = new float[reader.maxDoc()];
      if (retArray.length > 0) {
        TermDocs termDocs = reader.termDocs();
        TermEnum termEnum = reader.terms (new Term (field, ""));
        try {
          if (termEnum.term() == null) {
            throw new RuntimeException ("no terms in field " + field);
          }
          do {
            Term term = termEnum.term();
            if (term.field() != field) break;
            float termval = Float.parseFloat (term.text());
            termDocs.seek (termEnum);
            while (termDocs.next()) {
              retArray[termDocs.doc()] = termval;
            }
          } while (termEnum.next());
        } finally {
          // close the enumerator even if closing termDocs fails
          try {
            termDocs.close();
          } finally {
            termEnum.close();
          }
        }
      }
      store (reader, field, SortField.FLOAT, retArray);
      return retArray;
    }
    return (float[]) ret;
  }

  // inherit javadocs
  public String[] getStrings (IndexReader reader, String field)
  throws IOException {
    field = field.intern();
    Object ret = lookup (reader, field, SortField.STRING);
    if (ret == null) {
      final String[] retArray = new String[reader.maxDoc()];
      if (retArray.length > 0) {
        TermDocs termDocs = reader.termDocs();
        TermEnum termEnum = reader.terms (new Term (field, ""));
        try {
          if (termEnum.term() == null) {
            throw new RuntimeException ("no terms in field " + field);
          }
          do {
            Term term = termEnum.term();
            if (term.field() != field) break;
            String termval = term.text();
            termDocs.seek (termEnum);
            while (termDocs.next()) {
              retArray[termDocs.doc()] = termval;
            }
          } while (termEnum.next());
        } finally {
          // close the enumerator even if closing termDocs fails
          try {
            termDocs.close();
          } finally {
            termEnum.close();
          }
        }
      }
      store (reader, field, SortField.STRING, retArray);
      return retArray;
    }
    return (String[]) ret;
  }

  // inherit javadocs
  public StringIndex getStringIndex (IndexReader reader, String field)
  throws IOException {
    field = field.intern();
    Object ret = lookup (reader, field, STRING_INDEX);
    if (ret == null) {
      // NOTE: slots of retArray that are never written below keep the value 0,
      // so a document with no term in this field refers to mterms[0].
      final int[] retArray = new int[reader.maxDoc()];
      String[] mterms = new String[reader.maxDoc()];
      if (retArray.length > 0) {
        TermDocs termDocs = reader.termDocs();
        TermEnum termEnum = reader.terms (new Term (field, ""));
        int t = 0;  // current term number
        try {
          if (termEnum.term() == null) {
            throw new RuntimeException ("no terms in field " + field);
          }
          do {
            Term term = termEnum.term();
            if (term.field() != field) break;

            // store term text
            // we expect that there is at most one term per document
            if (t >= mterms.length) throw new RuntimeException ("there are more terms than documents in field \"" + field + "\"");
            mterms[t] = term.text();

            termDocs.seek (termEnum);
            while (termDocs.next()) {
              retArray[termDocs.doc()] = t;
            }

            t++;
          } while (termEnum.next());
        } finally {
          // close the enumerator even if closing termDocs fails
          try {
            termDocs.close();
          } finally {
            termEnum.close();
          }
        }

        if (t == 0) {
          // if there are no terms, make the term array
          // have a single null entry
          mterms = new String[1];
        } else if (t < mterms.length) {
          // if there are less terms than documents,
          // trim off the dead array space
          String[] terms = new String[t];
          System.arraycopy (mterms, 0, terms, 0, t);
          mterms = terms;
        }
      }
      StringIndex value = new StringIndex (retArray, mterms);
      store (reader, field, STRING_INDEX, value);
      return value;
    }
    return (StringIndex) ret;
  }

  // Type detection in getAuto() is done by attempting to parse the first term
  // rather than by matching regular expressions: java.util.regex was removed
  // from this class for java 1.3 compatibility.

  // inherit javadocs
  public Object getAuto (IndexReader reader, String field)
  throws IOException {
    field = field.intern();
    Object ret = lookup (reader, field, SortField.AUTO);
    if (ret == null) {
      TermEnum enumerator = reader.terms (new Term (field, ""));
      try {
        Term term = enumerator.term();
        if (term == null) {
          throw new RuntimeException ("no terms in field " + field + " - cannot determine sort type");
        }
        if (term.field() == field) {
          // only the first term of the field decides the type
          String termtext = term.text().trim();

          // Java 1.3 level code:
          try {
            Integer.parseInt (termtext);
            ret = getInts (reader, field);
          } catch (NumberFormatException nfe1) {
            try {
              Float.parseFloat (termtext);
              ret = getFloats (reader, field);
            } catch (NumberFormatException nfe2) {
              ret = getStringIndex (reader, field);
            }
          }
          if (ret != null) {
            // cache the same value under AUTO in addition to the entry
            // stored by the typed accessor called above
            store (reader, field, SortField.AUTO, ret);
          }
        } else {
          throw new RuntimeException ("field \"" + field + "\" does not appear to be indexed");
        }
      } finally {
        enumerator.close();
      }
    }
    return ret;
  }

  // inherit javadocs
  public Comparable[] getCustom (IndexReader reader, String field, SortComparator comparator)
  throws IOException {
    field = field.intern();
    Object ret = lookup (reader, field, comparator);
    if (ret == null) {
      final Comparable[] retArray = new Comparable[reader.maxDoc()];
      if (retArray.length > 0) {
        TermDocs termDocs = reader.termDocs();
        TermEnum termEnum = reader.terms (new Term (field, ""));
        try {
          if (termEnum.term() == null) {
            throw new RuntimeException ("no terms in field " + field);
          }
          do {
            Term term = termEnum.term();
            if (term.field() != field) break;
            Comparable termval = comparator.getComparable (term.text());
            termDocs.seek (termEnum);
            while (termDocs.next()) {
              retArray[termDocs.doc()] = termval;
            }
          } while (termEnum.next());
        } finally {
          // close the enumerator even if closing termDocs fails
          try {
            termDocs.close();
          } finally {
            termEnum.close();
          }
        }
      }
      // store under the comparator-based key: that is the key the lookup
      // above uses, so an int-typed CUSTOM key would never be found again
      store (reader, field, comparator, retArray);
      return retArray;
    }
    // cached values are Comparable[], not necessarily String[]
    return (Comparable[]) ret;
  }
}

View File

@ -101,7 +101,9 @@ extends PriorityQueue {
case SortField.STRING:
String s1 = (String) docA.fields[i];
String s2 = (String) docB.fields[i];
c = s2.compareTo(s1);
if (s2 == null) c = -1; // could be null if there are
else if (s1 == null) c = 1; // no terms in the given field
else c = s2.compareTo(s1);
break;
case SortField.FLOAT:
float f1 = ((Float)docA.fields[i]).floatValue();
@ -139,7 +141,9 @@ extends PriorityQueue {
case SortField.STRING:
String s1 = (String) docA.fields[i];
String s2 = (String) docB.fields[i];
c = s1.compareTo(s2);
if (s1 == null) c = -1; // could be null if there are
else if (s2 == null) c = 1; // no terms in the given field
else c = s1.compareTo(s2);
break;
case SortField.FLOAT:
float f1 = ((Float)docA.fields[i]).floatValue();

View File

@ -17,311 +17,282 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.util.PriorityQueue;
import java.io.IOException;
import java.util.Hashtable;
import java.util.WeakHashMap;
import java.util.Map;
/**
* Expert: Base class for collecting results from a search and sorting
* them by terms in a given field in each document.
*
* <p>When one of these objects is created, a TermEnumerator is
* created to fetch all the terms in the index for the given field.
* The value of each term is assumed to represent a
* sort position. Each document is assumed to contain one of the
* terms, indicating where in the sort it belongs.
*
* <p><h3>Memory Usage</h3>
*
* <p>A static cache is maintained. This cache contains an integer
* or float array of length <code>IndexReader.maxDoc()</code> for each field
* name for which a sort is performed. In other words, the size of the
* cache in bytes is:
*
* <p><code>4 * IndexReader.maxDoc() * (# of different fields actually used to sort)</code>
*
* <p>For String fields, the cache is larger: in addition to the
* above array, the value of every term in the field is kept in memory.
* If there are many unique terms in the field, this could
* be quite large.
*
* <p>Note that the size of the cache is not affected by how many
* fields are in the index and <i>might</i> be used to sort - only by
* the ones actually used to sort a result set.
*
* <p>The cache is cleared each time a new <code>IndexReader</code> is
* passed in, or if the value returned by <code>maxDoc()</code>
* changes for the current IndexReader. This class is not set up to
* be able to efficiently sort hits from more than one index
* simultaneously.
* Expert: A hit queue for sorting hits by terms in more than one field.
* Uses <code>FieldCache.DEFAULT</code> for maintaining internal term lookup tables.
*
* <p>Created: Dec 8, 2003 12:56:03 PM
*
* @author Tim Jones (Nacimiento Software)
* @since lucene 1.4
* @version $Id$
* @see Searchable#search(Query,Filter,int,Sort)
* @see FieldCache
*/
abstract class FieldSortedHitQueue
class FieldSortedHitQueue
extends PriorityQueue {
/**
* Keeps track of the IndexReader which the cache
* applies to. If it changes, the cache is cleared.
* We only store the hashcode so as not to mess up
* garbage collection by having a reference to an
* IndexReader.
*/
protected static int lastReaderHash;
/**
* Contains the cache of sort information, mapping
* String (field names) to ScoreDocComparator.
*/
protected static final Hashtable fieldCache = new Hashtable();
/** The pattern used to detect integer values in a field */
/** removed for java 1.3 compatibility
protected static final Pattern pIntegers = Pattern.compile ("[0-9\\-]+");
**/
/** The pattern used to detect float values in a field */
/** removed for java 1.3 compatibility
protected static final Object pFloats = Pattern.compile ("[0-9+\\-\\.eEfFdD]+");
**/
/**
* Returns a comparator for the given field. If there is already one in the cache, it is returned.
* Otherwise one is created and put into the cache. If <code>reader</code> is different than the
* one used for the current cache, or has changed size, the cache is cleared first.
* @param reader Index to use.
* @param field Field to sort by.
* @return Comparator; never <code>null</code>.
* @throws IOException If an error occurs reading the index.
* @see #determineComparator
*/
static ScoreDocComparator getCachedComparator (final IndexReader reader, final String field, final int type, final SortComparatorSource factory)
throws IOException {
if (type == SortField.DOC) return ScoreDocComparator.INDEXORDER;
if (type == SortField.SCORE) return ScoreDocComparator.RELEVANCE;
// see if we have already generated a comparator for this field
if (reader.hashCode() == lastReaderHash) {
ScoreDocLookupComparator comparer = (ScoreDocLookupComparator) fieldCache.get (field);
if (comparer != null && comparer.sizeMatches(reader.maxDoc())) {
return comparer;
}
} else {
lastReaderHash = reader.hashCode();
fieldCache.clear();
}
ScoreDocComparator comparer = null;
switch (type) {
case SortField.AUTO: comparer = determineComparator (reader, field); break;
case SortField.STRING: comparer = StringSortedHitQueue.comparator (reader, field); break;
case SortField.INT: comparer = IntegerSortedHitQueue.comparator (reader, field); break;
case SortField.FLOAT: comparer = FloatSortedHitQueue.comparator (reader, field); break;
case SortField.CUSTOM: comparer = factory.newComparator (reader, field); break;
default:
throw new RuntimeException ("invalid sort field type: "+type);
}
// store the comparator in the cache for reuse
fieldCache.put (field, comparer);
return comparer;
}
/** Clears the static cache of sorting information. */
static void clearCache() {
fieldCache.clear();
}
/**
* Returns a FieldSortedHitQueue sorted by the given ScoreDocComparator.
* @param comparator Comparator to use.
* @param size Number of hits to retain.
* @return Hit queue sorted using the given comparator.
*/
static FieldSortedHitQueue getInstance (ScoreDocComparator comparator, int size) {
return new FieldSortedHitQueue (comparator, size) {
// dummy out the abstract method
protected ScoreDocLookupComparator createComparator (IndexReader reader, String field) throws IOException {
return null;
}
};
}
/**
* Looks at the actual values in the field and determines whether
* they contain Integers, Floats or Strings. Only the first term in the field
* is looked at.
* <p>The following patterns are used to determine the content of the terms:
* <p><table border="1" cellspacing="0" cellpadding="3">
* <tr><th>Sequence</th><th>Pattern</th><th>Type</th></tr>
* <tr><td>1</td><td>[0-9\-]+</td><td>Integer</td></tr>
* <tr><td>2</td><td>[0-9+\-\.eEfFdD]+</td><td>Float</td></tr>
* <tr><td>3</td><td><i>(none - default)</i></td><td>String</td></tr>
* </table>
*
* @param reader Index to use.
* @param field Field to create comparator for.
* @return Comparator appropriate for the terms in the given field.
* @throws IOException If an error occurs reading the index.
*/
protected static ScoreDocComparator determineComparator (IndexReader reader, String field)
throws IOException {
field = field.intern();
TermEnum enumerator = reader.terms (new Term (field, ""));
try {
Term term = enumerator.term();
if (term == null) {
throw new RuntimeException ("no terms in field "+field+" - cannot determine sort type");
}
if (term.field() == field) {
String termtext = term.text().trim();
/**
* Java 1.4 level code:
if (pIntegers.matcher(termtext).matches())
return IntegerSortedHitQueue.comparator (reader, enumerator, field);
else if (pFloats.matcher(termtext).matches())
return FloatSortedHitQueue.comparator (reader, enumerator, field);
*/
// Java 1.3 level code:
try {
Integer.parseInt (termtext);
return IntegerSortedHitQueue.comparator (reader, enumerator, field);
} catch (NumberFormatException nfe) {
// nothing
}
try {
Float.parseFloat (termtext);
return FloatSortedHitQueue.comparator (reader, enumerator, field);
} catch (NumberFormatException nfe) {
// nothing
}
return StringSortedHitQueue.comparator (reader, enumerator, field);
} else {
throw new RuntimeException ("field \""+field+"\" does not appear to be indexed");
}
} finally {
enumerator.close();
}
}
/**
* The sorting priority used. The first element is set by the constructors.
* The result is that sorting is done by field value, then by index order.
*/
private final ScoreDocComparator[] comparators = new ScoreDocComparator[] {
null, ScoreDocComparator.INDEXORDER
};
/**
* Creates a hit queue sorted by the given field. Hits are sorted by the field, then
* by index order.
* @param reader IndexReader to use.
* @param field Field to sort by.
* @param size Number of hits to return - see {@link PriorityQueue#initialize(int) initialize}
* @throws IOException If the internal term enumerator fails.
*/
FieldSortedHitQueue (IndexReader reader, String field, int size)
throws IOException {
// reset the cache if we have a new reader
int hash = reader.hashCode();
if (hash != lastReaderHash) {
lastReaderHash = hash;
fieldCache.clear();
}
// initialize the PriorityQueue
initialize (size);
// set the sort
comparators[0] = initializeSort (reader, field);
/**
 * Creates a hit queue sorted by the given list of fields. One comparator
 * is obtained per sort field through {@link #getCachedComparator}, and a
 * matching SortField is recorded that carries the comparator's own sort type.
 * @param reader  Index to use.
 * @param fields  Field names, in priority order (highest priority first).  Cannot be <code>null</code> or empty.
 * @param size    The number of hits to retain.  Must be greater than zero.
 * @throws IOException
 */
FieldSortedHitQueue (IndexReader reader, SortField[] fields, int size)
throws IOException {
  final int count = fields.length;
  comparators = new ScoreDocComparator[count];
  this.fields = new SortField[count];
  for (int slot = 0; slot < count; slot++) {
    final SortField requested = fields[slot];
    final String fieldname = requested.getField();
    final ScoreDocComparator comparator =
        getCachedComparator (reader, fieldname, requested.getType(), requested.getFactory());
    comparators[slot] = comparator;
    // remember the type reported by the comparator, not the requested one
    this.fields[slot] = new SortField (fieldname, comparator.sortType(), requested.getReverse());
  }
  initialize (size);
}
/**
* Creates a sorted hit queue based on an existing comparator. The hits
* are sorted by the given comparator, then by index order.
* @param comparator Comparator used to sort hits.
* @param size Number of hits to retain.
*/
protected FieldSortedHitQueue (ScoreDocComparator comparator, int size) {
initialize (size); // initialize the PriorityQueue
comparators[0] = comparator; // set the sort
}
/** Stores a comparator corresponding to each field being sorted by */
protected ScoreDocComparator[] comparators;
/** Stores the sort criteria being used. */
protected SortField[] fields;
/** Stores the maximum score value encountered, for normalizing.
* we only care about scores greater than 1.0 - if all the scores
* are less than 1.0, we don't have to normalize. */
protected float maxscore = 1.0f;
/**
* Returns whether <code>a</code> is less relevant than <code>b</code>
* @param a ScoreDoc
* @param b ScoreDoc
* @return <code>true</code> if document <code>a</code> should be sorted after document <code>b</code>.
*/
protected final boolean lessThan (final Object a, final Object b) {
final ScoreDoc docA = (ScoreDoc) a;
final ScoreDoc docB = (ScoreDoc) b;
final int n = comparators.length;
int c = 0;
for (int i=0; i<n && c==0; ++i) {
c = comparators[i].compare (docA, docB);
}
return c > 0;
}
/**
* Returns whether <code>a</code> is less relevant than <code>b</code>.
* @param a ScoreDoc
* @param b ScoreDoc
* @return <code>true</code> if document <code>a</code> should be sorted after document <code>b</code>.
*/
protected final boolean lessThan (final Object a, final Object b) {
final ScoreDoc docA = (ScoreDoc) a;
final ScoreDoc docB = (ScoreDoc) b;
// keep track of maximum score
if (docA.score > maxscore) maxscore = docA.score;
if (docB.score > maxscore) maxscore = docB.score;
/**
* Initializes the cache of sort information. <code>fieldCache</code> is queried
* to see if it has the term information for the given field.
* If so, and if the reader still has the same value for maxDoc()
* (note that we assume new IndexReaders are caught during the
* constructor), the existing data is used. If not, all the term values
* for the given field are fetched. The value of the term is assumed
* to indicate the sort order for any documents containing the term. Documents
* should only have one term in the given field. Multiple documents
* can share the same term if desired, in which case they will be
* considered equal during the sort.
* @param reader The document index.
* @param field The field to sort by.
* @throws IOException If createComparator(IndexReader,String) fails - usually caused by the term enumerator failing.
*/
protected final ScoreDocComparator initializeSort (IndexReader reader, String field)
throws IOException {
ScoreDocLookupComparator comparer = (ScoreDocLookupComparator) fieldCache.get (field);
if (comparer == null || !comparer.sizeMatches(reader.maxDoc())) {
comparer = createComparator (reader, field);
fieldCache.put (field, comparer);
}
return comparer;
// run comparators
final int n = comparators.length;
int c = 0;
for (int i=0; i<n && c==0; ++i) {
c = (fields[i].reverse) ? comparators[i].compare (docB, docA)
: comparators[i].compare (docA, docB);
}
return c > 0;
}
/**
* Subclasses should implement this method to provide an appropriate ScoreDocLookupComparator.
* @param reader Index to use.
* @param field Field to use for sorting.
* @return Comparator to use to sort hits.
* @throws IOException If an error occurs reading the index.
*/
protected abstract ScoreDocLookupComparator createComparator (IndexReader reader, String field)
throws IOException;
}
/**
 * Stores into the given FieldDoc the values used to sort that document,
 * one per comparator. These values are not the raw values out of the
 * index, but the internal representation of them. This is so the given
 * search hit can be collated by a MultiSearcher with other search hits.
 * The score is divided by the maximum score seen so far when that
 * maximum is greater than 1.0.
 * @param  doc  The FieldDoc to store sort values into.
 * @return  The same FieldDoc passed in.
 * @see Searchable#search(Query,Filter,int,Sort)
 */
FieldDoc fillFields (final FieldDoc doc) {
  final int count = comparators.length;
  final Comparable[] sortValues = new Comparable[count];
  int slot = 0;
  while (slot < count) {
    sortValues[slot] = comparators[slot].sortValue (doc);
    ++slot;
  }
  doc.fields = sortValues;
  // normalize scores
  if (maxscore > 1.0f) {
    doc.score /= maxscore;
  }
  return doc;
}
/**
 * Returns the sort criteria in use by this hit queue.
 * @return  The internal SortField array itself, not a copy.
 */
SortField[] getFields() {
  return this.fields;
}
/** Internal cache of comparators. Similar to FieldCache, only
* caches comparators instead of term values. */
static final Map Comparators = new WeakHashMap();
/**
 * Returns a comparator if it is in the cache.
 * When <code>factory</code> is not <code>null</code> the cache key is
 * built from the factory and <code>type</code> is not part of the key
 * arguments; otherwise the key is built from <code>type</code>.
 * @return  The cached comparator, or <code>null</code> if there is none.
 */
static ScoreDocComparator lookup (IndexReader reader, String field, int type, Object factory) {
  final FieldCacheImpl.Entry key;
  if (factory == null) {
    key = new FieldCacheImpl.Entry (reader, field, type);
  } else {
    key = new FieldCacheImpl.Entry (reader, field, factory);
  }
  synchronized (Comparators) {
    Object cached = Comparators.get (key);
    return (ScoreDocComparator) cached;
  }
}
/**
 * Stores a comparator into the cache.
 * The cache key is built the same way as in the comparator lookup: from
 * <code>factory</code> when it is not <code>null</code>, otherwise from
 * <code>type</code>.
 * @return  Whatever the cache held for that key before, as returned by <code>Map.put</code>.
 */
static Object store (IndexReader reader, String field, int type, Object factory, Object value) {
  final FieldCacheImpl.Entry key;
  if (factory == null) {
    key = new FieldCacheImpl.Entry (reader, field, type);
  } else {
    key = new FieldCacheImpl.Entry (reader, field, factory);
  }
  synchronized (Comparators) {
    Object previous = Comparators.put (key, value);
    return previous;
  }
}
/**
 * Returns a comparator for the given field and sort type.
 * <code>SortField.DOC</code> and <code>SortField.SCORE</code> map to the
 * shared index-order and relevance comparators and never touch the cache.
 * For every other type the comparator cache is consulted first; on a miss
 * a comparator is created for the type and stored for reuse.
 * @param reader     Index to use.
 * @param fieldname  Field to sort by.
 * @param type       One of the SortField type constants.
 * @param factory    Source of comparators for <code>SortField.CUSTOM</code>; also used as cache key when not <code>null</code>.
 * @return  Comparator for sorting hits.
 * @throws IOException  If an error occurs reading the index.
 */
static ScoreDocComparator getCachedComparator (IndexReader reader, String fieldname, int type, SortComparatorSource factory)
throws IOException {
  if (type == SortField.DOC) {
    return ScoreDocComparator.INDEXORDER;
  }
  if (type == SortField.SCORE) {
    return ScoreDocComparator.RELEVANCE;
  }

  final ScoreDocComparator cached = lookup (reader, fieldname, type, factory);
  if (cached != null) {
    return cached;
  }

  ScoreDocComparator created;
  switch (type) {
    case SortField.AUTO:   created = comparatorAuto (reader, fieldname);        break;
    case SortField.INT:    created = comparatorInt (reader, fieldname);         break;
    case SortField.FLOAT:  created = comparatorFloat (reader, fieldname);       break;
    case SortField.STRING: created = comparatorString (reader, fieldname);      break;
    case SortField.CUSTOM: created = factory.newComparator (reader, fieldname); break;
    default:
      throw new RuntimeException ("unknown field type: "+type);
  }
  store (reader, fieldname, type, factory, created);
  return created;
}
/**
 * Returns a comparator for sorting hits according to a field containing integers.
 * The per-document values are fetched from <code>FieldCache.DEFAULT</code>
 * once, when the comparator is created.
 * @param reader  Index to use.
 * @param fieldname  Field containing integer values.
 * @return  Comparator for sorting hits.
 * @throws IOException If an error occurs reading the index.
 */
static ScoreDocComparator comparatorInt (final IndexReader reader, final String fieldname)
throws IOException {
  final String field = fieldname.intern();
  final int[] values = FieldCache.DEFAULT.getInts (reader, field);
  return new ScoreDocComparator() {

    public final int compare (final ScoreDoc i, final ScoreDoc j) {
      final int left = values[i.doc];
      final int right = values[j.doc];
      return (left < right) ? -1 : ((left > right) ? 1 : 0);
    }

    public Comparable sortValue (final ScoreDoc i) {
      return new Integer (values[i.doc]);
    }

    public int sortType() {
      return SortField.INT;
    }
  };
}
/**
 * Builds a comparator that orders hits by the float values of a field.
 * @param reader Index to use.
 * @param fieldname Field containing float values.
 * @return Comparator for sorting hits.
 * @throws IOException If an error occurs reading the index.
 */
static ScoreDocComparator comparatorFloat (final IndexReader reader, final String fieldname)
throws IOException {
  // values come from the shared field cache and are indexed by document number
  final float[] values = FieldCache.DEFAULT.getFloats (reader, fieldname.intern());
  return new ScoreDocComparator () {

    public final int compare (final ScoreDoc i, final ScoreDoc j) {
      final float left = values[i.doc];
      final float right = values[j.doc];
      return (left < right) ? -1 : ((left > right) ? 1 : 0);
    }

    public Comparable sortValue (final ScoreDoc i) {
      return new Float (values[i.doc]);
    }

    public int sortType() {
      return SortField.FLOAT;
    }
  };
}
/**
 * Builds a comparator that orders hits by the string values of a field.
 * Hits are compared through the per-document ordinals of the cached
 * StringIndex rather than by comparing the strings themselves.
 * @param reader Index to use.
 * @param fieldname Field containing string values.
 * @return Comparator for sorting hits.
 * @throws IOException If an error occurs reading the index.
 */
static ScoreDocComparator comparatorString (final IndexReader reader, final String fieldname)
throws IOException {
  final FieldCache.StringIndex index = FieldCache.DEFAULT.getStringIndex (reader, fieldname.intern());
  final int[] ordinals = index.order;     // per document: position of its term in termTexts
  final String[] termTexts = index.lookup;
  return new ScoreDocComparator () {

    public final int compare (final ScoreDoc i, final ScoreDoc j) {
      final int left = ordinals[i.doc];
      final int right = ordinals[j.doc];
      return (left < right) ? -1 : ((left > right) ? 1 : 0);
    }

    public Comparable sortValue (final ScoreDoc i) {
      return termTexts[ordinals[i.doc]];
    }

    public int sortType() {
      return SortField.STRING;
    }
  };
}
/**
 * Builds a comparator for a field whose value type is not known in advance.
 * The field cache is asked for the field's automatically detected values, and
 * the kind of object it hands back (int array, float array, string array or
 * StringIndex) selects which of the typed comparator methods of this class
 * produces the result.
 * @param reader Index to use.
 * @param fieldname Field containing values.
 * @return Comparator for sorting hits.
 * @throws IOException If an error occurs reading the index.
 */
static ScoreDocComparator comparatorAuto (final IndexReader reader, final String fieldname)
throws IOException {
  final String field = fieldname.intern();
  final Object detected = FieldCache.DEFAULT.getAuto (reader, field);

  if (detected instanceof int[]) {
    return comparatorInt (reader, field);
  }
  if (detected instanceof float[]) {
    return comparatorFloat (reader, field);
  }
  // both string representations are sorted by the string comparator
  if (detected instanceof FieldCache.StringIndex || detected instanceof String[]) {
    return comparatorString (reader, field);
  }
  throw new RuntimeException ("unknown data type in field '"+field+"'");
}
}

View File

@ -1,150 +0,0 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import java.io.IOException;
/**
* Expert: A sorted hit queue for fields that contain strictly floating point values.
* Hits are sorted into the queue by the values in the field and then by document number.
*
* <p>Created: Feb 2, 2004 9:23:03 AM
*
* @author Tim Jones (Nacimiento Software)
* @since lucene 1.4
* @version $Id$
*/
class FloatSortedHitQueue
extends FieldSortedHitQueue {

  /**
   * Creates a hit queue sorted over the given field containing float values.
   * @param reader  Index to use.
   * @param float_field  Field containing float sort information
   * @param size    Number of hits to collect.
   * @throws IOException  If an error occurs reading the index.
   */
  FloatSortedHitQueue (IndexReader reader, String float_field, int size)
  throws IOException {
    super (reader, float_field, size);
  }

  /**
   * Returns a comparator for sorting hits according to a field containing floats.
   * Just calls <code>comparator(IndexReader,String)</code>.
   * @param reader  Index to use.
   * @param field  Field containing float values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   */
  protected ScoreDocLookupComparator createComparator (final IndexReader reader, final String field)
  throws IOException {
    return comparator (reader, field);
  }

  /**
   * Returns a comparator for sorting hits according to a field containing floats.
   * Opens a term enumerator positioned at the start of the field and closes it
   * again once the comparator has been built.
   * @param reader  Index to use.
   * @param fieldname  Field containing float values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   */
  static ScoreDocLookupComparator comparator (final IndexReader reader, final String fieldname)
  throws IOException {
    final TermEnum enumerator = reader.terms (new Term (fieldname, ""));
    try {
      // the comparator reads every term it needs while it is being constructed,
      // so the enumerator is no longer needed once this call returns
      return comparator (reader, enumerator, fieldname);
    } finally {
      enumerator.close();
    }
  }

  /**
   * Returns a comparator for sorting hits according to a field containing floats using the given enumerator
   * to collect term values. The enumerator is read while the comparator is
   * constructed and is not closed by this method.
   * @param reader  Index to use.
   * @param enumerator  Term enumerator to read the field's terms from.
   * @param fieldname  Field containing float values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   */
  static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String fieldname)
  throws IOException {
    final String field = fieldname.intern();
    return new ScoreDocLookupComparator () {

      /** Field value of each document, indexed by document number. */
      protected final float[] fieldOrder = generateSortIndex();

      /**
       * Builds the per-document value table. Documents that are never
       * reported for a term keep the array default of 0; when a document is
       * reported for several terms the last term enumerated wins.
       */
      protected final float[] generateSortIndex()
      throws IOException {

        float[] retArray = new float[reader.maxDoc()];
        if (retArray.length > 0) {
          TermDocs termDocs = reader.termDocs ();
          try {
            if (enumerator.term() == null) {
              throw new RuntimeException ("no terms in field "+field);
            }
            do {
              Term term = enumerator.term();
              // identity comparison against the interned field name
              // NOTE(review): presumably Term.field() returns an interned string - confirm
              if (term.field() != field) break;
              float termval = Float.parseFloat (term.text());
              termDocs.seek (enumerator);
              while (termDocs.next()) {
                retArray[termDocs.doc()] = termval;
              }
            } while (enumerator.next());
          } finally {
            termDocs.close();
          }
        }
        return retArray;
      }

      public final int compare (final ScoreDoc i, final ScoreDoc j) {
        final float fi = fieldOrder[i.doc];
        final float fj = fieldOrder[j.doc];
        if (fi < fj) return -1;
        if (fi > fj) return 1;
        return 0;
      }

      public final int compareReverse (final ScoreDoc i, final ScoreDoc j) {
        final float fi = fieldOrder[i.doc];
        final float fj = fieldOrder[j.doc];
        if (fi > fj) return -1;
        if (fi < fj) return 1;
        return 0;
      }

      public final boolean sizeMatches (final int n) {
        return fieldOrder.length == n;
      }

      public Comparable sortValue (final ScoreDoc i) {
        return new Float (fieldOrder[i.doc]);
      }

      public int sortType() {
        return SortField.FLOAT;
      }
    };
  }
}

View File

@ -115,8 +115,8 @@ public class IndexSearcher extends Searcher {
return new TopFieldDocs(0, new ScoreDoc[0], sort.fields);
final BitSet bits = filter != null ? filter.bits(reader) : null;
final MultiFieldSortedHitQueue hq =
new MultiFieldSortedHitQueue(reader, sort.fields, nDocs);
final FieldSortedHitQueue hq =
new FieldSortedHitQueue(reader, sort.fields, nDocs);
final int[] totalHits = new int[1];
scorer.score(new HitCollector() {
public final void collect(int doc, float score) {

View File

@ -1,151 +0,0 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import java.io.IOException;
/**
* Expert: A sorted hit queue for fields that contain strictly integer values.
* Hits are sorted into the queue by the values in the field and then by document number.
*
* <p>Created: Jan 30, 2004 3:35:09 PM
*
* @author Tim Jones (Nacimiento Software)
* @since lucene 1.4
* @version $Id$
*/
class IntegerSortedHitQueue
extends FieldSortedHitQueue {

  /**
   * Creates a hit queue sorted over the given field containing integer values.
   * @param reader Index to use.
   * @param integer_field Field containing integer sort information
   * @param size Number of hits to collect.
   * @throws IOException If an error occurs reading the index.
   */
  IntegerSortedHitQueue (IndexReader reader, String integer_field, int size)
  throws IOException {
    super (reader, integer_field, size);
  }

  /**
   * Returns a comparator for sorting hits according to a field containing integers.
   * Just calls <code>comparator(IndexReader,String)</code>.
   * @param reader  Index to use.
   * @param field  Field containing integer values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   */
  protected ScoreDocLookupComparator createComparator (final IndexReader reader, final String field)
  throws IOException {
    return comparator (reader, field);
  }

  /**
   * Returns a comparator for sorting hits according to a field containing integers.
   * Opens a term enumerator positioned at the start of the field and closes it
   * again once the comparator has been built.
   * @param reader  Index to use.
   * @param fieldname  Field containing integer values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   */
  static ScoreDocLookupComparator comparator (final IndexReader reader, final String fieldname)
  throws IOException {
    final TermEnum enumerator = reader.terms (new Term (fieldname, ""));
    try {
      // the comparator reads every term it needs while it is being constructed,
      // so the enumerator is no longer needed once this call returns
      return comparator (reader, enumerator, fieldname);
    } finally {
      enumerator.close();
    }
  }

  /**
   * Returns a comparator for sorting hits according to a field containing integers using the given enumerator
   * to collect term values. The enumerator is read while the comparator is
   * constructed and is not closed by this method.
   * @param reader  Index to use.
   * @param enumerator  Term enumerator to read the field's terms from.
   * @param fieldname  Field containing integer values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   */
  static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String fieldname)
  throws IOException {
    final String field = fieldname.intern();
    return new ScoreDocLookupComparator() {

      /** Field value of each document, indexed by document number. */
      protected final int[] fieldOrder = generateSortIndex();

      /**
       * Builds the per-document value table. Documents that are never
       * reported for a term keep the array default of 0; when a document is
       * reported for several terms the last term enumerated wins.
       */
      private final int[] generateSortIndex()
      throws IOException {

        final int[] retArray = new int[reader.maxDoc()];
        if (retArray.length > 0) {
          TermDocs termDocs = reader.termDocs();
          try {
            if (enumerator.term() == null) {
              throw new RuntimeException ("no terms in field "+field);
            }
            do {
              Term term = enumerator.term();
              // identity comparison against the interned field name
              // NOTE(review): presumably Term.field() returns an interned string - confirm
              if (term.field() != field) break;
              int termval = Integer.parseInt (term.text());
              termDocs.seek (enumerator);
              while (termDocs.next()) {
                retArray[termDocs.doc()] = termval;
              }
            } while (enumerator.next());
          } finally {
            termDocs.close();
          }
        }
        return retArray;
      }

      public final int compare (final ScoreDoc i, final ScoreDoc j) {
        final int fi = fieldOrder[i.doc];
        final int fj = fieldOrder[j.doc];
        if (fi < fj) return -1;
        if (fi > fj) return 1;
        return 0;
      }

      public final int compareReverse (final ScoreDoc i, final ScoreDoc j) {
        final int fi = fieldOrder[i.doc];
        final int fj = fieldOrder[j.doc];
        if (fi > fj) return -1;
        if (fi < fj) return 1;
        return 0;
      }

      public final boolean sizeMatches (final int n) {
        return fieldOrder.length == n;
      }

      public Comparable sortValue (final ScoreDoc i) {
        return new Integer (fieldOrder[i.doc]);
      }

      public int sortType() {
        return SortField.INT;
      }
    };
  }
}

View File

@ -1,124 +0,0 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.PriorityQueue;
import java.io.IOException;
/**
* Expert: A hit queue for sorting hits by terms in more than one field.
* The type of content in each field could be determined dynamically by
* FieldSortedHitQueue.determineComparator().
*
* <p>Created: Feb 3, 2004 4:46:55 PM
*
* @author Tim Jones (Nacimiento Software)
* @since lucene 1.4
* @version $Id$
* @see FieldSortedHitQueue
* @see Searchable#search(Query,Filter,int,Sort)
*/
class MultiFieldSortedHitQueue
extends PriorityQueue {

  /** One comparator per sort criterion, in priority order. */
  protected ScoreDocComparator[] comparators;

  /** The sort criteria in use, rebuilt with the sort type each comparator reports. */
  protected SortField[] fields;

  /** Largest score seen so far by <code>lessThan</code>, used for normalizing.
   * Starts at 1.0 because only scores above 1.0 require normalization. */
  protected float maxscore = 1.0f;

  /**
   * Creates a hit queue sorted by the given list of sort criteria.
   * @param reader  Index to use.
   * @param fields  Sort criteria, in priority order (highest priority first).  Cannot be <code>null</code> or empty.
   * @param size    The number of hits to retain.  Must be greater than zero.
   * @throws IOException
   */
  MultiFieldSortedHitQueue (IndexReader reader, SortField[] fields, int size)
  throws IOException {
    final int count = fields.length;
    comparators = new ScoreDocComparator[count];
    this.fields = new SortField[count];
    for (int k = 0; k < count; k++) {
      final SortField requested = fields[k];
      final String name = requested.getField();
      final ScoreDocComparator cmp =
        FieldSortedHitQueue.getCachedComparator (reader, name, requested.getType(), requested.getFactory());
      comparators[k] = cmp;
      // record the sort type the comparator reports rather than the requested one
      this.fields[k] = new SortField (name, cmp.sortType(), requested.getReverse());
    }
    initialize (size);
  }

  /**
   * Returns whether <code>a</code> is less relevant than <code>b</code>.
   * Also updates the running maximum score as a side effect.
   * @param a ScoreDoc
   * @param b ScoreDoc
   * @return <code>true</code> if document <code>a</code> should be sorted after document <code>b</code>.
   */
  protected final boolean lessThan (final Object a, final Object b) {
    final ScoreDoc first = (ScoreDoc) a;
    final ScoreDoc second = (ScoreDoc) b;

    // keep track of maximum score
    if (first.score > maxscore) maxscore = first.score;
    if (second.score > maxscore) maxscore = second.score;

    // the first criterion that tells the documents apart decides the order
    final int count = comparators.length;
    for (int k = 0; k < count; k++) {
      final int outcome;
      if (fields[k].reverse) {
        outcome = comparators[k].compareReverse (first, second);
      } else {
        outcome = comparators[k].compare (first, second);
      }
      if (outcome != 0) return outcome > 0;
    }
    return false;
  }

  /**
   * Given a FieldDoc object, stores the values used
   * to sort the given document.  These values are not the raw
   * values out of the index, but the internal representation
   * of them.  This is so the given search hit can be collated
   * by a MultiSearcher with other search hits.
   * @param  doc  The FieldDoc to store sort values into.
   * @return  The same FieldDoc passed in.
   * @see Searchable#search(Query,Filter,int,Sort)
   */
  FieldDoc fillFields (final FieldDoc doc) {
    final Comparable[] sortValues = new Comparable[comparators.length];
    for (int k = 0; k < sortValues.length; k++) {
      sortValues[k] = comparators[k].sortValue (doc);
    }
    doc.fields = sortValues;
    if (maxscore > 1.0f) {
      doc.score /= maxscore;   // normalize scores
    }
    return doc;
  }

  /** Returns the SortFields being used by this hit queue. */
  SortField[] getFields() {
    return fields;
  }
}

View File

@ -35,11 +35,6 @@ public interface ScoreDocComparator {
if (i.score < j.score) return 1;
return 0;
}
public int compareReverse (ScoreDoc i, ScoreDoc j) {
if (i.score < j.score) return -1;
if (i.score > j.score) return 1;
return 0;
}
public Comparable sortValue (ScoreDoc i) {
return new Float (i.score);
}
@ -56,11 +51,6 @@ public interface ScoreDocComparator {
if (i.doc > j.doc) return 1;
return 0;
}
public int compareReverse (ScoreDoc i, ScoreDoc j) {
if (i.doc > j.doc) return -1;
if (i.doc < j.doc) return 1;
return 0;
}
public Comparable sortValue (ScoreDoc i) {
return new Integer (i.doc);
}
@ -81,21 +71,11 @@ public interface ScoreDocComparator {
int compare (ScoreDoc i, ScoreDoc j);
/**
* Compares two ScoreDoc objects and returns a result indicating their
* sort order in reverse.
* @param i First ScoreDoc
* @param j Second ScoreDoc
* @return <code>-1</code> if <code>j</code> should come before <code>i</code><br><code>1</code> if <code>j</code> should come after <code>i</code><br><code>0</code> if they are equal
* @see java.util.Comparator
*/
int compareReverse (ScoreDoc i, ScoreDoc j);
/**
* Returns the value used to sort the given document. The
* object returned must implement the java.io.Serializable
* interface.
* interface. This is used by multisearchers to determine how to collate results from their searchers.
* @see FieldDoc
* @param i Document
* @return Serializable object
*/
@ -105,6 +85,7 @@ public interface ScoreDocComparator {
/**
* Returns the type of sort. Should return <code>SortField.SCORE</code>, <code>SortField.DOC</code>, <code>SortField.STRING</code>, <code>SortField.INT</code>,
* <code>SortField.FLOAT</code> or <code>SortField.CUSTOM</code>. It is not valid to return <code>SortField.AUTO</code>.
* This is used by multisearchers to determine how to collate results from their searchers.
* @return One of the constants in SortField.
* @see SortField
*/

View File

@ -1,40 +0,0 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Expert: Compares two ScoreDoc objects for sorting using a lookup table.
*
* <p>Created: Feb 3, 2004 9:59:14 AM
*
* @author Tim Jones (Nacimiento Software)
* @since lucene 1.4
* @version $Id$
*/
public interface ScoreDocLookupComparator
extends ScoreDocComparator {
/**
* Verifies that the internal lookup table is the correct size. This
* comparator uses a lookup table, so it is important that the
* table matches the number of documents in the index.
* @param n Expected size of table.
* @return True if internal table matches expected size; false otherwise
*/
boolean sizeMatches (int n);
}

View File

@ -0,0 +1,68 @@
package org.apache.lucene.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import java.io.IOException;
import java.io.Serializable;
/**
* Abstract base class for sorting hits returned by a Query.
*
* <p>This class should only be used if the other SortField
* types (SCORE, DOC, STRING, INT, FLOAT) do not provide an
* adequate sorting. It maintains an internal cache of values which
* could be quite large. The cache is an array of Comparable,
* one for each document in the index. There is a distinct
* Comparable for each unique term in the field - if
* some documents have the same term in the field, the cache
* array will have entries which reference the same Comparable.
*
* <p>Created: Apr 21, 2004 5:08:38 PM
*
* @author Tim Jones
* @version $Id$
* @since 1.4
*/
public abstract class SortComparator
implements SortComparatorSource {

  /**
   * Creates a comparator backed by the field cache's custom values for the
   * given field, with this object passed to the cache as the custom
   * comparator. Hits are ordered by the natural order of the cached
   * Comparable of each document.
   * @param reader Index to use.
   * @param fieldname Field to sort on.
   * @return Comparator of ScoreDoc objects.
   * @throws IOException If an error occurs reading the index.
   */
  public ScoreDocComparator newComparator (final IndexReader reader, final String fieldname)
  throws IOException {
    // indexed by document number
    final Comparable[] values = FieldCache.DEFAULT.getCustom (reader, fieldname.intern(), this);
    return new ScoreDocComparator() {

      public int compare (ScoreDoc i, ScoreDoc j) {
        final Comparable left = values[i.doc];
        final Comparable right = values[j.doc];
        return left.compareTo (right);
      }

      public Comparable sortValue (ScoreDoc i) {
        return values[i.doc];
      }

      public int sortType(){
        return SortField.CUSTOM;
      }
    };
  }

  /**
   * Returns an object which, when sorted according to natural order,
   * will order the Term values in the correct order.
   * <p>For example, if the Terms contained integer values, this method
   * would return <code>new Integer(termtext)</code>.  Note that this
   * might not always be the most efficient implementation - for this
   * particular example, a better implementation might be to make a
   * ScoreDocComparator that uses an internal lookup table of int.
   * @param termtext The textual value of the term.
   * @return An object representing <code>termtext</code> that sorts according to the natural order of <code>termtext</code>.
   * @see Comparable
   * @see ScoreDocComparator
   */
  protected abstract Comparable getComparable (String termtext);
}

View File

@ -23,6 +23,6 @@ extends Serializable {
* @return Comparator of ScoreDoc objects.
* @throws IOException If an error occurs reading the index.
*/
ScoreDocLookupComparator newComparator (IndexReader reader, String fieldname)
ScoreDocComparator newComparator (IndexReader reader, String fieldname)
throws IOException;
}

View File

@ -62,6 +62,11 @@ implements Serializable {
* sorting is done according to natural order. */
public static final int CUSTOM = 9;
// IMPLEMENTATION NOTE: the FieldCache.STRING_INDEX is in the same "namespace"
// as the above static int values. Any new values must not have the same value
// as FieldCache.STRING_INDEX.
/** Represents sorting by document score (relevancy). */
public static final SortField FIELD_SCORE = new SortField (null, SCORE);

View File

@ -1,185 +0,0 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import java.io.IOException;
/**
* Expert: A sorted hit queue for fields that contain string values.
* Hits are sorted into the queue by the values in the field and then by document number.
* Warning: The internal cache could be quite large, depending on the number of terms
* in the field! All the terms are kept in memory, as well as a sorted array of
* integers representing their relative position.
*
* <p>Created: Feb 2, 2004 9:26:33 AM
*
* @author Tim Jones (Nacimiento Software)
* @since lucene 1.4
* @version $Id$
*/
class StringSortedHitQueue
extends FieldSortedHitQueue {

  /**
   * Creates a hit queue sorted over the given field containing string values.
   * @param reader Index to use.
   * @param string_field Field containing string sort information
   * @param size Number of hits to collect.
   * @throws IOException If an error occurs reading the index.
   */
  StringSortedHitQueue (IndexReader reader, String string_field, int size)
  throws IOException {
    super (reader, string_field, size);
  }

  /**
   * Returns a comparator for sorting hits according to a field containing strings.
   * Just calls <code>comparator(IndexReader,String)</code>.
   * @param reader  Index to use.
   * @param field  Field containing string values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   */
  protected ScoreDocLookupComparator createComparator (final IndexReader reader, final String field)
  throws IOException {
    return comparator (reader, field);
  }

  /**
   * Returns a comparator for sorting hits according to a field containing strings.
   * Opens a term enumerator positioned at the start of the field and closes it
   * again once the comparator has been built.
   * @param reader  Index to use.
   * @param fieldname  Field containing string values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   */
  static ScoreDocLookupComparator comparator (final IndexReader reader, final String fieldname)
  throws IOException {
    final TermEnum enumerator = reader.terms (new Term (fieldname, ""));
    try {
      // the comparator reads every term it needs while it is being constructed,
      // so the enumerator is no longer needed once this call returns
      return comparator (reader, enumerator, fieldname);
    } finally {
      enumerator.close();
    }
  }

  /**
   * Returns a comparator for sorting hits according to a field containing strings using the given enumerator
   * to collect term values. The enumerator is read while the comparator is
   * constructed and is not closed by this method.
   * @param reader  Index to use.
   * @param enumerator  Term enumerator to read the field's terms from.
   * @param fieldname  Field containing string values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   */
  static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String fieldname)
  throws IOException {
    final String field = fieldname.intern();
    return new ScoreDocLookupComparator() {

      /** For each document, the position of its term in <code>terms</code>. */
      protected final int[] fieldOrder = generateSortIndex();

      // Deliberately declared without an initializer: generateSortIndex()
      // assigns it while fieldOrder is being initialized, and an initializer
      // here would run afterwards and overwrite that value.
      protected String[] terms;

      /**
       * Builds the per-document term ordinals and, as a side effect, fills
       * <code>terms</code> with the term texts in enumeration order.
       */
      private final int[] generateSortIndex()
      throws IOException {

        final int[] retArray = new int[reader.maxDoc()];
        final String[] mterms = new String[reader.maxDoc()];  // guess length
        if (retArray.length > 0) {
          TermDocs termDocs = reader.termDocs();
          int t = 0;  // current term number
          try {
            if (enumerator.term() == null) {
              throw new RuntimeException ("no terms in field " + field);
            }

            // NOTE: the contract for TermEnum says the
            // terms will be in natural order (which is
            // ordering by field name, term text).  The
            // contract for TermDocs says the docs will
            // be ordered by document number.  So the
            // following loop will automatically sort the
            // terms in the correct order.

            // if a given document has more than one term
            // in the field, only the last one will be used.

            do {
              Term term = enumerator.term();
              // identity comparison against the interned field name
              // NOTE(review): presumably Term.field() returns an interned string - confirm
              if (term.field() != field) break;

              // store term text
              // we expect that there is at most one term per document
              if (t >= mterms.length) throw new RuntimeException ("there are more terms than documents in field \""+field+"\"");
              mterms[t] = term.text();

              // store which documents use this term
              termDocs.seek (enumerator);
              while (termDocs.next()) {
                retArray[termDocs.doc()] = t;
              }

              t++;
            } while (enumerator.next());

          } finally {
            termDocs.close();
          }

          // if there are less terms than documents,
          // trim off the dead array space
          if (t < mterms.length) {
            terms = new String[t];
            System.arraycopy (mterms, 0, terms, 0, t);
          } else {
            terms = mterms;
          }
        }
        return retArray;
      }

      public final int compare (final ScoreDoc i, final ScoreDoc j) {
        final int fi = fieldOrder[i.doc];
        final int fj = fieldOrder[j.doc];
        if (fi < fj) return -1;
        if (fi > fj) return 1;
        return 0;
      }

      public final int compareReverse (final ScoreDoc i, final ScoreDoc j) {
        final int fi = fieldOrder[i.doc];
        final int fj = fieldOrder[j.doc];
        if (fi > fj) return -1;
        if (fi < fj) return 1;
        return 0;
      }

      public final boolean sizeMatches (final int n) {
        return fieldOrder.length == n;
      }

      public Comparable sortValue (final ScoreDoc i) {
        return terms[fieldOrder[i.doc]];
      }

      public int sortType() {
        return SortField.STRING;
      }
    };
  }
}

View File

@ -64,28 +64,20 @@ implements Comparable, Serializable {
return i;
}
public static SortComparatorSource getComparator () {
public static SortComparatorSource getComparatorSource () {
return new SortComparatorSource () {
public ScoreDocLookupComparator newComparator (final IndexReader reader, String fieldname)
public ScoreDocComparator newComparator (final IndexReader reader, String fieldname)
throws IOException {
final String field = fieldname.intern ();
final TermEnum enumerator = reader.terms (new Term (fieldname, ""));
try {
return new ScoreDocLookupComparator () {
return new ScoreDocComparator () {
protected Comparable[] cachedValues = fillCache (reader, enumerator, field);
public boolean sizeMatches (int n) {
return (cachedValues.length == n);
}
public int compare (ScoreDoc i, ScoreDoc j) {
return cachedValues[i.doc].compareTo (cachedValues[j.doc]);
}
public int compareReverse (ScoreDoc i, ScoreDoc j) {
return cachedValues[j.doc].compareTo (cachedValues[i.doc]);
}
public Comparable sortValue (ScoreDoc i) {
return cachedValues[i.doc];
}
@ -140,4 +132,12 @@ implements Comparable, Serializable {
}
};
}
/**
* Returns a SortComparator whose <code>getComparable</code> wraps each
* term text in a new SampleComparable.
*/
public static SortComparator getComparator() {
return new SortComparator() {
protected Comparable getComparable (String termtext) {
return new SampleComparable (termtext);
}
};
}
}

View File

@ -253,9 +253,14 @@ implements Serializable {
public void testCustomSorts() throws Exception {
sort.setSort (new SortField ("custom", SampleComparable.getComparator()));
sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource()));
assertMatches (full, queryX, sort, "CAIEG");
sort.setSort (new SortField ("custom", SampleComparable.getComparator(), true));
sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource(), true));
assertMatches (full, queryY, sort, "HJDBF");
SortComparator custom = SampleComparable.getComparator();
sort.setSort (new SortField ("custom", custom));
assertMatches (full, queryX, sort, "CAIEG");
sort.setSort (new SortField ("custom", custom, true));
assertMatches (full, queryY, sort, "HJDBF");
}
@ -281,9 +286,14 @@ implements Serializable {
public void testRemoteCustomSort() throws Exception {
Searchable searcher = getRemote();
MultiSearcher multi = new MultiSearcher (new Searchable[] { searcher });
sort.setSort (new SortField ("custom", SampleComparable.getComparator()));
sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource()));
assertMatches (multi, queryX, sort, "CAIEG");
sort.setSort (new SortField ("custom", SampleComparable.getComparator(), true));
sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource(), true));
assertMatches (multi, queryY, sort, "HJDBF");
SortComparator custom = SampleComparable.getComparator();
sort.setSort (new SortField ("custom", custom));
assertMatches (multi, queryX, sort, "CAIEG");
sort.setSort (new SortField ("custom", custom, true));
assertMatches (multi, queryY, sort, "HJDBF");
}