Added hit sorting code, from Tim Jones.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150201 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doug Cutting 2004-02-17 19:00:31 +00:00
parent a8d459a9f5
commit 32d5bcd950
21 changed files with 1948 additions and 498 deletions

View File

@ -35,9 +35,9 @@ $Id$
RemoteSearchable this makes it easy to implement distributed
search systems. (Jean-Francois Halleux via cutting)
5. Added IntegerSortedSearcher and FieldSortedHitQueue classes that
together provide the ability to sort by single-valued Integer
fields. (Tim Jones via Otis)
5. Added support for hit sorting. Results may now be sorted by any
indexed field. For details see the javadoc for
Searcher#search(Query, Sort). (Tim Jones via Cutting)
6. Changed FSDirectory to auto-create a full directory tree that it
needs by using mkdirs() instead of mkdir(). (Mladen Turk via Otis)

View File

@ -0,0 +1,52 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Expert: A ScoreDoc which also contains information about
* how to sort the referenced document.
*
* <p>Created: Feb 11, 2004 1:23:38 PM
*
* @author Tim Jones (Nacimiento Software)
* @since lucene 1.4
* @version $Id$
* @see TopFieldDocs
*/
public class FieldDoc
extends ScoreDoc {

  /**
   * Sort values of the referenced document, one entry per sort criterion.
   * Entries appear in the same order as the criteria of the Sort object
   * the search was run with. Stays <code>null</code> when the two-argument
   * constructor is used.
   * @see Sort
   * @see Searchable#search(Query,Filter,int,Sort)
   */
  public Object[] fields;

  /** Builds a FieldDoc that does not carry any sort values yet. */
  public FieldDoc (int doc, float score) {
    this (doc, score, null);
  }

  /** Builds a FieldDoc that carries the supplied sort values. */
  public FieldDoc (int doc, float score, Object[] fields) {
    super (doc, score);
    this.fields = fields;
  }
}

View File

@ -0,0 +1,151 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.PriorityQueue;
import java.io.IOException;
/**
* Expert: Collects sorted results from Searchables and collates them.
* The elements put into this queue must be of type FieldDoc.
*
* <p>Created: Feb 11, 2004 2:04:21 PM
*
* @author Tim Jones (Nacimiento Software)
* @since lucene 1.4
* @version $Id$
*/
class FieldDocSortedHitQueue
extends PriorityQueue {

  /**
   * Sort criteria, highest priority first. May be <code>null</code> until
   * {@link #setFields} supplies them; by the time hits are compared it must
   * be non-null and must not contain AUTO entries.
   */
  SortField[] fields;

  /**
   * Creates a hit queue sorted by the given list of fields.
   * @param fields Field names, in priority order (highest priority first).
   * @param size  The number of hits to retain.  Must be greater than zero.
   * @throws IOException
   */
  FieldDocSortedHitQueue (SortField[] fields, int size)
  throws IOException {
    this.fields = fields;
    initialize (size);
  }

  /**
   * Allows redefinition of sort fields if they are <code>null</code>.
   * This is to handle the case using ParallelMultiSearcher where the
   * original list contains AUTO and we don't know the actual sort type
   * until the values come back.  Once fields have been set, later calls
   * are ignored.  This method is synchronized.
   * @param fields Sort fields to install if none are set yet.
   */
  synchronized void setFields (SortField[] fields) {
    // Test the instance field, not the parameter: testing the parameter could
    // only ever "install" null and left the queue without sort fields.
    if (this.fields == null) this.fields = fields;
  }

  /** Returns the fields being used to sort. */
  SortField[] getFields() {
    return fields;
  }

  /**
   * Returns whether <code>a</code> is less relevant than <code>b</code>.
   * The sort fields are consulted in priority order and the first one that
   * distinguishes the two documents decides; if none does, the result is
   * <code>false</code>.
   * @param a FieldDoc
   * @param b FieldDoc
   * @return <code>true</code> if document <code>a</code> should be sorted after document <code>b</code>.
   */
  protected final boolean lessThan (final Object a, final Object b) {
    final FieldDoc docA = (FieldDoc) a;
    final FieldDoc docB = (FieldDoc) b;
    final int n = fields.length;
    int c = 0;
    for (int i=0; i<n && c==0; ++i) {
      c = compareValues (fields[i].getType(), docA.fields[i], docB.fields[i]);
      // a reversed field is the exact mirror image of the natural ordering
      if (fields[i].getReverse()) c = -c;
    }
    return c > 0;
  }

  /**
   * Compares two sort values of the given SortField type in that type's
   * natural (non-reversed) order: scores descending, everything else ascending.
   * @param type One of the SortField type constants.
   * @param valueA Sort value of the first document.
   * @param valueB Sort value of the second document.
   * @return -1, 0 or 1 as the first document sorts before, equal to, or after the second.
   * @throws RuntimeException if the type is AUTO or not a known type.
   */
  private static int compareValues (final int type, final Object valueA, final Object valueB) {
    switch (type) {
      case SortField.SCORE: {
        // higher scores come first
        final float r1 = ((Float) valueA).floatValue();
        final float r2 = ((Float) valueB).floatValue();
        if (r1 > r2) return -1;
        if (r1 < r2) return 1;
        return 0;
      }
      case SortField.DOC:
      case SortField.INT:
      case SortField.STRING: {
        // NOTE(review): STRING values are cast to Integer here, exactly as before;
        // confirm that string sort values arrive as integer ordinals, not term text.
        final int i1 = ((Integer) valueA).intValue();
        final int i2 = ((Integer) valueB).intValue();
        if (i1 < i2) return -1;
        if (i1 > i2) return 1;
        return 0;
      }
      case SortField.FLOAT: {
        final float f1 = ((Float) valueA).floatValue();
        final float f2 = ((Float) valueB).floatValue();
        if (f1 < f2) return -1;
        if (f1 > f2) return 1;
        return 0;
      }
      case SortField.AUTO:
        // we cannot handle this - even if we determine the type of object (Float or
        // Integer), we don't necessarily know how to compare them (both SCORE and
        // FLOAT contain floats, but are sorted opposite of each other). Before
        // we get here, each AUTO should have been replaced with its actual value.
        throw new RuntimeException ("FieldDocSortedHitQueue cannot use an AUTO SortField");
      default:
        throw new RuntimeException ("invalid SortField type: "+type);
    }
  }
}

View File

@ -16,37 +16,29 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.util.PriorityQueue;
import java.util.HashMap;
import java.io.IOException;
import java.util.Hashtable;
import java.util.regex.Pattern;
/**
* Expert: collects results from a search and sorts them by terms in a
* given field in each document.
*
* <p>In this version (0.1) the field to sort by must contain strictly
* String representations of Integers.
* See {@link SortedIndexSearcher SortedIndexSearcher} for more
* information. Each document is assumed to have a single term in the
* given field, and the value of the term is the document's relative
* position in the given sort order.
* Expert: Base class for collecting results from a search and sorting
* them by terms in a given field in each document.
*
* <p>When one of these objects is created, a TermEnumerator is
* created to fetch all the terms in the index for the given field.
* The value of each term is assumed to be an integer representing a
* The value of each term is assumed to represent a
* sort position. Each document is assumed to contain one of the
* terms, indicating where in the sort it belongs.
*
* <p><h3>Memory Usage</h3>
*
* <p>A static cache is maintained. This cache contains an integer
* array of length <code>IndexReader.maxDoc()</code> for each field
* or float array of length <code>IndexReader.maxDoc()</code> for each field
* name for which a sort is performed. In other words, the size of
* the cache in bytes is:
*
@ -64,11 +56,11 @@ import java.io.IOException;
*
* <p>Created: Dec 8, 2003 12:56:03 PM
*
* @author "Tim Jones" &lt;tjluc@nacimiento.com&gt;
* @since lucene 1.3
* @version 0.1
* @author Tim Jones (Nacimiento Software)
* @since lucene 1.4
* @version $Id$
*/
public class FieldSortedHitQueue
abstract class FieldSortedHitQueue
extends PriorityQueue {
/**
@ -81,71 +73,196 @@ extends PriorityQueue {
protected static int lastReaderHash;
/**
* Contains the cache of sort information. The
* key is field name, the value an array of int.
* A HashMap is used, and we are careful how we
* handle synchronization. This is because best
* performance is obtained when the same IndexReader
* is used over and over, and we therefore perform
* many reads and few writes.
* Contains the cache of sort information, mapping
* String (field names) to ScoreDocComparator.
*/
protected static HashMap fieldCache;
protected static final Hashtable fieldCache = new Hashtable();
/** The pattern used to detect integer values in a field */
protected static final Pattern pIntegers = Pattern.compile ("[0-9\\-]+");
/** The pattern used to detect float values in a field */
protected static final Pattern pFloats = Pattern.compile ("[0-9+\\-\\.eEfFdD]+");
/** The sort information being used by this instance */
protected int[] fieldOrder;
/**
* Creates a hit queue sorted by the given field.
* Returns a comparator for the given field. If there is already one in the cache, it is returned.
* Otherwise one is created and put into the cache. If <code>reader</code> is different than the
* one used for the current cache, or has changed size, the cache is cleared first.
* @param reader Index to use.
* @param field Field to sort by.
* @return Comparator; never <code>null</code>.
* @throws IOException If an error occurs reading the index.
* @see #determineComparator
*/
static ScoreDocComparator getCachedComparator (final IndexReader reader, final String field, final int type)
throws IOException {
  // `type` is one of the SortField type constants. DOC and SCORE need no
  // per-field data, so they are answered directly and never touch the cache.
  if (type == SortField.DOC) return ScoreDocComparator.INDEXORDER;
  if (type == SortField.SCORE) return ScoreDocComparator.RELEVANCE;

  // The cache holds data for one reader at a time, identified by its hash code.
  // NOTE(review): entries are keyed by field name only, so a comparator cached
  // for one type is returned for a later request on the same field with a
  // different type - confirm callers never mix types for a field.
  final int readerHash = reader.hashCode();
  if (readerHash == lastReaderHash) {
    final ScoreDocLookupComparator cached = (ScoreDocLookupComparator) fieldCache.get (field);
    if (cached != null && cached.sizeMatches(reader.maxDoc())) {
      return cached;
    }
  } else {
    // a different reader: everything cached so far is stale
    lastReaderHash = readerHash;
    fieldCache.clear();
  }

  // Cache miss (or the index changed size): build a fresh comparator.
  // SCORE and DOC cannot reach this point, so they have no case here.
  final ScoreDocComparator comparer;
  switch (type) {
    case SortField.INT:    comparer = IntegerSortedHitQueue.comparator (reader, field); break;
    case SortField.FLOAT:  comparer = FloatSortedHitQueue.comparator (reader, field); break;
    case SortField.STRING: comparer = StringSortedHitQueue.comparator (reader, field); break;
    case SortField.AUTO:   comparer = determineComparator (reader, field); break;
    default:
      throw new RuntimeException ("invalid sort field type: "+type);
  }

  // store the comparator in the cache for reuse
  fieldCache.put (field, comparer);
  return comparer;
}
/**
 * Clears the static cache of sorting information by emptying
 * <code>fieldCache</code>. <code>lastReaderHash</code> is left untouched.
 */
static void clearCache() {
fieldCache.clear();
}
/**
* Returns a FieldSortedHitQueue sorted by the given ScoreDocComparator.
* @param comparator Comparator to use.
* @param size Number of hits to retain.
* @return Hit queue sorted using the given comparator.
*/
static FieldSortedHitQueue getInstance (ScoreDocComparator comparator, int size) {
  // FieldSortedHitQueue is abstract, so a throwaway anonymous subclass is
  // needed just to be able to instantiate it around a ready-made comparator.
  final FieldSortedHitQueue queue = new FieldSortedHitQueue (comparator, size) {
    // placeholder for the abstract method; always yields null
    protected ScoreDocLookupComparator createComparator (IndexReader reader, String field) throws IOException {
      return null;
    }
  };
  return queue;
}
/**
* Looks at the actual values in the field and determines whether
* they contain Integers, Floats or Strings. Only the first term in the field
* is looked at.
* <p>The following patterns are used to determine the content of the terms:
* <p><table border="1" cellspacing="0" cellpadding="3">
* <tr><th>Sequence</th><th>Pattern</th><th>Type</th></tr>
* <tr><td>1</td><td>[0-9\-]+</td><td>Integer</td></tr>
* <tr><td>2</td><td>[0-9+\-\.eEfFdD]+</td><td>Float</td></tr>
* <tr><td>3</td><td><i>(none - default)</i></td><td>String</td></tr>
* </table>
*
* @param reader Index to use.
* @param field Field to create comparator for.
* @return Comparator appropriate for the terms in the given field.
* @throws IOException If an error occurs reading the index.
*/
protected static ScoreDocComparator determineComparator (IndexReader reader, String field)
throws IOException {
  // position the enumerator on the first term at or after (field, "")
  final TermEnum enumerator = reader.terms (new Term (field, ""));
  try {
    final Term first = enumerator.term();
    if (first == null) {
      throw new RuntimeException ("no terms in field "+field);
    }
    // Identity comparison on purpose, as in the rest of this code.
    // NOTE(review): this only works if both strings are interned - confirm
    // that callers always pass an interned field name.
    if (first.field() != field) {
      throw new RuntimeException ("field \""+field+"\" does not appear to be indexed");
    }

    // only this first term decides the type: integer, then float, else string
    final String sample = first.text().trim();
    if (pIntegers.matcher(sample).matches()) {
      return IntegerSortedHitQueue.comparator (reader, enumerator, field);
    }
    if (pFloats.matcher(sample).matches()) {
      return FloatSortedHitQueue.comparator (reader, enumerator, field);
    }
    return StringSortedHitQueue.comparator (reader, enumerator, field);
  } finally {
    enumerator.close();
  }
}
/**
* The sorting priority used. The first element is set by the constructors.
* The result is that sorting is done by field value, then by index order.
*/
private final ScoreDocComparator[] comparators = new ScoreDocComparator[] {
null, ScoreDocComparator.INDEXORDER
};
/**
* Creates a hit queue sorted by the given field. Hits are sorted by the field, then
* by index order.
* @param reader IndexReader to use.
* @param integer_field Field to sort by.
* @param field Field to sort by.
* @param size Number of hits to return - see {@link PriorityQueue#initialize(int) initialize}
* @throws IOException If the internal term enumerator fails.
*/
public FieldSortedHitQueue (IndexReader reader, String integer_field, int size)
FieldSortedHitQueue (IndexReader reader, String field, int size)
throws IOException {
// reset the cache if we have a new reader
int hash = reader.hashCode();
if (hash != lastReaderHash) {
lastReaderHash = hash;
if (fieldCache != null) {
fieldCache.clear();
}
fieldCache = new HashMap();
// initialize the PriorityQueue
initialize (size);
// set the sort
comparators[0] = initializeSort (reader, field);
}
initialize (size);
initializeSort (reader, integer_field);
}
/**
* Compares documents based on the value of the term in the field
* being sorted by. Documents which should appear at the top of the
* list should have low values in the term; documents which should
* appear at the end should have high values.
*
* <p>In the context of this method, "less than" means "less relevant",
* so documents at the top of the list are "greatest" and documents at
* the bottom are "least".
*
* <p>Document A is considered less than Document B
* if A.field.term > B.field.term or A.doc > B.doc.
*
* @param a ScoreDoc object for document a.
* @param b ScoreDoc object for document b.
* @return true if document a is less than document b.
* @see ScoreDoc
* Creates a sorted hit queue based on an existing comparator. The hits
* are sorted by the given comparator, then by index order.
* @param comparator Comparator used to sort hits.
* @param size Number of hits to retain.
*/
protected final boolean lessThan (Object a, Object b) {
ScoreDoc hitA = (ScoreDoc) a;
ScoreDoc hitB = (ScoreDoc) b;
int scoreA = fieldOrder[hitA.doc];
int scoreB = fieldOrder[hitB.doc];
if (scoreA == scoreB)
return hitA.doc > hitB.doc;
else
return scoreA > scoreB; // bigger is really less - the ones at the top should be the lowest
protected FieldSortedHitQueue (ScoreDocComparator comparator, int size) {
initialize (size); // initialize the PriorityQueue
comparators[0] = comparator; // set the sort
}
/**
* Returns whether <code>a</code> is less relevant than <code>b</code>
* @param a ScoreDoc
* @param b ScoreDoc
* @return <code>true</code> if document <code>a</code> should be sorted after document <code>b</code>.
*/
protected final boolean lessThan (final Object a, final Object b) {
  final ScoreDoc first = (ScoreDoc) a;
  final ScoreDoc second = (ScoreDoc) b;
  // consult the comparators in priority order; stop at the first one that
  // tells the two documents apart
  int order = 0;
  int level = 0;
  while (order == 0 && level < comparators.length) {
    order = comparators[level].compare (first, second);
    level++;
  }
  // a positive result means `a` sorts after `b`, i.e. it is "less" for the queue
  return order > 0;
}
/**
* Initializes the cache of sort information. <code>fieldCache</code> is queried
* to see if it has the term information for the given field.
@ -153,54 +270,33 @@ extends PriorityQueue {
* (note that we assume new IndexReaders are caught during the
* constructor), the existing data is used. If not, all the term values
* for the given field are fetched. The value of the term is assumed
* to be the sort index for any documents containing the term. Documents
* to indicate the sort order for any documents containing the term. Documents
* should only have one term in the given field. Multiple documents
* can share the same term if desired (documents with the same term will
* be sorted relative to each other by the order they were placed in
* the index).
* can share the same term if desired, in which case they will be
* considered equal during the sort.
* @param reader The document index.
* @param field The field to sort by.
* @throws IOException If the term enumerator fails.
* @throws IOException If createComparator(IndexReader,String) fails - usually caused by the term enumerator failing.
*/
protected final void initializeSort (IndexReader reader, String field)
protected final ScoreDocComparator initializeSort (IndexReader reader, String field)
throws IOException {
fieldOrder = (int[]) fieldCache.get (field);
if (fieldOrder == null || fieldOrder.length != reader.maxDoc()) {
fieldOrder = new int [reader.maxDoc()];
TermEnum enumerator = reader.terms (new Term (field, ""));
TermDocs termDocs = reader.termDocs();
if (enumerator.term() == null) {
throw new RuntimeException ("no terms in field "+field);
ScoreDocLookupComparator comparer = (ScoreDocLookupComparator) fieldCache.get (field);
if (comparer == null || !comparer.sizeMatches(reader.maxDoc())) {
comparer = createComparator (reader, field);
fieldCache.put (field, comparer);
}
return comparer;
}
try {
Term term = enumerator.term();
while (term.field() == field) {
termDocs.seek (term);
if (termDocs.next()) {
fieldOrder[termDocs.doc()] = Integer.parseInt (term.text());
} else {
throw new RuntimeException ("termDocs.next() failed!");
}
if (!enumerator.next()) {
break;
}
term = enumerator.term();
}
} finally {
enumerator.close();
termDocs.close();
}
// be careful how the cache is updated so we
// don't have synchronization problems. we do
// it this way because we assume updates will be
// few compared to the number of reads.
HashMap newCache = (HashMap) fieldCache.clone();
newCache.put (field, fieldOrder);
fieldCache = newCache;
}
}
/**
* Subclasses should implement this method to provide an appropriate ScoreDocLookupComparator.
* @param reader Index to use.
* @param field Field to use for sorting.
* @return Comparator to use to sort hits.
* @throws IOException If an error occurs reading the index.
*/
protected abstract ScoreDocLookupComparator createComparator (IndexReader reader, String field)
throws IOException;
}

View File

@ -0,0 +1,205 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import java.io.IOException;
/**
* Expert: A sorted hit queue for fields that contain strictly floating point values.
* Hits are sorted into the queue by the values in the field and then by document number.
*
* <p>Created: Feb 2, 2004 9:23:03 AM
*
* @author Tim Jones (Nacimiento Software)
* @since lucene 1.4
* @version $Id$
*/
class FloatSortedHitQueue
extends FieldSortedHitQueue {

  /**
   * Creates a hit queue sorted over the given field containing float values.
   * @param reader  Index to use.
   * @param float_field  Field containing float sort information
   * @param size    Number of hits to collect.
   * @throws IOException  If an error occurs reading the index.
   */
  FloatSortedHitQueue (IndexReader reader, String float_field, int size)
  throws IOException {
    super (reader, float_field, size);
  }

  /**
   * Returns a comparator for sorting hits according to a field containing floats.
   * Just calls <code>comparator(IndexReader,String)</code>.
   * @param reader  Index to use.
   * @param field  Field containing float values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   */
  protected ScoreDocLookupComparator createComparator (final IndexReader reader, final String field)
  throws IOException {
    return comparator (reader, field);
  }

  /**
   * Returns a comparator for sorting hits according to a field containing floats.
   * Opens its own term enumerator positioned at the start of the field and
   * always closes it again, including when the field turns out to have no terms.
   * @param reader  Index to use.
   * @param field  Field containing float values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   * @throws RuntimeException If the enumerator has no term at all.
   */
  static ScoreDocLookupComparator comparator (final IndexReader reader, final String field)
  throws IOException {
    final TermEnum enumerator = reader.terms (new Term (field, ""));
    try {
      // checked inside the try so the enumerator is released on this path too
      if (enumerator.term() == null) {
        throw new RuntimeException ("no terms in field " + field);
      }
      return comparator (reader, enumerator, field);
    } finally {
      enumerator.close();
    }
  }

  /**
   * Returns a comparator for sorting hits according to a field containing floats using the given enumerator
   * to collect term values. The enumerator is advanced but not closed; that stays the caller's job.
   * @param reader  Index to use.
   * @param enumerator  Term enumerator already positioned on the first term of the field.
   * @param field  Field containing float values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   */
  static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String field)
  throws IOException {
    // the whole value table is read up front; the comparator only does lookups
    final float[] fieldOrder = generateSortIndex (reader, enumerator, field);

    return new ScoreDocLookupComparator () {

      public final int compare (final ScoreDoc i, final ScoreDoc j) {
        final float fi = fieldOrder[i.doc];
        final float fj = fieldOrder[j.doc];
        if (fi < fj) return -1;
        if (fi > fj) return 1;
        return 0;
      }

      public final int compareReverse (final ScoreDoc i, final ScoreDoc j) {
        final float fi = fieldOrder[i.doc];
        final float fj = fieldOrder[j.doc];
        if (fi > fj) return -1;
        if (fi < fj) return 1;
        return 0;
      }

      public final boolean sizeMatches (final int n) {
        return fieldOrder.length == n;
      }

      public Object sortValue (final ScoreDoc i) {
        return new Float (fieldOrder[i.doc]);
      }

      public int sortType() {
        return SortField.FLOAT;
      }
    };
  }

  /**
   * Builds the per-document value table: slot <code>d</code> holds the parsed
   * float of the term found for document <code>d</code> in the field.
   * Documents with no term in the field keep the array default of 0.
   * @param reader  Index to use.
   * @param enumerator  Term enumerator positioned on the first term to read.
   * @param field  Field whose terms are read; enumeration stops at the first term of another field.
   * @return  Array of length <code>reader.maxDoc()</code>.
   * @throws IOException If an error occurs reading the index.
   */
  private static float[] generateSortIndex (final IndexReader reader, final TermEnum enumerator, final String field)
  throws IOException {
    final float[] retArray = new float[reader.maxDoc()];
    final TermDocs termDocs = reader.termDocs ();
    try {
      do {
        final Term term = enumerator.term();
        // equals() rather than ==, so a field name that is not interned still matches
        if (!field.equals (term.field())) break;
        final float termval = Float.parseFloat (term.text());
        termDocs.seek (enumerator);
        while (termDocs.next()) {
          retArray[termDocs.doc()] = termval;
        }
      } while (enumerator.next());
    } finally {
      termDocs.close();
    }
    return retArray;
  }
}

View File

@ -64,6 +64,7 @@ public final class Hits {
private Query query;
private Searcher searcher;
private Filter filter = null;
private Sort sort = null;
private int length; // the total number of hits
private Vector hitDocs = new Vector(); // cache of hits retrieved
@ -80,6 +81,14 @@ public final class Hits {
getMoreDocs(50); // retrieve 100 initially
}
// Creates the hit list for a sorted search: remembers the searcher, query,
// filter and sort order, then eagerly fetches the first batch of results.
Hits(Searcher s, Query q, Filter f, Sort o) throws IOException {
query = q;
searcher = s;
filter = f;
sort = o;
getMoreDocs(50); // retrieve 100 initially
}
/**
* Tries to add new documents to hitDocs.
* Ensures that the hit numbered <code>min</code> has been retrieved.
@ -90,7 +99,7 @@ public final class Hits {
}
int n = min * 2; // double # retrieved
TopDocs topDocs = searcher.search(query, filter, n);
TopDocs topDocs = (sort == null) ? searcher.search(query, filter, n) : searcher.search(query, filter, n, sort);
length = topDocs.totalHits;
ScoreDoc[] scoreDocs = topDocs.scoreDocs;

View File

@ -61,6 +61,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.PriorityQueue;
/** Implements search over a single IndexReader.
*
@ -149,6 +150,44 @@ public class IndexSearcher extends Searcher {
return new TopDocs(totalHits[0], scoreDocs);
}
/** Expert: Low-level search implementation. Finds the top <code>n</code>
* hits for <code>query</code>, applying <code>filter</code> if non-null.
* Results are ordered as specified by <code>sort</code>.
*
* <p>Called by {@link Hits}.
*
* <p>Applications should usually call {@link #search(Query)} or {@link
* #search(Query,Filter)} instead.
*/
public TopFieldDocs search(Query query, Filter filter, final int nDocs,
                           Sort sort)
  throws IOException {
  final Scorer scorer = query.weight(this).scorer(reader);
  if (scorer == null) {
    // no scorer: report zero hits, but still hand back the requested sort fields
    return new TopFieldDocs(0, new ScoreDoc[0], sort.fields);
  }

  // null means "no filter": every document is eligible
  final BitSet allowed = (filter == null) ? null : filter.bits(reader);
  final MultiFieldSortedHitQueue queue =
    new MultiFieldSortedHitQueue(reader, sort.fields, nDocs);
  // one-element array so the anonymous collector below can update the count
  final int[] hitCount = new int[1];

  scorer.score(new HitCollector() {
      public final void collect(int doc, float score) {
        if (!(score > 0.0f)) return;                        // ignore zeroed buckets
        if (allowed != null && !allowed.get(doc)) return;   // skip docs not in bits
        hitCount[0]++;
        queue.insert(new FieldDoc(doc, score));
      }
    });

  // drain the queue into the array back to front
  // (presumably pop() yields the lowest-ranked entry first - TODO confirm)
  final ScoreDoc[] ordered = new ScoreDoc[queue.size()];
  for (int slot = ordered.length - 1; slot >= 0; slot--) {
    ordered[slot] = queue.fillFields ((FieldDoc) queue.pop());
  }

  return new TopFieldDocs(hitCount[0], ordered, queue.getFields());
}
/** Lower-level search API.
*
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero

View File

@ -0,0 +1,207 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import java.io.IOException;
/**
* Expert: A sorted hit queue for fields that contain strictly integer values.
* Hits are sorted into the queue by the values in the field and then by document number.
*
* <p>Created: Jan 30, 2004 3:35:09 PM
*
* @author Tim Jones (Nacimiento Software)
* @since lucene 1.4
* @version $Id$
*/
class IntegerSortedHitQueue
extends FieldSortedHitQueue {
/**
* Creates a hit queue sorted over the given field containing integer values.
* @param reader Index to use.
* @param integer_field Field containing integer sort information
* @param size Number of hits to collect.
* @throws IOException If an error occurs reading the index.
*/
IntegerSortedHitQueue (IndexReader reader, String integer_field, int size)
throws IOException {
super (reader, integer_field, size);
}
/**
* Returns a comparator for sorting hits according to a field containing integers.
* Just calls <code>comparator(IndexReader,String)</code>.
* @param reader Index to use.
* @param field Field containg integer values.
* @return Comparator for sorting hits.
* @throws IOException If an error occurs reading the index.
*/
protected ScoreDocLookupComparator createComparator (final IndexReader reader, final String field)
throws IOException {
return comparator (reader, field);
}
/**
 * Returns a comparator for sorting hits according to a field containing integers.
 *
 * <p>The comparator owns a lookup table with one slot per document number
 * (<code>reader.maxDoc()</code> entries) holding the parsed integer value of the
 * term indexed for that document.  Documents without a term in the field keep
 * the array default of <code>0</code>; if a document has several terms in the
 * field, the value of the last term enumerated is the one retained.
 *
 * @param reader  Index to use.
 * @param field   Field containing integer values.
 * @return  Comparator for sorting hits.
 * @throws IOException If an error occurs reading the index.
 * @throws RuntimeException If the term enumeration positioned at the start of
 *         the field is already exhausted.
 * @throws NumberFormatException If a term in the field cannot be parsed by
 *         <code>Integer.parseInt</code>.
 */
static ScoreDocLookupComparator comparator (final IndexReader reader, final String field)
throws IOException {
  return new ScoreDocLookupComparator() {

    /** The sort information being used by this instance, indexed by document number. */
    protected final int[] fieldOrder = generateSortIndex();

    /** Walks every term of the field and records its integer value for each document containing it. */
    private final int[] generateSortIndex()
    throws IOException {
      final int[] retArray = new int[reader.maxDoc()];
      final TermEnum enumerator = reader.terms (new Term (field, ""));
      try {
        // checked inside the try block so the enumerator is released on this path too
        if (enumerator.term() == null) {
          throw new RuntimeException ("no terms in field "+field);
        }
        final TermDocs termDocs = reader.termDocs();
        try {
          do {
            final Term term = enumerator.term();
            // compare by value: an identity test only works when the caller
            // passed an interned field name and would otherwise stop at once,
            // leaving every slot at zero
            if (!field.equals (term.field())) break;
            final int termval = Integer.parseInt (term.text());
            termDocs.seek (enumerator);
            while (termDocs.next()) {
              retArray[termDocs.doc()] = termval;
            }
          } while (enumerator.next());
        } finally {
          termDocs.close();
        }
      } finally {
        // nested finally blocks: a failure closing one resource no longer skips the other
        enumerator.close();
      }
      return retArray;
    }

    /** Orders by ascending field value. */
    public final int compare (final ScoreDoc i, final ScoreDoc j) {
      final int fi = fieldOrder[i.doc];
      final int fj = fieldOrder[j.doc];
      if (fi < fj) return -1;
      if (fi > fj) return 1;
      return 0;
    }

    /** Orders by descending field value. */
    public final int compareReverse (final ScoreDoc i, final ScoreDoc j) {
      final int fi = fieldOrder[i.doc];
      final int fj = fieldOrder[j.doc];
      if (fi > fj) return -1;
      if (fi < fj) return 1;
      return 0;
    }

    /** Returns whether the lookup table has exactly <code>n</code> slots. */
    public final boolean sizeMatches (final int n) {
      return fieldOrder.length == n;
    }

    /** Returns the table entry for the document, boxed as an Integer. */
    public Object sortValue (final ScoreDoc i) {
      return new Integer (fieldOrder[i.doc]);
    }

    public int sortType() {
      return SortField.INT;
    }
  };
}
/**
 * Returns a comparator for sorting hits according to a field containing integers using the given enumerator
 * to collect term values.
 *
 * <p>Enumeration starts at the enumerator's current term and continues until a
 * term of a different field is reached or the enumerator is exhausted.  The
 * enumerator is not closed by this method.  Documents without a term in the
 * field keep the array default of <code>0</code>.
 *
 * @param reader  Index to use.
 * @param enumerator  Term enumeration positioned at the first term to read.
 * @param field   Field containing integer values.
 * @return  Comparator for sorting hits.
 * @throws IOException If an error occurs reading the index.
 * @throws RuntimeException If the enumerator has no current term.
 * @throws NumberFormatException If a term in the field cannot be parsed by
 *         <code>Integer.parseInt</code>.
 */
static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String field)
throws IOException {
  return new ScoreDocLookupComparator() {

    /** The sort information being used by this instance, indexed by document number. */
    protected final int[] fieldOrder = generateSortIndex();

    /** Walks the remaining terms of the field and records their integer value per document. */
    private final int[] generateSortIndex()
    throws IOException {
      final int[] retArray = new int[reader.maxDoc()];
      // fail with the same message as the two-argument factory instead of a
      // bare NullPointerException when the enumerator is already exhausted
      if (enumerator.term() == null) {
        throw new RuntimeException ("no terms in field "+field);
      }
      final TermDocs termDocs = reader.termDocs();
      try {
        do {
          final Term term = enumerator.term();
          // compare by value so a non-interned field name still matches
          if (!field.equals (term.field())) break;
          final int termval = Integer.parseInt (term.text());
          termDocs.seek (enumerator);
          while (termDocs.next()) {
            retArray[termDocs.doc()] = termval;
          }
        } while (enumerator.next());
      } finally {
        termDocs.close();
      }
      return retArray;
    }

    /** Orders by ascending field value. */
    public final int compare (final ScoreDoc i, final ScoreDoc j) {
      final int fi = fieldOrder[i.doc];
      final int fj = fieldOrder[j.doc];
      if (fi < fj) return -1;
      if (fi > fj) return 1;
      return 0;
    }

    /** Orders by descending field value. */
    public final int compareReverse (final ScoreDoc i, final ScoreDoc j) {
      final int fi = fieldOrder[i.doc];
      final int fj = fieldOrder[j.doc];
      if (fi > fj) return -1;
      if (fi < fj) return 1;
      return 0;
    }

    /** Returns whether the lookup table has exactly <code>n</code> slots. */
    public final boolean sizeMatches (final int n) {
      return fieldOrder.length == n;
    }

    /** Returns the table entry for the document, boxed as an Integer. */
    public Object sortValue (final ScoreDoc i) {
      return new Integer (fieldOrder[i.doc]);
    }

    public int sortType() {
      return SortField.INT;
    }
  };
}
}

View File

@ -1,221 +0,0 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.search.*;
import org.apache.lucene.search.TopDocs;
import java.io.IOException;
import java.util.BitSet;
/**
* Implements search over an IndexReader using the values of terms in
* a field as the primary sort order. Secondary sort is by the order
* of documents in the index.
*
* <p>In this version (0.1) the field to sort by must contain strictly
* String representations of Integers (i.e. {@link Integer#toString Integer.toString()}).
*
* Each document is assumed to have a single term in the given field,
* and the value of the term is the document's relative position in
* the given sort order. The field must be indexed, but should not be
* stored or tokenized:
*
* <p><code>document.add(new Field("byAlpha", Integer.toString(x), false, true, false));</code>
*
* <p>In other words, the desired order of documents must be encoded
* at the time they are entered into the index. The first document
* should have a low value integer, the last document a high value
* (i.e. the documents should be numbered <code>1..n</code> where
* <code>1</code> is the first and <code>n</code> the last). Values
* must be between <code>Integer.MIN_VALUE</code> and
* <code>Integer.MAX_VALUE</code> inclusive.
*
* <p>Then, at search time, the field is designated to be used to sort
* the returned hits:
*
* <p><code>IndexSearcher searcher = new IntegerSortedSearcher(indexReader, "byAlpha");</code>
*
* <p>or:
*
* <p><code>IntegerSortedSearcher searcher = new IntegerSortedSearcher(indexReader, "bySomething");
* <br>Hits hits = searcher.search(query, filter);
* <br>...
* <br>searcher.setOrderByField("bySomethingElse");
* <br>hits = searcher.search(query, filter);
* <br>...
* </code>
*
* <p>Note the above example shows that one of these objects can be
* used multiple times, and the sort order changed between usages.
*
* <p><h3>Memory Usage</h3>
*
* <p>This object is almost identical to the regular IndexSearcher and
* imposes no additional memory requirements of its own. Every time the
* <code>search()</code> method is called, however, a new
* {@link FieldSortedHitQueue FieldSortedHitQueue} object is created.
* That object is responsible for putting the hits in the correct order,
* and it maintains a cache of information based on the IndexReader
* given to it. See its documentation for more information on its
* memory usage.
*
* <p><h3>Concurrency</h3>
*
* <p>This object has the same behavior during concurrent updates to
* the index as does IndexSearcher. Namely, in the default
* implementation using
* {@link org.apache.lucene.store.FSDirectory FSDirectory}, the index
* can be updated (deletes, adds) without harm while this object
* exists, but this object will not see the changes. Ultimately this
* behavior is a result of the
* {@link org.apache.lucene.index.SegmentReader SegmentReader} class
* internal to FSDirectory, which caches information about documents
* in memory.
*
* <p>So, in order for IntegerSortedSearcher to be kept up to date with
* changes to the index, new instances must be created instead of the
* same one used over and over again. This will result in lower
* performance than if instances are reused.
*
* <p><h3>Updates</h3>
*
* <p>In order to be able to update the index without having to
* recalculate all the sort numbers, the numbers should be stored with
* "space" between them. That is, sort the documents and number them
* <code>1..n</code>. Then, as <code>i</code> goes between
* <code>1</code> and <code>n</code>:
*
* <p><code>document.add(new Field("byAlpha", Integer.toString(i*1000), false, true, false));</code>
*
* <p>Add a new document sorted between position 1 and 2 by:
*
* <p><code>document.add(new Field("byAlpha", Integer.toString(1500), false, true, false));</code>
*
* <p>Be careful not to overrun <code>Integer.MAX_VALUE</code>
* (<code>2147483647</code>). Periodically a complete reindex should
* be run so the sort orders can be "normalized".
*
* <p>Created: Dec 8, 2003 12:47:26 PM
*
* @author "Tim Jones" &lt;tjluc@nacimiento.com&gt;
* @since lucene 1.3
* @version 0.1
* @see IndexSearcher
*/
public class IntegerSortedSearcher
extends IndexSearcher {

  /** Name of the field currently used to order results; always stored interned. */
  protected String field;

  /**
   * Opens the index found at <code>path</code> and orders results by
   * <code>integer_field</code>.
   * The terms in that field must be integers between
   * <code>Integer.MIN_VALUE</code> and <code>Integer.MAX_VALUE</code> inclusive.
   * @param path Location of the index, handed to <code>IndexReader.open</code>.
   * @param integer_field The field to sort results by.
   * @throws IOException If the index cannot be opened.
   */
  public IntegerSortedSearcher(String path, String integer_field)
  throws IOException {
    this(IndexReader.open(path), integer_field);
  }

  /**
   * Opens the index held in <code>directory</code> and orders results by
   * <code>integer_field</code>.
   * The terms in that field must be integers between
   * <code>Integer.MIN_VALUE</code> and <code>Integer.MAX_VALUE</code> inclusive.
   * @param directory Directory holding the index, handed to <code>IndexReader.open</code>.
   * @param integer_field The field to sort results by.
   * @throws IOException If the index cannot be opened.
   */
  public IntegerSortedSearcher(Directory directory, String integer_field)
  throws IOException {
    this(IndexReader.open(directory), integer_field);
  }

  /**
   * Searches the index behind <code>r</code>, ordering results by
   * <code>integer_field</code>.
   * The terms in that field must be integers between
   * <code>Integer.MIN_VALUE</code> and <code>Integer.MAX_VALUE</code> inclusive.
   * @param r Reader over the index to search.
   * @param integer_field The field to sort results by.
   */
  public IntegerSortedSearcher(IndexReader r, String integer_field) {
    super(r);
    this.field = integer_field.intern();
  }

  /**
   * Changes the field results are ordered by.  May be called any number of
   * times on the same instance.
   * @param integer_field The field to sort results by.
   */
  public void setOrderByField(String integer_field) {
    final String interned = integer_field.intern();
    this.field = interned;
  }

  /**
   * Tells which field results are currently ordered by.
   * @return Field name.
   */
  public String getOrderByField() {
    return this.field;
  }

  /**
   * Collects the top <code>nDocs</code> hits for <code>query</code>, honouring
   * <code>filter</code> when it is non-null.
   *
   * Replaces the IndexSearcher implementation so that hits are ranked through a
   * FieldSortedHitQueue rather than the default HitQueue.
   *
   * @see IndexSearcher#search
   */
  public TopDocs search(Query query, Filter filter, final int nDocs)
  throws IOException {
    final Scorer scorer = query.weight(this).scorer(reader);
    if (scorer == null) {
      return new TopDocs(0, new ScoreDoc[0]);
    }

    final BitSet allowed = (filter == null) ? null : filter.bits(reader);
    final FieldSortedHitQueue queue = new FieldSortedHitQueue(reader, field, nDocs);
    // single-element array so the anonymous collector can update the count
    final int[] hitCount = { 0 };

    scorer.score(
      new HitCollector() {
        public final void collect(int doc, float score) {
          if (!(score > 0.0f)) {
            return;                               // ignore zeroed buckets
          }
          if (allowed != null && !allowed.get(doc)) {
            return;                               // skip docs rejected by the filter
          }
          hitCount[0]++;
          queue.insert(new ScoreDoc(doc, score));
        }
      });

    // pop order is reversed, so fill the result array from the back
    final ScoreDoc[] ordered = new ScoreDoc[queue.size()];
    int slot = ordered.length;
    while (slot > 0) {
      ordered[--slot] = (ScoreDoc) queue.pop();
    }
    return new TopDocs(hitCount[0], ordered);
  }
}

View File

@ -0,0 +1,110 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.PriorityQueue;
import java.io.IOException;
/**
 * Expert: A hit queue that orders hits by the terms of more than one field.
 * The comparator for each field is obtained from
 * FieldSortedHitQueue.getCachedComparator(), and the sort type actually used
 * is taken from that comparator.
 *
 * <p>Created: Feb 3, 2004 4:46:55 PM
 *
 * @author  Tim Jones (Nacimiento Software)
 * @since   lucene 1.4
 * @version $Id$
 * @see FieldSortedHitQueue
 * @see Searchable#search(Query,Filter,int,Sort)
 */
class MultiFieldSortedHitQueue
extends PriorityQueue {

  /** One comparator per sort criterion, in priority order. */
  protected ScoreDocComparator[] comparators;

  /** The sort criteria in use, carrying the type reported by each comparator. */
  protected SortField[] fields;

  /**
   * Builds a hit queue ordered by the given list of sort criteria.
   * @param reader  Index to use.
   * @param fields  Sort criteria, in priority order (highest priority first).  Cannot be <code>null</code> or empty.
   * @param size    The number of hits to retain.  Must be greater than zero.
   * @throws IOException
   */
  MultiFieldSortedHitQueue (IndexReader reader, SortField[] fields, int size)
  throws IOException {
    final int count = fields.length;
    this.comparators = new ScoreDocComparator[count];
    this.fields = new SortField[count];
    for (int slot = 0; slot < count; ++slot) {
      final SortField requested = fields[slot];
      final ScoreDocComparator chosen =
        FieldSortedHitQueue.getCachedComparator (reader, requested.getField(), requested.getType());
      this.comparators[slot] = chosen;
      // record the comparator's own sort type rather than the requested one
      this.fields[slot] = new SortField (requested.getField(), chosen.sortType(), requested.getReverse());
    }
    initialize (size);
  }

  /**
   * Tells whether <code>a</code> ranks below <code>b</code>.
   * @param a ScoreDoc
   * @param b ScoreDoc
   * @return <code>true</code> if document <code>a</code> should be sorted after document <code>b</code>.
   */
  protected final boolean lessThan (final Object a, final Object b) {
    final ScoreDoc first = (ScoreDoc) a;
    final ScoreDoc second = (ScoreDoc) b;
    final int count = comparators.length;
    for (int slot = 0; slot < count; ++slot) {
      final int order;
      if (fields[slot].reverse) {
        order = comparators[slot].compareReverse (first, second);
      } else {
        order = comparators[slot].compare (first, second);
      }
      if (order != 0) {
        // the first criterion that tells the documents apart decides
        return order > 0;
      }
    }
    return false;
  }

  /**
   * Stores into the given FieldDoc the values used to sort that document,
   * one per comparator and in comparator order, as returned by each
   * comparator's <code>sortValue</code>.  This is so the given search hit
   * can be collated by a MultiSearcher with other search hits.
   * @param  doc  The FieldDoc to store sort values into.
   * @return  The same FieldDoc passed in.
   * @see Searchable#search(Query,Filter,int,Sort)
   */
  FieldDoc fillFields (final FieldDoc doc) {
    final Object[] values = new Object[comparators.length];
    for (int slot = 0; slot < values.length; ++slot) {
      values[slot] = comparators[slot].sortValue (doc);
    }
    doc.fields = values;
    return doc;
  }

  /** Returns the SortFields being used by this hit queue. */
  SortField[] getFields() {
    return this.fields;
  }
}

View File

@ -170,6 +170,32 @@ public class MultiSearcher extends Searcher {
}
/**
 * Searches every searchable in turn with the given sort and merges the
 * per-searchable results into one sorted list of at most <code>n</code> hits.
 * Document numbers are shifted by <code>starts[i]</code> before merging.
 *
 * <p>When there are no searchables an empty result is returned, carrying the
 * sort's own criteria (or <code>null</code> if <code>sort</code> is null).
 *
 * @throws IOException If any of the underlying searches fails.
 */
public TopFieldDocs search (Query query, Filter filter, int n, Sort sort)
throws IOException {
  FieldDocSortedHitQueue hq = null;
  int totalHits = 0;

  for (int i = 0; i < searchables.length; i++) { // search each searcher
    TopFieldDocs docs = searchables[i].search (query, filter, n, sort);
    // the queue is created lazily from the fields reported by the first result
    if (hq == null) hq = new FieldDocSortedHitQueue (docs.fields, n);
    totalHits += docs.totalHits;		  // update totalHits
    ScoreDoc[] scoreDocs = docs.scoreDocs;
    for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
      ScoreDoc scoreDoc = scoreDocs[j];
      scoreDoc.doc += starts[i];		  // convert doc
      // assumes scoreDocs is already in sort order, so once one hit is
      // rejected the rest of this searchable's hits would be as well
      if (!hq.insert (scoreDoc))
        break;
    }
  }

  if (hq == null) {
    // no searchables were consulted: nothing to merge, and hq.size() below
    // would otherwise fail with a NullPointerException
    return new TopFieldDocs (totalHits, new ScoreDoc[0], (sort == null) ? null : sort.fields);
  }

  ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
  for (int i = hq.size() - 1; i >= 0; i--)	  // put docs in array
    scoreDocs[i] = (ScoreDoc) hq.pop();

  return new TopFieldDocs (totalHits, scoreDocs, hq.getFields());
}
/** Lower-level search API.
*
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero

View File

@ -57,6 +57,7 @@ package org.apache.lucene.search;
import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.PriorityQueue;
/** Implements parallel search over a set of <code>Searchables</code>.
*
@ -133,6 +134,55 @@ public class ParallelMultiSearcher extends MultiSearcher {
return new TopDocs(totalHits, scoreDocs);
}
/**
 * A search implementation allowing sorting which spawns a new thread for each
 * Searchable, waits for each search to complete and merges
 * the results back together.
 *
 * <p>If the calling thread is interrupted while waiting, it keeps waiting
 * until every worker has finished and then restores its interrupt status
 * before returning or throwing.
 *
 * @throws IOException The first IOException, in searchable order, raised by a worker.
 */
public TopFieldDocs search(Query query, Filter filter, int nDocs, Sort sort)
throws IOException {
  // don't specify the fields - we'll wait to do this until we get results
  FieldDocSortedHitQueue hq = new FieldDocSortedHitQueue (null, nDocs);
  int totalHits = 0;
  MultiSearcherThread[] msta = new MultiSearcherThread[searchables.length];
  for (int i = 0; i < searchables.length; i++) { // search each searcher
    // Assume not too many searchables and cost of creating a thread is by far inferior to a search
    msta[i] =
      new MultiSearcherThread(
        searchables[i],
        query,
        filter,
        nDocs,
        hq,
        sort,
        i,
        starts,
        "MultiSearcher thread #" + (i + 1));
    msta[i].start();
  }

  boolean interrupted = false;
  try {
    for (int i = 0; i < searchables.length; i++) {
      // Retry the join after an interrupt: reading hits or draining the queue
      // while the worker is still running would use incomplete results.
      while (true) {
        try {
          msta[i].join();
          break;
        } catch (InterruptedException ie) {
          interrupted = true;   // remembered and re-asserted below
        }
      }
      IOException ioe = msta[i].getIOException();
      if (ioe != null) {
        // if one search produced an IOException, rethrow it
        throw ioe;
      }
      totalHits += msta[i].hits();
    }
  } finally {
    if (interrupted) {
      // catching InterruptedException cleared the flag; hand it back to the caller
      Thread.currentThread().interrupt();
    }
  }

  ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
  for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
    scoreDocs[i] = (ScoreDoc) hq.pop();

  return new TopFieldDocs(totalHits, scoreDocs, hq.getFields());
}
/** Lower-level search API.
*
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero
@ -190,9 +240,10 @@ class MultiSearcherThread extends Thread {
private int hits;
private TopDocs docs;
private int i;
private HitQueue hq;
private PriorityQueue hq;
private int[] starts;
private IOException ioe;
private Sort sort;
public MultiSearcherThread(
Searchable searchable,
@ -213,15 +264,43 @@ class MultiSearcherThread extends Thread {
this.starts = starts;
}
/**
 * Creates a named worker thread for a sorted search: stores the searchable
 * to query, the search arguments including the sort, the shared hit queue,
 * and this worker's index <code>i</code> together with the
 * <code>starts</code> array.
 */
public MultiSearcherThread(
  Searchable searchable,
  Query query,
  Filter filter,
  int nDocs,
  FieldDocSortedHitQueue hq,
  Sort sort,
  int i,
  int[] starts,
  String name) {
  super(name);
  // what to search for
  this.query = query;
  this.filter = filter;
  this.sort = sort;
  this.nDocs = nDocs;
  // where to search
  this.searchable = searchable;
  this.i = i;
  this.starts = starts;
  // where the hits go
  this.hq = hq;
}
public void run() {
try {
docs = searchable.search(query, filter, nDocs);
docs = (sort == null) ? searchable.search (query, filter, nDocs)
: searchable.search (query, filter, nDocs, sort);
}
// Store the IOException for later use by the caller of this thread
catch (IOException ioe) {
this.ioe = ioe;
}
if (ioe == null) {
// if we are sorting by fields, we need to tell the field sorted hit queue
// the actual type of fields, in case the original list contained AUTO.
// if the searchable returns null for fields, we'll have problems.
if (sort != null) {
((FieldDocSortedHitQueue)hq).setFields (((TopFieldDocs)docs).fields);
}
ScoreDoc[] scoreDocs = docs.scoreDocs;
for (int j = 0;
j < scoreDocs.length;

View File

@ -98,6 +98,11 @@ public class RemoteSearchable
return local.search(query, filter, n);
}
/** Runs the sorted search on the <code>local</code> delegate and returns its result unchanged. */
public TopFieldDocs search (Query query, Filter filter, int n, Sort sort)
throws IOException {
  final TopFieldDocs sorted = local.search (query, filter, n, sort);
  return sorted;
}
public Document doc(int i) throws IOException {
return local.doc(i);
}

View File

@ -0,0 +1,111 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Expert: Defines an ordering over ScoreDoc objects for sorting, together
 * with two ready-made orderings (by score and by document number).
 *
 * <p>Created: Feb 3, 2004 9:00:16 AM
 *
 * @author  Tim Jones (Nacimiento Software)
 * @since   lucene 1.4
 * @version $Id$
 */
interface ScoreDocComparator {

  /**
   * Compares two ScoreDoc objects and reports their sort order.
   * @param i First ScoreDoc
   * @param j Second ScoreDoc
   * @return <code>-1</code> if <code>i</code> should come before <code>j</code><br><code>1</code> if <code>i</code> should come after <code>j</code><br><code>0</code> if they are equal
   * @see java.util.Comparator
   */
  int compare (ScoreDoc i, ScoreDoc j);

  /**
   * Compares two ScoreDoc objects and reports their sort order in reverse.
   * @param i First ScoreDoc
   * @param j Second ScoreDoc
   * @return <code>-1</code> if <code>i</code> should come before <code>j</code><br><code>1</code> if <code>i</code> should come after <code>j</code><br><code>0</code> if they are equal
   * @see java.util.Comparator
   */
  int compareReverse (ScoreDoc i, ScoreDoc j);

  /**
   * Returns the value used to sort the given document.  The two comparators
   * defined in this interface return a Float and an Integer respectively.
   * @param i Document
   * @return The sort value as an object.
   */
  Object sortValue (ScoreDoc i);

  /**
   * Returns the type of sort.
   * @return One of the constants in SortField.
   * @see SortField
   */
  int sortType();

  /** Special comparator for sorting hits according to computed relevance (document score): higher scores come first. */
  ScoreDocComparator RELEVANCE = new ScoreDocComparator() {
    public int compare (ScoreDoc i, ScoreDoc j) {
      return (i.score > j.score) ? -1 : ((i.score < j.score) ? 1 : 0);
    }
    public int compareReverse (ScoreDoc i, ScoreDoc j) {
      return (i.score < j.score) ? -1 : ((i.score > j.score) ? 1 : 0);
    }
    public Object sortValue (ScoreDoc i) {
      return new Float (i.score);
    }
    public int sortType() {
      return SortField.SCORE;
    }
  };

  /** Special comparator for sorting hits according to index order (document number): lower numbers come first. */
  ScoreDocComparator INDEXORDER = new ScoreDocComparator() {
    public int compare (ScoreDoc i, ScoreDoc j) {
      return (i.doc < j.doc) ? -1 : ((i.doc > j.doc) ? 1 : 0);
    }
    public int compareReverse (ScoreDoc i, ScoreDoc j) {
      return (i.doc > j.doc) ? -1 : ((i.doc < j.doc) ? 1 : 0);
    }
    public Object sortValue (ScoreDoc i) {
      return new Integer (i.doc);
    }
    public int sortType() {
      return SortField.DOC;
    }
  };
}

View File

@ -0,0 +1,40 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Expert: Compares two ScoreDoc objects for sorting using a lookup table.
 * Adds a size check on that table to the ScoreDocComparator contract.
 *
 * <p>Created: Feb 3, 2004 9:59:14 AM
 *
 * @author  Tim Jones (Nacimiento Software)
 * @since   lucene 1.4
 * @version $Id$
 */
interface ScoreDocLookupComparator
extends ScoreDocComparator {
/**
 * Verifies that the internal lookup table is the correct size.  This
 * comparator uses a lookup table, so it is important that the
 * table matches the number of documents in the index.
 * @param n  Expected size of table.
 * @return   True if internal table matches expected size; false otherwise
 */
boolean sizeMatches (int n);
}

View File

@ -127,5 +127,14 @@ public interface Searchable extends java.rmi.Remote {
*/
Explanation explain(Query query, int doc) throws IOException;
/** Expert: Low-level search implementation with arbitrary sorting. Finds
* the top <code>n</code> hits for <code>query</code>, applying
* <code>filter</code> if non-null, and sorting the hits by the criteria in
* <code>sort</code>.
*
* <p>Applications should usually call {@link
* Searcher#search(Query,Filter,Sort)} instead.
*
* @param query  the query to run
* @param filter restricts the documents considered; may be <code>null</code>
* @param n      the maximum number of hits to return
* @param sort   the criteria to order the hits by
* @return the top hits in sort order
* @throws IOException declared so that implementations may report I/O failures
*/
TopFieldDocs search(Query query, Filter filter, int n, Sort sort)
throws IOException;
}

View File

@ -71,6 +71,22 @@ public abstract class Searcher implements Searchable {
return new Hits(this, query, filter);
}
/** Finds the documents matching <code>query</code>, without any filter,
* and returns them ordered by <code>sort</code>.
*/
public Hits search(Query query, Sort sort)
throws IOException {
  final Filter noFilter = null;
  return new Hits(this, query, noFilter, sort);
}
/** Finds the documents matching both <code>query</code> and
* <code>filter</code>, and returns them ordered by <code>sort</code>.
*/
public Hits search(Query query, Filter filter, Sort sort)
throws IOException {
  final Hits sortedHits = new Hits(this, query, filter, sort);
  return sortedHits;
}
/** Lower-level search API.
*
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero

View File

@ -0,0 +1,110 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Serializable;
/**
 * Encapsulates sort criteria for returned hits.  The sort criteria can
 * be changed between calls to Searcher#search().
 *
 * <p>Each <code>setSort</code> call replaces the whole criteria array with a
 * single assignment; no locking is performed.
 * NOTE(review): the earlier documentation described this class as thread
 * safe - confirm what guarantee callers actually rely on.
 *
 * <p>Created: Feb 12, 2004 10:53:57 AM
 *
 * @author  Tim Jones (Nacimiento Software)
 * @since   lucene 1.4
 * @version $Id$
 */
public class Sort
implements Serializable {

  /** Represents sorting by computed relevance, with ties broken by index
   * order.  Using this sort criteria returns the same results with slightly
   * more overhead as calling Searcher#search() without a sort criteria.
   * This is a shared instance: calling <code>setSort</code> on it changes it
   * for every user. */
  public static final Sort RELEVANCE =
    new Sort (new SortField[] { SortField.FIELD_SCORE, SortField.FIELD_DOC });

  /** Represents sorting by index order.  This is a shared instance: calling
   * <code>setSort</code> on it changes it for every user. */
  public static final Sort INDEXORDER = new Sort (SortField.FIELD_DOC);

  // internal representation of the sort criteria
  SortField[] fields;

  /** Sorts by the terms in <code>field</code> then by index order (document
   * number). */
  public Sort (String field) {
    setSort (field, false);
  }

  /** Sorts possibly in reverse by the terms in <code>field</code> then by
   * index order (document number). */
  public Sort (String field, boolean reverse) {
    setSort (field, reverse);
  }

  /** Sorts in succession by the terms in each field. */
  public Sort (String[] fields) {
    setSort (fields);
  }

  /** Sorts by the criteria in the given SortField. */
  public Sort (SortField field) {
    setSort (field);
  }

  /** Sorts in succession by the criteria in each SortField. */
  public Sort (SortField[] fields) {
    setSort (fields);
  }

  /** Sets the sort to the terms in <code>field</code> then by index order
   * (document number). */
  public final void setSort (String field) {
    setSort (field, false);
  }

  /** Sets the sort to the terms in <code>field</code> possibly in reverse,
   * then by index order (document number).  The type of the field's terms
   * is left to be determined dynamically (SortField.AUTO). */
  public void setSort (String field, boolean reverse) {
    SortField[] nfields = new SortField[] {
      new SortField (field, SortField.AUTO, reverse),
      // the index-order tie-break needs no field name; use the shared
      // constant, as RELEVANCE and INDEXORDER do
      SortField.FIELD_DOC
    };
    fields = nfields;
  }

  /** Sets the sort to the terms in each field in succession.  The type of
   * each field's terms is left to be determined dynamically
   * (SortField.AUTO); no index-order tie-break is appended. */
  public void setSort (String[] fieldnames) {
    final int n = fieldnames.length;
    SortField[] nfields = new SortField[n];
    for (int i=0; i<n; ++i) {
      nfields[i] = new SortField (fieldnames[i], SortField.AUTO);
    }
    fields = nfields;
  }

  /** Sets the sort to the given criteria. */
  public void setSort (SortField field) {
    this.fields = new SortField[] { field };
  }

  /** Sets the sort to the given criteria in succession.  The array is
   * stored as given, not copied. */
  public void setSort (SortField[] fields) {
    this.fields = fields;
  }
}

View File

@ -0,0 +1,135 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Serializable;
/**
 * Stores information about how to sort documents by terms in an individual
 * field.  Fields must be indexed in order to sort by them.
 *
 * <p>Field names are held interned (see {@link String#intern()}), so that
 * code comparing them by identity with other interned field names sees a
 * match whenever the names are equal.
 *
 * <p>Created: Feb 11, 2004 1:25:29 PM
 *
 * @author  Tim Jones (Nacimiento Software)
 * @since   lucene 1.4
 * @version $Id$
 */
public class SortField
implements Serializable {

  /** Sort by document score (relevancy).  Sort values are Float and higher
   * values are at the front. */
  public static final int SCORE = 0;

  /** Sort by document number (index order).  Sort values are Integer and lower
   * values are at the front. */
  public static final int DOC = 1;

  /** Guess type of sort based on field contents.  A regular expression is used
   * to look at the first term indexed for the field and determine if it
   * represents an integer number, a floating point number, or just arbitrary
   * string characters. */
  public static final int AUTO = 2;

  /** Sort using term values as Strings.  Sort values are String and lower
   * values are at the front. */
  public static final int STRING = 3;

  /** Sort using term values as encoded Integers.  Sort values are Integer and
   * lower values are at the front. */
  public static final int INT = 4;

  /** Sort using term values as encoded Floats.  Sort values are Float and
   * lower values are at the front. */
  public static final int FLOAT = 5;

  /** Represents sorting by document score (relevancy). */
  public static final SortField FIELD_SCORE = new SortField (null, SCORE);

  /** Represents sorting by document number (index order). */
  public static final SortField FIELD_DOC = new SortField (null, DOC);

  private String field;         // interned; null is allowed for SCORE and DOC
  private int type = AUTO;      // defaults to determining type dynamically
  boolean reverse = false;      // defaults to natural order

  /** Creates a sort by terms in the given field where the type of term value
   * is determined dynamically ({@link #AUTO AUTO}).
   * @param field Name of field to sort by, cannot be <code>null</code>.
   */
  public SortField (String field) {
    this (field, AUTO, false);
  }

  /** Creates a sort, possibly in reverse, by terms in the given field where
   * the type of term value is determined dynamically ({@link #AUTO AUTO}).
   * @param field Name of field to sort by, cannot be <code>null</code>.
   * @param reverse True if natural order should be reversed.
   */
  public SortField (String field, boolean reverse) {
    this (field, AUTO, reverse);
  }

  /** Creates a sort by terms in the given field with the type of term
   * values explicitly given.
   * @param field  Name of field to sort by.  Can be <code>null</code> if
   *               <code>type</code> is SCORE or DOC.
   * @param type   Type of values in the terms.
   */
  public SortField (String field, int type) {
    this (field, type, false);
  }

  /** Creates a sort, possibly in reverse, by terms in the given field with the
   * type of term values explicitly given.
   * @param field  Name of field to sort by.  Can be <code>null</code> if
   *               <code>type</code> is SCORE or DOC.
   * @param type   Type of values in the terms.
   * @param reverse True if natural order should be reversed.
   */
  public SortField (String field, int type, boolean reverse) {
    // interned so identity comparisons against other interned names succeed
    this.field = (field != null) ? field.intern() : field;
    this.type = type;
    this.reverse = reverse;
  }

  /** Returns the name of the field.  Could return <code>null</code>
   * if the sort is by SCORE or DOC.
   * @return Name of field, possibly <code>null</code>.
   */
  public String getField() {
    return field;
  }

  /** Returns the type of contents in the field.
   * @return One of the constants SCORE, DOC, AUTO, STRING, INT or FLOAT.
   */
  public int getType() {
    return type;
  }

  /** Returns whether the sort should be reversed.
   * @return True if natural order should be reversed.
   */
  public boolean getReverse() {
    return reverse;
  }

  /** Re-interns the field name after deserialization, because a
   * deserialized string is a fresh instance. */
  private void readObject (java.io.ObjectInputStream in)
  throws java.io.IOException, ClassNotFoundException {
    in.defaultReadObject();
    if (field != null) {
      field = field.intern();
    }
  }
}

View File

@ -0,0 +1,226 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import java.io.IOException;
/**
 * Expert: A sorted hit queue for fields that contain string values.
 * Hits are sorted into the queue by the values in the field and then by document number.
 * The internal cache contains integers - the strings are sorted and
 * then only their sequence number cached.
 *
 * <p>Created: Feb 2, 2004 9:26:33 AM
 *
 * @author  Tim Jones (Nacimiento Software)
 * @since   lucene 1.4
 * @version $Id$
 */
class StringSortedHitQueue
extends FieldSortedHitQueue {

  /**
   * Creates a hit queue sorted over the given field containing string values.
   * @param reader  Index to use.
   * @param string_field  Field containing string sort information
   * @param size    Number of hits to collect.
   * @throws IOException  If an error occurs reading the index.
   */
  StringSortedHitQueue (IndexReader reader, String string_field, int size)
  throws IOException {
    super (reader, string_field, size);
  }

  /**
   * Returns a comparator for sorting hits according to a field containing strings.
   * Just calls <code>comparator(IndexReader,String)</code>.
   * @param reader  Index to use.
   * @param field  Field containing string values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   */
  protected ScoreDocLookupComparator createComparator (final IndexReader reader, final String field)
  throws IOException {
    return comparator (reader, field);
  }

  /**
   * Returns a comparator for sorting hits according to a field containing strings.
   * Opens a term enumerator positioned at the first term of <code>field</code>,
   * builds the comparator from it and closes the enumerator again before returning.
   * @param reader  Index to use.
   * @param field  Field containing string values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   * @throws RuntimeException If the enumerator has no term at or after the start of <code>field</code>.
   */
  static ScoreDocLookupComparator comparator (final IndexReader reader, final String field)
  throws IOException {
    final TermEnum enumerator = reader.terms (new Term (field, ""));
    try {
      // Check before acquiring any further resources so that nothing is
      // left open when the "no terms" error is raised.
      if (enumerator.term() == null) {
        throw new RuntimeException ("no terms in field " + field);
      }
      return comparator (reader, enumerator, field);
    } finally {
      enumerator.close();
    }
  }

  /**
   * Returns a comparator for sorting hits according to a field containing strings using the given enumerator
   * to collect term values.  The enumerator is advanced but not closed by this method;
   * closing it remains the caller's responsibility.
   * @param reader  Index to use.
   * @param enumerator  Term enumerator, expected to be positioned at the first term of <code>field</code>.
   * @param field  Field containing string values.
   * @return  Comparator for sorting hits.
   * @throws IOException If an error occurs reading the index.
   */
  static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String field)
  throws IOException {

    // The sort information used by the returned comparator: for every
    // document number, the sequence number of its term (0 if none was seen).
    final int[] fieldOrder = generateSortIndex (reader, enumerator, field);

    return new ScoreDocLookupComparator() {

      /** Orders two hits by ascending term sequence number. */
      public final int compare (final ScoreDoc i, final ScoreDoc j) {
        final int fi = fieldOrder[i.doc];
        final int fj = fieldOrder[j.doc];
        if (fi < fj) return -1;
        if (fi > fj) return 1;
        return 0;
      }

      /** Orders two hits by descending term sequence number. */
      public final int compareReverse (final ScoreDoc i, final ScoreDoc j) {
        final int fi = fieldOrder[i.doc];
        final int fj = fieldOrder[j.doc];
        if (fi > fj) return -1;
        if (fi < fj) return 1;
        return 0;
      }

      /** Tells whether the cached sort index holds exactly <code>n</code> entries. */
      public final boolean sizeMatches (final int n) {
        return fieldOrder.length == n;
      }

      /** Returns the term sequence number of the hit, wrapped in an Integer. */
      public Object sortValue (final ScoreDoc i) {
        return new Integer(fieldOrder[i.doc]);
      }

      /** The sort values exposed by this comparator are sequence numbers, hence INT. */
      public int sortType() {
        return SortField.INT;
      }
    };
  }

  /**
   * Builds the per-document sort index for <code>field</code>.
   * Terms are numbered 1, 2, 3, ... in the order the enumerator delivers them and
   * each document receives the number of the last such term it appears under.
   * Documents that appear under none of the terms keep the value 0.
   * Enumeration stops at the first term that belongs to a different field, or
   * when the enumerator has no current term.
   * @param reader  Index to use.
   * @param enumerator  Term enumerator to read terms from; not closed here.
   * @param field  Field containing string values.
   * @return  Array indexed by document number, of length <code>reader.maxDoc()</code>.
   * @throws IOException If an error occurs reading the index.
   */
  private static int[] generateSortIndex (final IndexReader reader, final TermEnum enumerator, final String field)
  throws IOException {

    final int[] retArray = new int[reader.maxDoc()];

    // NOTE: the contract for TermEnum says the
    // terms will be in natural order (which is
    // ordering by field name, term text).  The
    // contract for TermDocs says the docs will
    // be ordered by document number.  So the
    // following loop will automatically sort the
    // terms in the correct order.

    final TermDocs termDocs = reader.termDocs();
    try {
      int t = 0;  // current term number
      do {
        final Term term = enumerator.term();
        // Compare field names by value, not by identity, so the result does
        // not depend on the caller handing in an interned String.  A missing
        // current term is treated like having run past the field.
        if (term == null || !term.field().equals (field)) break;
        t++;
        termDocs.seek (enumerator);
        while (termDocs.next()) {
          retArray[termDocs.doc()] = t;
        }
      } while (enumerator.next());
    } finally {
      termDocs.close();
    }
    return retArray;
  }
}

View File

@ -0,0 +1,45 @@
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Expert: Returned by low-level sorted search implementations.
 *
 * <p>Created: Feb 12, 2004 8:58:46 AM
 *
 * @author  Tim Jones (Nacimiento Software)
 * @since   lucene 1.4
 * @version $Id$
 * @see Searchable#search(Query,Filter,int,Sort)
 */
public class TopFieldDocs extends TopDocs {

  /** The sort criteria by which the results were ordered. */
  public SortField[] fields;

  /**
   * Builds a result holder that carries the sort criteria in addition to
   * the hit count and top hits passed on to the superclass.
   * @param totalHits  Total number of hits for the query.
   * @param scoreDocs  The top hits for the query.
   * @param fields     The sort criteria used to find the top hits.
   */
  TopFieldDocs (int totalHits, ScoreDoc[] scoreDocs, SortField[] fields) {
    super (totalHits, scoreDocs);
    this.fields = fields;
  }
}