mirror of https://github.com/apache/lucene.git
Added hit sorting code, from Tim Jones.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150201 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a8d459a9f5
commit
32d5bcd950
|
@ -35,9 +35,9 @@ $Id$
|
|||
RemoteSearchable this makes it easy to implement distributed
|
||||
search systems. (Jean-Francois Halleux via cutting)
|
||||
|
||||
5. Added IntegerSortedSearcher and FieldSortedHitQueue classes that
|
||||
together provide the ability to sort by single-valued Integer
|
||||
fields. (Tim Jones via Otis)
|
||||
5. Added support for hit sorting. Results may now be sorted by any
|
||||
indexed field. For details see the javadoc for
|
||||
Searcher#search(Query, Sort). (Tim Jones via Cutting)
|
||||
|
||||
6. Changed FSDirectory to auto-create a full directory tree that it
|
||||
needs by using mkdirs() instead of mkdir(). (Mladen Turk via Otis)
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Expert: A ScoreDoc which also contains information about
|
||||
* how to sort the referenced document.
|
||||
*
|
||||
* <p>Created: Feb 11, 2004 1:23:38 PM
|
||||
*
|
||||
* @author Tim Jones (Nacimiento Software)
|
||||
* @since lucene 1.4
|
||||
* @version $Id$
|
||||
* @see TopFieldDocs
|
||||
*/
|
||||
public class FieldDoc
|
||||
extends ScoreDoc {
|
||||
|
||||
/** The values which are used to sort the referenced document.
|
||||
* The order of these will match the original sort criteria given by an
|
||||
* Sort object.
|
||||
* @see Sort
|
||||
* @see Searchable#search(Query,Filter,int,Sort)
|
||||
*/
|
||||
public Object[] fields;
|
||||
|
||||
/** Creates one of these objects with empty sort information. */
|
||||
public FieldDoc (int doc, float score) {
|
||||
super (doc, score);
|
||||
}
|
||||
|
||||
/** Creates one of these objects with the given sort information. */
|
||||
public FieldDoc (int doc, float score, Object[] fields) {
|
||||
super (doc, score);
|
||||
this.fields = fields;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,151 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Expert: Collects sorted results from Searchables and collates them.
|
||||
* The elements put into this queue must be of type FieldDoc.
|
||||
*
|
||||
* <p>Created: Feb 11, 2004 2:04:21 PM
|
||||
*
|
||||
* @author Tim Jones (Nacimiento Software)
|
||||
* @since lucene 1.4
|
||||
* @version $Id$
|
||||
*/
|
||||
class FieldDocSortedHitQueue
|
||||
extends PriorityQueue {
|
||||
|
||||
// this cannot contain AUTO fields
|
||||
SortField[] fields;
|
||||
|
||||
/**
|
||||
* Creates a hit queue sorted by the given list of fields.
|
||||
* @param fields Field names, in priority order (highest priority first).
|
||||
* @param size The number of hits to retain. Must be greater than zero.
|
||||
* @throws IOException
|
||||
*/
|
||||
FieldDocSortedHitQueue (SortField[] fields, int size)
|
||||
throws IOException {
|
||||
this.fields = fields;
|
||||
initialize (size);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Allows redefinition of sort fields if they are <code>null</code>.
|
||||
* This is to handle the
|
||||
* case using ParallelMultiSearcher where the original list
|
||||
* contains AUTO and we don't know
|
||||
* the actual sort type until the values come back. This
|
||||
* method is thread safe.
|
||||
* @param fields
|
||||
*/
|
||||
synchronized void setFields (SortField[] fields) {
|
||||
if (fields == null) this.fields = fields;
|
||||
}
|
||||
|
||||
|
||||
/** Returns the fields being used to sort. */
|
||||
SortField[] getFields() {
|
||||
return fields;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns whether <code>a</code> is less relevant than <code>b</code>.
|
||||
* @param a ScoreDoc
|
||||
* @param b ScoreDoc
|
||||
* @return <code>true</code> if document <code>a</code> should be sorted after document <code>b</code>.
|
||||
*/
|
||||
protected final boolean lessThan (final Object a, final Object b) {
|
||||
final FieldDoc docA = (FieldDoc) a;
|
||||
final FieldDoc docB = (FieldDoc) b;
|
||||
final int n = fields.length;
|
||||
int c = 0;
|
||||
for (int i=0; i<n && c==0; ++i) {
|
||||
final int type = fields[i].getType();
|
||||
if (fields[i].getReverse()) {
|
||||
switch (type) {
|
||||
case SortField.SCORE:
|
||||
float r1 = ((Float)docA.fields[i]).floatValue();
|
||||
float r2 = ((Float)docB.fields[i]).floatValue();
|
||||
if (r1 < r2) c = -1;
|
||||
if (r1 > r2) c = 1;
|
||||
break;
|
||||
case SortField.DOC:
|
||||
case SortField.INT:
|
||||
case SortField.STRING:
|
||||
int i1 = ((Integer)docA.fields[i]).intValue();
|
||||
int i2 = ((Integer)docB.fields[i]).intValue();
|
||||
if (i1 > i2) c = -1;
|
||||
if (i1 < i2) c = 1;
|
||||
break;
|
||||
case SortField.FLOAT:
|
||||
float f1 = ((Float)docA.fields[i]).floatValue();
|
||||
float f2 = ((Float)docB.fields[i]).floatValue();
|
||||
if (f1 > f2) c = -1;
|
||||
if (f1 < f2) c = 1;
|
||||
break;
|
||||
case SortField.AUTO:
|
||||
// we cannot handle this - even if we determine the type of object (Float or
|
||||
// Integer), we don't necessarily know how to compare them (both SCORE and
|
||||
// FLOAT both contain floats, but are sorted opposite of each other). Before
|
||||
// we get here, each AUTO should have been replaced with its actual value.
|
||||
throw new RuntimeException ("FieldDocSortedHitQueue cannot use an AUTO SortField");
|
||||
default:
|
||||
throw new RuntimeException ("invalid SortField type: "+type);
|
||||
}
|
||||
} else {
|
||||
switch (type) {
|
||||
case SortField.SCORE:
|
||||
float r1 = ((Float)docA.fields[i]).floatValue();
|
||||
float r2 = ((Float)docB.fields[i]).floatValue();
|
||||
if (r1 > r2) c = -1;
|
||||
if (r1 < r2) c = 1;
|
||||
break;
|
||||
case SortField.DOC:
|
||||
case SortField.INT:
|
||||
case SortField.STRING:
|
||||
int i1 = ((Integer)docA.fields[i]).intValue();
|
||||
int i2 = ((Integer)docB.fields[i]).intValue();
|
||||
if (i1 < i2) c = -1;
|
||||
if (i1 > i2) c = 1;
|
||||
break;
|
||||
case SortField.FLOAT:
|
||||
float f1 = ((Float)docA.fields[i]).floatValue();
|
||||
float f2 = ((Float)docB.fields[i]).floatValue();
|
||||
if (f1 < f2) c = -1;
|
||||
if (f1 > f2) c = 1;
|
||||
break;
|
||||
case SortField.AUTO:
|
||||
// we cannot handle this - even if we determine the type of object (Float or
|
||||
// Integer), we don't necessarily know how to compare them (both SCORE and
|
||||
// FLOAT both contain floats, but are sorted opposite of each other). Before
|
||||
// we get here, each AUTO should have been replaced with its actual value.
|
||||
throw new RuntimeException ("FieldDocSortedHitQueue cannot use an AUTO SortField");
|
||||
default:
|
||||
throw new RuntimeException ("invalid SortField type: "+type);
|
||||
}
|
||||
}
|
||||
}
|
||||
return c > 0;
|
||||
}
|
||||
}
|
|
@ -16,37 +16,29 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.TermEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermDocs;
|
||||
import org.apache.lucene.index.TermEnum;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.io.IOException;
|
||||
import java.util.Hashtable;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Expert: collects results from a search and sorts them by terms in a
|
||||
* given field in each document.
|
||||
*
|
||||
* <p>In this version (0.1) the field to sort by must contain strictly
|
||||
* String representations of Integers.
|
||||
* See {@link SortedIndexSearcher SortedIndexSearcher} for more
|
||||
* information. Each document is assumed to have a single term in the
|
||||
* given field, and the value of the term is the document's relative
|
||||
* position in the given sort order.
|
||||
* Expert: Base class for collecting results from a search and sorting
|
||||
* them by terms in a given field in each document.
|
||||
*
|
||||
* <p>When one of these objects is created, a TermEnumerator is
|
||||
* created to fetch all the terms in the index for the given field.
|
||||
* The value of each term is assumed to be an integer representing a
|
||||
* The value of each term is assumed to represent a
|
||||
* sort position. Each document is assumed to contain one of the
|
||||
* terms, indicating where in the sort it belongs.
|
||||
*
|
||||
* <p><h3>Memory Usage</h3>
|
||||
*
|
||||
* <p>A static cache is maintained. This cache contains an integer
|
||||
* array of length <code>IndexReader.maxDoc()</code> for each field
|
||||
* or float array of length <code>IndexReader.maxDoc()</code> for each field
|
||||
* name for which a sort is performed. In other words, the size of
|
||||
* the cache in bytes is:
|
||||
*
|
||||
|
@ -64,11 +56,11 @@ import java.io.IOException;
|
|||
*
|
||||
* <p>Created: Dec 8, 2003 12:56:03 PM
|
||||
*
|
||||
* @author "Tim Jones" <tjluc@nacimiento.com>
|
||||
* @since lucene 1.3
|
||||
* @version 0.1
|
||||
* @author Tim Jones (Nacimiento Software)
|
||||
* @since lucene 1.4
|
||||
* @version $Id$
|
||||
*/
|
||||
public class FieldSortedHitQueue
|
||||
abstract class FieldSortedHitQueue
|
||||
extends PriorityQueue {
|
||||
|
||||
/**
|
||||
|
@ -81,71 +73,196 @@ extends PriorityQueue {
|
|||
protected static int lastReaderHash;
|
||||
|
||||
/**
|
||||
* Contains the cache of sort information. The
|
||||
* key is field name, the value an array of int.
|
||||
* A HashMap is used, and we are careful how we
|
||||
* handle synchronization. This is because best
|
||||
* performance is obtained when the same IndexReader
|
||||
* is used over and over, and we therefore perform
|
||||
* many reads and few writes.
|
||||
* Contains the cache of sort information, mapping
|
||||
* String (field names) to ScoreDocComparator.
|
||||
*/
|
||||
protected static HashMap fieldCache;
|
||||
protected static final Hashtable fieldCache = new Hashtable();
|
||||
|
||||
/** The pattern used to detect integer values in a field */
|
||||
protected static final Pattern pIntegers = Pattern.compile ("[0-9\\-]+");
|
||||
|
||||
/** The pattern used to detect float values in a field */
|
||||
protected static final Pattern pFloats = Pattern.compile ("[0-9+\\-\\.eEfFdD]+");
|
||||
|
||||
/** The sort information being used by this instance */
|
||||
protected int[] fieldOrder;
|
||||
|
||||
/**
|
||||
* Creates a hit queue sorted by the given field.
|
||||
* Returns a comparator for the given field. If there is already one in the cache, it is returned.
|
||||
* Otherwise one is created and put into the cache. If <code>reader</code> is different than the
|
||||
* one used for the current cache, or has changed size, the cache is cleared first.
|
||||
* @param reader Index to use.
|
||||
* @param field Field to sort by.
|
||||
* @return Comparator; never <code>null</code>.
|
||||
* @throws IOException If an error occurs reading the index.
|
||||
* @see #determineComparator
|
||||
*/
|
||||
static ScoreDocComparator getCachedComparator (final IndexReader reader, final String field, final int type)
|
||||
throws IOException {
|
||||
|
||||
if (type == SortField.DOC) return ScoreDocComparator.INDEXORDER;
|
||||
if (type == SortField.SCORE) return ScoreDocComparator.RELEVANCE;
|
||||
|
||||
// see if we have already generated a comparator for this field
|
||||
if (reader.hashCode() == lastReaderHash) {
|
||||
ScoreDocLookupComparator comparer = (ScoreDocLookupComparator) fieldCache.get (field);
|
||||
if (comparer != null && comparer.sizeMatches(reader.maxDoc())) {
|
||||
return comparer;
|
||||
}
|
||||
} else {
|
||||
lastReaderHash = reader.hashCode();
|
||||
fieldCache.clear();
|
||||
}
|
||||
|
||||
ScoreDocComparator comparer = null;
|
||||
switch (type) {
|
||||
case SortField.SCORE: comparer = ScoreDocComparator.RELEVANCE; break;
|
||||
case SortField.DOC: comparer = ScoreDocComparator.INDEXORDER; break;
|
||||
case SortField.INT: comparer = IntegerSortedHitQueue.comparator (reader, field); break;
|
||||
case SortField.FLOAT: comparer = FloatSortedHitQueue.comparator (reader, field); break;
|
||||
case SortField.STRING: comparer = StringSortedHitQueue.comparator (reader, field); break;
|
||||
case SortField.AUTO: comparer = determineComparator (reader, field); break;
|
||||
default:
|
||||
throw new RuntimeException ("invalid sort field type: "+type);
|
||||
}
|
||||
|
||||
// store the comparator in the cache for reuse
|
||||
fieldCache.put (field, comparer);
|
||||
|
||||
return comparer;
|
||||
}
|
||||
|
||||
|
||||
/** Clears the static cache of sorting information. */
|
||||
static void clearCache() {
|
||||
fieldCache.clear();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a FieldSortedHitQueue sorted by the given ScoreDocComparator.
|
||||
* @param comparator Comparator to use.
|
||||
* @param size Number of hits to retain.
|
||||
* @return Hit queue sorted using the given comparator.
|
||||
*/
|
||||
static FieldSortedHitQueue getInstance (ScoreDocComparator comparator, int size) {
|
||||
return new FieldSortedHitQueue (comparator, size) {
|
||||
// dummy out the abstract method
|
||||
protected ScoreDocLookupComparator createComparator (IndexReader reader, String field) throws IOException {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Looks at the actual values in the field and determines whether
|
||||
* they contain Integers, Floats or Strings. Only the first term in the field
|
||||
* is looked at.
|
||||
* <p>The following patterns are used to determine the content of the terms:
|
||||
* <p><table border="1" cellspacing="0" cellpadding="3">
|
||||
* <tr><th>Sequence</th><th>Pattern</th><th>Type</th></tr>
|
||||
* <tr><td>1</td><td>[0-9\-]+</td><td>Integer</td></tr>
|
||||
* <tr><td>2</td><td>[0-9+\-\.eEfFdD]+</td><td>Float</td></tr>
|
||||
* <tr><td>3</td><td><i>(none - default)</i></td><td>String</td></tr>
|
||||
* </table>
|
||||
*
|
||||
* @param reader Index to use.
|
||||
* @param field Field to create comparator for.
|
||||
* @return Comparator appropriate for the terms in the given field.
|
||||
* @throws IOException If an error occurs reading the index.
|
||||
*/
|
||||
protected static ScoreDocComparator determineComparator (IndexReader reader, String field)
|
||||
throws IOException {
|
||||
|
||||
TermEnum enumerator = reader.terms (new Term (field, ""));
|
||||
try {
|
||||
Term term = enumerator.term();
|
||||
if (term == null) {
|
||||
throw new RuntimeException ("no terms in field "+field);
|
||||
}
|
||||
if (term.field() == field) {
|
||||
String termtext = term.text().trim();
|
||||
|
||||
if (pIntegers.matcher(termtext).matches())
|
||||
return IntegerSortedHitQueue.comparator (reader, enumerator, field);
|
||||
|
||||
else if (pFloats.matcher(termtext).matches())
|
||||
return FloatSortedHitQueue.comparator (reader, enumerator, field);
|
||||
|
||||
return StringSortedHitQueue.comparator (reader, enumerator, field);
|
||||
|
||||
} else {
|
||||
throw new RuntimeException ("field \""+field+"\" does not appear to be indexed");
|
||||
}
|
||||
} finally {
|
||||
enumerator.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The sorting priority used. The first element is set by the constructors.
|
||||
* The result is that sorting is done by field value, then by index order.
|
||||
*/
|
||||
private final ScoreDocComparator[] comparators = new ScoreDocComparator[] {
|
||||
null, ScoreDocComparator.INDEXORDER
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Creates a hit queue sorted by the given field. Hits are sorted by the field, then
|
||||
* by index order.
|
||||
* @param reader IndexReader to use.
|
||||
* @param integer_field Field to sort by.
|
||||
* @param field Field to sort by.
|
||||
* @param size Number of hits to return - see {@link PriorityQueue#initialize(int) initialize}
|
||||
* @throws IOException If the internal term enumerator fails.
|
||||
*/
|
||||
public FieldSortedHitQueue (IndexReader reader, String integer_field, int size)
|
||||
FieldSortedHitQueue (IndexReader reader, String field, int size)
|
||||
throws IOException {
|
||||
|
||||
// reset the cache if we have a new reader
|
||||
int hash = reader.hashCode();
|
||||
if (hash != lastReaderHash) {
|
||||
lastReaderHash = hash;
|
||||
if (fieldCache != null) {
|
||||
fieldCache.clear();
|
||||
}
|
||||
fieldCache = new HashMap();
|
||||
|
||||
// initialize the PriorityQueue
|
||||
initialize (size);
|
||||
|
||||
// set the sort
|
||||
comparators[0] = initializeSort (reader, field);
|
||||
}
|
||||
|
||||
initialize (size);
|
||||
initializeSort (reader, integer_field);
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares documents based on the value of the term in the field
|
||||
* being sorted by. Documents which should appear at the top of the
|
||||
* list should have low values in the term; documents which should
|
||||
* appear at the end should have high values.
|
||||
*
|
||||
* <p>In the context of this method, "less than" means "less relevant",
|
||||
* so documents at the top of the list are "greatest" and documents at
|
||||
* the bottom are "least".
|
||||
*
|
||||
* <p>Document A is considered less than Document B
|
||||
* if A.field.term > B.field.term or A.doc > B.doc.
|
||||
*
|
||||
* @param a ScoreDoc object for document a.
|
||||
* @param b ScoreDoc object for document b.
|
||||
* @return true if document a is less than document b.
|
||||
* @see ScoreDoc
|
||||
* Creates a sorted hit queue based on an existing comparator. The hits
|
||||
* are sorted by the given comparator, then by index order.
|
||||
* @param comparator Comparator used to sort hits.
|
||||
* @param size Number of hits to retain.
|
||||
*/
|
||||
protected final boolean lessThan (Object a, Object b) {
|
||||
ScoreDoc hitA = (ScoreDoc) a;
|
||||
ScoreDoc hitB = (ScoreDoc) b;
|
||||
int scoreA = fieldOrder[hitA.doc];
|
||||
int scoreB = fieldOrder[hitB.doc];
|
||||
if (scoreA == scoreB)
|
||||
return hitA.doc > hitB.doc;
|
||||
else
|
||||
return scoreA > scoreB; // bigger is really less - the ones at the top should be the lowest
|
||||
protected FieldSortedHitQueue (ScoreDocComparator comparator, int size) {
  // size the underlying PriorityQueue first
  initialize (size);
  // slot 0 is the primary sort; slot 1 stays ScoreDocComparator.INDEXORDER
  this.comparators[0] = comparator;
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns whether <code>a</code> is less relevant than <code>b</code>
|
||||
* @param a ScoreDoc
|
||||
* @param b ScoreDoc
|
||||
* @return <code>true</code> if document <code>a</code> should be sorted after document <code>b</code>.
|
||||
*/
|
||||
protected final boolean lessThan (final Object a, final Object b) {
|
||||
final ScoreDoc docA = (ScoreDoc) a;
|
||||
final ScoreDoc docB = (ScoreDoc) b;
|
||||
final int n = comparators.length;
|
||||
int c = 0;
|
||||
for (int i=0; i<n && c==0; ++i) {
|
||||
c = comparators[i].compare (docA, docB);
|
||||
}
|
||||
return c > 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Initializes the cache of sort information. <code>fieldCache</code> is queried
|
||||
* to see if it has the term information for the given field.
|
||||
|
@ -153,54 +270,33 @@ extends PriorityQueue {
|
|||
* (note that we assume new IndexReaders are caught during the
|
||||
* constructor), the existing data is used. If not, all the term values
|
||||
* for the given field are fetched. The value of the term is assumed
|
||||
* to be the sort index for any documents containing the term. Documents
|
||||
* to indicate the sort order for any documents containing the term. Documents
|
||||
* should only have one term in the given field. Multiple documents
|
||||
* can share the same term if desired (documents with the same term will
|
||||
* be sorted relative to each other by the order they were placed in
|
||||
* the index).
|
||||
* can share the same term if desired, in which case they will be
|
||||
* considered equal during the sort.
|
||||
* @param reader The document index.
|
||||
* @param field The field to sort by.
|
||||
* @throws IOException If the term enumerator fails.
|
||||
* @throws IOException If createComparator(IndexReader,String) fails - usually caused by the term enumerator failing.
|
||||
*/
|
||||
protected final void initializeSort (IndexReader reader, String field)
|
||||
protected final ScoreDocComparator initializeSort (IndexReader reader, String field)
|
||||
throws IOException {
|
||||
|
||||
fieldOrder = (int[]) fieldCache.get (field);
|
||||
if (fieldOrder == null || fieldOrder.length != reader.maxDoc()) {
|
||||
fieldOrder = new int [reader.maxDoc()];
|
||||
|
||||
TermEnum enumerator = reader.terms (new Term (field, ""));
|
||||
TermDocs termDocs = reader.termDocs();
|
||||
if (enumerator.term() == null) {
|
||||
throw new RuntimeException ("no terms in field "+field);
|
||||
ScoreDocLookupComparator comparer = (ScoreDocLookupComparator) fieldCache.get (field);
|
||||
if (comparer == null || !comparer.sizeMatches(reader.maxDoc())) {
|
||||
comparer = createComparator (reader, field);
|
||||
fieldCache.put (field, comparer);
|
||||
}
|
||||
return comparer;
|
||||
}
|
||||
|
||||
try {
|
||||
Term term = enumerator.term();
|
||||
while (term.field() == field) {
|
||||
termDocs.seek (term);
|
||||
if (termDocs.next()) {
|
||||
fieldOrder[termDocs.doc()] = Integer.parseInt (term.text());
|
||||
} else {
|
||||
throw new RuntimeException ("termDocs.next() failed!");
|
||||
}
|
||||
if (!enumerator.next()) {
|
||||
break;
|
||||
}
|
||||
term = enumerator.term();
|
||||
}
|
||||
} finally {
|
||||
enumerator.close();
|
||||
termDocs.close();
|
||||
}
|
||||
|
||||
// be careful how the cache is updated so we
|
||||
// don't have synchronization problems. we do
|
||||
// it this way because we assume updates will be
|
||||
// few compared to the number of reads.
|
||||
HashMap newCache = (HashMap) fieldCache.clone();
|
||||
newCache.put (field, fieldOrder);
|
||||
fieldCache = newCache;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Subclasses should implement this method to provide an appropriate ScoreDocLookupComparator.
|
||||
* @param reader Index to use.
|
||||
* @param field Field to use for sorting.
|
||||
* @return Comparator to use to sort hits.
|
||||
* @throws IOException If an error occurs reading the index.
|
||||
*/
|
||||
protected abstract ScoreDocLookupComparator createComparator (IndexReader reader, String field)
|
||||
throws IOException;
|
||||
}
|
|
@ -0,0 +1,205 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermDocs;
|
||||
import org.apache.lucene.index.TermEnum;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Expert: A sorted hit queue for fields that contain strictly floating point values.
|
||||
* Hits are sorted into the queue by the values in the field and then by document number.
|
||||
*
|
||||
* <p>Created: Feb 2, 2004 9:23:03 AM
|
||||
*
|
||||
* @author Tim Jones (Nacimiento Software)
|
||||
* @since lucene 1.4
|
||||
* @version $Id$
|
||||
*/
|
||||
class FloatSortedHitQueue
|
||||
extends FieldSortedHitQueue {
|
||||
|
||||
/**
|
||||
* Creates a hit queue sorted over the given field containing float values.
|
||||
* @param reader Index to use.
|
||||
* @param float_field Field containing float sort information
|
||||
* @param size Number of hits to collect.
|
||||
* @throws IOException If an error occurs reading the index.
|
||||
*/
|
||||
FloatSortedHitQueue (IndexReader reader, String float_field, int size)
|
||||
throws IOException {
|
||||
super (reader, float_field, size);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a comparator for sorting hits according to a field containing floats.
|
||||
* Just calls <code>comparator(IndexReader,String)</code>.
|
||||
* @param reader Index to use.
|
||||
* @param field Field containg float values.
|
||||
* @return Comparator for sorting hits.
|
||||
* @throws IOException If an error occurs reading the index.
|
||||
*/
|
||||
protected ScoreDocLookupComparator createComparator (final IndexReader reader, final String field)
|
||||
throws IOException {
|
||||
return comparator (reader, field);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a comparator for sorting hits according to a field containing floats.
|
||||
* @param reader Index to use.
|
||||
* @param field Field containg float values.
|
||||
* @return Comparator for sorting hits.
|
||||
* @throws IOException If an error occurs reading the index.
|
||||
*/
|
||||
static ScoreDocLookupComparator comparator (final IndexReader reader, final String field)
|
||||
throws IOException {
|
||||
return new ScoreDocLookupComparator () {
|
||||
|
||||
protected final float[] fieldOrder = generateSortIndex();
|
||||
|
||||
protected final float[] generateSortIndex()
|
||||
throws IOException {
|
||||
|
||||
float[] retArray = new float[reader.maxDoc()];
|
||||
|
||||
TermEnum enumerator = reader.terms (new Term (field, ""));
|
||||
TermDocs termDocs = reader.termDocs ();
|
||||
if (enumerator.term () == null) {
|
||||
throw new RuntimeException ("no terms in field " + field);
|
||||
}
|
||||
|
||||
try {
|
||||
do {
|
||||
Term term = enumerator.term ();
|
||||
if (term.field () != field) break;
|
||||
float termval = Float.parseFloat (term.text());
|
||||
termDocs.seek (enumerator);
|
||||
while (termDocs.next ()) {
|
||||
retArray[termDocs.doc ()] = termval;
|
||||
}
|
||||
} while (enumerator.next ());
|
||||
} finally {
|
||||
enumerator.close ();
|
||||
termDocs.close ();
|
||||
}
|
||||
|
||||
return retArray;
|
||||
}
|
||||
|
||||
public final int compare (final ScoreDoc i, final ScoreDoc j) {
|
||||
final float fi = fieldOrder[i.doc];
|
||||
final float fj = fieldOrder[j.doc];
|
||||
if (fi < fj) return -1;
|
||||
if (fi > fj) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
public final int compareReverse (final ScoreDoc i, final ScoreDoc j) {
|
||||
final float fi = fieldOrder[i.doc];
|
||||
final float fj = fieldOrder[j.doc];
|
||||
if (fi > fj) return -1;
|
||||
if (fi < fj) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
public final boolean sizeMatches (final int n) {
|
||||
return fieldOrder.length == n;
|
||||
}
|
||||
|
||||
public Object sortValue (final ScoreDoc i) {
|
||||
return new Float (fieldOrder[i.doc]);
|
||||
}
|
||||
|
||||
public int sortType() {
|
||||
return SortField.FLOAT;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a comparator for sorting hits according to a field containing floats using the given enumerator
|
||||
* to collect term values.
|
||||
* @param reader Index to use.
|
||||
* @param field Field containg float values.
|
||||
* @return Comparator for sorting hits.
|
||||
* @throws IOException If an error occurs reading the index.
|
||||
*/
|
||||
static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String field)
|
||||
throws IOException {
|
||||
return new ScoreDocLookupComparator () {
|
||||
|
||||
protected final float[] fieldOrder = generateSortIndex();
|
||||
|
||||
protected final float[] generateSortIndex()
|
||||
throws IOException {
|
||||
|
||||
float[] retArray = new float[reader.maxDoc()];
|
||||
|
||||
TermDocs termDocs = reader.termDocs ();
|
||||
try {
|
||||
do {
|
||||
Term term = enumerator.term();
|
||||
if (term.field() != field) break;
|
||||
float termval = Float.parseFloat (term.text());
|
||||
termDocs.seek (enumerator);
|
||||
while (termDocs.next()) {
|
||||
retArray[termDocs.doc()] = termval;
|
||||
}
|
||||
} while (enumerator.next());
|
||||
} finally {
|
||||
termDocs.close();
|
||||
}
|
||||
|
||||
return retArray;
|
||||
}
|
||||
|
||||
public final int compare (final ScoreDoc i, final ScoreDoc j) {
|
||||
final float fi = fieldOrder[i.doc];
|
||||
final float fj = fieldOrder[j.doc];
|
||||
if (fi < fj) return -1;
|
||||
if (fi > fj) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
public final int compareReverse (final ScoreDoc i, final ScoreDoc j) {
|
||||
final float fi = fieldOrder[i.doc];
|
||||
final float fj = fieldOrder[j.doc];
|
||||
if (fi > fj) return -1;
|
||||
if (fi < fj) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
public final boolean sizeMatches (final int n) {
|
||||
return fieldOrder.length == n;
|
||||
}
|
||||
|
||||
public Object sortValue (final ScoreDoc i) {
|
||||
return new Float (fieldOrder[i.doc]);
|
||||
}
|
||||
|
||||
public int sortType() {
|
||||
return SortField.FLOAT;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
@ -64,6 +64,7 @@ public final class Hits {
|
|||
private Query query;
|
||||
private Searcher searcher;
|
||||
private Filter filter = null;
|
||||
private Sort sort = null;
|
||||
|
||||
private int length; // the total number of hits
|
||||
private Vector hitDocs = new Vector(); // cache of hits retrieved
|
||||
|
@ -80,6 +81,14 @@ public final class Hits {
|
|||
getMoreDocs(50); // retrieve 100 initially
|
||||
}
|
||||
|
||||
/**
 * Creates the hits for a query whose results are ordered by the given sort.
 * @param s  Searcher used to run the query.
 * @param q  Query to run.
 * @param f  Filter to apply.
 * @param o  Sort order to use.
 * @throws IOException  If fetching the first batch of hits fails.
 */
Hits(Searcher s, Query q, Filter f, Sort o) throws IOException {
  this.query = q;
  this.searcher = s;
  this.filter = f;
  this.sort = o;
  getMoreDocs(50);                              // retrieve 100 initially
}
|
||||
|
||||
/**
|
||||
* Tries to add new documents to hitDocs.
|
||||
* Ensures that the hit numbered <code>min</code> has been retrieved.
|
||||
|
@ -90,7 +99,7 @@ public final class Hits {
|
|||
}
|
||||
|
||||
int n = min * 2; // double # retrieved
|
||||
TopDocs topDocs = searcher.search(query, filter, n);
|
||||
TopDocs topDocs = (sort == null) ? searcher.search(query, filter, n) : searcher.search(query, filter, n, sort);
|
||||
length = topDocs.totalHits;
|
||||
ScoreDoc[] scoreDocs = topDocs.scoreDocs;
|
||||
|
||||
|
|
|
@ -61,6 +61,7 @@ import org.apache.lucene.store.Directory;
|
|||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
/** Implements search over a single IndexReader.
|
||||
*
|
||||
|
@ -149,6 +150,44 @@ public class IndexSearcher extends Searcher {
|
|||
return new TopDocs(totalHits[0], scoreDocs);
|
||||
}
|
||||
|
||||
/** Expert: Low-level search implementation. Finds the top <code>n</code>
|
||||
* hits for <code>query</code>, applying <code>filter</code> if non-null.
|
||||
* Results are ordered as specified by <code>sort</code>.
|
||||
*
|
||||
* <p>Called by {@link Hits}.
|
||||
*
|
||||
* <p>Applications should usually call {@link #search(Query)} or {@link
|
||||
* #search(Query,Filter)} instead.
|
||||
*/
|
||||
public TopFieldDocs search(Query query, Filter filter, final int nDocs,
|
||||
Sort sort)
|
||||
throws IOException {
|
||||
Scorer scorer = query.weight(this).scorer(reader);
|
||||
if (scorer == null)
|
||||
return new TopFieldDocs(0, new ScoreDoc[0], sort.fields);
|
||||
|
||||
final BitSet bits = filter != null ? filter.bits(reader) : null;
|
||||
final MultiFieldSortedHitQueue hq =
|
||||
new MultiFieldSortedHitQueue(reader, sort.fields, nDocs);
|
||||
final int[] totalHits = new int[1];
|
||||
scorer.score(new HitCollector() {
|
||||
public final void collect(int doc, float score) {
|
||||
if (score > 0.0f && // ignore zeroed buckets
|
||||
(bits==null || bits.get(doc))) { // skip docs not in bits
|
||||
totalHits[0]++;
|
||||
hq.insert(new FieldDoc(doc, score));
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
|
||||
for (int i = hq.size()-1; i >= 0; i--) // put docs in array
|
||||
scoreDocs[i] = hq.fillFields ((FieldDoc) hq.pop());
|
||||
|
||||
return new TopFieldDocs(totalHits[0], scoreDocs, hq.getFields());
|
||||
}
|
||||
|
||||
|
||||
/** Lower-level search API.
|
||||
*
|
||||
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero
|
||||
|
|
|
@ -0,0 +1,207 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermDocs;
|
||||
import org.apache.lucene.index.TermEnum;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
||||
/**
|
||||
* Expert: A sorted hit queue for fields that contain strictly integer values.
|
||||
* Hits are sorted into the queue by the values in the field and then by document number.
|
||||
*
|
||||
* <p>Created: Jan 30, 2004 3:35:09 PM
|
||||
*
|
||||
* @author Tim Jones (Nacimiento Software)
|
||||
* @since lucene 1.4
|
||||
* @version $Id$
|
||||
*/
|
||||
class IntegerSortedHitQueue
|
||||
extends FieldSortedHitQueue {
|
||||
|
||||
/**
|
||||
* Creates a hit queue sorted over the given field containing integer values.
|
||||
* @param reader Index to use.
|
||||
* @param integer_field Field containing integer sort information
|
||||
* @param size Number of hits to collect.
|
||||
* @throws IOException If an error occurs reading the index.
|
||||
*/
|
||||
IntegerSortedHitQueue (IndexReader reader, String integer_field, int size)
|
||||
throws IOException {
|
||||
super (reader, integer_field, size);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a comparator for sorting hits according to a field containing integers.
|
||||
* Just calls <code>comparator(IndexReader,String)</code>.
|
||||
* @param reader Index to use.
|
||||
* @param field Field containg integer values.
|
||||
* @return Comparator for sorting hits.
|
||||
* @throws IOException If an error occurs reading the index.
|
||||
*/
|
||||
protected ScoreDocLookupComparator createComparator (final IndexReader reader, final String field)
|
||||
throws IOException {
|
||||
return comparator (reader, field);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a comparator for sorting hits according to a field containing integers.
|
||||
* @param reader Index to use.
|
||||
* @param field Field containg integer values.
|
||||
* @return Comparator for sorting hits.
|
||||
* @throws IOException If an error occurs reading the index.
|
||||
*/
|
||||
static ScoreDocLookupComparator comparator (final IndexReader reader, final String field)
|
||||
throws IOException {
|
||||
return new ScoreDocLookupComparator() {
|
||||
|
||||
/** The sort information being used by this instance */
|
||||
protected final int[] fieldOrder = generateSortIndex();
|
||||
|
||||
private final int[] generateSortIndex()
|
||||
throws IOException {
|
||||
|
||||
final int[] retArray = new int[reader.maxDoc()];
|
||||
|
||||
TermEnum enumerator = reader.terms (new Term (field, ""));
|
||||
TermDocs termDocs = reader.termDocs();
|
||||
if (enumerator.term() == null) {
|
||||
throw new RuntimeException ("no terms in field "+field);
|
||||
}
|
||||
|
||||
try {
|
||||
do {
|
||||
Term term = enumerator.term();
|
||||
if (term.field() != field) break;
|
||||
int termval = Integer.parseInt (term.text());
|
||||
termDocs.seek (enumerator);
|
||||
while (termDocs.next()) {
|
||||
retArray[termDocs.doc()] = termval;
|
||||
}
|
||||
} while (enumerator.next());
|
||||
} finally {
|
||||
enumerator.close();
|
||||
termDocs.close();
|
||||
}
|
||||
|
||||
return retArray;
|
||||
}
|
||||
|
||||
public final int compare (final ScoreDoc i, final ScoreDoc j) {
|
||||
final int fi = fieldOrder[i.doc];
|
||||
final int fj = fieldOrder[j.doc];
|
||||
if (fi < fj) return -1;
|
||||
if (fi > fj) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
public final int compareReverse (final ScoreDoc i, final ScoreDoc j) {
|
||||
final int fi = fieldOrder[i.doc];
|
||||
final int fj = fieldOrder[j.doc];
|
||||
if (fi > fj) return -1;
|
||||
if (fi < fj) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
public final boolean sizeMatches (final int n) {
|
||||
return fieldOrder.length == n;
|
||||
}
|
||||
|
||||
public Object sortValue (final ScoreDoc i) {
|
||||
return new Integer (fieldOrder[i.doc]);
|
||||
}
|
||||
|
||||
public int sortType() {
|
||||
return SortField.INT;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a comparator for sorting hits according to a field containing integers using the given enumerator
|
||||
* to collect term values.
|
||||
* @param reader Index to use.
|
||||
* @param field Field containg integer values.
|
||||
* @return Comparator for sorting hits.
|
||||
* @throws IOException If an error occurs reading the index.
|
||||
*/
|
||||
static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String field)
|
||||
throws IOException {
|
||||
return new ScoreDocLookupComparator() {
|
||||
|
||||
protected final int[] fieldOrder = generateSortIndex();
|
||||
|
||||
private final int[] generateSortIndex()
|
||||
throws IOException {
|
||||
|
||||
final int[] retArray = new int[reader.maxDoc()];
|
||||
|
||||
TermDocs termDocs = reader.termDocs();
|
||||
try {
|
||||
do {
|
||||
Term term = enumerator.term();
|
||||
if (term.field() != field) break;
|
||||
int termval = Integer.parseInt (term.text());
|
||||
termDocs.seek (enumerator);
|
||||
while (termDocs.next()) {
|
||||
retArray[termDocs.doc()] = termval;
|
||||
}
|
||||
} while (enumerator.next());
|
||||
} finally {
|
||||
termDocs.close();
|
||||
}
|
||||
|
||||
return retArray;
|
||||
}
|
||||
|
||||
public final int compare (final ScoreDoc i, final ScoreDoc j) {
|
||||
final int fi = fieldOrder[i.doc];
|
||||
final int fj = fieldOrder[j.doc];
|
||||
if (fi < fj) return -1;
|
||||
if (fi > fj) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
public final int compareReverse (final ScoreDoc i, final ScoreDoc j) {
|
||||
final int fi = fieldOrder[i.doc];
|
||||
final int fj = fieldOrder[j.doc];
|
||||
if (fi > fj) return -1;
|
||||
if (fi < fj) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
public final boolean sizeMatches (final int n) {
|
||||
return fieldOrder.length == n;
|
||||
}
|
||||
|
||||
public Object sortValue (final ScoreDoc i) {
|
||||
return new Integer (fieldOrder[i.doc]);
|
||||
}
|
||||
|
||||
public int sortType() {
|
||||
return SortField.INT;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
@ -1,221 +0,0 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* Implements search over an IndexReader using the values of terms in
|
||||
* a field as the primary sort order. Secondary sort is by the order
|
||||
* of documents in the index.
|
||||
*
|
||||
* <p>In this version (0.1) the field to sort by must contain strictly
|
||||
* String representations of Integers (i.e. {@link Integer#toString Integer.toString()}).
|
||||
*
|
||||
* Each document is assumed to have a single term in the given field,
|
||||
* and the value of the term is the document's relative position in
|
||||
* the given sort order. The field must be indexed, but should not be
|
||||
* stored or tokenized:
|
||||
*
|
||||
* <p><code>document.add(new Field("byAlpha", Integer.toString(x), false, true, false));</code>
|
||||
*
|
||||
* <p>In other words, the desired order of documents must be encoded
|
||||
* at the time they are entered into the index. The first document
|
||||
* should have a low value integer, the last document a high value
|
||||
* (i.e. the documents should be numbered <code>1..n</code> where
|
||||
* <code>1</code> is the first and <code>n</code> the last). Values
|
||||
* must be between <code>Integer.MIN_VALUE</code> and
|
||||
* <code>Integer.MAX_VALUE</code> inclusive.
|
||||
*
|
||||
* <p>Then, at search time, the field is designated to be used to sort
|
||||
* the returned hits:
|
||||
*
|
||||
* <p><code>IndexSearcher searcher = new IntegerSortedSearcher(indexReader, "byAlpha");</code>
|
||||
*
|
||||
* <p>or:
|
||||
*
|
||||
* <p><code>IntegerSortedSearcher searcher = new IntegerSortedSearcher(indexReader, "bySomething");
|
||||
* <br>Hits hits = searcher.search(query, filter);
|
||||
* <br>...
|
||||
* <br>searcher.setOrderByField("bySomethingElse");
|
||||
* <br>hits = searcher.search(query, filter);
|
||||
* <br>...
|
||||
* </code>
|
||||
*
|
||||
* <p>Note the above example shows that one of these objects can be
|
||||
* used multiple times, and the sort order changed between usages.
|
||||
*
|
||||
* <p><h3>Memory Usage</h3>
|
||||
*
|
||||
* <p>This object is almost identical to the regular IndexSearcher and
|
||||
* makes no additional memory requirements on its own. Every time the
|
||||
* <code>search()</code> method is called, however, a new
|
||||
* {@link FieldSortedHitQueue FieldSortedHitQueue} object is created.
|
||||
* That object is responsible for putting the hits in the correct order,
|
||||
* and it maintains a cache of information based on the IndexReader
|
||||
* given to it. See its documentation for more information on its
|
||||
* memory usage.
|
||||
*
|
||||
* <p><h3>Concurrency</h3>
|
||||
*
|
||||
* <p>This object has the same behavior during concurrent updates to
|
||||
* the index as does IndexSearcher. Namely, in the default
|
||||
* implementation using
|
||||
* {@link org.apache.lucene.store.FSDirectory FSDirectory}, the index
|
||||
* can be updated (deletes, adds) without harm while this object
|
||||
* exists, but this object will not see the changes. Ultimately this
|
||||
* behavior is a result of the
|
||||
* {@link org.apache.lucene.index.SegmentReader SegmentReader} class
|
||||
* internal to FSDirectory, which caches information about documents
|
||||
* in memory.
|
||||
*
|
||||
* <p>So, in order for IntegerSortedSearcher to be kept up to date with
|
||||
* changes to the index, new instances must be created instead of the
|
||||
* same one used over and over again. This will result in lower
|
||||
* performance than if instances are reused.
|
||||
*
|
||||
* <p><h3>Updates</h3>
|
||||
*
|
||||
* <p>In order to be able to update the index without having to
|
||||
* recalculate all the sort numbers, the numbers should be stored with
|
||||
* "space" between them. That is, sort the documents and number them
|
||||
* <code>1..n</code>. Then, as <code>i</code> goes between
|
||||
* <code>1</code> and <code>n</code>:
|
||||
*
|
||||
* <p><code>document.add(new Field("byAlpha", Integer.toString(i*1000), false, true, false));</code>
|
||||
*
|
||||
* <p>Add a new document sorted between position 1 and 2 by:
|
||||
*
|
||||
* <p><code>document.add(new Field("byAlpha", Integer.toString(1500), false, true, false));</code>
|
||||
*
|
||||
* <p>Be careful not to overrun <code>Integer.MAX_VALUE</code>
|
||||
* (<code>2147483647</code>). Periodically a complete reindex should
|
||||
* be run so the sort orders can be "normalized".
|
||||
*
|
||||
* <p>Created: Dec 8, 2003 12:47:26 PM
|
||||
*
|
||||
* @author "Tim Jones" <tjluc@nacimiento.com>
|
||||
* @since lucene 1.3
|
||||
* @version 0.1
|
||||
* @see IndexSearcher
|
||||
*/
|
||||
public class IntegerSortedSearcher
|
||||
extends IndexSearcher {
|
||||
|
||||
/** stores the field being used to sort by **/
|
||||
protected String field;
|
||||
|
||||
/**
|
||||
* Searches the index in the named directory using the given
|
||||
* field as the primary sort.
|
||||
* The terms in the field must contain strictly integers in
|
||||
* the range <code>Integer.MIN_VALUE</code> and <code>Integer.MAX_VALUE</code> inclusive.
|
||||
* @see IndexSearcher(java.lang.String,java.lang.String)
|
||||
*/
|
||||
public IntegerSortedSearcher(String path, String integer_field)
|
||||
throws IOException {
|
||||
this(IndexReader.open(path), integer_field);
|
||||
}
|
||||
|
||||
/**
|
||||
* Searches the index in the provided directory using the
|
||||
* given field as the primary sort.
|
||||
* The terms in the field must contain strictly integers in
|
||||
* the range <code>Integer.MIN_VALUE</code> and <code>Integer.MAX_VALUE</code> inclusive.
|
||||
* @see IndexSearcher(Directory,java.lang.String)
|
||||
*/
|
||||
public IntegerSortedSearcher(Directory directory, String integer_field)
|
||||
throws IOException {
|
||||
this(IndexReader.open(directory), integer_field);
|
||||
}
|
||||
|
||||
/**
|
||||
* Searches the provided index using the given field as the
|
||||
* primary sort.
|
||||
* The terms in the field must contain strictly integers in
|
||||
* the range <code>Integer.MIN_VALUE</code> and <code>Integer.MAX_VALUE</code> inclusive.
|
||||
* @see IndexSearcher(IndexReader)
|
||||
*/
|
||||
public IntegerSortedSearcher(IndexReader r, String integer_field) {
|
||||
super(r);
|
||||
this.field = integer_field.intern();
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the field to order results by. This can be called
|
||||
* multiple times per instance of IntegerSortedSearcher.
|
||||
* @param integer_field The field to sort results by.
|
||||
*/
|
||||
public void setOrderByField(String integer_field) {
|
||||
this.field = integer_field.intern();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the name of the field currently being used
|
||||
* to sort results by.
|
||||
* @return Field name.
|
||||
*/
|
||||
public String getOrderByField() {
|
||||
return field;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Finds the top <code>nDocs</code>
|
||||
* hits for <code>query</code>, applying <code>filter</code> if non-null.
|
||||
*
|
||||
* Overrides IndexSearcher.search to use a FieldSortedHitQueue instead of the
|
||||
* default HitQueue.
|
||||
*
|
||||
* @see IndexSearcher#search
|
||||
*/
|
||||
public TopDocs search(Query query, Filter filter, final int nDocs)
|
||||
throws IOException {
|
||||
|
||||
Scorer scorer = query.weight(this).scorer(reader);
|
||||
if (scorer == null) {
|
||||
return new TopDocs(0, new ScoreDoc[0]);
|
||||
}
|
||||
|
||||
final BitSet bits = filter != null ? filter.bits(reader) : null;
|
||||
final FieldSortedHitQueue hq = new FieldSortedHitQueue(reader, field, nDocs);
|
||||
final int[] totalHits = new int[1];
|
||||
scorer.score(
|
||||
new HitCollector() {
|
||||
public final void collect(int doc, float score) {
|
||||
if (score > 0.0f && // ignore zeroed buckets
|
||||
(bits == null || bits.get(doc))) { // skip docs not in bits
|
||||
totalHits[0]++;
|
||||
hq.insert(new ScoreDoc(doc, score));
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
|
||||
for (int i = hq.size() - 1; i >= 0; i--) { // put docs in array
|
||||
scoreDocs[i] = (ScoreDoc) hq.pop();
|
||||
}
|
||||
|
||||
return new TopDocs(totalHits[0], scoreDocs);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,110 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Expert: A hit queue for sorting hits by terms in more than one field.
|
||||
* The type of content in each field could be determined dynamically by
|
||||
* FieldSortedHitQueue.determineComparator().
|
||||
*
|
||||
* <p>Created: Feb 3, 2004 4:46:55 PM
|
||||
*
|
||||
* @author Tim Jones (Nacimiento Software)
|
||||
* @since lucene 1.4
|
||||
* @version $Id$
|
||||
* @see FieldSortedHitQueue
|
||||
* @see Searchable#search(Query,Filter,int,Sort)
|
||||
*/
|
||||
class MultiFieldSortedHitQueue
|
||||
extends PriorityQueue {
|
||||
|
||||
/**
|
||||
* Creates a hit queue sorted by the given list of fields.
|
||||
* @param reader Index to use.
|
||||
* @param fields Field names, in priority order (highest priority first). Cannot be <code>null</code> or empty.
|
||||
* @param size The number of hits to retain. Must be greater than zero.
|
||||
* @throws IOException
|
||||
*/
|
||||
MultiFieldSortedHitQueue (IndexReader reader, SortField[] fields, int size)
|
||||
throws IOException {
|
||||
final int n = fields.length;
|
||||
comparators = new ScoreDocComparator[n];
|
||||
this.fields = new SortField[n];
|
||||
for (int i=0; i<n; ++i) {
|
||||
comparators[i] = FieldSortedHitQueue.getCachedComparator (reader, fields[i].getField(), fields[i].getType());
|
||||
this.fields[i] = new SortField (fields[i].getField(), comparators[i].sortType(), fields[i].getReverse());
|
||||
}
|
||||
initialize (size);
|
||||
}
|
||||
|
||||
|
||||
/** Stores a comparator corresponding to each field being sorted by */
|
||||
protected ScoreDocComparator[] comparators;
|
||||
|
||||
/** Stores the sort criteria being used. */
|
||||
protected SortField[] fields;
|
||||
|
||||
|
||||
/**
|
||||
* Returns whether <code>a</code> is less relevant than <code>b</code>.
|
||||
* @param a ScoreDoc
|
||||
* @param b ScoreDoc
|
||||
* @return <code>true</code> if document <code>a</code> should be sorted after document <code>b</code>.
|
||||
*/
|
||||
protected final boolean lessThan (final Object a, final Object b) {
|
||||
final ScoreDoc docA = (ScoreDoc) a;
|
||||
final ScoreDoc docB = (ScoreDoc) b;
|
||||
final int n = comparators.length;
|
||||
int c = 0;
|
||||
for (int i=0; i<n && c==0; ++i) {
|
||||
c = (fields[i].reverse) ? comparators[i].compareReverse (docA, docB)
|
||||
: comparators[i].compare (docA, docB);
|
||||
}
|
||||
return c > 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Given a FieldDoc object, stores the values used
|
||||
* to sort the given document. These values are not the raw
|
||||
* values out of the index, but the internal representation
|
||||
* of them. This is so the given search hit can be collated
|
||||
* by a MultiSearcher with other search hits.
|
||||
* @param doc The FieldDoc to store sort values into.
|
||||
* @return The same FieldDoc passed in.
|
||||
* @see Searchable#search(Query,Filter,int,Sort)
|
||||
*/
|
||||
FieldDoc fillFields (final FieldDoc doc) {
|
||||
final int n = comparators.length;
|
||||
final Object[] fields = new Object[n];
|
||||
for (int i=0; i<n; ++i)
|
||||
fields[i] = comparators[i].sortValue(doc);
|
||||
doc.fields = fields;
|
||||
return doc;
|
||||
}
|
||||
|
||||
|
||||
/** Returns the SortFields being used by this hit queue. */
|
||||
SortField[] getFields() {
|
||||
return fields;
|
||||
}
|
||||
}
|
|
@ -170,6 +170,32 @@ public class MultiSearcher extends Searcher {
|
|||
}
|
||||
|
||||
|
||||
public TopFieldDocs search (Query query, Filter filter, int n, Sort sort)
|
||||
throws IOException {
|
||||
FieldDocSortedHitQueue hq = null;
|
||||
int totalHits = 0;
|
||||
|
||||
for (int i = 0; i < searchables.length; i++) { // search each searcher
|
||||
TopFieldDocs docs = searchables[i].search (query, filter, n, sort);
|
||||
if (hq == null) hq = new FieldDocSortedHitQueue (docs.fields, n);
|
||||
totalHits += docs.totalHits; // update totalHits
|
||||
ScoreDoc[] scoreDocs = docs.scoreDocs;
|
||||
for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
|
||||
ScoreDoc scoreDoc = scoreDocs[j];
|
||||
scoreDoc.doc += starts[i]; // convert doc
|
||||
if (!hq.insert (scoreDoc))
|
||||
break; // no more scores > minScore
|
||||
}
|
||||
}
|
||||
|
||||
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
|
||||
for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
|
||||
scoreDocs[i] = (ScoreDoc) hq.pop();
|
||||
|
||||
return new TopFieldDocs (totalHits, scoreDocs, hq.getFields());
|
||||
}
|
||||
|
||||
|
||||
/** Lower-level search API.
|
||||
*
|
||||
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero
|
||||
|
|
|
@ -57,6 +57,7 @@ package org.apache.lucene.search;
|
|||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
/** Implements parallel search over a set of <code>Searchables</code>.
|
||||
*
|
||||
|
@ -133,6 +134,55 @@ public class ParallelMultiSearcher extends MultiSearcher {
|
|||
return new TopDocs(totalHits, scoreDocs);
|
||||
}
|
||||
|
||||
/**
|
||||
* A search implementation allowing sorting which spans a new thread for each
|
||||
* Searchable, waits for each search to complete and merges
|
||||
* the results back together.
|
||||
*/
|
||||
public TopFieldDocs search(Query query, Filter filter, int nDocs, Sort sort)
|
||||
throws IOException {
|
||||
// don't specify the fields - we'll wait to do this until we get results
|
||||
FieldDocSortedHitQueue hq = new FieldDocSortedHitQueue (null, nDocs);
|
||||
int totalHits = 0;
|
||||
MultiSearcherThread[] msta = new MultiSearcherThread[searchables.length];
|
||||
for (int i = 0; i < searchables.length; i++) { // search each searcher
|
||||
// Assume not too many searchables and cost of creating a thread is by far inferior to a search
|
||||
msta[i] =
|
||||
new MultiSearcherThread(
|
||||
searchables[i],
|
||||
query,
|
||||
filter,
|
||||
nDocs,
|
||||
hq,
|
||||
sort,
|
||||
i,
|
||||
starts,
|
||||
"MultiSearcher thread #" + (i + 1));
|
||||
msta[i].start();
|
||||
}
|
||||
|
||||
for (int i = 0; i < searchables.length; i++) {
|
||||
try {
|
||||
msta[i].join();
|
||||
} catch (InterruptedException ie) {
|
||||
; // TODO: what should we do with this???
|
||||
}
|
||||
IOException ioe = msta[i].getIOException();
|
||||
if (ioe == null) {
|
||||
totalHits += msta[i].hits();
|
||||
} else {
|
||||
// if one search produced an IOException, rethrow it
|
||||
throw ioe;
|
||||
}
|
||||
}
|
||||
|
||||
ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
|
||||
for (int i = hq.size() - 1; i >= 0; i--) // put docs in array
|
||||
scoreDocs[i] = (ScoreDoc) hq.pop();
|
||||
|
||||
return new TopFieldDocs(totalHits, scoreDocs, hq.getFields());
|
||||
}
|
||||
|
||||
/** Lower-level search API.
|
||||
*
|
||||
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero
|
||||
|
@ -190,9 +240,10 @@ class MultiSearcherThread extends Thread {
|
|||
private int hits;
|
||||
private TopDocs docs;
|
||||
private int i;
|
||||
private HitQueue hq;
|
||||
private PriorityQueue hq;
|
||||
private int[] starts;
|
||||
private IOException ioe;
|
||||
private Sort sort;
|
||||
|
||||
public MultiSearcherThread(
|
||||
Searchable searchable,
|
||||
|
@ -213,15 +264,43 @@ class MultiSearcherThread extends Thread {
|
|||
this.starts = starts;
|
||||
}
|
||||
|
||||
/**
 * Creates a named search thread for a sorted search.  All arguments are
 * simply stored for later use.
 * @param searchable  The searchable stored for this thread.
 * @param query       The query stored for this thread.
 * @param filter      The filter stored for this thread.
 * @param nDocs       Number of hits stored for this thread.
 * @param hq          The field-sorted hit queue stored for this thread.
 * @param sort        The sort criteria stored for this thread.
 * @param i           Index stored alongside <code>starts</code>.
 * @param starts      Array of start offsets stored for this thread.
 * @param name        Thread name.
 */
public MultiSearcherThread(
  Searchable searchable,
  Query query,
  Filter filter,
  int nDocs,
  FieldDocSortedHitQueue hq,
  Sort sort,
  int i,
  int[] starts,
  String name) {
  super(name);
  // what to search for
  this.searchable = searchable;
  this.query = query;
  this.filter = filter;
  this.sort = sort;
  this.nDocs = nDocs;
  // where the results go
  this.hq = hq;
  this.starts = starts;
  this.i = i;
}
|
||||
|
||||
public void run() {
|
||||
try {
|
||||
docs = searchable.search(query, filter, nDocs);
|
||||
docs = (sort == null) ? searchable.search (query, filter, nDocs)
|
||||
: searchable.search (query, filter, nDocs, sort);
|
||||
}
|
||||
// Store the IOException for later use by the caller of this thread
|
||||
catch (IOException ioe) {
|
||||
this.ioe = ioe;
|
||||
}
|
||||
if (ioe == null) {
|
||||
// if we are sorting by fields, we need to tell the field sorted hit queue
|
||||
// the actual type of fields, in case the original list contained AUTO.
|
||||
// if the searchable returns null for fields, we'll have problems.
|
||||
if (sort != null) {
|
||||
((FieldDocSortedHitQueue)hq).setFields (((TopFieldDocs)docs).fields);
|
||||
}
|
||||
ScoreDoc[] scoreDocs = docs.scoreDocs;
|
||||
for (int j = 0;
|
||||
j < scoreDocs.length;
|
||||
|
|
|
@ -98,6 +98,11 @@ public class RemoteSearchable
|
|||
return local.search(query, filter, n);
|
||||
}
|
||||
|
||||
public TopFieldDocs search (Query query, Filter filter, int n, Sort sort)
|
||||
throws IOException {
|
||||
return local.search (query, filter, n, sort);
|
||||
}
|
||||
|
||||
public Document doc(int i) throws IOException {
|
||||
return local.doc(i);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,111 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Expert: Compares two ScoreDoc objects for sorting.
|
||||
*
|
||||
* <p>Created: Feb 3, 2004 9:00:16 AM
|
||||
*
|
||||
* @author Tim Jones (Nacimiento Software)
|
||||
* @since lucene 1.4
|
||||
* @version $Id$
|
||||
*/
|
||||
interface ScoreDocComparator {
|
||||
|
||||
/** Special comparator for sorting hits according to computed relevance (document score). */
|
||||
static final ScoreDocComparator RELEVANCE = new ScoreDocComparator() {
|
||||
public int compare (ScoreDoc i, ScoreDoc j) {
|
||||
if (i.score > j.score) return -1;
|
||||
if (i.score < j.score) return 1;
|
||||
return 0;
|
||||
}
|
||||
public int compareReverse (ScoreDoc i, ScoreDoc j) {
|
||||
if (i.score < j.score) return -1;
|
||||
if (i.score > j.score) return 1;
|
||||
return 0;
|
||||
}
|
||||
public Object sortValue (ScoreDoc i) {
|
||||
return new Float (i.score);
|
||||
}
|
||||
public int sortType() {
|
||||
return SortField.SCORE;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/** Special comparator for sorting hits according to index order (document number). */
|
||||
static final ScoreDocComparator INDEXORDER = new ScoreDocComparator() {
|
||||
public int compare (ScoreDoc i, ScoreDoc j) {
|
||||
if (i.doc < j.doc) return -1;
|
||||
if (i.doc > j.doc) return 1;
|
||||
return 0;
|
||||
}
|
||||
public int compareReverse (ScoreDoc i, ScoreDoc j) {
|
||||
if (i.doc > j.doc) return -1;
|
||||
if (i.doc < j.doc) return 1;
|
||||
return 0;
|
||||
}
|
||||
public Object sortValue (ScoreDoc i) {
|
||||
return new Integer (i.doc);
|
||||
}
|
||||
public int sortType() {
|
||||
return SortField.DOC;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* Compares two ScoreDoc objects and returns a result indicating their
|
||||
* sort order.
|
||||
* @param i First ScoreDoc
|
||||
* @param j Second ScoreDoc
|
||||
* @return <code>-1</code> if <code>i</code> should come before <code>j</code><br><code>1</code> if <code>i</code> should come after <code>j</code><br><code>0</code> if they are equal
|
||||
* @see java.util.Comparator
|
||||
*/
|
||||
int compare (ScoreDoc i, ScoreDoc j);
|
||||
|
||||
|
||||
/**
|
||||
* Compares two ScoreDoc objects and returns a result indicating their
|
||||
* sort order in reverse.
|
||||
* @param i First ScoreDoc
|
||||
* @param j Second ScoreDoc
|
||||
* @return <code>-1</code> if <code>i</code> should come before <code>j</code><br><code>1</code> if <code>i</code> should come after <code>j</code><br><code>0</code> if they are equal
|
||||
* @see java.util.Comparator
|
||||
*/
|
||||
int compareReverse (ScoreDoc i, ScoreDoc j);
|
||||
|
||||
|
||||
/**
|
||||
* Returns the value used to sort the given document. This is
|
||||
* currently always either an Integer or Float, but could be extended
|
||||
* to return any object used to sort by.
|
||||
* @param i Document
|
||||
* @return Integer or Float
|
||||
*/
|
||||
Object sortValue (ScoreDoc i);
|
||||
|
||||
|
||||
/**
|
||||
* Returns the type of sort.
|
||||
* @return One of the constants in SortField.
|
||||
* @see SortField
|
||||
*/
|
||||
int sortType();
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Expert: Compares two ScoreDoc objects for sorting using a lookup table.
|
||||
*
|
||||
* <p>Created: Feb 3, 2004 9:59:14 AM
|
||||
*
|
||||
* @author Tim Jones (Nacimiento Software)
|
||||
* @since lucene 1.4
|
||||
* @version $Id$
|
||||
*/
|
||||
interface ScoreDocLookupComparator
|
||||
extends ScoreDocComparator {
|
||||
|
||||
/**
|
||||
* Verifies that the internal lookup table is the correct size. This
|
||||
* comparator uses a lookup table, so it is important to that the
|
||||
* table matches the number of documents in the index.
|
||||
* @param n Expected size of table.
|
||||
* @return True if internal table matches expected size; false otherwise
|
||||
*/
|
||||
boolean sizeMatches (int n);
|
||||
}
|
|
@ -127,5 +127,14 @@ public interface Searchable extends java.rmi.Remote {
|
|||
*/
|
||||
Explanation explain(Query query, int doc) throws IOException;
|
||||
|
||||
|
||||
/** Expert: Low-level search implementation with arbitrary sorting. Finds
|
||||
* the top <code>n</code> hits for <code>query</code>, applying
|
||||
* <code>filter</code> if non-null, and sorting the hits by the criteria in
|
||||
* <code>sort</code>.
|
||||
*
|
||||
* <p>Applications should usually call {@link
|
||||
* Searcher#search(Query,Filter,Sort)} instead.
|
||||
*/
|
||||
TopFieldDocs search(Query query, Filter filter, int n, Sort sort)
|
||||
throws IOException;
|
||||
}
|
||||
|
|
|
@ -71,6 +71,22 @@ public abstract class Searcher implements Searchable {
|
|||
return new Hits(this, query, filter);
|
||||
}
|
||||
|
||||
/** Returns documents matching <code>query</code> sorted by
|
||||
* <code>sort</code>.
|
||||
*/
|
||||
public Hits search(Query query, Sort sort)
|
||||
throws IOException {
|
||||
return new Hits(this, query, null, sort);
|
||||
}
|
||||
|
||||
/** Returns documents matching <code>query</code> and <code>filter</code>,
|
||||
* sorted by <code>sort</code>.
|
||||
*/
|
||||
public Hits search(Query query, Filter filter, Sort sort)
|
||||
throws IOException {
|
||||
return new Hits(this, query, filter, sort);
|
||||
}
|
||||
|
||||
/** Lower-level search API.
|
||||
*
|
||||
* <p>{@link HitCollector#collect(int,float)} is called for every non-zero
|
||||
|
|
|
@ -0,0 +1,110 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Encapsulates sort criteria for returned hits. The sort criteria can
|
||||
* be changed between calls to Searcher#search(). This class is thread safe.
|
||||
*
|
||||
* <p>Created: Feb 12, 2004 10:53:57 AM
|
||||
*
|
||||
* @author Tim Jones (Nacimiento Software)
|
||||
* @since lucene 1.4
|
||||
* @version $Id$
|
||||
*/
|
||||
public class Sort
|
||||
implements Serializable {
|
||||
|
||||
/** Represents sorting by computed relevance. Using this sort criteria
|
||||
* returns the same results with slightly more overhead as calling
|
||||
* Searcher#search() without a sort criteria. */
|
||||
public static final Sort RELEVANCE =
|
||||
new Sort (new SortField[] { SortField.FIELD_SCORE, SortField.FIELD_DOC });
|
||||
|
||||
/** Represents sorting by index order. */
|
||||
public static final Sort INDEXORDER = new Sort (SortField.FIELD_DOC);
|
||||
|
||||
|
||||
// internal representation of the sort criteria
|
||||
SortField[] fields;
|
||||
|
||||
|
||||
/** Sorts by the terms in <code>field</code> then by index order (document
|
||||
* number). */
|
||||
public Sort (String field) {
|
||||
setSort (field, false);
|
||||
}
|
||||
|
||||
/** Sorts possibly in reverse by the terms in <code>field</code> then by
|
||||
* index order (document number). */
|
||||
public Sort (String field, boolean reverse) {
|
||||
setSort (field, reverse);
|
||||
}
|
||||
|
||||
/** Sorts in succession by the terms in each field. */
|
||||
public Sort (String[] fields) {
|
||||
setSort (fields);
|
||||
}
|
||||
|
||||
/** Sorts by the criteria in the given SortField. */
|
||||
public Sort (SortField field) {
|
||||
setSort (field);
|
||||
}
|
||||
|
||||
/** Sorts in succession by the criteria in each SortField. */
|
||||
public Sort (SortField[] fields) {
|
||||
setSort (fields);
|
||||
}
|
||||
|
||||
/** Sets the sort to the terms in <code>field</code> then by index order
|
||||
* (document number). */
|
||||
public final void setSort (String field) {
|
||||
setSort (field, false);
|
||||
}
|
||||
|
||||
/** Sets the sort to the terms in <code>field</code> possibly in reverse,
|
||||
* then by index order (document number). */
|
||||
public void setSort (String field, boolean reverse) {
|
||||
SortField[] nfields = new SortField[] {
|
||||
new SortField (field, SortField.AUTO, reverse),
|
||||
new SortField (field, SortField.DOC)
|
||||
};
|
||||
fields = nfields;
|
||||
}
|
||||
|
||||
/** Sets the sort to the terms in each field in succession. */
|
||||
public void setSort (String[] fieldnames) {
|
||||
final int n = fieldnames.length;
|
||||
SortField[] nfields = new SortField[n];
|
||||
for (int i=0; i<n; ++i) {
|
||||
nfields[i] = new SortField (fieldnames[i], SortField.AUTO);
|
||||
}
|
||||
fields = nfields;
|
||||
}
|
||||
|
||||
/** Sets the sort to the given criteria. */
|
||||
public void setSort (SortField field) {
|
||||
this.fields = new SortField[] { field };
|
||||
}
|
||||
|
||||
/** Sets the sort to the given criteria in succession. */
|
||||
public void setSort (SortField[] fields) {
|
||||
this.fields = fields;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,135 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
/**
|
||||
* Stores information about how to sort documents by terms in an individual
|
||||
* field. Fields must be indexed in order to sort by them.
|
||||
*
|
||||
* <p>Created: Feb 11, 2004 1:25:29 PM
|
||||
*
|
||||
* @author Tim Jones (Nacimiento Software)
|
||||
* @since lucene 1.4
|
||||
* @version $Id$
|
||||
*/
|
||||
public class SortField
implements Serializable {

  /** Sort by document score (relevancy).  Sort values are Float and higher
   * values are at the front. */
  public static final int SCORE = 0;

  /** Sort by document number (index order).  Sort values are Integer and lower
   * values are at the front. */
  public static final int DOC = 1;

  /** Guess type of sort based on field contents.  A regular expression is used
   * to look at the first term indexed for the field and determine if it
   * represents an integer number, a floating point number, or just arbitrary
   * string characters. */
  public static final int AUTO = 2;

  /** Sort using term values as Strings.  Sort values are String and lower
   * values are at the front. */
  public static final int STRING = 3;

  /** Sort using term values as encoded Integers.  Sort values are Integer and
   * lower values are at the front. */
  public static final int INT = 4;

  /** Sort using term values as encoded Floats.  Sort values are Float and
   * lower values are at the front. */
  public static final int FLOAT = 5;

  /** Represents sorting by document score (relevancy). */
  public static final SortField FIELD_SCORE = new SortField (null, SCORE);

  /** Represents sorting by document number (index order). */
  public static final SortField FIELD_DOC = new SortField (null, DOC);


  private String field;
  private int type = AUTO;  // defaults to determining type dynamically
  boolean reverse = false;  // defaults to natural order


  /** Creates a sort by terms in the given field where the type of term value
   * is determined dynamically ({@link #AUTO AUTO}).
   * @param field Name of field to sort by, cannot be <code>null</code>.
   * @throws IllegalArgumentException if <code>field</code> is <code>null</code>.
   */
  public SortField (String field) {
    this (field, AUTO, false);
  }

  /** Creates a sort, possibly in reverse, by terms in the given field where
   * the type of term value is determined dynamically ({@link #AUTO AUTO}).
   * @param field Name of field to sort by, cannot be <code>null</code>.
   * @param reverse True if natural order should be reversed.
   * @throws IllegalArgumentException if <code>field</code> is <code>null</code>.
   */
  public SortField (String field, boolean reverse) {
    this (field, AUTO, reverse);
  }

  /** Creates a sort by terms in the given field with the type of term
   * values explicitly given.
   * @param field  Name of field to sort by.  Can be <code>null</code> if
   *               <code>type</code> is SCORE or DOC.
   * @param type   Type of values in the terms.
   * @throws IllegalArgumentException if <code>field</code> is
   *         <code>null</code> and <code>type</code> is neither SCORE nor DOC.
   */
  public SortField (String field, int type) {
    this (field, type, false);
  }

  /** Creates a sort, possibly in reverse, by terms in the given field with the
   * type of term values explicitly given.
   * @param field  Name of field to sort by.  Can be <code>null</code> if
   *               <code>type</code> is SCORE or DOC.
   * @param type   Type of values in the terms.
   * @param reverse True if natural order should be reversed.
   * @throws IllegalArgumentException if <code>field</code> is
   *         <code>null</code> and <code>type</code> is neither SCORE nor DOC.
   */
  public SortField (String field, int type, boolean reverse) {
    // Enforce the documented contract up front: only SCORE and DOC sorts may
    // omit the field name.  Rejecting it here reports the mistake where it
    // is made rather than when the sort is eventually used.
    if (field == null && type != SCORE && type != DOC) {
      throw new IllegalArgumentException (
        "field can only be null when type is SCORE or DOC");
    }
    this.field = field;
    this.type = type;
    this.reverse = reverse;
  }

  /** Returns the name of the field.  Could return <code>null</code>
   * if the sort is by SCORE or DOC.
   * @return Name of field, possibly <code>null</code>.
   */
  public String getField() {
    return field;
  }

  /** Returns the type of contents in the field.
   * @return One of the constants SCORE, DOC, AUTO, STRING, INT or FLOAT.
   */
  public int getType() {
    return type;
  }

  /** Returns whether the sort should be reversed.
   * @return  True if natural order should be reversed.
   */
  public boolean getReverse() {
    return reverse;
  }
}
|
|
@ -0,0 +1,226 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermDocs;
|
||||
import org.apache.lucene.index.TermEnum;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Expert: A sorted hit queue for fields that contain string values.
|
||||
* Hits are sorted into the queue by the values in the field and then by document number.
|
||||
* The internal cache contains integers - the strings are sorted and
|
||||
* then only their sequence number cached.
|
||||
*
|
||||
* <p>Created: Feb 2, 2004 9:26:33 AM
|
||||
*
|
||||
* @author Tim Jones (Nacimiento Software)
|
||||
* @since lucene 1.4
|
||||
* @version $Id$
|
||||
*/
|
||||
class StringSortedHitQueue
|
||||
extends FieldSortedHitQueue {
|
||||
|
||||
/**
|
||||
* Creates a hit queue sorted over the given field containing string values.
|
||||
* @param reader Index to use.
|
||||
* @param string_field Field containing string sort information
|
||||
* @param size Number of hits to collect.
|
||||
* @throws IOException If an error occurs reading the index.
|
||||
*/
|
||||
StringSortedHitQueue (IndexReader reader, String string_field, int size)
|
||||
throws IOException {
|
||||
super (reader, string_field, size);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a comparator for sorting hits according to a field containing strings.
|
||||
* Just calls <code>comparator(IndexReader,String)</code>.
|
||||
* @param reader Index to use.
|
||||
* @param field Field containg string values.
|
||||
* @return Comparator for sorting hits.
|
||||
* @throws IOException If an error occurs reading the index.
|
||||
*/
|
||||
protected ScoreDocLookupComparator createComparator (final IndexReader reader, final String field)
|
||||
throws IOException {
|
||||
return comparator (reader, field);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a comparator for sorting hits according to a field containing strings.
|
||||
* @param reader Index to use.
|
||||
* @param field Field containg string values.
|
||||
* @return Comparator for sorting hits.
|
||||
* @throws IOException If an error occurs reading the index.
|
||||
*/
|
||||
static ScoreDocLookupComparator comparator (final IndexReader reader, final String field)
|
||||
throws IOException {
|
||||
return new ScoreDocLookupComparator() {
|
||||
|
||||
/** The sort information being used by this instance */
|
||||
protected final int[] fieldOrder = generateSortIndex();
|
||||
|
||||
private final int[] generateSortIndex()
|
||||
throws IOException {
|
||||
|
||||
final int[] retArray = new int[reader.maxDoc()];
|
||||
|
||||
TermEnum enumerator = reader.terms (new Term (field, ""));
|
||||
TermDocs termDocs = reader.termDocs();
|
||||
if (enumerator.term() == null) {
|
||||
throw new RuntimeException ("no terms in field " + field);
|
||||
}
|
||||
|
||||
// NOTE: the contract for TermEnum says the
|
||||
// terms will be in natural order (which is
|
||||
// ordering by field name, term text). The
|
||||
// contract for TermDocs says the docs will
|
||||
// be ordered by document number. So the
|
||||
// following loop will automatically sort the
|
||||
// terms in the correct order.
|
||||
|
||||
try {
|
||||
int t = 0; // current term number
|
||||
do {
|
||||
Term term = enumerator.term();
|
||||
if (term.field() != field) break;
|
||||
t++;
|
||||
termDocs.seek (enumerator);
|
||||
while (termDocs.next()) {
|
||||
retArray[termDocs.doc()] = t;
|
||||
}
|
||||
} while (enumerator.next());
|
||||
} finally {
|
||||
enumerator.close();
|
||||
termDocs.close();
|
||||
}
|
||||
|
||||
return retArray;
|
||||
}
|
||||
|
||||
public final int compare (final ScoreDoc i, final ScoreDoc j) {
|
||||
final int fi = fieldOrder[i.doc];
|
||||
final int fj = fieldOrder[j.doc];
|
||||
if (fi < fj) return -1;
|
||||
if (fi > fj) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
public final int compareReverse (final ScoreDoc i, final ScoreDoc j) {
|
||||
final int fi = fieldOrder[i.doc];
|
||||
final int fj = fieldOrder[j.doc];
|
||||
if (fi > fj) return -1;
|
||||
if (fi < fj) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
public final boolean sizeMatches (final int n) {
|
||||
return fieldOrder.length == n;
|
||||
}
|
||||
|
||||
public Object sortValue (final ScoreDoc i) {
|
||||
return new Integer(fieldOrder[i.doc]);
|
||||
}
|
||||
|
||||
public int sortType() {
|
||||
return SortField.INT;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a comparator for sorting hits according to a field containing strings using the given enumerator
|
||||
* to collect term values.
|
||||
* @param reader Index to use.
|
||||
* @param field Field containg string values.
|
||||
* @return Comparator for sorting hits.
|
||||
* @throws IOException If an error occurs reading the index.
|
||||
*/
|
||||
static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String field)
|
||||
throws IOException {
|
||||
return new ScoreDocLookupComparator() {
|
||||
|
||||
protected final int[] fieldOrder = generateSortIndex();
|
||||
|
||||
private final int[] generateSortIndex()
|
||||
throws IOException {
|
||||
|
||||
final int[] retArray = new int[reader.maxDoc()];
|
||||
|
||||
// NOTE: the contract for TermEnum says the
|
||||
// terms will be in natural order (which is
|
||||
// ordering by field name, term text). The
|
||||
// contract for TermDocs says the docs will
|
||||
// be ordered by document number. So the
|
||||
// following loop will automatically sort the
|
||||
// terms in the correct order.
|
||||
|
||||
TermDocs termDocs = reader.termDocs();
|
||||
try {
|
||||
int t = 0; // current term number
|
||||
do {
|
||||
Term term = enumerator.term();
|
||||
if (term.field() != field) break;
|
||||
t++;
|
||||
termDocs.seek (enumerator);
|
||||
while (termDocs.next()) {
|
||||
retArray[termDocs.doc()] = t;
|
||||
}
|
||||
} while (enumerator.next());
|
||||
} finally {
|
||||
termDocs.close();
|
||||
}
|
||||
|
||||
return retArray;
|
||||
}
|
||||
|
||||
public final int compare (final ScoreDoc i, final ScoreDoc j) {
|
||||
final int fi = fieldOrder[i.doc];
|
||||
final int fj = fieldOrder[j.doc];
|
||||
if (fi < fj) return -1;
|
||||
if (fi > fj) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
public final int compareReverse (final ScoreDoc i, final ScoreDoc j) {
|
||||
final int fi = fieldOrder[i.doc];
|
||||
final int fj = fieldOrder[j.doc];
|
||||
if (fi > fj) return -1;
|
||||
if (fi < fj) return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
public final boolean sizeMatches (final int n) {
|
||||
return fieldOrder.length == n;
|
||||
}
|
||||
|
||||
public Object sortValue (final ScoreDoc i) {
|
||||
return new Integer(fieldOrder[i.doc]);
|
||||
}
|
||||
|
||||
public int sortType() {
|
||||
return SortField.INT;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* Expert: Returned by low-level sorted search implementations.
|
||||
*
|
||||
* <p>Created: Feb 12, 2004 8:58:46 AM
|
||||
*
|
||||
* @author Tim Jones (Nacimiento Software)
|
||||
* @since lucene 1.4
|
||||
* @version $Id$
|
||||
* @see Searchable#search(Query,Filter,int,Sort)
|
||||
*/
|
||||
public class TopFieldDocs
|
||||
extends TopDocs {
|
||||
|
||||
/** The fields which were used to sort results by. */
|
||||
public SortField[] fields;
|
||||
|
||||
/** Creates one of these objects.
|
||||
* @param totalHits Total number of hits for the query.
|
||||
* @param scoreDocs The top hits for the query.
|
||||
* @param fields The sort criteria used to find the top hits.
|
||||
*/
|
||||
TopFieldDocs (int totalHits, ScoreDoc[] scoreDocs, SortField[] fields) {
|
||||
super (totalHits, scoreDocs);
|
||||
this.fields = fields;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue