Fixed problem with sorting.

git-svn-id: 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doug Cutting 2004-02-24 20:41:16 +00:00
parent 0aca5291dc
commit 31974f29f0

View File

@ -26,8 +26,9 @@ import;
* Expert: A sorted hit queue for fields that contain string values.
* Hits are sorted into the queue by the values in the field and then by document number.
* The internal cache contains integers - the strings are sorted and
* then only their sequence number is cached.
* Warning: The internal cache could be quite large, depending on the number of terms
* in the field! All the terms are kept in memory, as well as a sorted array of
* integers representing their relative position.
* <p>Created: Feb 2, 2004 9:26:33 AM
@ -68,21 +69,24 @@ extends FieldSortedHitQueue {
* Returns a comparator for sorting hits according to a field containing strings.
* @param reader Index to use.
* @param field Field containing string values.
* @param fieldname Field containing string values.
* @return Comparator for sorting hits.
* @throws IOException If an error occurs reading the index.
static ScoreDocLookupComparator comparator (final IndexReader reader, final String field)
static ScoreDocLookupComparator comparator (final IndexReader reader, final String fieldname)
throws IOException {
final String field = fieldname.intern();
return new ScoreDocLookupComparator() {
/** The sort information being used by this instance */
protected final int[] fieldOrder = generateSortIndex();
protected String[] terms;
private final int[] generateSortIndex()
throws IOException {
final int[] retArray = new int[reader.maxDoc()];
final String[] mterms = new String[reader.maxDoc()]; // guess length
TermEnum enumerator = reader.terms (new Term (field, ""));
TermDocs termDocs = reader.termDocs();
@ -98,22 +102,43 @@ extends FieldSortedHitQueue {
// following loop will automatically sort the
// terms in the correct order.
// if a given document has more than one term
// in the field, only the last one will be used.
int t = 0; // current term number
try {
int t = 0; // current term number
do {
Term term = enumerator.term();
if (term.field() != field) break;
// store term text
// we expect that there is at most one term per document
if (t >= mterms.length) throw new RuntimeException ("there are more terms than documents in field \""+field+"\"");
mterms[t] = term.text();
// store which documents use this term (enumerator);
while ( {
retArray[termDocs.doc()] = t;
} while (;
} finally {
// if there are fewer terms than documents,
// trim off the dead array space
if (t < mterms.length) {
terms = new String[t];
System.arraycopy (mterms, 0, terms, 0, t);
} else {
terms = mterms;
return retArray;
@ -138,11 +163,11 @@ extends FieldSortedHitQueue {
public Object sortValue (final ScoreDoc i) {
return new Integer(fieldOrder[i.doc]);
return terms[fieldOrder[i.doc]];
public int sortType() {
return SortField.INT;
return SortField.STRING;
@ -152,20 +177,23 @@ extends FieldSortedHitQueue {
* Returns a comparator for sorting hits according to a field containing strings using the given enumerator
* to collect term values.
* @param reader Index to use.
* @param field Field containing string values.
* @param fieldname Field containing string values.
* @return Comparator for sorting hits.
* @throws IOException If an error occurs reading the index.
static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String field)
static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String fieldname)
throws IOException {
final String field = fieldname.intern();
return new ScoreDocLookupComparator() {
protected final int[] fieldOrder = generateSortIndex();
protected String[] terms;
private final int[] generateSortIndex()
throws IOException {
final int[] retArray = new int[reader.maxDoc()];
final String[] mterms = new String[reader.maxDoc()]; // guess length
// NOTE: the contract for TermEnum says the
// terms will be in natural order (which is
@ -175,22 +203,42 @@ extends FieldSortedHitQueue {
// following loop will automatically sort the
// terms in the correct order.
// if a given document has more than one term
// in the field, only the last one will be used.
TermDocs termDocs = reader.termDocs();
int t = 0; // current term number
try {
int t = 0; // current term number
do {
Term term = enumerator.term();
if (term.field() != field) break;
// store term text
// we expect that there is at most one term per document
if (t >= mterms.length) throw new RuntimeException ("there are more terms than documents in field \""+field+"\"");
mterms[t] = term.text();
// store which documents use this term (enumerator);
while ( {
retArray[termDocs.doc()] = t;
} while (;
} finally {
// if there are fewer terms than documents,
// trim off the dead array space
if (t < mterms.length) {
terms = new String[t];
System.arraycopy (mterms, 0, terms, 0, t);
} else {
terms = mterms;
return retArray;
@ -215,11 +263,11 @@ extends FieldSortedHitQueue {
public Object sortValue (final ScoreDoc i) {
return new Integer(fieldOrder[i.doc]);
return terms[fieldOrder[i.doc]];
public int sortType() {
return SortField.INT;
return SortField.STRING;