new SortField type for using a custom comparator

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150311 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tim Jones 2004-04-22 22:23:15 +00:00
parent 59e61bb910
commit c8f455b482
13 changed files with 248 additions and 34 deletions

View File

@ -48,7 +48,7 @@ extends ScoreDoc {
* @see Sort
* @see Searchable#search(Query,Filter,int,Sort)
*/
public Object[] fields;
public Comparable[] fields;
/** Expert: Creates one of these objects with empty sort information. */
public FieldDoc (int doc, float score) {
@ -56,7 +56,7 @@ extends ScoreDoc {
}
/** Expert: Creates one of these objects with the given sort information. */
public FieldDoc (int doc, float score, Object[] fields) {
public FieldDoc (int doc, float score, Comparable[] fields) {
super (doc, score);
this.fields = fields;
}

View File

@ -109,6 +109,9 @@ extends PriorityQueue {
if (f1 > f2) c = -1;
if (f1 < f2) c = 1;
break;
case SortField.CUSTOM:
c = docB.fields[i].compareTo (docA.fields[i]);
break;
case SortField.AUTO:
// we cannot handle this - even if we determine the type of object (Float or
// Integer), we don't necessarily know how to compare them (both SCORE and
@ -144,6 +147,9 @@ extends PriorityQueue {
if (f1 < f2) c = -1;
if (f1 > f2) c = 1;
break;
case SortField.CUSTOM:
c = docA.fields[i].compareTo (docB.fields[i]);
break;
case SortField.AUTO:
// we cannot handle this - even if we determine the type of object (Float or
// Integer), we don't necessarily know how to compare them (both SCORE and

View File

@ -103,7 +103,7 @@ extends PriorityQueue {
* @throws IOException If an error occurs reading the index.
* @see #determineComparator
*/
static ScoreDocComparator getCachedComparator (final IndexReader reader, final String field, final int type)
static ScoreDocComparator getCachedComparator (final IndexReader reader, final String field, final int type, final SortComparatorSource factory)
throws IOException {
if (type == SortField.DOC) return ScoreDocComparator.INDEXORDER;
@ -124,10 +124,11 @@ extends PriorityQueue {
switch (type) {
case SortField.SCORE: comparer = ScoreDocComparator.RELEVANCE; break;
case SortField.DOC: comparer = ScoreDocComparator.INDEXORDER; break;
case SortField.AUTO: comparer = determineComparator (reader, field); break;
case SortField.STRING: comparer = StringSortedHitQueue.comparator (reader, field); break;
case SortField.INT: comparer = IntegerSortedHitQueue.comparator (reader, field); break;
case SortField.FLOAT: comparer = FloatSortedHitQueue.comparator (reader, field); break;
case SortField.STRING: comparer = StringSortedHitQueue.comparator (reader, field); break;
case SortField.AUTO: comparer = determineComparator (reader, field); break;
case SortField.CUSTOM: comparer = factory.newComparator (reader, field); break;
default:
throw new RuntimeException ("invalid sort field type: "+type);
}

View File

@ -138,7 +138,7 @@ extends FieldSortedHitQueue {
return fieldOrder.length == n;
}
public Object sortValue (final ScoreDoc i) {
public Comparable sortValue (final ScoreDoc i) {
return new Float (fieldOrder[i.doc]);
}

View File

@ -139,7 +139,7 @@ extends FieldSortedHitQueue {
return fieldOrder.length == n;
}
public Object sortValue (final ScoreDoc i) {
public Comparable sortValue (final ScoreDoc i) {
return new Integer (fieldOrder[i.doc]);
}

View File

@ -51,7 +51,7 @@ extends PriorityQueue {
this.fields = new SortField[n];
for (int i=0; i<n; ++i) {
String fieldname = fields[i].getField();
comparators[i] = FieldSortedHitQueue.getCachedComparator (reader, fieldname, fields[i].getType());
comparators[i] = FieldSortedHitQueue.getCachedComparator (reader, fieldname, fields[i].getType(), fields[i].getFactory());
this.fields[i] = new SortField (fieldname, comparators[i].sortType(), fields[i].getReverse());
}
initialize (size);
@ -107,7 +107,7 @@ extends PriorityQueue {
*/
FieldDoc fillFields (final FieldDoc doc) {
final int n = comparators.length;
final Object[] fields = new Object[n];
final Comparable[] fields = new Comparable[n];
for (int i=0; i<n; ++i)
fields[i] = comparators[i].sortValue(doc);
doc.fields = fields;

View File

@ -26,7 +26,7 @@ package org.apache.lucene.search;
* @since lucene 1.4
* @version $Id$
*/
interface ScoreDocComparator {
public interface ScoreDocComparator {
/** Special comparator for sorting hits according to computed relevance (document score). */
static final ScoreDocComparator RELEVANCE = new ScoreDocComparator() {
@ -40,7 +40,7 @@ interface ScoreDocComparator {
if (i.score > j.score) return 1;
return 0;
}
public Object sortValue (ScoreDoc i) {
public Comparable sortValue (ScoreDoc i) {
return new Float (i.score);
}
public int sortType() {
@ -61,7 +61,7 @@ interface ScoreDocComparator {
if (i.doc < j.doc) return 1;
return 0;
}
public Object sortValue (ScoreDoc i) {
public Comparable sortValue (ScoreDoc i) {
return new Integer (i.doc);
}
public int sortType() {
@ -93,13 +93,13 @@ interface ScoreDocComparator {
/**
* Returns the value used to sort the given document. This is
* currently always either an Integer or Float, but could be extended
* to return any object used to sort by.
* Returns the value used to sort the given document. The
* object returned must implement the java.io.Serializable
* interface.
* @param i Document
* @return Integer or Float
* @return Serializable object
*/
Object sortValue (ScoreDoc i);
Comparable sortValue (ScoreDoc i);
/**

View File

@ -26,7 +26,7 @@ package org.apache.lucene.search;
* @since lucene 1.4
* @version $Id$
*/
interface ScoreDocLookupComparator
public interface ScoreDocLookupComparator
extends ScoreDocComparator {
/**

View File

@ -0,0 +1,28 @@
package org.apache.lucene.search;
import org.apache.lucene.index.IndexReader;
import java.io.IOException;
import java.io.Serializable;
/**
* Expert: returns a comparator for sorting ScoreDocs.
*
* <p>Created: Apr 21, 2004 3:49:28 PM
*
* @author Tim Jones
* @version $Id$
* @since 1.4
*/
public interface SortComparatorSource
extends Serializable {
/**
* Creates a comparator for the field in the given index.
* @param reader Index to create comparator for.
* @param fieldname Field to create comparator for.
* @return Comparator of ScoreDoc objects.
* @throws IOException If an error occurs reading the index.
*/
ScoreDocLookupComparator newComparator (IndexReader reader, String fieldname)
throws IOException;
}

View File

@ -58,6 +58,10 @@ implements Serializable {
* lower values are at the front. */
public static final int FLOAT = 5;
/** Sort using a custom Comparator. Sort values are any Comparable and
* sorting is done according to natural order. */
public static final int CUSTOM = 9;
/** Represents sorting by document score (relevancy). */
public static final SortField FIELD_SCORE = new SortField (null, SCORE);
@ -68,7 +72,7 @@ implements Serializable {
private String field;
private int type = AUTO; // defaults to determining type dynamically
boolean reverse = false; // defaults to natural order
private SortComparatorSource factory;
/** Creates a sort by terms in the given field where the type of term value
* is determined dynamically ({@link #AUTO AUTO}).
@ -112,6 +116,28 @@ implements Serializable {
this.reverse = reverse;
}
/** Creates a sort with a custom comparison function.
* @param field Name of field to sort by; cannot be <code>null</code>.
* @param comparator Returns a comparator for sorting hits.
*/
public SortField (String field, SortComparatorSource comparator) {
this.field = (field != null) ? field.intern() : field;
this.type = CUSTOM;
this.factory = comparator;
}
/** Creates a sort, possibly in reverse, with a custom comparison function.
* @param field Name of field to sort by; cannot be <code>null</code>.
* @param comparator Returns a comparator for sorting hits.
* @param reverse True if natural order should be reversed.
*/
public SortField (String field, SortComparatorSource comparator, boolean reverse) {
this.field = (field != null) ? field.intern() : field;
this.type = CUSTOM;
this.reverse = reverse;
this.factory = comparator;
}
/** Returns the name of the field. Could return <code>null</code>
* if the sort is by SCORE or DOC.
* @return Name of field, possibly <code>null</code>.
@ -134,6 +160,10 @@ implements Serializable {
return reverse;
}
public SortComparatorSource getFactory() {
return factory;
}
public String toString() {
StringBuffer buffer = new StringBuffer();
switch (type) {
@ -143,6 +173,9 @@ implements Serializable {
case DOC: buffer.append("<doc>");
break;
case CUSTOM: buffer.append ("<custom:\""+"\">");
break;
default: buffer.append("\"" + field + "\"");
break;
}

View File

@ -173,7 +173,7 @@ extends FieldSortedHitQueue {
return fieldOrder.length == n;
}
public Object sortValue (final ScoreDoc i) {
public Comparable sortValue (final ScoreDoc i) {
return terms[fieldOrder[i.doc]];
}

View File

@ -0,0 +1,127 @@
package org.apache.lucene.search;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import java.io.IOException;
import java.io.Serializable;
/**
* An example Comparable for use with the custom sort tests.
* It implements a comparable for "id" sort of values which
* consist of an alphanumeric part and a numeric part, such as:
* <p/>
* <P>ABC-123, A-1, A-7, A-100, B-99999
* <p/>
* <p>Such values cannot be sorted as strings, since A-100 needs
* to come after A-7.
* <p/>
* <p>It could be argued that the "ids" should be rewritten as
* A-0001, A-0100, etc. so they will sort as strings. That is
* a valid alternate way to solve it - but
* this is only supposed to be a simple test case.
* <p/>
* <p>Created: Apr 21, 2004 5:34:47 PM
*
* @author Tim Jones
* @version $Id$
* @since 1.4
*/
public class SampleComparable
implements Comparable, Serializable {
String string_part;
Integer int_part;
public SampleComparable (String s) {
int i = s.indexOf ("-");
string_part = s.substring (0, i);
int_part = new Integer (s.substring (i + 1));
}
public int compareTo (Object o) {
SampleComparable otherid = (SampleComparable) o;
int i = string_part.compareTo (otherid.string_part);
if (i == 0) return int_part.compareTo (otherid.int_part);
return i;
}
public static SortComparatorSource getComparator () {
return new SortComparatorSource () {
public ScoreDocLookupComparator newComparator (final IndexReader reader, String fieldname)
throws IOException {
final String field = fieldname.intern ();
final TermEnum enumerator = reader.terms (new Term (fieldname, ""));
try {
return new ScoreDocLookupComparator () {
protected Comparable[] cachedValues = fillCache (reader, enumerator, field);
public boolean sizeMatches (int n) {
return (cachedValues.length == n);
}
public int compare (ScoreDoc i, ScoreDoc j) {
return cachedValues[i.doc].compareTo (cachedValues[j.doc]);
}
public int compareReverse (ScoreDoc i, ScoreDoc j) {
return cachedValues[j.doc].compareTo (cachedValues[i.doc]);
}
public Comparable sortValue (ScoreDoc i) {
return cachedValues[i.doc];
}
public int sortType () {
return SortField.CUSTOM;
}
};
} finally {
enumerator.close ();
}
}
/**
* Returns an array of objects which represent that natural order
* of the term values in the given field.
*
* @param reader Terms are in this index.
* @param enumerator Use this to get the term values and TermDocs.
* @param fieldname Comparables should be for this field.
* @return Array of objects representing natural order of terms in field.
* @throws IOException If an error occurs reading the index.
*/
protected Comparable[] fillCache (IndexReader reader, TermEnum enumerator, String fieldname)
throws IOException {
final String field = fieldname.intern ();
Comparable[] retArray = new Comparable[reader.maxDoc ()];
if (retArray.length > 0) {
TermDocs termDocs = reader.termDocs ();
try {
if (enumerator.term () == null) {
throw new RuntimeException ("no terms in field " + field);
}
do {
Term term = enumerator.term ();
if (term.field () != field) break;
Comparable termval = getComparable (term.text ());
termDocs.seek (enumerator);
while (termDocs.next ()) {
retArray[termDocs.doc ()] = termval;
}
} while (enumerator.next ());
} finally {
termDocs.close ();
}
}
return retArray;
}
Comparable getComparable (String termtext) {
return new SampleComparable (termtext);
}
};
}
}

View File

@ -17,8 +17,7 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.*;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@ -27,6 +26,7 @@ import java.rmi.Naming;
import java.rmi.registry.LocateRegistry;
import java.rmi.registry.Registry;
import java.io.IOException;
import java.io.Serializable;
import java.util.regex.Pattern;
import java.util.HashMap;
import java.util.Iterator;
@ -47,7 +47,8 @@ import junit.textui.TestRunner;
*/
public class TestSort
extends TestCase {
extends TestCase
implements Serializable {
private Searcher full;
private Searcher searchX;
@ -89,17 +90,17 @@ extends TestCase {
// the float field to sort by float
// the string field to sort by string
private String[][] data = new String[][] {
// tracer contents int float string
{ "A", "x a", "5", "4f", "c" },
{ "B", "y a", "5", "3.4028235E38", "i" },
{ "C", "x a b c", "2147483647", "1.0", "j" },
{ "D", "y a b c", "-1", "0.0f", "a" },
{ "E", "x a b c d", "5", "2f", "h" },
{ "F", "y a b c d", "2", "3.14159f", "g" },
{ "G", "x a b c d", "3", "-1.0", "f" },
{ "H", "y a b c d", "0", "1.4E-45", "e" },
{ "I", "x a b c d e f", "-2147483648", "1.0e+0", "d" },
{ "J", "y a b c d e f", "4", ".5", "b" },
// tracer contents int float string custom
{ "A", "x a", "5", "4f", "c", "A-3" },
{ "B", "y a", "5", "3.4028235E38", "i", "B-10" },
{ "C", "x a b c", "2147483647", "1.0", "j", "A-2" },
{ "D", "y a b c", "-1", "0.0f", "a", "C-0" },
{ "E", "x a b c d", "5", "2f", "h", "B-8" },
{ "F", "y a b c d", "2", "3.14159f", "g", "B-1" },
{ "G", "x a b c d", "3", "-1.0", "f", "C-100" },
{ "H", "y a b c d", "0", "1.4E-45", "e", "C-88" },
{ "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10" },
{ "J", "y a b c d e f", "4", ".5", "b", "C-7" },
};
// create an index of all the documents, or just the x, or just the y documents
@ -115,6 +116,7 @@ extends TestCase {
doc.add (new Field ("int", data[i][2], false, true, false));
doc.add (new Field ("float", data[i][3], false, true, false));
doc.add (new Field ("string", data[i][4], false, true, false));
doc.add (new Field ("custom", data[i][5], false, true, false));
writer.addDocument (doc);
}
}
@ -249,6 +251,14 @@ extends TestCase {
assertMatches (full, queryX, sort, "GICEA");
}
public void testCustomSorts() throws Exception {
sort.setSort (new SortField ("custom", SampleComparable.getComparator()));
assertMatches (full, queryX, sort, "CAIEG");
sort.setSort (new SortField ("custom", SampleComparable.getComparator(), true));
assertMatches (full, queryY, sort, "HJDBF");
}
// test a variety of sorts using more than one searcher
public void testMultiSort() throws Exception {
MultiSearcher searcher = new MultiSearcher (new Searchable[] { searchX, searchY });
@ -268,6 +278,15 @@ extends TestCase {
runMultiSorts (multi);
}
public void testRemoteCustomSort() throws Exception {
Searchable searcher = getRemote();
MultiSearcher multi = new MultiSearcher (new Searchable[] { searcher });
sort.setSort (new SortField ("custom", SampleComparable.getComparator()));
assertMatches (multi, queryX, sort, "CAIEG");
sort.setSort (new SortField ("custom", SampleComparable.getComparator(), true));
assertMatches (multi, queryY, sort, "HJDBF");
}
// test that the relevancy scores are the same even if
// hits are sorted
public void testNormalizedScores() throws Exception {