From c8f455b48221f806b7916ef23833a5adce77cd13 Mon Sep 17 00:00:00 2001 From: Tim Jones Date: Thu, 22 Apr 2004 22:23:15 +0000 Subject: [PATCH] new SortField type for using a custom comparator git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150311 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/search/FieldDoc.java | 4 +- .../lucene/search/FieldDocSortedHitQueue.java | 6 + .../lucene/search/FieldSortedHitQueue.java | 7 +- .../lucene/search/FloatSortedHitQueue.java | 2 +- .../lucene/search/IntegerSortedHitQueue.java | 2 +- .../search/MultiFieldSortedHitQueue.java | 4 +- .../lucene/search/ScoreDocComparator.java | 16 +-- .../search/ScoreDocLookupComparator.java | 2 +- .../lucene/search/SortComparatorSource.java | 28 ++++ .../org/apache/lucene/search/SortField.java | 35 ++++- .../lucene/search/StringSortedHitQueue.java | 2 +- .../lucene/search/SampleComparable.java | 127 ++++++++++++++++++ .../org/apache/lucene/search/TestSort.java | 47 +++++-- 13 files changed, 248 insertions(+), 34 deletions(-) create mode 100644 src/java/org/apache/lucene/search/SortComparatorSource.java create mode 100644 src/test/org/apache/lucene/search/SampleComparable.java diff --git a/src/java/org/apache/lucene/search/FieldDoc.java b/src/java/org/apache/lucene/search/FieldDoc.java index 36b73204e77..b9a9f975eb3 100644 --- a/src/java/org/apache/lucene/search/FieldDoc.java +++ b/src/java/org/apache/lucene/search/FieldDoc.java @@ -48,7 +48,7 @@ extends ScoreDoc { * @see Sort * @see Searchable#search(Query,Filter,int,Sort) */ - public Object[] fields; + public Comparable[] fields; /** Expert: Creates one of these objects with empty sort information. */ public FieldDoc (int doc, float score) { @@ -56,7 +56,7 @@ extends ScoreDoc { } /** Expert: Creates one of these objects with the given sort information. */ - public FieldDoc (int doc, float score, Object[] fields) { + public FieldDoc (int doc, float score, Comparable[] fields) { super (doc, score); this.fields = fields; } diff --git a/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java b/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java index 7c11de14154..c28d6af5026 100644 --- a/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java +++ b/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java @@ -109,6 +109,9 @@ extends PriorityQueue { if (f1 > f2) c = -1; if (f1 < f2) c = 1; break; + case SortField.CUSTOM: + c = docB.fields[i].compareTo (docA.fields[i]); + break; case SortField.AUTO: // we cannot handle this - even if we determine the type of object (Float or // Integer), we don't necessarily know how to compare them (both SCORE and @@ -144,6 +147,9 @@ extends PriorityQueue { if (f1 < f2) c = -1; if (f1 > f2) c = 1; break; + case SortField.CUSTOM: + c = docA.fields[i].compareTo (docB.fields[i]); + break; case SortField.AUTO: // we cannot handle this - even if we determine the type of object (Float or // Integer), we don't necessarily know how to compare them (both SCORE and diff --git a/src/java/org/apache/lucene/search/FieldSortedHitQueue.java b/src/java/org/apache/lucene/search/FieldSortedHitQueue.java index d2c61767100..cf57f961abb 100644 --- a/src/java/org/apache/lucene/search/FieldSortedHitQueue.java +++ b/src/java/org/apache/lucene/search/FieldSortedHitQueue.java @@ -103,7 +103,7 @@ extends PriorityQueue { * @throws IOException If an error occurs reading the index. * @see #determineComparator */ - static ScoreDocComparator getCachedComparator (final IndexReader reader, final String field, final int type) + static ScoreDocComparator getCachedComparator (final IndexReader reader, final String field, final int type, final SortComparatorSource factory) throws IOException { if (type == SortField.DOC) return ScoreDocComparator.INDEXORDER; @@ -124,10 +124,11 @@ extends PriorityQueue { switch (type) { case SortField.SCORE: comparer = ScoreDocComparator.RELEVANCE; break; case SortField.DOC: comparer = ScoreDocComparator.INDEXORDER; break; + case SortField.AUTO: comparer = determineComparator (reader, field); break; + case SortField.STRING: comparer = StringSortedHitQueue.comparator (reader, field); break; case SortField.INT: comparer = IntegerSortedHitQueue.comparator (reader, field); break; case SortField.FLOAT: comparer = FloatSortedHitQueue.comparator (reader, field); break; - case SortField.STRING: comparer = StringSortedHitQueue.comparator (reader, field); break; - case SortField.AUTO: comparer = determineComparator (reader, field); break; + case SortField.CUSTOM: comparer = factory.newComparator (reader, field); break; default: throw new RuntimeException ("invalid sort field type: "+type); } diff --git a/src/java/org/apache/lucene/search/FloatSortedHitQueue.java b/src/java/org/apache/lucene/search/FloatSortedHitQueue.java index 1d4e2aec15d..62df2a90329 100644 --- a/src/java/org/apache/lucene/search/FloatSortedHitQueue.java +++ b/src/java/org/apache/lucene/search/FloatSortedHitQueue.java @@ -138,7 +138,7 @@ extends FieldSortedHitQueue { return fieldOrder.length == n; } - public Object sortValue (final ScoreDoc i) { + public Comparable sortValue (final ScoreDoc i) { return new Float (fieldOrder[i.doc]); } diff --git a/src/java/org/apache/lucene/search/IntegerSortedHitQueue.java b/src/java/org/apache/lucene/search/IntegerSortedHitQueue.java index cf33820ea94..eb0d4490f7b 100644 --- a/src/java/org/apache/lucene/search/IntegerSortedHitQueue.java +++ b/src/java/org/apache/lucene/search/IntegerSortedHitQueue.java @@ -139,7 +139,7 @@ extends FieldSortedHitQueue { return fieldOrder.length == n; } - public Object sortValue (final ScoreDoc i) { + public Comparable sortValue (final ScoreDoc i) { return new Integer (fieldOrder[i.doc]); } diff --git a/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java b/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java index 2436f653207..2a5e69bc542 100644 --- a/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java +++ b/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java @@ -51,7 +51,7 @@ extends PriorityQueue { this.fields = new SortField[n]; for (int i=0; i j.score) return 1; return 0; } - public Object sortValue (ScoreDoc i) { + public Comparable sortValue (ScoreDoc i) { return new Float (i.score); } public int sortType() { @@ -61,7 +61,7 @@ interface ScoreDocComparator { if (i.doc < j.doc) return 1; return 0; } - public Object sortValue (ScoreDoc i) { + public Comparable sortValue (ScoreDoc i) { return new Integer (i.doc); } public int sortType() { @@ -93,13 +93,13 @@ interface ScoreDocComparator { /** - * Returns the value used to sort the given document. This is - * currently always either an Integer or Float, but could be extended - * to return any object used to sort by. + * Returns the value used to sort the given document. The + * object returned must implement the java.io.Serializable + * interface. * @param i Document - * @return Integer or Float + * @return Serializable object */ - Object sortValue (ScoreDoc i); + Comparable sortValue (ScoreDoc i); /** diff --git a/src/java/org/apache/lucene/search/ScoreDocLookupComparator.java b/src/java/org/apache/lucene/search/ScoreDocLookupComparator.java index 5c7efe4afb8..17ef32c9cb5 100644 --- a/src/java/org/apache/lucene/search/ScoreDocLookupComparator.java +++ b/src/java/org/apache/lucene/search/ScoreDocLookupComparator.java @@ -26,7 +26,7 @@ package org.apache.lucene.search; * @since lucene 1.4 * @version $Id$ */ -interface ScoreDocLookupComparator +public interface ScoreDocLookupComparator extends ScoreDocComparator { /** diff --git a/src/java/org/apache/lucene/search/SortComparatorSource.java b/src/java/org/apache/lucene/search/SortComparatorSource.java new file mode 100644 index 00000000000..8bdb0651291 --- /dev/null +++ b/src/java/org/apache/lucene/search/SortComparatorSource.java @@ -0,0 +1,28 @@ +package org.apache.lucene.search; + +import org.apache.lucene.index.IndexReader; +import java.io.IOException; +import java.io.Serializable; + +/** + * Expert: returns a comparator for sorting ScoreDocs. + * + *

Created: Apr 21, 2004 3:49:28 PM + * + * @author Tim Jones + * @version $Id$ + * @since 1.4 + */ +public interface SortComparatorSource +extends Serializable { + + /** + * Creates a comparator for the field in the given index. + * @param reader Index to create comparator for. + * @param fieldname Field to create comparator for. + * @return Comparator of ScoreDoc objects. + * @throws IOException If an error occurs reading the index. + */ + ScoreDocLookupComparator newComparator (IndexReader reader, String fieldname) + throws IOException; +} \ No newline at end of file diff --git a/src/java/org/apache/lucene/search/SortField.java b/src/java/org/apache/lucene/search/SortField.java index 1ca5b19578e..629ce6c6d7e 100644 --- a/src/java/org/apache/lucene/search/SortField.java +++ b/src/java/org/apache/lucene/search/SortField.java @@ -58,6 +58,10 @@ implements Serializable { * lower values are at the front. */ public static final int FLOAT = 5; + /** Sort using a custom Comparator. Sort values are any Comparable and + * sorting is done according to natural order. */ + public static final int CUSTOM = 9; + /** Represents sorting by document score (relevancy). */ public static final SortField FIELD_SCORE = new SortField (null, SCORE); @@ -68,7 +72,7 @@ implements Serializable { private String field; private int type = AUTO; // defaults to determining type dynamically boolean reverse = false; // defaults to natural order - + private SortComparatorSource factory; /** Creates a sort by terms in the given field where the type of term value * is determined dynamically ({@link #AUTO AUTO}). @@ -112,6 +116,28 @@ implements Serializable { this.reverse = reverse; } + /** Creates a sort with a custom comparison function. + * @param field Name of field to sort by; cannot be null. + * @param comparator Returns a comparator for sorting hits. + */ + public SortField (String field, SortComparatorSource comparator) { + this.field = (field != null) ? field.intern() : field; + this.type = CUSTOM; + this.factory = comparator; + } + + /** Creates a sort, possibly in reverse, with a custom comparison function. + * @param field Name of field to sort by; cannot be null. + * @param comparator Returns a comparator for sorting hits. + * @param reverse True if natural order should be reversed. + */ + public SortField (String field, SortComparatorSource comparator, boolean reverse) { + this.field = (field != null) ? field.intern() : field; + this.type = CUSTOM; + this.reverse = reverse; + this.factory = comparator; + } + /** Returns the name of the field. Could return null * if the sort is by SCORE or DOC. * @return Name of field, possibly null. @@ -134,6 +160,10 @@ implements Serializable { return reverse; } + public SortComparatorSource getFactory() { + return factory; + } + public String toString() { StringBuffer buffer = new StringBuffer(); switch (type) { @@ -143,6 +173,9 @@ implements Serializable { case DOC: buffer.append(""); break; + case CUSTOM: buffer.append (""); + break; + default: buffer.append("\"" + field + "\""); break; } diff --git a/src/java/org/apache/lucene/search/StringSortedHitQueue.java b/src/java/org/apache/lucene/search/StringSortedHitQueue.java index 3adbc9e8a29..e6d94009086 100644 --- a/src/java/org/apache/lucene/search/StringSortedHitQueue.java +++ b/src/java/org/apache/lucene/search/StringSortedHitQueue.java @@ -173,7 +173,7 @@ extends FieldSortedHitQueue { return fieldOrder.length == n; } - public Object sortValue (final ScoreDoc i) { + public Comparable sortValue (final ScoreDoc i) { return terms[fieldOrder[i.doc]]; } diff --git a/src/test/org/apache/lucene/search/SampleComparable.java b/src/test/org/apache/lucene/search/SampleComparable.java new file mode 100644 index 00000000000..752629a92b8 --- /dev/null +++ b/src/test/org/apache/lucene/search/SampleComparable.java @@ -0,0 +1,127 @@ +package org.apache.lucene.search; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.TermEnum; + +import java.io.IOException; +import java.io.Serializable; + +/** + * An example Comparable for use with the custom sort tests. + * It implements a comparable for "id" sort of values which + * consist of an alphanumeric part and a numeric part, such as: + *

+ *

ABC-123, A-1, A-7, A-100, B-99999 + *

+ *

Such values cannot be sorted as strings, since A-100 needs + * to come after A-7. + *

+ *

It could be argued that the "ids" should be rewritten as + * A-0001, A-0100, etc. so they will sort as strings. That is + * a valid alternate way to solve it - but + * this is only supposed to be a simple test case. + *

+ *

Created: Apr 21, 2004 5:34:47 PM + * + * @author Tim Jones + * @version $Id$ + * @since 1.4 + */ +public class SampleComparable +implements Comparable, Serializable { + + String string_part; + Integer int_part; + + public SampleComparable (String s) { + int i = s.indexOf ("-"); + string_part = s.substring (0, i); + int_part = new Integer (s.substring (i + 1)); + } + + public int compareTo (Object o) { + SampleComparable otherid = (SampleComparable) o; + int i = string_part.compareTo (otherid.string_part); + if (i == 0) return int_part.compareTo (otherid.int_part); + return i; + } + + public static SortComparatorSource getComparator () { + return new SortComparatorSource () { + public ScoreDocLookupComparator newComparator (final IndexReader reader, String fieldname) + throws IOException { + final String field = fieldname.intern (); + final TermEnum enumerator = reader.terms (new Term (fieldname, "")); + try { + return new ScoreDocLookupComparator () { + protected Comparable[] cachedValues = fillCache (reader, enumerator, field); + + public boolean sizeMatches (int n) { + return (cachedValues.length == n); + } + + public int compare (ScoreDoc i, ScoreDoc j) { + return cachedValues[i.doc].compareTo (cachedValues[j.doc]); + } + + public int compareReverse (ScoreDoc i, ScoreDoc j) { + return cachedValues[j.doc].compareTo (cachedValues[i.doc]); + } + + public Comparable sortValue (ScoreDoc i) { + return cachedValues[i.doc]; + } + + public int sortType () { + return SortField.CUSTOM; + } + }; + } finally { + enumerator.close (); + } + } + + /** + * Returns an array of objects which represent that natural order + * of the term values in the given field. + * + * @param reader Terms are in this index. + * @param enumerator Use this to get the term values and TermDocs. + * @param fieldname Comparables should be for this field. + * @return Array of objects representing natural order of terms in field. + * @throws IOException If an error occurs reading the index. + */ + protected Comparable[] fillCache (IndexReader reader, TermEnum enumerator, String fieldname) + throws IOException { + final String field = fieldname.intern (); + Comparable[] retArray = new Comparable[reader.maxDoc ()]; + if (retArray.length > 0) { + TermDocs termDocs = reader.termDocs (); + try { + if (enumerator.term () == null) { + throw new RuntimeException ("no terms in field " + field); + } + do { + Term term = enumerator.term (); + if (term.field () != field) break; + Comparable termval = getComparable (term.text ()); + termDocs.seek (enumerator); + while (termDocs.next ()) { + retArray[termDocs.doc ()] = termval; + } + } while (enumerator.next ()); + } finally { + termDocs.close (); + } + } + return retArray; + } + + Comparable getComparable (String termtext) { + return new SampleComparable (termtext); + } + }; + } +} \ No newline at end of file diff --git a/src/test/org/apache/lucene/search/TestSort.java b/src/test/org/apache/lucene/search/TestSort.java index 8505b7defae..f3eff289907 100644 --- a/src/test/org/apache/lucene/search/TestSort.java +++ b/src/test/org/apache/lucene/search/TestSort.java @@ -17,8 +17,7 @@ package org.apache.lucene.search; */ import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.Term; +import org.apache.lucene.index.*; import org.apache.lucene.analysis.SimpleAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -27,6 +26,7 @@ import java.rmi.Naming; import java.rmi.registry.LocateRegistry; import java.rmi.registry.Registry; import java.io.IOException; +import java.io.Serializable; import java.util.regex.Pattern; import java.util.HashMap; import java.util.Iterator; @@ -47,7 +47,8 @@ import junit.textui.TestRunner; */ public class TestSort -extends TestCase { +extends TestCase +implements Serializable { private Searcher full; private Searcher searchX; @@ -89,17 +90,17 @@ extends TestCase { // the float field to sort by float // the string field to sort by string private String[][] data = new String[][] { - // tracer contents int float string - { "A", "x a", "5", "4f", "c" }, - { "B", "y a", "5", "3.4028235E38", "i" }, - { "C", "x a b c", "2147483647", "1.0", "j" }, - { "D", "y a b c", "-1", "0.0f", "a" }, - { "E", "x a b c d", "5", "2f", "h" }, - { "F", "y a b c d", "2", "3.14159f", "g" }, - { "G", "x a b c d", "3", "-1.0", "f" }, - { "H", "y a b c d", "0", "1.4E-45", "e" }, - { "I", "x a b c d e f", "-2147483648", "1.0e+0", "d" }, - { "J", "y a b c d e f", "4", ".5", "b" }, + // tracer contents int float string custom + { "A", "x a", "5", "4f", "c", "A-3" }, + { "B", "y a", "5", "3.4028235E38", "i", "B-10" }, + { "C", "x a b c", "2147483647", "1.0", "j", "A-2" }, + { "D", "y a b c", "-1", "0.0f", "a", "C-0" }, + { "E", "x a b c d", "5", "2f", "h", "B-8" }, + { "F", "y a b c d", "2", "3.14159f", "g", "B-1" }, + { "G", "x a b c d", "3", "-1.0", "f", "C-100" }, + { "H", "y a b c d", "0", "1.4E-45", "e", "C-88" }, + { "I", "x a b c d e f", "-2147483648", "1.0e+0", "d", "A-10" }, + { "J", "y a b c d e f", "4", ".5", "b", "C-7" }, }; // create an index of all the documents, or just the x, or just the y documents @@ -115,6 +116,7 @@ extends TestCase { doc.add (new Field ("int", data[i][2], false, true, false)); doc.add (new Field ("float", data[i][3], false, true, false)); doc.add (new Field ("string", data[i][4], false, true, false)); + doc.add (new Field ("custom", data[i][5], false, true, false)); writer.addDocument (doc); } } @@ -249,6 +251,14 @@ extends TestCase { assertMatches (full, queryX, sort, "GICEA"); } + + public void testCustomSorts() throws Exception { + sort.setSort (new SortField ("custom", SampleComparable.getComparator())); + assertMatches (full, queryX, sort, "CAIEG"); + sort.setSort (new SortField ("custom", SampleComparable.getComparator(), true)); + assertMatches (full, queryY, sort, "HJDBF"); + } + // test a variety of sorts using more than one searcher public void testMultiSort() throws Exception { MultiSearcher searcher = new MultiSearcher (new Searchable[] { searchX, searchY }); @@ -268,6 +278,15 @@ extends TestCase { runMultiSorts (multi); } + public void testRemoteCustomSort() throws Exception { + Searchable searcher = getRemote(); + MultiSearcher multi = new MultiSearcher (new Searchable[] { searcher }); + sort.setSort (new SortField ("custom", SampleComparable.getComparator())); + assertMatches (multi, queryX, sort, "CAIEG"); + sort.setSort (new SortField ("custom", SampleComparable.getComparator(), true)); + assertMatches (multi, queryY, sort, "HJDBF"); + } + // test that the relevancy scores are the same even if // hits are sorted public void testNormalizedScores() throws Exception {