From 4e486c14349b20278c5d1e00b9335db0a1dffe86 Mon Sep 17 00:00:00 2001 From: Tim Jones Date: Wed, 19 May 2004 23:05:27 +0000 Subject: [PATCH] expose term cache as a public object git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150348 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/lucene/search/FieldCache.java | 135 +++++ .../apache/lucene/search/FieldCacheImpl.java | 378 ++++++++++++ .../lucene/search/FieldDocSortedHitQueue.java | 8 +- .../lucene/search/FieldSortedHitQueue.java | 539 +++++++++--------- .../lucene/search/FloatSortedHitQueue.java | 150 ----- .../apache/lucene/search/IndexSearcher.java | 4 +- .../lucene/search/IntegerSortedHitQueue.java | 151 ----- .../search/MultiFieldSortedHitQueue.java | 124 ---- .../lucene/search/ScoreDocComparator.java | 25 +- .../search/ScoreDocLookupComparator.java | 40 -- .../apache/lucene/search/SortComparator.java | 68 +++ .../lucene/search/SortComparatorSource.java | 2 +- .../org/apache/lucene/search/SortField.java | 5 + .../lucene/search/StringSortedHitQueue.java | 185 ------ .../lucene/search/SampleComparable.java | 22 +- .../org/apache/lucene/search/TestSort.java | 18 +- 16 files changed, 878 insertions(+), 976 deletions(-) create mode 100644 src/java/org/apache/lucene/search/FieldCache.java create mode 100644 src/java/org/apache/lucene/search/FieldCacheImpl.java delete mode 100644 src/java/org/apache/lucene/search/FloatSortedHitQueue.java delete mode 100644 src/java/org/apache/lucene/search/IntegerSortedHitQueue.java delete mode 100644 src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java delete mode 100644 src/java/org/apache/lucene/search/ScoreDocLookupComparator.java create mode 100644 src/java/org/apache/lucene/search/SortComparator.java delete mode 100644 src/java/org/apache/lucene/search/StringSortedHitQueue.java diff --git a/src/java/org/apache/lucene/search/FieldCache.java b/src/java/org/apache/lucene/search/FieldCache.java new file mode 100644 index 00000000000..276eb605390 --- 
/dev/null +++ b/src/java/org/apache/lucene/search/FieldCache.java @@ -0,0 +1,135 @@ +package org.apache.lucene.search; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader; +import java.io.IOException; + +/** + * Expert: Maintains caches of term values. + * + *

Created: May 19, 2004 11:13:14 AM + * + * @author Tim Jones (Nacimiento Software) + * @since lucene 1.4 + * @version $Id$ + */ +public interface FieldCache { + + /** Indicator for StringIndex values in the cache. */ + // NOTE: the value assigned to this constant must not be + // the same as any of those in SortField!! + public static final int STRING_INDEX = -1; + + + /** Expert: Stores term text values and document ordering data. */ + public static class StringIndex { + + /** All the term values, in natural order. */ + public final String[] lookup; + + /** For each document, an index into the lookup array. */ + public final int[] order; + + /** Creates one of these objects */ + public StringIndex (int[] values, String[] lookup) { + this.order = values; + this.lookup = lookup; + } + } + + + /** Expert: The cache used internally by sorting and range query classes. */ + public static FieldCache DEFAULT = new FieldCacheImpl(); + + + /** Checks the internal cache for an appropriate entry, and if none is + * found, reads the terms in field as integers and returns an array + * of size reader.maxDoc() of the value each document + * has in the given field. + * @param reader Used to get field values. + * @param field Which field contains the integers. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public int[] getInts (IndexReader reader, String field) + throws IOException; + + /** Checks the internal cache for an appropriate entry, and if + * none is found, reads the terms in field as floats and returns an array + * of size reader.maxDoc() of the value each document + * has in the given field. + * @param reader Used to get field values. + * @param field Which field contains the floats. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. 
+ */ + public float[] getFloats (IndexReader reader, String field) + throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none + * is found, reads the term values in field and returns an array + * of size reader.maxDoc() containing the value each document + * has in the given field. + * @param reader Used to get field values. + * @param field Which field contains the strings. + * @return The values in the given field for each document. + * @throws IOException If any error occurs. + */ + public String[] getStrings (IndexReader reader, String field) + throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none + * is found reads the term values in field and returns + * an array of them in natural order, along with an array telling + * which element in the term array each document uses. + * @param reader Used to get field values. + * @param field Which field contains the strings. + * @return Array of terms and index into the array for each document. + * @throws IOException If any error occurs. + */ + public StringIndex getStringIndex (IndexReader reader, String field) + throws IOException; + + /** Checks the internal cache for an appropriate entry, and if + * none is found reads field to see if it contains integers, floats + * or strings, and then calls one of the other methods in this class to get the + * values. For string values, a StringIndex is returned. After + * calling this method, there is an entry in the cache for both + * type AUTO and the actual found type. + * @param reader Used to get field values. + * @param field Which field contains the values. + * @return int[], float[] or StringIndex. + * @throws IOException If any error occurs. 
+ */ + public Object getAuto (IndexReader reader, String field) + throws IOException; + + /** Checks the internal cache for an appropriate entry, and if none + * is found reads the terms out of field and calls the given SortComparator + * to get the sort values. A hit in the cache will happen if reader, + * field, and comparator are the same (using equals()) + * as a previous call to this method. + * @param reader Used to get field values. + * @param field Which field contains the values. + * @param comparator Used to convert terms into something to sort by. + * @return Array of sort objects, one for each document. + * @throws IOException If any error occurs. + */ + public Comparable[] getCustom (IndexReader reader, String field, SortComparator comparator) + throws IOException; +} diff --git a/src/java/org/apache/lucene/search/FieldCacheImpl.java b/src/java/org/apache/lucene/search/FieldCacheImpl.java new file mode 100644 index 00000000000..cccd05f85de --- /dev/null +++ b/src/java/org/apache/lucene/search/FieldCacheImpl.java @@ -0,0 +1,378 @@ +package org.apache.lucene.search; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.TermEnum; + +import java.io.IOException; +import java.util.Map; +import java.util.WeakHashMap; + +/** + * Expert: The default cache implementation, storing all values in memory. + * A WeakHashMap is used for storage. + * + *

Created: May 19, 2004 4:40:36 PM + * + * @author Tim Jones (Nacimiento Software) + * @since lucene 1.4 + * @version $Id$ + */ +class FieldCacheImpl +implements FieldCache { + + /** Expert: Every key in the internal cache is of this type. */ + static class Entry { + final IndexReader reader; // which Reader + final String field; // which Field + final int type; // which SortField type + final Object custom; // which custom comparator + final int hashcode; // unique for this object + + /** Creates one of these objects. */ + Entry (IndexReader reader, String field, int type) { + this.reader = reader; + this.field = field.intern(); + this.type = type; + this.custom = null; + this.hashcode = reader.hashCode() ^ field.hashCode() ^ type; + } + + /** Creates one of these objects for a custom comparator. */ + Entry (IndexReader reader, String field, Object custom) { + this.reader = reader; + this.field = field.intern(); + this.type = SortField.CUSTOM; + this.custom = custom; + this.hashcode = reader.hashCode() ^ field.hashCode() ^ type ^ custom.hashCode(); + } + + /** Two of these are equal iff they reference the same reader, field and type. */ + public boolean equals (Object o) { + if (o instanceof Entry) { + Entry other = (Entry) o; + if (other.reader == reader && other.field == field && other.type == type) { + if (other.custom == null) { + if (custom == null) return true; + } else if (other.custom.equals (custom)) { + return true; + } + } + } + return false; + } + + /** Composes a hashcode based on the referenced reader, field and type. */ + public int hashCode() { + return hashcode; + } + } + + + /** The internal cache. Maps Entry to array of interpreted term values. **/ + final Map cache = new WeakHashMap(); + + /** See if an object is in the cache. */ + Object lookup (IndexReader reader, String field, int type) { + Entry entry = new Entry (reader, field, type); + synchronized (this) { + return cache.get (entry); + } + } + + /** See if a custom object is in the cache. 
*/ + Object lookup (IndexReader reader, String field, Object comparer) { + Entry entry = new Entry (reader, field, comparer); + synchronized (this) { + return cache.get (entry); + } + } + + /** Put an object into the cache. */ + Object store (IndexReader reader, String field, int type, Object value) { + Entry entry = new Entry (reader, field, type); + synchronized (this) { + return cache.put (entry, value); + } + } + + /** Put a custom object into the cache. */ + Object store (IndexReader reader, String field, Object comparer, Object value) { + Entry entry = new Entry (reader, field, comparer); + synchronized (this) { + return cache.put (entry, value); + } + } + + // inherit javadocs + public int[] getInts (IndexReader reader, String field) + throws IOException { + field = field.intern(); + Object ret = lookup (reader, field, SortField.INT); + if (ret == null) { + final int[] retArray = new int[reader.maxDoc()]; + if (retArray.length > 0) { + TermDocs termDocs = reader.termDocs(); + TermEnum termEnum = reader.terms (new Term (field, "")); + try { + if (termEnum.term() == null) { + throw new RuntimeException ("no terms in field " + field); + } + do { + Term term = termEnum.term(); + if (term.field() != field) break; + int termval = Integer.parseInt (term.text()); + termDocs.seek (termEnum); + while (termDocs.next()) { + retArray[termDocs.doc()] = termval; + } + } while (termEnum.next()); + } finally { + termDocs.close(); + termEnum.close(); + } + } + store (reader, field, SortField.INT, retArray); + return retArray; + } + return (int[]) ret; + } + + // inherit javadocs + public float[] getFloats (IndexReader reader, String field) + throws IOException { + field = field.intern(); + Object ret = lookup (reader, field, SortField.FLOAT); + if (ret == null) { + final float[] retArray = new float[reader.maxDoc()]; + if (retArray.length > 0) { + TermDocs termDocs = reader.termDocs(); + TermEnum termEnum = reader.terms (new Term (field, "")); + try { + if (termEnum.term() == 
null) { + throw new RuntimeException ("no terms in field " + field); + } + do { + Term term = termEnum.term(); + if (term.field() != field) break; + float termval = Float.parseFloat (term.text()); + termDocs.seek (termEnum); + while (termDocs.next()) { + retArray[termDocs.doc()] = termval; + } + } while (termEnum.next()); + } finally { + termDocs.close(); + termEnum.close(); + } + } + store (reader, field, SortField.FLOAT, retArray); + return retArray; + } + return (float[]) ret; + } + + // inherit javadocs + public String[] getStrings (IndexReader reader, String field) + throws IOException { + field = field.intern(); + Object ret = lookup (reader, field, SortField.STRING); + if (ret == null) { + final String[] retArray = new String[reader.maxDoc()]; + if (retArray.length > 0) { + TermDocs termDocs = reader.termDocs(); + TermEnum termEnum = reader.terms (new Term (field, "")); + try { + if (termEnum.term() == null) { + throw new RuntimeException ("no terms in field " + field); + } + do { + Term term = termEnum.term(); + if (term.field() != field) break; + String termval = term.text(); + termDocs.seek (termEnum); + while (termDocs.next()) { + retArray[termDocs.doc()] = termval; + } + } while (termEnum.next()); + } finally { + termDocs.close(); + termEnum.close(); + } + } + store (reader, field, SortField.STRING, retArray); + return retArray; + } + return (String[]) ret; + } + + // inherit javadocs + public StringIndex getStringIndex (IndexReader reader, String field) + throws IOException { + field = field.intern(); + Object ret = lookup (reader, field, STRING_INDEX); + if (ret == null) { + final int[] retArray = new int[reader.maxDoc()]; + String[] mterms = new String[reader.maxDoc()]; + if (retArray.length > 0) { + TermDocs termDocs = reader.termDocs(); + TermEnum termEnum = reader.terms (new Term (field, "")); + int t = 0; // current term number + try { + if (termEnum.term() == null) { + throw new RuntimeException ("no terms in field " + field); + } + do { + Term 
term = termEnum.term(); + if (term.field() != field) break; + + // store term text + // we expect that there is at most one term per document + if (t >= mterms.length) throw new RuntimeException ("there are more terms than documents in field \"" + field + "\""); + mterms[t] = term.text(); + + termDocs.seek (termEnum); + while (termDocs.next()) { + retArray[termDocs.doc()] = t; + } + + t++; + } while (termEnum.next()); + } finally { + termDocs.close(); + termEnum.close(); + } + + if (t == 0) { + // if there are no terms, make the term array + // have a single null entry + mterms = new String[1]; + } else if (t < mterms.length) { + // if there are less terms than documents, + // trim off the dead array space + String[] terms = new String[t]; + System.arraycopy (mterms, 0, terms, 0, t); + mterms = terms; + } + } + StringIndex value = new StringIndex (retArray, mterms); + store (reader, field, STRING_INDEX, value); + return value; + } + return (StringIndex) ret; + } + + /** The pattern used to detect integer values in a field */ + /** removed for java 1.3 compatibility + protected static final Pattern pIntegers = Pattern.compile ("[0-9\\-]+"); + **/ + + /** The pattern used to detect float values in a field */ + /** + * removed for java 1.3 compatibility + * protected static final Object pFloats = Pattern.compile ("[0-9+\\-\\.eEfFdD]+"); + */ + + // inherit javadocs + public Object getAuto (IndexReader reader, String field) + throws IOException { + field = field.intern(); + Object ret = lookup (reader, field, SortField.AUTO); + if (ret == null) { + TermEnum enumerator = reader.terms (new Term (field, "")); + try { + Term term = enumerator.term(); + if (term == null) { + throw new RuntimeException ("no terms in field " + field + " - cannot determine sort type"); + } + if (term.field() == field) { + String termtext = term.text().trim(); + + /** + * Java 1.4 level code: + + if (pIntegers.matcher(termtext).matches()) + return IntegerSortedHitQueue.comparator (reader, 
enumerator, field); + + else if (pFloats.matcher(termtext).matches()) + return FloatSortedHitQueue.comparator (reader, enumerator, field); + */ + + // Java 1.3 level code: + try { + Integer.parseInt (termtext); + ret = getInts (reader, field); + } catch (NumberFormatException nfe1) { + try { + Float.parseFloat (termtext); + ret = getFloats (reader, field); + } catch (NumberFormatException nfe2) { + ret = getStringIndex (reader, field); + } + } + if (ret != null) { + store (reader, field, SortField.AUTO, ret); + } + } else { + throw new RuntimeException ("field \"" + field + "\" does not appear to be indexed"); + } + } finally { + enumerator.close(); + } + + } + return ret; + } + + // inherit javadocs + public Comparable[] getCustom (IndexReader reader, String field, SortComparator comparator) + throws IOException { + field = field.intern(); + Object ret = lookup (reader, field, comparator); + if (ret == null) { + final Comparable[] retArray = new Comparable[reader.maxDoc()]; + if (retArray.length > 0) { + TermDocs termDocs = reader.termDocs(); + TermEnum termEnum = reader.terms (new Term (field, "")); + try { + if (termEnum.term() == null) { + throw new RuntimeException ("no terms in field " + field); + } + do { + Term term = termEnum.term(); + if (term.field() != field) break; + Comparable termval = comparator.getComparable (term.text()); + termDocs.seek (termEnum); + while (termDocs.next()) { + retArray[termDocs.doc()] = termval; + } + } while (termEnum.next()); + } finally { + termDocs.close(); + termEnum.close(); + } + } + store (reader, field, SortField.CUSTOM, retArray); + return retArray; + } + return (String[]) ret; + } + +} diff --git a/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java b/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java index c28d6af5026..d2af685c66b 100644 --- a/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java +++ b/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java @@ -101,7 +101,9 @@ 
extends PriorityQueue { case SortField.STRING: String s1 = (String) docA.fields[i]; String s2 = (String) docB.fields[i]; - c = s2.compareTo(s1); + if (s2 == null) c = -1; // could be null if there are + else if (s1 == null) c = 1; // no terms in the given field + else c = s2.compareTo(s1); break; case SortField.FLOAT: float f1 = ((Float)docA.fields[i]).floatValue(); @@ -139,7 +141,9 @@ extends PriorityQueue { case SortField.STRING: String s1 = (String) docA.fields[i]; String s2 = (String) docB.fields[i]; - c = s1.compareTo(s2); + if (s1 == null) c = -1; // could be null if there are + else if (s2 == null) c = 1; // no terms in the given field + else c = s1.compareTo(s2); break; case SortField.FLOAT: float f1 = ((Float)docA.fields[i]).floatValue(); diff --git a/src/java/org/apache/lucene/search/FieldSortedHitQueue.java b/src/java/org/apache/lucene/search/FieldSortedHitQueue.java index 3467c3974a6..249b1a64e2c 100644 --- a/src/java/org/apache/lucene/search/FieldSortedHitQueue.java +++ b/src/java/org/apache/lucene/search/FieldSortedHitQueue.java @@ -17,311 +17,282 @@ package org.apache.lucene.search; */ import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; import org.apache.lucene.util.PriorityQueue; import java.io.IOException; -import java.util.Hashtable; +import java.util.WeakHashMap; +import java.util.Map; /** - * Expert: Base class for collecting results from a search and sorting - * them by terms in a given field in each document. - * - *

When one of these objects is created, a TermEnumerator is - * created to fetch all the terms in the index for the given field. - * The value of each term is assumed to represent a - * sort position. Each document is assumed to contain one of the - * terms, indicating where in the sort it belongs. - * - *

Memory Usage

- * - *

A static cache is maintained. This cache contains an integer - * or float array of length IndexReader.maxDoc() for each field - * name for which a sort is performed. In other words, the size of the - * cache in bytes is: - * - *

4 * IndexReader.maxDoc() * (# of different fields actually used to sort) - * - *

For String fields, the cache is larger: in addition to the - * above array, the value of every term in the field is kept in memory. - * If there are many unique terms in the field, this could - * be quite large. - * - *

Note that the size of the cache is not affected by how many - * fields are in the index and might be used to sort - only by - * the ones actually used to sort a result set. - * - *

The cache is cleared each time a new IndexReader is - * passed in, or if the value returned by maxDoc() - * changes for the current IndexReader. This class is not set up to - * be able to efficiently sort hits from more than one index - * simultaneously. + * Expert: A hit queue for sorting hits by terms in more than one field. + * Uses FieldCache.DEFAULT for maintaining internal term lookup tables. * *

Created: Dec 8, 2003 12:56:03 PM * * @author Tim Jones (Nacimiento Software) * @since lucene 1.4 * @version $Id$ + * @see Searchable#search(Query,Filter,int,Sort) + * @see FieldCache */ -abstract class FieldSortedHitQueue +class FieldSortedHitQueue extends PriorityQueue { - /** - * Keeps track of the IndexReader which the cache - * applies to. If it changes, the cache is cleared. - * We only store the hashcode so as not to mess up - * garbage collection by having a reference to an - * IndexReader. - */ - protected static int lastReaderHash; - - /** - * Contains the cache of sort information, mapping - * String (field names) to ScoreDocComparator. - */ - protected static final Hashtable fieldCache = new Hashtable(); - - /** The pattern used to detect integer values in a field */ - /** removed for java 1.3 compatibility - protected static final Pattern pIntegers = Pattern.compile ("[0-9\\-]+"); - **/ - - /** The pattern used to detect float values in a field */ - /** removed for java 1.3 compatibility - protected static final Object pFloats = Pattern.compile ("[0-9+\\-\\.eEfFdD]+"); - **/ - - - /** - * Returns a comparator for the given field. If there is already one in the cache, it is returned. - * Otherwise one is created and put into the cache. If reader is different than the - * one used for the current cache, or has changed size, the cache is cleared first. - * @param reader Index to use. - * @param field Field to sort by. - * @return Comparator; never null. - * @throws IOException If an error occurs reading the index. 
- * @see #determineComparator - */ - static ScoreDocComparator getCachedComparator (final IndexReader reader, final String field, final int type, final SortComparatorSource factory) - throws IOException { - - if (type == SortField.DOC) return ScoreDocComparator.INDEXORDER; - if (type == SortField.SCORE) return ScoreDocComparator.RELEVANCE; - - // see if we have already generated a comparator for this field - if (reader.hashCode() == lastReaderHash) { - ScoreDocLookupComparator comparer = (ScoreDocLookupComparator) fieldCache.get (field); - if (comparer != null && comparer.sizeMatches(reader.maxDoc())) { - return comparer; - } - } else { - lastReaderHash = reader.hashCode(); - fieldCache.clear(); - } - - ScoreDocComparator comparer = null; - switch (type) { - case SortField.AUTO: comparer = determineComparator (reader, field); break; - case SortField.STRING: comparer = StringSortedHitQueue.comparator (reader, field); break; - case SortField.INT: comparer = IntegerSortedHitQueue.comparator (reader, field); break; - case SortField.FLOAT: comparer = FloatSortedHitQueue.comparator (reader, field); break; - case SortField.CUSTOM: comparer = factory.newComparator (reader, field); break; - default: - throw new RuntimeException ("invalid sort field type: "+type); - } - - // store the comparator in the cache for reuse - fieldCache.put (field, comparer); - - return comparer; - } - - - /** Clears the static cache of sorting information. */ - static void clearCache() { - fieldCache.clear(); - } - - - /** - * Returns a FieldSortedHitQueue sorted by the given ScoreDocComparator. - * @param comparator Comparator to use. - * @param size Number of hits to retain. - * @return Hit queue sorted using the given comparator. 
- */ - static FieldSortedHitQueue getInstance (ScoreDocComparator comparator, int size) { - return new FieldSortedHitQueue (comparator, size) { - // dummy out the abstract method - protected ScoreDocLookupComparator createComparator (IndexReader reader, String field) throws IOException { - return null; - } - }; - } - - - /** - * Looks at the actual values in the field and determines whether - * they contain Integers, Floats or Strings. Only the first term in the field - * is looked at. - *

The following patterns are used to determine the content of the terms: - *

- * - * - * - * - *
Sequence | Pattern | Type
1 | [0-9\-]+ | Integer
2 | [0-9+\-\.eEfFdD]+ | Float
3 | (none - default) | String
- * - * @param reader Index to use. - * @param field Field to create comparator for. - * @return Comparator appropriate for the terms in the given field. - * @throws IOException If an error occurs reading the index. - */ - protected static ScoreDocComparator determineComparator (IndexReader reader, String field) - throws IOException { - field = field.intern(); - TermEnum enumerator = reader.terms (new Term (field, "")); - try { - Term term = enumerator.term(); - if (term == null) { - throw new RuntimeException ("no terms in field "+field+" - cannot determine sort type"); - } - if (term.field() == field) { - String termtext = term.text().trim(); - - /** - * Java 1.4 level code: - - if (pIntegers.matcher(termtext).matches()) - return IntegerSortedHitQueue.comparator (reader, enumerator, field); - - else if (pFloats.matcher(termtext).matches()) - return FloatSortedHitQueue.comparator (reader, enumerator, field); - */ - - // Java 1.3 level code: - try { - Integer.parseInt (termtext); - return IntegerSortedHitQueue.comparator (reader, enumerator, field); - } catch (NumberFormatException nfe) { - // nothing - } - try { - Float.parseFloat (termtext); - return FloatSortedHitQueue.comparator (reader, enumerator, field); - } catch (NumberFormatException nfe) { - // nothing - } - - return StringSortedHitQueue.comparator (reader, enumerator, field); - - } else { - throw new RuntimeException ("field \""+field+"\" does not appear to be indexed"); - } - } finally { - enumerator.close(); - } - } - - /** - * The sorting priority used. The first element is set by the constructors. - * The result is that sorting is done by field value, then by index order. - */ - private final ScoreDocComparator[] comparators = new ScoreDocComparator[] { - null, ScoreDocComparator.INDEXORDER - }; - - - /** - * Creates a hit queue sorted by the given field. Hits are sorted by the field, then - * by index order. - * @param reader IndexReader to use. - * @param field Field to sort by. 
- * @param size Number of hits to return - see {@link PriorityQueue#initialize(int) initialize} - * @throws IOException If the internal term enumerator fails. - */ - FieldSortedHitQueue (IndexReader reader, String field, int size) - throws IOException { - - // reset the cache if we have a new reader - int hash = reader.hashCode(); - if (hash != lastReaderHash) { - lastReaderHash = hash; - fieldCache.clear(); - } - - // initialize the PriorityQueue - initialize (size); - - // set the sort - comparators[0] = initializeSort (reader, field); + /** + * Creates a hit queue sorted by the given list of fields. + * @param reader Index to use. + * @param fields Field names, in priority order (highest priority first). Cannot be null or empty. + * @param size The number of hits to retain. Must be greater than zero. + * @throws IOException + */ + FieldSortedHitQueue (IndexReader reader, SortField[] fields, int size) + throws IOException { + final int n = fields.length; + comparators = new ScoreDocComparator[n]; + this.fields = new SortField[n]; + for (int i=0; ia is less relevant than b - * @param a ScoreDoc - * @param b ScoreDoc - * @return true if document a should be sorted after document b. - */ - protected final boolean lessThan (final Object a, final Object b) { - final ScoreDoc docA = (ScoreDoc) a; - final ScoreDoc docB = (ScoreDoc) b; - final int n = comparators.length; - int c = 0; - for (int i=0; i 0; - } + /** + * Returns whether a is less relevant than b. + * @param a ScoreDoc + * @param b ScoreDoc + * @return true if document a should be sorted after document b. + */ + protected final boolean lessThan (final Object a, final Object b) { + final ScoreDoc docA = (ScoreDoc) a; + final ScoreDoc docB = (ScoreDoc) b; + // keep track of maximum score + if (docA.score > maxscore) maxscore = docA.score; + if (docB.score > maxscore) maxscore = docB.score; - /** - * Initializes the cache of sort information. 
fieldCache is queried - * to see if it has the term information for the given field. - * If so, and if the reader still has the same value for maxDoc() - * (note that we assume new IndexReaders are caught during the - * constructor), the existing data is used. If not, all the term values - * for the given field are fetched. The value of the term is assumed - * to indicate the sort order for any documents containing the term. Documents - * should only have one term in the given field. Multiple documents - * can share the same term if desired, in which case they will be - * considered equal during the sort. - * @param reader The document index. - * @param field The field to sort by. - * @throws IOException If createComparator(IndexReader,String) fails - usually caused by the term enumerator failing. - */ - protected final ScoreDocComparator initializeSort (IndexReader reader, String field) - throws IOException { - - ScoreDocLookupComparator comparer = (ScoreDocLookupComparator) fieldCache.get (field); - if (comparer == null || !comparer.sizeMatches(reader.maxDoc())) { - comparer = createComparator (reader, field); - fieldCache.put (field, comparer); - } - return comparer; + // run comparators + final int n = comparators.length; + int c = 0; + for (int i=0; i 0; + } - /** - * Subclasses should implement this method to provide an appropriate ScoreDocLookupComparator. - * @param reader Index to use. - * @param field Field to use for sorting. - * @return Comparator to use to sort hits. - * @throws IOException If an error occurs reading the index. - */ - protected abstract ScoreDocLookupComparator createComparator (IndexReader reader, String field) - throws IOException; -} \ No newline at end of file + /** + * Given a FieldDoc object, stores the values used + * to sort the given document. These values are not the raw + * values out of the index, but the internal representation + * of them. 
This is so the given search hit can be collated + * by a MultiSearcher with other search hits. + * @param doc The FieldDoc to store sort values into. + * @return The same FieldDoc passed in. + * @see Searchable#search(Query,Filter,int,Sort) + */ + FieldDoc fillFields (final FieldDoc doc) { + final int n = comparators.length; + final Comparable[] fields = new Comparable[n]; + for (int i=0; i 1.0f) doc.score /= maxscore; // normalize scores + return doc; + } + + + /** Returns the SortFields being used by this hit queue. */ + SortField[] getFields() { + return fields; + } + + /** Internal cache of comparators. Similar to FieldCache, only + * caches comparators instead of term values. */ + static final Map Comparators = new WeakHashMap(); + + /** Returns a comparator if it is in the cache. */ + static ScoreDocComparator lookup (IndexReader reader, String field, int type, Object factory) { + FieldCacheImpl.Entry entry = (factory != null) ? new FieldCacheImpl.Entry (reader, field, factory) + : new FieldCacheImpl.Entry (reader, field, type); + synchronized (Comparators) { + return (ScoreDocComparator) Comparators.get (entry); + } + } + + /** Stores a comparator into the cache. */ + static Object store (IndexReader reader, String field, int type, Object factory, Object value) { + FieldCacheImpl.Entry entry = (factory != null) ? 
new FieldCacheImpl.Entry (reader, field, factory) + : new FieldCacheImpl.Entry (reader, field, type); + synchronized (Comparators) { + return Comparators.put (entry, value); + } + } + + static ScoreDocComparator getCachedComparator (IndexReader reader, String fieldname, int type, SortComparatorSource factory) + throws IOException { + if (type == SortField.DOC) return ScoreDocComparator.INDEXORDER; + if (type == SortField.SCORE) return ScoreDocComparator.RELEVANCE; + ScoreDocComparator comparator = lookup (reader, fieldname, type, factory); + if (comparator == null) { + switch (type) { + case SortField.AUTO: + comparator = comparatorAuto (reader, fieldname); + break; + case SortField.INT: + comparator = comparatorInt (reader, fieldname); + break; + case SortField.FLOAT: + comparator = comparatorFloat (reader, fieldname); + break; + case SortField.STRING: + comparator = comparatorString (reader, fieldname); + break; + case SortField.CUSTOM: + comparator = factory.newComparator (reader, fieldname); + break; + default: + throw new RuntimeException ("unknown field type: "+type); + } + store (reader, fieldname, type, factory, comparator); + } + return comparator; + } + + /** + * Returns a comparator for sorting hits according to a field containing integers. + * @param reader Index to use. + * @param fieldname Field containg integer values. + * @return Comparator for sorting hits. + * @throws IOException If an error occurs reading the index. 
+ */ + static ScoreDocComparator comparatorInt (final IndexReader reader, final String fieldname) + throws IOException { + final String field = fieldname.intern(); + return new ScoreDocComparator() { + + final int[] fieldOrder = FieldCache.DEFAULT.getInts (reader, field); + + public final int compare (final ScoreDoc i, final ScoreDoc j) { + final int fi = fieldOrder[i.doc]; + final int fj = fieldOrder[j.doc]; + if (fi < fj) return -1; + if (fi > fj) return 1; + return 0; + } + + public Comparable sortValue (final ScoreDoc i) { + return new Integer (fieldOrder[i.doc]); + } + + public int sortType() { + return SortField.INT; + } + }; + } + + /** + * Returns a comparator for sorting hits according to a field containing floats. + * @param reader Index to use. + * @param fieldname Field containg float values. + * @return Comparator for sorting hits. + * @throws IOException If an error occurs reading the index. + */ + static ScoreDocComparator comparatorFloat (final IndexReader reader, final String fieldname) + throws IOException { + final String field = fieldname.intern(); + return new ScoreDocComparator () { + + protected final float[] fieldOrder = FieldCache.DEFAULT.getFloats (reader, field); + + public final int compare (final ScoreDoc i, final ScoreDoc j) { + final float fi = fieldOrder[i.doc]; + final float fj = fieldOrder[j.doc]; + if (fi < fj) return -1; + if (fi > fj) return 1; + return 0; + } + + public Comparable sortValue (final ScoreDoc i) { + return new Float (fieldOrder[i.doc]); + } + + public int sortType() { + return SortField.FLOAT; + } + }; + } + + /** + * Returns a comparator for sorting hits according to a field containing strings. + * @param reader Index to use. + * @param fieldname Field containg string values. + * @return Comparator for sorting hits. + * @throws IOException If an error occurs reading the index. 
+ */ + static ScoreDocComparator comparatorString (final IndexReader reader, final String fieldname) + throws IOException { + final String field = fieldname.intern(); + return new ScoreDocComparator () { + final FieldCache.StringIndex index = FieldCache.DEFAULT.getStringIndex (reader, field); + + public final int compare (final ScoreDoc i, final ScoreDoc j) { + final int fi = index.order[i.doc]; + final int fj = index.order[j.doc]; + if (fi < fj) return -1; + if (fi > fj) return 1; + return 0; + } + + public Comparable sortValue (final ScoreDoc i) { + return index.lookup[index.order[i.doc]]; + } + + public int sortType() { + return SortField.STRING; + } + }; + } + + /** + * Returns a comparator for sorting hits according to values in the given field. + * The terms in the field are looked at to determine whether they contain integers, + * floats or strings. Once the type is determined, one of the other static methods + * in this class is called to get the comparator. + * @param reader Index to use. + * @param fieldname Field containg values. + * @return Comparator for sorting hits. + * @throws IOException If an error occurs reading the index. 
+ */ + static ScoreDocComparator comparatorAuto (final IndexReader reader, final String fieldname) + throws IOException { + final String field = fieldname.intern(); + Object lookupArray = FieldCache.DEFAULT.getAuto (reader, field); + if (lookupArray instanceof FieldCache.StringIndex) { + return comparatorString (reader, field); + } else if (lookupArray instanceof int[]) { + return comparatorInt (reader, field); + } else if (lookupArray instanceof float[]) { + return comparatorFloat (reader, field); + } else if (lookupArray instanceof String[]) { + return comparatorString (reader, field); + } else { + throw new RuntimeException ("unknown data type in field '"+field+"'"); + } + } +} diff --git a/src/java/org/apache/lucene/search/FloatSortedHitQueue.java b/src/java/org/apache/lucene/search/FloatSortedHitQueue.java deleted file mode 100644 index 62df2a90329..00000000000 --- a/src/java/org/apache/lucene/search/FloatSortedHitQueue.java +++ /dev/null @@ -1,150 +0,0 @@ -package org.apache.lucene.search; - -/** - * Copyright 2004 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.TermEnum; - -import java.io.IOException; - -/** - * Expert: A sorted hit queue for fields that contain strictly floating point values. 
- * Hits are sorted into the queue by the values in the field and then by document number. - * - *

Created: Feb 2, 2004 9:23:03 AM - * - * @author Tim Jones (Nacimiento Software) - * @since lucene 1.4 - * @version $Id$ - */ -class FloatSortedHitQueue -extends FieldSortedHitQueue { - - /** - * Creates a hit queue sorted over the given field containing float values. - * @param reader Index to use. - * @param float_field Field containing float sort information - * @param size Number of hits to collect. - * @throws IOException If an error occurs reading the index. - */ - FloatSortedHitQueue (IndexReader reader, String float_field, int size) - throws IOException { - super (reader, float_field, size); - } - - - /** - * Returns a comparator for sorting hits according to a field containing floats. - * Just calls comparator(IndexReader,String). - * @param reader Index to use. - * @param field Field containg float values. - * @return Comparator for sorting hits. - * @throws IOException If an error occurs reading the index. - */ - protected ScoreDocLookupComparator createComparator (final IndexReader reader, final String field) - throws IOException { - return comparator (reader, field); - } - - - /** - * Returns a comparator for sorting hits according to a field containing floats. - * @param reader Index to use. - * @param fieldname Field containg float values. - * @return Comparator for sorting hits. - * @throws IOException If an error occurs reading the index. - */ - static ScoreDocLookupComparator comparator (final IndexReader reader, final String fieldname) - throws IOException { - TermEnum enumerator = reader.terms (new Term (fieldname, "")); - return comparator (reader, enumerator, fieldname); - } - - - /** - * Returns a comparator for sorting hits according to a field containing floats using the given enumerator - * to collect term values. - * @param reader Index to use. - * @param fieldname Field containg float values. - * @return Comparator for sorting hits. - * @throws IOException If an error occurs reading the index. 
- */ - static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String fieldname) - throws IOException { - final String field = fieldname.intern(); - return new ScoreDocLookupComparator () { - - protected final float[] fieldOrder = generateSortIndex(); - - protected final float[] generateSortIndex() - throws IOException { - - float[] retArray = new float[reader.maxDoc()]; - if (retArray.length > 0) { - TermDocs termDocs = reader.termDocs (); - try { - if (enumerator.term() == null) { - throw new RuntimeException ("no terms in field "+field); - } - do { - Term term = enumerator.term(); - if (term.field() != field) break; - float termval = Float.parseFloat (term.text()); - termDocs.seek (enumerator); - while (termDocs.next()) { - retArray[termDocs.doc()] = termval; - } - } while (enumerator.next()); - } finally { - termDocs.close(); - } - } - return retArray; - } - - public final int compare (final ScoreDoc i, final ScoreDoc j) { - final float fi = fieldOrder[i.doc]; - final float fj = fieldOrder[j.doc]; - if (fi < fj) return -1; - if (fi > fj) return 1; - return 0; - } - - public final int compareReverse (final ScoreDoc i, final ScoreDoc j) { - final float fi = fieldOrder[i.doc]; - final float fj = fieldOrder[j.doc]; - if (fi > fj) return -1; - if (fi < fj) return 1; - return 0; - } - - public final boolean sizeMatches (final int n) { - return fieldOrder.length == n; - } - - public Comparable sortValue (final ScoreDoc i) { - return new Float (fieldOrder[i.doc]); - } - - public int sortType() { - return SortField.FLOAT; - } - }; - } -} diff --git a/src/java/org/apache/lucene/search/IndexSearcher.java b/src/java/org/apache/lucene/search/IndexSearcher.java index 3af096596ae..0795ce35c71 100644 --- a/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/src/java/org/apache/lucene/search/IndexSearcher.java @@ -115,8 +115,8 @@ public class IndexSearcher extends Searcher { return new TopFieldDocs(0, new ScoreDoc[0], 
sort.fields); final BitSet bits = filter != null ? filter.bits(reader) : null; - final MultiFieldSortedHitQueue hq = - new MultiFieldSortedHitQueue(reader, sort.fields, nDocs); + final FieldSortedHitQueue hq = + new FieldSortedHitQueue(reader, sort.fields, nDocs); final int[] totalHits = new int[1]; scorer.score(new HitCollector() { public final void collect(int doc, float score) { diff --git a/src/java/org/apache/lucene/search/IntegerSortedHitQueue.java b/src/java/org/apache/lucene/search/IntegerSortedHitQueue.java deleted file mode 100644 index eb0d4490f7b..00000000000 --- a/src/java/org/apache/lucene/search/IntegerSortedHitQueue.java +++ /dev/null @@ -1,151 +0,0 @@ -package org.apache.lucene.search; - -/** - * Copyright 2004 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.TermEnum; - -import java.io.IOException; - - -/** - * Expert: A sorted hit queue for fields that contain strictly integer values. - * Hits are sorted into the queue by the values in the field and then by document number. - * - *

Created: Jan 30, 2004 3:35:09 PM - * - * @author Tim Jones (Nacimiento Software) - * @since lucene 1.4 - * @version $Id$ - */ -class IntegerSortedHitQueue -extends FieldSortedHitQueue { - - /** - * Creates a hit queue sorted over the given field containing integer values. - * @param reader Index to use. - * @param integer_field Field containing integer sort information - * @param size Number of hits to collect. - * @throws IOException If an error occurs reading the index. - */ - IntegerSortedHitQueue (IndexReader reader, String integer_field, int size) - throws IOException { - super (reader, integer_field, size); - } - - - /** - * Returns a comparator for sorting hits according to a field containing integers. - * Just calls comparator(IndexReader,String). - * @param reader Index to use. - * @param field Field containg integer values. - * @return Comparator for sorting hits. - * @throws IOException If an error occurs reading the index. - */ - protected ScoreDocLookupComparator createComparator (final IndexReader reader, final String field) - throws IOException { - return comparator (reader, field); - } - - - /** - * Returns a comparator for sorting hits according to a field containing integers. - * @param reader Index to use. - * @param fieldname Field containg integer values. - * @return Comparator for sorting hits. - * @throws IOException If an error occurs reading the index. - */ - static ScoreDocLookupComparator comparator (final IndexReader reader, final String fieldname) - throws IOException { - TermEnum enumerator = reader.terms (new Term (fieldname, "")); - return comparator (reader, enumerator, fieldname); - } - - - /** - * Returns a comparator for sorting hits according to a field containing integers using the given enumerator - * to collect term values. - * @param reader Index to use. - * @param fieldname Field containg integer values. - * @return Comparator for sorting hits. - * @throws IOException If an error occurs reading the index. 
- */ - static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String fieldname) - throws IOException { - final String field = fieldname.intern(); - return new ScoreDocLookupComparator() { - - protected final int[] fieldOrder = generateSortIndex(); - - private final int[] generateSortIndex() - throws IOException { - - final int[] retArray = new int[reader.maxDoc()]; - if (retArray.length > 0) { - TermDocs termDocs = reader.termDocs(); - try { - if (enumerator.term() == null) { - throw new RuntimeException ("no terms in field "+field); - } - do { - Term term = enumerator.term(); - if (term.field() != field) break; - int termval = Integer.parseInt (term.text()); - termDocs.seek (enumerator); - while (termDocs.next()) { - retArray[termDocs.doc()] = termval; - } - } while (enumerator.next()); - } finally { - termDocs.close(); - } - } - return retArray; - } - - public final int compare (final ScoreDoc i, final ScoreDoc j) { - final int fi = fieldOrder[i.doc]; - final int fj = fieldOrder[j.doc]; - if (fi < fj) return -1; - if (fi > fj) return 1; - return 0; - } - - public final int compareReverse (final ScoreDoc i, final ScoreDoc j) { - final int fi = fieldOrder[i.doc]; - final int fj = fieldOrder[j.doc]; - if (fi > fj) return -1; - if (fi < fj) return 1; - return 0; - } - - public final boolean sizeMatches (final int n) { - return fieldOrder.length == n; - } - - public Comparable sortValue (final ScoreDoc i) { - return new Integer (fieldOrder[i.doc]); - } - - public int sortType() { - return SortField.INT; - } - }; - } -} diff --git a/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java b/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java deleted file mode 100644 index 2a5e69bc542..00000000000 --- a/src/java/org/apache/lucene/search/MultiFieldSortedHitQueue.java +++ /dev/null @@ -1,124 +0,0 @@ -package org.apache.lucene.search; - -/** - * Copyright 2004 The Apache Software Foundation - * - * Licensed 
under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.util.PriorityQueue; - -import java.io.IOException; - -/** - * Expert: A hit queue for sorting by hits by terms in more than one field. - * The type of content in each field could be determined dynamically by - * FieldSortedHitQueue.determineComparator(). - * - *

Created: Feb 3, 2004 4:46:55 PM - * - * @author Tim Jones (Nacimiento Software) - * @since lucene 1.4 - * @version $Id$ - * @see FieldSortedHitQueue - * @see Searchable#search(Query,Filter,int,Sort) - */ -class MultiFieldSortedHitQueue -extends PriorityQueue { - - /** - * Creates a hit queue sorted by the given list of fields. - * @param reader Index to use. - * @param fields Field names, in priority order (highest priority first). Cannot be null or empty. - * @param size The number of hits to retain. Must be greater than zero. - * @throws IOException - */ - MultiFieldSortedHitQueue (IndexReader reader, SortField[] fields, int size) - throws IOException { - final int n = fields.length; - comparators = new ScoreDocComparator[n]; - this.fields = new SortField[n]; - for (int i=0; ia is less relevant than b. - * @param a ScoreDoc - * @param b ScoreDoc - * @return true if document a should be sorted after document b. - */ - protected final boolean lessThan (final Object a, final Object b) { - final ScoreDoc docA = (ScoreDoc) a; - final ScoreDoc docB = (ScoreDoc) b; - - // keep track of maximum score - if (docA.score > maxscore) maxscore = docA.score; - if (docB.score > maxscore) maxscore = docB.score; - - // run comparators - final int n = comparators.length; - int c = 0; - for (int i=0; i 0; - } - - - /** - * Given a FieldDoc object, stores the values used - * to sort the given document. These values are not the raw - * values out of the index, but the internal representation - * of them. This is so the given search hit can be collated - * by a MultiSearcher with other search hits. - * @param doc The FieldDoc to store sort values into. - * @return The same FieldDoc passed in. 
- * @see Searchable#search(Query,Filter,int,Sort) - */ - FieldDoc fillFields (final FieldDoc doc) { - final int n = comparators.length; - final Comparable[] fields = new Comparable[n]; - for (int i=0; i 1.0f) doc.score /= maxscore; // normalize scores - return doc; - } - - - /** Returns the SortFields being used by this hit queue. */ - SortField[] getFields() { - return fields; - } - -} diff --git a/src/java/org/apache/lucene/search/ScoreDocComparator.java b/src/java/org/apache/lucene/search/ScoreDocComparator.java index 408f9dea731..5d34930f937 100644 --- a/src/java/org/apache/lucene/search/ScoreDocComparator.java +++ b/src/java/org/apache/lucene/search/ScoreDocComparator.java @@ -35,11 +35,6 @@ public interface ScoreDocComparator { if (i.score < j.score) return 1; return 0; } - public int compareReverse (ScoreDoc i, ScoreDoc j) { - if (i.score < j.score) return -1; - if (i.score > j.score) return 1; - return 0; - } public Comparable sortValue (ScoreDoc i) { return new Float (i.score); } @@ -56,11 +51,6 @@ public interface ScoreDocComparator { if (i.doc > j.doc) return 1; return 0; } - public int compareReverse (ScoreDoc i, ScoreDoc j) { - if (i.doc > j.doc) return -1; - if (i.doc < j.doc) return 1; - return 0; - } public Comparable sortValue (ScoreDoc i) { return new Integer (i.doc); } @@ -81,21 +71,11 @@ public interface ScoreDocComparator { int compare (ScoreDoc i, ScoreDoc j); - /** - * Compares two ScoreDoc objects and returns a result indicating their - * sort order in reverse. - * @param i First ScoreDoc - * @param j Second ScoreDoc - * @return -1 if j should come before i
1 if j should come after i
0 if they are equal - * @see java.util.Comparator - */ - int compareReverse (ScoreDoc i, ScoreDoc j); - - /** * Returns the value used to sort the given document. The * object returned must implement the java.io.Serializable - * interface. + * interface. This is used by multisearchers to determine how to collate results from their searchers. + * @see FieldDoc * @param i Document * @return Serializable object */ @@ -105,6 +85,7 @@ public interface ScoreDocComparator { /** * Returns the type of sort. Should return SortField.SCORE, SortField.DOC, SortField.STRING, SortField.INTEGER, * SortField.FLOAT or SortField.CUSTOM. It is not valid to return SortField.AUTO. + * This is used by multisearchers to determine how to collate results from their searchers. * @return One of the constants in SortField. * @see SortField */ diff --git a/src/java/org/apache/lucene/search/ScoreDocLookupComparator.java b/src/java/org/apache/lucene/search/ScoreDocLookupComparator.java deleted file mode 100644 index 17ef32c9cb5..00000000000 --- a/src/java/org/apache/lucene/search/ScoreDocLookupComparator.java +++ /dev/null @@ -1,40 +0,0 @@ -package org.apache.lucene.search; - -/** - * Copyright 2004 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - -/** - * Expert: Compares two ScoreDoc objects for sorting using a lookup table. - * - *

Created: Feb 3, 2004 9:59:14 AM - * - * @author Tim Jones (Nacimiento Software) - * @since lucene 1.4 - * @version $Id$ - */ -public interface ScoreDocLookupComparator -extends ScoreDocComparator { - - /** - * Verifies that the internal lookup table is the correct size. This - * comparator uses a lookup table, so it is important to that the - * table matches the number of documents in the index. - * @param n Expected size of table. - * @return True if internal table matches expected size; false otherwise - */ - boolean sizeMatches (int n); -} \ No newline at end of file diff --git a/src/java/org/apache/lucene/search/SortComparator.java b/src/java/org/apache/lucene/search/SortComparator.java new file mode 100644 index 00000000000..32e9bff7559 --- /dev/null +++ b/src/java/org/apache/lucene/search/SortComparator.java @@ -0,0 +1,68 @@ +package org.apache.lucene.search; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.TermEnum; + +import java.io.IOException; +import java.io.Serializable; + +/** + * Abstract base class for sorting hits returned by a Query. + * + *

This class should only be used if the other SortField + * types (SCORE, DOC, STRING, INT, FLOAT) do not provide an + * adequate sorting. It maintains an internal cache of values which + * could be quite large. The cache is an array of Comparable, + * one for each document in the index. There is a distinct + * Comparable for each unique term in the field - if + * some documents have the same term in the field, the cache + * array will have entries which reference the same Comparable. + * + *

Created: Apr 21, 2004 5:08:38 PM + * + * @author Tim Jones + * @version $Id$ + * @since 1.4 + */ +public abstract class SortComparator +implements SortComparatorSource { + + // inherit javadocs + public ScoreDocComparator newComparator (final IndexReader reader, final String fieldname) + throws IOException { + final String field = fieldname.intern(); + return new ScoreDocComparator() { + protected Comparable[] cachedValues = FieldCache.DEFAULT.getCustom (reader, field, SortComparator.this); + + public int compare (ScoreDoc i, ScoreDoc j) { + return cachedValues[i.doc].compareTo (cachedValues[j.doc]); + } + + public Comparable sortValue (ScoreDoc i) { + return cachedValues[i.doc]; + } + + public int sortType(){ + return SortField.CUSTOM; + } + }; + } + + /** + * Returns an object which, when sorted according to natural order, + * will order the Term values in the correct order. + *

For example, if the Terms contained integer values, this method + * would return new Integer(termtext). Note that this + * might not always be the most efficient implementation - for this + * particular example, a better implementation might be to make a + * ScoreDocLookupComparator that uses an internal lookup table of int. + * @param termtext The textual value of the term. + * @return An object representing termtext that sorts according to the natural order of termtext. + * @see Comparable + * @see ScoreDocComparator + */ + protected abstract Comparable getComparable (String termtext); + +} \ No newline at end of file diff --git a/src/java/org/apache/lucene/search/SortComparatorSource.java b/src/java/org/apache/lucene/search/SortComparatorSource.java index 8bdb0651291..c4508571a52 100644 --- a/src/java/org/apache/lucene/search/SortComparatorSource.java +++ b/src/java/org/apache/lucene/search/SortComparatorSource.java @@ -23,6 +23,6 @@ extends Serializable { * @return Comparator of ScoreDoc objects. * @throws IOException If an error occurs reading the index. */ - ScoreDocLookupComparator newComparator (IndexReader reader, String fieldname) + ScoreDocComparator newComparator (IndexReader reader, String fieldname) throws IOException; } \ No newline at end of file diff --git a/src/java/org/apache/lucene/search/SortField.java b/src/java/org/apache/lucene/search/SortField.java index fb17602b6c9..b4a9727a758 100644 --- a/src/java/org/apache/lucene/search/SortField.java +++ b/src/java/org/apache/lucene/search/SortField.java @@ -62,6 +62,11 @@ implements Serializable { * sorting is done according to natural order. */ public static final int CUSTOM = 9; + // IMPLEMENTATION NOTE: the FieldCache.STRING_INDEX is in the same "namespace" + // as the above static int values. Any new values must not have the same value + // as FieldCache.STRING_INDEX. + + /** Represents sorting by document score (relevancy). 
*/ public static final SortField FIELD_SCORE = new SortField (null, SCORE); diff --git a/src/java/org/apache/lucene/search/StringSortedHitQueue.java b/src/java/org/apache/lucene/search/StringSortedHitQueue.java deleted file mode 100644 index e6d94009086..00000000000 --- a/src/java/org/apache/lucene/search/StringSortedHitQueue.java +++ /dev/null @@ -1,185 +0,0 @@ -package org.apache.lucene.search; - -/** - * Copyright 2004 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.TermEnum; - -import java.io.IOException; - -/** - * Expert: A sorted hit queue for fields that contain string values. - * Hits are sorted into the queue by the values in the field and then by document number. - * Warning: The internal cache could be quite large, depending on the number of terms - * in the field! All the terms are kept in memory, as well as a sorted array of - * integers representing their relative position. - * - *

Created: Feb 2, 2004 9:26:33 AM - * - * @author Tim Jones (Nacimiento Software) - * @since lucene 1.4 - * @version $Id$ - */ -class StringSortedHitQueue -extends FieldSortedHitQueue { - - /** - * Creates a hit queue sorted over the given field containing string values. - * @param reader Index to use. - * @param string_field Field containing string sort information - * @param size Number of hits to collect. - * @throws IOException If an error occurs reading the index. - */ - StringSortedHitQueue (IndexReader reader, String string_field, int size) - throws IOException { - super (reader, string_field, size); - } - - - /** - * Returns a comparator for sorting hits according to a field containing strings. - * Just calls comparator(IndexReader,String). - * @param reader Index to use. - * @param field Field containg string values. - * @return Comparator for sorting hits. - * @throws IOException If an error occurs reading the index. - */ - protected ScoreDocLookupComparator createComparator (final IndexReader reader, final String field) - throws IOException { - return comparator (reader, field); - } - - - /** - * Returns a comparator for sorting hits according to a field containing strings. - * @param reader Index to use. - * @param fieldname Field containg string values. - * @return Comparator for sorting hits. - * @throws IOException If an error occurs reading the index. - */ - static ScoreDocLookupComparator comparator (final IndexReader reader, final String fieldname) - throws IOException { - TermEnum enumerator = reader.terms (new Term (fieldname, "")); - return comparator (reader, enumerator, fieldname); - } - - - /** - * Returns a comparator for sorting hits according to a field containing strings using the given enumerator - * to collect term values. - * @param reader Index to use. - * @param fieldname Field containg string values. - * @return Comparator for sorting hits. - * @throws IOException If an error occurs reading the index. 
- */ - static ScoreDocLookupComparator comparator (final IndexReader reader, final TermEnum enumerator, final String fieldname) - throws IOException { - final String field = fieldname.intern(); - return new ScoreDocLookupComparator() { - - protected final int[] fieldOrder = generateSortIndex(); - protected String[] terms; - - private final int[] generateSortIndex() - throws IOException { - - final int[] retArray = new int[reader.maxDoc()]; - final String[] mterms = new String[reader.maxDoc()]; // guess length - if (retArray.length > 0) { - TermDocs termDocs = reader.termDocs(); - int t = 0; // current term number - try { - if (enumerator.term() == null) { - throw new RuntimeException ("no terms in field " + field); - } - - // NOTE: the contract for TermEnum says the - // terms will be in natural order (which is - // ordering by field name, term text). The - // contract for TermDocs says the docs will - // be ordered by document number. So the - // following loop will automatically sort the - // terms in the correct order. - - // if a given document has more than one term - // in the field, only the last one will be used. 
- - do { - Term term = enumerator.term(); - if (term.field() != field) break; - - // store term text - // we expect that there is at most one term per document - if (t >= mterms.length) throw new RuntimeException ("there are more terms than documents in field \""+field+"\""); - mterms[t] = term.text(); - - // store which documents use this term - termDocs.seek (enumerator); - while (termDocs.next()) { - retArray[termDocs.doc()] = t; - } - - t++; - } while (enumerator.next()); - } finally { - termDocs.close(); - } - - // if there are less terms than documents, - // trim off the dead array space - if (t < mterms.length) { - terms = new String[t]; - System.arraycopy (mterms, 0, terms, 0, t); - } else { - terms = mterms; - } - } - return retArray; - } - - public final int compare (final ScoreDoc i, final ScoreDoc j) { - final int fi = fieldOrder[i.doc]; - final int fj = fieldOrder[j.doc]; - if (fi < fj) return -1; - if (fi > fj) return 1; - return 0; - } - - public final int compareReverse (final ScoreDoc i, final ScoreDoc j) { - final int fi = fieldOrder[i.doc]; - final int fj = fieldOrder[j.doc]; - if (fi > fj) return -1; - if (fi < fj) return 1; - return 0; - } - - public final boolean sizeMatches (final int n) { - return fieldOrder.length == n; - } - - public Comparable sortValue (final ScoreDoc i) { - return terms[fieldOrder[i.doc]]; - } - - public int sortType() { - return SortField.STRING; - } - }; - } -} diff --git a/src/test/org/apache/lucene/search/SampleComparable.java b/src/test/org/apache/lucene/search/SampleComparable.java index 6a47a424213..3d0ce6d6e2a 100644 --- a/src/test/org/apache/lucene/search/SampleComparable.java +++ b/src/test/org/apache/lucene/search/SampleComparable.java @@ -64,28 +64,20 @@ implements Comparable, Serializable { return i; } - public static SortComparatorSource getComparator () { + public static SortComparatorSource getComparatorSource () { return new SortComparatorSource () { - public ScoreDocLookupComparator newComparator 
(final IndexReader reader, String fieldname) + public ScoreDocComparator newComparator (final IndexReader reader, String fieldname) throws IOException { final String field = fieldname.intern (); final TermEnum enumerator = reader.terms (new Term (fieldname, "")); try { - return new ScoreDocLookupComparator () { + return new ScoreDocComparator () { protected Comparable[] cachedValues = fillCache (reader, enumerator, field); - public boolean sizeMatches (int n) { - return (cachedValues.length == n); - } - public int compare (ScoreDoc i, ScoreDoc j) { return cachedValues[i.doc].compareTo (cachedValues[j.doc]); } - public int compareReverse (ScoreDoc i, ScoreDoc j) { - return cachedValues[j.doc].compareTo (cachedValues[i.doc]); - } - public Comparable sortValue (ScoreDoc i) { return cachedValues[i.doc]; } @@ -140,4 +132,12 @@ implements Comparable, Serializable { } }; } + + public static SortComparator getComparator() { + return new SortComparator() { + protected Comparable getComparable (String termtext) { + return new SampleComparable (termtext); + } + }; + } } \ No newline at end of file diff --git a/src/test/org/apache/lucene/search/TestSort.java b/src/test/org/apache/lucene/search/TestSort.java index f3eff289907..bb647d051dd 100644 --- a/src/test/org/apache/lucene/search/TestSort.java +++ b/src/test/org/apache/lucene/search/TestSort.java @@ -253,9 +253,14 @@ implements Serializable { public void testCustomSorts() throws Exception { - sort.setSort (new SortField ("custom", SampleComparable.getComparator())); + sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource())); assertMatches (full, queryX, sort, "CAIEG"); - sort.setSort (new SortField ("custom", SampleComparable.getComparator(), true)); + sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource(), true)); + assertMatches (full, queryY, sort, "HJDBF"); + SortComparator custom = SampleComparable.getComparator(); + sort.setSort (new SortField ("custom", custom)); + 
assertMatches (full, queryX, sort, "CAIEG"); + sort.setSort (new SortField ("custom", custom, true)); assertMatches (full, queryY, sort, "HJDBF"); } @@ -281,9 +286,14 @@ implements Serializable { public void testRemoteCustomSort() throws Exception { Searchable searcher = getRemote(); MultiSearcher multi = new MultiSearcher (new Searchable[] { searcher }); - sort.setSort (new SortField ("custom", SampleComparable.getComparator())); + sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource())); assertMatches (multi, queryX, sort, "CAIEG"); - sort.setSort (new SortField ("custom", SampleComparable.getComparator(), true)); + sort.setSort (new SortField ("custom", SampleComparable.getComparatorSource(), true)); + assertMatches (multi, queryY, sort, "HJDBF"); + SortComparator custom = SampleComparable.getComparator(); + sort.setSort (new SortField ("custom", custom)); + assertMatches (multi, queryX, sort, "CAIEG"); + sort.setSort (new SortField ("custom", custom, true)); assertMatches (multi, queryY, sort, "HJDBF"); }