https://issues.apache.org/jira/browse/LUCENE-822

Applied Mark Miller's patch. Added Unit tests to TestRemoteSearchable and TestMultiSearcher. To get RemoteSearchable to work, had to make FieldSelector and FieldSelectorResult serializable. All tests pass. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@514675 13f79535-47bb-0310-9956-ffa450edef68
2007-03-05 14:28:01 +00:00 · 2007-03-05 14:28:01 +00:00 · 83efa50f5e
parent 4081e733b1
commit 83efa50f5e
10 changed files with 154 additions and 41 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -43,6 +43,8 @@ New features
 1. LUCENE-759: Added two n-gram-producing TokenFilters.
    (Otis Gospodnetic)

+ 2. LUCENE-822: Added FieldSelector capabilities to Searchable for use with RemoteSearcher, and other Searchable implementations. (Mark Miller, Grant Ingersoll)
+
 Optimizations

 ======================= Release 2.1.0 2007-02-14 =======================
--- a/src/java/org/apache/lucene/document/FieldSelector.java
+++ b/src/java/org/apache/lucene/document/FieldSelector.java
@ -1,4 +1,6 @@
 package org.apache.lucene.document;
+
+import java.io.Serializable;
 /**
 * Copyright 2004 The Apache Software Foundation
 *
@ -20,7 +22,7 @@ package org.apache.lucene.document;
 * what Fields get loaded on a {@link Document} by {@link org.apache.lucene.index.IndexReader#document(int,org.apache.lucene.document.FieldSelector)}
 *
 **/
-public interface FieldSelector {
+public interface FieldSelector extends Serializable {

  /**
   * 
--- a/src/java/org/apache/lucene/document/FieldSelectorResult.java
+++ b/src/java/org/apache/lucene/document/FieldSelectorResult.java
@ -1,4 +1,6 @@
 package org.apache.lucene.document;
+
+import java.io.Serializable;
 /**
 * Copyright 2004 The Apache Software Foundation
 *
@ -20,7 +22,7 @@ package org.apache.lucene.document;
 *
 **/
 //Replace with an enumerated type in 1.5
-public final class FieldSelectorResult {
+public final class FieldSelectorResult implements Serializable {

    /**
     * Load this {@link Field} every time the {@link Document} is loaded, reading in the data as it is encounterd.
@ -28,7 +30,7 @@ public final class FieldSelectorResult {
     *<p/>
     * {@link Document#add(Fieldable)} should be called by the Reader.
      */
-  public static final FieldSelectorResult LOAD = new FieldSelectorResult(0);
+  public transient static final FieldSelectorResult LOAD = new FieldSelectorResult(0);
    /**
     * Lazily load this {@link Field}.  This means the {@link Field} is valid, but it may not actually contain its data until
     * invoked.  {@link Document#getField(String)} SHOULD NOT BE USED.  {@link Document#getFieldable(String)} is safe to use and should
@ -36,14 +38,14 @@ public final class FieldSelectorResult {
     *<p/>
     * {@link Document#add(Fieldable)} should be called by the Reader.
     */
-  public static final FieldSelectorResult LAZY_LOAD = new FieldSelectorResult(1);
+  public transient static final FieldSelectorResult LAZY_LOAD = new FieldSelectorResult(1);
    /**
     * Do not load the {@link Field}.  {@link Document#getField(String)} and {@link Document#getFieldable(String)} should return null.
     * {@link Document#add(Fieldable)} is not called.
     * <p/>
     * {@link Document#add(Fieldable)} should not be called by the Reader.
     */
-  public static final FieldSelectorResult NO_LOAD = new FieldSelectorResult(2);
+  public transient static final FieldSelectorResult NO_LOAD = new FieldSelectorResult(2);
    /**
     * Load this field as in the {@link #LOAD} case, but immediately return from {@link Field} loading for the {@link Document}.  Thus, the
     * Document may not have its complete set of Fields.  {@link Document#getField(String)} and {@link Document#getFieldable(String)} should
@ -51,23 +53,23 @@ public final class FieldSelectorResult {
     * <p/>
     * {@link Document#add(Fieldable)} should be called by the Reader.
     */
-  public static final FieldSelectorResult LOAD_AND_BREAK = new FieldSelectorResult(3);
+  public transient static final FieldSelectorResult LOAD_AND_BREAK = new FieldSelectorResult(3);
    /**
     * Behaves much like {@link #LOAD} but does not uncompress any compressed data.  This is used for internal purposes.
     * {@link Document#getField(String)} and {@link Document#getFieldable(String)} should not return null.
     * <p/>
     * {@link Document#add(Fieldable)} should be called by the Reader.
     */
-  public static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4);
+  public transient static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4);

     /** Expert:  Load the size of this {@link Field} rather than its value.
       * Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value.
      * The size is stored as a binary value, represented as an int in a byte[], with the higher order byte first in [0]
      */
-  public static final FieldSelectorResult SIZE = new FieldSelectorResult(5);
+  public transient static final FieldSelectorResult SIZE = new FieldSelectorResult(5);

  /** Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e. stop loading further fields, after the size is loaded */         
-  public static final FieldSelectorResult SIZE_AND_BREAK = new FieldSelectorResult(6);
+  public transient static final FieldSelectorResult SIZE_AND_BREAK = new FieldSelectorResult(6);



--- a/src/java/org/apache/lucene/document/MapFieldSelector.java
+++ b/src/java/org/apache/lucene/document/MapFieldSelector.java
@ -21,14 +21,14 @@ public class MapFieldSelector implements FieldSelector {
    Map fieldSelections;
    
    /** Create a a MapFieldSelector
-     * @param fieldSelections maps from field names to FieldSelectorResults
+     * @param fieldSelections maps from field names (String) to FieldSelectorResults
     */
    public MapFieldSelector(Map fieldSelections) {
        this.fieldSelections = fieldSelections;
    }
    
    /** Create a a MapFieldSelector
-     * @param fields fields to LOAD.  All other fields are NO_LOAD.
+     * @param fields fields to LOAD.  List of Strings.  All other fields are NO_LOAD.
     */
    public MapFieldSelector(List fields) {
        fieldSelections = new HashMap(fields.size()*5/3);
--- a/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/src/java/org/apache/lucene/search/IndexSearcher.java
@ -17,14 +17,15 @@ package org.apache.lucene.search;
 * limitations under the License.
 */

-import java.io.IOException;
-import java.util.BitSet;
-
-import org.apache.lucene.store.Directory;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.store.Directory;
+
+import java.io.IOException;
+import java.util.BitSet;

 /** Implements search over a single IndexReader.
 *
@ -91,6 +92,11 @@ public class IndexSearcher extends Searcher {
    return reader.document(i);
  }
  
+  // inherit javadoc
+  public Document doc(int i, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+	    return reader.document(i, fieldSelector);
+  }
+  
  // inherit javadoc
  public int maxDoc() throws IOException {
    return reader.maxDoc();
--- a/src/java/org/apache/lucene/search/MultiSearcher.java
+++ b/src/java/org/apache/lucene/search/MultiSearcher.java
@ -17,16 +17,17 @@ package org.apache.lucene.search;
 * limitations under the License.
 */

+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.Term;
+
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;

-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.CorruptIndexException;
-
 /** Implements search over a set of <code>Searchables</code>.
 *
 * <p>Applications usually need only call the inherited {@link #search(Query)}
@ -86,6 +87,10 @@ public class MultiSearcher extends Searcher {
      throw new UnsupportedOperationException();
    }
    
+    public Document doc(int i, FieldSelector fieldSelector) {
+        throw new UnsupportedOperationException();
+    }
+
    public Explanation explain(Weight weight,int doc) {
      throw new UnsupportedOperationException();
    }
@ -148,6 +153,11 @@ public class MultiSearcher extends Searcher {
    return searchables[i].doc(n - starts[i]);	  // dispatch to searcher
  }

+  // inherit javadoc
+  public Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+    int i = subSearcher(n);			  // find searcher index
+    return searchables[i].doc(n - starts[i], fieldSelector);	  // dispatch to searcher
+  }
  
  /** Returns index of the searcher for document <code>n</code> in the array
   * used to construct this searcher. */
--- a/src/java/org/apache/lucene/search/RemoteSearchable.java
+++ b/src/java/org/apache/lucene/search/RemoteSearchable.java
@ -18,6 +18,7 @@ package org.apache.lucene.search;
 */

 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.CorruptIndexException;

@ -81,6 +82,10 @@ public class RemoteSearchable
    return local.doc(i);
  }

+  public Document doc(int i, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+	    return local.doc(i, fieldSelector);
+  }
+  
  public Query rewrite(Query original) throws IOException {
    return local.rewrite(original);
  }
--- a/src/java/org/apache/lucene/search/Searchable.java
+++ b/src/java/org/apache/lucene/search/Searchable.java
@ -18,6 +18,7 @@ package org.apache.lucene.search;
 */

 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.CorruptIndexException;
@ -98,6 +99,29 @@ public interface Searchable extends java.rmi.Remote {
   */
  Document doc(int i) throws CorruptIndexException, IOException;

+  /**
+   * Get the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup> position. The {@link org.apache.lucene.document.FieldSelector}
+   * may be used to determine what {@link org.apache.lucene.document.Field}s to load and how they should be loaded.
+   * 
+   * <b>NOTE:</b> If the underlying Reader (more specifically, the underlying <code>FieldsReader</code>) is closed before the lazy {@link org.apache.lucene.document.Field} is
+   * loaded an exception may be thrown.  If you want the value of a lazy {@link org.apache.lucene.document.Field} to be available after closing you must
+   * explicitly load it or fetch the Document again with a new loader.
+   * 
+   *  
+   * @param n Get the document at the <code>n</code><sup>th</sup> position
+   * @param fieldSelector The {@link org.apache.lucene.document.FieldSelector} to use to determine what Fields should be loaded on the Document.  May be null, in which case all Fields will be loaded.
+   * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
+   * @throws CorruptIndexException if the index is corrupt
+   * @throws IOException if there is a low-level IO error
+   * 
+   * @see IndexReader#document(int, FieldSelector)
+   * @see org.apache.lucene.document.Fieldable
+   * @see org.apache.lucene.document.FieldSelector
+   * @see org.apache.lucene.document.SetBasedFieldSelector
+   * @see org.apache.lucene.document.LoadFirstFieldSelector
+   */
+  Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException;
+  
  /** Expert: called to re-write queries into primitive queries.
   * @throws BooleanQuery.TooManyClauses
   */
--- a/src/test/org/apache/lucene/search/TestMultiSearcher.java
+++ b/src/test/org/apache/lucene/search/TestMultiSearcher.java
@ -17,21 +17,23 @@ package org.apache.lucene.search;
 * limitations under the License.
 */

-import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import junit.framework.TestCase;
 import org.apache.lucene.analysis.KeywordAnalyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.SetBasedFieldSelector;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.Searcher;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;

-import junit.framework.TestCase;
-
 import java.io.IOException;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;

 /**
 * Tests {@link MultiSearcher} class.
@ -200,7 +202,7 @@ public class TestMultiSearcher extends TestCase
        Document document=new Document();
        
        document.add(new Field("contents", contents1, Field.Store.YES, Field.Index.UN_TOKENIZED));
-        
+      document.add(new Field("other", "other contents", Field.Store.YES, Field.Index.UN_TOKENIZED));
        if (contents2!=null) {
            document.add(new Field("contents", contents2, Field.Store.YES, Field.Index.UN_TOKENIZED));
        }
@ -224,11 +226,56 @@ public class TestMultiSearcher extends TestCase
        }
    }

-    /* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0
-    public void testNormalization1() throws IOException {
-        testNormalization(1, "Using 1 document per index:");
-    }
-     */
+  public void testFieldSelector() throws Exception {
+    RAMDirectory ramDirectory1, ramDirectory2;
+    IndexSearcher indexSearcher1, indexSearcher2;
+
+    ramDirectory1 = new RAMDirectory();
+    ramDirectory2 = new RAMDirectory();
+    Query query = new TermQuery(new Term("contents", "doc0"));
+
+    // Now put the documents in a different index
+    initIndex(ramDirectory1, 10, true, null); // documents with a single token "doc0", "doc1", etc...
+    initIndex(ramDirectory2, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
+
+    indexSearcher1 = new IndexSearcher(ramDirectory1);
+    indexSearcher2 = new IndexSearcher(ramDirectory2);
+
+    MultiSearcher searcher = getMultiSearcherInstance(new Searcher[]{indexSearcher1, indexSearcher2});
+    assertTrue("searcher is null and it shouldn't be", searcher != null);
+    Hits hits = searcher.search(query);
+    assertTrue("hits is null and it shouldn't be", hits != null);
+    assertTrue(hits.length() + " does not equal: " + 2, hits.length() == 2);
+    Document document = searcher.doc(hits.id(0));
+    assertTrue("document is null and it shouldn't be", document != null);
+    assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 2, document.getFields().size() == 2);
+    //Should be one document from each directory
+    //they both have two fields, contents and other
+    Set ftl = new HashSet();
+    ftl.add("other");
+    SetBasedFieldSelector fs = new SetBasedFieldSelector(ftl, Collections.EMPTY_SET);
+    document = searcher.doc(hits.id(0), fs);
+    assertTrue("document is null and it shouldn't be", document != null);
+    assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1);
+    String value = document.get("contents");
+    assertTrue("value is not null and it should be", value == null);
+    value = document.get("other");
+    assertTrue("value is null and it shouldn't be", value != null);
+    ftl.clear();
+    ftl.add("contents");
+    fs = new SetBasedFieldSelector(ftl, Collections.EMPTY_SET);
+    document = searcher.doc(hits.id(1), fs);
+    value = document.get("contents");
+    assertTrue("value is null and it shouldn't be", value != null);    
+    value = document.get("other");
+    assertTrue("value is not null and it should be", value == null);
+  }
+
+  /* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0
+ public void testNormalization1() throws IOException {
+     testNormalization(1, "Using 1 document per index:");
+ }
+  */
    
    public void testNormalization10() throws IOException {
        testNormalization(10, "Using 10 documents per index:");
--- a/src/test/org/apache/lucene/search/TestRemoteSearchable.java
+++ b/src/test/org/apache/lucene/search/TestRemoteSearchable.java
@ -18,16 +18,17 @@ package org.apache.lucene.search;
 */

 import junit.framework.TestCase;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.*;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.RAMDirectory;

 import java.rmi.Naming;
 import java.rmi.registry.LocateRegistry;
-
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.analysis.SimpleAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import java.util.Collections;
+import java.util.Set;
+import java.util.HashSet;

 /**
 * @version $Id$
@ -56,6 +57,7 @@ public class TestRemoteSearchable extends TestCase {
    IndexWriter writer = new IndexWriter(indexStore,new SimpleAnalyzer(),true);
    Document doc = new Document();
    doc.add(new Field("test", "test text", Field.Store.YES, Field.Index.TOKENIZED));
+    doc.add(new Field("other", "other test text", Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(doc);
    writer.optimize();
    writer.close();
@ -74,7 +76,20 @@ public class TestRemoteSearchable extends TestCase {
    Hits result = searcher.search(query);

    assertEquals(1, result.length());
-    assertEquals("test text", result.doc(0).get("test"));
+    Document document = result.doc(0);
+    assertTrue("document is null and it shouldn't be", document != null);
+    assertEquals("test text", document.get("test"));
+    assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 2, document.getFields().size() == 2);
+    Set ftl = new HashSet();
+    ftl.add("other");
+    FieldSelector fs = new SetBasedFieldSelector(ftl, Collections.EMPTY_SET);
+    document = searcher.doc(0, fs);
+    assertTrue("document is null and it shouldn't be", document != null);
+    assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1);
+    fs = new MapFieldSelector(new String[]{"other"});
+    document = searcher.doc(0, fs);
+    assertTrue("document is null and it shouldn't be", document != null);
+    assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1);
  }

  public void testTermQuery() throws Exception {