From 83efa50f5ed6614873b5c4508428921ee1fb7621 Mon Sep 17 00:00:00 2001
From: Grant Ingersoll <gsingers@apache.org>
Date: Mon, 5 Mar 2007 14:28:01 +0000
Subject: [PATCH] https://issues.apache.org/jira/browse/LUCENE-822

Applied Mark Miller's patch.  Added Unit tests to TestRemoteSearchable and TestMultiSearcher.  To get RemoteSearchable to work, had to make FieldSelector and FieldSelectorResult serializable.

All tests pass.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@514675 13f79535-47bb-0310-9956-ffa450edef68
---
 CHANGES.txt                                   |  2 +
 .../apache/lucene/document/FieldSelector.java |  4 +-
 .../lucene/document/FieldSelectorResult.java  | 18 ++---
 .../lucene/document/MapFieldSelector.java     |  4 +-
 .../apache/lucene/search/IndexSearcher.java   | 18 +++--
 .../apache/lucene/search/MultiSearcher.java   | 20 ++++--
 .../lucene/search/RemoteSearchable.java       |  5 ++
 .../org/apache/lucene/search/Searchable.java  | 24 +++++++
 .../lucene/search/TestMultiSearcher.java      | 69 ++++++++++++++++---
 .../lucene/search/TestRemoteSearchable.java   | 31 ++++++---
 10 files changed, 154 insertions(+), 41 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index d594af0234a..09a00f4f971 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -43,6 +43,8 @@ New features
  1. LUCENE-759: Added two n-gram-producing TokenFilters.
     (Otis Gospodnetic)
 
+ 2. LUCENE-822: Added FieldSelector capabilities to Searchable for use with RemoteSearcher, and other Searchable implementations. (Mark Miller, Grant Ingersoll)
+
 Optimizations
 
 ======================= Release 2.1.0 2007-02-14 =======================
diff --git a/src/java/org/apache/lucene/document/FieldSelector.java b/src/java/org/apache/lucene/document/FieldSelector.java
index 1915c3a44fe..0ed52ca01e6 100755
--- a/src/java/org/apache/lucene/document/FieldSelector.java
+++ b/src/java/org/apache/lucene/document/FieldSelector.java
@@ -1,4 +1,6 @@
 package org.apache.lucene.document;
+
+import java.io.Serializable;
 /**
  * Copyright 2004 The Apache Software Foundation
  *
@@ -20,7 +22,7 @@ package org.apache.lucene.document;
  * what Fields get loaded on a {@link Document} by {@link org.apache.lucene.index.IndexReader#document(int,org.apache.lucene.document.FieldSelector)}
  *
  **/
-public interface FieldSelector {
+public interface FieldSelector extends Serializable {
 
   /**
    * 
diff --git a/src/java/org/apache/lucene/document/FieldSelectorResult.java b/src/java/org/apache/lucene/document/FieldSelectorResult.java
index 41c334dfa13..0dac759a835 100755
--- a/src/java/org/apache/lucene/document/FieldSelectorResult.java
+++ b/src/java/org/apache/lucene/document/FieldSelectorResult.java
@@ -1,4 +1,6 @@
 package org.apache.lucene.document;
+
+import java.io.Serializable;
 /**
  * Copyright 2004 The Apache Software Foundation
  *
@@ -20,7 +22,7 @@ package org.apache.lucene.document;
  *
  **/
 //Replace with an enumerated type in 1.5
-public final class FieldSelectorResult {
+public final class FieldSelectorResult implements Serializable {
 
     /**
      * Load this {@link Field} every time the {@link Document} is loaded, reading in the data as it is encounterd.
@@ -28,7 +30,7 @@ public final class FieldSelectorResult {
      *<p/>
      * {@link Document#add(Fieldable)} should be called by the Reader.
       */
-  public static final FieldSelectorResult LOAD = new FieldSelectorResult(0);
+  public transient static final FieldSelectorResult LOAD = new FieldSelectorResult(0);
     /**
      * Lazily load this {@link Field}.  This means the {@link Field} is valid, but it may not actually contain its data until
      * invoked.  {@link Document#getField(String)} SHOULD NOT BE USED.  {@link Document#getFieldable(String)} is safe to use and should
@@ -36,14 +38,14 @@ public final class FieldSelectorResult {
      *<p/>
      * {@link Document#add(Fieldable)} should be called by the Reader.
      */
-  public static final FieldSelectorResult LAZY_LOAD = new FieldSelectorResult(1);
+  public transient static final FieldSelectorResult LAZY_LOAD = new FieldSelectorResult(1);
     /**
      * Do not load the {@link Field}.  {@link Document#getField(String)} and {@link Document#getFieldable(String)} should return null.
      * {@link Document#add(Fieldable)} is not called.
      * <p/>
      * {@link Document#add(Fieldable)} should not be called by the Reader.
      */
-  public static final FieldSelectorResult NO_LOAD = new FieldSelectorResult(2);
+  public transient static final FieldSelectorResult NO_LOAD = new FieldSelectorResult(2);
     /**
      * Load this field as in the {@link #LOAD} case, but immediately return from {@link Field} loading for the {@link Document}.  Thus, the
      * Document may not have its complete set of Fields.  {@link Document#getField(String)} and {@link Document#getFieldable(String)} should
@@ -51,23 +53,23 @@ public final class FieldSelectorResult {
      * <p/>
      * {@link Document#add(Fieldable)} should be called by the Reader.
      */
-  public static final FieldSelectorResult LOAD_AND_BREAK = new FieldSelectorResult(3);
+  public transient static final FieldSelectorResult LOAD_AND_BREAK = new FieldSelectorResult(3);
     /**
      * Behaves much like {@link #LOAD} but does not uncompress any compressed data.  This is used for internal purposes.
      * {@link Document#getField(String)} and {@link Document#getFieldable(String)} should not return null.
      * <p/>
      * {@link Document#add(Fieldable)} should be called by the Reader.
      */
-  public static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4);
+  public transient static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4);
 
      /** Expert:  Load the size of this {@link Field} rather than its value.
        * Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value.
       * The size is stored as a binary value, represented as an int in a byte[], with the higher order byte first in [0]
       */
-  public static final FieldSelectorResult SIZE = new FieldSelectorResult(5);
+  public transient static final FieldSelectorResult SIZE = new FieldSelectorResult(5);
 
   /** Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e. stop loading further fields, after the size is loaded */         
-  public static final FieldSelectorResult SIZE_AND_BREAK = new FieldSelectorResult(6);
+  public transient static final FieldSelectorResult SIZE_AND_BREAK = new FieldSelectorResult(6);
 
 
 
diff --git a/src/java/org/apache/lucene/document/MapFieldSelector.java b/src/java/org/apache/lucene/document/MapFieldSelector.java
index 02cc06743e3..fe6489b99bc 100644
--- a/src/java/org/apache/lucene/document/MapFieldSelector.java
+++ b/src/java/org/apache/lucene/document/MapFieldSelector.java
@@ -21,14 +21,14 @@ public class MapFieldSelector implements FieldSelector {
     Map fieldSelections;
     
     /** Create a a MapFieldSelector
-     * @param fieldSelections maps from field names to FieldSelectorResults
+     * @param fieldSelections maps from field names (String) to FieldSelectorResults
      */
     public MapFieldSelector(Map fieldSelections) {
         this.fieldSelections = fieldSelections;
     }
     
     /** Create a a MapFieldSelector
-     * @param fields fields to LOAD.  All other fields are NO_LOAD.
+     * @param fields fields to LOAD.  List of Strings.  All other fields are NO_LOAD.
      */
     public MapFieldSelector(List fields) {
         fieldSelections = new HashMap(fields.size()*5/3);
diff --git a/src/java/org/apache/lucene/search/IndexSearcher.java b/src/java/org/apache/lucene/search/IndexSearcher.java
index 60a7779d203..031a8d55a77 100644
--- a/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -17,14 +17,15 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
-import java.io.IOException;
-import java.util.BitSet;
-
-import org.apache.lucene.store.Directory;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.store.Directory;
+
+import java.io.IOException;
+import java.util.BitSet;
 
 /** Implements search over a single IndexReader.
  *
@@ -90,7 +91,12 @@ public class IndexSearcher extends Searcher {
   public Document doc(int i) throws CorruptIndexException, IOException {
     return reader.document(i);
   }
-
+  
+  // inherit javadoc
+  public Document doc(int i, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+	    return reader.document(i, fieldSelector);
+  }
+  
   // inherit javadoc
   public int maxDoc() throws IOException {
     return reader.maxDoc();
diff --git a/src/java/org/apache/lucene/search/MultiSearcher.java b/src/java/org/apache/lucene/search/MultiSearcher.java
index fee7bad5f0a..3ef5631fd9e 100644
--- a/src/java/org/apache/lucene/search/MultiSearcher.java
+++ b/src/java/org/apache/lucene/search/MultiSearcher.java
@@ -17,16 +17,17 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.Term;
+
 import java.io.IOException;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
 
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.CorruptIndexException;
-
 /** Implements search over a set of <code>Searchables</code>.
  *
  * <p>Applications usually need only call the inherited {@link #search(Query)}
@@ -85,6 +86,10 @@ public class MultiSearcher extends Searcher {
     public Document doc(int i) {
       throw new UnsupportedOperationException();
     }
+    
+    public Document doc(int i, FieldSelector fieldSelector) {
+        throw new UnsupportedOperationException();
+    }
 
     public Explanation explain(Weight weight,int doc) {
       throw new UnsupportedOperationException();
@@ -148,7 +153,12 @@ public class MultiSearcher extends Searcher {
     return searchables[i].doc(n - starts[i]);	  // dispatch to searcher
   }
 
-
+  // inherit javadoc
+  public Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+    int i = subSearcher(n);			  // find searcher index
+    return searchables[i].doc(n - starts[i], fieldSelector);	  // dispatch to searcher
+  }
+  
   /** Returns index of the searcher for document <code>n</code> in the array
    * used to construct this searcher. */
   public int subSearcher(int n) {                 // find searcher for doc n:
diff --git a/src/java/org/apache/lucene/search/RemoteSearchable.java b/src/java/org/apache/lucene/search/RemoteSearchable.java
index b1d916333f9..4cb6290310b 100644
--- a/src/java/org/apache/lucene/search/RemoteSearchable.java
+++ b/src/java/org/apache/lucene/search/RemoteSearchable.java
@@ -18,6 +18,7 @@ package org.apache.lucene.search;
  */
 
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.CorruptIndexException;
 
@@ -81,6 +82,10 @@ public class RemoteSearchable
     return local.doc(i);
   }
 
+  public Document doc(int i, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
+	    return local.doc(i, fieldSelector);
+  }
+  
   public Query rewrite(Query original) throws IOException {
     return local.rewrite(original);
   }
diff --git a/src/java/org/apache/lucene/search/Searchable.java b/src/java/org/apache/lucene/search/Searchable.java
index d2569de700d..307e23c53ca 100644
--- a/src/java/org/apache/lucene/search/Searchable.java
+++ b/src/java/org/apache/lucene/search/Searchable.java
@@ -18,6 +18,7 @@ package org.apache.lucene.search;
  */
 
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.CorruptIndexException;
@@ -98,6 +99,29 @@ public interface Searchable extends java.rmi.Remote {
    */
   Document doc(int i) throws CorruptIndexException, IOException;
 
+  /**
+   * Get the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup> position. The {@link org.apache.lucene.document.FieldSelector}
+   * may be used to determine what {@link org.apache.lucene.document.Field}s to load and how they should be loaded.
+   * 
+   * <b>NOTE:</b> If the underlying Reader (more specifically, the underlying <code>FieldsReader</code>) is closed before the lazy {@link org.apache.lucene.document.Field} is
+   * loaded an exception may be thrown.  If you want the value of a lazy {@link org.apache.lucene.document.Field} to be available after closing you must
+   * explicitly load it or fetch the Document again with a new loader.
+   * 
+   *  
+   * @param n Get the document at the <code>n</code><sup>th</sup> position
+   * @param fieldSelector The {@link org.apache.lucene.document.FieldSelector} to use to determine what Fields should be loaded on the Document.  May be null, in which case all Fields will be loaded.
+   * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
+   * @throws CorruptIndexException if the index is corrupt
+   * @throws IOException if there is a low-level IO error
+   * 
+   * @see IndexReader#document(int, FieldSelector)
+   * @see org.apache.lucene.document.Fieldable
+   * @see org.apache.lucene.document.FieldSelector
+   * @see org.apache.lucene.document.SetBasedFieldSelector
+   * @see org.apache.lucene.document.LoadFirstFieldSelector
+   */
+  Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException;
+  
   /** Expert: called to re-write queries into primitive queries.
    * @throws BooleanQuery.TooManyClauses
    */
diff --git a/src/test/org/apache/lucene/search/TestMultiSearcher.java b/src/test/org/apache/lucene/search/TestMultiSearcher.java
index 99e9ef7408c..cdf813f7086 100644
--- a/src/test/org/apache/lucene/search/TestMultiSearcher.java
+++ b/src/test/org/apache/lucene/search/TestMultiSearcher.java
@@ -17,21 +17,23 @@ package org.apache.lucene.search;
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import junit.framework.TestCase;
 import org.apache.lucene.analysis.KeywordAnalyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.SetBasedFieldSelector;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.Searcher;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 
-import junit.framework.TestCase;
-
 import java.io.IOException;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
 
 /**
  * Tests {@link MultiSearcher} class.
@@ -200,7 +202,7 @@ public class TestMultiSearcher extends TestCase
         Document document=new Document();
         
         document.add(new Field("contents", contents1, Field.Store.YES, Field.Index.UN_TOKENIZED));
-        
+      document.add(new Field("other", "other contents", Field.Store.YES, Field.Index.UN_TOKENIZED));
         if (contents2!=null) {
             document.add(new Field("contents", contents2, Field.Store.YES, Field.Index.UN_TOKENIZED));
         }
@@ -223,12 +225,57 @@ public class TestMultiSearcher extends TestCase
             }
         }
     }
-    
-    /* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0
-    public void testNormalization1() throws IOException {
-        testNormalization(1, "Using 1 document per index:");
-    }
-     */
+
+  public void testFieldSelector() throws Exception {
+    RAMDirectory ramDirectory1, ramDirectory2;
+    IndexSearcher indexSearcher1, indexSearcher2;
+
+    ramDirectory1 = new RAMDirectory();
+    ramDirectory2 = new RAMDirectory();
+    Query query = new TermQuery(new Term("contents", "doc0"));
+
+    // Now put the documents in a different index
+    initIndex(ramDirectory1, 10, true, null); // documents with a single token "doc0", "doc1", etc...
+    initIndex(ramDirectory2, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
+
+    indexSearcher1 = new IndexSearcher(ramDirectory1);
+    indexSearcher2 = new IndexSearcher(ramDirectory2);
+
+    MultiSearcher searcher = getMultiSearcherInstance(new Searcher[]{indexSearcher1, indexSearcher2});
+    assertTrue("searcher is null and it shouldn't be", searcher != null);
+    Hits hits = searcher.search(query);
+    assertTrue("hits is null and it shouldn't be", hits != null);
+    assertTrue(hits.length() + " does not equal: " + 2, hits.length() == 2);
+    Document document = searcher.doc(hits.id(0));
+    assertTrue("document is null and it shouldn't be", document != null);
+    assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 2, document.getFields().size() == 2);
+    //Should be one document from each directory
+    //they both have two fields, contents and other
+    Set ftl = new HashSet();
+    ftl.add("other");
+    SetBasedFieldSelector fs = new SetBasedFieldSelector(ftl, Collections.EMPTY_SET);
+    document = searcher.doc(hits.id(0), fs);
+    assertTrue("document is null and it shouldn't be", document != null);
+    assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1);
+    String value = document.get("contents");
+    assertTrue("value is not null and it should be", value == null);
+    value = document.get("other");
+    assertTrue("value is null and it shouldn't be", value != null);
+    ftl.clear();
+    ftl.add("contents");
+    fs = new SetBasedFieldSelector(ftl, Collections.EMPTY_SET);
+    document = searcher.doc(hits.id(1), fs);
+    value = document.get("contents");
+    assertTrue("value is null and it shouldn't be", value != null);    
+    value = document.get("other");
+    assertTrue("value is not null and it should be", value == null);
+  }
+
+  /* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0
+ public void testNormalization1() throws IOException {
+     testNormalization(1, "Using 1 document per index:");
+ }
+  */
     
     public void testNormalization10() throws IOException {
         testNormalization(10, "Using 10 documents per index:");
diff --git a/src/test/org/apache/lucene/search/TestRemoteSearchable.java b/src/test/org/apache/lucene/search/TestRemoteSearchable.java
index 31bb750a336..50009440400 100644
--- a/src/test/org/apache/lucene/search/TestRemoteSearchable.java
+++ b/src/test/org/apache/lucene/search/TestRemoteSearchable.java
@@ -18,16 +18,17 @@ package org.apache.lucene.search;
  */
 
 import junit.framework.TestCase;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.*;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.RAMDirectory;
 
 import java.rmi.Naming;
 import java.rmi.registry.LocateRegistry;
-
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.analysis.SimpleAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import java.util.Collections;
+import java.util.Set;
+import java.util.HashSet;
 
 /**
  * @version $Id$
@@ -56,6 +57,7 @@ public class TestRemoteSearchable extends TestCase {
     IndexWriter writer = new IndexWriter(indexStore,new SimpleAnalyzer(),true);
     Document doc = new Document();
     doc.add(new Field("test", "test text", Field.Store.YES, Field.Index.TOKENIZED));
+    doc.add(new Field("other", "other test text", Field.Store.YES, Field.Index.TOKENIZED));
     writer.addDocument(doc);
     writer.optimize();
     writer.close();
@@ -74,7 +76,20 @@ public class TestRemoteSearchable extends TestCase {
     Hits result = searcher.search(query);
 
     assertEquals(1, result.length());
-    assertEquals("test text", result.doc(0).get("test"));
+    Document document = result.doc(0);
+    assertTrue("document is null and it shouldn't be", document != null);
+    assertEquals("test text", document.get("test"));
+    assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 2, document.getFields().size() == 2);
+    Set ftl = new HashSet();
+    ftl.add("other");
+    FieldSelector fs = new SetBasedFieldSelector(ftl, Collections.EMPTY_SET);
+    document = searcher.doc(0, fs);
+    assertTrue("document is null and it shouldn't be", document != null);
+    assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1);
+    fs = new MapFieldSelector(new String[]{"other"});
+    document = searcher.doc(0, fs);
+    assertTrue("document is null and it shouldn't be", document != null);
+    assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1);
   }
 
   public void testTermQuery() throws Exception {