Applied Mark Miller's patch.  Added Unit tests to TestRemoteSearchable and TestMultiSearcher.  To get RemoteSearchable to work, had to make FieldSelector and FieldSelectorResult serializable.

All tests pass.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@514675 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2007-03-05 14:28:01 +00:00
parent 4081e733b1
commit 83efa50f5e
10 changed files with 154 additions and 41 deletions

View File

@ -43,6 +43,8 @@ New features
1. LUCENE-759: Added two n-gram-producing TokenFilters.
(Otis Gospodnetic)
2. LUCENE-822: Added FieldSelector capabilities to Searchable for use with RemoteSearcher, and other Searchable implementations. (Mark Miller, Grant Ingersoll)
Optimizations
======================= Release 2.1.0 2007-02-14 =======================

View File

@ -1,4 +1,6 @@
package org.apache.lucene.document;
import java.io.Serializable;
/**
* Copyright 2004 The Apache Software Foundation
*
@ -20,7 +22,7 @@ package org.apache.lucene.document;
* what Fields get loaded on a {@link Document} by {@link org.apache.lucene.index.IndexReader#document(int,org.apache.lucene.document.FieldSelector)}
*
**/
public interface FieldSelector {
public interface FieldSelector extends Serializable {
/**
*

View File

@ -1,4 +1,6 @@
package org.apache.lucene.document;
import java.io.Serializable;
/**
* Copyright 2004 The Apache Software Foundation
*
@ -20,7 +22,7 @@ package org.apache.lucene.document;
*
**/
//Replace with an enumerated type in 1.5
public final class FieldSelectorResult {
public final class FieldSelectorResult implements Serializable {
/**
* Load this {@link Field} every time the {@link Document} is loaded, reading in the data as it is encountered.
@ -28,7 +30,7 @@ public final class FieldSelectorResult {
*<p/>
* {@link Document#add(Fieldable)} should be called by the Reader.
*/
public static final FieldSelectorResult LOAD = new FieldSelectorResult(0);
public transient static final FieldSelectorResult LOAD = new FieldSelectorResult(0);
/**
* Lazily load this {@link Field}. This means the {@link Field} is valid, but it may not actually contain its data until
* invoked. {@link Document#getField(String)} SHOULD NOT BE USED. {@link Document#getFieldable(String)} is safe to use and should
@ -36,14 +38,14 @@ public final class FieldSelectorResult {
*<p/>
* {@link Document#add(Fieldable)} should be called by the Reader.
*/
public static final FieldSelectorResult LAZY_LOAD = new FieldSelectorResult(1);
public transient static final FieldSelectorResult LAZY_LOAD = new FieldSelectorResult(1);
/**
* Do not load the {@link Field}. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should return null.
* {@link Document#add(Fieldable)} is not called.
* <p/>
* {@link Document#add(Fieldable)} should not be called by the Reader.
*/
public static final FieldSelectorResult NO_LOAD = new FieldSelectorResult(2);
public transient static final FieldSelectorResult NO_LOAD = new FieldSelectorResult(2);
/**
* Load this field as in the {@link #LOAD} case, but immediately return from {@link Field} loading for the {@link Document}. Thus, the
* Document may not have its complete set of Fields. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should
@ -51,23 +53,23 @@ public final class FieldSelectorResult {
* <p/>
* {@link Document#add(Fieldable)} should be called by the Reader.
*/
public static final FieldSelectorResult LOAD_AND_BREAK = new FieldSelectorResult(3);
public transient static final FieldSelectorResult LOAD_AND_BREAK = new FieldSelectorResult(3);
/**
* Behaves much like {@link #LOAD} but does not uncompress any compressed data. This is used for internal purposes.
* {@link Document#getField(String)} and {@link Document#getFieldable(String)} should not return null.
* <p/>
* {@link Document#add(Fieldable)} should be called by the Reader.
*/
public static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4);
public transient static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4);
/** Expert: Load the size of this {@link Field} rather than its value.
* Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value.
* The size is stored as a binary value, represented as an int in a byte[], with the higher order byte first in [0]
*/
public static final FieldSelectorResult SIZE = new FieldSelectorResult(5);
public transient static final FieldSelectorResult SIZE = new FieldSelectorResult(5);
/** Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e. stop loading further fields, after the size is loaded */
public static final FieldSelectorResult SIZE_AND_BREAK = new FieldSelectorResult(6);
public transient static final FieldSelectorResult SIZE_AND_BREAK = new FieldSelectorResult(6);

View File

@ -21,14 +21,14 @@ public class MapFieldSelector implements FieldSelector {
Map fieldSelections;
/** Create a MapFieldSelector
* @param fieldSelections maps from field names to FieldSelectorResults
* @param fieldSelections maps from field names (String) to FieldSelectorResults
*/
public MapFieldSelector(Map fieldSelections) {
this.fieldSelections = fieldSelections;
}
/** Create a MapFieldSelector
* @param fields fields to LOAD. All other fields are NO_LOAD.
* @param fields fields to LOAD. List of Strings. All other fields are NO_LOAD.
*/
public MapFieldSelector(List fields) {
fieldSelections = new HashMap(fields.size()*5/3);

View File

@ -17,14 +17,15 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.store.Directory;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.Directory;
import java.io.IOException;
import java.util.BitSet;
/** Implements search over a single IndexReader.
*
@ -91,6 +92,11 @@ public class IndexSearcher extends Searcher {
return reader.document(i);
}
// inherit javadoc
public Document doc(int i, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
  // Pure delegation: the wrapped reader does the loading and receives the
  // selector exactly as the caller supplied it.
  final Document selected = reader.document(i, fieldSelector);
  return selected;
}
// inherit javadoc
public int maxDoc() throws IOException {
return reader.maxDoc();

View File

@ -17,16 +17,17 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.Term;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.CorruptIndexException;
/** Implements search over a set of <code>Searchables</code>.
*
* <p>Applications usually need only call the inherited {@link #search(Query)}
@ -86,6 +87,10 @@ public class MultiSearcher extends Searcher {
throw new UnsupportedOperationException();
}
// Not supported by this implementation: always throws
// UnsupportedOperationException, regardless of the arguments.
public Document doc(int i, FieldSelector fieldSelector) {
throw new UnsupportedOperationException();
}
public Explanation explain(Weight weight,int doc) {
throw new UnsupportedOperationException();
}
@ -148,6 +153,11 @@ public class MultiSearcher extends Searcher {
return searchables[i].doc(n - starts[i]); // dispatch to searcher
}
// inherit javadoc
public Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
  // Work out which sub-searcher document n falls into.
  final int searcherIndex = subSearcher(n);
  // Subtract that searcher's start offset before handing the request over.
  final int offsetDoc = n - starts[searcherIndex];
  return searchables[searcherIndex].doc(offsetDoc, fieldSelector);
}
/** Returns index of the searcher for document <code>n</code> in the array
* used to construct this searcher. */

View File

@ -18,6 +18,7 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.CorruptIndexException;
@ -81,6 +82,10 @@ public class RemoteSearchable
return local.doc(i);
}
public Document doc(int i, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
  // Forward the call, selector included, to the wrapped local searchable.
  final Document selected = local.doc(i, fieldSelector);
  return selected;
}
public Query rewrite(Query original) throws IOException {
return local.rewrite(original);
}

View File

@ -18,6 +18,7 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.CorruptIndexException;
@ -98,6 +99,29 @@ public interface Searchable extends java.rmi.Remote {
*/
Document doc(int i) throws CorruptIndexException, IOException;
/**
* Get the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup> position. The {@link org.apache.lucene.document.FieldSelector}
* may be used to determine what {@link org.apache.lucene.document.Field}s to load and how they should be loaded.
* <p>
* <b>NOTE:</b> If the underlying Reader (more specifically, the underlying <code>FieldsReader</code>) is closed before the lazy {@link org.apache.lucene.document.Field} is
* loaded, an exception may be thrown. If you want the value of a lazy {@link org.apache.lucene.document.Field} to be available after closing, you must
* explicitly load it or fetch the Document again with a new loader.
*
* @param n The position of the document to get
* @param fieldSelector The {@link org.apache.lucene.document.FieldSelector} to use to determine what Fields should be loaded on the Document. May be null, in which case all Fields will be loaded.
* @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*
* @see IndexReader#document(int, FieldSelector)
* @see org.apache.lucene.document.Fieldable
* @see org.apache.lucene.document.FieldSelector
* @see org.apache.lucene.document.SetBasedFieldSelector
* @see org.apache.lucene.document.LoadFirstFieldSelector
*/
Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException;
/** Expert: called to re-write queries into primitive queries.
* @throws BooleanQuery.TooManyClauses
*/

View File

@ -17,21 +17,23 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import junit.framework.TestCase;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import junit.framework.TestCase;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
/**
* Tests {@link MultiSearcher} class.
@ -200,7 +202,7 @@ public class TestMultiSearcher extends TestCase
Document document=new Document();
document.add(new Field("contents", contents1, Field.Store.YES, Field.Index.UN_TOKENIZED));
document.add(new Field("other", "other contents", Field.Store.YES, Field.Index.UN_TOKENIZED));
if (contents2!=null) {
document.add(new Field("contents", contents2, Field.Store.YES, Field.Index.UN_TOKENIZED));
}
@ -224,11 +226,56 @@ public class TestMultiSearcher extends TestCase
}
}
/* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0
public void testNormalization1() throws IOException {
testNormalization(1, "Using 1 document per index:");
}
*/
/**
 * Checks that a {@link MultiSearcher} passes a FieldSelector through to its
 * sub-searchers: a document fetched with a selector must expose only the
 * selected field, while the plain fetch still returns every stored field.
 */
public void testFieldSelector() throws Exception {
  RAMDirectory ramDirectory1 = new RAMDirectory();
  RAMDirectory ramDirectory2 = new RAMDirectory();
  Query query = new TermQuery(new Term("contents", "doc0"));

  // Put the documents in two separate indexes
  initIndex(ramDirectory1, 10, true, null); // documents with a single token "doc0", "doc1", etc...
  initIndex(ramDirectory2, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...

  IndexSearcher indexSearcher1 = new IndexSearcher(ramDirectory1);
  IndexSearcher indexSearcher2 = new IndexSearcher(ramDirectory2);

  MultiSearcher searcher = getMultiSearcherInstance(new Searcher[]{indexSearcher1, indexSearcher2});
  assertNotNull("searcher is null and it shouldn't be", searcher);
  try {
    Hits hits = searcher.search(query);
    assertNotNull("hits is null and it shouldn't be", hits);
    // Should be one document from each directory
    assertEquals("hits.length()", 2, hits.length());

    // Without a selector every stored field of the top hit is returned.
    Document document = searcher.doc(hits.id(0));
    assertNotNull("document is null and it shouldn't be", document);
    assertEquals("document.getFields() Size", 2, document.getFields().size());

    // Both hits carry the field names "contents" and "other"; load only "other".
    Set ftl = new HashSet();
    ftl.add("other");
    SetBasedFieldSelector fs = new SetBasedFieldSelector(ftl, Collections.EMPTY_SET);
    document = searcher.doc(hits.id(0), fs);
    assertNotNull("document is null and it shouldn't be", document);
    assertEquals("document.getFields() Size", 1, document.getFields().size());
    assertNull("value is not null and it should be", document.get("contents"));
    assertNotNull("value is null and it shouldn't be", document.get("other"));

    // Flip the selection for the second hit: load only "contents".
    ftl.clear();
    ftl.add("contents");
    fs = new SetBasedFieldSelector(ftl, Collections.EMPTY_SET);
    document = searcher.doc(hits.id(1), fs);
    assertNotNull("document is null and it shouldn't be", document);
    assertNotNull("value is null and it shouldn't be", document.get("contents"));
    assertNull("value is not null and it should be", document.get("other"));
  } finally {
    // Release the searcher even when an assertion above fails.
    searcher.close();
  }
}
/* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0
public void testNormalization1() throws IOException {
testNormalization(1, "Using 1 document per index:");
}
*/
public void testNormalization10() throws IOException {
testNormalization(10, "Using 10 documents per index:");

View File

@ -18,16 +18,17 @@ package org.apache.lucene.search;
*/
import junit.framework.TestCase;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import java.rmi.Naming;
import java.rmi.registry.LocateRegistry;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import java.util.Collections;
import java.util.Set;
import java.util.HashSet;
/**
* @version $Id$
@ -56,6 +57,7 @@ public class TestRemoteSearchable extends TestCase {
IndexWriter writer = new IndexWriter(indexStore,new SimpleAnalyzer(),true);
Document doc = new Document();
doc.add(new Field("test", "test text", Field.Store.YES, Field.Index.TOKENIZED));
doc.add(new Field("other", "other test text", Field.Store.YES, Field.Index.TOKENIZED));
writer.addDocument(doc);
writer.optimize();
writer.close();
@ -74,7 +76,20 @@ public class TestRemoteSearchable extends TestCase {
Hits result = searcher.search(query);
assertEquals(1, result.length());
assertEquals("test text", result.doc(0).get("test"));
Document document = result.doc(0);
assertTrue("document is null and it shouldn't be", document != null);
assertEquals("test text", document.get("test"));
assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 2, document.getFields().size() == 2);
Set ftl = new HashSet();
ftl.add("other");
FieldSelector fs = new SetBasedFieldSelector(ftl, Collections.EMPTY_SET);
document = searcher.doc(0, fs);
assertTrue("document is null and it shouldn't be", document != null);
assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1);
fs = new MapFieldSelector(new String[]{"other"});
document = searcher.doc(0, fs);
assertTrue("document is null and it shouldn't be", document != null);
assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1);
}
public void testTermQuery() throws Exception {