mirror of https://github.com/apache/lucene.git
Applied Mark Miller's patch. Added Unit tests to TestRemoteSearchable and TestMultiSearcher. To get RemoteSearchable to work, had to make FieldSelector and FieldSelectorResult serializable. All tests pass. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@514675 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4081e733b1
commit
83efa50f5e
|
@ -43,6 +43,8 @@ New features
|
|||
1. LUCENE-759: Added two n-gram-producing TokenFilters.
|
||||
(Otis Gospodnetic)
|
||||
|
||||
2. LUCENE-822: Added FieldSelector capabilities to Searchable for use with RemoteSearcher, and other Searchable implementations. (Mark Miller, Grant Ingersoll)
|
||||
|
||||
Optimizations
|
||||
|
||||
======================= Release 2.1.0 2007-02-14 =======================
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
package org.apache.lucene.document;
|
||||
|
||||
import java.io.Serializable;
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
|
@ -20,7 +22,7 @@ package org.apache.lucene.document;
|
|||
* what Fields get loaded on a {@link Document} by {@link org.apache.lucene.index.IndexReader#document(int,org.apache.lucene.document.FieldSelector)}
|
||||
*
|
||||
**/
|
||||
public interface FieldSelector {
|
||||
public interface FieldSelector extends Serializable {
|
||||
|
||||
/**
|
||||
*
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
package org.apache.lucene.document;
|
||||
|
||||
import java.io.Serializable;
|
||||
/**
|
||||
* Copyright 2004 The Apache Software Foundation
|
||||
*
|
||||
|
@ -20,7 +22,7 @@ package org.apache.lucene.document;
|
|||
*
|
||||
**/
|
||||
//Replace with an enumerated type in 1.5
|
||||
public final class FieldSelectorResult {
|
||||
public final class FieldSelectorResult implements Serializable {
|
||||
|
||||
/**
|
||||
* Load this {@link Field} every time the {@link Document} is loaded, reading in the data as it is encounterd.
|
||||
|
@ -28,7 +30,7 @@ public final class FieldSelectorResult {
|
|||
*<p/>
|
||||
* {@link Document#add(Fieldable)} should be called by the Reader.
|
||||
*/
|
||||
public static final FieldSelectorResult LOAD = new FieldSelectorResult(0);
|
||||
public transient static final FieldSelectorResult LOAD = new FieldSelectorResult(0);
|
||||
/**
|
||||
* Lazily load this {@link Field}. This means the {@link Field} is valid, but it may not actually contain its data until
|
||||
* invoked. {@link Document#getField(String)} SHOULD NOT BE USED. {@link Document#getFieldable(String)} is safe to use and should
|
||||
|
@ -36,14 +38,14 @@ public final class FieldSelectorResult {
|
|||
*<p/>
|
||||
* {@link Document#add(Fieldable)} should be called by the Reader.
|
||||
*/
|
||||
public static final FieldSelectorResult LAZY_LOAD = new FieldSelectorResult(1);
|
||||
public transient static final FieldSelectorResult LAZY_LOAD = new FieldSelectorResult(1);
|
||||
/**
|
||||
* Do not load the {@link Field}. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should return null.
|
||||
* {@link Document#add(Fieldable)} is not called.
|
||||
* <p/>
|
||||
* {@link Document#add(Fieldable)} should not be called by the Reader.
|
||||
*/
|
||||
public static final FieldSelectorResult NO_LOAD = new FieldSelectorResult(2);
|
||||
public transient static final FieldSelectorResult NO_LOAD = new FieldSelectorResult(2);
|
||||
/**
|
||||
* Load this field as in the {@link #LOAD} case, but immediately return from {@link Field} loading for the {@link Document}. Thus, the
|
||||
* Document may not have its complete set of Fields. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should
|
||||
|
@ -51,23 +53,23 @@ public final class FieldSelectorResult {
|
|||
* <p/>
|
||||
* {@link Document#add(Fieldable)} should be called by the Reader.
|
||||
*/
|
||||
public static final FieldSelectorResult LOAD_AND_BREAK = new FieldSelectorResult(3);
|
||||
public transient static final FieldSelectorResult LOAD_AND_BREAK = new FieldSelectorResult(3);
|
||||
/**
|
||||
* Behaves much like {@link #LOAD} but does not uncompress any compressed data. This is used for internal purposes.
|
||||
* {@link Document#getField(String)} and {@link Document#getFieldable(String)} should not return null.
|
||||
* <p/>
|
||||
* {@link Document#add(Fieldable)} should be called by the Reader.
|
||||
*/
|
||||
public static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4);
|
||||
public transient static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4);
|
||||
|
||||
/** Expert: Load the size of this {@link Field} rather than its value.
|
||||
* Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value.
|
||||
* The size is stored as a binary value, represented as an int in a byte[], with the higher order byte first in [0]
|
||||
*/
|
||||
public static final FieldSelectorResult SIZE = new FieldSelectorResult(5);
|
||||
public transient static final FieldSelectorResult SIZE = new FieldSelectorResult(5);
|
||||
|
||||
/** Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e. stop loading further fields, after the size is loaded */
|
||||
public static final FieldSelectorResult SIZE_AND_BREAK = new FieldSelectorResult(6);
|
||||
public transient static final FieldSelectorResult SIZE_AND_BREAK = new FieldSelectorResult(6);
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -21,14 +21,14 @@ public class MapFieldSelector implements FieldSelector {
|
|||
Map fieldSelections;
|
||||
|
||||
/** Create a a MapFieldSelector
|
||||
* @param fieldSelections maps from field names to FieldSelectorResults
|
||||
* @param fieldSelections maps from field names (String) to FieldSelectorResults
|
||||
*/
|
||||
public MapFieldSelector(Map fieldSelections) {
|
||||
this.fieldSelections = fieldSelections;
|
||||
}
|
||||
|
||||
/** Create a a MapFieldSelector
|
||||
* @param fields fields to LOAD. All other fields are NO_LOAD.
|
||||
* @param fields fields to LOAD. List of Strings. All other fields are NO_LOAD.
|
||||
*/
|
||||
public MapFieldSelector(List fields) {
|
||||
fieldSelections = new HashMap(fields.size()*5/3);
|
||||
|
|
|
@ -17,14 +17,15 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.BitSet;
|
||||
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.FieldSelector;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.BitSet;
|
||||
|
||||
/** Implements search over a single IndexReader.
|
||||
*
|
||||
|
@ -90,7 +91,12 @@ public class IndexSearcher extends Searcher {
|
|||
public Document doc(int i) throws CorruptIndexException, IOException {
|
||||
return reader.document(i);
|
||||
}
|
||||
|
||||
|
||||
// inherit javadoc
|
||||
public Document doc(int i, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
|
||||
return reader.document(i, fieldSelector);
|
||||
}
|
||||
|
||||
// inherit javadoc
|
||||
public int maxDoc() throws IOException {
|
||||
return reader.maxDoc();
|
||||
|
|
|
@ -17,16 +17,17 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.FieldSelector;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
|
||||
/** Implements search over a set of <code>Searchables</code>.
|
||||
*
|
||||
* <p>Applications usually need only call the inherited {@link #search(Query)}
|
||||
|
@ -85,6 +86,10 @@ public class MultiSearcher extends Searcher {
|
|||
public Document doc(int i) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public Document doc(int i, FieldSelector fieldSelector) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
public Explanation explain(Weight weight,int doc) {
|
||||
throw new UnsupportedOperationException();
|
||||
|
@ -148,7 +153,12 @@ public class MultiSearcher extends Searcher {
|
|||
return searchables[i].doc(n - starts[i]); // dispatch to searcher
|
||||
}
|
||||
|
||||
|
||||
// inherit javadoc
|
||||
public Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
|
||||
int i = subSearcher(n); // find searcher index
|
||||
return searchables[i].doc(n - starts[i], fieldSelector); // dispatch to searcher
|
||||
}
|
||||
|
||||
/** Returns index of the searcher for document <code>n</code> in the array
|
||||
* used to construct this searcher. */
|
||||
public int subSearcher(int n) { // find searcher for doc n:
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.search;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.FieldSelector;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
|
||||
|
@ -81,6 +82,10 @@ public class RemoteSearchable
|
|||
return local.doc(i);
|
||||
}
|
||||
|
||||
public Document doc(int i, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
|
||||
return local.doc(i, fieldSelector);
|
||||
}
|
||||
|
||||
public Query rewrite(Query original) throws IOException {
|
||||
return local.rewrite(original);
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.lucene.search;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.FieldSelector;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
|
@ -98,6 +99,29 @@ public interface Searchable extends java.rmi.Remote {
|
|||
*/
|
||||
Document doc(int i) throws CorruptIndexException, IOException;
|
||||
|
||||
/**
|
||||
* Get the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup> position. The {@link org.apache.lucene.document.FieldSelector}
|
||||
* may be used to determine what {@link org.apache.lucene.document.Field}s to load and how they should be loaded.
|
||||
*
|
||||
* <b>NOTE:</b> If the underlying Reader (more specifically, the underlying <code>FieldsReader</code>) is closed before the lazy {@link org.apache.lucene.document.Field} is
|
||||
* loaded an exception may be thrown. If you want the value of a lazy {@link org.apache.lucene.document.Field} to be available after closing you must
|
||||
* explicitly load it or fetch the Document again with a new loader.
|
||||
*
|
||||
*
|
||||
* @param n Get the document at the <code>n</code><sup>th</sup> position
|
||||
* @param fieldSelector The {@link org.apache.lucene.document.FieldSelector} to use to determine what Fields should be loaded on the Document. May be null, in which case all Fields will be loaded.
|
||||
* @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
|
||||
* @throws CorruptIndexException if the index is corrupt
|
||||
* @throws IOException if there is a low-level IO error
|
||||
*
|
||||
* @see IndexReader#document(int, FieldSelector)
|
||||
* @see org.apache.lucene.document.Fieldable
|
||||
* @see org.apache.lucene.document.FieldSelector
|
||||
* @see org.apache.lucene.document.SetBasedFieldSelector
|
||||
* @see org.apache.lucene.document.LoadFirstFieldSelector
|
||||
*/
|
||||
Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException;
|
||||
|
||||
/** Expert: called to re-write queries into primitive queries.
|
||||
* @throws BooleanQuery.TooManyClauses
|
||||
*/
|
||||
|
|
|
@ -17,21 +17,23 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import junit.framework.TestCase;
|
||||
import org.apache.lucene.analysis.KeywordAnalyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.SetBasedFieldSelector;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queryParser.QueryParser;
|
||||
import org.apache.lucene.search.Searcher;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Tests {@link MultiSearcher} class.
|
||||
|
@ -200,7 +202,7 @@ public class TestMultiSearcher extends TestCase
|
|||
Document document=new Document();
|
||||
|
||||
document.add(new Field("contents", contents1, Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||
|
||||
document.add(new Field("other", "other contents", Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||
if (contents2!=null) {
|
||||
document.add(new Field("contents", contents2, Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||
}
|
||||
|
@ -223,12 +225,57 @@ public class TestMultiSearcher extends TestCase
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0
|
||||
public void testNormalization1() throws IOException {
|
||||
testNormalization(1, "Using 1 document per index:");
|
||||
}
|
||||
*/
|
||||
|
||||
public void testFieldSelector() throws Exception {
|
||||
RAMDirectory ramDirectory1, ramDirectory2;
|
||||
IndexSearcher indexSearcher1, indexSearcher2;
|
||||
|
||||
ramDirectory1 = new RAMDirectory();
|
||||
ramDirectory2 = new RAMDirectory();
|
||||
Query query = new TermQuery(new Term("contents", "doc0"));
|
||||
|
||||
// Now put the documents in a different index
|
||||
initIndex(ramDirectory1, 10, true, null); // documents with a single token "doc0", "doc1", etc...
|
||||
initIndex(ramDirectory2, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc...
|
||||
|
||||
indexSearcher1 = new IndexSearcher(ramDirectory1);
|
||||
indexSearcher2 = new IndexSearcher(ramDirectory2);
|
||||
|
||||
MultiSearcher searcher = getMultiSearcherInstance(new Searcher[]{indexSearcher1, indexSearcher2});
|
||||
assertTrue("searcher is null and it shouldn't be", searcher != null);
|
||||
Hits hits = searcher.search(query);
|
||||
assertTrue("hits is null and it shouldn't be", hits != null);
|
||||
assertTrue(hits.length() + " does not equal: " + 2, hits.length() == 2);
|
||||
Document document = searcher.doc(hits.id(0));
|
||||
assertTrue("document is null and it shouldn't be", document != null);
|
||||
assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 2, document.getFields().size() == 2);
|
||||
//Should be one document from each directory
|
||||
//they both have two fields, contents and other
|
||||
Set ftl = new HashSet();
|
||||
ftl.add("other");
|
||||
SetBasedFieldSelector fs = new SetBasedFieldSelector(ftl, Collections.EMPTY_SET);
|
||||
document = searcher.doc(hits.id(0), fs);
|
||||
assertTrue("document is null and it shouldn't be", document != null);
|
||||
assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1);
|
||||
String value = document.get("contents");
|
||||
assertTrue("value is not null and it should be", value == null);
|
||||
value = document.get("other");
|
||||
assertTrue("value is null and it shouldn't be", value != null);
|
||||
ftl.clear();
|
||||
ftl.add("contents");
|
||||
fs = new SetBasedFieldSelector(ftl, Collections.EMPTY_SET);
|
||||
document = searcher.doc(hits.id(1), fs);
|
||||
value = document.get("contents");
|
||||
assertTrue("value is null and it shouldn't be", value != null);
|
||||
value = document.get("other");
|
||||
assertTrue("value is not null and it should be", value == null);
|
||||
}
|
||||
|
||||
/* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0
|
||||
public void testNormalization1() throws IOException {
|
||||
testNormalization(1, "Using 1 document per index:");
|
||||
}
|
||||
*/
|
||||
|
||||
public void testNormalization10() throws IOException {
|
||||
testNormalization(10, "Using 10 documents per index:");
|
||||
|
|
|
@ -18,16 +18,17 @@ package org.apache.lucene.search;
|
|||
*/
|
||||
|
||||
import junit.framework.TestCase;
|
||||
import org.apache.lucene.analysis.SimpleAnalyzer;
|
||||
import org.apache.lucene.document.*;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
|
||||
import java.rmi.Naming;
|
||||
import java.rmi.registry.LocateRegistry;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.analysis.SimpleAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import java.util.Collections;
|
||||
import java.util.Set;
|
||||
import java.util.HashSet;
|
||||
|
||||
/**
|
||||
* @version $Id$
|
||||
|
@ -56,6 +57,7 @@ public class TestRemoteSearchable extends TestCase {
|
|||
IndexWriter writer = new IndexWriter(indexStore,new SimpleAnalyzer(),true);
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("test", "test text", Field.Store.YES, Field.Index.TOKENIZED));
|
||||
doc.add(new Field("other", "other test text", Field.Store.YES, Field.Index.TOKENIZED));
|
||||
writer.addDocument(doc);
|
||||
writer.optimize();
|
||||
writer.close();
|
||||
|
@ -74,7 +76,20 @@ public class TestRemoteSearchable extends TestCase {
|
|||
Hits result = searcher.search(query);
|
||||
|
||||
assertEquals(1, result.length());
|
||||
assertEquals("test text", result.doc(0).get("test"));
|
||||
Document document = result.doc(0);
|
||||
assertTrue("document is null and it shouldn't be", document != null);
|
||||
assertEquals("test text", document.get("test"));
|
||||
assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 2, document.getFields().size() == 2);
|
||||
Set ftl = new HashSet();
|
||||
ftl.add("other");
|
||||
FieldSelector fs = new SetBasedFieldSelector(ftl, Collections.EMPTY_SET);
|
||||
document = searcher.doc(0, fs);
|
||||
assertTrue("document is null and it shouldn't be", document != null);
|
||||
assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1);
|
||||
fs = new MapFieldSelector(new String[]{"other"});
|
||||
document = searcher.doc(0, fs);
|
||||
assertTrue("document is null and it shouldn't be", document != null);
|
||||
assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1);
|
||||
}
|
||||
|
||||
public void testTermQuery() throws Exception {
|
||||
|
|
Loading…
Reference in New Issue