From 83efa50f5ed6614873b5c4508428921ee1fb7621 Mon Sep 17 00:00:00 2001 From: Grant Ingersoll Date: Mon, 5 Mar 2007 14:28:01 +0000 Subject: [PATCH] https://issues.apache.org/jira/browse/LUCENE-822 Applied Mark Miller's patch. Added Unit tests to TestRemoteSearchable and TestMultiSearcher. To get RemoteSearchable to work, had to make FieldSelector and FieldSelectorResult serializable. All tests pass. git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@514675 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 2 + .../apache/lucene/document/FieldSelector.java | 4 +- .../lucene/document/FieldSelectorResult.java | 18 ++--- .../lucene/document/MapFieldSelector.java | 4 +- .../apache/lucene/search/IndexSearcher.java | 18 +++-- .../apache/lucene/search/MultiSearcher.java | 20 ++++-- .../lucene/search/RemoteSearchable.java | 5 ++ .../org/apache/lucene/search/Searchable.java | 24 +++++++ .../lucene/search/TestMultiSearcher.java | 69 ++++++++++++++++--- .../lucene/search/TestRemoteSearchable.java | 31 ++++++--- 10 files changed, 154 insertions(+), 41 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index d594af0234a..09a00f4f971 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -43,6 +43,8 @@ New features 1. LUCENE-759: Added two n-gram-producing TokenFilters. (Otis Gospodnetic) + 2. LUCENE-822: Added FieldSelector capabilities to Searchable for use with RemoteSearcher, and other Searchable implementations. (Mark Miller, Grant Ingersoll) + Optimizations ======================= Release 2.1.0 2007-02-14 ======================= diff --git a/src/java/org/apache/lucene/document/FieldSelector.java b/src/java/org/apache/lucene/document/FieldSelector.java index 1915c3a44fe..0ed52ca01e6 100755 --- a/src/java/org/apache/lucene/document/FieldSelector.java +++ b/src/java/org/apache/lucene/document/FieldSelector.java @@ -1,4 +1,6 @@ package org.apache.lucene.document; + +import java.io.Serializable; /** * Copyright 2004 The Apache Software Foundation * @@ -20,7 +22,7 @@ package org.apache.lucene.document; * what Fields get loaded on a {@link Document} by {@link org.apache.lucene.index.IndexReader#document(int,org.apache.lucene.document.FieldSelector)} * **/ -public interface FieldSelector { +public interface FieldSelector extends Serializable { /** * diff --git a/src/java/org/apache/lucene/document/FieldSelectorResult.java b/src/java/org/apache/lucene/document/FieldSelectorResult.java index 41c334dfa13..0dac759a835 100755 --- a/src/java/org/apache/lucene/document/FieldSelectorResult.java +++ b/src/java/org/apache/lucene/document/FieldSelectorResult.java @@ -1,4 +1,6 @@ package org.apache.lucene.document; + +import java.io.Serializable; /** * Copyright 2004 The Apache Software Foundation * @@ -20,7 +22,7 @@ package org.apache.lucene.document; * **/ //Replace with an enumerated type in 1.5 -public final class FieldSelectorResult { +public final class FieldSelectorResult implements Serializable { /** * Load this {@link Field} every time the {@link Document} is loaded, reading in the data as it is encounterd. @@ -28,7 +30,7 @@ public final class FieldSelectorResult { *

* {@link Document#add(Fieldable)} should be called by the Reader. */ - public static final FieldSelectorResult LOAD = new FieldSelectorResult(0); + public transient static final FieldSelectorResult LOAD = new FieldSelectorResult(0); /** * Lazily load this {@link Field}. This means the {@link Field} is valid, but it may not actually contain its data until * invoked. {@link Document#getField(String)} SHOULD NOT BE USED. {@link Document#getFieldable(String)} is safe to use and should @@ -36,14 +38,14 @@ public final class FieldSelectorResult { *

* {@link Document#add(Fieldable)} should be called by the Reader. */ - public static final FieldSelectorResult LAZY_LOAD = new FieldSelectorResult(1); + public transient static final FieldSelectorResult LAZY_LOAD = new FieldSelectorResult(1); /** * Do not load the {@link Field}. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should return null. * {@link Document#add(Fieldable)} is not called. *

* {@link Document#add(Fieldable)} should not be called by the Reader. */ - public static final FieldSelectorResult NO_LOAD = new FieldSelectorResult(2); + public transient static final FieldSelectorResult NO_LOAD = new FieldSelectorResult(2); /** * Load this field as in the {@link #LOAD} case, but immediately return from {@link Field} loading for the {@link Document}. Thus, the * Document may not have its complete set of Fields. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should @@ -51,23 +53,23 @@ public final class FieldSelectorResult { *

* {@link Document#add(Fieldable)} should be called by the Reader. */ - public static final FieldSelectorResult LOAD_AND_BREAK = new FieldSelectorResult(3); + public transient static final FieldSelectorResult LOAD_AND_BREAK = new FieldSelectorResult(3); /** * Behaves much like {@link #LOAD} but does not uncompress any compressed data. This is used for internal purposes. * {@link Document#getField(String)} and {@link Document#getFieldable(String)} should not return null. *

* {@link Document#add(Fieldable)} should be called by the Reader. */ - public static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4); + public transient static final FieldSelectorResult LOAD_FOR_MERGE = new FieldSelectorResult(4); /** Expert: Load the size of this {@link Field} rather than its value. * Size is measured as number of bytes required to store the field == bytes for a binary or any compressed value, and 2*chars for a String value. * The size is stored as a binary value, represented as an int in a byte[], with the higher order byte first in [0] */ - public static final FieldSelectorResult SIZE = new FieldSelectorResult(5); + public transient static final FieldSelectorResult SIZE = new FieldSelectorResult(5); /** Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e. stop loading further fields, after the size is loaded */ - public static final FieldSelectorResult SIZE_AND_BREAK = new FieldSelectorResult(6); + public transient static final FieldSelectorResult SIZE_AND_BREAK = new FieldSelectorResult(6); diff --git a/src/java/org/apache/lucene/document/MapFieldSelector.java b/src/java/org/apache/lucene/document/MapFieldSelector.java index 02cc06743e3..fe6489b99bc 100644 --- a/src/java/org/apache/lucene/document/MapFieldSelector.java +++ b/src/java/org/apache/lucene/document/MapFieldSelector.java @@ -21,14 +21,14 @@ public class MapFieldSelector implements FieldSelector { Map fieldSelections; /** Create a a MapFieldSelector - * @param fieldSelections maps from field names to FieldSelectorResults + * @param fieldSelections maps from field names (String) to FieldSelectorResults */ public MapFieldSelector(Map fieldSelections) { this.fieldSelections = fieldSelections; } /** Create a a MapFieldSelector - * @param fields fields to LOAD. All other fields are NO_LOAD. + * @param fields fields to LOAD. List of Strings. All other fields are NO_LOAD. */ public MapFieldSelector(List fields) { fieldSelections = new HashMap(fields.size()*5/3); diff --git a/src/java/org/apache/lucene/search/IndexSearcher.java b/src/java/org/apache/lucene/search/IndexSearcher.java index 60a7779d203..031a8d55a77 100644 --- a/src/java/org/apache/lucene/search/IndexSearcher.java +++ b/src/java/org/apache/lucene/search/IndexSearcher.java @@ -17,14 +17,15 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; -import java.util.BitSet; - -import org.apache.lucene.store.Directory; import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.store.Directory; + +import java.io.IOException; +import java.util.BitSet; /** Implements search over a single IndexReader. * @@ -90,7 +91,12 @@ public class IndexSearcher extends Searcher { public Document doc(int i) throws CorruptIndexException, IOException { return reader.document(i); } - + + // inherit javadoc + public Document doc(int i, FieldSelector fieldSelector) throws CorruptIndexException, IOException { + return reader.document(i, fieldSelector); + } + // inherit javadoc public int maxDoc() throws IOException { return reader.maxDoc(); diff --git a/src/java/org/apache/lucene/search/MultiSearcher.java b/src/java/org/apache/lucene/search/MultiSearcher.java index fee7bad5f0a..3ef5631fd9e 100644 --- a/src/java/org/apache/lucene/search/MultiSearcher.java +++ b/src/java/org/apache/lucene/search/MultiSearcher.java @@ -17,16 +17,17 @@ package org.apache.lucene.search; * limitations under the License. */ +import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.Term; + import java.io.IOException; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.CorruptIndexException; - /** Implements search over a set of Searchables. * *

Applications usually need only call the inherited {@link #search(Query)} @@ -85,6 +86,10 @@ public class MultiSearcher extends Searcher { public Document doc(int i) { throw new UnsupportedOperationException(); } + + public Document doc(int i, FieldSelector fieldSelector) { + throw new UnsupportedOperationException(); + } public Explanation explain(Weight weight,int doc) { throw new UnsupportedOperationException(); @@ -148,7 +153,12 @@ public class MultiSearcher extends Searcher { return searchables[i].doc(n - starts[i]); // dispatch to searcher } - + // inherit javadoc + public Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException { + int i = subSearcher(n); // find searcher index + return searchables[i].doc(n - starts[i], fieldSelector); // dispatch to searcher + } + /** Returns index of the searcher for document n in the array * used to construct this searcher. */ public int subSearcher(int n) { // find searcher for doc n: diff --git a/src/java/org/apache/lucene/search/RemoteSearchable.java b/src/java/org/apache/lucene/search/RemoteSearchable.java index b1d916333f9..4cb6290310b 100644 --- a/src/java/org/apache/lucene/search/RemoteSearchable.java +++ b/src/java/org/apache/lucene/search/RemoteSearchable.java @@ -18,6 +18,7 @@ package org.apache.lucene.search; */ import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldSelector; import org.apache.lucene.index.Term; import org.apache.lucene.index.CorruptIndexException; @@ -81,6 +82,10 @@ public class RemoteSearchable return local.doc(i); } + public Document doc(int i, FieldSelector fieldSelector) throws CorruptIndexException, IOException { + return local.doc(i, fieldSelector); + } + public Query rewrite(Query original) throws IOException { return local.rewrite(original); } diff --git a/src/java/org/apache/lucene/search/Searchable.java b/src/java/org/apache/lucene/search/Searchable.java index d2569de700d..307e23c53ca 100644 --- a/src/java/org/apache/lucene/search/Searchable.java +++ b/src/java/org/apache/lucene/search/Searchable.java @@ -18,6 +18,7 @@ package org.apache.lucene.search; */ import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldSelector; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.CorruptIndexException; @@ -98,6 +99,29 @@ public interface Searchable extends java.rmi.Remote { */ Document doc(int i) throws CorruptIndexException, IOException; + /** + * Get the {@link org.apache.lucene.document.Document} at the nth position. The {@link org.apache.lucene.document.FieldSelector} + * may be used to determine what {@link org.apache.lucene.document.Field}s to load and how they should be loaded. + * + * NOTE: If the underlying Reader (more specifically, the underlying FieldsReader) is closed before the lazy {@link org.apache.lucene.document.Field} is + * loaded an exception may be thrown. If you want the value of a lazy {@link org.apache.lucene.document.Field} to be available after closing you must + * explicitly load it or fetch the Document again with a new loader. + * + * + * @param n Get the document at the nth position + * @param fieldSelector The {@link org.apache.lucene.document.FieldSelector} to use to determine what Fields should be loaded on the Document. May be null, in which case all Fields will be loaded. + * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + * + * @see IndexReader#document(int, FieldSelector) + * @see org.apache.lucene.document.Fieldable + * @see org.apache.lucene.document.FieldSelector + * @see org.apache.lucene.document.SetBasedFieldSelector + * @see org.apache.lucene.document.LoadFirstFieldSelector + */ + Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException; + /** Expert: called to re-write queries into primitive queries. * @throws BooleanQuery.TooManyClauses */ diff --git a/src/test/org/apache/lucene/search/TestMultiSearcher.java b/src/test/org/apache/lucene/search/TestMultiSearcher.java index 99e9ef7408c..cdf813f7086 100644 --- a/src/test/org/apache/lucene/search/TestMultiSearcher.java +++ b/src/test/org/apache/lucene/search/TestMultiSearcher.java @@ -17,21 +17,23 @@ package org.apache.lucene.search; * limitations under the License. */ -import org.apache.lucene.analysis.standard.StandardAnalyzer; +import junit.framework.TestCase; import org.apache.lucene.analysis.KeywordAnalyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.SetBasedFieldSelector; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.QueryParser; -import org.apache.lucene.search.Searcher; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; -import junit.framework.TestCase; - import java.io.IOException; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; /** * Tests {@link MultiSearcher} class. @@ -200,7 +202,7 @@ public class TestMultiSearcher extends TestCase Document document=new Document(); document.add(new Field("contents", contents1, Field.Store.YES, Field.Index.UN_TOKENIZED)); - + document.add(new Field("other", "other contents", Field.Store.YES, Field.Index.UN_TOKENIZED)); if (contents2!=null) { document.add(new Field("contents", contents2, Field.Store.YES, Field.Index.UN_TOKENIZED)); } @@ -223,12 +225,57 @@ public class TestMultiSearcher extends TestCase } } } - - /* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0 - public void testNormalization1() throws IOException { - testNormalization(1, "Using 1 document per index:"); - } - */ + + public void testFieldSelector() throws Exception { + RAMDirectory ramDirectory1, ramDirectory2; + IndexSearcher indexSearcher1, indexSearcher2; + + ramDirectory1 = new RAMDirectory(); + ramDirectory2 = new RAMDirectory(); + Query query = new TermQuery(new Term("contents", "doc0")); + + // Now put the documents in a different index + initIndex(ramDirectory1, 10, true, null); // documents with a single token "doc0", "doc1", etc... + initIndex(ramDirectory2, 10, true, "x"); // documents with two tokens "doc0" and "x", "doc1" and x, etc... + + indexSearcher1 = new IndexSearcher(ramDirectory1); + indexSearcher2 = new IndexSearcher(ramDirectory2); + + MultiSearcher searcher = getMultiSearcherInstance(new Searcher[]{indexSearcher1, indexSearcher2}); + assertTrue("searcher is null and it shouldn't be", searcher != null); + Hits hits = searcher.search(query); + assertTrue("hits is null and it shouldn't be", hits != null); + assertTrue(hits.length() + " does not equal: " + 2, hits.length() == 2); + Document document = searcher.doc(hits.id(0)); + assertTrue("document is null and it shouldn't be", document != null); + assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 2, document.getFields().size() == 2); + //Should be one document from each directory + //they both have two fields, contents and other + Set ftl = new HashSet(); + ftl.add("other"); + SetBasedFieldSelector fs = new SetBasedFieldSelector(ftl, Collections.EMPTY_SET); + document = searcher.doc(hits.id(0), fs); + assertTrue("document is null and it shouldn't be", document != null); + assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1); + String value = document.get("contents"); + assertTrue("value is not null and it should be", value == null); + value = document.get("other"); + assertTrue("value is null and it shouldn't be", value != null); + ftl.clear(); + ftl.add("contents"); + fs = new SetBasedFieldSelector(ftl, Collections.EMPTY_SET); + document = searcher.doc(hits.id(1), fs); + value = document.get("contents"); + assertTrue("value is null and it shouldn't be", value != null); + value = document.get("other"); + assertTrue("value is not null and it should be", value == null); + } + + /* uncomment this when the highest score is always normalized to 1.0, even when it was < 1.0 + public void testNormalization1() throws IOException { + testNormalization(1, "Using 1 document per index:"); + } + */ public void testNormalization10() throws IOException { testNormalization(10, "Using 10 documents per index:"); diff --git a/src/test/org/apache/lucene/search/TestRemoteSearchable.java b/src/test/org/apache/lucene/search/TestRemoteSearchable.java index 31bb750a336..50009440400 100644 --- a/src/test/org/apache/lucene/search/TestRemoteSearchable.java +++ b/src/test/org/apache/lucene/search/TestRemoteSearchable.java @@ -18,16 +18,17 @@ package org.apache.lucene.search; */ import junit.framework.TestCase; +import org.apache.lucene.analysis.SimpleAnalyzer; +import org.apache.lucene.document.*; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.Term; +import org.apache.lucene.store.RAMDirectory; import java.rmi.Naming; import java.rmi.registry.LocateRegistry; - -import org.apache.lucene.index.Term; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.store.RAMDirectory; -import org.apache.lucene.analysis.SimpleAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import java.util.Collections; +import java.util.Set; +import java.util.HashSet; /** * @version $Id$ @@ -56,6 +57,7 @@ public class TestRemoteSearchable extends TestCase { IndexWriter writer = new IndexWriter(indexStore,new SimpleAnalyzer(),true); Document doc = new Document(); doc.add(new Field("test", "test text", Field.Store.YES, Field.Index.TOKENIZED)); + doc.add(new Field("other", "other test text", Field.Store.YES, Field.Index.TOKENIZED)); writer.addDocument(doc); writer.optimize(); writer.close(); @@ -74,7 +76,20 @@ public class TestRemoteSearchable extends TestCase { Hits result = searcher.search(query); assertEquals(1, result.length()); - assertEquals("test text", result.doc(0).get("test")); + Document document = result.doc(0); + assertTrue("document is null and it shouldn't be", document != null); + assertEquals("test text", document.get("test")); + assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 2, document.getFields().size() == 2); + Set ftl = new HashSet(); + ftl.add("other"); + FieldSelector fs = new SetBasedFieldSelector(ftl, Collections.EMPTY_SET); + document = searcher.doc(0, fs); + assertTrue("document is null and it shouldn't be", document != null); + assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1); + fs = new MapFieldSelector(new String[]{"other"}); + document = searcher.doc(0, fs); + assertTrue("document is null and it shouldn't be", document != null); + assertTrue("document.getFields() Size: " + document.getFields().size() + " is not: " + 1, document.getFields().size() == 1); } public void testTermQuery() throws Exception {