LUCENE-2114: TestFilteredSearch tests on multi-segment index; fix Filter javadocs to call out that reader is per-segment

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@887347 13f79535-47bb-0310-9956-ffa450edef68
2009-12-04 20:31:44 +00:00 · 2009-12-04 20:31:44 +00:00 · fa7b8f0c10
parent 1f314fb9fa
commit fa7b8f0c10
3 changed files with 62 additions and 17 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -69,6 +69,10 @@ Test Cases
 * LUCENE-2065: Use Java 5 generics throughout our unit tests.  (Kay
  Kay via Mike McCandless)
 * LUCENE-2114: Change TestFilteredSearch to test on multi-segment
  index as well; improve javadocs of Filter to call out that the
  provided reader is per-segment (Simon Willnauer via Mike McCandless)
 ======================= Release 3.0.0 2009-11-25 =======================
 Changes in backwards compatibility policy
--- a/src/java/org/apache/lucene/search/Filter.java
+++ b/src/java/org/apache/lucene/search/Filter.java
@ -22,12 +22,28 @@ import java.io.IOException;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.util.DocIdBitSet;
-/** Abstract base class providing a mechanism to use a subset of an index
+/** 
 *  Abstract base class providing a mechanism to use a subset of an index
 *  for restriction or permission of index search results.
 *  <p>
 */
 public abstract class Filter implements java.io.Serializable {
  /**
   * Creates a {@link DocIdSet} that provides the documents which should be
   * permitted or prohibited in search results. <b>NOTE:</b> null can be
   * returned if no documents will be accepted by this Filter.
   * <p>
   * Note: This method might be called more than once during a search if the
   * index has more than one segment. In such a case the {@link DocIdSet}
   * must be relative to the document base of the given reader. Yet, the
   * segment readers are passed in increasing document base order.
   * 
   * @param reader a {@link IndexReader} instance opened on the index currently
   *         searched on. Note, it is likely that the provided reader does not
   *         represent the whole underlying index i.e. if the index has more than
   *         one segment the given reader only represents a single segment.
   *          
   * @return a DocIdSet that provides the documents which should be permitted or
   *         prohibited in search results. <b>NOTE:</b> null can be returned if
   *         no documents will be accepted by this Filter.
--- a/src/test/org/apache/lucene/search/TestFilteredSearch.java
+++ b/src/test/org/apache/lucene/search/TestFilteredSearch.java
@ -24,9 +24,12 @@ import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.OpenBitSet;
@ -42,19 +45,32 @@ public class TestFilteredSearch extends LuceneTestCase {
  private static final String FIELD = "category";
-  public void testFilteredSearch() {
+  public void testFilteredSearch() throws CorruptIndexException, LockObtainFailedException, IOException {
    boolean enforceSingleSegment = true;
    RAMDirectory directory = new RAMDirectory();
    int[] filterBits = {1, 36};
-    Filter filter = new SimpleDocIdSetFilter(filterBits);
+    SimpleDocIdSetFilter filter = new SimpleDocIdSetFilter(filterBits);
    try {
    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    searchFiltered(writer, directory, filter, enforceSingleSegment);
    // run the test on more than one segment
    enforceSingleSegment = false;
    // reset - it is stateful
    filter.reset();
    writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
    // we index 60 docs - this will create 6 segments
    writer.setMaxBufferedDocs(10);
    searchFiltered(writer, directory, filter, enforceSingleSegment);
  }
  public void searchFiltered(IndexWriter writer, Directory directory, Filter filter, boolean optimize) {
    try {
      for (int i = 0; i < 60; i++) {//Simple docs
        Document doc = new Document();
        doc.add(new Field(FIELD, Integer.toString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(doc);
      }
      if(optimize)
        writer.optimize();
      writer.close();
      BooleanQuery booleanQuery = new BooleanQuery();
@ -72,21 +88,30 @@ public class TestFilteredSearch extends LuceneTestCase {
  }
  public static final class SimpleDocIdSetFilter extends Filter {
-    private OpenBitSet bits;
+    private int docBase;
-
+    private final int[] docs;
    private int index;
    public SimpleDocIdSetFilter(int[] docs) {
-      bits = new OpenBitSet();
+      this.docs = docs;
      for(int i = 0; i < docs.length; i++){
    	  bits.set(docs[i]);
    }
    }
    @Override
    public DocIdSet getDocIdSet(IndexReader reader) {
-      return bits;
+      final OpenBitSet set = new OpenBitSet();
      final int limit = docBase+reader.maxDoc();
      for (;index < docs.length; index++) {
        final int docId = docs[index];
        if(docId > limit)
          break;
        set.set(docId-docBase);
      }
      docBase = limit;
      return set.isEmpty()?null:set;
    }
    public void reset(){
      index = 0;
      docBase = 0;
    }
  }