From f720852e376331fe365af82ceb36dcae7ff5dd00 Mon Sep 17 00:00:00 2001
From: "Chris M. Hostetter"
Date: Tue, 18 Aug 2009 18:25:47 +0000
Subject: [PATCH] LUCENE-1791: Enhance QueryUtils and CheckHits to wrap
everything they check in MultiReader/MultiSearcher
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@805525 13f79535-47bb-0310-9956-ffa450edef68
---
CHANGES.txt | 5 +
.../org/apache/lucene/search/CheckHits.java | 67 +++++---
.../org/apache/lucene/search/QueryUtils.java | 144 ++++++++++++++++--
.../lucene/search/TestExplanations.java | 63 ++++----
4 files changed, 217 insertions(+), 62 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 1fcce817f12..4a704d4c2ce 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -783,6 +783,11 @@ Build
Test Cases
+ 1. LUCENE-1791: Enhancements to the QueryUtils and CheckHits utility
+ classes to wrap IndexReaders and Searchers in MultiReaders or
+ MultiSearchers when possible to help exercise more edge cases.
+ (Chris Hostetter, Mark Miller)
+
======================= Release 2.4.1 2009-03-09 =======================
API Changes
diff --git a/src/test/org/apache/lucene/search/CheckHits.java b/src/test/org/apache/lucene/search/CheckHits.java
index cf7d7338f20..8dfe7dcfd04 100644
--- a/src/test/org/apache/lucene/search/CheckHits.java
+++ b/src/test/org/apache/lucene/search/CheckHits.java
@@ -29,8 +29,8 @@ import org.apache.lucene.store.Directory;
public class CheckHits {
/**
- * Some explains methods calculate their vlaues though a slightly
- * differnet order of operations from the acctaul scoring method ...
+ * Some explains methods calculate their values through a slightly
+ * different order of operations from the actual scoring method ...
* this allows for a small amount of variation
*/
public static float EXPLAIN_SCORE_TOLERANCE_DELTA = 0.00005f;
@@ -74,7 +74,7 @@ public class CheckHits {
*
* @param query the query to test
* @param searcher the searcher to test the query against
- * @param defaultFieldName used for displaing the query in assertion messages
+ * @param defaultFieldName used for displaying the query in assertion messages
* @param results a list of documentIds that must match the query
* @see Searcher#search(Query,HitCollector)
* @see #checkHits
@@ -82,31 +82,58 @@ public class CheckHits {
public static void checkHitCollector(Query query, String defaultFieldName,
Searcher searcher, int[] results)
throws IOException {
+
+ QueryUtils.check(query,searcher);
Set correct = new TreeSet();
for (int i = 0; i < results.length; i++) {
correct.add(new Integer(results[i]));
}
-
final Set actual = new TreeSet();
- searcher.search(query, new Collector() {
- private int base = 0;
- public void setScorer(Scorer scorer) throws IOException {}
- public void collect(int doc) {
- actual.add(new Integer(doc + base));
- }
- public void setNextReader(IndexReader reader, int docBase) {
- base = docBase;
- }
- public boolean acceptsDocsOutOfOrder() {
- return true;
- }
- });
- Assert.assertEquals(query.toString(defaultFieldName), correct, actual);
+ final Collector c = new SetCollector(actual);
- QueryUtils.check(query,searcher);
+ searcher.search(query, c);
+ Assert.assertEquals("Simple: " + query.toString(defaultFieldName),
+ correct, actual);
+
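+ // edge values -1, 0, +1 place the real searcher as the first, middle, or
+ // last sub-searcher of a MultiSearcher of otherwise empty searchers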
+ for (int i = -1; i < 2; i++) {
+ actual.clear();
+ QueryUtils.wrapSearcher(searcher, i).search(query, c);
+ Assert.assertEquals("Wrap Searcher " + i + ": " +
+ query.toString(defaultFieldName),
+ correct, actual);
+ }
+
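+ // wrapping the underlying reader requires direct access to it, so the
+ // remaining checks only apply to IndexSearchers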
+ if ( ! ( searcher instanceof IndexSearcher ) ) return;
+
+ for (int i = -1; i < 2; i++) {
+ actual.clear();
+ QueryUtils.wrapUnderlyingReader
+ ((IndexSearcher)searcher, i).search(query, c);
+ Assert.assertEquals("Wrap Reader " + i + ": " +
+ query.toString(defaultFieldName),
+ correct, actual);
+ }
}
-
+
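+ /** Just collects document ids into the given Set, offsetting each id by the reader's docBase */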
+ public static class SetCollector extends Collector {
+ final Set bag;
+ public SetCollector(Set bag) {
+ this.bag = bag;
+ }
+ private int base = 0;
+ public void setScorer(Scorer scorer) throws IOException {}
+ public void collect(int doc) {
+ bag.add(new Integer(doc + base));
+ }
+ public void setNextReader(IndexReader reader, int docBase) {
+ base = docBase;
+ }
+ public boolean acceptsDocsOutOfOrder() {
+ return true;
+ }
+ }
+
/**
* Tests that a query matches the an expected set of documents using Hits.
*
diff --git a/src/test/org/apache/lucene/search/QueryUtils.java b/src/test/org/apache/lucene/search/QueryUtils.java
index bdcf36dd222..76f86203409 100644
--- a/src/test/org/apache/lucene/search/QueryUtils.java
+++ b/src/test/org/apache/lucene/search/QueryUtils.java
@@ -5,10 +5,19 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
+import java.util.ArrayList;
+import java.util.List;
import junit.framework.Assert;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.ReaderUtil;
/**
* Copyright 2005 Apache Software Foundation
@@ -75,12 +84,20 @@ public class QueryUtils {
}
/**
- * various query sanity checks on a searcher, including explanation checks.
- * @see #checkExplanations
- * @see #checkSkipTo
+ * Various query sanity checks on a searcher; some checks are only
+ * performed when the searcher is an IndexSearcher.
+ *
* @see #check(Query)
+ * @see #checkFirstSkipTo
+ * @see #checkSkipTo
+ * @see #checkExplanations
+ * @see #checkSerialization
+ * @see #checkEqual
*/
public static void check(Query q1, Searcher s) {
+ check(q1, s, true);
+ }
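+ /** if <code>wrap</code> is true the query is additionally checked against
+ * MultiReader/MultiSearcher wrappings of the searcher; the recursive calls
+ * pass false so that already-wrapped searchers are not wrapped again */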
+ private static void check(Query q1, Searcher s, boolean wrap) {
try {
check(q1);
if (s!=null) {
@@ -88,6 +105,16 @@ public class QueryUtils {
IndexSearcher is = (IndexSearcher)s;
checkFirstSkipTo(q1,is);
checkSkipTo(q1,is);
+ if (wrap) {
+ check(q1, wrapUnderlyingReader(is, -1), false);
+ check(q1, wrapUnderlyingReader(is, 0), false);
+ check(q1, wrapUnderlyingReader(is, +1), false);
+ }
+ }
+ if (wrap) {
+ check(q1, wrapSearcher(s, -1), false);
+ check(q1, wrapSearcher(s, 0), false);
+ check(q1, wrapSearcher(s, +1), false);
}
checkExplanations(q1,s);
checkSerialization(q1,s);
@@ -101,6 +128,104 @@ public class QueryUtils {
}
}
+ /**
+ * Given an IndexSearcher, returns a new IndexSearcher whose IndexReader
+ * is a MultiReader containing the Reader of the original IndexSearcher,
+ * as well as several "empty" IndexReaders -- some of which will have
+ * deleted documents in them. This new IndexSearcher should
+ * behave exactly the same as the original IndexSearcher.
+ * @param s the searcher to wrap
+ * @param edge if negative, s will be the first sub; if 0, s will be in the middle; if positive, s will be the last sub
+ */
+ public static IndexSearcher wrapUnderlyingReader(final IndexSearcher s, final int edge)
+ throws IOException {
+
+ IndexReader r = s.getIndexReader();
+
+ // we can't put deleted docs before the nested reader, because
+ // it will throw off the docIds
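+ // the original reader r ends up as the first, middle, or last sub-reader
+ // depending on edge; the nested MultiReaders give the wrapped reader
+ // multiple levels of sub-readers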
+ IndexReader[] readers = new IndexReader[] {
+ edge < 0 ? r : IndexReader.open(makeEmptyIndex(0)),
+ IndexReader.open(makeEmptyIndex(0)),
+ new MultiReader(new IndexReader[] {
+ IndexReader.open(makeEmptyIndex(edge < 0 ? 4 : 0)),
+ IndexReader.open(makeEmptyIndex(0)),
+ 0 == edge ? r : IndexReader.open(makeEmptyIndex(0))
+ }),
+ IndexReader.open(makeEmptyIndex(0 < edge ? 0 : 7)),
+ IndexReader.open(makeEmptyIndex(0)),
+ new MultiReader(new IndexReader[] {
+ IndexReader.open(makeEmptyIndex(0 < edge ? 0 : 5)),
+ IndexReader.open(makeEmptyIndex(0)),
+ 0 < edge ? r : IndexReader.open(makeEmptyIndex(0))
+ })
+ };
+ IndexSearcher out = new IndexSearcher(new MultiReader(readers));
+ out.setSimilarity(s.getSimilarity());
+ return out;
+ }
+ /**
+ * Given a Searcher, returns a new MultiSearcher wrapping the
+ * original Searcher,
+ * as well as several "empty" IndexSearchers -- some of which will have
+ * deleted documents in them. This new MultiSearcher
+ * should behave exactly the same as the original Searcher.
+ * @param s the Searcher to wrap
+ * @param edge if negative, s will be the first sub; if 0, s will be in the middle; if positive, s will be the last sub
+ */
+ public static MultiSearcher wrapSearcher(final Searcher s, final int edge)
+ throws IOException {
+
+ // we can't put deleted docs before the nested reader, because
+ // it will throw off the docIds
+ Searcher[] searchers = new Searcher[] {
+ edge < 0 ? s : new IndexSearcher(makeEmptyIndex(0)),
+ new MultiSearcher(new Searcher[] {
+ new IndexSearcher(makeEmptyIndex(edge < 0 ? 65 : 0)),
+ new IndexSearcher(makeEmptyIndex(0)),
+ 0 == edge ? s : new IndexSearcher(makeEmptyIndex(0))
+ }),
+ new IndexSearcher(makeEmptyIndex(0 < edge ? 0 : 3)),
+ new IndexSearcher(makeEmptyIndex(0)),
+ new MultiSearcher(new Searcher[] {
+ new IndexSearcher(makeEmptyIndex(0 < edge ? 0 : 5)),
+ new IndexSearcher(makeEmptyIndex(0)),
+ 0 < edge ? s : new IndexSearcher(makeEmptyIndex(0))
+ })
+ };
+ MultiSearcher out = new MultiSearcher(searchers);
+ out.setSimilarity(s.getSimilarity());
+ return out;
+ }
+
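+ /** Creates a RAMDirectory index containing <code>numDeletedDocs</code> documents,
+ * all of which have been deleted: maxDoc() equals numDeletedDocs but numDocs() is 0 */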
+ private static RAMDirectory makeEmptyIndex(final int numDeletedDocs)
+ throws IOException {
+ RAMDirectory d = new RAMDirectory();
+ IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), true,
+ MaxFieldLength.LIMITED);
+ for (int i = 0; i < numDeletedDocs; i++) {
+ w.addDocument(new Document());
+ }
+ w.commit();
+ w.deleteDocuments( new MatchAllDocsQuery() );
+ w.commit();
+
+ if (0 < numDeletedDocs)
+ Assert.assertTrue("writer has no deletions", w.hasDeletions());
+
+ Assert.assertEquals("writer is missing some deleted docs",
+ numDeletedDocs, w.maxDoc());
+ Assert.assertEquals("writer has non-deleted docs",
+ 0, w.numDocs());
+ w.close();
+ IndexReader r = IndexReader.open(d);
+ Assert.assertEquals("reader has wrong number of deleted docs",
+ numDeletedDocs, r.numDeletedDocs());
+ r.close();
+ return d;
+ }
+
+
/** check that the query weight is serializable.
* @throws IOException if serialization check fail.
*/
@@ -115,7 +240,7 @@ public class QueryUtils {
ois.readObject();
ois.close();
- //skip rquals() test for now - most weights don't overide equals() and we won't add this just for the tests.
+ //skip equals() test for now - most weights don't override equals() and we won't add this just for the tests.
//TestCase.assertEquals("writeObject(w) != w. ("+w+")",w2,w);
} catch (Exception e) {
@@ -146,10 +271,6 @@ public class QueryUtils {
{skip_op, skip_op, skip_op, next_op, next_op},
};
for (int k = 0; k < orders.length; k++) {
- IndexReader[] readers = s.getIndexReader().getSequentialSubReaders();
-
- for (int x = 0; x < readers.length; x++) {
- IndexReader reader = readers[x];
final int order[] = orders[k];
// System.out.print("Order:");for (int i = 0; i < order.length; i++)
@@ -158,7 +279,7 @@ public class QueryUtils {
final int opidx[] = { 0 };
final Weight w = q.weight(s);
- final Scorer scorer = w.scorer(reader, true, false);
+ final Scorer scorer = w.scorer(s.getIndexReader(), true, false);
if (scorer == null) {
continue;
}
@@ -229,7 +350,6 @@ public class QueryUtils {
.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS;
Assert.assertFalse(more);
}
- }
}
// check that first skip on just created scorers always goes to the right doc
@@ -271,7 +391,9 @@ public class QueryUtils {
}
});
- IndexReader[] readers = s.getIndexReader().getSequentialSubReaders();
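+ // recursively gather all of the leaf sub-readers (the wrapped searchers can contain nested MultiReaders)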
+ List readerList = new ArrayList();
+ ReaderUtil.gatherSubReaders(readerList, s.getIndexReader());
+ IndexReader[] readers = (IndexReader[]) readerList.toArray(new IndexReader[0]);
for(int i = 0; i < readers.length; i++) {
IndexReader reader = readers[i];
Weight w = q.weight(s);
diff --git a/src/test/org/apache/lucene/search/TestExplanations.java b/src/test/org/apache/lucene/search/TestExplanations.java
index 48ad17f46eb..6b029f1227d 100644
--- a/src/test/org/apache/lucene/search/TestExplanations.java
+++ b/src/test/org/apache/lucene/search/TestExplanations.java
@@ -17,35 +17,30 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import org.apache.lucene.search.spans.*;
-import org.apache.lucene.store.RAMDirectory;
-
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.ParseException;
-
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanFirstQuery;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanNotQuery;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.DocIdBitSet;
-
-import java.util.Random;
-import java.util.BitSet;
/**
- * Tests primative queries (ie: that rewrite to themselves) to
+ * Tests primitive queries (ie: that rewrite to themselves) to
* insure they match the expected set of docs, and that the score of each
* match is equal to the value of the scores explanation.
*
*
- * The assumption is that if all of the "primative" queries work well,
- * then anythingthat rewrites to a primative will work well also.
+ * The assumption is that if all of the "primitive" queries work well,
+ * then anything that rewrites to a primitive will work well also.
*
*
* @see "Subclasses for actual tests"
@@ -53,6 +48,7 @@ import java.util.BitSet;
public class TestExplanations extends LuceneTestCase {
protected IndexSearcher searcher;
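+ /** document numbers are indexed in this field so ItemizedFilter can select docs by number */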
+ public static final String KEY = "KEY";
public static final String FIELD = "field";
public static final QueryParser qp =
new QueryParser(FIELD, new WhitespaceAnalyzer());
@@ -69,6 +65,7 @@ public class TestExplanations extends LuceneTestCase {
IndexWriter.MaxFieldLength.LIMITED);
for (int i = 0; i < docFields.length; i++) {
Document doc = new Document();
+ doc.add(new Field(KEY, ""+i, Field.Store.NO, Field.Index.NOT_ANALYZED));
doc.add(new Field(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
}
@@ -117,18 +114,22 @@ public class TestExplanations extends LuceneTestCase {
bqtest(makeQuery(queryText), expDocNrs);
}
- /** A filter that only lets the specified document numbers pass */
- public static class ItemizedFilter extends Filter {
- int[] docs;
- public ItemizedFilter(int[] docs) {
- this.docs = docs;
- }
- public DocIdSet getDocIdSet(IndexReader r) {
- BitSet b = new BitSet(r.maxDoc());
- for (int i = 0; i < docs.length; i++) {
- b.set(docs[i]);
+ /**
+ * Convenience subclass of FieldCacheTermsFilter that only lets
+ * documents with the specified key values (document numbers) pass
+ */
+ public static class ItemizedFilter extends FieldCacheTermsFilter {
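+ /** converts the int keys into the String terms that were indexed in the key field */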
+ private static String[] int2str(int [] terms) {
+ String [] out = new String[terms.length];
+ for (int i = 0; i < terms.length; i++) {
+ out[i] = ""+terms[i];
}
- return new DocIdBitSet(b);
+ return out;
+ }
+ public ItemizedFilter(String keyField, int [] keys) {
+ super(keyField, int2str(keys));
+ }
+ public ItemizedFilter(int [] keys) {
+ super(KEY, int2str(keys));
}
}