LUCENE-1791: Enhance QueryUtils and CheckHits to wrap everything they check in MultiReader/MultiSearcher

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@805525 13f79535-47bb-0310-9956-ffa450edef68
2009-08-18 18:25:47 +00:00 · 2009-08-18 18:25:47 +00:00 · f720852e37
parent 3887cf9419
commit f720852e37
4 changed files with 217 additions and 62 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -783,6 +783,11 @@ Build

 Test Cases

+ 1. LUCENE-1791: Enhancements to the QueryUtils and CheckHits utility 
+    classes to wrap IndexReaders and Searchers in MultiReaders or 
+    MultiSearcher when possible to help excercise more edge cases.
+    (Chris Hostetter, Mark Miller)
+
 ======================= Release 2.4.1 2009-03-09 =======================

 API Changes
--- a/src/test/org/apache/lucene/search/CheckHits.java
+++ b/src/test/org/apache/lucene/search/CheckHits.java
@ -29,8 +29,8 @@ import org.apache.lucene.store.Directory;
 public class CheckHits {
  
  /**
-   * Some explains methods calculate their vlaues though a slightly
-   * differnet  order of operations from the acctaul scoring method ...
+   * Some explains methods calculate their values though a slightly
+   * different  order of operations from the actual scoring method ...
   * this allows for a small amount of variation
   */
  public static float EXPLAIN_SCORE_TOLERANCE_DELTA = 0.00005f;
@ -74,7 +74,7 @@ public class CheckHits {
   * </p>
   * @param query the query to test
   * @param searcher the searcher to test the query against
-   * @param defaultFieldName used for displaing the query in assertion messages
+   * @param defaultFieldName used for displaying the query in assertion messages
   * @param results a list of documentIds that must match the query
   * @see Searcher#search(Query,HitCollector)
   * @see #checkHits
@ -83,28 +83,55 @@ public class CheckHits {
                                       Searcher searcher, int[] results)
    throws IOException {

+    QueryUtils.check(query,searcher);
+    
    Set correct = new TreeSet();
    for (int i = 0; i < results.length; i++) {
      correct.add(new Integer(results[i]));
    }
-    
    final Set actual = new TreeSet();
-    searcher.search(query, new Collector() {
-        private int base = 0;
-        public void setScorer(Scorer scorer) throws IOException {}
-        public void collect(int doc) {
-          actual.add(new Integer(doc + base));
-        }
-        public void setNextReader(IndexReader reader, int docBase) {
-          base = docBase;
-        }
-        public boolean acceptsDocsOutOfOrder() {
-          return true;
-        }
-      });
-    Assert.assertEquals(query.toString(defaultFieldName), correct, actual);
+    final Collector c = new SetCollector(actual);

-    QueryUtils.check(query,searcher);
+    searcher.search(query, c);
+    Assert.assertEquals("Simple: " + query.toString(defaultFieldName), 
+                        correct, actual);
+
+    for (int i = -1; i < 2; i++) {
+      actual.clear();
+      QueryUtils.wrapSearcher(searcher, i).search(query, c);
+      Assert.assertEquals("Wrap Searcher " + i + ": " +
+                          query.toString(defaultFieldName),
+                          correct, actual);
+    }
+                        
+    if ( ! ( searcher instanceof IndexSearcher ) ) return;
+
+    for (int i = -1; i < 2; i++) {
+      actual.clear();
+      QueryUtils.wrapUnderlyingReader
+        ((IndexSearcher)searcher, i).search(query, c);
+      Assert.assertEquals("Wrap Reader " + i + ": " +
+                          query.toString(defaultFieldName),
+                          correct, actual);
+    }
+  }
+
+  public static class SetCollector extends Collector {
+    final Set bag;
+    public SetCollector(Set bag) {
+      this.bag = bag;
+    }
+    private int base = 0;
+    public void setScorer(Scorer scorer) throws IOException {}
+    public void collect(int doc) {
+      bag.add(new Integer(doc + base));
+    }
+    public void setNextReader(IndexReader reader, int docBase) {
+      base = docBase;
+    }
+    public boolean acceptsDocsOutOfOrder() {
+      return true;
+    }
  }

  /**
--- a/src/test/org/apache/lucene/search/QueryUtils.java
+++ b/src/test/org/apache/lucene/search/QueryUtils.java
@ -5,10 +5,19 @@ import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
+import java.util.ArrayList;
+import java.util.List;

 import junit.framework.Assert;

+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.ReaderUtil;

 /**
 * Copyright 2005 Apache Software Foundation
@ -75,12 +84,20 @@ public class QueryUtils {
  }
  
  /** 
-   * various query sanity checks on a searcher, including explanation checks.
-   * @see #checkExplanations
-   * @see #checkSkipTo
+   * Various query sanity checks on a searcher, some checks are only done for
+   * instanceof IndexSearcher.
+   *
   * @see #check(Query)
+   * @see #checkFirstSkipTo
+   * @see #checkSkipTo
+   * @see #checkExplanations
+   * @see #checkSerialization
+   * @see #checkEqual
   */
  public static void check(Query q1, Searcher s) {
+    check(q1, s, true);
+  }
+  private static void check(Query q1, Searcher s, boolean wrap) {
    try {
      check(q1);
      if (s!=null) {
@ -88,6 +105,16 @@ public class QueryUtils {
          IndexSearcher is = (IndexSearcher)s;
          checkFirstSkipTo(q1,is);
          checkSkipTo(q1,is);
+          if (wrap) {
+            check(q1, wrapUnderlyingReader(is, -1), false);
+            check(q1, wrapUnderlyingReader(is,  0), false);
+            check(q1, wrapUnderlyingReader(is, +1), false);
+          }
+        }
+        if (wrap) {
+          check(q1, wrapSearcher(s, -1), false);
+          check(q1, wrapSearcher(s,  0), false);
+          check(q1, wrapSearcher(s, +1), false);
        }
        checkExplanations(q1,s);
        checkSerialization(q1,s);
@ -101,6 +128,104 @@ public class QueryUtils {
    }
  }

+  /**
+   * Given an IndexSearcher, returns a new IndexSearcher whose IndexReader 
+   * is a MultiReader containing the Reader of the original IndexSearcher, 
+   * as well as several "empty" IndexReaders -- some of which will have 
+   * deleted documents in them.  This new IndexSearcher should 
+   * behave exactly the same as the original IndexSearcher.
+   * @param s the searcher to wrap
+   * @param edge if negative, s will be the first sub; if 0, s will be in the middle, if positive s will be the last sub
+   */
+  public static IndexSearcher wrapUnderlyingReader(final IndexSearcher s, final int edge) 
+    throws IOException {
+
+    IndexReader r = s.getIndexReader();
+
+    // we can't put deleted docs before the nested reader, because
+    // it will throw off the docIds
+    IndexReader[] readers = new IndexReader[] {
+      edge < 0 ? r : IndexReader.open(makeEmptyIndex(0)),
+      IndexReader.open(makeEmptyIndex(0)),
+      new MultiReader(new IndexReader[] {
+        IndexReader.open(makeEmptyIndex(edge < 0 ? 4 : 0)),
+        IndexReader.open(makeEmptyIndex(0)),
+        0 == edge ? r : IndexReader.open(makeEmptyIndex(0))
+      }),
+      IndexReader.open(makeEmptyIndex(0 < edge ? 0 : 7)),
+      IndexReader.open(makeEmptyIndex(0)),
+      new MultiReader(new IndexReader[] {
+        IndexReader.open(makeEmptyIndex(0 < edge ? 0 : 5)),
+        IndexReader.open(makeEmptyIndex(0)),
+        0 < edge ? r : IndexReader.open(makeEmptyIndex(0))
+      })
+    };
+    IndexSearcher out = new IndexSearcher(new MultiReader(readers));
+    out.setSimilarity(s.getSimilarity());
+    return out;
+  }
+  /**
+   * Given a Searcher, returns a new MultiSearcher wrapping the  
+   * the original Searcher, 
+   * as well as several "empty" IndexSearchers -- some of which will have
+   * deleted documents in them.  This new MultiSearcher 
+   * should behave exactly the same as the original Searcher.
+   * @param s the Searcher to wrap
+   * @param edge if negative, s will be the first sub; if 0, s will be in hte middle, if positive s will be the last sub
+   */
+  public static MultiSearcher wrapSearcher(final Searcher s, final int edge) 
+    throws IOException {
+
+    // we can't put deleted docs before the nested reader, because
+    // it will through off the docIds
+    Searcher[] searchers = new Searcher[] {
+      edge < 0 ? s : new IndexSearcher(makeEmptyIndex(0)),
+      new MultiSearcher(new Searcher[] {
+        new IndexSearcher(makeEmptyIndex(edge < 0 ? 65 : 0)),
+        new IndexSearcher(makeEmptyIndex(0)),
+        0 == edge ? s : new IndexSearcher(makeEmptyIndex(0))
+      }),
+      new IndexSearcher(makeEmptyIndex(0 < edge ? 0 : 3)),
+      new IndexSearcher(makeEmptyIndex(0)),
+      new MultiSearcher(new Searcher[] {
+        new IndexSearcher(makeEmptyIndex(0 < edge ? 0 : 5)),
+        new IndexSearcher(makeEmptyIndex(0)),
+        0 < edge ? s : new IndexSearcher(makeEmptyIndex(0))
+      })
+    };
+    MultiSearcher out = new MultiSearcher(searchers);
+    out.setSimilarity(s.getSimilarity());
+    return out;
+  }
+
+  private static RAMDirectory makeEmptyIndex(final int numDeletedDocs) 
+    throws IOException {
+      RAMDirectory d = new RAMDirectory();
+      IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), true,
+                                      MaxFieldLength.LIMITED);
+      for (int i = 0; i < numDeletedDocs; i++) {
+        w.addDocument(new Document());
+      }
+      w.commit();
+      w.deleteDocuments( new MatchAllDocsQuery() );
+      w.commit();
+
+      if (0 < numDeletedDocs)
+        Assert.assertTrue("writer has no deletions", w.hasDeletions());
+
+      Assert.assertEquals("writer is missing some deleted docs", 
+                          numDeletedDocs, w.maxDoc());
+      Assert.assertEquals("writer has non-deleted docs", 
+                          0, w.numDocs());
+      w.close();
+      IndexReader r = IndexReader.open(d);
+      Assert.assertEquals("reader has wrong number of deleted docs", 
+                          numDeletedDocs, r.numDeletedDocs());
+      r.close();
+      return d;
+  }
+  
+
  /** check that the query weight is serializable. 
   * @throws IOException if serialization check fail. 
   */
@ -115,7 +240,7 @@ public class QueryUtils {
      ois.readObject();
      ois.close();
      
-      //skip rquals() test for now - most weights don't overide equals() and we won't add this just for the tests.
+      //skip equals() test for now - most weights don't override equals() and we won't add this just for the tests.
      //TestCase.assertEquals("writeObject(w) != w.  ("+w+")",w2,w);   
      
    } catch (Exception e) {
@ -146,10 +271,6 @@ public class QueryUtils {
        {skip_op, skip_op, skip_op, next_op, next_op},
    };
    for (int k = 0; k < orders.length; k++) {
-      IndexReader[] readers = s.getIndexReader().getSequentialSubReaders();
-
-      for (int x = 0; x < readers.length; x++) {
-        IndexReader reader = readers[x];

        final int order[] = orders[k];
        // System.out.print("Order:");for (int i = 0; i < order.length; i++)
@ -158,7 +279,7 @@ public class QueryUtils {
        final int opidx[] = { 0 };

        final Weight w = q.weight(s);
-        final Scorer scorer = w.scorer(reader, true, false);
+        final Scorer scorer = w.scorer(s.getIndexReader(), true, false);
        if (scorer == null) {
          continue;
        }
@ -229,7 +350,6 @@ public class QueryUtils {
            .nextDoc()) != DocIdSetIterator.NO_MORE_DOCS;
        Assert.assertFalse(more);
      }
-    }
  }
    
  // check that first skip on just created scorers always goes to the right doc
@ -271,7 +391,9 @@ public class QueryUtils {
      }
    });
    
-    IndexReader[] readers = s.getIndexReader().getSequentialSubReaders();
+    List readerList = new ArrayList();
+    ReaderUtil.gatherSubReaders(readerList, s.getIndexReader());
+    IndexReader[] readers = (IndexReader[]) readerList.toArray(new IndexReader[0]);
    for(int i = 0; i < readers.length; i++) {
      IndexReader reader = readers[i];
      Weight w = q.weight(s);
--- a/src/test/org/apache/lucene/search/TestExplanations.java
+++ b/src/test/org/apache/lucene/search/TestExplanations.java
@ -17,35 +17,30 @@ package org.apache.lucene.search;
 * limitations under the License.
 */

-import org.apache.lucene.search.spans.*;
-import org.apache.lucene.store.RAMDirectory;
-
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.queryParser.ParseException;
-
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanFirstQuery;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanNotQuery;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.DocIdBitSet;
-
-import java.util.Random;
-import java.util.BitSet;

 /**
- * Tests primative queries (ie: that rewrite to themselves) to
+ * Tests primitive queries (ie: that rewrite to themselves) to
 * insure they match the expected set of docs, and that the score of each
 * match is equal to the value of the scores explanation.
 *
 * <p>
- * The assumption is that if all of the "primative" queries work well,
- * then anythingthat rewrites to a primative will work well also.
+ * The assumption is that if all of the "primitive" queries work well,
+ * then anything that rewrites to a primitive will work well also.
 * </p>
 *
 * @see "Subclasses for actual tests"
@ -53,6 +48,7 @@ import java.util.BitSet;
 public class TestExplanations extends LuceneTestCase {
  protected IndexSearcher searcher;

+  public static final String KEY = "KEY";
  public static final String FIELD = "field";
  public static final QueryParser qp =
    new QueryParser(FIELD, new WhitespaceAnalyzer());
@ -69,6 +65,7 @@ public class TestExplanations extends LuceneTestCase {
                                        IndexWriter.MaxFieldLength.LIMITED);
    for (int i = 0; i < docFields.length; i++) {
      Document doc = new Document();
+      doc.add(new Field(KEY, ""+i, Field.Store.NO, Field.Index.NOT_ANALYZED));
      doc.add(new Field(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED));
      writer.addDocument(doc);
    }
@ -117,18 +114,22 @@ public class TestExplanations extends LuceneTestCase {
    bqtest(makeQuery(queryText), expDocNrs);
  }
  
-  /** A filter that only lets the specified document numbers pass */
-  public static class ItemizedFilter extends Filter {
-    int[] docs;
-    public ItemizedFilter(int[] docs) {
-      this.docs = docs;
-    }
-    public DocIdSet getDocIdSet(IndexReader r) {
-      BitSet b = new BitSet(r.maxDoc());
-      for (int i = 0; i < docs.length; i++) {
-        b.set(docs[i]);
+  /** 
+   * Convenience subclass of FieldCacheTermsFilter
+   */
+  public static class ItemizedFilter extends FieldCacheTermsFilter {
+    private static String[] int2str(int [] terms) {
+      String [] out = new String[terms.length];
+      for (int i = 0; i < terms.length; i++) {
+        out[i] = ""+terms[i];
      }
-      return new DocIdBitSet(b);
+      return out;
+    }
+    public ItemizedFilter(String keyField, int [] keys) {
+      super(keyField, int2str(keys));
+    }
+    public ItemizedFilter(int [] keys) {
+      super(KEY, int2str(keys));
    }
  }