From f720852e376331fe365af82ceb36dcae7ff5dd00 Mon Sep 17 00:00:00 2001
From: "Chris M. Hostetter"
Date: Tue, 18 Aug 2009 18:25:47 +0000
Subject: [PATCH] LUCENE-1791: Enhance QueryUtils and CheckHits to wrap
everything they check in MultiReader/MultiSearcher
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@805525 13f79535-47bb-0310-9956-ffa450edef68
---
CHANGES.txt | 5 +
.../org/apache/lucene/search/CheckHits.java | 67 +++++---
.../org/apache/lucene/search/QueryUtils.java | 144 ++++++++++++++++--
.../lucene/search/TestExplanations.java | 63 ++++----
4 files changed, 217 insertions(+), 62 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index 1fcce817f12..4a704d4c2ce 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -783,6 +783,11 @@ Build
Test Cases
+ 1. LUCENE-1791: Enhancements to the QueryUtils and CheckHits utility
+ classes to wrap IndexReaders and Searchers in MultiReaders or
+ MultiSearchers when possible to help exercise more edge cases.
+ (Chris Hostetter, Mark Miller)
+
======================= Release 2.4.1 2009-03-09 =======================
API Changes
diff --git a/src/test/org/apache/lucene/search/CheckHits.java b/src/test/org/apache/lucene/search/CheckHits.java
index cf7d7338f20..8dfe7dcfd04 100644
--- a/src/test/org/apache/lucene/search/CheckHits.java
+++ b/src/test/org/apache/lucene/search/CheckHits.java
@@ -29,8 +29,8 @@ import org.apache.lucene.store.Directory;
public class CheckHits {
/**
- * Some explains methods calculate their vlaues though a slightly
- * differnet order of operations from the acctaul scoring method ...
+ * Some explains methods calculate their values through a slightly
+ * different order of operations from the actual scoring method ...
* this allows for a small amount of variation
*/
public static float EXPLAIN_SCORE_TOLERANCE_DELTA = 0.00005f;
@@ -74,7 +74,7 @@ public class CheckHits {
*
* @param query the query to test
* @param searcher the searcher to test the query against
- * @param defaultFieldName used for displaing the query in assertion messages
+ * @param defaultFieldName used for displaying the query in assertion messages
* @param results a list of documentIds that must match the query
* @see Searcher#search(Query,HitCollector)
* @see #checkHits
@@ -82,31 +82,58 @@ public class CheckHits {
public static void checkHitCollector(Query query, String defaultFieldName,
Searcher searcher, int[] results)
throws IOException {
+
+ QueryUtils.check(query,searcher);
Set correct = new TreeSet();
for (int i = 0; i < results.length; i++) {
correct.add(new Integer(results[i]));
}
-
final Set actual = new TreeSet();
- searcher.search(query, new Collector() {
- private int base = 0;
- public void setScorer(Scorer scorer) throws IOException {}
- public void collect(int doc) {
- actual.add(new Integer(doc + base));
- }
- public void setNextReader(IndexReader reader, int docBase) {
- base = docBase;
- }
- public boolean acceptsDocsOutOfOrder() {
- return true;
- }
- });
- Assert.assertEquals(query.toString(defaultFieldName), correct, actual);
+ final Collector c = new SetCollector(actual);
- QueryUtils.check(query,searcher);
+ searcher.search(query, c);
+ Assert.assertEquals("Simple: " + query.toString(defaultFieldName),
+ correct, actual);
+
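+ // edge values -1, 0, +1 place the real searcher as the first, middle, or
+ // last sub-searcher of a MultiSearcher of otherwise empty searchers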
+ for (int i = -1; i < 2; i++) {
+ actual.clear();
+ QueryUtils.wrapSearcher(searcher, i).search(query, c);
+ Assert.assertEquals("Wrap Searcher " + i + ": " +
+ query.toString(defaultFieldName),
+ correct, actual);
+ }
+
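+ // wrapping the underlying reader requires direct access to it, so the
+ // remaining checks only apply to IndexSearchers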
+ if ( ! ( searcher instanceof IndexSearcher ) ) return;
+
+ for (int i = -1; i < 2; i++) {
+ actual.clear();
+ QueryUtils.wrapUnderlyingReader
+ ((IndexSearcher)searcher, i).search(query, c);
+ Assert.assertEquals("Wrap Reader " + i + ": " +
+ query.toString(defaultFieldName),
+ correct, actual);
+ }
}
-
+
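+ /** Just collects document ids into the given Set, offsetting each id by the reader's docBase */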
+ public static class SetCollector extends Collector {
+ final Set bag;
+ public SetCollector(Set bag) {
+ this.bag = bag;
+ }
+ private int base = 0;
+ public void setScorer(Scorer scorer) throws IOException {}
+ public void collect(int doc) {
+ bag.add(new Integer(doc + base));
+ }
+ public void setNextReader(IndexReader reader, int docBase) {
+ base = docBase;
+ }
+ public boolean acceptsDocsOutOfOrder() {
+ return true;
+ }
+ }
+
/**
* Tests that a query matches the an expected set of documents using Hits.
*
diff --git a/src/test/org/apache/lucene/search/QueryUtils.java b/src/test/org/apache/lucene/search/QueryUtils.java
index bdcf36dd222..76f86203409 100644
--- a/src/test/org/apache/lucene/search/QueryUtils.java
+++ b/src/test/org/apache/lucene/search/QueryUtils.java
@@ -5,10 +5,19 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
+import java.util.ArrayList;
+import java.util.List;
import junit.framework.Assert;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.ReaderUtil;
/**
* Copyright 2005 Apache Software Foundation
@@ -75,12 +84,20 @@ public class QueryUtils {
}
/**
- * various query sanity checks on a searcher, including explanation checks.
- * @see #checkExplanations
- * @see #checkSkipTo
+ * Various query sanity checks on a searcher; some checks are only
+ * performed when the searcher is an IndexSearcher.
+ *
* @see #check(Query)
+ * @see #checkFirstSkipTo
+ * @see #checkSkipTo
+ * @see #checkExplanations
+ * @see #checkSerialization
+ * @see #checkEqual
*/
public static void check(Query q1, Searcher s) {
+ check(q1, s, true);
+ }
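+ /** if <code>wrap</code> is true the query is additionally checked against
+ * MultiReader/MultiSearcher wrappings of the searcher; the recursive calls
+ * pass false so that already-wrapped searchers are not wrapped again */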
+ private static void check(Query q1, Searcher s, boolean wrap) {
try {
check(q1);
if (s!=null) {
@@ -88,6 +105,16 @@ public class QueryUtils {
IndexSearcher is = (IndexSearcher)s;
checkFirstSkipTo(q1,is);
checkSkipTo(q1,is);
+ if (wrap) {
+ check(q1, wrapUnderlyingReader(is, -1), false);
+ check(q1, wrapUnderlyingReader(is, 0), false);
+ check(q1, wrapUnderlyingReader(is, +1), false);
+ }
+ }
+ if (wrap) {
+ check(q1, wrapSearcher(s, -1), false);
+ check(q1, wrapSearcher(s, 0), false);
+ check(q1, wrapSearcher(s, +1), false);
}
checkExplanations(q1,s);
checkSerialization(q1,s);
@@ -101,6 +128,104 @@ public class QueryUtils {
}
}
+ /**
+ * Given an IndexSearcher, returns a new IndexSearcher whose IndexReader
+ * is a MultiReader containing the Reader of the original IndexSearcher,
+ * as well as several "empty" IndexReaders -- some of which will have
+ * deleted documents in them. This new IndexSearcher should
+ * behave exactly the same as the original IndexSearcher.
+ * @param s the searcher to wrap
+ * @param edge if negative, s will be the first sub; if 0, s will be in the middle; if positive, s will be the last sub
+ */
+ public static IndexSearcher wrapUnderlyingReader(final IndexSearcher s, final int edge)
+ throws IOException {
+
+ IndexReader r = s.getIndexReader();
+
+ // we can't put deleted docs before the nested reader, because
+ // it will throw off the docIds
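+ // the original reader r ends up as the first, middle, or last sub-reader
+ // depending on edge; the nested MultiReaders give the wrapped reader
+ // multiple levels of sub-readers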
+ IndexReader[] readers = new IndexReader[] {
+ edge < 0 ? r : IndexReader.open(makeEmptyIndex(0)),
+ IndexReader.open(makeEmptyIndex(0)),
+ new MultiReader(new IndexReader[] {
+ IndexReader.open(makeEmptyIndex(edge < 0 ? 4 : 0)),
+ IndexReader.open(makeEmptyIndex(0)),
+ 0 == edge ? r : IndexReader.open(makeEmptyIndex(0))
+ }),
+ IndexReader.open(makeEmptyIndex(0 < edge ? 0 : 7)),
+ IndexReader.open(makeEmptyIndex(0)),
+ new MultiReader(new IndexReader[] {
+ IndexReader.open(makeEmptyIndex(0 < edge ? 0 : 5)),
+ IndexReader.open(makeEmptyIndex(0)),
+ 0 < edge ? r : IndexReader.open(makeEmptyIndex(0))
+ })
+ };
+ IndexSearcher out = new IndexSearcher(new MultiReader(readers));
+ out.setSimilarity(s.getSimilarity());
+ return out;
+ }
+ /**
+ * Given a Searcher, returns a new MultiSearcher wrapping the
+ * original Searcher,
+ * as well as several "empty" IndexSearchers -- some of which will have
+ * deleted documents in them. This new MultiSearcher
+ * should behave exactly the same as the original Searcher.
+ * @param s the Searcher to wrap
+ * @param edge if negative, s will be the first sub; if 0, s will be in the middle; if positive, s will be the last sub
+ */
+ public static MultiSearcher wrapSearcher(final Searcher s, final int edge)
+ throws IOException {
+
+ // we can't put deleted docs before the nested reader, because
+ // it will throw off the docIds
+ Searcher[] searchers = new Searcher[] {
+ edge < 0 ? s : new IndexSearcher(makeEmptyIndex(0)),
+ new MultiSearcher(new Searcher[] {
+ new IndexSearcher(makeEmptyIndex(edge < 0 ? 65 : 0)),
+ new IndexSearcher(makeEmptyIndex(0)),
+ 0 == edge ? s : new IndexSearcher(makeEmptyIndex(0))
+ }),
+ new IndexSearcher(makeEmptyIndex(0 < edge ? 0 : 3)),
+ new IndexSearcher(makeEmptyIndex(0)),
+ new MultiSearcher(new Searcher[] {
+ new IndexSearcher(makeEmptyIndex(0 < edge ? 0 : 5)),
+ new IndexSearcher(makeEmptyIndex(0)),
+ 0 < edge ? s : new IndexSearcher(makeEmptyIndex(0))
+ })
+ };
+ MultiSearcher out = new MultiSearcher(searchers);
+ out.setSimilarity(s.getSimilarity());
+ return out;
+ }
+
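+ /** Creates a RAMDirectory index containing <code>numDeletedDocs</code> documents,
+ * all of which have been deleted: maxDoc() equals numDeletedDocs but numDocs() is 0 */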
+ private static RAMDirectory makeEmptyIndex(final int numDeletedDocs)
+ throws IOException {
+ RAMDirectory d = new RAMDirectory();
+ IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), true,
+ MaxFieldLength.LIMITED);
+ for (int i = 0; i < numDeletedDocs; i++) {
+ w.addDocument(new Document());
+ }
+ w.commit();
+ w.deleteDocuments( new MatchAllDocsQuery() );
+ w.commit();
+
+ if (0 < numDeletedDocs)
+ Assert.assertTrue("writer has no deletions", w.hasDeletions());
+
+ Assert.assertEquals("writer is missing some deleted docs",
+ numDeletedDocs, w.maxDoc());
+ Assert.assertEquals("writer has non-deleted docs",
+ 0, w.numDocs());
+ w.close();
+ IndexReader r = IndexReader.open(d);
+ Assert.assertEquals("reader has wrong number of deleted docs",
+ numDeletedDocs, r.numDeletedDocs());
+ r.close();
+ return d;
+ }
+
+
/** check that the query weight is serializable.
* @throws IOException if serialization check fail.
*/
@@ -115,7 +240,7 @@ public class QueryUtils {
ois.readObject();
ois.close();
- //skip rquals() test for now - most weights don't overide equals() and we won't add this just for the tests.
+ //skip equals() test for now - most weights don't override equals() and we won't add this just for the tests.
//TestCase.assertEquals("writeObject(w) != w. ("+w+")",w2,w);
} catch (Exception e) {
@@ -146,10 +271,6 @@ public class QueryUtils {
{skip_op, skip_op, skip_op, next_op, next_op},
};
for (int k = 0; k < orders.length; k++) {
- IndexReader[] readers = s.getIndexReader().getSequentialSubReaders();
-
- for (int x = 0; x < readers.length; x++) {
- IndexReader reader = readers[x];
final int order[] = orders[k];
// System.out.print("Order:");for (int i = 0; i < order.length; i++)
@@ -158,7 +279,7 @@ public class QueryUtils {
final int opidx[] = { 0 };
final Weight w = q.weight(s);
- final Scorer scorer = w.scorer(reader, true, false);
+ final Scorer scorer = w.scorer(s.getIndexReader(), true, false);
if (scorer == null) {
continue;
}
@@ -229,7 +350,6 @@ public class QueryUtils {
.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS;
Assert.assertFalse(more);
}
- }
}
// check that first skip on just created scorers always goes to the right doc
@@ -271,7 +391,9 @@ public class QueryUtils {
}
});
- IndexReader[] readers = s.getIndexReader().getSequentialSubReaders();
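+ // recursively gather all of the leaf sub-readers (the wrapped searchers can contain nested MultiReaders)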
+ List readerList = new ArrayList();
+ ReaderUtil.gatherSubReaders(readerList, s.getIndexReader());
+ IndexReader[] readers = (IndexReader[]) readerList.toArray(new IndexReader[0]);
for(int i = 0; i < readers.length; i++) {
IndexReader reader = readers[i];
Weight w = q.weight(s);
diff --git a/src/test/org/apache/lucene/search/TestExplanations.java b/src/test/org/apache/lucene/search/TestExplanations.java
index 48ad17f46eb..6b029f1227d 100644
--- a/src/test/org/apache/lucene/search/TestExplanations.java
+++ b/src/test/org/apache/lucene/search/TestExplanations.java
@@ -17,35 +17,30 @@ package org.apache.lucene.search;
* limitations under the License.
*/
-import org.apache.lucene.search.spans.*;
-import org.apache.lucene.store.RAMDirectory;
-
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.ParseException;
-
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanFirstQuery;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanNotQuery;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.DocIdBitSet;
-
-import java.util.Random;
-import java.util.BitSet;
/**
- * Tests primative queries (ie: that rewrite to themselves) to
+ * Tests primitive queries (ie: that rewrite to themselves) to
* insure they match the expected set of docs, and that the score of each
* match is equal to the value of the scores explanation.
*
*
- * The assumption is that if all of the "primative" queries work well,
- * then anythingthat rewrites to a primative will work well also.
+ * The assumption is that if all of the "primitive" queries work well,
+ * then anything that rewrites to a primitive will work well also.
*
*
* @see "Subclasses for actual tests"
@@ -53,6 +48,7 @@ import java.util.BitSet;
public class TestExplanations extends LuceneTestCase {
protected IndexSearcher searcher;
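+ /** document numbers are indexed in this field so ItemizedFilter can select docs by number */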
+ public static final String KEY = "KEY";
public static final String FIELD = "field";
public static final QueryParser qp =
new QueryParser(FIELD, new WhitespaceAnalyzer());
@@ -69,6 +65,7 @@ public class TestExplanations extends LuceneTestCase {
IndexWriter.MaxFieldLength.LIMITED);
for (int i = 0; i < docFields.length; i++) {
Document doc = new Document();
+ doc.add(new Field(KEY, ""+i, Field.Store.NO, Field.Index.NOT_ANALYZED));
doc.add(new Field(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
}
@@ -117,18 +114,22 @@ public class TestExplanations extends LuceneTestCase {
bqtest(makeQuery(queryText), expDocNrs);
}
- /** A filter that only lets the specified document numbers pass */
- public static class ItemizedFilter extends Filter {
- int[] docs;
- public ItemizedFilter(int[] docs) {
- this.docs = docs;
- }
- public DocIdSet getDocIdSet(IndexReader r) {
- BitSet b = new BitSet(r.maxDoc());
- for (int i = 0; i < docs.length; i++) {
- b.set(docs[i]);
+ /**
+ * Convenience subclass of FieldCacheTermsFilter that only lets
+ * documents with the specified key values (document numbers) pass
+ */
+ public static class ItemizedFilter extends FieldCacheTermsFilter {
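+ /** converts the int keys into the String terms that were indexed in the key field */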
+ private static String[] int2str(int [] terms) {
+ String [] out = new String[terms.length];
+ for (int i = 0; i < terms.length; i++) {
+ out[i] = ""+terms[i];
}
- return new DocIdBitSet(b);
+ return out;
+ }
+ public ItemizedFilter(String keyField, int [] keys) {
+ super(keyField, int2str(keys));
+ }
+ public ItemizedFilter(int [] keys) {
+ super(KEY, int2str(keys));
}
}