LUCENE-1791: Enhance QueryUtils and CheckHits to wrap everything they check in MultiReader/MultiSearcher

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@805525 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2009-08-18 18:25:47 +00:00
parent 3887cf9419
commit f720852e37
4 changed files with 217 additions and 62 deletions

View File

@ -783,6 +783,11 @@ Build
Test Cases
1. LUCENE-1791: Enhancements to the QueryUtils and CheckHits utility
classes to wrap IndexReaders and Searchers in MultiReaders or
MultiSearcher when possible to help excercise more edge cases.
(Chris Hostetter, Mark Miller)
======================= Release 2.4.1 2009-03-09 =======================
API Changes

View File

@ -29,8 +29,8 @@ import org.apache.lucene.store.Directory;
public class CheckHits {
/**
* Some explains methods calculate their vlaues though a slightly
* differnet order of operations from the acctaul scoring method ...
* Some explains methods calculate their values though a slightly
* different order of operations from the actual scoring method ...
* this allows for a small amount of variation
*/
public static float EXPLAIN_SCORE_TOLERANCE_DELTA = 0.00005f;
@ -74,7 +74,7 @@ public class CheckHits {
* </p>
* @param query the query to test
* @param searcher the searcher to test the query against
* @param defaultFieldName used for displaing the query in assertion messages
* @param defaultFieldName used for displaying the query in assertion messages
* @param results a list of documentIds that must match the query
* @see Searcher#search(Query,HitCollector)
* @see #checkHits
@ -83,28 +83,55 @@ public class CheckHits {
Searcher searcher, int[] results)
throws IOException {
QueryUtils.check(query,searcher);
Set correct = new TreeSet();
for (int i = 0; i < results.length; i++) {
correct.add(new Integer(results[i]));
}
final Set actual = new TreeSet();
searcher.search(query, new Collector() {
private int base = 0;
public void setScorer(Scorer scorer) throws IOException {}
public void collect(int doc) {
actual.add(new Integer(doc + base));
}
public void setNextReader(IndexReader reader, int docBase) {
base = docBase;
}
public boolean acceptsDocsOutOfOrder() {
return true;
}
});
Assert.assertEquals(query.toString(defaultFieldName), correct, actual);
final Collector c = new SetCollector(actual);
QueryUtils.check(query,searcher);
searcher.search(query, c);
Assert.assertEquals("Simple: " + query.toString(defaultFieldName),
correct, actual);
for (int i = -1; i < 2; i++) {
actual.clear();
QueryUtils.wrapSearcher(searcher, i).search(query, c);
Assert.assertEquals("Wrap Searcher " + i + ": " +
query.toString(defaultFieldName),
correct, actual);
}
if ( ! ( searcher instanceof IndexSearcher ) ) return;
for (int i = -1; i < 2; i++) {
actual.clear();
QueryUtils.wrapUnderlyingReader
((IndexSearcher)searcher, i).search(query, c);
Assert.assertEquals("Wrap Reader " + i + ": " +
query.toString(defaultFieldName),
correct, actual);
}
}
public static class SetCollector extends Collector {
final Set bag;
public SetCollector(Set bag) {
this.bag = bag;
}
private int base = 0;
public void setScorer(Scorer scorer) throws IOException {}
public void collect(int doc) {
bag.add(new Integer(doc + base));
}
public void setNextReader(IndexReader reader, int docBase) {
base = docBase;
}
public boolean acceptsDocsOutOfOrder() {
return true;
}
}
/**

View File

@ -5,10 +5,19 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.List;
import junit.framework.Assert;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.ReaderUtil;
/**
* Copyright 2005 Apache Software Foundation
@ -75,12 +84,20 @@ public class QueryUtils {
}
/**
* various query sanity checks on a searcher, including explanation checks.
* @see #checkExplanations
* @see #checkSkipTo
* Various query sanity checks on a searcher, some checks are only done for
* instanceof IndexSearcher.
*
* @see #check(Query)
* @see #checkFirstSkipTo
* @see #checkSkipTo
* @see #checkExplanations
* @see #checkSerialization
* @see #checkEqual
*/
public static void check(Query q1, Searcher s) {
check(q1, s, true);
}
private static void check(Query q1, Searcher s, boolean wrap) {
try {
check(q1);
if (s!=null) {
@ -88,6 +105,16 @@ public class QueryUtils {
IndexSearcher is = (IndexSearcher)s;
checkFirstSkipTo(q1,is);
checkSkipTo(q1,is);
if (wrap) {
check(q1, wrapUnderlyingReader(is, -1), false);
check(q1, wrapUnderlyingReader(is, 0), false);
check(q1, wrapUnderlyingReader(is, +1), false);
}
}
if (wrap) {
check(q1, wrapSearcher(s, -1), false);
check(q1, wrapSearcher(s, 0), false);
check(q1, wrapSearcher(s, +1), false);
}
checkExplanations(q1,s);
checkSerialization(q1,s);
@ -101,6 +128,104 @@ public class QueryUtils {
}
}
/**
* Given an IndexSearcher, returns a new IndexSearcher whose IndexReader
* is a MultiReader containing the Reader of the original IndexSearcher,
* as well as several "empty" IndexReaders -- some of which will have
* deleted documents in them. This new IndexSearcher should
* behave exactly the same as the original IndexSearcher.
* @param s the searcher to wrap
* @param edge if negative, s will be the first sub; if 0, s will be in the middle, if positive s will be the last sub
*/
public static IndexSearcher wrapUnderlyingReader(final IndexSearcher s, final int edge)
throws IOException {
IndexReader r = s.getIndexReader();
// we can't put deleted docs before the nested reader, because
// it will throw off the docIds
IndexReader[] readers = new IndexReader[] {
edge < 0 ? r : IndexReader.open(makeEmptyIndex(0)),
IndexReader.open(makeEmptyIndex(0)),
new MultiReader(new IndexReader[] {
IndexReader.open(makeEmptyIndex(edge < 0 ? 4 : 0)),
IndexReader.open(makeEmptyIndex(0)),
0 == edge ? r : IndexReader.open(makeEmptyIndex(0))
}),
IndexReader.open(makeEmptyIndex(0 < edge ? 0 : 7)),
IndexReader.open(makeEmptyIndex(0)),
new MultiReader(new IndexReader[] {
IndexReader.open(makeEmptyIndex(0 < edge ? 0 : 5)),
IndexReader.open(makeEmptyIndex(0)),
0 < edge ? r : IndexReader.open(makeEmptyIndex(0))
})
};
IndexSearcher out = new IndexSearcher(new MultiReader(readers));
out.setSimilarity(s.getSimilarity());
return out;
}
/**
* Given a Searcher, returns a new MultiSearcher wrapping the
* the original Searcher,
* as well as several "empty" IndexSearchers -- some of which will have
* deleted documents in them. This new MultiSearcher
* should behave exactly the same as the original Searcher.
* @param s the Searcher to wrap
* @param edge if negative, s will be the first sub; if 0, s will be in hte middle, if positive s will be the last sub
*/
public static MultiSearcher wrapSearcher(final Searcher s, final int edge)
throws IOException {
// we can't put deleted docs before the nested reader, because
// it will through off the docIds
Searcher[] searchers = new Searcher[] {
edge < 0 ? s : new IndexSearcher(makeEmptyIndex(0)),
new MultiSearcher(new Searcher[] {
new IndexSearcher(makeEmptyIndex(edge < 0 ? 65 : 0)),
new IndexSearcher(makeEmptyIndex(0)),
0 == edge ? s : new IndexSearcher(makeEmptyIndex(0))
}),
new IndexSearcher(makeEmptyIndex(0 < edge ? 0 : 3)),
new IndexSearcher(makeEmptyIndex(0)),
new MultiSearcher(new Searcher[] {
new IndexSearcher(makeEmptyIndex(0 < edge ? 0 : 5)),
new IndexSearcher(makeEmptyIndex(0)),
0 < edge ? s : new IndexSearcher(makeEmptyIndex(0))
})
};
MultiSearcher out = new MultiSearcher(searchers);
out.setSimilarity(s.getSimilarity());
return out;
}
private static RAMDirectory makeEmptyIndex(final int numDeletedDocs)
throws IOException {
RAMDirectory d = new RAMDirectory();
IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), true,
MaxFieldLength.LIMITED);
for (int i = 0; i < numDeletedDocs; i++) {
w.addDocument(new Document());
}
w.commit();
w.deleteDocuments( new MatchAllDocsQuery() );
w.commit();
if (0 < numDeletedDocs)
Assert.assertTrue("writer has no deletions", w.hasDeletions());
Assert.assertEquals("writer is missing some deleted docs",
numDeletedDocs, w.maxDoc());
Assert.assertEquals("writer has non-deleted docs",
0, w.numDocs());
w.close();
IndexReader r = IndexReader.open(d);
Assert.assertEquals("reader has wrong number of deleted docs",
numDeletedDocs, r.numDeletedDocs());
r.close();
return d;
}
/** check that the query weight is serializable.
* @throws IOException if serialization check fail.
*/
@ -115,7 +240,7 @@ public class QueryUtils {
ois.readObject();
ois.close();
//skip rquals() test for now - most weights don't overide equals() and we won't add this just for the tests.
//skip equals() test for now - most weights don't override equals() and we won't add this just for the tests.
//TestCase.assertEquals("writeObject(w) != w. ("+w+")",w2,w);
} catch (Exception e) {
@ -146,10 +271,6 @@ public class QueryUtils {
{skip_op, skip_op, skip_op, next_op, next_op},
};
for (int k = 0; k < orders.length; k++) {
IndexReader[] readers = s.getIndexReader().getSequentialSubReaders();
for (int x = 0; x < readers.length; x++) {
IndexReader reader = readers[x];
final int order[] = orders[k];
// System.out.print("Order:");for (int i = 0; i < order.length; i++)
@ -158,7 +279,7 @@ public class QueryUtils {
final int opidx[] = { 0 };
final Weight w = q.weight(s);
final Scorer scorer = w.scorer(reader, true, false);
final Scorer scorer = w.scorer(s.getIndexReader(), true, false);
if (scorer == null) {
continue;
}
@ -229,7 +350,6 @@ public class QueryUtils {
.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS;
Assert.assertFalse(more);
}
}
}
// check that first skip on just created scorers always goes to the right doc
@ -271,7 +391,9 @@ public class QueryUtils {
}
});
IndexReader[] readers = s.getIndexReader().getSequentialSubReaders();
List readerList = new ArrayList();
ReaderUtil.gatherSubReaders(readerList, s.getIndexReader());
IndexReader[] readers = (IndexReader[]) readerList.toArray(new IndexReader[0]);
for(int i = 0; i < readers.length; i++) {
IndexReader reader = readers[i];
Weight w = q.weight(s);

View File

@ -17,35 +17,30 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import org.apache.lucene.search.spans.*;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanFirstQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.DocIdBitSet;
import java.util.Random;
import java.util.BitSet;
/**
* Tests primative queries (ie: that rewrite to themselves) to
* Tests primitive queries (ie: that rewrite to themselves) to
* insure they match the expected set of docs, and that the score of each
* match is equal to the value of the scores explanation.
*
* <p>
* The assumption is that if all of the "primative" queries work well,
* then anythingthat rewrites to a primative will work well also.
* The assumption is that if all of the "primitive" queries work well,
* then anything that rewrites to a primitive will work well also.
* </p>
*
* @see "Subclasses for actual tests"
@ -53,6 +48,7 @@ import java.util.BitSet;
public class TestExplanations extends LuceneTestCase {
protected IndexSearcher searcher;
public static final String KEY = "KEY";
public static final String FIELD = "field";
public static final QueryParser qp =
new QueryParser(FIELD, new WhitespaceAnalyzer());
@ -69,6 +65,7 @@ public class TestExplanations extends LuceneTestCase {
IndexWriter.MaxFieldLength.LIMITED);
for (int i = 0; i < docFields.length; i++) {
Document doc = new Document();
doc.add(new Field(KEY, ""+i, Field.Store.NO, Field.Index.NOT_ANALYZED));
doc.add(new Field(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
}
@ -117,18 +114,22 @@ public class TestExplanations extends LuceneTestCase {
bqtest(makeQuery(queryText), expDocNrs);
}
/** A filter that only lets the specified document numbers pass */
public static class ItemizedFilter extends Filter {
int[] docs;
public ItemizedFilter(int[] docs) {
this.docs = docs;
}
public DocIdSet getDocIdSet(IndexReader r) {
BitSet b = new BitSet(r.maxDoc());
for (int i = 0; i < docs.length; i++) {
b.set(docs[i]);
/**
* Convenience subclass of FieldCacheTermsFilter
*/
public static class ItemizedFilter extends FieldCacheTermsFilter {
private static String[] int2str(int [] terms) {
String [] out = new String[terms.length];
for (int i = 0; i < terms.length; i++) {
out[i] = ""+terms[i];
}
return new DocIdBitSet(b);
return out;
}
public ItemizedFilter(String keyField, int [] keys) {
super(keyField, int2str(keys));
}
public ItemizedFilter(int [] keys) {
super(KEY, int2str(keys));
}
}