From f720852e376331fe365af82ceb36dcae7ff5dd00 Mon Sep 17 00:00:00 2001
From: "Chris M. Hostetter"
Date: Tue, 18 Aug 2009 18:25:47 +0000
Subject: [PATCH] LUCENE-1791: Enhance QueryUtils and CheckHits to wrap
 everything they check in MultiReader/MultiSearcher

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@805525 13f79535-47bb-0310-9956-ffa450edef68
---
 CHANGES.txt                                   |   5 +
 .../org/apache/lucene/search/CheckHits.java   |  67 +++++---
 .../org/apache/lucene/search/QueryUtils.java  | 144 ++++++++++++++++--
 .../lucene/search/TestExplanations.java       |  63 ++++----
 4 files changed, 217 insertions(+), 62 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 1fcce817f12..4a704d4c2ce 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -783,6 +783,11 @@ Build
 
 Test Cases
 
+ 1. LUCENE-1791: Enhancements to the QueryUtils and CheckHits utility
+    classes to wrap IndexReaders and Searchers in MultiReaders or
+    MultiSearchers when possible to help exercise more edge cases.
+    (Chris Hostetter, Mark Miller)
+
 ======================= Release 2.4.1 2009-03-09 =======================
 
 API Changes

diff --git a/src/test/org/apache/lucene/search/CheckHits.java b/src/test/org/apache/lucene/search/CheckHits.java
index cf7d7338f20..8dfe7dcfd04 100644
--- a/src/test/org/apache/lucene/search/CheckHits.java
+++ b/src/test/org/apache/lucene/search/CheckHits.java
@@ -29,8 +29,8 @@ import org.apache.lucene.store.Directory;
 public class CheckHits {
   
   /**
-   * Some explains methods calculate their vlaues though a slightly
-   * differnet order of operations from the acctaul scoring method ...
+   * Some explains methods calculate their values though a slightly
+   * different order of operations from the actual scoring method ...
    * this allows for a small amount of variation
    */
   public static float EXPLAIN_SCORE_TOLERANCE_DELTA = 0.00005f;
@@ -74,7 +74,7 @@ public class CheckHits {
   *

   * @param query the query to test
   * @param searcher the searcher to test the query against
-   * @param defaultFieldName used for displaing the query in assertion messages
+   * @param defaultFieldName used for displaying the query in assertion messages
   * @param results a list of documentIds that must match the query
   * @see Searcher#search(Query,HitCollector)
   * @see #checkHits
   */
@@ -82,31 +82,58 @@ public class CheckHits {
   public static void checkHitCollector(Query query, String defaultFieldName,
                                        Searcher searcher, int[] results)
     throws IOException {
+
+    QueryUtils.check(query,searcher);
 
     Set correct = new TreeSet();
     for (int i = 0; i < results.length; i++) {
       correct.add(new Integer(results[i]));
     }
 
-    final Set actual = new TreeSet();
-    searcher.search(query, new Collector() {
-      private int base = 0;
-      public void setScorer(Scorer scorer) throws IOException {}
-      public void collect(int doc) {
-        actual.add(new Integer(doc + base));
-      }
-      public void setNextReader(IndexReader reader, int docBase) {
-        base = docBase;
-      }
-      public boolean acceptsDocsOutOfOrder() {
-        return true;
-      }
-    });
-    Assert.assertEquals(query.toString(defaultFieldName), correct, actual);
+    final Set actual = new TreeSet();
+    final Collector c = new SetCollector(actual);
 
-    QueryUtils.check(query,searcher);
+    searcher.search(query, c);
+    Assert.assertEquals("Simple: " + query.toString(defaultFieldName),
+                        correct, actual);
+
+    for (int i = -1; i < 2; i++) {
+      actual.clear();
+      QueryUtils.wrapSearcher(searcher, i).search(query, c);
+      Assert.assertEquals("Wrap Searcher " + i + ": " +
+                          query.toString(defaultFieldName),
+                          correct, actual);
+    }
+
+    if ( ! ( searcher instanceof IndexSearcher ) ) return;
+
+    for (int i = -1; i < 2; i++) {
+      actual.clear();
+      QueryUtils.wrapUnderlyingReader
+        ((IndexSearcher)searcher, i).search(query, c);
+      Assert.assertEquals("Wrap Reader " + i + ": " +
+                          query.toString(defaultFieldName),
+                          correct, actual);
+    }
   }
-  
+
+  public static class SetCollector extends Collector {
+    final Set bag;
+    public SetCollector(Set bag) {
+      this.bag = bag;
+    }
+    private int base = 0;
+    public void setScorer(Scorer scorer) throws IOException {}
+    public void collect(int doc) {
+      bag.add(new Integer(doc + base));
+    }
+    public void setNextReader(IndexReader reader, int docBase) {
+      base = docBase;
+    }
+    public boolean acceptsDocsOutOfOrder() {
+      return true;
+    }
+  }
+  
   /**
    * Tests that a query matches the an expected set of documents using Hits.
   *

diff --git a/src/test/org/apache/lucene/search/QueryUtils.java b/src/test/org/apache/lucene/search/QueryUtils.java
index bdcf36dd222..76f86203409 100644
--- a/src/test/org/apache/lucene/search/QueryUtils.java
+++ b/src/test/org/apache/lucene/search/QueryUtils.java
@@ -5,10 +5,19 @@ import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
+import java.util.ArrayList;
+import java.util.List;
 
 import junit.framework.Assert;
 
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.ReaderUtil;
 
 /**
  * Copyright 2005 Apache Software Foundation
@@ -75,12 +84,20 @@ public class QueryUtils {
   }
 
   /**
-   * various query sanity checks on a searcher, including explanation checks.
-   * @see #checkExplanations
-   * @see #checkSkipTo
+   * Various query sanity checks on a searcher; some checks are only done
+   * for instances of IndexSearcher.
+   *
    * @see #check(Query)
+   * @see #checkFirstSkipTo
+   * @see #checkSkipTo
+   * @see #checkExplanations
+   * @see #checkSerialization
+   * @see #checkEqual
    */
   public static void check(Query q1, Searcher s) {
+    check(q1, s, true);
+  }
+  private static void check(Query q1, Searcher s, boolean wrap) {
     try {
       check(q1);
       if (s!=null) {
@@ -88,6 +105,16 @@ public class QueryUtils {
         IndexSearcher is = (IndexSearcher)s;
         checkFirstSkipTo(q1,is);
         checkSkipTo(q1,is);
+        if (wrap) {
+          check(q1, wrapUnderlyingReader(is, -1), false);
+          check(q1, wrapUnderlyingReader(is, 0), false);
+          check(q1, wrapUnderlyingReader(is, +1), false);
+        }
+      }
+      if (wrap) {
+        check(q1, wrapSearcher(s, -1), false);
+        check(q1, wrapSearcher(s, 0), false);
+        check(q1, wrapSearcher(s, +1), false);
       }
       checkExplanations(q1,s);
       checkSerialization(q1,s);
@@ -101,6 +128,104 @@ public class QueryUtils {
     }
   }
 
+  /**
+   * Given an IndexSearcher, returns a new IndexSearcher whose IndexReader
+   * is a MultiReader containing the Reader of the original IndexSearcher,
+   * as well as several "empty" IndexReaders -- some of which will have
+   * deleted documents in them.  This new IndexSearcher should
+   * behave exactly the same as the original IndexSearcher.
+   * @param s the searcher to wrap
+   * @param edge if negative, s will be the first sub; if 0, s will be in the middle, if positive s will be the last sub
+   */
+  public static IndexSearcher wrapUnderlyingReader(final IndexSearcher s, final int edge)
+    throws IOException {
+
+    IndexReader r = s.getIndexReader();
+
+    // we can't put deleted docs before the nested reader, because
+    // it will throw off the docIds
+    IndexReader[] readers = new IndexReader[] {
+      edge < 0 ? r : IndexReader.open(makeEmptyIndex(0)),
+      IndexReader.open(makeEmptyIndex(0)),
+      new MultiReader(new IndexReader[] {
+        IndexReader.open(makeEmptyIndex(edge < 0 ? 4 : 0)),
+        IndexReader.open(makeEmptyIndex(0)),
+        0 == edge ? r : IndexReader.open(makeEmptyIndex(0))
+      }),
+      IndexReader.open(makeEmptyIndex(0 < edge ? 0 : 7)),
+      IndexReader.open(makeEmptyIndex(0)),
+      new MultiReader(new IndexReader[] {
+        IndexReader.open(makeEmptyIndex(0 < edge ? 0 : 5)),
+        IndexReader.open(makeEmptyIndex(0)),
+        0 < edge ? r : IndexReader.open(makeEmptyIndex(0))
+      })
+    };
+    IndexSearcher out = new IndexSearcher(new MultiReader(readers));
+    out.setSimilarity(s.getSimilarity());
+    return out;
+  }
+  /**
+   * Given a Searcher, returns a new MultiSearcher wrapping the
+   * original Searcher,
+   * as well as several "empty" IndexSearchers -- some of which will have
+   * deleted documents in them.  This new MultiSearcher
+   * should behave exactly the same as the original Searcher.
+   * @param s the Searcher to wrap
+   * @param edge if negative, s will be the first sub; if 0, s will be in the middle, if positive s will be the last sub
+   */
+  public static MultiSearcher wrapSearcher(final Searcher s, final int edge)
+    throws IOException {
+
+    // we can't put deleted docs before the nested reader, because
+    // it will throw off the docIds
+    Searcher[] searchers = new Searcher[] {
+      edge < 0 ? s : new IndexSearcher(makeEmptyIndex(0)),
+      new MultiSearcher(new Searcher[] {
+        new IndexSearcher(makeEmptyIndex(edge < 0 ? 65 : 0)),
+        new IndexSearcher(makeEmptyIndex(0)),
+        0 == edge ? s : new IndexSearcher(makeEmptyIndex(0))
+      }),
+      new IndexSearcher(makeEmptyIndex(0 < edge ? 0 : 3)),
+      new IndexSearcher(makeEmptyIndex(0)),
+      new MultiSearcher(new Searcher[] {
+        new IndexSearcher(makeEmptyIndex(0 < edge ? 0 : 5)),
+        new IndexSearcher(makeEmptyIndex(0)),
+        0 < edge ? s : new IndexSearcher(makeEmptyIndex(0))
+      })
+    };
+    MultiSearcher out = new MultiSearcher(searchers);
+    out.setSimilarity(s.getSimilarity());
+    return out;
+  }
+
+  private static RAMDirectory makeEmptyIndex(final int numDeletedDocs)
+    throws IOException {
+    RAMDirectory d = new RAMDirectory();
+    IndexWriter w = new IndexWriter(d, new WhitespaceAnalyzer(), true,
+                                    MaxFieldLength.LIMITED);
+    for (int i = 0; i < numDeletedDocs; i++) {
+      w.addDocument(new Document());
+    }
+    w.commit();
+    w.deleteDocuments( new MatchAllDocsQuery() );
+    w.commit();
+
+    if (0 < numDeletedDocs)
+      Assert.assertTrue("writer has no deletions", w.hasDeletions());
+
+    Assert.assertEquals("writer is missing some deleted docs",
+                        numDeletedDocs, w.maxDoc());
+    Assert.assertEquals("writer has non-deleted docs",
+                        0, w.numDocs());
+    w.close();
+    IndexReader r = IndexReader.open(d);
+    Assert.assertEquals("reader has wrong number of deleted docs",
+                        numDeletedDocs, r.numDeletedDocs());
+    r.close();
+    return d;
+  }
+
   /** check that the query weight is serializable.
    * @throws IOException if serialization check fail.
    */
@@ -115,7 +240,7 @@ public class QueryUtils {
     ois.readObject();
     ois.close();
 
-    //skip rquals() test for now - most weights don't overide equals() and we won't add this just for the tests.
+    //skip equals() test for now - most weights don't override equals() and we won't add this just for the tests.
     //TestCase.assertEquals("writeObject(w) != w. ("+w+")",w2,w);
   } catch (Exception e) {
@@ -146,10 +271,6 @@ public class QueryUtils {
       {skip_op, skip_op, skip_op, next_op, next_op},
     };
     for (int k = 0; k < orders.length; k++) {
-      IndexReader[] readers = s.getIndexReader().getSequentialSubReaders();
-
-      for (int x = 0; x < readers.length; x++) {
-        IndexReader reader = readers[x];
 
         final int order[] = orders[k];
         // System.out.print("Order:");for (int i = 0; i < order.length; i++)
@@ -158,7 +279,7 @@ public class QueryUtils {
         final int opidx[] = { 0 };
         final Weight w = q.weight(s);
-        final Scorer scorer = w.scorer(reader, true, false);
+        final Scorer scorer = w.scorer(s.getIndexReader(), true, false);
         if (scorer == null) {
           continue;
         }
@@ -229,7 +350,6 @@ public class QueryUtils {
                .nextDoc()) != DocIdSetIterator.NO_MORE_DOCS;
         Assert.assertFalse(more);
       }
-      }
   }
 
   // check that first skip on just created scorers always goes to the right doc
@@ -271,7 +391,9 @@ public class QueryUtils {
         }
       });
 
-    IndexReader[] readers = s.getIndexReader().getSequentialSubReaders();
+    List readerList = new ArrayList();
+    ReaderUtil.gatherSubReaders(readerList, s.getIndexReader());
+    IndexReader[] readers = (IndexReader[]) readerList.toArray(new IndexReader[0]);
     for(int i = 0; i < readers.length; i++) {
       IndexReader reader = readers[i];
       Weight w = q.weight(s);

diff --git a/src/test/org/apache/lucene/search/TestExplanations.java b/src/test/org/apache/lucene/search/TestExplanations.java
index 48ad17f46eb..6b029f1227d 100644
--- a/src/test/org/apache/lucene/search/TestExplanations.java
+++ b/src/test/org/apache/lucene/search/TestExplanations.java
@@ -17,35 +17,30 @@ package org.apache.lucene.search;
  * limitations under the License.
 */
-import org.apache.lucene.search.spans.*;
-import org.apache.lucene.store.RAMDirectory;
-
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.queryParser.ParseException;
-
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.spans.SpanFirstQuery;
+import org.apache.lucene.search.spans.SpanNearQuery;
+import org.apache.lucene.search.spans.SpanNotQuery;
+import org.apache.lucene.search.spans.SpanOrQuery;
+import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.DocIdBitSet;
-
-import java.util.Random;
-import java.util.BitSet;
 
 /**
- * Tests primative queries (ie: that rewrite to themselves) to
+ * Tests primitive queries (ie: that rewrite to themselves) to
  * insure they match the expected set of docs, and that the score of each
  * match is equal to the value of the scores explanation.
 *
 *

- * The assumption is that if all of the "primative" queries work well,
- * then anythingthat rewrites to a primative will work well also.
+ * The assumption is that if all of the "primitive" queries work well,
+ * then anything that rewrites to a primitive will work well also.
 *

 *
 * @see "Subclasses for actual tests"
 */
@@ -53,6 +48,7 @@ public class TestExplanations extends LuceneTestCase {
   protected IndexSearcher searcher;
 
+  public static final String KEY = "KEY";
   public static final String FIELD = "field";
   public static final QueryParser qp = new QueryParser(FIELD, new WhitespaceAnalyzer());
@@ -69,6 +65,7 @@ public class TestExplanations extends LuceneTestCase {
                                      IndexWriter.MaxFieldLength.LIMITED);
     for (int i = 0; i < docFields.length; i++) {
       Document doc = new Document();
+      doc.add(new Field(KEY, ""+i, Field.Store.NO, Field.Index.NOT_ANALYZED));
       doc.add(new Field(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED));
       writer.addDocument(doc);
     }
@@ -117,18 +114,22 @@ public class TestExplanations extends LuceneTestCase {
     bqtest(makeQuery(queryText), expDocNrs);
   }
 
-  /** A filter that only lets the specified document numbers pass */
-  public static class ItemizedFilter extends Filter {
-    int[] docs;
-    public ItemizedFilter(int[] docs) {
-      this.docs = docs;
-    }
-    public DocIdSet getDocIdSet(IndexReader r) {
-      BitSet b = new BitSet(r.maxDoc());
-      for (int i = 0; i < docs.length; i++) {
-        b.set(docs[i]);
+  /**
+   * Convenience subclass of FieldCacheTermsFilter
+   */
+  public static class ItemizedFilter extends FieldCacheTermsFilter {
+    private static String[] int2str(int [] terms) {
+      String [] out = new String[terms.length];
+      for (int i = 0; i < terms.length; i++) {
+        out[i] = ""+terms[i];
       }
-      return new DocIdBitSet(b);
+      return out;
+    }
+    public ItemizedFilter(String keyField, int [] keys) {
+      super(keyField, int2str(keys));
+    }
+    public ItemizedFilter(int [] keys) {
+      super(KEY, int2str(keys));
     }
   }
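
For readers who want to try the enhanced utilities, the sketch below shows one way a test could drive them. It is an illustration only, not part of the patch: it assumes the Lucene 2.9-era APIs used above and that the org.apache.lucene.search test utilities (CheckHits, QueryUtils) are on the classpath; the test class name, field name, and document contents are hypothetical.

package org.apache.lucene.search; // hypothetical test, placed here only so the utilities above resolve without extra imports

import junit.framework.TestCase;

import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;

public class TestWrappedSanityExample extends TestCase {

  public void testTermQueryAgainstWrappedSearchers() throws Exception {
    // Build a tiny two-document index in RAM.
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true,
                                         IndexWriter.MaxFieldLength.LIMITED);
    String[] contents = { "alpha beta", "beta gamma" };
    for (int i = 0; i < contents.length; i++) {
      Document doc = new Document();
      doc.add(new Field("field", contents[i], Field.Store.NO, Field.Index.ANALYZED));
      writer.addDocument(doc);
    }
    writer.close();

    IndexSearcher searcher = new IndexSearcher(IndexReader.open(dir));
    Query q = new TermQuery(new Term("field", "beta"));

    // checkHitCollector asserts the expected doc ids and, with this patch,
    // re-runs the same query against the searcher wrapped in MultiSearcher
    // and MultiReader variants (edge = -1, 0, +1 around empty sub-indexes).
    CheckHits.checkHitCollector(q, "field", searcher, new int[] { 0, 1 });

    // QueryUtils.check can also be called directly; it now repeats its
    // skipTo/explanation sanity checks against the wrapped readers and
    // searchers behind the scenes.
    QueryUtils.check(q, searcher);

    searcher.close();
  }
}

Because checkHitCollector and QueryUtils.check now internally repeat their assertions against MultiReader/MultiSearcher-wrapped variants of the searcher, a single call like this also exercises the composite-index edge cases this patch targets.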