LUCENE-5487: add test case verifying a query time join inside a BQ still gets the optimized top-level scorer (it doesn't today); fixed nocommits

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5487@1574434 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2014-03-05 11:17:38 +00:00
parent 2303ca13ed
commit e903257259
5 changed files with 173 additions and 45 deletions

View File

@ -112,33 +112,47 @@ final class BooleanScorer extends TopScorer {
// An internal class which is used in score(Collector, int) for setting the
// current score. This is required since Collector exposes a setScorer method
// and implementations that need the score will call scorer.score().
// Therefore the only methods that are implemented are score() and doc().
private static final class BucketScorer extends Scorer {
// Therefore the only methods that are implemented are
// score(), doc() and freq().
private static final class FakeScorer extends Scorer {
double score;
int doc = NO_MORE_DOCS;
float score;
int doc;
int freq;
public BucketScorer(Weight weight) { super(weight); }
public FakeScorer() {
super(null);
}
@Override
public int advance(int target) { return DocsEnum.NO_MORE_DOCS; }
public int advance(int target) {
throw new UnsupportedOperationException();
}
@Override
public int docID() { return doc; }
public int docID() {
return doc;
}
@Override
public int freq() { return freq; }
public int freq() {
return freq;
}
@Override
public int nextDoc() { return DocsEnum.NO_MORE_DOCS; }
public int nextDoc() {
throw new UnsupportedOperationException();
}
@Override
public float score() { return (float)score; }
public float score() {
return score;
}
@Override
public long cost() { return 1; }
public long cost() {
throw new UnsupportedOperationException();
}
}
static final class Bucket {
@ -236,10 +250,10 @@ final class BooleanScorer extends TopScorer {
boolean more;
Bucket tmp;
BucketScorer bs = new BucketScorer(weight);
FakeScorer fs = new FakeScorer();
// The internal loop will set the score and doc before calling collect.
collector.setScorer(bs);
collector.setScorer(fs);
do {
bucketTable.first = null;
@ -267,9 +281,9 @@ final class BooleanScorer extends TopScorer {
}
if (current.coord >= minNrShouldMatch) {
bs.score = current.score * coordFactors[current.coord];
bs.doc = current.doc;
bs.freq = current.coord;
fs.score = (float) (current.score * coordFactors[current.coord]);
fs.doc = current.doc;
fs.freq = current.coord;
collector.collect(current.doc);
}
}

View File

@ -92,18 +92,6 @@ public abstract class Weight {
*
* @param context
* the {@link AtomicReaderContext} for which to return the {@link Scorer}.
* @param scoreDocsInOrder
* specifies whether in-order scoring of documents is required. Note
* that if set to false (i.e., out-of-order scoring is required),
* this method can return whatever scoring mode it supports, as every
* in-order scorer is also an out-of-order one. However, an
* out-of-order scorer may not support {@link Scorer#nextDoc()}
* and/or {@link Scorer#advance(int)}, therefore it is recommended to
* request an in-order scorer if use of these methods is required.
* @param topScorer
* if true, {@link Scorer#score(Collector)} will be called; if false,
* {@link Scorer#nextDoc()} and/or {@link Scorer#advance(int)} will
* be called.
* @param acceptDocs
* Bits that represent the allowable docs to match (typically deleted docs
* but possibly filtering other documents)
@ -113,7 +101,33 @@ public abstract class Weight {
*/
public abstract Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException;
// nocommit jdocs
/**
* Optional method, to return a {@link TopScorer} to
* score the query and send hits to a {@link Collector}.
* Only queries that have a different top-level approach
* need to override this; the default implementation
* pulls a normal {@link Scorer} and iterates and
* collects the resulting hits.
*
* @param context
the {@link AtomicReaderContext} for which to return the {@link TopScorer}.
* @param scoreDocsInOrder
* specifies whether in-order scoring of documents is required. Note
* that if set to false (i.e., out-of-order scoring is required),
* this method can return whatever scoring mode it supports, as every
* in-order scorer is also an out-of-order one. However, an
* out-of-order scorer may not support {@link Scorer#nextDoc()}
* and/or {@link Scorer#advance(int)}, therefore it is recommended to
* request an in-order scorer if use of these
* methods is required.
* @param acceptDocs
* Bits that represent the allowable docs to match (typically deleted docs
* but possibly filtering other documents)
*
* @return a {@link TopScorer} which scores documents and
* passes them to a collector.
* @throws IOException if there is a low-level I/O error
*/
public TopScorer topScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException {
final Scorer scorer = scorer(context, acceptDocs);

View File

@ -182,7 +182,7 @@ public class TestBooleanScorer extends LuceneTestCase {
public void setScorer(Scorer scorer) {
// Make sure we got BooleanScorer:
final Class<?> clazz = scorer instanceof AssertingScorer ? ((AssertingScorer) scorer).getIn().getClass() : scorer.getClass();
assertEquals("Scorer is implemented by wrong class", BooleanScorer.class.getName() + "$BucketScorer", clazz.getName());
assertEquals("Scorer is implemented by wrong class", BooleanScorer.class.getName() + "$FakeScorer", clazz.getName());
}
@Override

View File

@ -121,7 +121,7 @@ public class TestConstantScoreQuery extends LuceneTestCase {
checkHits(searcher, csq2, csq2.getBoost(), ConstantScoreQuery.ConstantScorer.class.getName(), ConstantScoreQuery.ConstantScorer.class.getName());
// for the combined BQ, the scorer should always be BooleanScorer's FakeScorer, because our scorer supports out-of-order collection!
final String bucketScorerClass = BooleanScorer.class.getName() + "$BucketScorer";
final String bucketScorerClass = BooleanScorer.class.getName() + "$FakeScorer";
checkHits(searcher, bq, csq1.getBoost() + csq2.getBoost(), bucketScorerClass, null);
checkHits(searcher, csqbq, csqbq.getBoost(), ConstantScoreQuery.ConstantScorer.class.getName(), bucketScorerClass);
} finally {

View File

@ -17,6 +17,19 @@ package org.apache.lucene.search.join;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
@ -34,6 +47,8 @@ import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
@ -54,19 +69,6 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
public class TestJoinUtil extends LuceneTestCase {
public void testSimple() throws Exception {
@ -151,6 +153,104 @@ public class TestJoinUtil extends LuceneTestCase {
dir.close();
}
/**
 * LUCENE-5487: checks that a join query added as a SHOULD clause of a
 * {@link BooleanQuery} is still scored through the join query's optimized
 * TopScorer.
 *
 * <p>Six documents are indexed and force-merged into a single segment. The
 * collector then asserts on the relative order in which docIDs 1 and 5 are
 * delivered.
 */
public void testInsideBooleanQuery() throws Exception {
  final String idField = "id";
  final String toField = "productId";

  Directory directory = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(
      random(),
      directory,
      newIndexWriterConfig(TEST_VERSION_CURRENT,
          new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));

  // docID 0: description + name, id=7
  Document doc0 = new Document();
  doc0.add(new TextField("description", "random text", Field.Store.NO));
  doc0.add(new TextField("name", "name1", Field.Store.NO));
  doc0.add(new TextField(idField, "7", Field.Store.NO));
  writer.addDocument(doc0);

  // docID 1: price, id=2, points at id 7 through toField
  Document doc1 = new Document();
  doc1.add(new TextField("price", "10.0", Field.Store.NO));
  doc1.add(new TextField(idField, "2", Field.Store.NO));
  doc1.add(new TextField(toField, "7", Field.Store.NO));
  writer.addDocument(doc1);

  // docID 2: price, id=3, points at id 7 through toField
  Document doc2 = new Document();
  doc2.add(new TextField("price", "20.0", Field.Store.NO));
  doc2.add(new TextField(idField, "3", Field.Store.NO));
  doc2.add(new TextField(toField, "7", Field.Store.NO));
  writer.addDocument(doc2);

  // docID 3: description + name, id=0
  Document doc3 = new Document();
  doc3.add(new TextField("description", "more random text", Field.Store.NO));
  doc3.add(new TextField("name", "name2", Field.Store.NO));
  doc3.add(new TextField(idField, "0", Field.Store.NO));
  writer.addDocument(doc3);
  writer.commit();

  // docID 4: price, id=5, points at id 0 through toField
  Document doc4 = new Document();
  doc4.add(new TextField("price", "10.0", Field.Store.NO));
  doc4.add(new TextField(idField, "5", Field.Store.NO));
  doc4.add(new TextField(toField, "0", Field.Store.NO));
  writer.addDocument(doc4);

  // docID 5: price, id=6, points at id 0 through toField
  Document doc5 = new Document();
  doc5.add(new TextField("price", "20.0", Field.Store.NO));
  doc5.add(new TextField(idField, "6", Field.Store.NO));
  doc5.add(new TextField(toField, "0", Field.Store.NO));
  writer.addDocument(doc5);

  // Collapse everything into one segment so the docIDs above are the ones
  // the collector sees.
  writer.forceMerge(1);

  IndexSearcher searcher = new IndexSearcher(writer.getReader());
  writer.close();

  // Search for product
  Query descriptionQuery = new TermQuery(new Term("description", "random"));
  Query productJoin =
      JoinUtil.createJoinQuery(idField, false, toField, descriptionQuery, searcher, ScoreMode.Avg);

  BooleanQuery booleanQuery = new BooleanQuery();
  booleanQuery.add(productJoin, BooleanClause.Occur.SHOULD);
  booleanQuery.add(new TermQuery(new Term("id", "3")), BooleanClause.Occur.SHOULD);

  Collector orderChecker = new Collector() {
    // Set once docID 5 has been collected.
    boolean docFiveCollected;

    @Override
    public void setNextReader(AtomicReaderContext context) {
    }

    @Override
    public void collect(int docID) {
      // Hairy / evil: this relies on how BooleanScorer temporarily stores
      // collected docIDs (appending to the head of a linked list). Seeing
      // docID 5 before docID 1 is taken as the sign that the optimized
      // top-level scorer was not used.
      if (docID == 1) {
        assertFalse("optimized topScorer was not used for join query embedded in boolean query!", docFiveCollected);
      } else if (docID == 5) {
        docFiveCollected = true;
      }
    }

    @Override
    public void setScorer(Scorer scorer) {
    }

    @Override
    public boolean acceptsDocsOutOfOrder() {
      return true;
    }
  };
  searcher.search(booleanQuery, orderChecker);

  searcher.getIndexReader().close();
  directory.close();
}
public void testSimpleWithScoring() throws Exception {
final String idField = "id";
final String toField = "movieId";