LUCENE-5487: add test case verifying a query time join inside a BQ still gets the optimized top-level scorer (it doesn't today); fixed nocommits

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5487@1574434 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2014-03-05 11:17:38 +00:00
parent 2303ca13ed
commit e903257259
5 changed files with 173 additions and 45 deletions

View File

@ -112,33 +112,47 @@ final class BooleanScorer extends TopScorer {
// An internal class which is used in score(Collector, int) for setting the // An internal class which is used in score(Collector, int) for setting the
// current score. This is required since Collector exposes a setScorer method // current score. This is required since Collector exposes a setScorer method
// and implementations that need the score will call scorer.score(). // and implementations that need the score will call scorer.score().
// Therefore the only methods that are implemented are score() and doc(). // Therefore the only methods that are implemented are
private static final class BucketScorer extends Scorer { // score(), docID() and freq().
private static final class FakeScorer extends Scorer {
double score; float score;
int doc = NO_MORE_DOCS; int doc;
int freq; int freq;
public BucketScorer(Weight weight) { super(weight); } public FakeScorer() {
super(null);
}
@Override @Override
public int advance(int target) { return DocsEnum.NO_MORE_DOCS; } public int advance(int target) {
throw new UnsupportedOperationException();
}
@Override @Override
public int docID() { return doc; } public int docID() {
return doc;
}
@Override @Override
public int freq() { return freq; } public int freq() {
return freq;
}
@Override @Override
public int nextDoc() { return DocsEnum.NO_MORE_DOCS; } public int nextDoc() {
throw new UnsupportedOperationException();
}
@Override @Override
public float score() { return (float)score; } public float score() {
return score;
}
@Override @Override
public long cost() { return 1; } public long cost() {
throw new UnsupportedOperationException();
}
} }
static final class Bucket { static final class Bucket {
@ -236,10 +250,10 @@ final class BooleanScorer extends TopScorer {
boolean more; boolean more;
Bucket tmp; Bucket tmp;
BucketScorer bs = new BucketScorer(weight); FakeScorer fs = new FakeScorer();
// The internal loop will set the score and doc before calling collect. // The internal loop will set the score and doc before calling collect.
collector.setScorer(bs); collector.setScorer(fs);
do { do {
bucketTable.first = null; bucketTable.first = null;
@ -267,9 +281,9 @@ final class BooleanScorer extends TopScorer {
} }
if (current.coord >= minNrShouldMatch) { if (current.coord >= minNrShouldMatch) {
bs.score = current.score * coordFactors[current.coord]; fs.score = (float) (current.score * coordFactors[current.coord]);
bs.doc = current.doc; fs.doc = current.doc;
bs.freq = current.coord; fs.freq = current.coord;
collector.collect(current.doc); collector.collect(current.doc);
} }
} }

View File

@ -92,18 +92,6 @@ public abstract class Weight {
* *
* @param context * @param context
* the {@link AtomicReaderContext} for which to return the {@link Scorer}. * the {@link AtomicReaderContext} for which to return the {@link Scorer}.
* @param scoreDocsInOrder
* specifies whether in-order scoring of documents is required. Note
* that if set to false (i.e., out-of-order scoring is required),
* this method can return whatever scoring mode it supports, as every
* in-order scorer is also an out-of-order one. However, an
* out-of-order scorer may not support {@link Scorer#nextDoc()}
* and/or {@link Scorer#advance(int)}, therefore it is recommended to
* request an in-order scorer if use of these methods is required.
* @param topScorer
* if true, {@link Scorer#score(Collector)} will be called; if false,
* {@link Scorer#nextDoc()} and/or {@link Scorer#advance(int)} will
* be called.
* @param acceptDocs * @param acceptDocs
* Bits that represent the allowable docs to match (typically deleted docs * Bits that represent the allowable docs to match (typically deleted docs
* but possibly filtering other documents) * but possibly filtering other documents)
@ -113,7 +101,33 @@ public abstract class Weight {
*/ */
public abstract Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException; public abstract Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException;
// nocommit jdocs /**
* Optional method, to return a {@link TopScorer} to
* score the query and send hits to a {@link Collector}.
* Only queries that have a different top-level approach
* need to override this; the default implementation
* pulls a normal {@link Scorer} and iterates and
* collects the resulting hits.
*
* @param context
* the {@link AtomicReaderContext} for which to return the {@link TopScorer}.
* @param scoreDocsInOrder
* specifies whether in-order scoring of documents is required. Note
* that if set to false (i.e., out-of-order scoring is required),
* this method can return whatever scoring mode it supports, as every
* in-order scorer is also an out-of-order one. However, an
* out-of-order scorer may not support {@link Scorer#nextDoc()}
* and/or {@link Scorer#advance(int)}, therefore it is recommended to
* request an in-order scorer if use of these
* methods is required.
* @param acceptDocs
* Bits that represent the allowable docs to match (typically deleted docs
* but possibly filtering other documents)
*
* @return a {@link TopScorer} which scores documents and
* passes them to a collector.
* @throws IOException if there is a low-level I/O error
*/
public TopScorer topScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException { public TopScorer topScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException {
final Scorer scorer = scorer(context, acceptDocs); final Scorer scorer = scorer(context, acceptDocs);

View File

@ -182,7 +182,7 @@ public class TestBooleanScorer extends LuceneTestCase {
public void setScorer(Scorer scorer) { public void setScorer(Scorer scorer) {
// Make sure we got BooleanScorer: // Make sure we got BooleanScorer:
final Class<?> clazz = scorer instanceof AssertingScorer ? ((AssertingScorer) scorer).getIn().getClass() : scorer.getClass(); final Class<?> clazz = scorer instanceof AssertingScorer ? ((AssertingScorer) scorer).getIn().getClass() : scorer.getClass();
assertEquals("Scorer is implemented by wrong class", BooleanScorer.class.getName() + "$BucketScorer", clazz.getName()); assertEquals("Scorer is implemented by wrong class", BooleanScorer.class.getName() + "$FakeScorer", clazz.getName());
} }
@Override @Override

View File

@ -121,7 +121,7 @@ public class TestConstantScoreQuery extends LuceneTestCase {
checkHits(searcher, csq2, csq2.getBoost(), ConstantScoreQuery.ConstantScorer.class.getName(), ConstantScoreQuery.ConstantScorer.class.getName()); checkHits(searcher, csq2, csq2.getBoost(), ConstantScoreQuery.ConstantScorer.class.getName(), ConstantScoreQuery.ConstantScorer.class.getName());
// for the combined BQ, the scorer should always be BooleanScorer's BucketScorer, because our scorer supports out-of order collection! // for the combined BQ, the scorer should always be BooleanScorer's FakeScorer, because our scorer supports out-of-order collection!
final String bucketScorerClass = BooleanScorer.class.getName() + "$BucketScorer"; final String bucketScorerClass = BooleanScorer.class.getName() + "$FakeScorer";
checkHits(searcher, bq, csq1.getBoost() + csq2.getBoost(), bucketScorerClass, null); checkHits(searcher, bq, csq1.getBoost() + csq2.getBoost(), bucketScorerClass, null);
checkHits(searcher, csqbq, csqbq.getBoost(), ConstantScoreQuery.ConstantScorer.class.getName(), bucketScorerClass); checkHits(searcher, csqbq, csqbq.getBoost(), ConstantScoreQuery.ConstantScorer.class.getName(), bucketScorerClass);
} finally { } finally {

View File

@ -17,6 +17,19 @@ package org.apache.lucene.search.join;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document; import org.apache.lucene.document.Document;
@ -34,6 +47,8 @@ import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms; import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector; import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Explanation;
@ -54,19 +69,6 @@ import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil; import org.apache.lucene.util.TestUtil;
import org.junit.Test; import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
public class TestJoinUtil extends LuceneTestCase { public class TestJoinUtil extends LuceneTestCase {
public void testSimple() throws Exception { public void testSimple() throws Exception {
@ -151,6 +153,104 @@ public class TestJoinUtil extends LuceneTestCase {
dir.close(); dir.close();
} }
/**
 * LUCENE-5487: verifies that a query-time join query, when added as a
 * SHOULD clause of a {@link BooleanQuery}, is still executed through the
 * join query's optimized TopScorer.
 *
 * <p>The test indexes two "product" documents (with description/name/id
 * fields) and four "price" documents that point at a product through the
 * {@code productId} field, joins from {@code id} to {@code productId}, and
 * then inspects the order in which hits reach a custom {@link Collector}.
 * The check is order-based: it fails if docID 1 is collected after docID 5
 * has already been collected.
 *
 * <p>NOTE(review): the test passes vacuously if neither docID 1 nor docID 5
 * is ever collected; nothing here asserts that any hit was seen.
 *
 * @throws Exception if indexing or searching fails
 */
public void testInsideBooleanQuery() throws Exception {
// Field holding each document's own id (the "from" side of the join).
final String idField = "id";
// Field on price documents that references a product's id (the "to" side).
final String toField = "productId";
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(
random(),
dir,
newIndexWriterConfig(TEST_VERSION_CURRENT,
new MockAnalyzer(random())).setMergePolicy(newLogMergePolicy()));
// The "doc N" comments below give the insertion order; the collector
// further down compares against docIDs 1 and 5.
// NOTE(review): this presumably relies on the log merge policy keeping
// documents in insertion order across forceMerge(1) -- confirm.

// doc 0: product with id 7
Document doc = new Document();
doc.add(new TextField("description", "random text", Field.Store.NO));
doc.add(new TextField("name", "name1", Field.Store.NO));
doc.add(new TextField(idField, "7", Field.Store.NO));
w.addDocument(doc);
// doc 1: price entry referencing product 7
doc = new Document();
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new TextField(idField, "2", Field.Store.NO));
doc.add(new TextField(toField, "7", Field.Store.NO));
w.addDocument(doc);
// doc 2: price entry referencing product 7
doc = new Document();
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new TextField(idField, "3", Field.Store.NO));
doc.add(new TextField(toField, "7", Field.Store.NO));
w.addDocument(doc);
// doc 3: product with id 0
doc = new Document();
doc.add(new TextField("description", "more random text", Field.Store.NO));
doc.add(new TextField("name", "name2", Field.Store.NO));
doc.add(new TextField(idField, "0", Field.Store.NO));
w.addDocument(doc);
// Commit part-way through indexing, before the remaining price entries.
w.commit();
// doc 4: price entry referencing product 0
doc = new Document();
doc.add(new TextField("price", "10.0", Field.Store.NO));
doc.add(new TextField(idField, "5", Field.Store.NO));
doc.add(new TextField(toField, "0", Field.Store.NO));
w.addDocument(doc);
// doc 5: price entry referencing product 0
doc = new Document();
doc.add(new TextField("price", "20.0", Field.Store.NO));
doc.add(new TextField(idField, "6", Field.Store.NO));
doc.add(new TextField(toField, "0", Field.Store.NO));
w.addDocument(doc);
// Collapse everything into a single segment before opening the reader.
w.forceMerge(1);
IndexSearcher indexSearcher = new IndexSearcher(w.getReader());
w.close();
// Join from products whose description contains "random" (via their id
// field) to the documents whose productId field holds a matching value.
Query joinQuery =
JoinUtil.createJoinQuery(idField, false, toField, new TermQuery(new Term("description", "random")), indexSearcher, ScoreMode.Avg);
// Wrap the join query in a BooleanQuery made only of SHOULD clauses; the
// second clause is a plain term query on id:3.
BooleanQuery bq = new BooleanQuery();
bq.add(joinQuery, BooleanClause.Occur.SHOULD);
bq.add(new TermQuery(new Term("id", "3")), BooleanClause.Occur.SHOULD);
indexSearcher.search(bq, new Collector() {
// Set once docID 5 has been collected.
boolean sawFive;
@Override
public void setNextReader(AtomicReaderContext context) {
}
@Override
public void collect(int docID) {
// Hairy / evil: this check depends on how BooleanScorer stores
// temporarily collected docIDs (by appending to the head of a
// linked list). The test treats "docID 1 collected after docID 5"
// as evidence that the join query's optimized topScorer was NOT used.
if (docID == 5) {
sawFive = true;
} else if (docID == 1) {
assertFalse("optimized topScorer was not used for join query embedded in boolean query!", sawFive);
}
}
@Override
public void setScorer(Scorer scorer) {
}
@Override
public boolean acceptsDocsOutOfOrder() {
// Declare that out-of-order collection is acceptable.
return true;
}
});
indexSearcher.getIndexReader().close();
dir.close();
}
public void testSimpleWithScoring() throws Exception { public void testSimpleWithScoring() throws Exception {
final String idField = "id"; final String idField = "id";
final String toField = "movieId"; final String toField = "movieId";