mirror of https://github.com/apache/lucene.git
Add Bulk Scorer For ToParentBlockJoinQuery (#13697)
This commit is contained in:
parent
3da48d0403
commit
45da83bd72
|
@ -410,6 +410,10 @@ Optimizations
|
|||
|
||||
* GITHUB#13686: Replace Map<String,Object> with IntObjectHashMap for DV producer (Pan Guixin)
|
||||
|
||||
* GITHUB#13697: Add a bulk scorer to ToParentBlockJoinQuery, which delegates to the bulk scorer of the child query.
|
||||
This should speed up query evaluation when the child query has a specialized bulk scorer, such as disjunctive queries.
|
||||
(Mike Pellegrini)
|
||||
|
||||
Changes in runtime behavior
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.search.join;
|
||||
|
||||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||
import static org.apache.lucene.search.ScoreMode.COMPLETE;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -24,20 +25,25 @@ import java.util.Collections;
|
|||
import java.util.Locale;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.BulkScorer;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.FilterLeafCollector;
|
||||
import org.apache.lucene.search.FilterWeight;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LeafCollector;
|
||||
import org.apache.lucene.search.Matches;
|
||||
import org.apache.lucene.search.MatchesUtils;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryVisitor;
|
||||
import org.apache.lucene.search.Scorable;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.ScorerSupplier;
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.util.BitSet;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
/**
|
||||
* This query requires that you index children and parent docs as a single block, using the {@link
|
||||
|
@ -156,6 +162,18 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
return new BlockJoinScorer(childScorerSupplier.get(leadCost), parents, scoreMode);
|
||||
}
|
||||
|
||||
      /**
       * Returns the bulk scorer used to drive collection for this join.
       *
       * <p>Delegates to {@link BlockJoinBulkScorer} (which wraps the child query's own bulk
       * scorer) whenever child scores are actually needed. When {@code scoreMode} is {@code None}
       * it falls back to the default {@code super.bulkScorer()} instead — see inline comment.
       */
      @Override
      public BulkScorer bulkScorer() throws IOException {
        if (scoreMode == ScoreMode.None) {
          // BlockJoinBulkScorer evaluates all child hits exhaustively, but when scoreMode is None
          // we only need to evaluate a single child doc per parent. In this case, use the default
          // bulk scorer instead, which uses BlockJoinScorer to iterate over child hits.
          // BlockJoinScorer is optimized to skip child hit evaluation when scoreMode is None.
          return super.bulkScorer();
        }
        return new BlockJoinBulkScorer(childScorerSupplier.bulkScorer(), parents, scoreMode);
      }
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return childScorerSupplier.cost();
|
||||
|
@ -275,6 +293,54 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
}
|
||||
}
|
||||
|
||||
  /**
   * Accumulates the scores of a parent doc's child hits into a single parent score, according to
   * the join {@link ScoreMode}.
   *
   * <p>Usage: call {@link #reset(Scorable)} when a new parent begins (seeding the aggregate with
   * the first child hit), then {@link #addChildScore(Scorable)} for each subsequent child hit,
   * then read {@link #score()}.
   */
  private static class Score extends Scorable {
    private final ScoreMode scoreMode;
    // Running aggregate: sum for Total/Avg, running min/max for Min/Max, unused (0) for None.
    // Kept as a double to limit float rounding error while summing.
    private double score;
    // Number of child hits folded in so far; divisor for Avg.
    private int freq;

    public Score(ScoreMode scoreMode) {
      this.scoreMode = scoreMode;
      this.score = 0;
      this.freq = 0;
    }

    /**
     * Restarts accumulation for a new parent. The aggregate is seeded with the first child's
     * score (skipped entirely when scoreMode is None, avoiding the score computation).
     */
    public void reset(Scorable firstChildScorer) throws IOException {
      score = scoreMode == ScoreMode.None ? 0 : firstChildScorer.score();
      freq = 1;
    }

    /** Folds one more child hit's score into the aggregate for the current parent. */
    public void addChildScore(Scorable childScorer) throws IOException {
      // Only compute the child score when it will actually be used.
      final float childScore = scoreMode == ScoreMode.None ? 0 : childScorer.score();
      freq++;
      switch (scoreMode) {
        case Total:
        case Avg:
          // Avg also sums here; the division by freq happens lazily in score().
          score += childScore;
          break;
        case Min:
          score = Math.min(score, childScore);
          break;
        case Max:
          score = Math.max(score, childScore);
          break;
        case None:
          break;
        default:
          throw new AssertionError();
      }
    }

    @Override
    public float score() {
      // reset() must have been called at least once for the current parent.
      assert freq > 0;
      double score = this.score;
      if (scoreMode == ScoreMode.Avg) {
        score /= freq;
      }
      return (float) score;
    }
  }
|
||||
|
||||
static class BlockJoinScorer extends Scorer {
|
||||
private final Scorer childScorer;
|
||||
private final BitSet parentBits;
|
||||
|
@ -283,13 +349,14 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
private final TwoPhaseIterator childTwoPhase;
|
||||
private final ParentApproximation parentApproximation;
|
||||
private final ParentTwoPhase parentTwoPhase;
|
||||
private float score;
|
||||
private final Score parentScore;
|
||||
|
||||
public BlockJoinScorer(Scorer childScorer, BitSet parentBits, ScoreMode scoreMode) {
|
||||
// System.out.println("Q.init firstChildDoc=" + firstChildDoc);
|
||||
this.parentBits = parentBits;
|
||||
this.childScorer = childScorer;
|
||||
this.scoreMode = scoreMode;
|
||||
this.parentScore = new Score(scoreMode);
|
||||
childTwoPhase = childScorer.twoPhaseIterator();
|
||||
if (childTwoPhase == null) {
|
||||
childApproximation = childScorer.iterator();
|
||||
|
@ -329,8 +396,7 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
setScoreAndFreq();
|
||||
return score;
|
||||
return scoreChildDocs();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -348,34 +414,26 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
}
|
||||
}
|
||||
|
||||
private void setScoreAndFreq() throws IOException {
|
||||
private float scoreChildDocs() throws IOException {
|
||||
if (childApproximation.docID() >= parentApproximation.docID()) {
|
||||
return;
|
||||
return parentScore.score();
|
||||
}
|
||||
double score = scoreMode == ScoreMode.None ? 0 : childScorer.score();
|
||||
int freq = 1;
|
||||
while (childApproximation.nextDoc() < parentApproximation.docID()) {
|
||||
if (childTwoPhase == null || childTwoPhase.matches()) {
|
||||
final float childScore = scoreMode == ScoreMode.None ? 0 : childScorer.score();
|
||||
freq += 1;
|
||||
switch (scoreMode) {
|
||||
case Total:
|
||||
case Avg:
|
||||
score += childScore;
|
||||
break;
|
||||
case Min:
|
||||
score = Math.min(score, childScore);
|
||||
break;
|
||||
case Max:
|
||||
score = Math.max(score, childScore);
|
||||
break;
|
||||
case None:
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError();
|
||||
|
||||
float score = 0;
|
||||
if (scoreMode != ScoreMode.None) {
|
||||
parentScore.reset(childScorer);
|
||||
while (childApproximation.nextDoc() < parentApproximation.docID()) {
|
||||
if (childTwoPhase == null || childTwoPhase.matches()) {
|
||||
parentScore.addChildScore(childScorer);
|
||||
}
|
||||
}
|
||||
|
||||
score = parentScore.score();
|
||||
}
|
||||
|
||||
// TODO: When score mode is None, this check is broken because the child approximation is not
|
||||
// advanced and will therefore never match the parent approximation at this point in
|
||||
// execution. Fix this error check when score mode is None.
|
||||
if (childApproximation.docID() == parentApproximation.docID()
|
||||
&& (childTwoPhase == null || childTwoPhase.matches())) {
|
||||
throw new IllegalStateException(
|
||||
|
@ -386,10 +444,8 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
+ ", "
|
||||
+ childScorer.getClass());
|
||||
}
|
||||
if (scoreMode == ScoreMode.Avg) {
|
||||
score /= freq;
|
||||
}
|
||||
this.score = (float) score;
|
||||
|
||||
return score;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -440,6 +496,120 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
}
|
||||
}
|
||||
|
||||
  /**
   * A {@link FilterLeafCollector} that is additionally notified when a scoring batch ends, so the
   * wrapping scorer can flush pending state (e.g. emit the last buffered parent doc).
   */
  private abstract static class BatchAwareLeafCollector extends FilterLeafCollector {
    public BatchAwareLeafCollector(LeafCollector in) {
      super(in);
    }

    /** Called once after each batch of docs has been scored. No-op by default. */
    public void endBatch() throws IOException {}
  }
|
||||
|
||||
  /**
   * A {@link BulkScorer} for {@code ToParentBlockJoinQuery} that delegates iteration to the child
   * query's bulk scorer, then maps each run of child hits to its enclosing parent doc (the next
   * set bit in {@code parents}) and aggregates their scores via {@link Score}.
   *
   * <p>Child hits are evaluated exhaustively, so this scorer is only used when scores are needed
   * (see {@code bulkScorer()} above, which falls back for ScoreMode.None at that call site).
   */
  private static class BlockJoinBulkScorer extends BulkScorer {
    private final BulkScorer childBulkScorer;
    private final ScoreMode scoreMode;
    private final BitSet parents;
    // Cached parents.length(); docs at or beyond this point have no parent in this segment.
    private final int parentsLength;

    public BlockJoinBulkScorer(BulkScorer childBulkScorer, BitSet parents, ScoreMode scoreMode) {
      this.childBulkScorer = childBulkScorer;
      this.scoreMode = scoreMode;
      this.parents = parents;
      this.parentsLength = parents.length();
    }

    @Override
    public int score(LeafCollector collector, Bits acceptDocs, int min, int max)
        throws IOException {
      if (min == max) {
        // Empty window; nothing to score.
        return scoringCompleteCheck(max, max);
      }

      // Snap the requested [min, max) window to whole parent blocks: score child docs from just
      // after the last parent before the window up to (and including) the last parent inside it.
      // Subtract one because max is exclusive w.r.t. score but inclusive w.r.t prevSetBit
      int lastParent = parents.prevSetBit(Math.min(parentsLength, max) - 1);
      int prevParent = min == 0 ? -1 : parents.prevSetBit(min - 1);
      if (lastParent == prevParent) {
        // No parent docs in this range.
        return scoringCompleteCheck(max, max);
      }

      BatchAwareLeafCollector wrappedCollector = wrapCollector(collector);
      childBulkScorer.score(wrappedCollector, acceptDocs, prevParent + 1, lastParent + 1);
      // Flush the last parent of the batch, which has no following child hit to trigger emission.
      wrappedCollector.endBatch();

      return scoringCompleteCheck(lastParent + 1, max);
    }

    private int scoringCompleteCheck(int innerMax, int returnedMax) {
      // If we've scored the last parent in the bit set, return NO_MORE_DOCS to indicate we are done
      // scoring
      return innerMax >= parentsLength ? NO_MORE_DOCS : returnedMax;
    }

    @Override
    public long cost() {
      return childBulkScorer.cost();
    }

    /**
     * Wraps the user collector so that child-doc hits coming from the child bulk scorer are
     * buffered per parent: a parent is collected only once all of its children have been seen
     * (i.e. when the first child of the next block arrives, or at {@link
     * BatchAwareLeafCollector#endBatch()}).
     */
    private BatchAwareLeafCollector wrapCollector(LeafCollector collector) {
      return new BatchAwareLeafCollector(collector) {
        private final Score currentParentScore = new Score(scoreMode);
        // Doc id of the parent whose children are currently being aggregated; -1 before any hit.
        private int currentParent = -1;
        // The child query's scorer, set via setScorer before any collect call.
        private Scorable scorer = null;

        @Override
        public void setScorer(Scorable scorer) throws IOException {
          assert scorer != null;
          this.scorer = scorer;

          // Expose the aggregated parent score to the downstream collector, not the raw child
          // score.
          super.setScorer(
              new Scorable() {
                @Override
                public float score() {
                  return currentParentScore.score();
                }

                @Override
                public void setMinCompetitiveScore(float minScore) throws IOException {
                  // Only forwarded for None and Max; presumably pruning child hits is only safe
                  // when it cannot change a still-competitive parent's aggregate — TODO confirm
                  // against TestBlockJoinBulkScorer's min-competitive-score tests.
                  if (scoreMode == ScoreMode.None || scoreMode == ScoreMode.Max) {
                    scorer.setMinCompetitiveScore(minScore);
                  }
                }
              });
        }

        @Override
        public void collect(int doc) throws IOException {
          if (doc > currentParent) {
            // Emit the current parent and setup scoring for the next parent
            if (currentParent >= 0) {
              in.collect(currentParent);
            }

            currentParent = parents.nextSetBit(doc);
            currentParentScore.reset(scorer);
          } else if (doc == currentParent) {
            // The child query matched a doc marked as a parent — the query is malformed.
            throw new IllegalStateException(
                "Child query must not match same docs with parent filter. "
                    + "Combine them as must clauses (+) to find a problem doc. "
                    + "docId="
                    + doc
                    + ", "
                    + childBulkScorer.getClass());
          } else {
            // Another child of the current parent: fold its score into the aggregate.
            currentParentScore.addChildScore(scorer);
          }
        }

        @Override
        public void endBatch() throws IOException {
          // Flush the final buffered parent, if any.
          if (currentParent >= 0) {
            in.collect(currentParent);
          }
        }
      };
    }
  }
|
||||
|
||||
@Override
|
||||
public Query rewrite(IndexSearcher indexSearcher) throws IOException {
|
||||
final Query childRewrite = childQuery.rewrite(indexSearcher);
|
||||
|
|
|
@ -0,0 +1,458 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.search.join;
|
||||
|
||||
import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.BulkScorer;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.LeafCollector;
|
||||
import org.apache.lucene.search.Scorable;
|
||||
import org.apache.lucene.search.ScorerSupplier;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.tests.index.RandomIndexWriter;
|
||||
import org.apache.lucene.tests.util.LuceneTestCase;
|
||||
import org.apache.lucene.tests.util.TestUtil;
|
||||
|
||||
/**
 * Tests for the bulk scorer of {@code ToParentBlockJoinQuery}: verifies aggregated parent scores
 * over randomized indices against an independently-computed expectation, and pins the behavior of
 * min-competitive-score pruning for the Max and None score modes on a fixed index.
 */
public class TestBlockJoinBulkScorer extends LuceneTestCase {
  private static final String TYPE_FIELD_NAME = "type";
  private static final String VALUE_FIELD_NAME = "value";
  private static final String PARENT_FILTER_VALUE = "parent";
  private static final String CHILD_FILTER_VALUE = "child";

  /**
   * The indexable term values a child doc may match, each with a fixed score contribution
   * (applied via BoostQuery over a ConstantScoreQuery in {@link #buildQuery}).
   */
  private enum MatchValue {
    MATCH_A("A", 1),
    MATCH_B("B", 2),
    MATCH_C("C", 3),
    MATCH_D("D", 4);

    private static final List<MatchValue> VALUES = List.of(values());

    private final String text;
    private final int score;

    MatchValue(String text, int score) {
      this.text = text;
      this.score = score;
    }

    public String getText() {
      return text;
    }

    public int getScore() {
      return score;
    }

    @Override
    public String toString() {
      return text;
    }

    /** Picks a random match value using the test's shared random source. */
    public static MatchValue random() {
      return RandomPicks.randomFrom(LuceneTestCase.random(), VALUES);
    }
  }

  /** A child doc's id plus the (possibly duplicated) match values that were indexed into it. */
  private record ChildDocMatch(int docId, List<MatchValue> matches) {
    public ChildDocMatch(int docId, List<MatchValue> matches) {
      this.docId = docId;
      // Defensive: freeze the list so expected-match bookkeeping can't be mutated later.
      this.matches = Collections.unmodifiableList(matches);
    }
  }

  /**
   * Indexes a random set of parent blocks (children first, parent last — the block-join layout)
   * and returns a map from parent doc id to its children's match values. Parents with zero
   * children are indexed but omitted from the returned map.
   */
  private static Map<Integer, List<ChildDocMatch>> populateRandomIndex(
      RandomIndexWriter writer, int maxParentDocCount, int maxChildDocCount, int maxChildDocMatches)
      throws IOException {
    Map<Integer, List<ChildDocMatch>> expectedMatches = new HashMap<>();

    final int parentDocCount = random().nextInt(1, maxParentDocCount + 1);
    // Doc ids are assigned sequentially because blocks are added in order (see addDocuments).
    int currentDocId = 0;
    for (int i = 0; i < parentDocCount; i++) {
      final int childDocCount = random().nextInt(maxChildDocCount + 1);
      List<Document> docs = new ArrayList<>(childDocCount);
      List<ChildDocMatch> childDocMatches = new ArrayList<>(childDocCount);

      for (int j = 0; j < childDocCount; j++) {
        // Build a child doc
        Document childDoc = new Document();
        childDoc.add(newStringField(TYPE_FIELD_NAME, CHILD_FILTER_VALUE, Field.Store.NO));

        final int matchCount = random().nextInt(maxChildDocMatches + 1);
        List<MatchValue> matchValues = new ArrayList<>(matchCount);
        for (int k = 0; k < matchCount; k++) {
          // Add a match to the child doc
          MatchValue matchValue = MatchValue.random();
          matchValues.add(matchValue);
          childDoc.add(newStringField(VALUE_FIELD_NAME, matchValue.getText(), Field.Store.NO));
        }

        docs.add(childDoc);
        childDocMatches.add(new ChildDocMatch(currentDocId++, matchValues));
      }

      // Build a parent doc
      Document parentDoc = new Document();
      parentDoc.add(newStringField(TYPE_FIELD_NAME, PARENT_FILTER_VALUE, Field.Store.NO));
      docs.add(parentDoc);

      // Don't add parent docs with no children to expectedMatches
      if (childDocCount > 0) {
        expectedMatches.put(currentDocId, childDocMatches);
      }
      currentDocId++;

      writer.addDocuments(docs);
    }

    return expectedMatches;
  }

  /**
   * Indexes a fixed set of parent blocks whose child match values are known, so tests can assert
   * exact parent doc ids and scores. Each String[][] is one block; each inner String[] one child.
   */
  private static void populateStaticIndex(RandomIndexWriter writer) throws IOException {
    // Use these vars to improve readability when defining the docs
    final String A = MatchValue.MATCH_A.getText();
    final String B = MatchValue.MATCH_B.getText();
    final String C = MatchValue.MATCH_C.getText();
    final String D = MatchValue.MATCH_D.getText();

    for (String[][] values :
        Arrays.asList(
            new String[][] {{A, B}, {A, B, C}},
            new String[][] {{A}, {B}},
            new String[][] {{}},
            new String[][] {{A, B, C}, {A, B, C, D}},
            new String[][] {{B}},
            new String[][] {{B, C}, {A, B}, {A, C}})) {

      List<Document> docs = new ArrayList<>();
      for (String[] value : values) {
        Document childDoc = new Document();
        childDoc.add(newStringField(TYPE_FIELD_NAME, CHILD_FILTER_VALUE, Field.Store.NO));
        for (String v : value) {
          childDoc.add(newStringField(VALUE_FIELD_NAME, v, Field.Store.NO));
        }
        docs.add(childDoc);
      }

      // Parent doc is appended last, per the block-join doc layout.
      Document parentDoc = new Document();
      parentDoc.add(newStringField(TYPE_FIELD_NAME, PARENT_FILTER_VALUE, Field.Store.NO));
      docs.add(parentDoc);

      writer.addDocuments(docs);
    }
  }

  /**
   * Independently re-derives each parent's expected aggregate score from the recorded child
   * matches, mirroring the join ScoreMode semantics. Parents whose children all have zero
   * matches are excluded; when the search doesn't need scores, all expected scores are 0.
   */
  private static Map<Integer, Float> computeExpectedScores(
      Map<Integer, List<ChildDocMatch>> expectedMatches,
      ScoreMode joinScoreMode,
      org.apache.lucene.search.ScoreMode searchScoreMode) {
    Map<Integer, Float> expectedScores = new HashMap<>();
    for (var entry : expectedMatches.entrySet()) {
      // Filter out child docs with no matches since those will never contribute to the score
      List<ChildDocMatch> childDocMatches =
          entry.getValue().stream().filter(m -> !m.matches().isEmpty()).toList();
      if (childDocMatches.isEmpty()) {
        continue;
      }

      double expectedScore = 0;
      if (searchScoreMode.needsScores()) {
        boolean firstScore = true;
        for (ChildDocMatch childDocMatch : childDocMatches) {
          float expectedChildDocScore = computeExpectedScore(childDocMatch);
          switch (joinScoreMode) {
            case Total:
            case Avg:
              expectedScore += expectedChildDocScore;
              break;
            case Min:
              // The first child seeds the min; Math.min against the initial 0 would be wrong.
              expectedScore =
                  firstScore
                      ? expectedChildDocScore
                      : Math.min(expectedScore, expectedChildDocScore);
              break;
            case Max:
              expectedScore = Math.max(expectedScore, expectedChildDocScore);
              break;
            case None:
              break;
            default:
              throw new AssertionError();
          }

          firstScore = false;
        }

        if (joinScoreMode == ScoreMode.Avg) {
          expectedScore /= childDocMatches.size();
        }
      }

      expectedScores.put(entry.getKey(), (float) expectedScore);
    }

    return expectedScores;
  }

  /**
   * Expected score of a single child doc: the sum of its distinct match values' scores.
   * Deduplicated via a Set because indexing the same term twice doesn't add score twice.
   */
  private static float computeExpectedScore(ChildDocMatch childDocMatch) {
    float expectedScore = 0.0f;
    Set<MatchValue> matchValueSet = new HashSet<>(childDocMatch.matches());
    for (MatchValue matchValue : matchValueSet) {
      expectedScore += matchValue.getScore();
    }

    return expectedScore;
  }

  /**
   * Builds the query under test: a disjunction over all match values (each a boosted
   * constant-score term query, so child scores are exactly the MatchValue scores), joined to
   * parents with the given ScoreMode.
   */
  private static ToParentBlockJoinQuery buildQuery(ScoreMode scoreMode) {
    BooleanQuery.Builder childQueryBuilder = new BooleanQuery.Builder();
    for (MatchValue matchValue : MatchValue.VALUES) {
      childQueryBuilder.add(
          new BoostQuery(
              new ConstantScoreQuery(
                  new TermQuery(new Term(VALUE_FIELD_NAME, matchValue.getText()))),
              matchValue.getScore()),
          BooleanClause.Occur.SHOULD);
    }
    BitSetProducer parentsFilter =
        new QueryBitSetProducer(new TermQuery(new Term(TYPE_FIELD_NAME, PARENT_FILTER_VALUE)));
    return new ToParentBlockJoinQuery(childQueryBuilder.build(), parentsFilter, scoreMode);
  }

  /**
   * Drives the given bulk scorer over the whole segment with a recording collector and asserts
   * the collected (docId -> score) map equals {@code expectedScores}. If {@code minScore} is
   * non-null it is set as the min competitive score as soon as the scorer is available.
   */
  private static void assertScores(
      BulkScorer bulkScorer,
      org.apache.lucene.search.ScoreMode scoreMode,
      Float minScore,
      Map<Integer, Float> expectedScores)
      throws IOException {
    Map<Integer, Float> actualScores = new HashMap<>();
    bulkScorer.score(
        new LeafCollector() {
          private Scorable scorer;

          @Override
          public void setScorer(Scorable scorer) throws IOException {
            assertNotNull(scorer);
            this.scorer = scorer;
            if (minScore != null) {
              this.scorer.setMinCompetitiveScore(minScore);
            }
          }

          @Override
          public void collect(int doc) throws IOException {
            assertNotNull(scorer);
            // Record 0 when scores aren't requested, matching computeExpectedScores.
            actualScores.put(doc, scoreMode.needsScores() ? scorer.score() : 0);
          }
        },
        null,
        0,
        NO_MORE_DOCS);
    assertEquals(expectedScores, actualScores);
  }

  /**
   * Randomized end-to-end check: for many random indices and random (join, search) score-mode
   * pairs, the bulk scorer's collected scores must equal the independently computed expectation.
   */
  public void testScoreRandomIndices() throws IOException {
    for (int i = 0; i < 200 * RANDOM_MULTIPLIER; i++) {
      try (Directory dir = newDirectory()) {
        Map<Integer, List<ChildDocMatch>> expectedMatches;
        try (RandomIndexWriter w =
            new RandomIndexWriter(
                random(),
                dir,
                newIndexWriterConfig()
                    .setMergePolicy(
                        // retain doc id order
                        newLogMergePolicy(random().nextBoolean())))) {

          expectedMatches =
              populateRandomIndex(
                  w,
                  TestUtil.nextInt(random(), 10 * RANDOM_MULTIPLIER, 30 * RANDOM_MULTIPLIER),
                  20,
                  3);
          // Single segment so recorded doc ids line up with the one leaf used below.
          w.forceMerge(1);
        }

        try (IndexReader reader = DirectoryReader.open(dir)) {
          final IndexSearcher searcher = newSearcher(reader);
          final ScoreMode joinScoreMode =
              RandomPicks.randomFrom(LuceneTestCase.random(), ScoreMode.values());
          final org.apache.lucene.search.ScoreMode searchScoreMode =
              RandomPicks.randomFrom(
                  LuceneTestCase.random(), org.apache.lucene.search.ScoreMode.values());
          final Map<Integer, Float> expectedScores =
              computeExpectedScores(expectedMatches, joinScoreMode, searchScoreMode);

          ToParentBlockJoinQuery query = buildQuery(joinScoreMode);
          Weight weight = searcher.createWeight(searcher.rewrite(query), searchScoreMode, 1);
          ScorerSupplier ss = weight.scorerSupplier(searcher.getIndexReader().leaves().get(0));
          if (ss == null) {
            // Score supplier will be null when there are no matches
            assertTrue(expectedScores.isEmpty());
            continue;
          }

          assertScores(ss.bulkScorer(), searchScoreMode, null, expectedScores);
        }
      }
    }
  }

  /**
   * Pins min-competitive-score pruning behavior for ScoreMode.Max on the static index: without a
   * floor all five parents are collected; with floors, low-scoring parents are skipped.
   */
  public void testSetMinCompetitiveScoreWithScoreModeMax() throws IOException {
    try (Directory dir = newDirectory()) {
      try (RandomIndexWriter w =
          new RandomIndexWriter(
              random(),
              dir,
              newIndexWriterConfig()
                  .setMergePolicy(
                      // retain doc id order
                      newLogMergePolicy(random().nextBoolean())))) {

        populateStaticIndex(w);
        w.forceMerge(1);
      }

      try (IndexReader reader = DirectoryReader.open(dir)) {
        final IndexSearcher searcher = newSearcher(reader);
        final ToParentBlockJoinQuery query = buildQuery(ScoreMode.Max);
        final org.apache.lucene.search.ScoreMode scoreMode =
            org.apache.lucene.search.ScoreMode.TOP_SCORES;
        final Weight weight = searcher.createWeight(searcher.rewrite(query), scoreMode, 1);

        {
          // No min competitive score: every parent with matching children is collected.
          Map<Integer, Float> expectedScores =
              Map.of(
                  2, 6.0f,
                  5, 2.0f,
                  10, 10.0f,
                  12, 2.0f,
                  16, 5.0f);

          ScorerSupplier ss = weight.scorerSupplier(searcher.getIndexReader().leaves().get(0));
          ss.setTopLevelScoringClause();
          assertScores(ss.bulkScorer(), scoreMode, null, expectedScores);
        }

        {
          // Doc 16 is returned because MaxScoreBulkScorer scores assuming A will match in doc 13,
          // leading to a potential max score of 6. By the time it determines that A doesn't match,
          // scoring is complete and thus there is no advantage to not collecting the doc.
          Map<Integer, Float> expectedScores =
              Map.of(
                  2, 6.0f,
                  10, 10.0f,
                  16, 5.0f);

          ScorerSupplier ss = weight.scorerSupplier(searcher.getIndexReader().leaves().get(0));
          ss.setTopLevelScoringClause();
          assertScores(ss.bulkScorer(), scoreMode, 6.0f, expectedScores);
        }

        {
          // Floor above the best possible score (10): nothing is collected.
          Map<Integer, Float> expectedScores = Map.of();

          ScorerSupplier ss = weight.scorerSupplier(searcher.getIndexReader().leaves().get(0));
          ss.setTopLevelScoringClause();
          assertScores(ss.bulkScorer(), scoreMode, 11.0f, expectedScores);
        }
      }
    }
  }

  /**
   * Pins min-competitive-score behavior for ScoreMode.None: all parent scores are 0, so a floor
   * of 0 collects everything and any positive floor collects nothing.
   */
  public void testSetMinCompetitiveScoreWithScoreModeNone() throws IOException {
    try (Directory dir = newDirectory()) {
      try (RandomIndexWriter w =
          new RandomIndexWriter(
              random(),
              dir,
              newIndexWriterConfig()
                  .setMergePolicy(
                      // retain doc id order
                      newLogMergePolicy(random().nextBoolean())))) {

        populateStaticIndex(w);
        w.forceMerge(1);
      }

      try (IndexReader reader = DirectoryReader.open(dir)) {
        final IndexSearcher searcher = newSearcher(reader);
        final ToParentBlockJoinQuery query = buildQuery(ScoreMode.None);
        final org.apache.lucene.search.ScoreMode scoreMode =
            org.apache.lucene.search.ScoreMode.TOP_SCORES;
        final Weight weight = searcher.createWeight(searcher.rewrite(query), scoreMode, 1);

        {
          Map<Integer, Float> expectedScores =
              Map.of(
                  2, 0.0f,
                  5, 0.0f,
                  10, 0.0f,
                  12, 0.0f,
                  16, 0.0f);

          ScorerSupplier ss = weight.scorerSupplier(searcher.getIndexReader().leaves().get(0));
          ss.setTopLevelScoringClause();
          assertScores(ss.bulkScorer(), scoreMode, null, expectedScores);
        }

        {
          Map<Integer, Float> expectedScores =
              Map.of(
                  2, 0.0f,
                  5, 0.0f,
                  10, 0.0f,
                  12, 0.0f,
                  16, 0.0f);

          ScorerSupplier ss = weight.scorerSupplier(searcher.getIndexReader().leaves().get(0));
          ss.setTopLevelScoringClause();
          assertScores(ss.bulkScorer(), scoreMode, 0.0f, expectedScores);
        }

        {
          // Smallest floor strictly greater than 0 prunes every (zero-scored) parent.
          Map<Integer, Float> expectedScores = Map.of();

          ScorerSupplier ss = weight.scorerSupplier(searcher.getIndexReader().leaves().get(0));
          ss.setTopLevelScoringClause();
          assertScores(ss.bulkScorer(), scoreMode, Math.nextUp(0f), expectedScores);
        }
      }
    }
  }
}
|
|
@ -16,6 +16,7 @@
|
|||
*/
|
||||
package org.apache.lucene.search.join;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -81,9 +82,16 @@ public class TestBlockJoinValidation extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testNextDocValidationForToParentBjq() throws Exception {
|
||||
// TODO: This test is broken when score mode is None because BlockJoinScorer#scoreChildDocs does
|
||||
// not advance the child approximation. Adjust this test once that is fixed.
|
||||
final List<ScoreMode> validScoreModes =
|
||||
List.of(ScoreMode.Avg, ScoreMode.Max, ScoreMode.Total, ScoreMode.Min);
|
||||
Query parentQueryWithRandomChild = createChildrenQueryWithOneParent(getRandomChildNumber(0));
|
||||
ToParentBlockJoinQuery blockJoinQuery =
|
||||
new ToParentBlockJoinQuery(parentQueryWithRandomChild, parentsFilter, ScoreMode.None);
|
||||
new ToParentBlockJoinQuery(
|
||||
parentQueryWithRandomChild,
|
||||
parentsFilter,
|
||||
RandomPicks.randomFrom(LuceneTestCase.random(), validScoreModes));
|
||||
IllegalStateException expected =
|
||||
expectThrows(
|
||||
IllegalStateException.class,
|
||||
|
|
Loading…
Reference in New Issue