mirror of https://github.com/apache/lucene.git
LUCENE-7654: To-parent block joins should implement two-phase iteration.
This commit is contained in:
parent
076662d1b2
commit
9d5dc0cf57
|
@ -135,6 +135,10 @@ Optimizations
|
|||
whether BKD cells are entirely within the distance close to the dateline.
|
||||
(Adrien Grand)
|
||||
|
||||
* LUCENE-7654: ToParentBlockJoinQuery now implements two-phase iteration and
|
||||
computes scores lazily in order to be faster when used in conjunctions.
|
||||
(Adrien Grand)
|
||||
|
||||
Build
|
||||
|
||||
* LUCENE-7651: Fix Javadocs build for Java 8u121 by injecting "Google Code
|
||||
|
|
|
@ -20,15 +20,18 @@ import java.io.IOException;
|
|||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.FilterWeight;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
import org.apache.lucene.search.FilterWeight;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.ScorerSupplier;
|
||||
import org.apache.lucene.search.TwoPhaseIterator;
|
||||
import org.apache.lucene.search.Weight;
|
||||
import org.apache.lucene.util.BitSet;
|
||||
|
||||
|
@ -74,7 +77,7 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
private final ScoreMode scoreMode;
|
||||
|
||||
/** Create a ToParentBlockJoinQuery.
|
||||
*
|
||||
*
|
||||
* @param childQuery Query matching child documents.
|
||||
* @param parentsFilter Filter identifying the parent documents.
|
||||
* @param scoreMode How to aggregate multiple child scores
|
||||
|
@ -100,7 +103,7 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException {
|
||||
return new BlockJoinWeight(this, childQuery.createWeight(searcher, needsScores, boost), parentsFilter, needsScores ? scoreMode : ScoreMode.None);
|
||||
}
|
||||
|
||||
|
||||
/** Return our child query. */
|
||||
public Query getChildQuery() {
|
||||
return childQuery;
|
||||
|
@ -116,33 +119,44 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
this.scoreMode = scoreMode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context) throws IOException {
|
||||
final ScorerSupplier scorerSupplier = scorerSupplier(context);
|
||||
if (scorerSupplier == null) {
|
||||
return null;
|
||||
}
|
||||
return scorerSupplier.get(false);
|
||||
}
|
||||
|
||||
// NOTE: acceptDocs applies (and is checked) only in the
|
||||
// parent document space
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext readerContext) throws IOException {
|
||||
|
||||
final Scorer childScorer = in.scorer(readerContext);
|
||||
if (childScorer == null) {
|
||||
// No matches
|
||||
return null;
|
||||
}
|
||||
|
||||
final int firstChildDoc = childScorer.iterator().nextDoc();
|
||||
if (firstChildDoc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
// No matches
|
||||
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
|
||||
final ScorerSupplier childScorerSupplier = in.scorerSupplier(context);
|
||||
if (childScorerSupplier == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// NOTE: this does not take accept docs into account, the responsibility
|
||||
// to not match deleted docs is on the scorer
|
||||
final BitSet parents = parentsFilter.getBitSet(readerContext);
|
||||
|
||||
final BitSet parents = parentsFilter.getBitSet(context);
|
||||
if (parents == null) {
|
||||
// No matches
|
||||
return null;
|
||||
}
|
||||
|
||||
return new BlockJoinScorer(this, childScorer, parents, firstChildDoc, scoreMode);
|
||||
return new ScorerSupplier() {
|
||||
|
||||
@Override
|
||||
public Scorer get(boolean randomAccess) throws IOException {
|
||||
return new BlockJoinScorer(BlockJoinWeight.this, childScorerSupplier.get(randomAccess), parents, scoreMode);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return childScorerSupplier.cost();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -154,25 +168,108 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
return Explanation.noMatch("Not a match");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static class ParentApproximation extends DocIdSetIterator {
|
||||
|
||||
private final DocIdSetIterator childApproximation;
|
||||
private final BitSet parentBits;
|
||||
private int doc = -1;
|
||||
|
||||
ParentApproximation(DocIdSetIterator childApproximation, BitSet parentBits) {
|
||||
this.childApproximation = childApproximation;
|
||||
this.parentBits = parentBits;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(doc + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (target >= parentBits.length()) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
final int firstChildTarget = target == 0 ? 0 : parentBits.prevSetBit(target - 1) + 1;
|
||||
int childDoc = childApproximation.docID();
|
||||
if (childDoc < firstChildTarget) {
|
||||
childDoc = childApproximation.advance(firstChildTarget);
|
||||
}
|
||||
if (childDoc >= parentBits.length() - 1) {
|
||||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
return doc = parentBits.nextSetBit(childDoc + 1);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return childApproximation.cost();
|
||||
}
|
||||
}
|
||||
|
||||
private static class ParentTwoPhase extends TwoPhaseIterator {
|
||||
|
||||
private final ParentApproximation parentApproximation;
|
||||
private final DocIdSetIterator childApproximation;
|
||||
private final TwoPhaseIterator childTwoPhase;
|
||||
|
||||
ParentTwoPhase(ParentApproximation parentApproximation, TwoPhaseIterator childTwoPhase) {
|
||||
super(parentApproximation);
|
||||
this.parentApproximation = parentApproximation;
|
||||
this.childApproximation = childTwoPhase.approximation();
|
||||
this.childTwoPhase = childTwoPhase;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean matches() throws IOException {
|
||||
assert childApproximation.docID() < parentApproximation.docID();
|
||||
do {
|
||||
if (childTwoPhase.matches()) {
|
||||
return true;
|
||||
}
|
||||
} while (childApproximation.nextDoc() < parentApproximation.docID());
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
// TODO: how could we compute a match cost?
|
||||
return childTwoPhase.matchCost() + 10;
|
||||
}
|
||||
}
|
||||
|
||||
static class BlockJoinScorer extends Scorer {
|
||||
private final Scorer childScorer;
|
||||
private final BitSet parentBits;
|
||||
private final ScoreMode scoreMode;
|
||||
private int parentDoc = -1;
|
||||
private int prevParentDoc;
|
||||
private float parentScore;
|
||||
private int parentFreq;
|
||||
private int nextChildDoc;
|
||||
private int childDocUpto;
|
||||
private final DocIdSetIterator childApproximation;
|
||||
private final TwoPhaseIterator childTwoPhase;
|
||||
private final ParentApproximation parentApproximation;
|
||||
private final ParentTwoPhase parentTwoPhase;
|
||||
private float score;
|
||||
private int freq;
|
||||
|
||||
public BlockJoinScorer(Weight weight, Scorer childScorer, BitSet parentBits, int firstChildDoc, ScoreMode scoreMode) {
|
||||
public BlockJoinScorer(Weight weight, Scorer childScorer, BitSet parentBits, ScoreMode scoreMode) {
|
||||
super(weight);
|
||||
//System.out.println("Q.init firstChildDoc=" + firstChildDoc);
|
||||
this.parentBits = parentBits;
|
||||
this.childScorer = childScorer;
|
||||
this.scoreMode = scoreMode;
|
||||
nextChildDoc = firstChildDoc;
|
||||
childTwoPhase = childScorer.twoPhaseIterator();
|
||||
if (childTwoPhase == null) {
|
||||
childApproximation = childScorer.iterator();
|
||||
parentApproximation = new ParentApproximation(childApproximation, parentBits);
|
||||
parentTwoPhase = null;
|
||||
} else {
|
||||
childApproximation = childTwoPhase.approximation();
|
||||
parentApproximation = new ParentApproximation(childTwoPhase.approximation(), parentBits);
|
||||
parentTwoPhase = new ParentTwoPhase(parentApproximation, childTwoPhase);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -182,152 +279,80 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
|
||||
@Override
|
||||
public DocIdSetIterator iterator() {
|
||||
return new DocIdSetIterator() {
|
||||
final DocIdSetIterator childIt = childScorer.iterator();
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
//System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc);
|
||||
if (nextChildDoc == NO_MORE_DOCS) {
|
||||
//System.out.println(" end");
|
||||
return parentDoc = NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
// Gather all children sharing the same parent as
|
||||
// nextChildDoc
|
||||
|
||||
parentDoc = parentBits.nextSetBit(nextChildDoc);
|
||||
|
||||
// Parent & child docs are supposed to be
|
||||
// orthogonal:
|
||||
checkOrthogonal(nextChildDoc, parentDoc);
|
||||
|
||||
//System.out.println(" parentDoc=" + parentDoc);
|
||||
assert parentDoc != DocIdSetIterator.NO_MORE_DOCS;
|
||||
|
||||
float totalScore = 0;
|
||||
float maxScore = Float.NEGATIVE_INFINITY;
|
||||
float minScore = Float.POSITIVE_INFINITY;
|
||||
|
||||
childDocUpto = 0;
|
||||
parentFreq = 0;
|
||||
do {
|
||||
|
||||
//System.out.println(" c=" + nextChildDoc);
|
||||
if (scoreMode != ScoreMode.None) {
|
||||
// TODO: specialize this into dedicated classes per-scoreMode
|
||||
final float childScore = childScorer.score();
|
||||
final int childFreq = childScorer.freq();
|
||||
maxScore = Math.max(childScore, maxScore);
|
||||
minScore = Math.min(childScore, minScore);
|
||||
totalScore += childScore;
|
||||
parentFreq += childFreq;
|
||||
}
|
||||
childDocUpto++;
|
||||
nextChildDoc = childIt.nextDoc();
|
||||
} while (nextChildDoc < parentDoc);
|
||||
|
||||
// Parent & child docs are supposed to be
|
||||
// orthogonal:
|
||||
checkOrthogonal(nextChildDoc, parentDoc);
|
||||
|
||||
switch(scoreMode) {
|
||||
case Avg:
|
||||
parentScore = totalScore / childDocUpto;
|
||||
break;
|
||||
case Max:
|
||||
parentScore = maxScore;
|
||||
break;
|
||||
case Min:
|
||||
parentScore = minScore;
|
||||
break;
|
||||
case Total:
|
||||
parentScore = totalScore;
|
||||
break;
|
||||
case None:
|
||||
break;
|
||||
}
|
||||
|
||||
//System.out.println(" return parentDoc=" + parentDoc + " childDocUpto=" + childDocUpto);
|
||||
return parentDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int parentTarget) throws IOException {
|
||||
|
||||
//System.out.println("Q.advance parentTarget=" + parentTarget);
|
||||
if (parentTarget == NO_MORE_DOCS) {
|
||||
return parentDoc = NO_MORE_DOCS;
|
||||
}
|
||||
|
||||
if (parentTarget == 0) {
|
||||
// Callers should only be passing in a docID from
|
||||
// the parent space, so this means this parent
|
||||
// has no children (it got docID 0), so it cannot
|
||||
// possibly match. We must handle this case
|
||||
// separately otherwise we pass invalid -1 to
|
||||
// prevSetBit below:
|
||||
return nextDoc();
|
||||
}
|
||||
|
||||
prevParentDoc = parentBits.prevSetBit(parentTarget-1);
|
||||
|
||||
//System.out.println(" rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc);
|
||||
assert prevParentDoc >= parentDoc;
|
||||
if (prevParentDoc > nextChildDoc) {
|
||||
nextChildDoc = childIt.advance(prevParentDoc);
|
||||
// System.out.println(" childScorer advanced to child docID=" + nextChildDoc);
|
||||
//} else {
|
||||
//System.out.println(" skip childScorer advance");
|
||||
}
|
||||
|
||||
// Parent & child docs are supposed to be orthogonal:
|
||||
checkOrthogonal(nextChildDoc, prevParentDoc);
|
||||
|
||||
final int nd = nextDoc();
|
||||
//System.out.println(" return nextParentDoc=" + nd);
|
||||
return nd;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return parentDoc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return childIt.cost();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
private void checkOrthogonal(int childDoc, int parentDoc) {
|
||||
if (childDoc==parentDoc) {
|
||||
throw new IllegalStateException("Child query must not match same docs with parent filter. "
|
||||
+ "Combine them as must clauses (+) to find a problem doc. "
|
||||
+ "docId=" + nextChildDoc + ", " + childScorer.getClass());
|
||||
|
||||
if (parentTwoPhase == null) {
|
||||
// the approximation is exact
|
||||
return parentApproximation;
|
||||
} else {
|
||||
return TwoPhaseIterator.asDocIdSetIterator(parentTwoPhase);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public TwoPhaseIterator twoPhaseIterator() {
|
||||
return parentTwoPhase;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return parentDoc;
|
||||
return parentApproximation.docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return parentScore;
|
||||
setScoreAndFreq();
|
||||
return score;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
return parentFreq;
|
||||
public int freq() throws IOException {
|
||||
setScoreAndFreq();
|
||||
return freq;
|
||||
}
|
||||
|
||||
private void setScoreAndFreq() throws IOException {
|
||||
if (childApproximation.docID() >= parentApproximation.docID()) {
|
||||
return;
|
||||
}
|
||||
double score = scoreMode == ScoreMode.None ? 0 : childScorer.score();
|
||||
int freq = 1;
|
||||
while (childApproximation.nextDoc() < parentApproximation.docID()) {
|
||||
if (childTwoPhase == null || childTwoPhase.matches()) {
|
||||
final float childScore = childScorer.score();
|
||||
freq += 1;
|
||||
switch (scoreMode) {
|
||||
case Total:
|
||||
case Avg:
|
||||
score += childScore;
|
||||
break;
|
||||
case Min:
|
||||
score = Math.min(score, childScore);
|
||||
break;
|
||||
case Max:
|
||||
score = Math.min(score, childScore);
|
||||
break;
|
||||
case None:
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError();
|
||||
}
|
||||
}
|
||||
}
|
||||
if (childApproximation.docID() == parentApproximation.docID() && (childTwoPhase == null || childTwoPhase.matches())) {
|
||||
throw new IllegalStateException("Child query must not match same docs with parent filter. "
|
||||
+ "Combine them as must clauses (+) to find a problem doc. "
|
||||
+ "docId=" + parentApproximation.docID() + ", " + childScorer.getClass());
|
||||
}
|
||||
if (scoreMode == ScoreMode.Avg) {
|
||||
score /= freq;
|
||||
}
|
||||
this.score = (float) score;
|
||||
this.freq = freq;
|
||||
}
|
||||
|
||||
public Explanation explain(LeafReaderContext context, Weight childWeight) throws IOException {
|
||||
int prevParentDoc = parentBits.prevSetBit(parentApproximation.docID() - 1);
|
||||
int start = context.docBase + prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc
|
||||
int end = context.docBase + parentDoc - 1; // -1 b/c parentDoc is parent doc
|
||||
int end = context.docBase + parentApproximation.docID() - 1; // -1 b/c parentDoc is parent doc
|
||||
|
||||
Explanation bestChild = null;
|
||||
int matches = 0;
|
||||
|
@ -341,6 +366,7 @@ public class ToParentBlockJoinQuery extends Query {
|
|||
}
|
||||
}
|
||||
|
||||
assert freq() == matches;
|
||||
return Explanation.match(score(), String.format(Locale.ROOT,
|
||||
"Score based on %d child docs in range from %d to %d, best match:", matches, start, end), bestChild
|
||||
);
|
||||
|
|
|
@ -671,7 +671,7 @@ public class TestBlockJoin extends LuceneTestCase {
|
|||
System.out.println("TEST: iter=" + (1+iter) + " of " + iters);
|
||||
}
|
||||
|
||||
final Query childQuery;
|
||||
Query childQuery;
|
||||
if (random().nextInt(3) == 2) {
|
||||
final int childFieldID = random().nextInt(childFields.length);
|
||||
childQuery = new TermQuery(new Term("child" + childFieldID,
|
||||
|
@ -706,6 +706,9 @@ public class TestBlockJoin extends LuceneTestCase {
|
|||
random().nextBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT);
|
||||
childQuery = bq.build();
|
||||
}
|
||||
if (random().nextBoolean()) {
|
||||
childQuery = new RandomApproximationQuery(childQuery, random());
|
||||
}
|
||||
|
||||
|
||||
final ScoreMode agg = ScoreMode.values()[random().nextInt(ScoreMode.values().length)];
|
||||
|
|
|
@ -87,25 +87,6 @@ public class TestBlockJoinValidation extends LuceneTestCase {
|
|||
assertTrue(expected.getMessage() != null && expected.getMessage().contains("Child query must not match same docs with parent filter"));
|
||||
}
|
||||
|
||||
public void testAdvanceValidationForToParentBjq() throws Exception {
|
||||
int randomChildNumber = getRandomChildNumber(0);
|
||||
// we need to make advance method meet wrong document, so random child number
|
||||
// in BJQ must be greater than child number in Boolean clause
|
||||
int nextRandomChildNumber = getRandomChildNumber(randomChildNumber);
|
||||
Query parentQueryWithRandomChild = createChildrenQueryWithOneParent(nextRandomChildNumber);
|
||||
ToParentBlockJoinQuery blockJoinQuery = new ToParentBlockJoinQuery(parentQueryWithRandomChild, parentsFilter, ScoreMode.None);
|
||||
// advance() method is used by ConjunctionScorer, so we need to create Boolean conjunction query
|
||||
BooleanQuery.Builder conjunctionQuery = new BooleanQuery.Builder();
|
||||
WildcardQuery childQuery = new WildcardQuery(new Term("child", createFieldValue(randomChildNumber)));
|
||||
conjunctionQuery.add(new BooleanClause(childQuery, BooleanClause.Occur.MUST));
|
||||
conjunctionQuery.add(new BooleanClause(blockJoinQuery, BooleanClause.Occur.MUST));
|
||||
|
||||
IllegalStateException expected = expectThrows(IllegalStateException.class, () -> {
|
||||
indexSearcher.search(conjunctionQuery.build(), 1);
|
||||
});
|
||||
assertTrue(expected.getMessage() != null && expected.getMessage().contains("Child query must not match same docs with parent filter"));
|
||||
}
|
||||
|
||||
public void testNextDocValidationForToChildBjq() throws Exception {
|
||||
Query parentQueryWithRandomChild = createParentsQueryWithOneChild(getRandomChildNumber(0));
|
||||
|
||||
|
|
Loading…
Reference in New Issue