diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 5a83c312ef6..3c524401152 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -40,6 +40,10 @@ Improvements
IndexReader after (illegally) removing the old index and
reindexing (Vitaly Funstein, Robert Muir, Mike McCandless)
+Optimizations
+
+* LUCENE-7330: Speed up conjunction queries. (Adrien Grand)
+
Other
* LUCENE-4787: Fixed some highlighting javadocs. (Michael Dodsworth via Adrien
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
index 53e3753436a..205a349b73b 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionDISI.java
@@ -31,11 +31,13 @@ import org.apache.lucene.util.CollectionUtil;
*
Public only for use in {@link org.apache.lucene.search.spans}.
* @lucene.internal
*/
-public class ConjunctionDISI extends DocIdSetIterator {
+public final class ConjunctionDISI extends DocIdSetIterator {
- /** Create a conjunction over the provided {@link Scorer}s, taking advantage
- * of {@link TwoPhaseIterator}. */
- public static ConjunctionDISI intersectScorers(List scorers) {
+ /** Create a conjunction over the provided {@link Scorer}s. Note that the
+ * returned {@link DocIdSetIterator} might leverage two-phase iteration in
+ * which case it is possible to retrieve the {@link TwoPhaseIterator} using
+ * {@link TwoPhaseIterator#unwrap}. */
+ public static DocIdSetIterator intersectScorers(List scorers) {
if (scorers.size() < 2) {
throw new IllegalArgumentException("Cannot make a ConjunctionDISI of less than 2 iterators");
}
@@ -45,15 +47,18 @@ public class ConjunctionDISI extends DocIdSetIterator {
addScorer(scorer, allIterators, twoPhaseIterators);
}
- if (twoPhaseIterators.isEmpty()) {
- return new ConjunctionDISI(allIterators);
- } else {
- return new TwoPhase(allIterators, twoPhaseIterators);
+ DocIdSetIterator iterator = new ConjunctionDISI(allIterators);
+ if (twoPhaseIterators.isEmpty() == false) {
+ iterator = TwoPhaseIterator.asDocIdSetIterator(new ConjunctionTwoPhaseIterator(iterator, twoPhaseIterators));
}
+ return iterator;
}
- /** Create a conjunction over the provided DocIdSetIterators. */
- public static ConjunctionDISI intersectIterators(List iterators) {
+ /** Create a conjunction over the provided DocIdSetIterators. Note that the
+ * returned {@link DocIdSetIterator} might leverage two-phase iteration in
+ * which case it is possible to retrieve the {@link TwoPhaseIterator} using
+ * {@link TwoPhaseIterator#unwrap}. */
+ public static DocIdSetIterator intersectIterators(List iterators) {
if (iterators.size() < 2) {
throw new IllegalArgumentException("Cannot make a ConjunctionDISI of less than 2 iterators");
}
@@ -63,16 +68,18 @@ public class ConjunctionDISI extends DocIdSetIterator {
addIterator(iterator, allIterators, twoPhaseIterators);
}
- if (twoPhaseIterators.isEmpty()) {
- return new ConjunctionDISI(allIterators);
- } else {
- return new TwoPhase(allIterators, twoPhaseIterators);
+ DocIdSetIterator iterator = new ConjunctionDISI(allIterators);
+ if (twoPhaseIterators.isEmpty() == false) {
+ iterator = TwoPhaseIterator.asDocIdSetIterator(new ConjunctionTwoPhaseIterator(iterator, twoPhaseIterators));
}
+ return iterator;
}
- /** Create a conjunction over the provided {@link Scorer}s, taking advantage
- * of {@link TwoPhaseIterator}. */
- public static ConjunctionDISI intersectSpans(List spanList) {
+ /** Create a conjunction over the provided {@link Spans}. Note that the
+ * returned {@link DocIdSetIterator} might leverage two-phase iteration in
+ * which case it is possible to retrieve the {@link TwoPhaseIterator} using
+ * {@link TwoPhaseIterator#unwrap}. */
+ public static DocIdSetIterator intersectSpans(List spanList) {
if (spanList.size() < 2) {
throw new IllegalArgumentException("Cannot make a ConjunctionDISI of less than 2 iterators");
}
@@ -82,11 +89,11 @@ public class ConjunctionDISI extends DocIdSetIterator {
addSpans(spans, allIterators, twoPhaseIterators);
}
- if (twoPhaseIterators.isEmpty()) {
- return new ConjunctionDISI(allIterators);
- } else {
- return new TwoPhase(allIterators, twoPhaseIterators);
+ DocIdSetIterator iterator = new ConjunctionDISI(allIterators);
+ if (twoPhaseIterators.isEmpty() == false) {
+ iterator = TwoPhaseIterator.asDocIdSetIterator(new ConjunctionTwoPhaseIterator(iterator, twoPhaseIterators));
}
+ return iterator;
}
/** Adds the scorer, possibly splitting up into two phases or collapsing if it is another conjunction */
@@ -110,17 +117,16 @@ public class ConjunctionDISI extends DocIdSetIterator {
}
private static void addIterator(DocIdSetIterator disi, List allIterators, List twoPhaseIterators) {
- // Check for exactly this class for collapsing. Subclasses can do their own optimizations.
- if (disi.getClass() == ConjunctionDISI.class || disi.getClass() == TwoPhase.class) {
+ TwoPhaseIterator twoPhase = TwoPhaseIterator.unwrap(disi);
+ if (twoPhase != null) {
+ addTwoPhaseIterator(twoPhase, allIterators, twoPhaseIterators);
+ } else if (disi.getClass() == ConjunctionDISI.class) { // Check for exactly this class for collapsing
ConjunctionDISI conjunction = (ConjunctionDISI) disi;
// subconjuctions have already split themselves into two phase iterators and others, so we can take those
// iterators as they are and move them up to this conjunction
- allIterators.add(conjunction.lead);
+ allIterators.add(conjunction.lead1);
+ allIterators.add(conjunction.lead2);
Collections.addAll(allIterators, conjunction.others);
- if (conjunction.getClass() == TwoPhase.class) {
- TwoPhase twoPhase = (TwoPhase) conjunction;
- Collections.addAll(twoPhaseIterators, twoPhase.twoPhaseView.twoPhaseIterators);
- }
} else {
allIterators.add(disi);
}
@@ -128,13 +134,17 @@ public class ConjunctionDISI extends DocIdSetIterator {
private static void addTwoPhaseIterator(TwoPhaseIterator twoPhaseIter, List allIterators, List twoPhaseIterators) {
addIterator(twoPhaseIter.approximation(), allIterators, twoPhaseIterators);
- twoPhaseIterators.add(twoPhaseIter);
+ if (twoPhaseIter.getClass() == ConjunctionTwoPhaseIterator.class) { // Check for exactly this class for collapsing
+ Collections.addAll(twoPhaseIterators, ((ConjunctionTwoPhaseIterator) twoPhaseIter).twoPhaseIterators);
+ } else {
+ twoPhaseIterators.add(twoPhaseIter);
+ }
}
- final DocIdSetIterator lead;
+ final DocIdSetIterator lead1, lead2;
final DocIdSetIterator[] others;
- ConjunctionDISI(List extends DocIdSetIterator> iterators) {
+ private ConjunctionDISI(List extends DocIdSetIterator> iterators) {
assert iterators.size() >= 2;
// Sort the array the first time to allow the least frequent DocsEnum to
// lead the matching.
@@ -144,84 +154,77 @@ public class ConjunctionDISI extends DocIdSetIterator {
return Long.compare(o1.cost(), o2.cost());
}
});
- lead = iterators.get(0);
- others = iterators.subList(1, iterators.size()).toArray(new DocIdSetIterator[0]);
- }
-
- protected boolean matches() throws IOException {
- return true;
- }
-
- TwoPhaseIterator asTwoPhaseIterator() {
- return null;
+ lead1 = iterators.get(0);
+ lead2 = iterators.get(1);
+ others = iterators.subList(2, iterators.size()).toArray(new DocIdSetIterator[0]);
}
private int doNext(int doc) throws IOException {
- for(;;) {
+ advanceHead: for(;;) {
+ assert doc == lead1.docID();
- if (doc == NO_MORE_DOCS) {
- // we need this check because it is only ok to call #matches when positioned
- return NO_MORE_DOCS;
+ // find agreement between the two iterators with the lower costs
+ // we special case them because they do not need the
+ // 'other.docID() < doc' check that the 'others' iterators need
+ final int next2 = lead2.advance(doc);
+ if (next2 != doc) {
+ doc = lead1.advance(next2);
+ if (next2 != doc) {
+ continue;
+ }
}
- advanceHead: for(;;) {
- for (DocIdSetIterator other : others) {
- // invariant: docsAndFreqs[i].doc <= doc at this point.
+ // then find agreement with other iterators
+ for (DocIdSetIterator other : others) {
+ // other.doc may already be equal to doc if we "continued advanceHead"
+ // on the previous iteration and the advance on the lead scorer exactly matched.
+ if (other.docID() < doc) {
+ final int next = other.advance(doc);
- // docsAndFreqs[i].doc may already be equal to doc if we "broke advanceHead"
- // on the previous iteration and the advance on the lead scorer exactly matched.
- if (other.docID() < doc) {
- final int next = other.advance(doc);
-
- if (next > doc) {
- // DocsEnum beyond the current doc - break and advance lead to the new highest doc.
- doc = lead.advance(next);
- break advanceHead;
- }
+ if (next > doc) {
+ // iterator beyond the current doc - advance lead and continue to the new highest doc.
+ doc = lead1.advance(next);
+ continue advanceHead;
}
}
-
- if (matches()) {
- // success - all DocsEnums are on the same doc
- return doc;
- } else {
- doc = lead.nextDoc();
- break advanceHead;
- }
}
+
+ // success - all iterators are on the same doc
+ return doc;
}
}
@Override
public int advance(int target) throws IOException {
- return doNext(lead.advance(target));
+ return doNext(lead1.advance(target));
}
@Override
public int docID() {
- return lead.docID();
+ return lead1.docID();
}
@Override
public int nextDoc() throws IOException {
- return doNext(lead.nextDoc());
+ return doNext(lead1.nextDoc());
}
@Override
public long cost() {
- return lead.cost(); // overestimate
+ return lead1.cost(); // overestimate
}
/**
- * {@link TwoPhaseIterator} view of a {@link TwoPhase} conjunction.
+ * {@link TwoPhaseIterator} implementing a conjunction.
*/
- private static class TwoPhaseConjunctionDISI extends TwoPhaseIterator {
+ private static final class ConjunctionTwoPhaseIterator extends TwoPhaseIterator {
private final TwoPhaseIterator[] twoPhaseIterators;
private final float matchCost;
- private TwoPhaseConjunctionDISI(List extends DocIdSetIterator> iterators, List twoPhaseIterators) {
- super(new ConjunctionDISI(iterators));
+ private ConjunctionTwoPhaseIterator(DocIdSetIterator approximation,
+ List extends TwoPhaseIterator> twoPhaseIterators) {
+ super(approximation);
assert twoPhaseIterators.size() > 0;
CollectionUtil.timSort(twoPhaseIterators, new Comparator() {
@@ -259,37 +262,4 @@ public class ConjunctionDISI extends DocIdSetIterator {
}
- /**
- * A conjunction DISI built on top of approximations. This implementation
- * verifies that documents actually match by consulting the provided
- * {@link TwoPhaseIterator}s.
- *
- * Another important difference with {@link ConjunctionDISI} is that this
- * implementation supports approximations too: the approximation of this
- * impl is the conjunction of the approximations of the wrapped iterators.
- * This allows eg. {@code +"A B" +C} to be approximated as
- * {@code +(+A +B) +C}.
- */
- // NOTE: this is essentially the same as TwoPhaseDocIdSetIterator.asDocIdSetIterator
- // but is its own impl in order to be able to expose a two-phase view
- private static class TwoPhase extends ConjunctionDISI {
-
- final TwoPhaseConjunctionDISI twoPhaseView;
-
- private TwoPhase(List extends DocIdSetIterator> iterators, List twoPhaseIterators) {
- super(iterators);
- twoPhaseView = new TwoPhaseConjunctionDISI(iterators, twoPhaseIterators);
- }
-
- @Override
- public TwoPhaseConjunctionDISI asTwoPhaseIterator() {
- return twoPhaseView;
- }
-
- @Override
- protected boolean matches() throws IOException {
- return twoPhaseView.matches();
- }
- }
-
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
index 066f07c9e99..ab1de92475b 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionScorer.java
@@ -25,7 +25,7 @@ import java.util.List;
/** Scorer for conjunctions, sets of queries, all of which are required. */
class ConjunctionScorer extends Scorer {
- final ConjunctionDISI disi;
+ final DocIdSetIterator disi;
final Scorer[] scorers;
final float coord;
@@ -44,7 +44,7 @@ class ConjunctionScorer extends Scorer {
@Override
public TwoPhaseIterator twoPhaseIterator() {
- return disi.asTwoPhaseIterator();
+ return TwoPhaseIterator.unwrap(disi);
}
@Override
diff --git a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
index c3c8b713c35..f5ccc7c294b 100644
--- a/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ExactPhraseScorer.java
@@ -37,7 +37,7 @@ final class ExactPhraseScorer extends Scorer {
}
}
- private final ConjunctionDISI conjunction;
+ private final DocIdSetIterator conjunction;
private final PostingsAndPosition[] postings;
private int freq;
@@ -60,6 +60,7 @@ final class ExactPhraseScorer extends Scorer {
postingsAndPositions.add(new PostingsAndPosition(posting.postings, posting.position));
}
conjunction = ConjunctionDISI.intersectIterators(iterators);
+ assert TwoPhaseIterator.unwrap(conjunction) == null;
this.postings = postingsAndPositions.toArray(new PostingsAndPosition[postingsAndPositions.size()]);
this.matchCost = matchCost;
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
index 2ee3e70d21c..acc82820172 100644
--- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java
@@ -31,7 +31,7 @@ import org.apache.lucene.util.FixedBitSet;
final class SloppyPhraseScorer extends Scorer {
- private final ConjunctionDISI conjunction;
+ private final DocIdSetIterator conjunction;
private final PhrasePositions[] phrasePositions;
private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq().
@@ -70,6 +70,7 @@ final class SloppyPhraseScorer extends Scorer {
phrasePositions[i] = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
}
conjunction = ConjunctionDISI.intersectIterators(Arrays.asList(iterators));
+ assert TwoPhaseIterator.unwrap(conjunction) == null;
this.matchCost = matchCost;
}
diff --git a/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java b/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java
index 38176bfdb79..ff472e7632f 100644
--- a/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java
+++ b/lucene/core/src/java/org/apache/lucene/search/TwoPhaseIterator.java
@@ -41,40 +41,61 @@ public abstract class TwoPhaseIterator {
/** Return a {@link DocIdSetIterator} view of the provided
* {@link TwoPhaseIterator}. */
public static DocIdSetIterator asDocIdSetIterator(TwoPhaseIterator twoPhaseIterator) {
- final DocIdSetIterator approximation = twoPhaseIterator.approximation();
- return new DocIdSetIterator() {
+ return new TwoPhaseIteratorAsDocIdSetIterator(twoPhaseIterator);
+ }
- @Override
- public int docID() {
- return approximation.docID();
- }
+ /**
+ * If the given {@link DocIdSetIterator} has been created with
+ * {@link #asDocIdSetIterator}, then this will return the wrapped
+ * {@link TwoPhaseIterator}. Otherwise this returns {@code null}.
+ */
+ public static TwoPhaseIterator unwrap(DocIdSetIterator iterator) {
+ if (iterator instanceof TwoPhaseIteratorAsDocIdSetIterator) {
+ return ((TwoPhaseIteratorAsDocIdSetIterator) iterator).twoPhaseIterator;
+ } else {
+ return null;
+ }
+ }
- @Override
- public int nextDoc() throws IOException {
- return doNext(approximation.nextDoc());
- }
+ private static class TwoPhaseIteratorAsDocIdSetIterator extends DocIdSetIterator {
- @Override
- public int advance(int target) throws IOException {
- return doNext(approximation.advance(target));
- }
+ final TwoPhaseIterator twoPhaseIterator;
+ final DocIdSetIterator approximation;
- private int doNext(int doc) throws IOException {
- for (;; doc = approximation.nextDoc()) {
- if (doc == NO_MORE_DOCS) {
- return NO_MORE_DOCS;
- } else if (twoPhaseIterator.matches()) {
- return doc;
- }
+ TwoPhaseIteratorAsDocIdSetIterator(TwoPhaseIterator twoPhaseIterator) {
+ this.twoPhaseIterator = twoPhaseIterator;
+ this.approximation = twoPhaseIterator.approximation;
+ }
+
+ @Override
+ public int docID() {
+ return approximation.docID();
+ }
+
+ @Override
+ public int nextDoc() throws IOException {
+ return doNext(approximation.nextDoc());
+ }
+
+ @Override
+ public int advance(int target) throws IOException {
+ return doNext(approximation.advance(target));
+ }
+
+ private int doNext(int doc) throws IOException {
+ for (;; doc = approximation.nextDoc()) {
+ if (doc == NO_MORE_DOCS) {
+ return NO_MORE_DOCS;
+ } else if (twoPhaseIterator.matches()) {
+ return doc;
}
}
+ }
- @Override
- public long cost() {
- return approximation.cost();
- }
-
- };
+ @Override
+ public long cost() {
+ return approximation.cost();
+ }
}
/** Return an approximation. The returned {@link DocIdSetIterator} is a
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java
index 269990d3398..dcb666417c7 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestConjunctionDISI.java
@@ -181,7 +181,7 @@ public class TestConjunctionDISI extends LuceneTestCase {
}
}
- final ConjunctionDISI conjunction = ConjunctionDISI.intersectScorers(Arrays.asList(iterators));
+ final DocIdSetIterator conjunction = ConjunctionDISI.intersectScorers(Arrays.asList(iterators));
assertEquals(intersect(sets), toBitSet(maxDoc, conjunction));
}
}
@@ -211,8 +211,8 @@ public class TestConjunctionDISI extends LuceneTestCase {
}
}
- final ConjunctionDISI conjunction = ConjunctionDISI.intersectScorers(Arrays.asList(iterators));
- TwoPhaseIterator twoPhaseIterator = conjunction.asTwoPhaseIterator();
+ final DocIdSetIterator conjunction = ConjunctionDISI.intersectScorers(Arrays.asList(iterators));
+ TwoPhaseIterator twoPhaseIterator = TwoPhaseIterator.unwrap(conjunction);
assertEquals(hasApproximation, twoPhaseIterator != null);
if (hasApproximation) {
assertEquals(intersect(sets), toBitSet(maxDoc, TwoPhaseIterator.asDocIdSetIterator(twoPhaseIterator)));
@@ -248,8 +248,8 @@ public class TestConjunctionDISI extends LuceneTestCase {
if (conjunction == null) {
conjunction = newIterator;
} else {
- final ConjunctionDISI conj = ConjunctionDISI.intersectScorers(Arrays.asList(conjunction, newIterator));
- conjunction = scorer(conj, conj.asTwoPhaseIterator());
+ final DocIdSetIterator conj = ConjunctionDISI.intersectScorers(Arrays.asList(conjunction, newIterator));
+ conjunction = scorer(conj, TwoPhaseIterator.unwrap(conj));
}
}
@@ -309,7 +309,7 @@ public class TestConjunctionDISI extends LuceneTestCase {
}
- final ConjunctionDISI conjunction = ConjunctionDISI.intersectScorers(scorers);
+ final DocIdSetIterator conjunction = ConjunctionDISI.intersectScorers(scorers);
assertEquals(intersect(sets), toBitSet(maxDoc, conjunction));
}
}