diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index e531233c7bf..6134d662961 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -245,6 +245,12 @@ Bug Fixes cached datastructure. Otherwise this can cause inconsistencies with readers at different points in time. (Robert Muir) +* LUCENE-4791: A conjunction of terms (ConjunctionTermScorer) scanned on + the lowest frequency term instead of skipping, leading to potentially + large performance impacts for many non-random or non-uniform + term distributions. (John Wang, yonik) + + Documentation * LUCENE-4718: Fixed documentation of oal.queryparser.classic. diff --git a/lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java b/lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java index 7fb392ecc7f..b48a94e67eb 100644 --- a/lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/ConjunctionTermScorer.java @@ -50,26 +50,32 @@ class ConjunctionTermScorer extends Scorer { } private int doNext(int doc) throws IOException { - do { - if (lead.doc == DocIdSetIterator.NO_MORE_DOCS) { - return NO_MORE_DOCS; - } - advanceHead: do { + for(;;) { + // doc may already be NO_MORE_DOCS here, but we don't check explicitly + // since all scorers should advance to NO_MORE_DOCS, match, then + // return that value. + advanceHead: for(;;) { for (int i = 1; i < docsAndFreqs.length; i++) { + // invariant: docsAndFreqs[i].doc <= doc at this point. + + // docsAndFreqs[i].doc may already be equal to doc if we "broke advanceHead" + // on the previous iteration and the advance on the lead scorer exactly matched. if (docsAndFreqs[i].doc < doc) { docsAndFreqs[i].doc = docsAndFreqs[i].docs.advance(doc); - } - if (docsAndFreqs[i].doc > doc) { - // DocsEnum beyond the current doc - break and advance lead - break advanceHead; + + if (docsAndFreqs[i].doc > doc) { + // DocsEnum beyond the current doc - break and advance lead to the new highest doc. + doc = docsAndFreqs[i].doc; + break advanceHead; + } } } // success - all DocsEnums are on the same doc return doc; - } while (true); + } // advance head for next iteration - doc = lead.doc = lead.docs.nextDoc(); - } while (true); + doc = lead.doc = lead.docs.advance(doc); + } } @Override