diff --git a/CHANGES.txt b/CHANGES.txt index f35560b2217..8c6093b3be2 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -259,6 +259,11 @@ Optimizations raw bytes for each contiguous range of non-deleted documents. (Robert Engels via Mike McCandless) +13. LUCENE-693: Speed up nested conjunctions (~2x) that match many + documents, and a slight performance increase for top level + conjunctions. (yonik) + + Documentation 1. LUCENE-1051: Generate separate javadocs for core, demo and contrib diff --git a/src/java/org/apache/lucene/search/BooleanScorer2.java b/src/java/org/apache/lucene/search/BooleanScorer2.java index cd4efab3af8..10f77765488 100644 --- a/src/java/org/apache/lucene/search/BooleanScorer2.java +++ b/src/java/org/apache/lucene/search/BooleanScorer2.java @@ -139,7 +139,7 @@ class BooleanScorer2 extends Scorer { * When "sum" is used in a name it means score value summing * over the matching scorers */ - private void initCountingSumScorer() { + private void initCountingSumScorer() throws IOException { coordinator.init(); countingSumScorer = makeCountingSumScorer(); } @@ -192,10 +192,10 @@ class BooleanScorer2 extends Scorer { private static Similarity defaultSimilarity = new DefaultSimilarity(); - private Scorer countingConjunctionSumScorer(List requiredScorers) { + private Scorer countingConjunctionSumScorer(List requiredScorers) throws IOException { // each scorer from the list counted as a single matcher final int requiredNrMatchers = requiredScorers.size(); - ConjunctionScorer cs = new ConjunctionScorer(defaultSimilarity) { + return new ConjunctionScorer(defaultSimilarity, requiredScorers) { private int lastScoredDoc = -1; public float score() throws IOException { @@ -210,34 +210,26 @@ class BooleanScorer2 extends Scorer { return super.score(); } }; - Iterator rsi = requiredScorers.iterator(); - while (rsi.hasNext()) { - cs.add((Scorer) rsi.next()); - } - return cs; } - private Scorer dualConjunctionSumScorer(Scorer req1, Scorer req2) { // non counting. - ConjunctionScorer cs = new ConjunctionScorer(defaultSimilarity); + private Scorer dualConjunctionSumScorer(Scorer req1, Scorer req2) throws IOException { // non counting. + return new ConjunctionScorer(defaultSimilarity, new Scorer[]{req1, req2}); // All scorers match, so defaultSimilarity always has 1 as // the coordination factor. // Therefore the sum of the scores of two scorers // is used as score. - cs.add(req1); - cs.add(req2); - return cs; } /** Returns the scorer to be used for match counting and score summing. * Uses requiredScorers, optionalScorers and prohibitedScorers. */ - private Scorer makeCountingSumScorer() { // each scorer counted as a single matcher + private Scorer makeCountingSumScorer() throws IOException { // each scorer counted as a single matcher return (requiredScorers.size() == 0) ? makeCountingSumScorerNoReq() : makeCountingSumScorerSomeReq(); } - private Scorer makeCountingSumScorerNoReq() { // No required scorers + private Scorer makeCountingSumScorerNoReq() throws IOException { // No required scorers if (optionalScorers.size() == 0) { return new NonMatchingScorer(); // no clauses or only prohibited clauses } else { // No required scorers. At least one optional scorer. @@ -258,7 +250,7 @@ class BooleanScorer2 extends Scorer { } } - private Scorer makeCountingSumScorerSomeReq() { // At least one required scorer. + private Scorer makeCountingSumScorerSomeReq() throws IOException { // At least one required scorer. if (optionalScorers.size() < minNrShouldMatch) { return new NonMatchingScorer(); // fewer optional clauses than minimum that should match } else if (optionalScorers.size() == minNrShouldMatch) { // all optional scorers also required. diff --git a/src/java/org/apache/lucene/search/ConjunctionScorer.java b/src/java/org/apache/lucene/search/ConjunctionScorer.java index 6a6002bdc6a..23e8d0ea8f9 100644 --- a/src/java/org/apache/lucene/search/ConjunctionScorer.java +++ b/src/java/org/apache/lucene/search/ConjunctionScorer.java @@ -18,118 +18,105 @@ package org.apache.lucene.search; */ import java.io.IOException; +import java.util.Collection; import java.util.Arrays; import java.util.Comparator; /** Scorer for conjunctions, sets of queries, all of which are required. */ class ConjunctionScorer extends Scorer { - private Scorer[] scorers = new Scorer[2]; - private int length = 0; - private int first = 0; - private int last = -1; - private boolean firstTime = true; - private boolean more = true; - private float coord; + private final Scorer[] scorers; - public ConjunctionScorer(Similarity similarity) { + private boolean firstTime=true; + private boolean more; + private final float coord; + private int lastDoc=-1; + + public ConjunctionScorer(Similarity similarity, Collection scorers) throws IOException { + this(similarity, (Scorer[])scorers.toArray(new Scorer[scorers.size()])); + } + + public ConjunctionScorer(Similarity similarity, Scorer[] scorers) throws IOException { super(similarity); + this.scorers = scorers; + coord = getSimilarity().coord(this.scorers.length, this.scorers.length); } - final void add(Scorer scorer) { - if (length >= scorers.length) { - // grow the array - Scorer[] temps = new Scorer[scorers.length * 2]; - System.arraycopy(scorers, 0, temps, 0, length); - scorers = temps; - } - last += 1; - length += 1; - scorers[last] = scorer; - } - - public int doc() { return scorers[first].doc(); } + public int doc() { return lastDoc; } public boolean next() throws IOException { - if (firstTime) { - init(true); - } else if (more) { - more = scorers[last].next(); // trigger further scanning - } + if (firstTime) + return init(0); + else if (more) + more = scorers[(scorers.length-1)].next(); return doNext(); } - + private boolean doNext() throws IOException { - while (more && scorers[first].doc() < scorers[last].doc()) { // find doc w/ all clauses - more = scorers[first].skipTo(scorers[last].doc()); // skip first upto last - last = first; // move first to last - first = (first == length-1) ? 0 : first+1; + int first=0; + Scorer lastScorer = scorers[scorers.length-1]; + Scorer firstScorer; + while (more && (firstScorer=scorers[first]).doc() < (lastDoc=lastScorer.doc())) { + more = firstScorer.skipTo(lastDoc); + lastScorer = firstScorer; + first = (first == (scorers.length-1)) ? 0 : first+1; } - return more; // found a doc with all clauses + return more; } public boolean skipTo(int target) throws IOException { - if(firstTime) { - init(false); - } - - for (int i = 0, pos = first; i < length; i++) { - if (!more) break; - more = scorers[pos].skipTo(target); - pos = (pos == length-1) ? 0 : pos+1; - } - - if (more) - sortScorers(); // re-sort scorers - + if (firstTime) + return init(target); + else if (more) + more = scorers[(scorers.length-1)].skipTo(target); return doNext(); } - public float score() throws IOException { - float sum = 0.0f; - for (int i = 0; i < length; i++) { - sum += scorers[i].score(); - } - return sum * coord; - } - - private void init(boolean initScorers) throws IOException { - // compute coord factor - coord = getSimilarity().coord(length, length); - - more = length > 0; - - if(initScorers){ - // move each scorer to its first entry - for (int i = 0, pos = first; i < length; i++) { - if (!more) break; - more = scorers[pos].next(); - pos = (pos == length-1) ? 0 : pos+1; - } - // initial sort of simulated list - if (more) - sortScorers(); + // Note... most of this could be done in the constructor + // thus skipping a check for firstTime per call to next() and skipTo() + private boolean init(int target) throws IOException { + firstTime=false; + more = scorers.length>1; + for (int i=0; i>1); i++) { + Scorer tmp = scorers[i]; + scorers[i] = scorers[end-i]; + scorers[end-i] = tmp; + } + + return more; + } + + public float score() throws IOException { + float sum = 0.0f; + for (int i = 0; i < scorers.length; i++) { + sum += scorers[i].score(); + } + return sum * coord; } public Explanation explain(int doc) { diff --git a/src/test/org/apache/lucene/search/TestScorerPerf.java b/src/test/org/apache/lucene/search/TestScorerPerf.java index 4eee43a86bf..ad180da8e75 100755 --- a/src/test/org/apache/lucene/search/TestScorerPerf.java +++ b/src/test/org/apache/lucene/search/TestScorerPerf.java @@ -42,6 +42,7 @@ public class TestScorerPerf extends LuceneTestCase { boolean validate = true; // set to false when doing performance testing BitSet[] sets; + Term[] terms; IndexSearcher s; public void createDummySearcher() throws Exception { @@ -55,22 +56,25 @@ public class TestScorerPerf extends LuceneTestCase { public void createRandomTerms(int nDocs, int nTerms, double power, Directory dir) throws Exception { int[] freq = new int[nTerms]; + terms = new Term[nTerms]; for (int i=0; i=termsInIndex) tnum=termflag.nextClearBit(0); + termflag.set(tnum); + Query tq = new TermQuery(terms[tnum]); bq.add(tq, BooleanClause.Occur.MUST); } CountingHitCollector hc = new CountingHitCollector(); s.search(bq, hc); + nMatches += hc.getCount(); ret += hc.getSum(); } + System.out.println("Average number of matches="+(nMatches/iter)); return ret; } @@ -233,7 +244,7 @@ public class TestScorerPerf extends LuceneTestCase { int iter ) throws IOException { int ret=0; - + long nMatches=0; for (int i=0; i=25) tnum=termflag.nextClearBit(0); + termflag.set(tnum); + Query tq = new TermQuery(terms[tnum]); bq.add(tq, BooleanClause.Occur.MUST); } // inner @@ -256,9 +270,10 @@ public class TestScorerPerf extends LuceneTestCase { CountingHitCollector hc = new CountingHitCollector(); s.search(oq, hc); + nMatches += hc.getCount(); ret += hc.getSum(); } - + System.out.println("Average number of matches="+(nMatches/iter)); return ret; } @@ -275,7 +290,7 @@ public class TestScorerPerf extends LuceneTestCase { PhraseQuery q = new PhraseQuery(); for (int j=0; j