From 53061c4bf87d7815dbf4186831df99ec5c2d2662 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 11 Apr 2013 17:43:17 +0000 Subject: [PATCH] LUCENE-4926: speed up disjunctionmaxscorer git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1466997 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 2 + .../lucene/search/DisjunctionMaxScorer.java | 57 +++++++++---------- .../search/TestDisjunctionMaxQuery.java | 39 +++++++++++++ 3 files changed, 69 insertions(+), 29 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index e8db6a3be07..8cab37b107e 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -206,6 +206,8 @@ Optimizations * LUCENE-4923: Speed up BooleanQuerys processing of in-order disjunctions. (Robert Muir) +* LUCENE-4926: Speed up DisjunctionMatchQuery. (Robert Muir) + API Changes * LUCENE-4844: removed TaxonomyReader.getParent(), you should use diff --git a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java index 976b4d17215..f7f885f1d7d 100644 --- a/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/DisjunctionMaxScorer.java @@ -28,6 +28,7 @@ class DisjunctionMaxScorer extends DisjunctionScorer { /* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */ private final float tieBreakerMultiplier; private int doc = -1; + private int freq = -1; /* Used when scoring currently matching doc. */ private float scoreSum; @@ -55,8 +56,8 @@ class DisjunctionMaxScorer extends DisjunctionScorer { @Override public int nextDoc() throws IOException { - if (numScorers == 0) return doc = NO_MORE_DOCS; - while (subScorers[0].docID() == doc) { + assert doc != NO_MORE_DOCS; + while(true) { if (subScorers[0].nextDoc() != NO_MORE_DOCS) { heapAdjust(0); } else { @@ -65,9 +66,11 @@ class DisjunctionMaxScorer extends DisjunctionScorer { return doc = NO_MORE_DOCS; } } + if (subScorers[0].docID() != doc) { + afterNext(); + return doc; + } } - - return doc = subScorers[0].docID(); } @Override @@ -80,47 +83,40 @@ class DisjunctionMaxScorer extends DisjunctionScorer { */ @Override public float score() throws IOException { - int doc = subScorers[0].docID(); - scoreSum = scoreMax = subScorers[0].score(); - int size = numScorers; - scoreAll(1, size, doc); - scoreAll(2, size, doc); return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier; } + + private void afterNext() throws IOException { + doc = subScorers[0].docID(); + if (doc != NO_MORE_DOCS) { + scoreSum = scoreMax = subScorers[0].score(); + freq = 1; + scoreAll(1); + scoreAll(2); + } + } // Recursively iterate all subScorers that generated last doc computing sum and max - private void scoreAll(int root, int size, int doc) throws IOException { - if (root < size && subScorers[root].docID() == doc) { + private void scoreAll(int root) throws IOException { + if (root < numScorers && subScorers[root].docID() == doc) { float sub = subScorers[root].score(); + freq++; scoreSum += sub; scoreMax = Math.max(scoreMax, sub); - scoreAll((root<<1)+1, size, doc); - scoreAll((root<<1)+2, size, doc); + scoreAll((root<<1)+1); + scoreAll((root<<1)+2); } } @Override public int freq() throws IOException { - int doc = subScorers[0].docID(); - int size = numScorers; - return 1 + freq(1, size, doc) + freq(2, size, doc); - } - - // Recursively iterate all subScorers that generated last doc computing sum and max - private int freq(int root, int size, int doc) throws IOException { - int freq = 0; - if (root < size && subScorers[root].docID() == doc) { - freq++; - freq += freq((root<<1)+1, size, doc); - freq += freq((root<<1)+2, size, doc); - } return freq; } @Override public int advance(int target) throws IOException { - if (numScorers == 0) return doc = NO_MORE_DOCS; - while (subScorers[0].docID() < target) { + assert doc != NO_MORE_DOCS; + while(true) { if (subScorers[0].advance(target) != NO_MORE_DOCS) { heapAdjust(0); } else { @@ -129,7 +125,10 @@ class DisjunctionMaxScorer extends DisjunctionScorer { return doc = NO_MORE_DOCS; } } + if (subScorers[0].docID() >= target) { + afterNext(); + return doc; + } } - return doc = subScorers[0].docID(); } } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java index fd52200994f..dcaf0236112 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestDisjunctionMaxQuery.java @@ -19,12 +19,16 @@ package org.apache.lucene.search; import org.apache.lucene.document.Field; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.SlowCompositeReaderWrapper; import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.RandomIndexWriter; @@ -32,6 +36,8 @@ import org.apache.lucene.index.StoredDocument; import org.apache.lucene.index.Term; import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.store.Directory; import java.text.DecimalFormat; @@ -470,6 +476,39 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase { } } + // LUCENE-4477 / LUCENE-4401: + public void testBooleanSpanQuery() throws Exception { + int hits = 0; + Directory directory = newDirectory(); + Analyzer indexerAnalyzer = new MockAnalyzer(random()); + + IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer); + IndexWriter writer = new IndexWriter(directory, config); + String FIELD = "content"; + Document d = new Document(); + d.add(new TextField(FIELD, "clockwork orange", Field.Store.YES)); + writer.addDocument(d); + writer.close(); + + IndexReader indexReader = DirectoryReader.open(directory); + IndexSearcher searcher = newSearcher(indexReader); + + DisjunctionMaxQuery query = new DisjunctionMaxQuery(1.0f); + SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork")); + SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork")); + query.add(sq1); + query.add(sq2); + TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true); + searcher.search(query, collector); + hits = collector.topDocs().scoreDocs.length; + for (ScoreDoc scoreDoc : collector.topDocs().scoreDocs){ + System.out.println(scoreDoc.doc); + } + indexReader.close(); + assertEquals(hits, 1); + directory.close(); + } + /** macro */ protected Query tq(String f, String t) { return new TermQuery(new Term(f, t));