LUCENE-4926: speed up disjunctionmaxscorer

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1466997 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2013-04-11 17:43:17 +00:00
parent 4de12fe628
commit 53061c4bf8
3 changed files with 69 additions and 29 deletions

View File

@ -206,6 +206,8 @@ Optimizations
* LUCENE-4923: Speed up BooleanQuery's processing of in-order disjunctions.
(Robert Muir)
* LUCENE-4926: Speed up DisjunctionMaxQuery. (Robert Muir)
API Changes
* LUCENE-4844: removed TaxonomyReader.getParent(), you should use

View File

@ -28,6 +28,7 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
/* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */
private final float tieBreakerMultiplier;
private int doc = -1;
private int freq = -1;
/* Used when scoring currently matching doc. */
private float scoreSum;
@ -55,8 +56,8 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
@Override
public int nextDoc() throws IOException {
if (numScorers == 0) return doc = NO_MORE_DOCS;
while (subScorers[0].docID() == doc) {
assert doc != NO_MORE_DOCS;
while(true) {
if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
heapAdjust(0);
} else {
@ -65,9 +66,11 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
return doc = NO_MORE_DOCS;
}
}
if (subScorers[0].docID() != doc) {
afterNext();
return doc;
}
}
return doc = subScorers[0].docID();
}
@Override
@ -80,47 +83,40 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
*/
@Override
public float score() throws IOException {
int doc = subScorers[0].docID();
scoreSum = scoreMax = subScorers[0].score();
int size = numScorers;
scoreAll(1, size, doc);
scoreAll(2, size, doc);
return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier;
}
/**
 * Captures the state for the document the top sub-scorer is now positioned on.
 * Records that document id in {@code doc}; unless it is NO_MORE_DOCS, also
 * recomputes {@code scoreSum}, {@code scoreMax} and {@code freq} over the
 * sub-scorers positioned on the same document.
 */
private void afterNext() throws IOException {
  doc = subScorers[0].docID();
  if (doc == NO_MORE_DOCS) {
    // Exhausted: there is no current document to score.
    return;
  }
  // Seed the accumulators with the top sub-scorer, which is on doc by construction.
  final float topScore = subScorers[0].score();
  scoreMax = topScore;
  scoreSum = topScore;
  freq = 1;
  // Fold in the two subtrees under the root; scoreAll stops descending at the
  // first sub-scorer that is not positioned on doc.
  scoreAll(1);
  scoreAll(2);
}
// Recursively iterate all subScorers that generated last doc computing sum and max
private void scoreAll(int root, int size, int doc) throws IOException {
if (root < size && subScorers[root].docID() == doc) {
private void scoreAll(int root) throws IOException {
if (root < numScorers && subScorers[root].docID() == doc) {
float sub = subScorers[root].score();
freq++;
scoreSum += sub;
scoreMax = Math.max(scoreMax, sub);
scoreAll((root<<1)+1, size, doc);
scoreAll((root<<1)+2, size, doc);
scoreAll((root<<1)+1);
scoreAll((root<<1)+2);
}
}
@Override
public int freq() throws IOException {
int doc = subScorers[0].docID();
int size = numScorers;
return 1 + freq(1, size, doc) + freq(2, size, doc);
}
// Recursively iterate all subScorers that generated last doc, counting how many matched
private int freq(int root, int size, int doc) throws IOException {
int freq = 0;
if (root < size && subScorers[root].docID() == doc) {
freq++;
freq += freq((root<<1)+1, size, doc);
freq += freq((root<<1)+2, size, doc);
}
return freq;
}
@Override
public int advance(int target) throws IOException {
if (numScorers == 0) return doc = NO_MORE_DOCS;
while (subScorers[0].docID() < target) {
assert doc != NO_MORE_DOCS;
while(true) {
if (subScorers[0].advance(target) != NO_MORE_DOCS) {
heapAdjust(0);
} else {
@ -129,7 +125,10 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
return doc = NO_MORE_DOCS;
}
}
if (subScorers[0].docID() >= target) {
afterNext();
return doc;
}
}
return doc = subScorers[0].docID();
}
}

View File

@ -19,12 +19,16 @@ package org.apache.lucene.search;
import org.apache.lucene.document.Field;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.SlowCompositeReaderWrapper;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.RandomIndexWriter;
@ -32,6 +36,8 @@ import org.apache.lucene.index.StoredDocument;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import java.text.DecimalFormat;
@ -470,6 +476,39 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
}
}
// LUCENE-4477 / LUCENE-4401:
/**
 * Indexes a single document and searches it with a DisjunctionMaxQuery made of
 * two SpanTermQuery clauses, one on a term present in the document and one on
 * a term that is not, and checks that exactly one hit is returned.
 */
public void testBooleanSpanQuery() throws Exception {
  final String field = "content";

  // Build a one-document index.
  Directory directory = newDirectory();
  Analyzer indexerAnalyzer = new MockAnalyzer(random());
  IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
  IndexWriter writer = new IndexWriter(directory, config);
  Document d = new Document();
  d.add(new TextField(field, "clockwork orange", Field.Store.YES));
  writer.addDocument(d);
  writer.close();

  IndexReader indexReader = DirectoryReader.open(directory);
  IndexSearcher searcher = newSearcher(indexReader);

  // "clockwork" occurs in the indexed text; "clckwork" does not.
  DisjunctionMaxQuery query = new DisjunctionMaxQuery(1.0f);
  SpanQuery sq1 = new SpanTermQuery(new Term(field, "clockwork"));
  SpanQuery sq2 = new SpanTermQuery(new Term(field, "clckwork"));
  query.add(sq1);
  query.add(sq2);

  TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
  searcher.search(query, collector);
  // Read the results exactly once: TopDocsCollector documents that topDocs()
  // must not be called more than once per search execution.
  int hits = collector.topDocs().scoreDocs.length;

  // Release resources before asserting so a failed assertion does not also
  // leave the reader or directory open.
  indexReader.close();
  directory.close();

  // Expected value first, so a failure message reads correctly.
  assertEquals(1, hits);
}
/** macro */
protected Query tq(String f, String t) {
return new TermQuery(new Term(f, t));