mirror of https://github.com/apache/lucene.git
LUCENE-4926: speed up disjunctionmaxscorer
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1466997 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4de12fe628
commit
53061c4bf8
|
@ -206,6 +206,8 @@ Optimizations
|
|||
* LUCENE-4923: Speed up BooleanQuerys processing of in-order disjunctions.
|
||||
(Robert Muir)
|
||||
|
||||
* LUCENE-4926: Speed up DisjunctionMatchQuery. (Robert Muir)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-4844: removed TaxonomyReader.getParent(), you should use
|
||||
|
|
|
@ -28,6 +28,7 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
|
|||
/* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */
|
||||
private final float tieBreakerMultiplier;
|
||||
private int doc = -1;
|
||||
private int freq = -1;
|
||||
|
||||
/* Used when scoring currently matching doc. */
|
||||
private float scoreSum;
|
||||
|
@ -55,8 +56,8 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
|
|||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (numScorers == 0) return doc = NO_MORE_DOCS;
|
||||
while (subScorers[0].docID() == doc) {
|
||||
assert doc != NO_MORE_DOCS;
|
||||
while(true) {
|
||||
if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
|
||||
heapAdjust(0);
|
||||
} else {
|
||||
|
@ -65,9 +66,11 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
|
|||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
if (subScorers[0].docID() != doc) {
|
||||
afterNext();
|
||||
return doc;
|
||||
}
|
||||
}
|
||||
|
||||
return doc = subScorers[0].docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -80,47 +83,40 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
|
|||
*/
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
int doc = subScorers[0].docID();
|
||||
scoreSum = scoreMax = subScorers[0].score();
|
||||
int size = numScorers;
|
||||
scoreAll(1, size, doc);
|
||||
scoreAll(2, size, doc);
|
||||
return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier;
|
||||
}
|
||||
|
||||
private void afterNext() throws IOException {
|
||||
doc = subScorers[0].docID();
|
||||
if (doc != NO_MORE_DOCS) {
|
||||
scoreSum = scoreMax = subScorers[0].score();
|
||||
freq = 1;
|
||||
scoreAll(1);
|
||||
scoreAll(2);
|
||||
}
|
||||
}
|
||||
|
||||
// Recursively iterate all subScorers that generated last doc computing sum and max
|
||||
private void scoreAll(int root, int size, int doc) throws IOException {
|
||||
if (root < size && subScorers[root].docID() == doc) {
|
||||
private void scoreAll(int root) throws IOException {
|
||||
if (root < numScorers && subScorers[root].docID() == doc) {
|
||||
float sub = subScorers[root].score();
|
||||
freq++;
|
||||
scoreSum += sub;
|
||||
scoreMax = Math.max(scoreMax, sub);
|
||||
scoreAll((root<<1)+1, size, doc);
|
||||
scoreAll((root<<1)+2, size, doc);
|
||||
scoreAll((root<<1)+1);
|
||||
scoreAll((root<<1)+2);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int freq() throws IOException {
|
||||
int doc = subScorers[0].docID();
|
||||
int size = numScorers;
|
||||
return 1 + freq(1, size, doc) + freq(2, size, doc);
|
||||
}
|
||||
|
||||
// Recursively iterate all subScorers that generated last doc computing sum and max
|
||||
private int freq(int root, int size, int doc) throws IOException {
|
||||
int freq = 0;
|
||||
if (root < size && subScorers[root].docID() == doc) {
|
||||
freq++;
|
||||
freq += freq((root<<1)+1, size, doc);
|
||||
freq += freq((root<<1)+2, size, doc);
|
||||
}
|
||||
return freq;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
if (numScorers == 0) return doc = NO_MORE_DOCS;
|
||||
while (subScorers[0].docID() < target) {
|
||||
assert doc != NO_MORE_DOCS;
|
||||
while(true) {
|
||||
if (subScorers[0].advance(target) != NO_MORE_DOCS) {
|
||||
heapAdjust(0);
|
||||
} else {
|
||||
|
@ -129,7 +125,10 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
|
|||
return doc = NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
if (subScorers[0].docID() >= target) {
|
||||
afterNext();
|
||||
return doc;
|
||||
}
|
||||
}
|
||||
return doc = subScorers[0].docID();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,12 +19,16 @@ package org.apache.lucene.search;
|
|||
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
|
@ -32,6 +36,8 @@ import org.apache.lucene.index.StoredDocument;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
||||
import java.text.DecimalFormat;
|
||||
|
@ -470,6 +476,39 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
// LUCENE-4477 / LUCENE-4401:
|
||||
public void testBooleanSpanQuery() throws Exception {
|
||||
int hits = 0;
|
||||
Directory directory = newDirectory();
|
||||
Analyzer indexerAnalyzer = new MockAnalyzer(random());
|
||||
|
||||
IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
|
||||
IndexWriter writer = new IndexWriter(directory, config);
|
||||
String FIELD = "content";
|
||||
Document d = new Document();
|
||||
d.add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
|
||||
writer.addDocument(d);
|
||||
writer.close();
|
||||
|
||||
IndexReader indexReader = DirectoryReader.open(directory);
|
||||
IndexSearcher searcher = newSearcher(indexReader);
|
||||
|
||||
DisjunctionMaxQuery query = new DisjunctionMaxQuery(1.0f);
|
||||
SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork"));
|
||||
SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork"));
|
||||
query.add(sq1);
|
||||
query.add(sq2);
|
||||
TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
|
||||
searcher.search(query, collector);
|
||||
hits = collector.topDocs().scoreDocs.length;
|
||||
for (ScoreDoc scoreDoc : collector.topDocs().scoreDocs){
|
||||
System.out.println(scoreDoc.doc);
|
||||
}
|
||||
indexReader.close();
|
||||
assertEquals(hits, 1);
|
||||
directory.close();
|
||||
}
|
||||
|
||||
/** macro */
|
||||
protected Query tq(String f, String t) {
|
||||
return new TermQuery(new Term(f, t));
|
||||
|
|
Loading…
Reference in New Issue