mirror of https://github.com/apache/lucene.git
LUCENE-4926: speed up disjunctionmaxscorer
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1466997 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4de12fe628
commit
53061c4bf8
|
@ -206,6 +206,8 @@ Optimizations
|
||||||
* LUCENE-4923: Speed up BooleanQuerys processing of in-order disjunctions.
|
* LUCENE-4923: Speed up BooleanQuerys processing of in-order disjunctions.
|
||||||
(Robert Muir)
|
(Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-4926: Speed up DisjunctionMatchQuery. (Robert Muir)
|
||||||
|
|
||||||
API Changes
|
API Changes
|
||||||
|
|
||||||
* LUCENE-4844: removed TaxonomyReader.getParent(), you should use
|
* LUCENE-4844: removed TaxonomyReader.getParent(), you should use
|
||||||
|
|
|
@ -28,6 +28,7 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
|
||||||
/* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */
|
/* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */
|
||||||
private final float tieBreakerMultiplier;
|
private final float tieBreakerMultiplier;
|
||||||
private int doc = -1;
|
private int doc = -1;
|
||||||
|
private int freq = -1;
|
||||||
|
|
||||||
/* Used when scoring currently matching doc. */
|
/* Used when scoring currently matching doc. */
|
||||||
private float scoreSum;
|
private float scoreSum;
|
||||||
|
@ -55,8 +56,8 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int nextDoc() throws IOException {
|
public int nextDoc() throws IOException {
|
||||||
if (numScorers == 0) return doc = NO_MORE_DOCS;
|
assert doc != NO_MORE_DOCS;
|
||||||
while (subScorers[0].docID() == doc) {
|
while(true) {
|
||||||
if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
|
if (subScorers[0].nextDoc() != NO_MORE_DOCS) {
|
||||||
heapAdjust(0);
|
heapAdjust(0);
|
||||||
} else {
|
} else {
|
||||||
|
@ -65,9 +66,11 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
|
||||||
return doc = NO_MORE_DOCS;
|
return doc = NO_MORE_DOCS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (subScorers[0].docID() != doc) {
|
||||||
|
afterNext();
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return doc = subScorers[0].docID();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -80,47 +83,40 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public float score() throws IOException {
|
public float score() throws IOException {
|
||||||
int doc = subScorers[0].docID();
|
|
||||||
scoreSum = scoreMax = subScorers[0].score();
|
|
||||||
int size = numScorers;
|
|
||||||
scoreAll(1, size, doc);
|
|
||||||
scoreAll(2, size, doc);
|
|
||||||
return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier;
|
return scoreMax + (scoreSum - scoreMax) * tieBreakerMultiplier;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void afterNext() throws IOException {
|
||||||
|
doc = subScorers[0].docID();
|
||||||
|
if (doc != NO_MORE_DOCS) {
|
||||||
|
scoreSum = scoreMax = subScorers[0].score();
|
||||||
|
freq = 1;
|
||||||
|
scoreAll(1);
|
||||||
|
scoreAll(2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Recursively iterate all subScorers that generated last doc computing sum and max
|
// Recursively iterate all subScorers that generated last doc computing sum and max
|
||||||
private void scoreAll(int root, int size, int doc) throws IOException {
|
private void scoreAll(int root) throws IOException {
|
||||||
if (root < size && subScorers[root].docID() == doc) {
|
if (root < numScorers && subScorers[root].docID() == doc) {
|
||||||
float sub = subScorers[root].score();
|
float sub = subScorers[root].score();
|
||||||
|
freq++;
|
||||||
scoreSum += sub;
|
scoreSum += sub;
|
||||||
scoreMax = Math.max(scoreMax, sub);
|
scoreMax = Math.max(scoreMax, sub);
|
||||||
scoreAll((root<<1)+1, size, doc);
|
scoreAll((root<<1)+1);
|
||||||
scoreAll((root<<1)+2, size, doc);
|
scoreAll((root<<1)+2);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int freq() throws IOException {
|
public int freq() throws IOException {
|
||||||
int doc = subScorers[0].docID();
|
|
||||||
int size = numScorers;
|
|
||||||
return 1 + freq(1, size, doc) + freq(2, size, doc);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Recursively iterate all subScorers that generated last doc computing sum and max
|
|
||||||
private int freq(int root, int size, int doc) throws IOException {
|
|
||||||
int freq = 0;
|
|
||||||
if (root < size && subScorers[root].docID() == doc) {
|
|
||||||
freq++;
|
|
||||||
freq += freq((root<<1)+1, size, doc);
|
|
||||||
freq += freq((root<<1)+2, size, doc);
|
|
||||||
}
|
|
||||||
return freq;
|
return freq;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int advance(int target) throws IOException {
|
public int advance(int target) throws IOException {
|
||||||
if (numScorers == 0) return doc = NO_MORE_DOCS;
|
assert doc != NO_MORE_DOCS;
|
||||||
while (subScorers[0].docID() < target) {
|
while(true) {
|
||||||
if (subScorers[0].advance(target) != NO_MORE_DOCS) {
|
if (subScorers[0].advance(target) != NO_MORE_DOCS) {
|
||||||
heapAdjust(0);
|
heapAdjust(0);
|
||||||
} else {
|
} else {
|
||||||
|
@ -129,7 +125,10 @@ class DisjunctionMaxScorer extends DisjunctionScorer {
|
||||||
return doc = NO_MORE_DOCS;
|
return doc = NO_MORE_DOCS;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
if (subScorers[0].docID() >= target) {
|
||||||
return doc = subScorers[0].docID();
|
afterNext();
|
||||||
|
return doc;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,12 +19,16 @@ package org.apache.lucene.search;
|
||||||
|
|
||||||
import org.apache.lucene.document.Field;
|
import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.document.Document;
|
import org.apache.lucene.document.Document;
|
||||||
import org.apache.lucene.document.FieldType;
|
import org.apache.lucene.document.FieldType;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.index.AtomicReaderContext;
|
import org.apache.lucene.index.AtomicReaderContext;
|
||||||
|
import org.apache.lucene.index.DirectoryReader;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
import org.apache.lucene.index.SlowCompositeReaderWrapper;
|
||||||
import org.apache.lucene.index.FieldInvertState;
|
import org.apache.lucene.index.FieldInvertState;
|
||||||
import org.apache.lucene.index.RandomIndexWriter;
|
import org.apache.lucene.index.RandomIndexWriter;
|
||||||
|
@ -32,6 +36,8 @@ import org.apache.lucene.index.StoredDocument;
|
||||||
import org.apache.lucene.index.Term;
|
import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
import org.apache.lucene.search.similarities.Similarity;
|
||||||
|
import org.apache.lucene.search.spans.SpanQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
|
|
||||||
import java.text.DecimalFormat;
|
import java.text.DecimalFormat;
|
||||||
|
@ -470,6 +476,39 @@ public class TestDisjunctionMaxQuery extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LUCENE-4477 / LUCENE-4401:
|
||||||
|
public void testBooleanSpanQuery() throws Exception {
|
||||||
|
int hits = 0;
|
||||||
|
Directory directory = newDirectory();
|
||||||
|
Analyzer indexerAnalyzer = new MockAnalyzer(random());
|
||||||
|
|
||||||
|
IndexWriterConfig config = new IndexWriterConfig(TEST_VERSION_CURRENT, indexerAnalyzer);
|
||||||
|
IndexWriter writer = new IndexWriter(directory, config);
|
||||||
|
String FIELD = "content";
|
||||||
|
Document d = new Document();
|
||||||
|
d.add(new TextField(FIELD, "clockwork orange", Field.Store.YES));
|
||||||
|
writer.addDocument(d);
|
||||||
|
writer.close();
|
||||||
|
|
||||||
|
IndexReader indexReader = DirectoryReader.open(directory);
|
||||||
|
IndexSearcher searcher = newSearcher(indexReader);
|
||||||
|
|
||||||
|
DisjunctionMaxQuery query = new DisjunctionMaxQuery(1.0f);
|
||||||
|
SpanQuery sq1 = new SpanTermQuery(new Term(FIELD, "clockwork"));
|
||||||
|
SpanQuery sq2 = new SpanTermQuery(new Term(FIELD, "clckwork"));
|
||||||
|
query.add(sq1);
|
||||||
|
query.add(sq2);
|
||||||
|
TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);
|
||||||
|
searcher.search(query, collector);
|
||||||
|
hits = collector.topDocs().scoreDocs.length;
|
||||||
|
for (ScoreDoc scoreDoc : collector.topDocs().scoreDocs){
|
||||||
|
System.out.println(scoreDoc.doc);
|
||||||
|
}
|
||||||
|
indexReader.close();
|
||||||
|
assertEquals(hits, 1);
|
||||||
|
directory.close();
|
||||||
|
}
|
||||||
|
|
||||||
/** macro */
|
/** macro */
|
||||||
protected Query tq(String f, String t) {
|
protected Query tq(String f, String t) {
|
||||||
return new TermQuery(new Term(f, t));
|
return new TermQuery(new Term(f, t));
|
||||||
|
|
Loading…
Reference in New Issue