mirror of
https://github.com/apache/lucene.git
synced 2025-03-07 00:39:21 +00:00
LUCENE-2130: fix performance issue with MultiTermsEnum.seek when you seek just a bit ahead on each call (which AutomatonFuzzyTermsEnum does)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@963751 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
02f76901ca
commit
ed6406b8df
@ -728,6 +728,9 @@ Bug fixes
|
||||
files when a mergedSegmentWarmer is set on IndexWriter. (Mike
|
||||
McCandless)
|
||||
|
||||
* LUCENE-2130: Fix performance issue when FuzzyQuery runs on a
|
||||
multi-segment index (Michael McCandless)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-2281: added doBeforeFlush to IndexWriter to allow extensions to perform
|
||||
|
@ -42,6 +42,9 @@ public final class MultiTermsEnum extends TermsEnum {
|
||||
private final MultiDocsEnum.EnumWithSlice[] subDocs;
|
||||
private final MultiDocsAndPositionsEnum.EnumWithSlice[] subDocsAndPositions;
|
||||
|
||||
private BytesRef lastSeek;
|
||||
private final BytesRef lastSeekScratch = new BytesRef();
|
||||
|
||||
private int numTop;
|
||||
private int numSubs;
|
||||
private BytesRef current;
|
||||
@ -139,8 +142,40 @@ public final class MultiTermsEnum extends TermsEnum {
|
||||
public SeekStatus seek(BytesRef term, boolean useCache) throws IOException {
|
||||
queue.clear();
|
||||
numTop = 0;
|
||||
|
||||
boolean seekOpt = false;
|
||||
if (lastSeek != null && termComp.compare(lastSeek, term) <= 0) {
|
||||
seekOpt = true;
|
||||
}
|
||||
lastSeekScratch.copy(term);
|
||||
lastSeek = lastSeekScratch;
|
||||
|
||||
for(int i=0;i<numSubs;i++) {
|
||||
final SeekStatus status = currentSubs[i].terms.seek(term, useCache);
|
||||
final SeekStatus status;
|
||||
// LUCENE-2130: if we had just seek'd already, prior
|
||||
// to this seek, and the new seek term is after the
|
||||
// previous one, don't try to re-seek this sub if its
|
||||
// current term is already beyond this new seek term.
|
||||
// Doing so is a waste because this sub will simply
|
||||
// seek to the same spot.
|
||||
if (seekOpt) {
|
||||
final BytesRef curTerm = currentSubs[i].current;
|
||||
if (curTerm != null) {
|
||||
final int cmp = termComp.compare(term, curTerm);
|
||||
if (cmp == 0) {
|
||||
status = SeekStatus.FOUND;
|
||||
} else if (cmp < 0) {
|
||||
status = SeekStatus.NOT_FOUND;
|
||||
} else {
|
||||
status = currentSubs[i].terms.seek(term, useCache);
|
||||
}
|
||||
} else {
|
||||
status = SeekStatus.END;
|
||||
}
|
||||
} else {
|
||||
status = currentSubs[i].terms.seek(term, useCache);
|
||||
}
|
||||
|
||||
if (status == SeekStatus.FOUND) {
|
||||
top[numTop++] = currentSubs[i];
|
||||
current = currentSubs[i].current = currentSubs[i].terms.term();
|
||||
@ -150,6 +185,7 @@ public final class MultiTermsEnum extends TermsEnum {
|
||||
queue.add(currentSubs[i]);
|
||||
} else {
|
||||
// enum exhausted
|
||||
currentSubs[i].current = null;
|
||||
}
|
||||
}
|
||||
|
||||
@ -205,6 +241,8 @@ public final class MultiTermsEnum extends TermsEnum {
|
||||
|
||||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
lastSeek = null;
|
||||
|
||||
// restore queue
|
||||
pushTop();
|
||||
|
||||
|
@ -88,14 +88,13 @@ public class TestMultiFields extends LuceneTestCase {
|
||||
BytesRef term = terms.get(r.nextInt(terms.size()));
|
||||
|
||||
DocsEnum docsEnum = terms2.docs(delDocs, term, null);
|
||||
int count = 0;
|
||||
assertNotNull(docsEnum);
|
||||
|
||||
for(int docID : docs.get(term)) {
|
||||
if (!deleted.contains(docID)) {
|
||||
assertEquals(docID, docsEnum.nextDoc());
|
||||
count++;
|
||||
}
|
||||
}
|
||||
//System.out.println("c=" + count + " t=" + term);
|
||||
assertEquals(docsEnum.NO_MORE_DOCS, docsEnum.nextDoc());
|
||||
}
|
||||
|
||||
@ -104,6 +103,7 @@ public class TestMultiFields extends LuceneTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
private void verify(IndexReader r, String term, List<Integer> expected) throws Exception {
|
||||
DocsEnum docs = MultiFields.getTermDocsEnum(r,
|
||||
MultiFields.getDeletedDocs(r),
|
||||
@ -115,6 +115,7 @@ public class TestMultiFields extends LuceneTestCase {
|
||||
}
|
||||
assertEquals(docs.NO_MORE_DOCS, docs.nextDoc());
|
||||
}
|
||||
*/
|
||||
|
||||
public void testSeparateEnums() throws Exception {
|
||||
Directory dir = new MockRAMDirectory();
|
||||
@ -133,5 +134,4 @@ public class TestMultiFields extends LuceneTestCase {
|
||||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user