LUCENE-2130: fix performance issue with MultiTermEnum.seek when you seek just a bit ahead on each call (which AutomatonFuzzyTermsEnum does)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@963751 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2010-07-13 15:03:07 +00:00
parent 02f76901ca
commit ed6406b8df
3 changed files with 46 additions and 5 deletions

View File

@ -728,6 +728,9 @@ Bug fixes
files when a mergedSegmentWarmer is set on IndexWriter. (Mike
McCandless)
* LUCENE-2130: Fix performance issue when FuzzyQuery runs on a
multi-segment index (Michael McCandless)
API Changes
* LUCENE-2281: added doBeforeFlush to IndexWriter to allow extensions to perform

View File

@ -42,6 +42,9 @@ public final class MultiTermsEnum extends TermsEnum {
private final MultiDocsEnum.EnumWithSlice[] subDocs;
private final MultiDocsAndPositionsEnum.EnumWithSlice[] subDocsAndPositions;
private BytesRef lastSeek;
private final BytesRef lastSeekScratch = new BytesRef();
private int numTop;
private int numSubs;
private BytesRef current;
@ -139,8 +142,40 @@ public final class MultiTermsEnum extends TermsEnum {
public SeekStatus seek(BytesRef term, boolean useCache) throws IOException {
queue.clear();
numTop = 0;
boolean seekOpt = false;
if (lastSeek != null && termComp.compare(lastSeek, term) <= 0) {
seekOpt = true;
}
lastSeekScratch.copy(term);
lastSeek = lastSeekScratch;
for(int i=0;i<numSubs;i++) {
final SeekStatus status = currentSubs[i].terms.seek(term, useCache);
final SeekStatus status;
// LUCENE-2130: if we had just seek'd already, prior
// to this seek, and the new seek term is after the
// previous one, don't try to re-seek this sub if its
// current term is already beyond this new seek term.
// Doing so is a waste because this sub will simply
// seek to the same spot.
if (seekOpt) {
final BytesRef curTerm = currentSubs[i].current;
if (curTerm != null) {
final int cmp = termComp.compare(term, curTerm);
if (cmp == 0) {
status = SeekStatus.FOUND;
} else if (cmp < 0) {
status = SeekStatus.NOT_FOUND;
} else {
status = currentSubs[i].terms.seek(term, useCache);
}
} else {
status = SeekStatus.END;
}
} else {
status = currentSubs[i].terms.seek(term, useCache);
}
if (status == SeekStatus.FOUND) {
top[numTop++] = currentSubs[i];
current = currentSubs[i].current = currentSubs[i].terms.term();
@ -150,6 +185,7 @@ public final class MultiTermsEnum extends TermsEnum {
queue.add(currentSubs[i]);
} else {
// enum exhausted
currentSubs[i].current = null;
}
}
@ -205,6 +241,8 @@ public final class MultiTermsEnum extends TermsEnum {
@Override
public BytesRef next() throws IOException {
lastSeek = null;
// restore queue
pushTop();

View File

@ -88,14 +88,13 @@ public class TestMultiFields extends LuceneTestCase {
BytesRef term = terms.get(r.nextInt(terms.size()));
DocsEnum docsEnum = terms2.docs(delDocs, term, null);
int count = 0;
assertNotNull(docsEnum);
for(int docID : docs.get(term)) {
if (!deleted.contains(docID)) {
assertEquals(docID, docsEnum.nextDoc());
count++;
}
}
//System.out.println("c=" + count + " t=" + term);
assertEquals(docsEnum.NO_MORE_DOCS, docsEnum.nextDoc());
}
@ -104,6 +103,7 @@ public class TestMultiFields extends LuceneTestCase {
}
}
/*
private void verify(IndexReader r, String term, List<Integer> expected) throws Exception {
DocsEnum docs = MultiFields.getTermDocsEnum(r,
MultiFields.getDeletedDocs(r),
@ -115,6 +115,7 @@ public class TestMultiFields extends LuceneTestCase {
}
assertEquals(docs.NO_MORE_DOCS, docs.nextDoc());
}
*/
public void testSeparateEnums() throws Exception {
Directory dir = new MockRAMDirectory();
@ -133,5 +134,4 @@ public class TestMultiFields extends LuceneTestCase {
r.close();
dir.close();
}
}