LUCENE-1596: MultiTermDocs speedup when set with MultiTermDocs.seek(MultiTermEnum)

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@774846 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2009-05-14 16:40:23 +00:00
parent 61dc82c6d0
commit f4415a3924
3 changed files with 66 additions and 7 deletions

View File

@ -345,6 +345,9 @@ Optimizations
score should then call Scorer.score() per hit inside score should then call Scorer.score() per hit inside
collect(). (Shai Erera via Mike McCandless) collect(). (Shai Erera via Mike McCandless)
8. LUCENE-1596: MultiTermDocs speedup when set with
MultiTermDocs.seek(MultiTermEnum) (yonik)
Documentation Documentation
Build Build

View File

@ -533,10 +533,12 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
private Term term; private Term term;
private int docFreq; private int docFreq;
final SegmentMergeInfo[] matchingSegments; // null terminated array of matching segments
public MultiTermEnum(IndexReader[] readers, int[] starts, Term t) public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
throws IOException { throws IOException {
queue = new SegmentMergeQueue(readers.length); queue = new SegmentMergeQueue(readers.length);
matchingSegments = new SegmentMergeInfo[readers.length+1];
for (int i = 0; i < readers.length; i++) { for (int i = 0; i < readers.length; i++) {
IndexReader reader = readers[i]; IndexReader reader = readers[i];
TermEnum termEnum; TermEnum termEnum;
@ -547,6 +549,7 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
termEnum = reader.terms(); termEnum = reader.terms();
SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader); SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
smi.ord = i;
if (t == null ? smi.next() : termEnum.term() != null) if (t == null ? smi.next() : termEnum.term() != null)
queue.put(smi); // initialize queue queue.put(smi); // initialize queue
else else
@ -559,7 +562,20 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
} }
public boolean next() throws IOException { public boolean next() throws IOException {
for (int i=0; i<matchingSegments.length; i++) {
SegmentMergeInfo smi = matchingSegments[i];
if (smi==null) break;
if (smi.next())
queue.put(smi);
else
smi.close(); // done with segment
}
int numMatchingSegments = 0;
matchingSegments[0] = null;
SegmentMergeInfo top = (SegmentMergeInfo)queue.top(); SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
if (top == null) { if (top == null) {
term = null; term = null;
return false; return false;
@ -569,14 +585,13 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
docFreq = 0; docFreq = 0;
while (top != null && term.compareTo(top.term) == 0) { while (top != null && term.compareTo(top.term) == 0) {
matchingSegments[numMatchingSegments++] = top;
queue.pop(); queue.pop();
docFreq += top.termEnum.docFreq(); // increment freq docFreq += top.termEnum.docFreq(); // increment freq
if (top.next())
queue.put(top); // restore queue
else
top.close(); // done with a segment
top = (SegmentMergeInfo)queue.top(); top = (SegmentMergeInfo)queue.top();
} }
matchingSegments[numMatchingSegments] = null;
return true; return true;
} }
@ -603,7 +618,11 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
private TermDocs[] readerTermDocs; private TermDocs[] readerTermDocs;
protected TermDocs current; // == readerTermDocs[pointer] protected TermDocs current; // == readerTermDocs[pointer]
private MultiTermEnum tenum; // the term enum used for seeking... can be null
int matchingSegmentPos; // position into the matching segments from tenum
SegmentMergeInfo smi; // current segment merge info... can be null
public MultiTermDocs(IndexReader[] r, int[] s) { public MultiTermDocs(IndexReader[] r, int[] s) {
readers = r; readers = r;
starts = s; starts = s;
@ -623,10 +642,16 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
this.base = 0; this.base = 0;
this.pointer = 0; this.pointer = 0;
this.current = null; this.current = null;
this.tenum = null;
this.smi = null;
this.matchingSegmentPos = 0;
} }
public void seek(TermEnum termEnum) throws IOException { public void seek(TermEnum termEnum) throws IOException {
seek(termEnum.term()); seek(termEnum.term());
if (termEnum instanceof MultiTermEnum) {
this.tenum = (MultiTermEnum)termEnum;
}
} }
public boolean next() throws IOException { public boolean next() throws IOException {
@ -635,6 +660,14 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
return true; return true;
} }
else if (pointer < readers.length) { else if (pointer < readers.length) {
if (tenum != null) {
smi = tenum.matchingSegments[matchingSegmentPos++];
if (smi==null) {
pointer = readers.length;
return false;
}
pointer = smi.ord;
}
base = starts[pointer]; base = starts[pointer];
current = termDocs(pointer++); current = termDocs(pointer++);
} else { } else {
@ -648,6 +681,14 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
while (true) { while (true) {
while (current == null) { while (current == null) {
if (pointer < readers.length) { // try next segment if (pointer < readers.length) { // try next segment
if (tenum != null) {
smi = tenum.matchingSegments[matchingSegmentPos++];
if (smi==null) {
pointer = readers.length;
return 0;
}
pointer = smi.ord;
}
base = starts[pointer]; base = starts[pointer];
current = termDocs(pointer++); current = termDocs(pointer++);
} else { } else {
@ -672,6 +713,14 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
if (current != null && current.skipTo(target-base)) { if (current != null && current.skipTo(target-base)) {
return true; return true;
} else if (pointer < readers.length) { } else if (pointer < readers.length) {
if (tenum != null) {
SegmentMergeInfo smi = tenum.matchingSegments[matchingSegmentPos++];
if (smi==null) {
pointer = readers.length;
return false;
}
pointer = smi.ord;
}
base = starts[pointer]; base = starts[pointer];
current = termDocs(pointer++); current = termDocs(pointer++);
} else } else
@ -683,7 +732,13 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
TermDocs result = readerTermDocs[i]; TermDocs result = readerTermDocs[i];
if (result == null) if (result == null)
result = readerTermDocs[i] = termDocs(readers[i]); result = readerTermDocs[i] = termDocs(readers[i]);
result.seek(term); if (smi != null) {
assert(smi.ord == i);
assert(smi.termEnum.term().equals(term));
result.seek(smi.termEnum);
} else {
result.seek(term);
}
return result; return result;
} }

View File

@ -22,6 +22,7 @@ import java.io.IOException;
final class SegmentMergeInfo { final class SegmentMergeInfo {
Term term; Term term;
int base; int base;
int ord; // the position of the segment in a MultiReader
TermEnum termEnum; TermEnum termEnum;
IndexReader reader; IndexReader reader;
int delCount; int delCount;