mirror of https://github.com/apache/lucene.git
LUCENE-1596: MultiTermDocs speedup when set with MultiTermDocs.seek(MultiTermEnum)
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@774846 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
61dc82c6d0
commit
f4415a3924
|
@ -345,6 +345,9 @@ Optimizations
|
||||||
score should then call Scorer.score() per hit inside
|
score should then call Scorer.score() per hit inside
|
||||||
collect(). (Shai Erera via Mike McCandless)
|
collect(). (Shai Erera via Mike McCandless)
|
||||||
|
|
||||||
|
8. LUCENE-1596: MultiTermDocs speedup when set with
|
||||||
|
MultiTermDocs.seek(MultiTermEnum) (yonik)
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
|
|
||||||
Build
|
Build
|
||||||
|
|
|
@ -533,10 +533,12 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
|
||||||
|
|
||||||
private Term term;
|
private Term term;
|
||||||
private int docFreq;
|
private int docFreq;
|
||||||
|
final SegmentMergeInfo[] matchingSegments; // null terminated array of matching segments
|
||||||
|
|
||||||
public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
|
public MultiTermEnum(IndexReader[] readers, int[] starts, Term t)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
queue = new SegmentMergeQueue(readers.length);
|
queue = new SegmentMergeQueue(readers.length);
|
||||||
|
matchingSegments = new SegmentMergeInfo[readers.length+1];
|
||||||
for (int i = 0; i < readers.length; i++) {
|
for (int i = 0; i < readers.length; i++) {
|
||||||
IndexReader reader = readers[i];
|
IndexReader reader = readers[i];
|
||||||
TermEnum termEnum;
|
TermEnum termEnum;
|
||||||
|
@ -547,6 +549,7 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
|
||||||
termEnum = reader.terms();
|
termEnum = reader.terms();
|
||||||
|
|
||||||
SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
|
SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
|
||||||
|
smi.ord = i;
|
||||||
if (t == null ? smi.next() : termEnum.term() != null)
|
if (t == null ? smi.next() : termEnum.term() != null)
|
||||||
queue.put(smi); // initialize queue
|
queue.put(smi); // initialize queue
|
||||||
else
|
else
|
||||||
|
@ -559,7 +562,20 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean next() throws IOException {
|
public boolean next() throws IOException {
|
||||||
|
for (int i=0; i<matchingSegments.length; i++) {
|
||||||
|
SegmentMergeInfo smi = matchingSegments[i];
|
||||||
|
if (smi==null) break;
|
||||||
|
if (smi.next())
|
||||||
|
queue.put(smi);
|
||||||
|
else
|
||||||
|
smi.close(); // done with segment
|
||||||
|
}
|
||||||
|
|
||||||
|
int numMatchingSegments = 0;
|
||||||
|
matchingSegments[0] = null;
|
||||||
|
|
||||||
SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
|
SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
|
||||||
|
|
||||||
if (top == null) {
|
if (top == null) {
|
||||||
term = null;
|
term = null;
|
||||||
return false;
|
return false;
|
||||||
|
@ -569,14 +585,13 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
|
||||||
docFreq = 0;
|
docFreq = 0;
|
||||||
|
|
||||||
while (top != null && term.compareTo(top.term) == 0) {
|
while (top != null && term.compareTo(top.term) == 0) {
|
||||||
|
matchingSegments[numMatchingSegments++] = top;
|
||||||
queue.pop();
|
queue.pop();
|
||||||
docFreq += top.termEnum.docFreq(); // increment freq
|
docFreq += top.termEnum.docFreq(); // increment freq
|
||||||
if (top.next())
|
|
||||||
queue.put(top); // restore queue
|
|
||||||
else
|
|
||||||
top.close(); // done with a segment
|
|
||||||
top = (SegmentMergeInfo)queue.top();
|
top = (SegmentMergeInfo)queue.top();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
matchingSegments[numMatchingSegments] = null;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -603,7 +618,11 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
|
||||||
|
|
||||||
private TermDocs[] readerTermDocs;
|
private TermDocs[] readerTermDocs;
|
||||||
protected TermDocs current; // == readerTermDocs[pointer]
|
protected TermDocs current; // == readerTermDocs[pointer]
|
||||||
|
|
||||||
|
private MultiTermEnum tenum; // the term enum used for seeking... can be null
|
||||||
|
int matchingSegmentPos; // position into the matching segments from tenum
|
||||||
|
SegmentMergeInfo smi; // current segment merge info... can be null
|
||||||
|
|
||||||
public MultiTermDocs(IndexReader[] r, int[] s) {
|
public MultiTermDocs(IndexReader[] r, int[] s) {
|
||||||
readers = r;
|
readers = r;
|
||||||
starts = s;
|
starts = s;
|
||||||
|
@ -623,10 +642,16 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
|
||||||
this.base = 0;
|
this.base = 0;
|
||||||
this.pointer = 0;
|
this.pointer = 0;
|
||||||
this.current = null;
|
this.current = null;
|
||||||
|
this.tenum = null;
|
||||||
|
this.smi = null;
|
||||||
|
this.matchingSegmentPos = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void seek(TermEnum termEnum) throws IOException {
|
public void seek(TermEnum termEnum) throws IOException {
|
||||||
seek(termEnum.term());
|
seek(termEnum.term());
|
||||||
|
if (termEnum instanceof MultiTermEnum) {
|
||||||
|
this.tenum = (MultiTermEnum)termEnum;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean next() throws IOException {
|
public boolean next() throws IOException {
|
||||||
|
@ -635,6 +660,14 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
else if (pointer < readers.length) {
|
else if (pointer < readers.length) {
|
||||||
|
if (tenum != null) {
|
||||||
|
smi = tenum.matchingSegments[matchingSegmentPos++];
|
||||||
|
if (smi==null) {
|
||||||
|
pointer = readers.length;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
pointer = smi.ord;
|
||||||
|
}
|
||||||
base = starts[pointer];
|
base = starts[pointer];
|
||||||
current = termDocs(pointer++);
|
current = termDocs(pointer++);
|
||||||
} else {
|
} else {
|
||||||
|
@ -648,6 +681,14 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
|
||||||
while (true) {
|
while (true) {
|
||||||
while (current == null) {
|
while (current == null) {
|
||||||
if (pointer < readers.length) { // try next segment
|
if (pointer < readers.length) { // try next segment
|
||||||
|
if (tenum != null) {
|
||||||
|
smi = tenum.matchingSegments[matchingSegmentPos++];
|
||||||
|
if (smi==null) {
|
||||||
|
pointer = readers.length;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
pointer = smi.ord;
|
||||||
|
}
|
||||||
base = starts[pointer];
|
base = starts[pointer];
|
||||||
current = termDocs(pointer++);
|
current = termDocs(pointer++);
|
||||||
} else {
|
} else {
|
||||||
|
@ -672,6 +713,14 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
|
||||||
if (current != null && current.skipTo(target-base)) {
|
if (current != null && current.skipTo(target-base)) {
|
||||||
return true;
|
return true;
|
||||||
} else if (pointer < readers.length) {
|
} else if (pointer < readers.length) {
|
||||||
|
if (tenum != null) {
|
||||||
|
SegmentMergeInfo smi = tenum.matchingSegments[matchingSegmentPos++];
|
||||||
|
if (smi==null) {
|
||||||
|
pointer = readers.length;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
pointer = smi.ord;
|
||||||
|
}
|
||||||
base = starts[pointer];
|
base = starts[pointer];
|
||||||
current = termDocs(pointer++);
|
current = termDocs(pointer++);
|
||||||
} else
|
} else
|
||||||
|
@ -683,7 +732,13 @@ class MultiSegmentReader extends DirectoryIndexReader implements Cloneable {
|
||||||
TermDocs result = readerTermDocs[i];
|
TermDocs result = readerTermDocs[i];
|
||||||
if (result == null)
|
if (result == null)
|
||||||
result = readerTermDocs[i] = termDocs(readers[i]);
|
result = readerTermDocs[i] = termDocs(readers[i]);
|
||||||
result.seek(term);
|
if (smi != null) {
|
||||||
|
assert(smi.ord == i);
|
||||||
|
assert(smi.termEnum.term().equals(term));
|
||||||
|
result.seek(smi.termEnum);
|
||||||
|
} else {
|
||||||
|
result.seek(term);
|
||||||
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@ import java.io.IOException;
|
||||||
final class SegmentMergeInfo {
|
final class SegmentMergeInfo {
|
||||||
Term term;
|
Term term;
|
||||||
int base;
|
int base;
|
||||||
|
int ord; // the position of the segment in a MultiReader
|
||||||
TermEnum termEnum;
|
TermEnum termEnum;
|
||||||
IndexReader reader;
|
IndexReader reader;
|
||||||
int delCount;
|
int delCount;
|
||||||
|
|
Loading…
Reference in New Issue