LUCENE-6690: Speed up MultiTermsEnum.next().

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1692253 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Adrien Grand 2015-07-22 11:03:08 +00:00
parent 129a83b198
commit d01866b8fc
2 changed files with 59 additions and 18 deletions

View File

@ -356,6 +356,9 @@ Optimizations
in the case that there are few unique sets of values. in the case that there are few unique sets of values.
(Adrien Grand, Robert Muir) (Adrien Grand, Robert Muir)
* LUCENE-6690: Sped up MultiTermsEnum.next() on high-cardinality fields.
(Adrien Grand)
Build Build
* LUCENE-6518: Don't report false thread leaks from IBM J9 * LUCENE-6518: Don't report false thread leaks from IBM J9

View File

@ -19,7 +19,9 @@ package org.apache.lucene.index;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.PriorityQueue;
@ -32,6 +34,13 @@ import org.apache.lucene.util.PriorityQueue;
*/ */
public final class MultiTermsEnum extends TermsEnum { public final class MultiTermsEnum extends TermsEnum {
/**
 * Orders sub-enums by ascending {@code index}.
 *
 * <p>Uses {@link Integer#compare(int, int)} rather than subtracting the two
 * indices: a subtraction can overflow for operands of opposite sign and then
 * report the wrong ordering, which violates the {@link Comparator} contract.
 */
private static final Comparator<TermsEnumWithSlice> INDEX_COMPARATOR = new Comparator<TermsEnumWithSlice>() {
  @Override
  public int compare(TermsEnumWithSlice o1, TermsEnumWithSlice o2) {
    return Integer.compare(o1.index, o2.index);
  }
};
private final TermMergeQueue queue; private final TermMergeQueue queue;
private final TermsEnumWithSlice[] subs; // all of our subs (one per sub-reader) private final TermsEnumWithSlice[] subs; // all of our subs (one per sub-reader)
private final TermsEnumWithSlice[] currentSubs; // current subs that have at least one term for this field private final TermsEnumWithSlice[] currentSubs; // current subs that have at least one term for this field
@ -213,12 +222,14 @@ public final class MultiTermsEnum extends TermsEnum {
if (status == SeekStatus.FOUND) { if (status == SeekStatus.FOUND) {
top[numTop++] = currentSubs[i]; top[numTop++] = currentSubs[i];
current = currentSubs[i].current = currentSubs[i].terms.term(); current = currentSubs[i].current = currentSubs[i].terms.term();
queue.add(currentSubs[i]);
} else { } else {
if (status == SeekStatus.NOT_FOUND) { if (status == SeekStatus.NOT_FOUND) {
currentSubs[i].current = currentSubs[i].terms.term(); currentSubs[i].current = currentSubs[i].terms.term();
assert currentSubs[i].current != null; assert currentSubs[i].current != null;
queue.add(currentSubs[i]); queue.add(currentSubs[i]);
} else { } else {
assert status == SeekStatus.END;
// enum exhausted // enum exhausted
currentSubs[i].current = null; currentSubs[i].current = null;
} }
@ -253,23 +264,19 @@ public final class MultiTermsEnum extends TermsEnum {
// extract all subs from the queue that have the same // extract all subs from the queue that have the same
// top term // top term
assert numTop == 0; assert numTop == 0;
while(true) { numTop = queue.fillTop(top);
top[numTop++] = queue.pop();
if (queue.size() == 0 || !(queue.top()).current.bytesEquals(top[0].current)) {
break;
}
}
current = top[0].current; current = top[0].current;
} }
private void pushTop() throws IOException { private void pushTop() throws IOException {
// call next() on each top, and put back into queue // call next() on each top, and reorder queue
for(int i=0;i<numTop;i++) { for (int i = 0; i < numTop; i++) {
top[i].current = top[i].terms.next(); TermsEnumWithSlice top = queue.top();
if (top[i].current != null) { top.current = top.terms.next();
queue.add(top[i]); if (top.current == null) {
queue.pop();
} else { } else {
// no more fields in this reader queue.updateTop();
} }
} }
numTop = 0; numTop = 0;
@ -342,6 +349,8 @@ public final class MultiTermsEnum extends TermsEnum {
int upto = 0; int upto = 0;
ArrayUtil.timSort(top, 0, numTop, INDEX_COMPARATOR);
for(int i=0;i<numTop;i++) { for(int i=0;i<numTop;i++) {
final TermsEnumWithSlice entry = top[i]; final TermsEnumWithSlice entry = top[i];
@ -382,18 +391,47 @@ public final class MultiTermsEnum extends TermsEnum {
} }
private final static class TermMergeQueue extends PriorityQueue<TermsEnumWithSlice> { private final static class TermMergeQueue extends PriorityQueue<TermsEnumWithSlice> {
final int[] stack;
TermMergeQueue(int size) { TermMergeQueue(int size) {
super(size); super(size);
this.stack = new int[size];
} }
@Override @Override
protected boolean lessThan(TermsEnumWithSlice termsA, TermsEnumWithSlice termsB) { protected boolean lessThan(TermsEnumWithSlice termsA, TermsEnumWithSlice termsB) {
final int cmp = termsA.current.compareTo(termsB.current); return termsA.current.compareTo(termsB.current) < 0;
if (cmp != 0) {
return cmp < 0;
} else {
return termsA.subSlice.start < termsB.subSlice.start;
} }
/**
 * Copies the {@link #top()} slice, together with every other slice whose
 * current term equals the top one, into {@code tops}.
 *
 * <p>The heap array is walked from the root (slot 1); the children of a slot
 * are only visited when that slot itself matched the top term. Slots still
 * to be expanded are kept in {@link #stack}.
 *
 * @param tops destination array, filled from index 0
 * @return the number of slices written to {@code tops}; 0 if the queue is empty
 */
int fillTop(TermsEnumWithSlice[] tops) {
  final int heapSize = size();
  if (heapSize == 0) {
    return 0;
  }

  // The root always belongs to the result; seed the traversal with its slot.
  tops[0] = top();
  int matched = 1;
  stack[0] = 1;
  int pending = 1;

  while (pending > 0) {
    pending--;
    final int parent = stack[pending];
    final int firstChild = parent << 1;
    // Clamp to the heap size so that a missing child slot is never read.
    final int lastChild = Math.min(heapSize, firstChild + 1);
    for (int slot = firstChild; slot <= lastChild; slot++) {
      final TermsEnumWithSlice candidate = get(slot);
      if (candidate.current.equals(tops[0].current)) {
        tops[matched] = candidate;
        matched++;
        // This slot matched, so its own children still have to be checked.
        stack[pending] = slot;
        pending++;
      }
    }
  }
  return matched;
}
private TermsEnumWithSlice get(int i) {
return (TermsEnumWithSlice) getHeapArray()[i];
} }
} }