mirror of https://github.com/apache/lucene.git
LUCENE-6690: Speed up MultiTermsEnum.next().
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1692253 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
129a83b198
commit
d01866b8fc
|
@ -356,6 +356,9 @@ Optimizations
|
||||||
in the case that there are few unique sets of values.
|
in the case that there are few unique sets of values.
|
||||||
(Adrien Grand, Robert Muir)
|
(Adrien Grand, Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-6690: Sped up MultiTermsEnum.next() on high-cardinality fields.
|
||||||
|
(Adrien Grand)
|
||||||
|
|
||||||
Build
|
Build
|
||||||
|
|
||||||
* LUCENE-6518: Don't report false thread leaks from IBM J9
|
* LUCENE-6518: Don't report false thread leaks from IBM J9
|
||||||
|
|
|
@ -19,7 +19,9 @@ package org.apache.lucene.index;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Comparator;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.ArrayUtil;
|
||||||
import org.apache.lucene.util.BytesRef;
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.apache.lucene.util.BytesRefBuilder;
|
import org.apache.lucene.util.BytesRefBuilder;
|
||||||
import org.apache.lucene.util.PriorityQueue;
|
import org.apache.lucene.util.PriorityQueue;
|
||||||
|
@ -32,6 +34,13 @@ import org.apache.lucene.util.PriorityQueue;
|
||||||
*/
|
*/
|
||||||
public final class MultiTermsEnum extends TermsEnum {
|
public final class MultiTermsEnum extends TermsEnum {
|
||||||
|
|
||||||
|
private static final Comparator<TermsEnumWithSlice> INDEX_COMPARATOR = new Comparator<TermsEnumWithSlice>() {
|
||||||
|
@Override
|
||||||
|
public int compare(TermsEnumWithSlice o1, TermsEnumWithSlice o2) {
|
||||||
|
return o1.index - o2.index;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
private final TermMergeQueue queue;
|
private final TermMergeQueue queue;
|
||||||
private final TermsEnumWithSlice[] subs; // all of our subs (one per sub-reader)
|
private final TermsEnumWithSlice[] subs; // all of our subs (one per sub-reader)
|
||||||
private final TermsEnumWithSlice[] currentSubs; // current subs that have at least one term for this field
|
private final TermsEnumWithSlice[] currentSubs; // current subs that have at least one term for this field
|
||||||
|
@ -213,12 +222,14 @@ public final class MultiTermsEnum extends TermsEnum {
|
||||||
if (status == SeekStatus.FOUND) {
|
if (status == SeekStatus.FOUND) {
|
||||||
top[numTop++] = currentSubs[i];
|
top[numTop++] = currentSubs[i];
|
||||||
current = currentSubs[i].current = currentSubs[i].terms.term();
|
current = currentSubs[i].current = currentSubs[i].terms.term();
|
||||||
|
queue.add(currentSubs[i]);
|
||||||
} else {
|
} else {
|
||||||
if (status == SeekStatus.NOT_FOUND) {
|
if (status == SeekStatus.NOT_FOUND) {
|
||||||
currentSubs[i].current = currentSubs[i].terms.term();
|
currentSubs[i].current = currentSubs[i].terms.term();
|
||||||
assert currentSubs[i].current != null;
|
assert currentSubs[i].current != null;
|
||||||
queue.add(currentSubs[i]);
|
queue.add(currentSubs[i]);
|
||||||
} else {
|
} else {
|
||||||
|
assert status == SeekStatus.END;
|
||||||
// enum exhausted
|
// enum exhausted
|
||||||
currentSubs[i].current = null;
|
currentSubs[i].current = null;
|
||||||
}
|
}
|
||||||
|
@ -253,23 +264,19 @@ public final class MultiTermsEnum extends TermsEnum {
|
||||||
// extract all subs from the queue that have the same
|
// extract all subs from the queue that have the same
|
||||||
// top term
|
// top term
|
||||||
assert numTop == 0;
|
assert numTop == 0;
|
||||||
while(true) {
|
numTop = queue.fillTop(top);
|
||||||
top[numTop++] = queue.pop();
|
|
||||||
if (queue.size() == 0 || !(queue.top()).current.bytesEquals(top[0].current)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
current = top[0].current;
|
current = top[0].current;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void pushTop() throws IOException {
|
private void pushTop() throws IOException {
|
||||||
// call next() on each top, and put back into queue
|
// call next() on each top, and reorder queue
|
||||||
for(int i=0;i<numTop;i++) {
|
for (int i = 0; i < numTop; i++) {
|
||||||
top[i].current = top[i].terms.next();
|
TermsEnumWithSlice top = queue.top();
|
||||||
if (top[i].current != null) {
|
top.current = top.terms.next();
|
||||||
queue.add(top[i]);
|
if (top.current == null) {
|
||||||
|
queue.pop();
|
||||||
} else {
|
} else {
|
||||||
// no more fields in this reader
|
queue.updateTop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
numTop = 0;
|
numTop = 0;
|
||||||
|
@ -342,6 +349,8 @@ public final class MultiTermsEnum extends TermsEnum {
|
||||||
|
|
||||||
int upto = 0;
|
int upto = 0;
|
||||||
|
|
||||||
|
ArrayUtil.timSort(top, 0, numTop, INDEX_COMPARATOR);
|
||||||
|
|
||||||
for(int i=0;i<numTop;i++) {
|
for(int i=0;i<numTop;i++) {
|
||||||
|
|
||||||
final TermsEnumWithSlice entry = top[i];
|
final TermsEnumWithSlice entry = top[i];
|
||||||
|
@ -382,18 +391,47 @@ public final class MultiTermsEnum extends TermsEnum {
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static class TermMergeQueue extends PriorityQueue<TermsEnumWithSlice> {
|
private final static class TermMergeQueue extends PriorityQueue<TermsEnumWithSlice> {
|
||||||
|
|
||||||
|
final int[] stack;
|
||||||
|
|
||||||
TermMergeQueue(int size) {
|
TermMergeQueue(int size) {
|
||||||
super(size);
|
super(size);
|
||||||
|
this.stack = new int[size];
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected boolean lessThan(TermsEnumWithSlice termsA, TermsEnumWithSlice termsB) {
|
protected boolean lessThan(TermsEnumWithSlice termsA, TermsEnumWithSlice termsB) {
|
||||||
final int cmp = termsA.current.compareTo(termsB.current);
|
return termsA.current.compareTo(termsB.current) < 0;
|
||||||
if (cmp != 0) {
|
|
||||||
return cmp < 0;
|
|
||||||
} else {
|
|
||||||
return termsA.subSlice.start < termsB.subSlice.start;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Add the {@link #top()} slice as well as all slices that are positioned
|
||||||
|
* on the same term to {@code tops} and return how many of them there are. */
|
||||||
|
int fillTop(TermsEnumWithSlice[] tops) {
|
||||||
|
final int size = size();
|
||||||
|
if (size == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
tops[0] = top();
|
||||||
|
int numTop = 1;
|
||||||
|
stack[0] = 1;
|
||||||
|
int stackLen = 1;
|
||||||
|
|
||||||
|
while (stackLen != 0) {
|
||||||
|
final int index = stack[--stackLen];
|
||||||
|
final int leftChild = index << 1;
|
||||||
|
for (int child = leftChild, end = Math.min(size, leftChild + 1); child <= end; ++child) {
|
||||||
|
TermsEnumWithSlice te = get(child);
|
||||||
|
if (te.current.equals(tops[0].current)) {
|
||||||
|
tops[numTop++] = te;
|
||||||
|
stack[stackLen++] = child;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return numTop;
|
||||||
|
}
|
||||||
|
|
||||||
|
private TermsEnumWithSlice get(int i) {
|
||||||
|
return (TermsEnumWithSlice) getHeapArray()[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue