LUCENE-6645: optimize DocIdSetBuilder a bit more

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1690175 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2015-07-09 21:20:55 +00:00
parent 34320973ba
commit e071fd0153
2 changed files with 27 additions and 12 deletions

View File

@ -28,7 +28,7 @@ import org.apache.lucene.search.DocIdSetIterator;
public abstract class BitSet implements MutableBits, Accountable {
/** Build a {@link BitSet} from the content of the provided {@link DocIdSetIterator}.
* NOTE: this will consume the {@link BitSet}. */
* NOTE: this will fully consume the {@link DocIdSetIterator}. */
public static BitSet of(DocIdSetIterator it, int maxDoc) throws IOException {
final long cost = it.cost();
final int threshold = maxDoc >>> 7;

View File

@ -23,7 +23,9 @@ import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
/**
* A builder of {@link DocIdSet}s.
* A builder of {@link DocIdSet}s. At first it uses a sparse structure to gather
* documents, and then upgrades to a non-sparse bit set once enough hits match.
*
* @lucene.internal
*/
public final class DocIdSetBuilder {
@ -62,6 +64,17 @@ public final class DocIdSetBuilder {
this.bufferSize = 0;
}
/**
 * Ensures the sparse buffer has room for at least {@code minSize} entries.
 * When the current array is too short it is replaced by a larger one whose
 * length is the over-allocated size suggested by {@code ArrayUtil.oversize},
 * capped at {@code threshold}; the contents of the old array are copied over.
 *
 * @param minSize the minimum required buffer length; asserted to be strictly
 *                less than {@code threshold}
 */
private void growBuffer(int minSize) {
  assert minSize < threshold;
  if (buffer.length >= minSize) {
    // Already large enough: nothing to do.
    return;
  }
  // Over-allocate to amortize repeated growth, but never go past threshold.
  final int oversized = ArrayUtil.oversize(minSize, RamUsageEstimator.NUM_BYTES_INT);
  final int capacity = Math.min(threshold, oversized);
  final int[] grown = new int[capacity];
  System.arraycopy(buffer, 0, grown, 0, buffer.length);
  buffer = grown;
}
/**
* Add the content of the provided {@link DocIdSetIterator} to this builder.
* NOTE: if you need to build a {@link DocIdSet} out of a single
@ -74,7 +87,8 @@ public final class DocIdSetBuilder {
bitSet.or(iter);
} else {
while (true) {
final int end = Math.min(threshold, buffer.length);
assert buffer.length <= threshold;
final int end = buffer.length;
for (int i = bufferSize; i < end; ++i) {
final int doc = iter.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
@ -89,7 +103,7 @@ public final class DocIdSetBuilder {
break;
}
buffer = ArrayUtil.grow(buffer, bufferSize + 1);
growBuffer(bufferSize+1);
}
upgradeToBitSet();
@ -105,8 +119,8 @@ public final class DocIdSetBuilder {
public void grow(int numDocs) {
if (bitSet == null) {
final long newLength = bufferSize + numDocs;
if (newLength <= threshold) {
buffer = ArrayUtil.grow(buffer, (int) newLength);
if (newLength < threshold) {
growBuffer((int) newLength);
} else {
upgradeToBitSet();
}
@ -123,13 +137,13 @@ public final class DocIdSetBuilder {
if (bitSet != null) {
bitSet.set(doc);
} else {
if (bufferSize + 1 >= threshold) {
upgradeToBitSet();
bitSet.set(doc);
return;
}
if (bufferSize + 1 > buffer.length) {
buffer = ArrayUtil.grow(buffer, bufferSize + 1);
if (bufferSize + 1 >= threshold) {
upgradeToBitSet();
bitSet.set(doc);
return;
}
growBuffer(bufferSize+1);
}
buffer[bufferSize++] = doc;
}
@ -175,6 +189,7 @@ public final class DocIdSetBuilder {
LSBRadixSorter sorter = new LSBRadixSorter();
sorter.sort(buffer, 0, bufferSize);
final int l = dedup(buffer, bufferSize);
assert l <= bufferSize;
buffer = ArrayUtil.grow(buffer, l + 1);
buffer[l] = DocIdSetIterator.NO_MORE_DOCS;
return new IntArrayDocIdSet(buffer, l);