mirror of https://github.com/apache/lucene.git
LUCENE-7264: Fewer conditionals in DocIdSetBuilder.
This commit is contained in:
parent
54b873c2f9
commit
aa81ba8642
|
@ -78,8 +78,8 @@ Optimizations
|
||||||
* LUCENE-7237: LRUQueryCache now prefers returning an uncached Scorer than
|
* LUCENE-7237: LRUQueryCache now prefers returning an uncached Scorer than
|
||||||
waiting on a lock. (Adrien Grand)
|
waiting on a lock. (Adrien Grand)
|
||||||
|
|
||||||
* LUCENE-7261: Speed up LSBRadixSorter (which is used by TermsQuery, multi-term
|
* LUCENE-7261, LUCENE-7264: Speed up DocIdSetBuilder (which is used by
|
||||||
queries and point queries). (Adrien Grand)
|
TermsQuery, multi-term queries and point queries). (Adrien Grand)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,7 @@ class SimpleTextBKDReader extends BKDReader {
|
||||||
in.seek(blockFP);
|
in.seek(blockFP);
|
||||||
readLine(in, scratch);
|
readLine(in, scratch);
|
||||||
int count = parseInt(scratch, BLOCK_COUNT);
|
int count = parseInt(scratch, BLOCK_COUNT);
|
||||||
|
visitor.grow(count);
|
||||||
for(int i=0;i<count;i++) {
|
for(int i=0;i<count;i++) {
|
||||||
readLine(in, scratch);
|
readLine(in, scratch);
|
||||||
visitor.visit(parseInt(scratch, BLOCK_DOC_ID));
|
visitor.visit(parseInt(scratch, BLOCK_DOC_ID));
|
||||||
|
@ -65,6 +66,7 @@ class SimpleTextBKDReader extends BKDReader {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void visitDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, int[] docIDs, int count, IntersectVisitor visitor) throws IOException {
|
protected void visitDocValues(int[] commonPrefixLengths, byte[] scratchPackedValue, IndexInput in, int[] docIDs, int count, IntersectVisitor visitor) throws IOException {
|
||||||
|
visitor.grow(count);
|
||||||
// NOTE: we don't do prefix coding, so we ignore commonPrefixLengths
|
// NOTE: we don't do prefix coding, so we ignore commonPrefixLengths
|
||||||
assert scratchPackedValue.length == packedBytesLength;
|
assert scratchPackedValue.length == packedBytesLength;
|
||||||
BytesRefBuilder scratch = new BytesRefBuilder();
|
BytesRefBuilder scratch = new BytesRefBuilder();
|
||||||
|
|
|
@ -163,6 +163,7 @@ public abstract class PointInSetQuery extends Query {
|
||||||
private BytesRef nextQueryPoint;
|
private BytesRef nextQueryPoint;
|
||||||
private final BytesRef scratch = new BytesRef();
|
private final BytesRef scratch = new BytesRef();
|
||||||
private final PrefixCodedTerms sortedPackedPoints;
|
private final PrefixCodedTerms sortedPackedPoints;
|
||||||
|
private DocIdSetBuilder.BulkAdder adder;
|
||||||
|
|
||||||
public MergePointVisitor(PrefixCodedTerms sortedPackedPoints, DocIdSetBuilder result) throws IOException {
|
public MergePointVisitor(PrefixCodedTerms sortedPackedPoints, DocIdSetBuilder result) throws IOException {
|
||||||
this.result = result;
|
this.result = result;
|
||||||
|
@ -174,12 +175,12 @@ public abstract class PointInSetQuery extends Query {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void grow(int count) {
|
public void grow(int count) {
|
||||||
result.grow(count);
|
adder = result.grow(count);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void visit(int docID) {
|
public void visit(int docID) {
|
||||||
result.add(docID);
|
adder.add(docID);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -189,7 +190,7 @@ public abstract class PointInSetQuery extends Query {
|
||||||
int cmp = nextQueryPoint.compareTo(scratch);
|
int cmp = nextQueryPoint.compareTo(scratch);
|
||||||
if (cmp == 0) {
|
if (cmp == 0) {
|
||||||
// Query point equals index point, so collect and return
|
// Query point equals index point, so collect and return
|
||||||
result.add(docID);
|
adder.add(docID);
|
||||||
break;
|
break;
|
||||||
} else if (cmp < 0) {
|
} else if (cmp < 0) {
|
||||||
// Query point is before index point, so we move to next query point
|
// Query point is before index point, so we move to next query point
|
||||||
|
@ -237,6 +238,7 @@ public abstract class PointInSetQuery extends Query {
|
||||||
|
|
||||||
private final DocIdSetBuilder result;
|
private final DocIdSetBuilder result;
|
||||||
private final byte[] pointBytes;
|
private final byte[] pointBytes;
|
||||||
|
private DocIdSetBuilder.BulkAdder adder;
|
||||||
|
|
||||||
public SinglePointVisitor(DocIdSetBuilder result) {
|
public SinglePointVisitor(DocIdSetBuilder result) {
|
||||||
this.result = result;
|
this.result = result;
|
||||||
|
@ -251,12 +253,12 @@ public abstract class PointInSetQuery extends Query {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void grow(int count) {
|
public void grow(int count) {
|
||||||
result.grow(count);
|
adder = result.grow(count);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void visit(int docID) {
|
public void visit(int docID) {
|
||||||
result.add(docID);
|
adder.add(docID);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -264,7 +266,7 @@ public abstract class PointInSetQuery extends Query {
|
||||||
assert packedValue.length == pointBytes.length;
|
assert packedValue.length == pointBytes.length;
|
||||||
if (Arrays.equals(packedValue, pointBytes)) {
|
if (Arrays.equals(packedValue, pointBytes)) {
|
||||||
// The point for this doc matches the point we are querying on
|
// The point for this doc matches the point we are querying on
|
||||||
result.add(docID);
|
adder.add(docID);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -111,14 +111,16 @@ public abstract class PointRangeQuery extends Query {
|
||||||
values.intersect(field,
|
values.intersect(field,
|
||||||
new IntersectVisitor() {
|
new IntersectVisitor() {
|
||||||
|
|
||||||
|
DocIdSetBuilder.BulkAdder adder;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void grow(int count) {
|
public void grow(int count) {
|
||||||
result.grow(count);
|
adder = result.grow(count);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void visit(int docID) {
|
public void visit(int docID) {
|
||||||
result.add(docID);
|
adder.add(docID);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -136,7 +138,7 @@ public abstract class PointRangeQuery extends Query {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Doc is in-bounds
|
// Doc is in-bounds
|
||||||
result.add(docID);
|
adder.add(docID);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
package org.apache.lucene.util;
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
import org.apache.lucene.search.DocIdSet;
|
import org.apache.lucene.search.DocIdSet;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
@ -26,17 +27,50 @@ import org.apache.lucene.util.packed.PackedInts;
|
||||||
* A builder of {@link DocIdSet}s. At first it uses a sparse structure to gather
|
* A builder of {@link DocIdSet}s. At first it uses a sparse structure to gather
|
||||||
* documents, and then upgrades to a non-sparse bit set once enough hits match.
|
* documents, and then upgrades to a non-sparse bit set once enough hits match.
|
||||||
*
|
*
|
||||||
|
* To add documents, you first need to call {@link #grow} in order to reserve
|
||||||
|
* space, and then call {@link BulkAdder#add(int)} on the returned
|
||||||
|
* {@link BulkAdder}.
|
||||||
|
*
|
||||||
* @lucene.internal
|
* @lucene.internal
|
||||||
*/
|
*/
|
||||||
public final class DocIdSetBuilder {
|
public final class DocIdSetBuilder {
|
||||||
|
|
||||||
|
/** Utility class to efficiently add many docs in one go.
|
||||||
|
* @see DocIdSetBuilder#grow */
|
||||||
|
public static abstract class BulkAdder {
|
||||||
|
public abstract void add(int doc);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class FixedBitSetAdder extends BulkAdder {
|
||||||
|
final FixedBitSet bitSet;
|
||||||
|
|
||||||
|
FixedBitSetAdder(FixedBitSet bitSet) {
|
||||||
|
this.bitSet = bitSet;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void add(int doc) {
|
||||||
|
bitSet.set(doc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private class BufferAdder extends BulkAdder {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void add(int doc) {
|
||||||
|
buffer[bufferSize++] = doc;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
private final int maxDoc;
|
private final int maxDoc;
|
||||||
private final int threshold;
|
private final int threshold;
|
||||||
|
|
||||||
private int[] buffer;
|
private int[] buffer;
|
||||||
private int bufferSize;
|
private int bufferSize;
|
||||||
|
|
||||||
private BitSet bitSet;
|
private FixedBitSet bitSet;
|
||||||
|
private BulkAdder adder = new BufferAdder();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a builder that can contain doc IDs between {@code 0} and {@code maxDoc}.
|
* Create a builder that can contain doc IDs between {@code 0} and {@code maxDoc}.
|
||||||
|
@ -62,6 +96,7 @@ public final class DocIdSetBuilder {
|
||||||
}
|
}
|
||||||
this.buffer = null;
|
this.buffer = null;
|
||||||
this.bufferSize = 0;
|
this.bufferSize = 0;
|
||||||
|
this.adder = new FixedBitSetAdder(bitSet);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Grows the buffer to at least minSize, but never larger than threshold. */
|
/** Grows the buffer to at least minSize, but never larger than threshold. */
|
||||||
|
@ -69,9 +104,7 @@ public final class DocIdSetBuilder {
|
||||||
assert minSize < threshold;
|
assert minSize < threshold;
|
||||||
if (buffer.length < minSize) {
|
if (buffer.length < minSize) {
|
||||||
int nextSize = Math.min(threshold, ArrayUtil.oversize(minSize, Integer.BYTES));
|
int nextSize = Math.min(threshold, ArrayUtil.oversize(minSize, Integer.BYTES));
|
||||||
int[] newBuffer = new int[nextSize];
|
buffer = Arrays.copyOf(buffer, nextSize);
|
||||||
System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
|
|
||||||
buffer = newBuffer;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -86,7 +119,7 @@ public final class DocIdSetBuilder {
|
||||||
if (bitSet != null) {
|
if (bitSet != null) {
|
||||||
bitSet.or(iter);
|
bitSet.or(iter);
|
||||||
} else {
|
} else {
|
||||||
while (true) {
|
while (true) {
|
||||||
assert buffer.length <= threshold;
|
assert buffer.length <= threshold;
|
||||||
final int end = buffer.length;
|
final int end = buffer.length;
|
||||||
for (int i = bufferSize; i < end; ++i) {
|
for (int i = bufferSize; i < end; ++i) {
|
||||||
|
@ -114,39 +147,19 @@ public final class DocIdSetBuilder {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reserve space so that this builder can hold {@code numDocs} MORE documents.
|
* Reserve space and return a {@link BulkAdder} object that can be used to
|
||||||
|
* add up to {@code numDocs} documents.
|
||||||
*/
|
*/
|
||||||
public void grow(int numDocs) {
|
public BulkAdder grow(int numDocs) {
|
||||||
if (bitSet == null) {
|
if (bitSet == null) {
|
||||||
final long newLength = bufferSize + numDocs;
|
final long newLength = (long) bufferSize + numDocs;
|
||||||
if (newLength < threshold) {
|
if (newLength < threshold) {
|
||||||
growBuffer((int) newLength);
|
growBuffer((int) newLength);
|
||||||
} else {
|
} else {
|
||||||
upgradeToBitSet();
|
upgradeToBitSet();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
return adder;
|
||||||
|
|
||||||
/**
|
|
||||||
* Add a document to this builder.
|
|
||||||
* NOTE: doc IDs do not need to be provided in order.
|
|
||||||
* NOTE: if you plan on adding several docs at once, look into using
|
|
||||||
* {@link #grow(int)} to reserve space.
|
|
||||||
*/
|
|
||||||
public void add(int doc) {
|
|
||||||
if (bitSet != null) {
|
|
||||||
bitSet.set(doc);
|
|
||||||
} else {
|
|
||||||
if (bufferSize + 1 > buffer.length) {
|
|
||||||
if (bufferSize + 1 >= threshold) {
|
|
||||||
upgradeToBitSet();
|
|
||||||
bitSet.set(doc);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
growBuffer(bufferSize+1);
|
|
||||||
}
|
|
||||||
buffer[bufferSize++] = doc;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int dedup(int[] arr, int length) {
|
private static int dedup(int[] arr, int length) {
|
||||||
|
|
|
@ -40,11 +40,13 @@ public class TestReqExclBulkScorer extends LuceneTestCase {
|
||||||
DocIdSetBuilder exclBuilder = new DocIdSetBuilder(maxDoc);
|
DocIdSetBuilder exclBuilder = new DocIdSetBuilder(maxDoc);
|
||||||
final int numIncludedDocs = TestUtil.nextInt(random(), 1, maxDoc);
|
final int numIncludedDocs = TestUtil.nextInt(random(), 1, maxDoc);
|
||||||
final int numExcludedDocs = TestUtil.nextInt(random(), 1, maxDoc);
|
final int numExcludedDocs = TestUtil.nextInt(random(), 1, maxDoc);
|
||||||
|
DocIdSetBuilder.BulkAdder reqAdder = reqBuilder.grow(numIncludedDocs);
|
||||||
for (int i = 0; i < numIncludedDocs; ++i) {
|
for (int i = 0; i < numIncludedDocs; ++i) {
|
||||||
reqBuilder.add(random().nextInt(maxDoc));
|
reqAdder.add(random().nextInt(maxDoc));
|
||||||
}
|
}
|
||||||
|
DocIdSetBuilder.BulkAdder exclAdder = exclBuilder.grow(numIncludedDocs);
|
||||||
for (int i = 0; i < numExcludedDocs; ++i) {
|
for (int i = 0; i < numExcludedDocs; ++i) {
|
||||||
exclBuilder.add(random().nextInt(maxDoc));
|
exclAdder.add(random().nextInt(maxDoc));
|
||||||
}
|
}
|
||||||
|
|
||||||
final DocIdSet req = reqBuilder.build();
|
final DocIdSet req = reqBuilder.build();
|
||||||
|
|
|
@ -122,11 +122,14 @@ public class TestDocIdSetBuilder extends LuceneTestCase {
|
||||||
DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
|
DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
|
||||||
for (j = 0; j < array.length; ) {
|
for (j = 0; j < array.length; ) {
|
||||||
final int l = TestUtil.nextInt(random(), 1, array.length - j);
|
final int l = TestUtil.nextInt(random(), 1, array.length - j);
|
||||||
if (rarely()) {
|
DocIdSetBuilder.BulkAdder adder = null;
|
||||||
builder.grow(l);
|
for (int k = 0, budget = 0; k < l; ++k) {
|
||||||
}
|
if (budget == 0 || rarely()) {
|
||||||
for (int k = 0; k < l; ++k) {
|
budget = TestUtil.nextInt(random(), 1, l - k + 5);
|
||||||
builder.add(array[j++]);
|
adder = builder.grow(budget);
|
||||||
|
}
|
||||||
|
adder.add(array[j++]);
|
||||||
|
budget--;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -110,9 +110,11 @@ final class GeoPointTermQueryConstantScoreWrapper <Q extends GeoPointMultiTermQu
|
||||||
if (termsEnum.boundaryTerm()) {
|
if (termsEnum.boundaryTerm()) {
|
||||||
builder.add(docs);
|
builder.add(docs);
|
||||||
} else {
|
} else {
|
||||||
int docId;
|
int numDocs = termsEnum.docFreq();
|
||||||
while ((docId = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
DocIdSetBuilder.BulkAdder adder = builder.grow(numDocs);
|
||||||
builder.add(docId);
|
for (int i = 0; i < numDocs; ++i) {
|
||||||
|
int docId = docs.nextDoc();
|
||||||
|
adder.add(docId);
|
||||||
preApproved.set(docId);
|
preApproved.set(docId);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,6 +31,7 @@ class PointInShapeIntersectVisitor implements IntersectVisitor {
|
||||||
private final DocIdSetBuilder hits;
|
private final DocIdSetBuilder hits;
|
||||||
private final GeoShape shape;
|
private final GeoShape shape;
|
||||||
private final XYZBounds shapeBounds;
|
private final XYZBounds shapeBounds;
|
||||||
|
private DocIdSetBuilder.BulkAdder adder;
|
||||||
|
|
||||||
public PointInShapeIntersectVisitor(DocIdSetBuilder hits, GeoShape shape, XYZBounds shapeBounds) {
|
public PointInShapeIntersectVisitor(DocIdSetBuilder hits, GeoShape shape, XYZBounds shapeBounds) {
|
||||||
this.hits = hits;
|
this.hits = hits;
|
||||||
|
@ -40,12 +41,12 @@ class PointInShapeIntersectVisitor implements IntersectVisitor {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void grow(int count) {
|
public void grow(int count) {
|
||||||
hits.grow(count);
|
adder = hits.grow(count);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void visit(int docID) {
|
public void visit(int docID) {
|
||||||
hits.add(docID);
|
adder.add(docID);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -58,7 +59,7 @@ class PointInShapeIntersectVisitor implements IntersectVisitor {
|
||||||
y >= shapeBounds.getMinimumY() && y <= shapeBounds.getMaximumY() &&
|
y >= shapeBounds.getMinimumY() && y <= shapeBounds.getMaximumY() &&
|
||||||
z >= shapeBounds.getMinimumZ() && z <= shapeBounds.getMaximumZ()) {
|
z >= shapeBounds.getMinimumZ() && z <= shapeBounds.getMaximumZ()) {
|
||||||
if (shape.isWithin(x, y, z)) {
|
if (shape.isWithin(x, y, z)) {
|
||||||
hits.add(docID);
|
adder.add(docID);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -77,6 +77,7 @@ public final class AssertingPointsFormat extends PointsFormat {
|
||||||
final byte[] lastMaxPackedValue;
|
final byte[] lastMaxPackedValue;
|
||||||
private Relation lastCompareResult;
|
private Relation lastCompareResult;
|
||||||
private int lastDocID = -1;
|
private int lastDocID = -1;
|
||||||
|
private int docBudget;
|
||||||
|
|
||||||
public AssertingIntersectVisitor(int numDims, int bytesPerDim, IntersectVisitor in) {
|
public AssertingIntersectVisitor(int numDims, int bytesPerDim, IntersectVisitor in) {
|
||||||
this.in = in;
|
this.in = in;
|
||||||
|
@ -93,6 +94,8 @@ public final class AssertingPointsFormat extends PointsFormat {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void visit(int docID) throws IOException {
|
public void visit(int docID) throws IOException {
|
||||||
|
assert --docBudget >= 0 : "called add() more times than the last call to grow() reserved";
|
||||||
|
|
||||||
// This method, not filtering each hit, should only be invoked when the cell is inside the query shape:
|
// This method, not filtering each hit, should only be invoked when the cell is inside the query shape:
|
||||||
assert lastCompareResult == Relation.CELL_INSIDE_QUERY;
|
assert lastCompareResult == Relation.CELL_INSIDE_QUERY;
|
||||||
in.visit(docID);
|
in.visit(docID);
|
||||||
|
@ -100,6 +103,7 @@ public final class AssertingPointsFormat extends PointsFormat {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void visit(int docID, byte[] packedValue) throws IOException {
|
public void visit(int docID, byte[] packedValue) throws IOException {
|
||||||
|
assert --docBudget >= 0 : "called add() more times than the last call to grow() reserved";
|
||||||
|
|
||||||
// This method, to filter each doc's value, should only be invoked when the cell crosses the query shape:
|
// This method, to filter each doc's value, should only be invoked when the cell crosses the query shape:
|
||||||
assert lastCompareResult == PointValues.Relation.CELL_CROSSES_QUERY;
|
assert lastCompareResult == PointValues.Relation.CELL_CROSSES_QUERY;
|
||||||
|
@ -130,6 +134,7 @@ public final class AssertingPointsFormat extends PointsFormat {
|
||||||
@Override
|
@Override
|
||||||
public void grow(int count) {
|
public void grow(int count) {
|
||||||
in.grow(count);
|
in.grow(count);
|
||||||
|
docBudget = count;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue