diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 4739a5fba80..a23a54779ca 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -78,8 +78,8 @@ Optimizations * LUCENE-7237: LRUQueryCache now prefers returning an uncached Scorer than waiting on a lock. (Adrien Grand) -* LUCENE-7261: Speed up LSBRadixSorter (which is used by TermsQuery, multi-term - queries and point queries). (Adrien Grand) +* LUCENE-7261, LUCENE-7264: Speed up DocIdSetBuilder (which is used by + TermsQuery, multi-term queries and point queries). (Adrien Grand) Bug Fixes diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java index 6e2e1ac815a..35e94489472 100644 --- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java +++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextBKDReader.java @@ -44,6 +44,7 @@ class SimpleTextBKDReader extends BKDReader { in.seek(blockFP); readLine(in, scratch); int count = parseInt(scratch, BLOCK_COUNT); + visitor.grow(count); for(int i=0;i buffer.length) { - if (bufferSize + 1 >= threshold) { - upgradeToBitSet(); - bitSet.set(doc); - return; - } - growBuffer(bufferSize+1); - } - buffer[bufferSize++] = doc; - } + return adder; } private static int dedup(int[] arr, int length) { diff --git a/lucene/core/src/test/org/apache/lucene/search/TestReqExclBulkScorer.java b/lucene/core/src/test/org/apache/lucene/search/TestReqExclBulkScorer.java index 20917dc13ca..bbc4740e454 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestReqExclBulkScorer.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestReqExclBulkScorer.java @@ -40,11 +40,13 @@ public class TestReqExclBulkScorer extends LuceneTestCase { DocIdSetBuilder exclBuilder = new DocIdSetBuilder(maxDoc); final int numIncludedDocs = TestUtil.nextInt(random(), 1, maxDoc); final int numExcludedDocs = 
TestUtil.nextInt(random(), 1, maxDoc); + DocIdSetBuilder.BulkAdder reqAdder = reqBuilder.grow(numIncludedDocs); for (int i = 0; i < numIncludedDocs; ++i) { - reqBuilder.add(random().nextInt(maxDoc)); + reqAdder.add(random().nextInt(maxDoc)); } + DocIdSetBuilder.BulkAdder exclAdder = exclBuilder.grow(numExcludedDocs); for (int i = 0; i < numExcludedDocs; ++i) { - exclBuilder.add(random().nextInt(maxDoc)); + exclAdder.add(random().nextInt(maxDoc)); } final DocIdSet req = reqBuilder.build(); diff --git a/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java b/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java index 97afe8b1854..8814be1f359 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestDocIdSetBuilder.java @@ -122,11 +122,14 @@ public class TestDocIdSetBuilder extends LuceneTestCase { DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc); for (j = 0; j < array.length; ) { final int l = TestUtil.nextInt(random(), 1, array.length - j); - if (rarely()) { - builder.grow(l); - } - for (int k = 0; k < l; ++k) { - builder.add(array[j++]); + DocIdSetBuilder.BulkAdder adder = null; + for (int k = 0, budget = 0; k < l; ++k) { + if (budget == 0 || rarely()) { + budget = TestUtil.nextInt(random(), 1, l - k + 5); + adder = builder.grow(budget); + } + adder.add(array[j++]); + budget--; } } diff --git a/lucene/spatial/src/java/org/apache/lucene/spatial/geopoint/search/GeoPointTermQueryConstantScoreWrapper.java b/lucene/spatial/src/java/org/apache/lucene/spatial/geopoint/search/GeoPointTermQueryConstantScoreWrapper.java index 96e0bd961cb..fb467e6317d 100644 --- a/lucene/spatial/src/java/org/apache/lucene/spatial/geopoint/search/GeoPointTermQueryConstantScoreWrapper.java +++ b/lucene/spatial/src/java/org/apache/lucene/spatial/geopoint/search/GeoPointTermQueryConstantScoreWrapper.java @@ -110,9 +110,11 @@ final class GeoPointTermQueryConstantScoreWrapper = 
shapeBounds.getMinimumY() && y <= shapeBounds.getMaximumY() && z >= shapeBounds.getMinimumZ() && z <= shapeBounds.getMaximumZ()) { if (shape.isWithin(x, y, z)) { - hits.add(docID); + adder.add(docID); } } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPointsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPointsFormat.java index 061d5b60a5c..b6729135435 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPointsFormat.java +++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/asserting/AssertingPointsFormat.java @@ -77,6 +77,7 @@ public final class AssertingPointsFormat extends PointsFormat { final byte[] lastMaxPackedValue; private Relation lastCompareResult; private int lastDocID = -1; + private int docBudget; public AssertingIntersectVisitor(int numDims, int bytesPerDim, IntersectVisitor in) { this.in = in; @@ -93,6 +94,8 @@ public final class AssertingPointsFormat extends PointsFormat { @Override public void visit(int docID) throws IOException { + assert --docBudget >= 0 : "called add() more times than the last call to grow() reserved"; + // This method, not filtering each hit, should only be invoked when the cell is inside the query shape: assert lastCompareResult == Relation.CELL_INSIDE_QUERY; in.visit(docID); @@ -100,6 +103,7 @@ public final class AssertingPointsFormat extends PointsFormat { @Override public void visit(int docID, byte[] packedValue) throws IOException { + assert --docBudget >= 0 : "called add() more times than the last call to grow() reserved"; // This method, to filter each doc's value, should only be invoked when the cell crosses the query shape: assert lastCompareResult == PointValues.Relation.CELL_CROSSES_QUERY; @@ -130,6 +134,7 @@ public final class AssertingPointsFormat extends PointsFormat { @Override public void grow(int count) { in.grow(count); + docBudget = count; } @Override