diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index a1a1fe0f84f..975581c643d 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -87,6 +87,11 @@ Optimizations * SOLR-8856: Do not cache merge or 'read once' contexts in the hdfs block cache. (Mark Miller, Mike Drob) +* SOLR-8922: Optimize filter creation (DocSetCollector) to minimize the amount of garbage + produced. This resulted in up to 3x throughput when small filter creation was the bottleneck, + as well as orders of magnitude less garbage. (Jeff Wartes, yonik) + + Other Changes ---------------------- * SOLR-7516: Improve javadocs for JavaBinCodec, ObjectResolver and enforce the single-usage policy. diff --git a/solr/core/src/java/org/apache/solr/search/DocSetCollector.java b/solr/core/src/java/org/apache/solr/search/DocSetCollector.java index 8e529d925d4..25b12c54e61 100644 --- a/solr/core/src/java/org/apache/solr/search/DocSetCollector.java +++ b/solr/core/src/java/org/apache/solr/search/DocSetCollector.java @@ -17,6 +17,7 @@ package org.apache.solr.search; import java.io.IOException; +import java.util.ArrayList; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Scorer; @@ -37,7 +38,7 @@ public class DocSetCollector extends SimpleCollector { // in case there aren't that many hits, we may not want a very sparse // bit array. Optimistically collect the first few docs in an array // in case there are only a few. - final int[] scratch; + final ExpandingIntArray scratch; public DocSetCollector(int maxDoc) { this(DocSetUtil.smallSetSize(maxDoc), maxDoc); @@ -46,7 +47,7 @@ public class DocSetCollector extends SimpleCollector { public DocSetCollector(int smallSetSize, int maxDoc) { this.smallSetSize = smallSetSize; this.maxDoc = maxDoc; - this.scratch = new int[smallSetSize]; + this.scratch = new ExpandingIntArray(smallSetSize); } @Override @@ -59,8 +60,8 @@ public class DocSetCollector extends SimpleCollector { // than scanning through a potentially huge bit vector. // FUTURE: when search methods all start returning docs in order, maybe // we could have a ListDocSet() and use the collected array directly. - if (pos < scratch.length) { - scratch[pos]=doc; + if (pos < smallSetSize) { + scratch.add(pos, doc); } else { // this conditional could be removed if BitSet was preallocated, but that // would take up more memory, and add more GC time... @@ -72,12 +73,12 @@ public class DocSetCollector extends SimpleCollector { } public DocSet getDocSet() { - if (pos<=scratch.length) { + if (pos<=scratch.size()) { // assumes docs were collected in sorted order! - return new SortedIntDocSet(scratch, pos); + return new SortedIntDocSet(scratch.toArray(), pos); } else { // set the bits for ids that were collected in the array - for (int i=0; i arrays; + + public ExpandingIntArray(int smallSetSize) { + this.smallSetSize = smallSetSize; + this.currentAddArray = EMPTY; + } + + private void addNewArray() { + int arrSize = Math.max(10, currentAddArray.length << 1); + arrSize = Math.min(arrSize, smallSetSize - size); // max out at the smallSetSize + this.currentAddArray = new int[arrSize]; + if (arrays == null) { + arrays = new ArrayList<>(); + } + arrays.add(this.currentAddArray); + indexForNextAddInCurrentAddArray = 0; + // System.out.println("### ALLOCATED " + this + " " + arrSize + " smallSetSize="+smallSetSize + " left=" + (smallSetSize-size)); + } + + public void add(int index, int value) { + // assert index == size; // only appending is supported + if (indexForNextAddInCurrentAddArray >= currentAddArray.length) { + addNewArray(); + } + currentAddArray[indexForNextAddInCurrentAddArray++] = value; + size++; + } + + public void copyTo(FixedBitSet bits) { + if (size > 0) { + int resultPos = 0; + for (int i = 0; i < arrays.size(); i++) { + int[] srcArray = arrays.get(i); + int intsToCopy = (i < (arrays.size() - 1)) ? srcArray.length : indexForNextAddInCurrentAddArray; + for (int j = 0; j < intsToCopy; j++) { + bits.set(srcArray[j]); + } + resultPos += intsToCopy; + } + assert resultPos == size; + } + } + + public int[] toArray() { + int[] result = new int[size]; + if (size > 0) { + int resultPos = 0; + for (int i = 0; i < arrays.size(); i++) { + int[] srcArray = arrays.get(i); + int intsToCopy = (i < (arrays.size() - 1)) ? srcArray.length : indexForNextAddInCurrentAddArray; + System.arraycopy(srcArray, 0, result, resultPos, intsToCopy); + resultPos += intsToCopy; + } + assert resultPos == size; + } + return result; + } + + public int size() { + return size; + } + } }