Mirror of https://github.com/apache/lucene.git, synced 2025-02-07 18:49:03 +00:00
SOLR-8922: optimize DocSetCollector to produce less garbage
This commit is contained in:
parent 65a69b9757
commit 301e178681
@@ -87,6 +87,11 @@ Optimizations
 
 * SOLR-8856: Do not cache merge or 'read once' contexts in the hdfs block cache. (Mark Miller, Mike Drob)
 
+* SOLR-8922: Optimize filter creation (DocSetCollector) to minimize the amount of garbage
+  produced. This resulted in up to 3x throughput when small filter creation was the bottleneck,
+  as well as orders of magnitude less garbage. (Jeff Wartes, yonik)
+
+
 Other Changes
 ----------------------
 * SOLR-7516: Improve javadocs for JavaBinCodec, ObjectResolver and enforce the single-usage policy.
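The patch below replaces the eagerly allocated int[smallSetSize] scratch buffer with a lazily grown, chunked ExpandingIntArray. As a rough illustration of why this produces less garbage, the following sketch compares the two allocation strategies. It is not part of the commit; the class and variable names (ScratchAllocationSketch, eagerBytes, lazyBytes) are made up for illustration, and the growth schedule simply mirrors addNewArray() from the patch: chunks double from 10 ints upward and are capped so total capacity never exceeds smallSetSize.

// Illustrative sketch only (not in the commit): compares the old eager
// int[smallSetSize] allocation with the chunked growth schedule used by
// ExpandingIntArray (10, 20, 40, ... ints, capped at smallSetSize in total).
public class ScratchAllocationSketch {

  public static void main(String[] args) {
    int smallSetSize = 100_000;  // e.g. what DocSetUtil.smallSetSize(maxDoc) might return
    int collected = 500;         // a small filter that only matches a few hundred docs

    // Old behavior: one smallSetSize-sized array per collector, regardless of hit count.
    long eagerBytes = (long) smallSetSize * Integer.BYTES;

    // New behavior: chunks are allocated only as documents arrive, mirroring addNewArray().
    long lazyBytes = 0;
    int chunk = 0;
    int capacity = 0;
    while (capacity < collected) {
      chunk = Math.max(10, chunk << 1);                  // double each time, minimum 10
      chunk = Math.min(chunk, smallSetSize - capacity);  // never exceed smallSetSize overall
      lazyBytes += (long) chunk * Integer.BYTES;
      capacity += chunk;
    }

    System.out.println("eager: " + eagerBytes + " bytes, chunked: " + lazyBytes + " bytes");
  }
}

For a filter that matches only a few hundred documents, the old code still allocated the full smallSetSize-sized array for every filter created, while the chunked version allocates a handful of small arrays totalling a few kilobytes; that difference is where the reported garbage reduction and throughput gain come from.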
@@ -17,6 +17,7 @@
 package org.apache.solr.search;
 
 import java.io.IOException;
+import java.util.ArrayList;
 
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.Scorer;
@@ -37,7 +38,7 @@ public class DocSetCollector extends SimpleCollector {
   // in case there aren't that many hits, we may not want a very sparse
   // bit array. Optimistically collect the first few docs in an array
   // in case there are only a few.
-  final int[] scratch;
+  final ExpandingIntArray scratch;
 
   public DocSetCollector(int maxDoc) {
     this(DocSetUtil.smallSetSize(maxDoc), maxDoc);
@@ -46,7 +47,7 @@ public class DocSetCollector extends SimpleCollector {
   public DocSetCollector(int smallSetSize, int maxDoc) {
     this.smallSetSize = smallSetSize;
     this.maxDoc = maxDoc;
-    this.scratch = new int[smallSetSize];
+    this.scratch = new ExpandingIntArray(smallSetSize);
   }
 
   @Override
@@ -59,8 +60,8 @@ public class DocSetCollector extends SimpleCollector {
     // than scanning through a potentially huge bit vector.
     // FUTURE: when search methods all start returning docs in order, maybe
     // we could have a ListDocSet() and use the collected array directly.
-    if (pos < scratch.length) {
-      scratch[pos]=doc;
+    if (pos < smallSetSize) {
+      scratch.add(pos, doc);
     } else {
       // this conditional could be removed if BitSet was preallocated, but that
       // would take up more memory, and add more GC time...
@@ -72,12 +73,12 @@ public class DocSetCollector extends SimpleCollector {
   }
 
   public DocSet getDocSet() {
-    if (pos<=scratch.length) {
+    if (pos<=scratch.size()) {
       // assumes docs were collected in sorted order!
-      return new SortedIntDocSet(scratch, pos);
+      return new SortedIntDocSet(scratch.toArray(), pos);
     } else {
       // set the bits for ids that were collected in the array
-      for (int i=0; i<scratch.length; i++) bits.set(scratch[i]);
+      scratch.copyTo(bits);
       return new BitDocSet(bits,pos);
     }
   }
@@ -95,4 +96,73 @@ public class DocSetCollector extends SimpleCollector {
   protected void doSetNextReader(LeafReaderContext context) throws IOException {
     this.base = context.docBase;
   }
+
+  protected static class ExpandingIntArray {
+    private static final int[] EMPTY = new int[0];
+    private int[] currentAddArray = null;
+    private int indexForNextAddInCurrentAddArray = 0;
+    private int size = 0;
+    private final int smallSetSize;
+    private ArrayList<int[]> arrays;
+
+    public ExpandingIntArray(int smallSetSize) {
+      this.smallSetSize = smallSetSize;
+      this.currentAddArray = EMPTY;
+    }
+
+    private void addNewArray() {
+      int arrSize = Math.max(10, currentAddArray.length << 1);
+      arrSize = Math.min(arrSize, smallSetSize - size); // max out at the smallSetSize
+      this.currentAddArray = new int[arrSize];
+      if (arrays == null) {
+        arrays = new ArrayList<>();
+      }
+      arrays.add(this.currentAddArray);
+      indexForNextAddInCurrentAddArray = 0;
+      // System.out.println("### ALLOCATED " + this + " " + arrSize + " smallSetSize="+smallSetSize + " left=" + (smallSetSize-size));
+    }
+
+    public void add(int index, int value) {
+      // assert index == size; // only appending is supported
+      if (indexForNextAddInCurrentAddArray >= currentAddArray.length) {
+        addNewArray();
+      }
+      currentAddArray[indexForNextAddInCurrentAddArray++] = value;
+      size++;
+    }
+
+    public void copyTo(FixedBitSet bits) {
+      if (size > 0) {
+        int resultPos = 0;
+        for (int i = 0; i < arrays.size(); i++) {
+          int[] srcArray = arrays.get(i);
+          int intsToCopy = (i < (arrays.size() - 1)) ? srcArray.length : indexForNextAddInCurrentAddArray;
+          for (int j = 0; j < intsToCopy; j++) {
+            bits.set(srcArray[j]);
+          }
+          resultPos += intsToCopy;
+        }
+        assert resultPos == size;
+      }
+    }
+
+    public int[] toArray() {
+      int[] result = new int[size];
+      if (size > 0) {
+        int resultPos = 0;
+        for (int i = 0; i < arrays.size(); i++) {
+          int[] srcArray = arrays.get(i);
+          int intsToCopy = (i < (arrays.size() - 1)) ? srcArray.length : indexForNextAddInCurrentAddArray;
+          System.arraycopy(srcArray, 0, result, resultPos, intsToCopy);
+          resultPos += intsToCopy;
+        }
+        assert resultPos == size;
+      }
+      return result;
+    }
+
+    public int size() {
+      return size;
+    }
+  }
 }
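For context, a DocSetCollector is typically driven through an IndexSearcher and then asked for the resulting DocSet. The sketch below shows that usage pattern only; the collect() helper and its class are hypothetical, and the searcher/query wiring reflects the usual Lucene search(Query, Collector) API of this era rather than anything in the commit.

package org.apache.solr.search;

import java.io.IOException;

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

// Hypothetical helper, not part of the commit: shows how the collector is
// normally used to build a DocSet for a query.
public class DocSetUsageSketch {

  public static DocSet collect(IndexSearcher searcher, Query query) throws IOException {
    int maxDoc = searcher.getIndexReader().maxDoc();
    DocSetCollector collector = new DocSetCollector(maxDoc);

    // Each matching doc id goes through DocSetCollector.collect(); small result
    // sets stay in the chunked scratch arrays, larger ones spill into the FixedBitSet.
    searcher.search(query, collector);

    // SortedIntDocSet for small results, BitDocSet once more than smallSetSize
    // documents have been collected.
    return collector.getDocSet();
  }
}

The design keeps the fast path for small filters allocation-light: the scratch chunks are only turned into a single int[] (via toArray()) when the result really is small, and copyTo() transfers them into the bit set otherwise.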