SOLR-8922: optimize DocSetCollector to produce less garbage

This commit is contained in:
yonik 2016-04-09 19:10:02 -04:00
parent 65a69b9757
commit 301e178681
2 changed files with 82 additions and 7 deletions

View File

@ -87,6 +87,11 @@ Optimizations
* SOLR-8856: Do not cache merge or 'read once' contexts in the hdfs block cache. (Mark Miller, Mike Drob)
* SOLR-8922: Optimize filter creation (DocSetCollector) to minimize the amount of garbage
produced. This resulted in up to 3x throughput when small filter creation was the bottleneck,
as well as orders of magnitude less garbage. (Jeff Wartes, yonik)
Other Changes
----------------------
* SOLR-7516: Improve javadocs for JavaBinCodec, ObjectResolver and enforce the single-usage policy.

View File

@ -17,6 +17,7 @@
package org.apache.solr.search;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorer;
@ -37,7 +38,7 @@ public class DocSetCollector extends SimpleCollector {
// in case there aren't that many hits, we may not want a very sparse
// bit array. Optimistically collect the first few docs in an array
// in case there are only a few.
final int[] scratch;
final ExpandingIntArray scratch;
public DocSetCollector(int maxDoc) {
this(DocSetUtil.smallSetSize(maxDoc), maxDoc);
@ -46,7 +47,7 @@ public class DocSetCollector extends SimpleCollector {
public DocSetCollector(int smallSetSize, int maxDoc) {
this.smallSetSize = smallSetSize;
this.maxDoc = maxDoc;
this.scratch = new int[smallSetSize];
this.scratch = new ExpandingIntArray(smallSetSize);
}
@Override
@ -59,8 +60,8 @@ public class DocSetCollector extends SimpleCollector {
// than scanning through a potentially huge bit vector.
// FUTURE: when search methods all start returning docs in order, maybe
// we could have a ListDocSet() and use the collected array directly.
if (pos < scratch.length) {
scratch[pos]=doc;
if (pos < smallSetSize) {
scratch.add(pos, doc);
} else {
// this conditional could be removed if BitSet was preallocated, but that
// would take up more memory, and add more GC time...
@ -72,12 +73,12 @@ public class DocSetCollector extends SimpleCollector {
}
public DocSet getDocSet() {
if (pos<=scratch.length) {
if (pos<=scratch.size()) {
// assumes docs were collected in sorted order!
return new SortedIntDocSet(scratch, pos);
return new SortedIntDocSet(scratch.toArray(), pos);
} else {
// set the bits for ids that were collected in the array
for (int i=0; i<scratch.length; i++) bits.set(scratch[i]);
scratch.copyTo(bits);
return new BitDocSet(bits,pos);
}
}
@ -95,4 +96,73 @@ public class DocSetCollector extends SimpleCollector {
protected void doSetNextReader(LeafReaderContext context) throws IOException {
this.base = context.docBase;
}
protected static class ExpandingIntArray {
private static final int[] EMPTY = new int[0];
private int[] currentAddArray = null;
private int indexForNextAddInCurrentAddArray = 0;
private int size = 0;
private final int smallSetSize;
private ArrayList<int[]> arrays;
public ExpandingIntArray(int smallSetSize) {
this.smallSetSize = smallSetSize;
this.currentAddArray = EMPTY;
}
private void addNewArray() {
int arrSize = Math.max(10, currentAddArray.length << 1);
arrSize = Math.min(arrSize, smallSetSize - size); // max out at the smallSetSize
this.currentAddArray = new int[arrSize];
if (arrays == null) {
arrays = new ArrayList<>();
}
arrays.add(this.currentAddArray);
indexForNextAddInCurrentAddArray = 0;
// System.out.println("### ALLOCATED " + this + " " + arrSize + " smallSetSize="+smallSetSize + " left=" + (smallSetSize-size));
}
public void add(int index, int value) {
// assert index == size; // only appending is supported
if (indexForNextAddInCurrentAddArray >= currentAddArray.length) {
addNewArray();
}
currentAddArray[indexForNextAddInCurrentAddArray++] = value;
size++;
}
public void copyTo(FixedBitSet bits) {
if (size > 0) {
int resultPos = 0;
for (int i = 0; i < arrays.size(); i++) {
int[] srcArray = arrays.get(i);
int intsToCopy = (i < (arrays.size() - 1)) ? srcArray.length : indexForNextAddInCurrentAddArray;
for (int j = 0; j < intsToCopy; j++) {
bits.set(srcArray[j]);
}
resultPos += intsToCopy;
}
assert resultPos == size;
}
}
public int[] toArray() {
int[] result = new int[size];
if (size > 0) {
int resultPos = 0;
for (int i = 0; i < arrays.size(); i++) {
int[] srcArray = arrays.get(i);
int intsToCopy = (i < (arrays.size() - 1)) ? srcArray.length : indexForNextAddInCurrentAddArray;
System.arraycopy(srcArray, 0, result, resultPos, intsToCopy);
resultPos += intsToCopy;
}
assert resultPos == size;
}
return result;
}
public int size() {
return size;
}
}
}