This commit is contained in:
Karl Wright 2016-04-10 05:11:27 -04:00
commit 7441f88ba6
3 changed files with 108 additions and 8 deletions

View File

@ -118,6 +118,11 @@ Optimizations
* SOLR-8856: Do not cache merge or 'read once' contexts in the hdfs block cache. (Mark Miller, Mike Drob)
* SOLR-8922: Optimize filter creation (DocSetCollector) to minimize the amount of garbage
produced. This resulted in up to 3x throughput when small filter creation was the bottleneck,
as well as orders of magnitude less garbage. (Jeff Wartes, yonik)
Other Changes
----------------------
* SOLR-7516: Improve javadocs for JavaBinCodec, ObjectResolver and enforce the single-usage policy.

View File

@ -17,6 +17,7 @@
package org.apache.solr.search;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorer;
@ -37,7 +38,7 @@ public class DocSetCollector extends SimpleCollector {
// in case there aren't that many hits, we may not want a very sparse
// bit array. Optimistically collect the first few docs in an array
// in case there are only a few.
final int[] scratch;
final ExpandingIntArray scratch;
public DocSetCollector(int maxDoc) {
this(DocSetUtil.smallSetSize(maxDoc), maxDoc);
@ -46,7 +47,7 @@ public class DocSetCollector extends SimpleCollector {
// Builds a collector for a segment of up to maxDoc documents. Ids are first
// gathered into a small scratch structure (capacity smallSetSize) before the
// collector falls back to a bit set for larger result sets.
public DocSetCollector(int smallSetSize, int maxDoc) {
this.smallSetSize = smallSetSize;
this.maxDoc = maxDoc;
// NOTE(review): this span is a rendered diff with +/- markers stripped; the
// next two lines are the removed and added variants of the same assignment —
// only one of them exists in any actual revision of the file.
this.scratch = new int[smallSetSize];
this.scratch = new ExpandingIntArray(smallSetSize);
}
@Override
@ -59,8 +60,8 @@ public class DocSetCollector extends SimpleCollector {
// than scanning through a potentially huge bit vector.
// FUTURE: when search methods all start returning docs in order, maybe
// we could have a ListDocSet() and use the collected array directly.
if (pos < scratch.length) {
scratch[pos]=doc;
if (pos < smallSetSize) {
scratch.add(pos, doc);
} else {
// this conditional could be removed if BitSet was preallocated, but that
// would take up more memory, and add more GC time...
@ -72,12 +73,12 @@ public class DocSetCollector extends SimpleCollector {
}
// Materializes the collected ids as a DocSet: a sorted int set when every hit
// fit in the scratch buffer, otherwise a bit set.
// NOTE(review): this span is a rendered diff with +/- markers stripped; each
// paired line below is the removed variant followed by its added replacement.
public DocSet getDocSet() {
if (pos<=scratch.length) {
if (pos<=scratch.size()) {
// assumes docs were collected in sorted order!
return new SortedIntDocSet(scratch, pos);
return new SortedIntDocSet(scratch.toArray(), pos);
} else {
// set the bits for ids that were collected in the array
for (int i=0; i<scratch.length; i++) bits.set(scratch[i]);
scratch.copyTo(bits);
return new BitDocSet(bits,pos);
}
}
@ -95,4 +96,73 @@ public class DocSetCollector extends SimpleCollector {
// Called when collection moves to a new index segment: remember the segment's
// starting doc id.
// NOTE(review): presumably collect() adds `base` to per-segment doc ids to
// produce index-wide ids — the relevant lines are outside this hunk; confirm.
protected void doSetNextReader(LeafReaderContext context) throws IOException {
this.base = context.docBase;
}
/**
 * Append-only growable int buffer used as scratch space while collecting doc
 * ids. Instead of reallocating one array and copying on growth, storage is a
 * chain of chunks: each new chunk doubles the previous size (minimum 10) and
 * the total capacity is capped at {@code smallSetSize}, so no more memory is
 * allocated than the small-set path can ever need.
 */
protected static class ExpandingIntArray {
  private static final int[] EMPTY = new int[0];
  /** Chunk currently receiving appends. */
  private int[] head = EMPTY;
  /** Next free slot within {@link #head}. */
  private int headPos = 0;
  /** Total number of values appended so far. */
  private int size = 0;
  /** Upper bound on total capacity across all chunks. */
  private final int smallSetSize;
  /** Every allocated chunk, in append order; created lazily on first growth. */
  private ArrayList<int[]> chunks;

  public ExpandingIntArray(int smallSetSize) {
    this.smallSetSize = smallSetSize;
  }

  /** Allocates the next chunk and makes it the append target. */
  private void addNewArray() {
    int cap = Math.max(10, head.length << 1); // double each time, starting at 10
    cap = Math.min(cap, smallSetSize - size); // never exceed smallSetSize in total
    head = new int[cap];
    if (chunks == null) {
      chunks = new ArrayList<>();
    }
    chunks.add(head);
    headPos = 0;
  }

  /**
   * Appends {@code value}. Only appending is supported: {@code index} is
   * trusted to equal {@link #size()} and is not otherwise used.
   */
  public void add(int index, int value) {
    if (headPos >= head.length) {
      addNewArray();
    }
    head[headPos++] = value;
    size++;
  }

  /** Sets a bit in {@code bits} for every value appended so far. */
  public void copyTo(FixedBitSet bits) {
    if (size > 0) {
      int remaining = size;
      // Every chunk before the head is full, so min(remaining, length)
      // copies whole chunks and then the headPos-long tail of the head.
      for (int[] chunk : chunks) {
        int n = Math.min(remaining, chunk.length);
        for (int j = 0; j < n; j++) {
          bits.set(chunk[j]);
        }
        remaining -= n;
      }
      assert remaining == 0;
    }
  }

  /** Returns all appended values, in append order, as one flat array. */
  public int[] toArray() {
    int[] out = new int[size];
    if (size > 0) {
      int remaining = size;
      for (int[] chunk : chunks) {
        int n = Math.min(remaining, chunk.length);
        System.arraycopy(chunk, 0, out, size - remaining, n);
        remaining -= n;
      }
      assert remaining == 0;
    }
    return out;
  }

  /** Number of values appended so far. */
  public int size() {
    return size;
  }
}
}

View File

@ -55,7 +55,29 @@ public class TestDocSet extends LuceneTestCase {
super.setUp();
rand = random();
}
// test the DocSetCollector
public void collect(DocSet set, int maxDoc) {
int smallSetSize = maxDoc >> 64 + 3;
if (set.size() > 1) {
if (random().nextBoolean()) {
smallSetSize = set.size() + random().nextInt(3) - 1; // test the bounds around smallSetSize
}
}
DocSetCollector collector = new DocSetCollector(smallSetSize, maxDoc);
for(DocIterator i1 = set.iterator(); i1.hasNext();) {
try {
collector.collect( i1.nextDoc() );
} catch (IOException e) {
throw new RuntimeException(e); // should be impossible
}
}
DocSet result = collector.getDocSet();
iter(set, result); // check that they are equal
}
public FixedBitSet getRandomSet(int sz, int bitsToSet) {
FixedBitSet bs = new FixedBitSet(sz);
if (sz==0) return bs;
@ -165,6 +187,9 @@ public class TestDocSet extends LuceneTestCase {
iter(a1,b1);
iter(a2,b2);
collect(a1, maxSize);
collect(a2, maxSize);
FixedBitSet a_and = bs1.clone(); a_and.and(bs2);
FixedBitSet a_or = bs1.clone(); a_or.or(bs2);
// FixedBitSet a_xor = bs1.clone(); a_xor.xor(bs2);