mirror of https://github.com/apache/lucene.git
Merge branch 'master' of https://git-wip-us.apache.org/repos/asf/lucene-solr
commit 7441f88ba6
@@ -118,6 +118,11 @@ Optimizations
 * SOLR-8856: Do not cache merge or 'read once' contexts in the hdfs block cache. (Mark Miller, Mike Drob)
 
+* SOLR-8922: Optimize filter creation (DocSetCollector) to minimize the amount of garbage
+  produced. This resulted in up to 3x throughput when small filter creation was the bottleneck,
+  as well as orders of magnitude less garbage. (Jeff Wartes, yonik)
+
+
 Other Changes
 ----------------------
 
 * SOLR-7516: Improve javadocs for JavaBinCodec, ObjectResolver and enforce the single-usage policy.
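For context on the SOLR-8922 entry above, here is a minimal, illustrative sketch of how a filter DocSet is built by driving the collector changed in this commit. Only DocSetCollector, DocSet and getDocSet() come from the diff below; the reader/query wiring and the class name FilterSketch are assumptions for the example.

```java
// Illustrative only: builds a DocSet by feeding hits into DocSetCollector.
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocSetCollector;

public class FilterSketch {
  public static DocSet buildFilter(DirectoryReader reader) throws Exception {
    IndexSearcher searcher = new IndexSearcher(reader);
    // Single-arg constructor picks the small-set threshold from maxDoc (see diff below).
    DocSetCollector collector = new DocSetCollector(reader.maxDoc());
    searcher.search(new TermQuery(new Term("field", "value")), collector);
    // Small results come back as a sorted int set, large ones as a bit set.
    return collector.getDocSet();
  }
}
```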
@@ -17,6 +17,7 @@
 package org.apache.solr.search;
 
 import java.io.IOException;
+import java.util.ArrayList;
 
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.Scorer;
@@ -37,7 +38,7 @@ public class DocSetCollector extends SimpleCollector {
   // in case there aren't that many hits, we may not want a very sparse
   // bit array. Optimistically collect the first few docs in an array
   // in case there are only a few.
-  final int[] scratch;
+  final ExpandingIntArray scratch;
 
   public DocSetCollector(int maxDoc) {
     this(DocSetUtil.smallSetSize(maxDoc), maxDoc);
@@ -46,7 +47,7 @@ public class DocSetCollector extends SimpleCollector {
   public DocSetCollector(int smallSetSize, int maxDoc) {
     this.smallSetSize = smallSetSize;
     this.maxDoc = maxDoc;
-    this.scratch = new int[smallSetSize];
+    this.scratch = new ExpandingIntArray(smallSetSize);
   }
 
   @Override
@@ -59,8 +60,8 @@ public class DocSetCollector extends SimpleCollector {
     // than scanning through a potentially huge bit vector.
     // FUTURE: when search methods all start returning docs in order, maybe
     // we could have a ListDocSet() and use the collected array directly.
-    if (pos < scratch.length) {
-      scratch[pos]=doc;
+    if (pos < smallSetSize) {
+      scratch.add(pos, doc);
     } else {
       // this conditional could be removed if BitSet was preallocated, but that
       // would take up more memory, and add more GC time...
@@ -72,12 +73,12 @@ public class DocSetCollector extends SimpleCollector {
   }
 
   public DocSet getDocSet() {
-    if (pos<=scratch.length) {
+    if (pos<=scratch.size()) {
       // assumes docs were collected in sorted order!
-      return new SortedIntDocSet(scratch, pos);
+      return new SortedIntDocSet(scratch.toArray(), pos);
     } else {
       // set the bits for ids that were collected in the array
-      for (int i=0; i<scratch.length; i++) bits.set(scratch[i]);
+      scratch.copyTo(bits);
       return new BitDocSet(bits,pos);
     }
   }
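A quick back-of-the-envelope on the representation choice in getDocSet() above; the numbers are illustrative, not from the patch:

```java
// For maxDoc = 1,000,000: a BitDocSet always needs maxDoc/8 bytes of bits,
// while a SortedIntDocSet needs 4 bytes per collected doc, so small results
// are much cheaper to keep as a sorted int[].
public class DocSetSizing {
  public static void main(String[] args) {
    int maxDoc = 1_000_000;
    int hits = 10_000;
    System.out.println("bit set bytes:      " + maxDoc / 8); // 125000, regardless of hit count
    System.out.println("sorted int[] bytes: " + hits * 4);   // 40000, grows with hit count
  }
}
```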
@@ -95,4 +96,73 @@ public class DocSetCollector extends SimpleCollector {
   protected void doSetNextReader(LeafReaderContext context) throws IOException {
     this.base = context.docBase;
   }
+
+  protected static class ExpandingIntArray {
+    private static final int[] EMPTY = new int[0];
+    private int[] currentAddArray = null;
+    private int indexForNextAddInCurrentAddArray = 0;
+    private int size = 0;
+    private final int smallSetSize;
+    private ArrayList<int[]> arrays;
+
+    public ExpandingIntArray(int smallSetSize) {
+      this.smallSetSize = smallSetSize;
+      this.currentAddArray = EMPTY;
+    }
+
+    private void addNewArray() {
+      int arrSize = Math.max(10, currentAddArray.length << 1);
+      arrSize = Math.min(arrSize, smallSetSize - size); // max out at the smallSetSize
+      this.currentAddArray = new int[arrSize];
+      if (arrays == null) {
+        arrays = new ArrayList<>();
+      }
+      arrays.add(this.currentAddArray);
+      indexForNextAddInCurrentAddArray = 0;
+      // System.out.println("### ALLOCATED " + this + " " + arrSize + " smallSetSize="+smallSetSize + " left=" + (smallSetSize-size));
+    }
+
+    public void add(int index, int value) {
+      // assert index == size; // only appending is supported
+      if (indexForNextAddInCurrentAddArray >= currentAddArray.length) {
+        addNewArray();
+      }
+      currentAddArray[indexForNextAddInCurrentAddArray++] = value;
+      size++;
+    }
+
+    public void copyTo(FixedBitSet bits) {
+      if (size > 0) {
+        int resultPos = 0;
+        for (int i = 0; i < arrays.size(); i++) {
+          int[] srcArray = arrays.get(i);
+          int intsToCopy = (i < (arrays.size() - 1)) ? srcArray.length : indexForNextAddInCurrentAddArray;
+          for (int j = 0; j < intsToCopy; j++) {
+            bits.set(srcArray[j]);
+          }
+          resultPos += intsToCopy;
+        }
+        assert resultPos == size;
+      }
+    }
+
+    public int[] toArray() {
+      int[] result = new int[size];
+      if (size > 0) {
+        int resultPos = 0;
+        for (int i = 0; i < arrays.size(); i++) {
+          int[] srcArray = arrays.get(i);
+          int intsToCopy = (i < (arrays.size() - 1)) ? srcArray.length : indexForNextAddInCurrentAddArray;
+          System.arraycopy(srcArray, 0, result, resultPos, intsToCopy);
+          resultPos += intsToCopy;
+        }
+        assert resultPos == size;
+      }
+      return result;
+    }
+
+    public int size() {
+      return size;
+    }
+  }
 }
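A standalone, illustrative sketch of the chunk-growth policy in addNewArray() above: chunks start at 10 ints and double in size, but are capped so total capacity never exceeds smallSetSize. The class name and driver loop are assumptions for the example:

```java
// Simulates the allocation sequence ExpandingIntArray goes through when it is
// filled by sequential add() calls. For smallSetSize = 100 it allocates chunks
// of 10, 20, 40 and 30 ints instead of one 100-int array up front.
public class GrowthSketch {
  public static void main(String[] args) {
    int smallSetSize = 100;
    int size = 0;            // ints added so far
    int currentLength = 0;   // length of the most recently allocated chunk
    while (size < smallSetSize) {
      int arrSize = Math.max(10, currentLength << 1);   // double, minimum 10
      arrSize = Math.min(arrSize, smallSetSize - size); // never exceed what is left
      System.out.println("allocate chunk of " + arrSize);
      currentLength = arrSize;
      size += arrSize;       // assume the chunk fills before the next allocation
    }
  }
}
```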
@@ -56,6 +56,28 @@ public class TestDocSet extends LuceneTestCase {
     rand = random();
   }
 
+  // test the DocSetCollector
+  public void collect(DocSet set, int maxDoc) {
+    int smallSetSize = maxDoc >> 64 + 3;
+    if (set.size() > 1) {
+      if (random().nextBoolean()) {
+        smallSetSize = set.size() + random().nextInt(3) - 1; // test the bounds around smallSetSize
+      }
+    }
+    DocSetCollector collector = new DocSetCollector(smallSetSize, maxDoc);
+
+    for(DocIterator i1 = set.iterator(); i1.hasNext();) {
+      try {
+        collector.collect( i1.nextDoc() );
+      } catch (IOException e) {
+        throw new RuntimeException(e); // should be impossible
+      }
+    }
+
+    DocSet result = collector.getDocSet();
+    iter(set, result); // check that they are equal
+  }
+
   public FixedBitSet getRandomSet(int sz, int bitsToSet) {
     FixedBitSet bs = new FixedBitSet(sz);
     if (sz==0) return bs;
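One note on the smallSetSize expression in the new test helper above: in Java, '+' binds tighter than '>>', so maxDoc >> 64 + 3 parses as maxDoc >> 67, and int shift counts use only the low 5 bits, so it is effectively maxDoc >> 3. A tiny illustrative check (not part of the commit):

```java
public class ShiftCheck {
  public static void main(String[] args) {
    int maxDoc = 1000;
    System.out.println(maxDoc >> 64 + 3); // 125: parsed as maxDoc >> 67, i.e. maxDoc >> 3
    System.out.println(maxDoc >> 3);      // 125
  }
}
```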
@@ -165,6 +187,9 @@ public class TestDocSet extends LuceneTestCase {
     iter(a1,b1);
     iter(a2,b2);
 
+    collect(a1, maxSize);
+    collect(a2, maxSize);
+
     FixedBitSet a_and = bs1.clone(); a_and.and(bs2);
     FixedBitSet a_or = bs1.clone(); a_or.or(bs2);
     // FixedBitSet a_xor = bs1.clone(); a_xor.xor(bs2);