This commit is contained in:
Karl Wright 2016-04-10 05:11:27 -04:00
commit 7441f88ba6
3 changed files with 108 additions and 8 deletions

View File

@ -118,6 +118,11 @@ Optimizations
* SOLR-8856: Do not cache merge or 'read once' contexts in the hdfs block cache. (Mark Miller, Mike Drob)
* SOLR-8922: Optimize filter creation (DocSetCollector) to minimize the amount of garbage
produced. This resulted in up to 3x throughput when small filter creation was the bottleneck,
as well as orders of magnitude less garbage. (Jeff Wartes, yonik)
Other Changes
----------------------
* SOLR-7516: Improve javadocs for JavaBinCodec, ObjectResolver and enforce the single-usage policy.

View File

@ -17,6 +17,7 @@
package org.apache.solr.search;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorer;
@ -37,7 +38,7 @@ public class DocSetCollector extends SimpleCollector {
// in case there aren't that many hits, we may not want a very sparse
// bit array. Optimistically collect the first few docs in an array
// in case there are only a few.
final int[] scratch;
final ExpandingIntArray scratch;
public DocSetCollector(int maxDoc) {
this(DocSetUtil.smallSetSize(maxDoc), maxDoc);
@ -46,7 +47,7 @@ public class DocSetCollector extends SimpleCollector {
// Builds a collector for a segment of up to maxDoc documents. Ids are first
// gathered into a small scratch structure (capacity smallSetSize) before the
// collector falls back to a bit set for larger result sets.
public DocSetCollector(int smallSetSize, int maxDoc) {
this.smallSetSize = smallSetSize;
this.maxDoc = maxDoc;
// NOTE(review): this span is a rendered diff with +/- markers stripped; the
// next two lines are the removed and added variants of the same assignment —
// only one of them exists in any actual revision of the file.
this.scratch = new int[smallSetSize];
this.scratch = new ExpandingIntArray(smallSetSize);
}
@Override
@ -59,8 +60,8 @@ public class DocSetCollector extends SimpleCollector {
// than scanning through a potentially huge bit vector.
// FUTURE: when search methods all start returning docs in order, maybe
// we could have a ListDocSet() and use the collected array directly.
if (pos < scratch.length) {
scratch[pos]=doc;
if (pos < smallSetSize) {
scratch.add(pos, doc);
} else {
// this conditional could be removed if BitSet was preallocated, but that
// would take up more memory, and add more GC time...
@ -72,12 +73,12 @@ public class DocSetCollector extends SimpleCollector {
}
// Materializes the collected ids as a DocSet: a sorted int set when every hit
// fit in the scratch buffer, otherwise a bit set.
// NOTE(review): this span is a rendered diff with +/- markers stripped; each
// paired line below is the removed variant followed by its added replacement.
public DocSet getDocSet() {
if (pos<=scratch.length) {
if (pos<=scratch.size()) {
// assumes docs were collected in sorted order!
return new SortedIntDocSet(scratch, pos);
return new SortedIntDocSet(scratch.toArray(), pos);
} else {
// set the bits for ids that were collected in the array
for (int i=0; i<scratch.length; i++) bits.set(scratch[i]);
scratch.copyTo(bits);
return new BitDocSet(bits,pos);
}
}
@ -95,4 +96,73 @@ public class DocSetCollector extends SimpleCollector {
// Called when collection moves to a new index segment: remember the segment's
// starting doc id.
// NOTE(review): presumably collect() adds `base` to per-segment doc ids to
// produce index-wide ids — the relevant lines are outside this hunk; confirm.
protected void doSetNextReader(LeafReaderContext context) throws IOException {
this.base = context.docBase;
}
/**
 * Append-only growable int buffer used as scratch space while collecting doc
 * ids. Instead of reallocating one array and copying on growth, storage is a
 * chain of chunks: each new chunk doubles the previous size (minimum 10) and
 * the total capacity is capped at {@code smallSetSize}, so no more memory is
 * allocated than the small-set path can ever need.
 */
protected static class ExpandingIntArray {
  private static final int[] EMPTY = new int[0];
  /** Chunk currently receiving appends. */
  private int[] head = EMPTY;
  /** Next free slot within {@link #head}. */
  private int headPos = 0;
  /** Total number of values appended so far. */
  private int size = 0;
  /** Upper bound on total capacity across all chunks. */
  private final int smallSetSize;
  /** Every allocated chunk, in append order; created lazily on first growth. */
  private ArrayList<int[]> chunks;

  public ExpandingIntArray(int smallSetSize) {
    this.smallSetSize = smallSetSize;
  }

  /** Allocates the next chunk and makes it the append target. */
  private void addNewArray() {
    int cap = Math.max(10, head.length << 1); // double each time, starting at 10
    cap = Math.min(cap, smallSetSize - size); // never exceed smallSetSize in total
    head = new int[cap];
    if (chunks == null) {
      chunks = new ArrayList<>();
    }
    chunks.add(head);
    headPos = 0;
  }

  /**
   * Appends {@code value}. Only appending is supported: {@code index} is
   * trusted to equal {@link #size()} and is not otherwise used.
   */
  public void add(int index, int value) {
    if (headPos >= head.length) {
      addNewArray();
    }
    head[headPos++] = value;
    size++;
  }

  /** Sets a bit in {@code bits} for every value appended so far. */
  public void copyTo(FixedBitSet bits) {
    if (size > 0) {
      int remaining = size;
      // Every chunk before the head is full, so min(remaining, length)
      // copies whole chunks and then the headPos-long tail of the head.
      for (int[] chunk : chunks) {
        int n = Math.min(remaining, chunk.length);
        for (int j = 0; j < n; j++) {
          bits.set(chunk[j]);
        }
        remaining -= n;
      }
      assert remaining == 0;
    }
  }

  /** Returns all appended values, in append order, as one flat array. */
  public int[] toArray() {
    int[] out = new int[size];
    if (size > 0) {
      int remaining = size;
      for (int[] chunk : chunks) {
        int n = Math.min(remaining, chunk.length);
        System.arraycopy(chunk, 0, out, size - remaining, n);
        remaining -= n;
      }
      assert remaining == 0;
    }
    return out;
  }

  /** Number of values appended so far. */
  public int size() {
    return size;
  }
}
}

View File

@ -55,7 +55,29 @@ public class TestDocSet extends LuceneTestCase {
super.setUp();
rand = random();
}
// test the DocSetCollector
public void collect(DocSet set, int maxDoc) {
int smallSetSize = maxDoc >> 64 + 3;
if (set.size() > 1) {
if (random().nextBoolean()) {
smallSetSize = set.size() + random().nextInt(3) - 1; // test the bounds around smallSetSize
}
}
DocSetCollector collector = new DocSetCollector(smallSetSize, maxDoc);
for(DocIterator i1 = set.iterator(); i1.hasNext();) {
try {
collector.collect( i1.nextDoc() );
} catch (IOException e) {
throw new RuntimeException(e); // should be impossible
}
}
DocSet result = collector.getDocSet();
iter(set, result); // check that they are equal
}
public FixedBitSet getRandomSet(int sz, int bitsToSet) {
FixedBitSet bs = new FixedBitSet(sz);
if (sz==0) return bs;
@ -165,6 +187,9 @@ public class TestDocSet extends LuceneTestCase {
iter(a1,b1);
iter(a2,b2);
collect(a1, maxSize);
collect(a2, maxSize);
FixedBitSet a_and = bs1.clone(); a_and.and(bs2);
FixedBitSet a_or = bs1.clone(); a_or.or(bs2);
// FixedBitSet a_xor = bs1.clone(); a_xor.xor(bs2);