SOLR-9764: share liveDocs for any DocSet of size numDocs

This commit is contained in:
yonik 2017-01-31 11:52:04 -05:00
parent d8d61ff61d
commit a43ef8f480
13 changed files with 192 additions and 39 deletions

View File

@ -141,6 +141,9 @@ Optimizations
* SOLR-9941: Clear the deletes lists at UpdateLog before replaying from log. This prevents redundantly pre-applying
DBQs, during the log replay, to every update in the log as if the DBQs were out of order. (hossman, Ishan Chattopadhyaya)
* SOLR-9764: All filters that match all documents in the index now share the same memory (DocSet).
(Michael Sun, yonik)
Other Changes
----------------------
* SOLR-9980: Expose configVersion in core admin status (Jessica Cheng Mallet via Tomás Fernández Löbbe)

View File

@ -49,6 +49,7 @@ import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocSetBuilder;
import org.apache.solr.search.DocSetProducer;
import org.apache.solr.search.DocSetUtil;
import org.apache.solr.search.ExtendedQueryBase;
import org.apache.solr.search.Filter;
import org.apache.solr.search.SolrIndexSearcher;
@ -168,7 +169,8 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro
maxTermsPerSegment = Math.max(maxTermsPerSegment, termsVisited);
}
return maxTermsPerSegment <= 1 ? builder.buildUniqueInOrder(liveBits) : builder.build(liveBits);
DocSet set = maxTermsPerSegment <= 1 ? builder.buildUniqueInOrder(liveBits) : builder.build(liveBits);
return DocSetUtil.getDocSet(set, searcher);
}

View File

@ -261,7 +261,7 @@ public class BitDocSet extends DocSetBase {
}
@Override
protected BitDocSet clone() {
public BitDocSet clone() {
return new BitDocSet(bits.clone(), size);
}

View File

@ -31,7 +31,7 @@ import org.apache.solr.common.SolrException;
*
* @since solr 0.9
*/
public interface DocSet extends Closeable, Accountable /* extends Collection<Integer> */ {
public interface DocSet extends Closeable, Accountable, Cloneable /* extends Collection<Integer> */ {
/**
* Adds the specified document if it is not currently in the DocSet
@ -131,5 +131,7 @@ public interface DocSet extends Closeable, Accountable /* extends Collection<Int
*/
public void addAllTo(DocSet target);
public DocSet clone();
public static DocSet EMPTY = new SortedIntDocSet(new int[0], 0);
}

View File

@ -23,8 +23,8 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.SolrException;
@ -63,8 +63,21 @@ abstract class DocSetBase implements DocSet {
// don't compare matches
}
FixedBitSet bs1 = this.getBits();
FixedBitSet bs2 = toBitSet(other);
// resize both BitSets to make sure they have the same amount of zero padding
int maxNumBits = bs1.length() > bs2.length() ? bs1.length() : bs2.length();
bs1 = FixedBitSet.ensureCapacity(bs1, maxNumBits);
bs2 = FixedBitSet.ensureCapacity(bs2, maxNumBits);
// if (this.size() != other.size()) return false;
return this.getBits().equals(toBitSet(other));
return bs1.equals(bs2);
}
public DocSet clone() {
throw new RuntimeException(new CloneNotSupportedException());
}
/**
@ -90,7 +103,7 @@ abstract class DocSetBase implements DocSet {
* implementation.
*/
protected FixedBitSet getBits() {
FixedBitSet bits = new FixedBitSet(64);
FixedBitSet bits = new FixedBitSet(size());
for (DocIterator iter = iterator(); iter.hasNext();) {
int nextDoc = iter.nextDoc();
bits = FixedBitSet.ensureCapacity(bits, nextDoc);
@ -193,7 +206,7 @@ abstract class DocSetBase implements DocSet {
@Override
public int nextDoc() {
pos = bs.nextSetBit(pos+1);
pos = bs.nextSetBit(pos+1); // TODO: this is buggy if getBits() returns a bitset that does not have a capacity of maxDoc
return adjustedDoc = pos<max ? pos-base : NO_MORE_DOCS;
}

View File

@ -72,10 +72,17 @@ public class DocSetCollector extends SimpleCollector {
pos++;
}
/** The number of documents that have been collected */
public int size() {
return pos;
}
public DocSet getDocSet() {
if (pos<=scratch.size()) {
// assumes docs were collected in sorted order!
return new SortedIntDocSet(scratch.toArray(), pos);
// } else if (pos == maxDoc) {
// return new MatchAllDocSet(maxDoc); // a bunch of code currently relies on BitDocSet (either explicitly, or implicitly for performance)
} else {
// set the bits for ids that were collected in the array
scratch.copyTo(bits);

View File

@ -39,6 +39,7 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.SolrException;
/** @lucene.experimental */
public class DocSetUtil {
@ -71,6 +72,51 @@ public class DocSetUtil {
}
}
/**
* This variant of getDocSet will attempt to do some deduplication
* on certain DocSets such as DocSets that match numDocs. This means it can return
* a cached version of the set, and the returned set should not be modified.
* @lucene.experimental
*/
public static DocSet getDocSet(DocSetCollector collector, SolrIndexSearcher searcher) {
if (collector.size() == searcher.numDocs()) {
if (!searcher.isLiveDocsInstantiated()) {
searcher.setLiveDocs( collector.getDocSet() );
}
try {
return searcher.getLiveDocs();
} catch (IOException e) {
// should be impossible... liveDocs should exist, so no IO should be necessary
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
}
}
return collector.getDocSet();
}
/**
* This variant of getDocSet maps all sets with size numDocs to searcher.getLiveDocs.
* The returned set should not be modified.
* @lucene.experimental
*/
public static DocSet getDocSet(DocSet docs, SolrIndexSearcher searcher) {
if (docs.size() == searcher.numDocs()) {
if (!searcher.isLiveDocsInstantiated()) {
searcher.setLiveDocs( docs );
}
try {
// if this docset has the same cardinality as liveDocs, return liveDocs instead
// so this set will be short lived garbage.
return searcher.getLiveDocs();
} catch (IOException e) {
// should be impossible... liveDocs should exist, so no IO should be necessary
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
}
}
return docs;
}
// implementers of DocSetProducer should not call this with themselves or it will result in an infinite loop
public static DocSet createDocSet(SolrIndexSearcher searcher, Query query, DocSet filter) throws IOException {
@ -105,7 +151,7 @@ public class DocSetUtil {
// but we should not catch it here, as we don't know how this DocSet will be used (it could be negated before use) or cached.
searcher.search(query, collector);
return collector.getDocSet();
return getDocSet(collector, searcher);
}
public static DocSet createDocSet(SolrIndexSearcher searcher, Term term) throws IOException {
@ -113,7 +159,6 @@ public class DocSetUtil {
int maxDoc = searcher.getIndexReader().maxDoc();
int smallSetSize = smallSetSize(maxDoc);
String field = term.field();
BytesRef termVal = term.bytes();
@ -135,15 +180,16 @@ public class DocSetUtil {
}
}
DocSet answer = null;
if (maxCount == 0) {
return DocSet.EMPTY;
answer = DocSet.EMPTY;
} else if (maxCount <= smallSetSize) {
answer = createSmallSet(leaves, postList, maxCount, firstReader);
} else {
answer = createBigSet(leaves, postList, maxDoc, firstReader);
}
if (maxCount <= smallSetSize) {
return createSmallSet(leaves, postList, maxCount, firstReader);
}
return createBigSet(leaves, postList, maxDoc, firstReader);
return DocSetUtil.getDocSet( answer, searcher );
}
private static DocSet createSmallSet(List<LeafReaderContext> leaves, PostingsEnum[] postList, int maxPossible, int firstReader) throws IOException {

View File

@ -165,12 +165,8 @@ public class DocSlice extends DocSetBase implements DocList {
}
@Override
protected DocSlice clone() {
try {
// DocSlice is not currently mutable
DocSlice slice = (DocSlice) super.clone();
} catch (CloneNotSupportedException e) {}
return null;
public DocSlice clone() {
return (DocSlice) super.clone();
}
/** WARNING: this can over-estimate real memory use since backing arrays are shared with other DocSlice instances */

View File

@ -290,7 +290,7 @@ public final class HashDocSet extends DocSetBase {
}
@Override
protected HashDocSet clone() {
public HashDocSet clone() {
return new HashDocSet(this);
}

View File

@ -429,6 +429,10 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
return reader.maxDoc();
}
public final int numDocs() {
return reader.numDocs();
}
public final int docFreq(Term term) throws IOException {
return reader.docFreq(term);
}
@ -1063,19 +1067,24 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
getDocSet(query);
}
public BitDocSet getDocSetBits(Query q) throws IOException {
DocSet answer = getDocSet(q);
if (answer instanceof BitDocSet) {
return (BitDocSet) answer;
}
private BitDocSet makeBitDocSet(DocSet answer) {
// TODO: this should be implemented in DocSet, most likely with a getBits method that takes a maxDoc argument
// or make DocSet instances remember maxDoc
FixedBitSet bs = new FixedBitSet(maxDoc());
DocIterator iter = answer.iterator();
while (iter.hasNext()) {
bs.set(iter.nextDoc());
}
BitDocSet answerBits = new BitDocSet(bs, answer.size());
return new BitDocSet(bs, answer.size());
}
public BitDocSet getDocSetBits(Query q) throws IOException {
DocSet answer = getDocSet(q);
if (answer instanceof BitDocSet) {
return (BitDocSet) answer;
}
BitDocSet answerBits = makeBitDocSet(answer);
if (filterCache != null) {
filterCache.put(q, answerBits);
}
@ -1138,16 +1147,35 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
}
private static Query matchAllDocsQuery = new MatchAllDocsQuery();
private BitDocSet liveDocs;
private volatile BitDocSet liveDocs;
/** @lucene.internal the type of DocSet returned may change in the future */
public BitDocSet getLiveDocs() throws IOException {
// going through the filter cache will provide thread safety here
// Going through the filter cache will provide thread safety here if we only had getLiveDocs,
// but the addition of setLiveDocs means we needed to add volatile to "liveDocs".
if (liveDocs == null) {
liveDocs = getDocSetBits(matchAllDocsQuery);
}
assert liveDocs.size() == numDocs();
return liveDocs;
}
/** @lucene.internal */
public boolean isLiveDocsInstantiated() {
return liveDocs != null;
}
/** @lucene.internal */
public void setLiveDocs(DocSet docs) {
// a few places currently expect BitDocSet
assert docs.size() == numDocs();
if (docs instanceof BitDocSet) {
this.liveDocs = (BitDocSet)docs;
} else {
this.liveDocs = makeBitDocSet(docs);
}
}
public static class ProcessedFilter {
public DocSet answer; // the answer, if non-null
public Filter filter;
@ -1178,8 +1206,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
((DelegatingCollector) collector).finish();
}
DocSet docSet = setCollector.getDocSet();
return docSet;
return DocSetUtil.getDocSet(setCollector, this);
}
/**
@ -1251,7 +1278,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
((DelegatingCollector) collector).finish();
}
return setCollector.getDocSet();
return DocSetUtil.getDocSet(setCollector, this);
}
public ProcessedFilter getProcessedFilter(DocSet setFilter, List<Query> queries) throws IOException {
@ -1959,7 +1986,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
buildAndRunCollectorChain(qr, query, collector, cmd, pf.postFilter);
set = setCollector.getDocSet();
set = DocSetUtil.getDocSet(setCollector, this);
nDocsReturned = 0;
ids = new int[nDocsReturned];
@ -1976,7 +2003,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
buildAndRunCollectorChain(qr, query, collector, cmd, pf.postFilter);
set = setCollector.getDocSet();
set = DocSetUtil.getDocSet(setCollector, this);
totalHits = topCollector.getTotalHits();
assert (totalHits == set.size());

View File

@ -791,7 +791,7 @@ public class SortedIntDocSet extends DocSetBase {
}
@Override
protected SortedIntDocSet clone() {
public SortedIntDocSet clone() {
return new SortedIntDocSet(docs.clone());
}

View File

@ -40,6 +40,7 @@ import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocSetCollector;
import org.apache.solr.search.DocSetUtil;
import org.apache.solr.search.QueryCommand;
import org.apache.solr.search.QueryResult;
import org.apache.solr.search.QueryUtils;
@ -193,7 +194,7 @@ public class CommandHandler {
List<Collector> allCollectors = new ArrayList<>(collectors);
allCollectors.add(docSetCollector);
searchWithTimeLimiter(query, filter, MultiCollector.wrap(allCollectors));
return docSetCollector.getDocSet();
return DocSetUtil.getDocSet( docSetCollector, searcher );
}
@SuppressWarnings("unchecked")

View File

@ -18,6 +18,7 @@ package org.apache.solr.search;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrInputDocument;
@ -42,6 +43,61 @@ public class TestFiltering extends SolrTestCaseJ4 {
initCore("solrconfig.xml","schema_latest.xml");
}
@Test
public void testLiveDocsSharing() throws Exception {
clearIndex();
for (int i=0; i<20; i++) {
for (int repeat=0; repeat < (i%5==0 ? 2 : 1); repeat++) {
assertU(adoc("id", Integer.toString(i), "foo_s", "foo", "val_i", Integer.toString(i), "val_s", Character.toString((char)('A' + i))));
}
}
assertU(commit());
String[] queries = {
"foo_s:foo",
"foo_s:f*",
"*:*",
"id:[* TO *]",
"id:[0 TO 99]",
"val_i:[0 TO 20]",
"val_s:[A TO z]"
};
SolrQueryRequest req = req();
try {
SolrIndexSearcher searcher = req.getSearcher();
DocSet live = null;
for (String qstr : queries) {
Query q = QParser.getParser(qstr, null, req).getQuery();
// System.out.println("getting set for " + q);
DocSet set = searcher.getDocSet(q);
if (live == null) {
live = searcher.getLiveDocs();
}
assertTrue( set == live);
QueryCommand cmd = new QueryCommand();
cmd.setQuery( QParser.getParser(qstr, null, req).getQuery() );
cmd.setLen(random().nextInt(30));
cmd.setNeedDocSet(true);
QueryResult res = new QueryResult();
searcher.search(res, cmd);
set = res.getDocSet();
assertTrue( set == live );
cmd.setQuery( QParser.getParser(qstr + " OR id:0", null, req).getQuery() );
cmd.setFilterList( QParser.getParser(qstr + " OR id:1", null, req).getQuery() );
res = new QueryResult();
searcher.search(res, cmd);
set = res.getDocSet();
assertTrue( set == live );
}
} finally {
req.close();
}
}
public void testCaching() throws Exception {
clearIndex();