SOLR-9764: share liveDocs for any DocSet of size numDocs

yonik 2017-01-31 11:52:04 -05:00
parent d8d61ff61d
commit a43ef8f480
13 changed files with 192 additions and 39 deletions

solr/CHANGES.txt

@@ -141,6 +141,9 @@ Optimizations
 * SOLR-9941: Clear the deletes lists at UpdateLog before replaying from log. This prevents redundantly pre-applying
   DBQs, during the log replay, to every update in the log as if the DBQs were out of order. (hossman, Ishan Chattopadhyaya)

+* SOLR-9764: All filters that match all documents in the index now share the same memory (DocSet).
+  (Michael Sun, yonik)
+
 Other Changes
 ----------------------
 * SOLR-9980: Expose configVersion in core admin status (Jessica Cheng Mallet via Tomás Fernández Löbbe)

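To make the CHANGES entry concrete, here is a rough sketch of what the sharing looks like from calling code. It is not part of the commit; it mirrors the testLiveDocsSharing test added in TestFiltering.java below, and assumes an open SolrQueryRequest named req:

    SolrIndexSearcher searcher = req.getSearcher();
    // Two different filters that each match every live document...
    DocSet all1 = searcher.getDocSet(QParser.getParser("*:*", null, req).getQuery());
    DocSet all2 = searcher.getDocSet(QParser.getParser("id:[* TO *]", null, req).getQuery());
    // ...now come back as the very same instance: the searcher's shared liveDocs BitDocSet.
    assert all1 == searcher.getLiveDocs();
    assert all1 == all2;

Because the returned set is shared, callers must treat it as read-only.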
SolrRangeQuery.java

@@ -49,6 +49,7 @@ import org.apache.solr.search.BitDocSet;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.DocSetBuilder;
 import org.apache.solr.search.DocSetProducer;
+import org.apache.solr.search.DocSetUtil;
 import org.apache.solr.search.ExtendedQueryBase;
 import org.apache.solr.search.Filter;
 import org.apache.solr.search.SolrIndexSearcher;
@@ -168,7 +169,8 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro
       maxTermsPerSegment = Math.max(maxTermsPerSegment, termsVisited);
     }

-    return maxTermsPerSegment <= 1 ? builder.buildUniqueInOrder(liveBits) : builder.build(liveBits);
+    DocSet set = maxTermsPerSegment <= 1 ? builder.buildUniqueInOrder(liveBits) : builder.build(liveBits);
+    return DocSetUtil.getDocSet(set, searcher);
   }

BitDocSet.java

@@ -261,7 +261,7 @@ public class BitDocSet extends DocSetBase {
   }

   @Override
-  protected BitDocSet clone() {
+  public BitDocSet clone() {
     return new BitDocSet(bits.clone(), size);
   }

DocSet.java

@@ -31,7 +31,7 @@ import org.apache.solr.common.SolrException;
  *
  * @since solr 0.9
  */
-public interface DocSet extends Closeable, Accountable /* extends Collection<Integer> */ {
+public interface DocSet extends Closeable, Accountable, Cloneable /* extends Collection<Integer> */ {

   /**
    * Adds the specified document if it is not currently in the DocSet
@@ -131,5 +131,7 @@ public interface DocSet extends Closeable, Accountable /* extends Collection<Int
    */
   public void addAllTo(DocSet target);

+  public DocSet clone();
+
   public static DocSet EMPTY = new SortedIntDocSet(new int[0], 0);
 }

DocSetBase.java

@@ -23,8 +23,8 @@ import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BitDocIdSet;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.solr.common.SolrException;
@@ -63,8 +63,21 @@ abstract class DocSetBase implements DocSet {
       // don't compare matches
     }

+    FixedBitSet bs1 = this.getBits();
+    FixedBitSet bs2 = toBitSet(other);
+
+    // resize both BitSets to make sure they have the same amount of zero padding
+    int maxNumBits = bs1.length() > bs2.length() ? bs1.length() : bs2.length();
+    bs1 = FixedBitSet.ensureCapacity(bs1, maxNumBits);
+    bs2 = FixedBitSet.ensureCapacity(bs2, maxNumBits);
+
     // if (this.size() != other.size()) return false;
-    return this.getBits().equals(toBitSet(other));
+    return bs1.equals(bs2);
+  }
+
+  public DocSet clone() {
+    throw new RuntimeException(new CloneNotSupportedException());
   }

   /**
@@ -90,7 +103,7 @@ abstract class DocSetBase implements DocSet {
    * implementation.
    */
   protected FixedBitSet getBits() {
-    FixedBitSet bits = new FixedBitSet(64);
+    FixedBitSet bits = new FixedBitSet(size());
     for (DocIterator iter = iterator(); iter.hasNext();) {
       int nextDoc = iter.nextDoc();
       bits = FixedBitSet.ensureCapacity(bits, nextDoc);
@@ -193,7 +206,7 @@ abstract class DocSetBase implements DocSet {
     @Override
     public int nextDoc() {
-      pos = bs.nextSetBit(pos+1);
+      pos = bs.nextSetBit(pos+1);  // TODO: this is buggy if getBits() returns a bitset that does not have a capacity of maxDoc
       return adjustedDoc = pos<max ? pos-base : NO_MORE_DOCS;
     }

DocSetCollector.java

@@ -72,10 +72,17 @@ public class DocSetCollector extends SimpleCollector {
       pos++;
     }

+  /** The number of documents that have been collected */
+  public int size() {
+    return pos;
+  }
+
   public DocSet getDocSet() {
     if (pos<=scratch.size()) {
       // assumes docs were collected in sorted order!
       return new SortedIntDocSet(scratch.toArray(), pos);
+      // } else if (pos == maxDoc) {
+      //   return new MatchAllDocSet(maxDoc);  // a bunch of code currently relies on BitDocSet (either explicitly, or implicitly for performance)
     } else {
       // set the bits for ids that were collected in the array
       scratch.copyTo(bits);

DocSetUtil.java

@@ -39,6 +39,7 @@ import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
+import org.apache.solr.common.SolrException;

 /** @lucene.experimental */
 public class DocSetUtil {
@@ -71,6 +72,51 @@
     }
   }

+  /**
+   * This variant of getDocSet will attempt to do some deduplication
+   * on certain DocSets such as DocSets that match numDocs. This means it can return
+   * a cached version of the set, and the returned set should not be modified.
+   * @lucene.experimental
+   */
+  public static DocSet getDocSet(DocSetCollector collector, SolrIndexSearcher searcher) {
+    if (collector.size() == searcher.numDocs()) {
+      if (!searcher.isLiveDocsInstantiated()) {
+        searcher.setLiveDocs( collector.getDocSet() );
+      }
+      try {
+        return searcher.getLiveDocs();
+      } catch (IOException e) {
+        // should be impossible... liveDocs should exist, so no IO should be necessary
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
+      }
+    }
+
+    return collector.getDocSet();
+  }
+
+  /**
+   * This variant of getDocSet maps all sets with size numDocs to searcher.getLiveDocs.
+   * The returned set should not be modified.
+   * @lucene.experimental
+   */
+  public static DocSet getDocSet(DocSet docs, SolrIndexSearcher searcher) {
+    if (docs.size() == searcher.numDocs()) {
+      if (!searcher.isLiveDocsInstantiated()) {
+        searcher.setLiveDocs( docs );
+      }
+      try {
+        // if this docset has the same cardinality as liveDocs, return liveDocs instead
+        // so this set will be short lived garbage.
+        return searcher.getLiveDocs();
+      } catch (IOException e) {
+        // should be impossible... liveDocs should exist, so no IO should be necessary
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
+      }
+    }
+
+    return docs;
+  }
+
   // implementers of DocSetProducer should not call this with themselves or it will result in an infinite loop
   public static DocSet createDocSet(SolrIndexSearcher searcher, Query query, DocSet filter) throws IOException {
@@ -105,7 +151,7 @@
     // but we should not catch it here, as we don't know how this DocSet will be used (it could be negated before use) or cached.
     searcher.search(query, collector);

-    return collector.getDocSet();
+    return getDocSet(collector, searcher);
   }

   public static DocSet createDocSet(SolrIndexSearcher searcher, Term term) throws IOException {
@@ -113,7 +159,6 @@
     int maxDoc = searcher.getIndexReader().maxDoc();
     int smallSetSize = smallSetSize(maxDoc);
-
     String field = term.field();
     BytesRef termVal = term.bytes();
@@ -135,15 +180,16 @@
       }
     }

+    DocSet answer = null;
     if (maxCount == 0) {
-      return DocSet.EMPTY;
+      answer = DocSet.EMPTY;
+    } else if (maxCount <= smallSetSize) {
+      answer = createSmallSet(leaves, postList, maxCount, firstReader);
+    } else {
+      answer = createBigSet(leaves, postList, maxDoc, firstReader);
     }
-
-    if (maxCount <= smallSetSize) {
-      return createSmallSet(leaves, postList, maxCount, firstReader);
-    }
-
-    return createBigSet(leaves, postList, maxDoc, firstReader);
+
+    return DocSetUtil.getDocSet( answer, searcher );
   }

   private static DocSet createSmallSet(List<LeafReaderContext> leaves, PostingsEnum[] postList, int maxPossible, int firstReader) throws IOException {

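The intended call pattern for the two new helpers can be read off the call sites changed below (SolrIndexSearcher and CommandHandler). As a hedged summary, not additional API, with setCollector and searcher as placeholder names:

    // After running a search that filled a DocSetCollector "setCollector":
    DocSet set = DocSetUtil.getDocSet(setCollector, searcher);
    // If setCollector.size() == searcher.numDocs() (the query matched every live document),
    // "set" is the searcher's shared liveDocs BitDocSet; otherwise it is the collector's own
    // SortedIntDocSet or BitDocSet. Either way, treat the result as read-only.

The DocSet-taking overload does the same for an already-built set, which is why SolrRangeQuery and createDocSet(...) above route their results through it.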
DocSlice.java

@@ -165,12 +165,8 @@ public class DocSlice extends DocSetBase implements DocList {
   }

   @Override
-  protected DocSlice clone() {
-    try {
-      // DocSlice is not currently mutable
-      DocSlice slice = (DocSlice) super.clone();
-    } catch (CloneNotSupportedException e) {}
-    return null;
+  public DocSlice clone() {
+    return (DocSlice) super.clone();
   }

   /** WARNING: this can over-estimate real memory use since backing arrays are shared with other DocSlice instances */

HashDocSet.java

@@ -290,7 +290,7 @@ public final class HashDocSet extends DocSetBase {
   }

   @Override
-  protected HashDocSet clone() {
+  public HashDocSet clone() {
     return new HashDocSet(this);
   }

SolrIndexSearcher.java

@@ -429,6 +429,10 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
     return reader.maxDoc();
   }

+  public final int numDocs() {
+    return reader.numDocs();
+  }
+
   public final int docFreq(Term term) throws IOException {
     return reader.docFreq(term);
   }
@@ -1063,19 +1067,24 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
     getDocSet(query);
   }

-  public BitDocSet getDocSetBits(Query q) throws IOException {
-    DocSet answer = getDocSet(q);
-    if (answer instanceof BitDocSet) {
-      return (BitDocSet) answer;
-    }
-
+  private BitDocSet makeBitDocSet(DocSet answer) {
+    // TODO: this should be implemented in DocSet, most likely with a getBits method that takes a maxDoc argument
+    // or make DocSet instances remember maxDoc
     FixedBitSet bs = new FixedBitSet(maxDoc());
     DocIterator iter = answer.iterator();
     while (iter.hasNext()) {
       bs.set(iter.nextDoc());
     }
-    BitDocSet answerBits = new BitDocSet(bs, answer.size());
+    return new BitDocSet(bs, answer.size());
+  }
+
+  public BitDocSet getDocSetBits(Query q) throws IOException {
+    DocSet answer = getDocSet(q);
+    if (answer instanceof BitDocSet) {
+      return (BitDocSet) answer;
+    }
+
+    BitDocSet answerBits = makeBitDocSet(answer);
     if (filterCache != null) {
       filterCache.put(q, answerBits);
     }
@@ -1138,16 +1147,35 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
   }

   private static Query matchAllDocsQuery = new MatchAllDocsQuery();
-  private BitDocSet liveDocs;
+  private volatile BitDocSet liveDocs;

+  /** @lucene.internal the type of DocSet returned may change in the future */
   public BitDocSet getLiveDocs() throws IOException {
-    // going through the filter cache will provide thread safety here
+    // Going through the filter cache will provide thread safety here if we only had getLiveDocs,
+    // but the addition of setLiveDocs means we needed to add volatile to "liveDocs".
     if (liveDocs == null) {
       liveDocs = getDocSetBits(matchAllDocsQuery);
     }
+    assert liveDocs.size() == numDocs();
     return liveDocs;
   }

+  /** @lucene.internal */
+  public boolean isLiveDocsInstantiated() {
+    return liveDocs != null;
+  }
+
+  /** @lucene.internal */
+  public void setLiveDocs(DocSet docs) {
+    // a few places currently expect BitDocSet
+    assert docs.size() == numDocs();
+    if (docs instanceof BitDocSet) {
+      this.liveDocs = (BitDocSet)docs;
+    } else {
+      this.liveDocs = makeBitDocSet(docs);
+    }
+  }
+
   public static class ProcessedFilter {
     public DocSet answer;  // the answer, if non-null
     public Filter filter;
@@ -1178,8 +1206,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
       ((DelegatingCollector) collector).finish();
     }

-    DocSet docSet = setCollector.getDocSet();
-    return docSet;
+    return DocSetUtil.getDocSet(setCollector, this);
   }

   /**
@@ -1251,7 +1278,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
       ((DelegatingCollector) collector).finish();
     }

-    return setCollector.getDocSet();
+    return DocSetUtil.getDocSet(setCollector, this);
   }

   public ProcessedFilter getProcessedFilter(DocSet setFilter, List<Query> queries) throws IOException {
@@ -1959,7 +1986,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
       buildAndRunCollectorChain(qr, query, collector, cmd, pf.postFilter);

-      set = setCollector.getDocSet();
+      set = DocSetUtil.getDocSet(setCollector, this);

       nDocsReturned = 0;
       ids = new int[nDocsReturned];
@@ -1976,7 +2003,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
       buildAndRunCollectorChain(qr, query, collector, cmd, pf.postFilter);

-      set = setCollector.getDocSet();
+      set = DocSetUtil.getDocSet(setCollector, this);

       totalHits = topCollector.getTotalHits();
       assert (totalHits == set.size());

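Reading the SolrIndexSearcher changes together (an interpretation of the diff above, not text from the commit): liveDocs can now be populated through two paths, and the volatile qualifier is what keeps the late setLiveDocs() path safe. A minimal sketch, where searcher and the full-match set docs are placeholder names:

    // Path 1: lazy initialization on first use.
    BitDocSet live = searcher.getLiveDocs();   // built via getDocSetBits(matchAllDocsQuery)

    // Path 2: pre-seeding by DocSetUtil when a query already matched every live doc.
    if (!searcher.isLiveDocsInstantiated()) {
      searcher.setLiveDocs(docs);              // converted with makeBitDocSet if not already a BitDocSet
    }
    assert searcher.getLiveDocs().size() == searcher.numDocs();

If two threads race through these paths, each installs an equivalent full-match BitDocSet, so the last write wins without readers ever seeing a partially built set.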
SortedIntDocSet.java

@@ -791,7 +791,7 @@ public class SortedIntDocSet extends DocSetBase {
   }

   @Override
-  protected SortedIntDocSet clone() {
+  public SortedIntDocSet clone() {
     return new SortedIntDocSet(docs.clone());
   }

CommandHandler.java

@@ -40,6 +40,7 @@ import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.BitDocSet;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.DocSetCollector;
+import org.apache.solr.search.DocSetUtil;
 import org.apache.solr.search.QueryCommand;
 import org.apache.solr.search.QueryResult;
 import org.apache.solr.search.QueryUtils;

@@ -193,7 +194,7 @@ public class CommandHandler {
     List<Collector> allCollectors = new ArrayList<>(collectors);
     allCollectors.add(docSetCollector);
     searchWithTimeLimiter(query, filter, MultiCollector.wrap(allCollectors));
-    return docSetCollector.getDocSet();
+    return DocSetUtil.getDocSet( docSetCollector, searcher );
   }

   @SuppressWarnings("unchecked")

TestFiltering.java

@@ -18,6 +18,7 @@ package org.apache.solr.search;

 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.SolrInputDocument;
@@ -42,6 +43,61 @@ public class TestFiltering extends SolrTestCaseJ4 {
     initCore("solrconfig.xml","schema_latest.xml");
   }

+  @Test
+  public void testLiveDocsSharing() throws Exception {
+    clearIndex();
+    for (int i=0; i<20; i++) {
+      for (int repeat=0; repeat < (i%5==0 ? 2 : 1); repeat++) {
+        assertU(adoc("id", Integer.toString(i), "foo_s", "foo", "val_i", Integer.toString(i), "val_s", Character.toString((char)('A' + i))));
+      }
+    }
+    assertU(commit());
+
+    String[] queries = {
+        "foo_s:foo",
+        "foo_s:f*",
+        "*:*",
+        "id:[* TO *]",
+        "id:[0 TO 99]",
+        "val_i:[0 TO 20]",
+        "val_s:[A TO z]"
+    };
+
+    SolrQueryRequest req = req();
+    try {
+      SolrIndexSearcher searcher = req.getSearcher();
+      DocSet live = null;
+      for (String qstr : queries) {
+        Query q = QParser.getParser(qstr, null, req).getQuery();
+        // System.out.println("getting set for " + q);
+        DocSet set = searcher.getDocSet(q);
+        if (live == null) {
+          live = searcher.getLiveDocs();
+        }
+        assertTrue( set == live );
+
+        QueryCommand cmd = new QueryCommand();
+        cmd.setQuery( QParser.getParser(qstr, null, req).getQuery() );
+        cmd.setLen(random().nextInt(30));
+        cmd.setNeedDocSet(true);
+
+        QueryResult res = new QueryResult();
+        searcher.search(res, cmd);
+        set = res.getDocSet();
+        assertTrue( set == live );
+
+        cmd.setQuery( QParser.getParser(qstr + " OR id:0", null, req).getQuery() );
+        cmd.setFilterList( QParser.getParser(qstr + " OR id:1", null, req).getQuery() );
+        res = new QueryResult();
+        searcher.search(res, cmd);
+        set = res.getDocSet();
+        assertTrue( set == live );
+      }
+    } finally {
+      req.close();
+    }
+  }
+
   public void testCaching() throws Exception {
     clearIndex();