SOLR-9764: share liveDocs for any DocSet of size numDocs

yonik 2017-01-31 11:52:04 -05:00
parent d8d61ff61d
commit a43ef8f480
13 changed files with 192 additions and 39 deletions

solr/CHANGES.txt

@@ -141,6 +141,9 @@ Optimizations
 * SOLR-9941: Clear the deletes lists at UpdateLog before replaying from log. This prevents redundantly pre-applying
   DBQs, during the log replay, to every update in the log as if the DBQs were out of order. (hossman, Ishan Chattopadhyaya)

+* SOLR-9764: All filters that match all documents in the index now share the same memory (DocSet).
+  (Michael Sun, yonik)
+
 Other Changes
 ----------------------
 * SOLR-9980: Expose configVersion in core admin status (Jessica Cheng Mallet via Tomás Fernández Löbbe)

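To make the CHANGES entry concrete, here is a rough sketch of what the sharing looks like from calling code. It is not part of the commit; it mirrors the testLiveDocsSharing test added in TestFiltering.java below, and assumes an open SolrQueryRequest named req:

    SolrIndexSearcher searcher = req.getSearcher();
    // Two different filters that each match every live document...
    DocSet all1 = searcher.getDocSet(QParser.getParser("*:*", null, req).getQuery());
    DocSet all2 = searcher.getDocSet(QParser.getParser("id:[* TO *]", null, req).getQuery());
    // ...now come back as the very same instance: the searcher's shared liveDocs BitDocSet.
    assert all1 == searcher.getLiveDocs();
    assert all1 == all2;

Because the returned set is shared, callers must treat it as read-only.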
SolrRangeQuery.java

@@ -49,6 +49,7 @@ import org.apache.solr.search.BitDocSet;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.DocSetBuilder;
 import org.apache.solr.search.DocSetProducer;
+import org.apache.solr.search.DocSetUtil;
 import org.apache.solr.search.ExtendedQueryBase;
 import org.apache.solr.search.Filter;
 import org.apache.solr.search.SolrIndexSearcher;
@@ -168,7 +169,8 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro
       maxTermsPerSegment = Math.max(maxTermsPerSegment, termsVisited);
     }

-    return maxTermsPerSegment <= 1 ? builder.buildUniqueInOrder(liveBits) : builder.build(liveBits);
+    DocSet set = maxTermsPerSegment <= 1 ? builder.buildUniqueInOrder(liveBits) : builder.build(liveBits);
+    return DocSetUtil.getDocSet(set, searcher);
   }

BitDocSet.java

@@ -261,7 +261,7 @@ public class BitDocSet extends DocSetBase {
   }

   @Override
-  protected BitDocSet clone() {
+  public BitDocSet clone() {
     return new BitDocSet(bits.clone(), size);
   }

DocSet.java

@@ -31,7 +31,7 @@ import org.apache.solr.common.SolrException;
  *
  * @since solr 0.9
  */
-public interface DocSet extends Closeable, Accountable /* extends Collection<Integer> */ {
+public interface DocSet extends Closeable, Accountable, Cloneable /* extends Collection<Integer> */ {

   /**
    * Adds the specified document if it is not currently in the DocSet
@@ -131,5 +131,7 @@ public interface DocSet extends Closeable, Accountable /* extends Collection<Int
    */
   public void addAllTo(DocSet target);

+  public DocSet clone();
+
   public static DocSet EMPTY = new SortedIntDocSet(new int[0], 0);
 }

DocSetBase.java

@@ -23,8 +23,8 @@ import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BitDocIdSet;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.solr.common.SolrException;
@@ -63,8 +63,21 @@ abstract class DocSetBase implements DocSet {
       // don't compare matches
     }

+    FixedBitSet bs1 = this.getBits();
+    FixedBitSet bs2 = toBitSet(other);
+
+    // resize both BitSets to make sure they have the same amount of zero padding
+    int maxNumBits = bs1.length() > bs2.length() ? bs1.length() : bs2.length();
+    bs1 = FixedBitSet.ensureCapacity(bs1, maxNumBits);
+    bs2 = FixedBitSet.ensureCapacity(bs2, maxNumBits);
+
     // if (this.size() != other.size()) return false;
-    return this.getBits().equals(toBitSet(other));
+    return bs1.equals(bs2);
+  }
+
+  public DocSet clone() {
+    throw new RuntimeException(new CloneNotSupportedException());
   }

   /**
@@ -90,7 +103,7 @@ abstract class DocSetBase implements DocSet {
    * implementation.
    */
   protected FixedBitSet getBits() {
-    FixedBitSet bits = new FixedBitSet(64);
+    FixedBitSet bits = new FixedBitSet(size());
     for (DocIterator iter = iterator(); iter.hasNext();) {
       int nextDoc = iter.nextDoc();
       bits = FixedBitSet.ensureCapacity(bits, nextDoc);
@@ -193,7 +206,7 @@ abstract class DocSetBase implements DocSet {
     @Override
     public int nextDoc() {
-      pos = bs.nextSetBit(pos+1);
+      pos = bs.nextSetBit(pos+1);  // TODO: this is buggy if getBits() returns a bitset that does not have a capacity of maxDoc
       return adjustedDoc = pos<max ? pos-base : NO_MORE_DOCS;
     }

DocSetCollector.java

@@ -72,10 +72,17 @@ public class DocSetCollector extends SimpleCollector {
       pos++;
     }

+  /** The number of documents that have been collected */
+  public int size() {
+    return pos;
+  }
+
   public DocSet getDocSet() {
     if (pos<=scratch.size()) {
       // assumes docs were collected in sorted order!
       return new SortedIntDocSet(scratch.toArray(), pos);
+      // } else if (pos == maxDoc) {
+      //   return new MatchAllDocSet(maxDoc);  // a bunch of code currently relies on BitDocSet (either explicitly, or implicitly for performance)
     } else {
       // set the bits for ids that were collected in the array
       scratch.copyTo(bits);

DocSetUtil.java

@@ -39,6 +39,7 @@ import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
+import org.apache.solr.common.SolrException;

 /** @lucene.experimental */
 public class DocSetUtil {
@@ -71,6 +72,51 @@
     }
   }

+  /**
+   * This variant of getDocSet will attempt to do some deduplication
+   * on certain DocSets such as DocSets that match numDocs. This means it can return
+   * a cached version of the set, and the returned set should not be modified.
+   * @lucene.experimental
+   */
+  public static DocSet getDocSet(DocSetCollector collector, SolrIndexSearcher searcher) {
+    if (collector.size() == searcher.numDocs()) {
+      if (!searcher.isLiveDocsInstantiated()) {
+        searcher.setLiveDocs( collector.getDocSet() );
+      }
+      try {
+        return searcher.getLiveDocs();
+      } catch (IOException e) {
+        // should be impossible... liveDocs should exist, so no IO should be necessary
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
+      }
+    }
+
+    return collector.getDocSet();
+  }
+
+  /**
+   * This variant of getDocSet maps all sets with size numDocs to searcher.getLiveDocs.
+   * The returned set should not be modified.
+   * @lucene.experimental
+   */
+  public static DocSet getDocSet(DocSet docs, SolrIndexSearcher searcher) {
+    if (docs.size() == searcher.numDocs()) {
+      if (!searcher.isLiveDocsInstantiated()) {
+        searcher.setLiveDocs( docs );
+      }
+      try {
+        // if this docset has the same cardinality as liveDocs, return liveDocs instead
+        // so this set will be short lived garbage.
+        return searcher.getLiveDocs();
+      } catch (IOException e) {
+        // should be impossible... liveDocs should exist, so no IO should be necessary
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
+      }
+    }
+
+    return docs;
+  }
+
   // implementers of DocSetProducer should not call this with themselves or it will result in an infinite loop
   public static DocSet createDocSet(SolrIndexSearcher searcher, Query query, DocSet filter) throws IOException {
@@ -105,7 +151,7 @@
     // but we should not catch it here, as we don't know how this DocSet will be used (it could be negated before use) or cached.
     searcher.search(query, collector);

-    return collector.getDocSet();
+    return getDocSet(collector, searcher);
   }

   public static DocSet createDocSet(SolrIndexSearcher searcher, Term term) throws IOException {
@@ -113,7 +159,6 @@
     int maxDoc = searcher.getIndexReader().maxDoc();
     int smallSetSize = smallSetSize(maxDoc);
-
     String field = term.field();
     BytesRef termVal = term.bytes();
@@ -135,15 +180,16 @@
       }
     }

+    DocSet answer = null;
     if (maxCount == 0) {
-      return DocSet.EMPTY;
+      answer = DocSet.EMPTY;
+    } else if (maxCount <= smallSetSize) {
+      answer = createSmallSet(leaves, postList, maxCount, firstReader);
+    } else {
+      answer = createBigSet(leaves, postList, maxDoc, firstReader);
     }
-
-    if (maxCount <= smallSetSize) {
-      return createSmallSet(leaves, postList, maxCount, firstReader);
-    }
-
-    return createBigSet(leaves, postList, maxDoc, firstReader);
+
+    return DocSetUtil.getDocSet( answer, searcher );
   }

   private static DocSet createSmallSet(List<LeafReaderContext> leaves, PostingsEnum[] postList, int maxPossible, int firstReader) throws IOException {

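The intended call pattern for the two new helpers can be read off the call sites changed below (SolrIndexSearcher and CommandHandler). As a hedged summary, not additional API, with setCollector and searcher as placeholder names:

    // After running a search that filled a DocSetCollector "setCollector":
    DocSet set = DocSetUtil.getDocSet(setCollector, searcher);
    // If setCollector.size() == searcher.numDocs() (the query matched every live document),
    // "set" is the searcher's shared liveDocs BitDocSet; otherwise it is the collector's own
    // SortedIntDocSet or BitDocSet. Either way, treat the result as read-only.

The DocSet-taking overload does the same for an already-built set, which is why SolrRangeQuery and createDocSet(...) above route their results through it.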
DocSlice.java

@@ -165,12 +165,8 @@ public class DocSlice extends DocSetBase implements DocList {
   }

   @Override
-  protected DocSlice clone() {
-    try {
-      // DocSlice is not currently mutable
-      DocSlice slice = (DocSlice) super.clone();
-    } catch (CloneNotSupportedException e) {}
-    return null;
+  public DocSlice clone() {
+    return (DocSlice) super.clone();
   }

   /** WARNING: this can over-estimate real memory use since backing arrays are shared with other DocSlice instances */

HashDocSet.java

@@ -290,7 +290,7 @@ public final class HashDocSet extends DocSetBase {
   }

   @Override
-  protected HashDocSet clone() {
+  public HashDocSet clone() {
     return new HashDocSet(this);
   }

SolrIndexSearcher.java

@@ -429,6 +429,10 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
     return reader.maxDoc();
   }

+  public final int numDocs() {
+    return reader.numDocs();
+  }
+
   public final int docFreq(Term term) throws IOException {
     return reader.docFreq(term);
   }
@@ -1063,19 +1067,24 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
     getDocSet(query);
   }

-  public BitDocSet getDocSetBits(Query q) throws IOException {
-    DocSet answer = getDocSet(q);
-    if (answer instanceof BitDocSet) {
-      return (BitDocSet) answer;
-    }
-
+  private BitDocSet makeBitDocSet(DocSet answer) {
+    // TODO: this should be implemented in DocSet, most likely with a getBits method that takes a maxDoc argument
+    // or make DocSet instances remember maxDoc
     FixedBitSet bs = new FixedBitSet(maxDoc());
     DocIterator iter = answer.iterator();
     while (iter.hasNext()) {
       bs.set(iter.nextDoc());
     }
-    BitDocSet answerBits = new BitDocSet(bs, answer.size());
+    return new BitDocSet(bs, answer.size());
+  }
+
+  public BitDocSet getDocSetBits(Query q) throws IOException {
+    DocSet answer = getDocSet(q);
+    if (answer instanceof BitDocSet) {
+      return (BitDocSet) answer;
+    }
+
+    BitDocSet answerBits = makeBitDocSet(answer);
     if (filterCache != null) {
       filterCache.put(q, answerBits);
     }
@@ -1138,16 +1147,35 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
   }

   private static Query matchAllDocsQuery = new MatchAllDocsQuery();
-  private BitDocSet liveDocs;
+  private volatile BitDocSet liveDocs;

+  /** @lucene.internal the type of DocSet returned may change in the future */
   public BitDocSet getLiveDocs() throws IOException {
-    // going through the filter cache will provide thread safety here
+    // Going through the filter cache will provide thread safety here if we only had getLiveDocs,
+    // but the addition of setLiveDocs means we needed to add volatile to "liveDocs".
     if (liveDocs == null) {
       liveDocs = getDocSetBits(matchAllDocsQuery);
     }
+    assert liveDocs.size() == numDocs();
     return liveDocs;
   }

+  /** @lucene.internal */
+  public boolean isLiveDocsInstantiated() {
+    return liveDocs != null;
+  }
+
+  /** @lucene.internal */
+  public void setLiveDocs(DocSet docs) {
+    // a few places currently expect BitDocSet
+    assert docs.size() == numDocs();
+    if (docs instanceof BitDocSet) {
+      this.liveDocs = (BitDocSet)docs;
+    } else {
+      this.liveDocs = makeBitDocSet(docs);
+    }
+  }
+
   public static class ProcessedFilter {
     public DocSet answer;  // the answer, if non-null
     public Filter filter;
@@ -1178,8 +1206,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
       ((DelegatingCollector) collector).finish();
     }

-    DocSet docSet = setCollector.getDocSet();
-    return docSet;
+    return DocSetUtil.getDocSet(setCollector, this);
   }

   /**
@@ -1251,7 +1278,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
       ((DelegatingCollector) collector).finish();
     }

-    return setCollector.getDocSet();
+    return DocSetUtil.getDocSet(setCollector, this);
   }

   public ProcessedFilter getProcessedFilter(DocSet setFilter, List<Query> queries) throws IOException {
@@ -1959,7 +1986,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
       buildAndRunCollectorChain(qr, query, collector, cmd, pf.postFilter);

-      set = setCollector.getDocSet();
+      set = DocSetUtil.getDocSet(setCollector, this);

       nDocsReturned = 0;
       ids = new int[nDocsReturned];
@@ -1976,7 +2003,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
       buildAndRunCollectorChain(qr, query, collector, cmd, pf.postFilter);

-      set = setCollector.getDocSet();
+      set = DocSetUtil.getDocSet(setCollector, this);

       totalHits = topCollector.getTotalHits();
       assert (totalHits == set.size());

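Reading the SolrIndexSearcher changes together (an interpretation of the diff above, not text from the commit): liveDocs can now be populated through two paths, and the volatile qualifier is what keeps the late setLiveDocs() path safe. A minimal sketch, where searcher and the full-match set docs are placeholder names:

    // Path 1: lazy initialization on first use.
    BitDocSet live = searcher.getLiveDocs();   // built via getDocSetBits(matchAllDocsQuery)

    // Path 2: pre-seeding by DocSetUtil when a query already matched every live doc.
    if (!searcher.isLiveDocsInstantiated()) {
      searcher.setLiveDocs(docs);              // converted with makeBitDocSet if not already a BitDocSet
    }
    assert searcher.getLiveDocs().size() == searcher.numDocs();

If two threads race through these paths, each installs an equivalent full-match BitDocSet, so the last write wins without readers ever seeing a partially built set.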
SortedIntDocSet.java

@@ -791,7 +791,7 @@ public class SortedIntDocSet extends DocSetBase {
   }

   @Override
-  protected SortedIntDocSet clone() {
+  public SortedIntDocSet clone() {
     return new SortedIntDocSet(docs.clone());
   }

CommandHandler.java

@@ -40,6 +40,7 @@ import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.BitDocSet;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.DocSetCollector;
+import org.apache.solr.search.DocSetUtil;
 import org.apache.solr.search.QueryCommand;
 import org.apache.solr.search.QueryResult;
 import org.apache.solr.search.QueryUtils;

@@ -193,7 +194,7 @@ public class CommandHandler {
     List<Collector> allCollectors = new ArrayList<>(collectors);
     allCollectors.add(docSetCollector);
     searchWithTimeLimiter(query, filter, MultiCollector.wrap(allCollectors));
-    return docSetCollector.getDocSet();
+    return DocSetUtil.getDocSet( docSetCollector, searcher );
   }

   @SuppressWarnings("unchecked")

TestFiltering.java

@@ -18,6 +18,7 @@ package org.apache.solr.search;

 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.SolrInputDocument;
@@ -42,6 +43,61 @@ public class TestFiltering extends SolrTestCaseJ4 {
     initCore("solrconfig.xml","schema_latest.xml");
   }

+  @Test
+  public void testLiveDocsSharing() throws Exception {
+    clearIndex();
+    for (int i=0; i<20; i++) {
+      for (int repeat=0; repeat < (i%5==0 ? 2 : 1); repeat++) {
+        assertU(adoc("id", Integer.toString(i), "foo_s", "foo", "val_i", Integer.toString(i), "val_s", Character.toString((char)('A' + i))));
+      }
+    }
+    assertU(commit());
+
+    String[] queries = {
+        "foo_s:foo",
+        "foo_s:f*",
+        "*:*",
+        "id:[* TO *]",
+        "id:[0 TO 99]",
+        "val_i:[0 TO 20]",
+        "val_s:[A TO z]"
+    };
+
+    SolrQueryRequest req = req();
+    try {
+      SolrIndexSearcher searcher = req.getSearcher();
+      DocSet live = null;
+      for (String qstr : queries) {
+        Query q = QParser.getParser(qstr, null, req).getQuery();
+        // System.out.println("getting set for " + q);
+        DocSet set = searcher.getDocSet(q);
+        if (live == null) {
+          live = searcher.getLiveDocs();
+        }
+        assertTrue( set == live );
+
+        QueryCommand cmd = new QueryCommand();
+        cmd.setQuery( QParser.getParser(qstr, null, req).getQuery() );
+        cmd.setLen(random().nextInt(30));
+        cmd.setNeedDocSet(true);
+
+        QueryResult res = new QueryResult();
+        searcher.search(res, cmd);
+        set = res.getDocSet();
+        assertTrue( set == live );
+
+        cmd.setQuery( QParser.getParser(qstr + " OR id:0", null, req).getQuery() );
+        cmd.setFilterList( QParser.getParser(qstr + " OR id:1", null, req).getQuery() );
+        res = new QueryResult();
+        searcher.search(res, cmd);
+        set = res.getDocSet();
+        assertTrue( set == live );
+      }
+    } finally {
+      req.close();
+    }
+  }
+
   public void testCaching() throws Exception {
     clearIndex();