SOLR-9764: share liveDocs for any DocSet of size numDocs

This commit is contained in:
yonik 2017-01-31 11:52:04 -05:00
parent d8d61ff61d
commit a43ef8f480
13 changed files with 192 additions and 39 deletions

View File

@ -141,6 +141,9 @@ Optimizations
* SOLR-9941: Clear the deletes lists at UpdateLog before replaying from log. This prevents redundantly pre-applying
DBQs, during the log replay, to every update in the log as if the DBQs were out of order. (hossman, Ishan Chattopadhyaya)
* SOLR-9764: All filters that match all documents in the index now share the same memory (DocSet).
(Michael Sun, yonik)
Other Changes
----------------------
* SOLR-9980: Expose configVersion in core admin status (Jessica Cheng Mallet via Tomás Fernández Löbbe)

View File

@ -49,6 +49,7 @@ import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocSetBuilder;
import org.apache.solr.search.DocSetProducer;
import org.apache.solr.search.DocSetUtil;
import org.apache.solr.search.ExtendedQueryBase;
import org.apache.solr.search.Filter;
import org.apache.solr.search.SolrIndexSearcher;
@ -168,7 +169,8 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro
maxTermsPerSegment = Math.max(maxTermsPerSegment, termsVisited);
}
return maxTermsPerSegment <= 1 ? builder.buildUniqueInOrder(liveBits) : builder.build(liveBits);
DocSet set = maxTermsPerSegment <= 1 ? builder.buildUniqueInOrder(liveBits) : builder.build(liveBits);
return DocSetUtil.getDocSet(set, searcher);
}

View File

@ -261,7 +261,7 @@ public class BitDocSet extends DocSetBase {
}
@Override
protected BitDocSet clone() {
public BitDocSet clone() {
return new BitDocSet(bits.clone(), size);
}

View File

@ -31,7 +31,7 @@ import org.apache.solr.common.SolrException;
*
* @since solr 0.9
*/
public interface DocSet extends Closeable, Accountable /* extends Collection<Integer> */ {
public interface DocSet extends Closeable, Accountable, Cloneable /* extends Collection<Integer> */ {
/**
* Adds the specified document if it is not currently in the DocSet
@ -131,5 +131,7 @@ public interface DocSet extends Closeable, Accountable /* extends Collection<Int
*/
public void addAllTo(DocSet target);
public DocSet clone();
public static DocSet EMPTY = new SortedIntDocSet(new int[0], 0);
}

View File

@ -23,8 +23,8 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.SolrException;
@ -63,8 +63,21 @@ abstract class DocSetBase implements DocSet {
// don't compare matches
}
FixedBitSet bs1 = this.getBits();
FixedBitSet bs2 = toBitSet(other);
// resize both BitSets to make sure they have the same amount of zero padding
int maxNumBits = bs1.length() > bs2.length() ? bs1.length() : bs2.length();
bs1 = FixedBitSet.ensureCapacity(bs1, maxNumBits);
bs2 = FixedBitSet.ensureCapacity(bs2, maxNumBits);
// if (this.size() != other.size()) return false;
return this.getBits().equals(toBitSet(other));
return bs1.equals(bs2);
}
/**
 * Default clone implementation: cloning is unsupported at this level, so this
 * always throws a {@link RuntimeException} wrapping a
 * {@link CloneNotSupportedException}. Concrete DocSet implementations that can
 * be copied provide their own {@code clone()}.
 *
 * NOTE(review): DocSlice.clone() in this change delegates to super.clone(),
 * which lands here and therefore throws -- confirm that is intended.
 */
public DocSet clone() {
  CloneNotSupportedException unsupported = new CloneNotSupportedException();
  throw new RuntimeException(unsupported);
}
/**
@ -90,7 +103,7 @@ abstract class DocSetBase implements DocSet {
* implementation.
*/
protected FixedBitSet getBits() {
FixedBitSet bits = new FixedBitSet(64);
FixedBitSet bits = new FixedBitSet(size());
for (DocIterator iter = iterator(); iter.hasNext();) {
int nextDoc = iter.nextDoc();
bits = FixedBitSet.ensureCapacity(bits, nextDoc);
@ -193,7 +206,7 @@ abstract class DocSetBase implements DocSet {
@Override
public int nextDoc() {
pos = bs.nextSetBit(pos+1);
pos = bs.nextSetBit(pos+1); // TODO: this is buggy if getBits() returns a bitset that does not have a capacity of maxDoc
return adjustedDoc = pos<max ? pos-base : NO_MORE_DOCS;
}

View File

@ -72,10 +72,17 @@ public class DocSetCollector extends SimpleCollector {
pos++;
}
/**
 * Returns how many documents this collector has collected so far
 * (the current value of the running {@code pos} counter).
 */
public int size() {
  final int collected = pos;
  return collected;
}
public DocSet getDocSet() {
if (pos<=scratch.size()) {
// assumes docs were collected in sorted order!
return new SortedIntDocSet(scratch.toArray(), pos);
// } else if (pos == maxDoc) {
// return new MatchAllDocSet(maxDoc); // a bunch of code currently relies on BitDocSet (either explicitly, or implicitly for performance)
} else {
// set the bits for ids that were collected in the array
scratch.copyTo(bits);

View File

@ -39,6 +39,7 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.common.SolrException;
/** @lucene.experimental */
public class DocSetUtil {
@ -71,6 +72,51 @@ public class DocSetUtil {
}
}
/**
 * Variant of getDocSet that de-duplicates "match everything" results.
 * When the collector gathered exactly {@code searcher.numDocs()} documents, the
 * searcher's shared liveDocs set is returned in place of a freshly built one; if
 * the searcher has no liveDocs yet, the collector's set is installed as liveDocs first.
 * Because the result may be a cached, shared instance, callers must not modify it.
 *
 * NOTE(review): the comparison is by cardinality only -- presumably the collector
 * only ever sees live documents, so equal size implies an equal set; confirm.
 *
 * @param collector the collector whose hits should be turned into a DocSet
 * @param searcher  the searcher that owns the shared liveDocs set
 * @return the shared liveDocs set when every live document was collected,
 *         otherwise the collector's own DocSet
 * @lucene.experimental
 */
public static DocSet getDocSet(DocSetCollector collector, SolrIndexSearcher searcher) {
  // Common case: not a match-all result, so there is nothing to share.
  if (collector.size() != searcher.numDocs()) {
    return collector.getDocSet();
  }

  // Seed the searcher's liveDocs from this result only if none exists yet;
  // otherwise the collector's set is never materialized at all.
  if (!searcher.isLiveDocsInstantiated()) {
    searcher.setLiveDocs(collector.getDocSet());
  }

  try {
    return searcher.getLiveDocs();
  } catch (IOException e) {
    // should be impossible... liveDocs should exist, so no IO should be necessary
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
  }
}
/**
 * Variant of getDocSet that maps every set whose size equals
 * {@code searcher.numDocs()} onto {@code searcher.getLiveDocs()}.
 * If the searcher has no liveDocs yet, {@code docs} is installed as liveDocs first.
 * The returned set may be shared and should not be modified.
 *
 * @param docs     the candidate set
 * @param searcher the searcher that owns the shared liveDocs set
 * @return the shared liveDocs set when {@code docs} has numDocs entries,
 *         otherwise {@code docs} itself
 * @lucene.experimental
 */
public static DocSet getDocSet(DocSet docs, SolrIndexSearcher searcher) {
  // Not a match-all set: hand it back untouched.
  if (docs.size() != searcher.numDocs()) {
    return docs;
  }

  if (!searcher.isLiveDocsInstantiated()) {
    searcher.setLiveDocs(docs);
  }

  try {
    // Same cardinality as liveDocs: return liveDocs instead, leaving the
    // passed-in set to become short-lived garbage.
    return searcher.getLiveDocs();
  } catch (IOException e) {
    // should be impossible... liveDocs should exist, so no IO should be necessary
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
  }
}
// implementers of DocSetProducer should not call this with themselves or it will result in an infinite loop
public static DocSet createDocSet(SolrIndexSearcher searcher, Query query, DocSet filter) throws IOException {
@ -105,7 +151,7 @@ public class DocSetUtil {
// but we should not catch it here, as we don't know how this DocSet will be used (it could be negated before use) or cached.
searcher.search(query, collector);
return collector.getDocSet();
return getDocSet(collector, searcher);
}
public static DocSet createDocSet(SolrIndexSearcher searcher, Term term) throws IOException {
@ -113,7 +159,6 @@ public class DocSetUtil {
int maxDoc = searcher.getIndexReader().maxDoc();
int smallSetSize = smallSetSize(maxDoc);
String field = term.field();
BytesRef termVal = term.bytes();
@ -135,15 +180,16 @@ public class DocSetUtil {
}
}
DocSet answer = null;
if (maxCount == 0) {
return DocSet.EMPTY;
answer = DocSet.EMPTY;
} else if (maxCount <= smallSetSize) {
answer = createSmallSet(leaves, postList, maxCount, firstReader);
} else {
answer = createBigSet(leaves, postList, maxDoc, firstReader);
}
if (maxCount <= smallSetSize) {
return createSmallSet(leaves, postList, maxCount, firstReader);
}
return createBigSet(leaves, postList, maxDoc, firstReader);
return DocSetUtil.getDocSet( answer, searcher );
}
private static DocSet createSmallSet(List<LeafReaderContext> leaves, PostingsEnum[] postList, int maxPossible, int firstReader) throws IOException {

View File

@ -165,12 +165,8 @@ public class DocSlice extends DocSetBase implements DocList {
}
@Override
protected DocSlice clone() {
try {
// DocSlice is not currently mutable
DocSlice slice = (DocSlice) super.clone();
} catch (CloneNotSupportedException e) {}
return null;
public DocSlice clone() {
return (DocSlice) super.clone();
}
/** WARNING: this can over-estimate real memory use since backing arrays are shared with other DocSlice instances */

View File

@ -290,7 +290,7 @@ public final class HashDocSet extends DocSetBase {
}
@Override
protected HashDocSet clone() {
public HashDocSet clone() {
return new HashDocSet(this);
}

View File

@ -429,6 +429,10 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
return reader.maxDoc();
}
/** Returns the document count reported by the underlying reader's {@code numDocs()}. */
public final int numDocs() {
  final int count = reader.numDocs();
  return count;
}
public final int docFreq(Term term) throws IOException {
return reader.docFreq(term);
}
@ -1063,19 +1067,24 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
getDocSet(query);
}
public BitDocSet getDocSetBits(Query q) throws IOException {
DocSet answer = getDocSet(q);
if (answer instanceof BitDocSet) {
return (BitDocSet) answer;
}
private BitDocSet makeBitDocSet(DocSet answer) {
// TODO: this should be implemented in DocSet, most likely with a getBits method that takes a maxDoc argument
// or make DocSet instances remember maxDoc
FixedBitSet bs = new FixedBitSet(maxDoc());
DocIterator iter = answer.iterator();
while (iter.hasNext()) {
bs.set(iter.nextDoc());
}
BitDocSet answerBits = new BitDocSet(bs, answer.size());
return new BitDocSet(bs, answer.size());
}
public BitDocSet getDocSetBits(Query q) throws IOException {
DocSet answer = getDocSet(q);
if (answer instanceof BitDocSet) {
return (BitDocSet) answer;
}
BitDocSet answerBits = makeBitDocSet(answer);
if (filterCache != null) {
filterCache.put(q, answerBits);
}
@ -1138,16 +1147,35 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
}
private static Query matchAllDocsQuery = new MatchAllDocsQuery();
private BitDocSet liveDocs;
private volatile BitDocSet liveDocs;
/** @lucene.internal the type of DocSet returned may change in the future */
public BitDocSet getLiveDocs() throws IOException {
// going through the filter cache will provide thread safety here
// Going through the filter cache will provide thread safety here if we only had getLiveDocs,
// but the addition of setLiveDocs means we needed to add volatile to "liveDocs".
if (liveDocs == null) {
liveDocs = getDocSetBits(matchAllDocsQuery);
}
assert liveDocs.size() == numDocs();
return liveDocs;
}
/**
 * Reports whether the liveDocs set has already been assigned on this searcher,
 * i.e. whether the {@code liveDocs} field is currently non-null.
 *
 * @lucene.internal
 */
public boolean isLiveDocsInstantiated() {
  final boolean missing = (liveDocs == null);
  return !missing;
}
/**
 * Installs {@code docs} as this searcher's liveDocs set. A {@code BitDocSet} is
 * stored as-is; any other DocSet is first converted with {@code makeBitDocSet}.
 * With assertions enabled, {@code docs} must contain exactly {@code numDocs()} entries.
 *
 * @param docs the set to use as liveDocs
 * @lucene.internal
 */
public void setLiveDocs(DocSet docs) {
  assert docs.size() == numDocs();
  // a few places currently expect BitDocSet, so normalize to that representation
  this.liveDocs = (docs instanceof BitDocSet)
      ? (BitDocSet) docs
      : makeBitDocSet(docs);
}
public static class ProcessedFilter {
public DocSet answer; // the answer, if non-null
public Filter filter;
@ -1178,8 +1206,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
((DelegatingCollector) collector).finish();
}
DocSet docSet = setCollector.getDocSet();
return docSet;
return DocSetUtil.getDocSet(setCollector, this);
}
/**
@ -1251,7 +1278,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
((DelegatingCollector) collector).finish();
}
return setCollector.getDocSet();
return DocSetUtil.getDocSet(setCollector, this);
}
public ProcessedFilter getProcessedFilter(DocSet setFilter, List<Query> queries) throws IOException {
@ -1959,7 +1986,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
buildAndRunCollectorChain(qr, query, collector, cmd, pf.postFilter);
set = setCollector.getDocSet();
set = DocSetUtil.getDocSet(setCollector, this);
nDocsReturned = 0;
ids = new int[nDocsReturned];
@ -1976,7 +2003,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
buildAndRunCollectorChain(qr, query, collector, cmd, pf.postFilter);
set = setCollector.getDocSet();
set = DocSetUtil.getDocSet(setCollector, this);
totalHits = topCollector.getTotalHits();
assert (totalHits == set.size());

View File

@ -791,7 +791,7 @@ public class SortedIntDocSet extends DocSetBase {
}
@Override
protected SortedIntDocSet clone() {
public SortedIntDocSet clone() {
return new SortedIntDocSet(docs.clone());
}

View File

@ -40,6 +40,7 @@ import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocSetCollector;
import org.apache.solr.search.DocSetUtil;
import org.apache.solr.search.QueryCommand;
import org.apache.solr.search.QueryResult;
import org.apache.solr.search.QueryUtils;
@ -193,7 +194,7 @@ public class CommandHandler {
List<Collector> allCollectors = new ArrayList<>(collectors);
allCollectors.add(docSetCollector);
searchWithTimeLimiter(query, filter, MultiCollector.wrap(allCollectors));
return docSetCollector.getDocSet();
return DocSetUtil.getDocSet( docSetCollector, searcher );
}
@SuppressWarnings("unchecked")

View File

@ -18,6 +18,7 @@ package org.apache.solr.search;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrInputDocument;
@ -42,8 +43,63 @@ public class TestFiltering extends SolrTestCaseJ4 {
initCore("solrconfig.xml","schema_latest.xml");
}
@Test
public void testLiveDocsSharing() throws Exception {
clearIndex();
for (int i=0; i<20; i++) {
for (int repeat=0; repeat < (i%5==0 ? 2 : 1); repeat++) {
assertU(adoc("id", Integer.toString(i), "foo_s", "foo", "val_i", Integer.toString(i), "val_s", Character.toString((char)('A' + i))));
}
}
assertU(commit());
public void testCaching() throws Exception {
String[] queries = {
"foo_s:foo",
"foo_s:f*",
"*:*",
"id:[* TO *]",
"id:[0 TO 99]",
"val_i:[0 TO 20]",
"val_s:[A TO z]"
};
SolrQueryRequest req = req();
try {
SolrIndexSearcher searcher = req.getSearcher();
DocSet live = null;
for (String qstr : queries) {
Query q = QParser.getParser(qstr, null, req).getQuery();
// System.out.println("getting set for " + q);
DocSet set = searcher.getDocSet(q);
if (live == null) {
live = searcher.getLiveDocs();
}
assertTrue( set == live);
QueryCommand cmd = new QueryCommand();
cmd.setQuery( QParser.getParser(qstr, null, req).getQuery() );
cmd.setLen(random().nextInt(30));
cmd.setNeedDocSet(true);
QueryResult res = new QueryResult();
searcher.search(res, cmd);
set = res.getDocSet();
assertTrue( set == live );
cmd.setQuery( QParser.getParser(qstr + " OR id:0", null, req).getQuery() );
cmd.setFilterList( QParser.getParser(qstr + " OR id:1", null, req).getQuery() );
res = new QueryResult();
searcher.search(res, cmd);
set = res.getDocSet();
assertTrue( set == live );
}
} finally {
req.close();
}
}
public void testCaching() throws Exception {
clearIndex();
assertU(adoc("id","4", "val_i","1"));
assertU(adoc("id","1", "val_i","2"));