mirror of https://github.com/apache/lucene.git
SOLR-9764: share liveDocs for any DocSet of size numDocs
parent d8d61ff61d
commit a43ef8f480
@@ -141,6 +141,9 @@ Optimizations
 * SOLR-9941: Clear the deletes lists at UpdateLog before replaying from log. This prevents redundantly pre-applying
   DBQs, during the log replay, to every update in the log as if the DBQs were out of order. (hossman, Ishan Chattopadhyaya)
 
+* SOLR-9764: All filters that match all documents in the index now share the same memory (DocSet).
+  (Michael Sun, yonik)
+
 Other Changes
 ----------------------
 
 * SOLR-9980: Expose configVersion in core admin status (Jessica Cheng Mallet via Tomás Fernández Löbbe)
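
Note: the heart of SOLR-9764, sketched in isolation below. This is an illustrative
rewrite of the rule the patch applies, not code from the commit; DocSet and
SolrIndexSearcher are the real Solr types, but the method name is invented.

    // If a computed DocSet turns out to cover every live document, hand back the
    // searcher's single shared liveDocs instance instead of keeping a duplicate.
    static DocSet deduplicate(DocSet computed, SolrIndexSearcher searcher) throws IOException {
      if (computed.size() == searcher.numDocs()) {
        return searcher.getLiveDocs();  // shared instance, read-only by convention
      }
      return computed;                  // genuinely smaller set: keep it
    }
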
@@ -49,6 +49,7 @@ import org.apache.solr.search.BitDocSet;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.DocSetBuilder;
 import org.apache.solr.search.DocSetProducer;
+import org.apache.solr.search.DocSetUtil;
 import org.apache.solr.search.ExtendedQueryBase;
 import org.apache.solr.search.Filter;
 import org.apache.solr.search.SolrIndexSearcher;
@@ -168,7 +169,8 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro
       maxTermsPerSegment = Math.max(maxTermsPerSegment, termsVisited);
     }
 
-    return maxTermsPerSegment <= 1 ? builder.buildUniqueInOrder(liveBits) : builder.build(liveBits);
+    DocSet set = maxTermsPerSegment <= 1 ? builder.buildUniqueInOrder(liveBits) : builder.build(liveBits);
+    return DocSetUtil.getDocSet(set, searcher);
   }
@@ -261,7 +261,7 @@ public class BitDocSet extends DocSetBase {
   }
 
   @Override
-  protected BitDocSet clone() {
+  public BitDocSet clone() {
     return new BitDocSet(bits.clone(), size);
   }
@@ -31,7 +31,7 @@ import org.apache.solr.common.SolrException;
  *
  * @since solr 0.9
  */
-public interface DocSet extends Closeable, Accountable /* extends Collection<Integer> */ {
+public interface DocSet extends Closeable, Accountable, Cloneable /* extends Collection<Integer> */ {
 
   /**
    * Adds the specified document if it is not currently in the DocSet
@@ -131,5 +131,7 @@ public interface DocSet extends Closeable, Accountable /* extends Collection<Int
    */
   public void addAllTo(DocSet target);
 
+  public DocSet clone();
+
   public static DocSet EMPTY = new SortedIntDocSet(new int[0], 0);
 }
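
Note: adding Cloneable plus a public clone() on the interface leans on Java's
covariant return types; each implementation touched below (BitDocSet, HashDocSet,
SortedIntDocSet, DocSlice) widens the inherited protected clone() to public and
narrows the return type. A standalone illustration with hypothetical types, not
code from this commit:

    interface CloneableSet extends Cloneable {
      CloneableSet clone();                 // legal: Object.clone() is protected, not public
    }

    final class IntArraySet implements CloneableSet {
      private final int[] docs;
      IntArraySet(int[] docs) { this.docs = docs; }
      @Override
      public IntArraySet clone() {          // covariant return, public visibility
        return new IntArraySet(docs.clone());
      }
    }
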
@@ -23,8 +23,8 @@ import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BitDocIdSet;
+import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.solr.common.SolrException;
@@ -63,8 +63,21 @@ abstract class DocSetBase implements DocSet {
       // don't compare matches
     }
 
+    FixedBitSet bs1 = this.getBits();
+    FixedBitSet bs2 = toBitSet(other);
+
+    // resize both BitSets to make sure they have the same amount of zero padding
+    int maxNumBits = bs1.length() > bs2.length() ? bs1.length() : bs2.length();
+    bs1 = FixedBitSet.ensureCapacity(bs1, maxNumBits);
+    bs2 = FixedBitSet.ensureCapacity(bs2, maxNumBits);
+
     // if (this.size() != other.size()) return false;
-    return this.getBits().equals(toBitSet(other));
+    return bs1.equals(bs2);
   }
 
+  public DocSet clone() {
+    throw new RuntimeException(new CloneNotSupportedException());
+  }
+
   /**
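
Note on the resize in equals(): as far as I can tell, FixedBitSet.equals() in this
era of Lucene treats bit sets of different capacity (numBits) as unequal even when
the same bits are set, so both sides are grown to a common capacity first. A small
self-contained demo of that assumed behavior (PaddingDemo is hypothetical):

    import org.apache.lucene.util.FixedBitSet;

    class PaddingDemo {
      public static void main(String[] args) {
        FixedBitSet a = new FixedBitSet(10);
        FixedBitSet b = new FixedBitSet(100);
        a.set(3);
        b.set(3);
        System.out.println(a.equals(b));  // presumably false: capacities differ
        int maxNumBits = Math.max(a.length(), b.length());
        a = FixedBitSet.ensureCapacity(a, maxNumBits);
        b = FixedBitSet.ensureCapacity(b, maxNumBits);
        System.out.println(a.equals(b));  // true once both carry the same zero padding
      }
    }
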
@@ -90,7 +103,7 @@ abstract class DocSetBase implements DocSet {
    * implementation.
    */
   protected FixedBitSet getBits() {
-    FixedBitSet bits = new FixedBitSet(64);
+    FixedBitSet bits = new FixedBitSet(size());
     for (DocIterator iter = iterator(); iter.hasNext();) {
       int nextDoc = iter.nextDoc();
       bits = FixedBitSet.ensureCapacity(bits, nextDoc);
@@ -193,7 +206,7 @@ abstract class DocSetBase implements DocSet {
 
       @Override
       public int nextDoc() {
-        pos = bs.nextSetBit(pos+1);
+        pos = bs.nextSetBit(pos+1); // TODO: this is buggy if getBits() returns a bitset that does not have a capacity of maxDoc
         return adjustedDoc = pos<max ? pos-base : NO_MORE_DOCS;
       }
@@ -72,10 +72,17 @@ public class DocSetCollector extends SimpleCollector {
       pos++;
     }
 
+  /** The number of documents that have been collected */
+  public int size() {
+    return pos;
+  }
+
   public DocSet getDocSet() {
     if (pos<=scratch.size()) {
       // assumes docs were collected in sorted order!
       return new SortedIntDocSet(scratch.toArray(), pos);
+    // } else if (pos == maxDoc) {
+    //   return new MatchAllDocSet(maxDoc); // a bunch of code currently relies on BitDocSet (either explicitly, or implicitly for performance)
     } else {
       // set the bits for ids that were collected in the array
       scratch.copyTo(bits);
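
Note: the new size() accessor exposes the collected count without materializing a
DocSet, which is what lets DocSetUtil compare it against numDocs before deciding to
share liveDocs. A simplified, self-contained sketch of the collector's two-tier
strategy (not Solr code; the real class uses an expandable scratch buffer and
FixedBitSet):

    class TwoTierCollector {
      private final int[] scratch;   // exact ids while the result is still small
      private final long[] bits;     // bitset fallback once the result grows
      private int pos;               // documents collected so far

      TwoTierCollector(int smallSetSize, int maxDoc) {
        scratch = new int[smallSetSize];
        bits = new long[(maxDoc + 63) >>> 6];
      }

      void collect(int doc) {        // ids arrive in increasing order per segment
        if (pos < scratch.length) {
          scratch[pos] = doc;        // small path: remember the exact ids
        } else {
          bits[doc >>> 6] |= 1L << doc; // large path: flip the bit instead
        }
        pos++;
      }

      /** Collected count; cheap to read, mirroring DocSetCollector.size(). */
      int size() {
        return pos;
      }

      long[] toBitSet() {            // analogue of scratch.copyTo(bits) on the large path
        for (int i = 0; i < Math.min(pos, scratch.length); i++) {
          bits[scratch[i] >>> 6] |= 1L << scratch[i];
        }
        return bits;
      }
    }
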
@@ -39,6 +39,7 @@ import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
+import org.apache.solr.common.SolrException;
 
 /** @lucene.experimental */
 public class DocSetUtil {
@@ -71,6 +72,51 @@ public class DocSetUtil {
     }
   }
 
+  /**
+   * This variant of getDocSet will attempt to do some deduplication
+   * on certain DocSets such as DocSets that match numDocs. This means it can return
+   * a cached version of the set, and the returned set should not be modified.
+   * @lucene.experimental
+   */
+  public static DocSet getDocSet(DocSetCollector collector, SolrIndexSearcher searcher) {
+    if (collector.size() == searcher.numDocs()) {
+      if (!searcher.isLiveDocsInstantiated()) {
+        searcher.setLiveDocs( collector.getDocSet() );
+      }
+      try {
+        return searcher.getLiveDocs();
+      } catch (IOException e) {
+        // should be impossible... liveDocs should exist, so no IO should be necessary
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
+      }
+    }
+
+    return collector.getDocSet();
+  }
+
+  /**
+   * This variant of getDocSet maps all sets with size numDocs to searcher.getLiveDocs.
+   * The returned set should not be modified.
+   * @lucene.experimental
+   */
+  public static DocSet getDocSet(DocSet docs, SolrIndexSearcher searcher) {
+    if (docs.size() == searcher.numDocs()) {
+      if (!searcher.isLiveDocsInstantiated()) {
+        searcher.setLiveDocs( docs );
+      }
+      try {
+        // if this docset has the same cardinality as liveDocs, return liveDocs instead
+        // so this set will be short lived garbage.
+        return searcher.getLiveDocs();
+      } catch (IOException e) {
+        // should be impossible... liveDocs should exist, so no IO should be necessary
+        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
+      }
+    }
+
+    return docs;
+  }
+
   // implementers of DocSetProducer should not call this with themselves or it will result in an infinite loop
   public static DocSet createDocSet(SolrIndexSearcher searcher, Query query, DocSet filter) throws IOException {
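
Note: both helpers are deliberately safe to call with any set. A sketch of the
observable effect from a caller's point of view (the method name is invented;
Query, DocSet, DocSetUtil and SolrIndexSearcher are the real types):

    static DocSet resolve(SolrIndexSearcher searcher, Query query) throws IOException {
      DocSet computed = searcher.getDocSet(query);
      DocSet result = DocSetUtil.getDocSet(computed, searcher);
      // When computed.size() == searcher.numDocs(), 'result' is the searcher-wide
      // liveDocs instance and 'computed' becomes short-lived garbage; treat
      // 'result' as read-only in that case.
      return result;
    }
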
@@ -105,7 +151,7 @@ public class DocSetUtil {
     // but we should not catch it here, as we don't know how this DocSet will be used (it could be negated before use) or cached.
     searcher.search(query, collector);
 
-    return collector.getDocSet();
+    return getDocSet(collector, searcher);
   }
 
   public static DocSet createDocSet(SolrIndexSearcher searcher, Term term) throws IOException {
@@ -113,7 +159,6 @@ public class DocSetUtil {
     int maxDoc = searcher.getIndexReader().maxDoc();
     int smallSetSize = smallSetSize(maxDoc);
-
 
     String field = term.field();
     BytesRef termVal = term.bytes();
@@ -135,15 +180,16 @@ public class DocSetUtil {
       }
     }
 
+    DocSet answer = null;
     if (maxCount == 0) {
-      return DocSet.EMPTY;
+      answer = DocSet.EMPTY;
+    } else if (maxCount <= smallSetSize) {
+      answer = createSmallSet(leaves, postList, maxCount, firstReader);
+    } else {
+      answer = createBigSet(leaves, postList, maxDoc, firstReader);
     }
-
-    if (maxCount <= smallSetSize) {
-      return createSmallSet(leaves, postList, maxCount, firstReader);
-    }
-
-    return createBigSet(leaves, postList, maxDoc, firstReader);
+    return DocSetUtil.getDocSet( answer, searcher );
   }
 
   private static DocSet createSmallSet(List<LeafReaderContext> leaves, PostingsEnum[] postList, int maxPossible, int firstReader) throws IOException {
@@ -165,12 +165,8 @@ public class DocSlice extends DocSetBase implements DocList {
   }
 
   @Override
-  protected DocSlice clone() {
-    try {
-      // DocSlice is not currently mutable
-      DocSlice slice = (DocSlice) super.clone();
-    } catch (CloneNotSupportedException e) {}
-    return null;
+  public DocSlice clone() {
+    return (DocSlice) super.clone();
   }
 
   /** WARNING: this can over-estimate real memory use since backing arrays are shared with other DocSlice instances */
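
Note: the bare super.clone() now compiles without a try/catch only because
DocSetBase defines its own clone() (added earlier in this commit) that declares no
checked exception; calling Object.clone() directly is what forced the old try/catch.
A minimal illustration of that language rule, with hypothetical classes:

    class Base implements Cloneable {
      @Override
      public Base clone() {               // no "throws CloneNotSupportedException"
        throw new RuntimeException(new CloneNotSupportedException()); // fallback, mirrors DocSetBase
      }
    }

    class Derived extends Base {
      @Override
      public Derived clone() {
        return (Derived) super.clone();   // compiles: Base.clone() declares no checked exception
      }
    }
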
@@ -290,7 +290,7 @@ public final class HashDocSet extends DocSetBase {
   }
 
   @Override
-  protected HashDocSet clone() {
+  public HashDocSet clone() {
     return new HashDocSet(this);
   }
@@ -429,6 +429,10 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
     return reader.maxDoc();
   }
 
+  public final int numDocs() {
+    return reader.numDocs();
+  }
+
   public final int docFreq(Term term) throws IOException {
     return reader.docFreq(term);
   }
@@ -1063,19 +1067,24 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
       getDocSet(query);
     }
 
-  public BitDocSet getDocSetBits(Query q) throws IOException {
-    DocSet answer = getDocSet(q);
-    if (answer instanceof BitDocSet) {
-      return (BitDocSet) answer;
-    }
-
+  private BitDocSet makeBitDocSet(DocSet answer) {
     // TODO: this should be implemented in DocSet, most likely with a getBits method that takes a maxDoc argument
     // or make DocSet instances remember maxDoc
     FixedBitSet bs = new FixedBitSet(maxDoc());
     DocIterator iter = answer.iterator();
     while (iter.hasNext()) {
       bs.set(iter.nextDoc());
     }
 
-    BitDocSet answerBits = new BitDocSet(bs, answer.size());
+    return new BitDocSet(bs, answer.size());
+  }
+
+  public BitDocSet getDocSetBits(Query q) throws IOException {
+    DocSet answer = getDocSet(q);
+    if (answer instanceof BitDocSet) {
+      return (BitDocSet) answer;
+    }
+    BitDocSet answerBits = makeBitDocSet(answer);
     if (filterCache != null) {
       filterCache.put(q, answerBits);
     }
@@ -1138,16 +1147,35 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
   }
 
   private static Query matchAllDocsQuery = new MatchAllDocsQuery();
-  private BitDocSet liveDocs;
+  private volatile BitDocSet liveDocs;
 
+  /** @lucene.internal the type of DocSet returned may change in the future */
   public BitDocSet getLiveDocs() throws IOException {
-    // going through the filter cache will provide thread safety here
+    // Going through the filter cache will provide thread safety here if we only had getLiveDocs,
+    // but the addition of setLiveDocs means we needed to add volatile to "liveDocs".
     if (liveDocs == null) {
       liveDocs = getDocSetBits(matchAllDocsQuery);
     }
+    assert liveDocs.size() == numDocs();
     return liveDocs;
   }
 
+  /** @lucene.internal */
+  public boolean isLiveDocsInstantiated() {
+    return liveDocs != null;
+  }
+
+  /** @lucene.internal */
+  public void setLiveDocs(DocSet docs) {
+    // a few places currently expect BitDocSet
+    assert docs.size() == numDocs();
+    if (docs instanceof BitDocSet) {
+      this.liveDocs = (BitDocSet)docs;
+    } else {
+      this.liveDocs = makeBitDocSet(docs);
+    }
+  }
+
   public static class ProcessedFilter {
     public DocSet answer; // the answer, if non-null
     public Filter filter;
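
Note on the volatile change: getLiveDocs() uses single-check lazy initialization.
Without any synchronization, volatile guarantees that a BitDocSet published by
setLiveDocs() in one thread is seen fully constructed by readers; the benign race
(two threads computing the same idempotent value) is tolerated. A generic sketch of
the pattern (not Solr code):

    class LazyCache<T> {
      private volatile T value;               // volatile: safe publication to readers
      private final java.util.function.Supplier<T> compute;

      LazyCache(java.util.function.Supplier<T> compute) {
        this.compute = compute;
      }

      T get() {
        T v = value;                          // single volatile read
        if (v == null) {
          v = compute.get();                  // may run in several threads...
          value = v;                          // ...but every result is equivalent
        }
        return v;
      }

      void set(T v) {                         // analogue of setLiveDocs()
        value = v;
      }
    }
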
@@ -1178,8 +1206,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
       ((DelegatingCollector) collector).finish();
     }
 
-    DocSet docSet = setCollector.getDocSet();
-    return docSet;
+    return DocSetUtil.getDocSet(setCollector, this);
   }
 
   /**
@@ -1251,7 +1278,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
       ((DelegatingCollector) collector).finish();
     }
 
-    return setCollector.getDocSet();
+    return DocSetUtil.getDocSet(setCollector, this);
   }
 
   public ProcessedFilter getProcessedFilter(DocSet setFilter, List<Query> queries) throws IOException {
@@ -1959,7 +1986,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
 
       buildAndRunCollectorChain(qr, query, collector, cmd, pf.postFilter);
 
-      set = setCollector.getDocSet();
+      set = DocSetUtil.getDocSet(setCollector, this);
 
       nDocsReturned = 0;
       ids = new int[nDocsReturned];
@@ -1976,7 +2003,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
 
       buildAndRunCollectorChain(qr, query, collector, cmd, pf.postFilter);
 
-      set = setCollector.getDocSet();
+      set = DocSetUtil.getDocSet(setCollector, this);
 
       totalHits = topCollector.getTotalHits();
       assert (totalHits == set.size());
@@ -791,7 +791,7 @@ public class SortedIntDocSet extends DocSetBase {
   }
 
   @Override
-  protected SortedIntDocSet clone() {
+  public SortedIntDocSet clone() {
     return new SortedIntDocSet(docs.clone());
   }
@@ -40,6 +40,7 @@ import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.BitDocSet;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.DocSetCollector;
+import org.apache.solr.search.DocSetUtil;
 import org.apache.solr.search.QueryCommand;
 import org.apache.solr.search.QueryResult;
 import org.apache.solr.search.QueryUtils;
@@ -193,7 +194,7 @@ public class CommandHandler {
     List<Collector> allCollectors = new ArrayList<>(collectors);
     allCollectors.add(docSetCollector);
     searchWithTimeLimiter(query, filter, MultiCollector.wrap(allCollectors));
-    return docSetCollector.getDocSet();
+    return DocSetUtil.getDocSet( docSetCollector, searcher );
   }
 
   @SuppressWarnings("unchecked")
@@ -18,6 +18,7 @@ package org.apache.solr.search;
 
 
 import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.Query;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.solr.SolrTestCaseJ4;
 import org.apache.solr.common.SolrInputDocument;
@@ -42,6 +43,61 @@ public class TestFiltering extends SolrTestCaseJ4 {
     initCore("solrconfig.xml","schema_latest.xml");
   }
 
+  @Test
+  public void testLiveDocsSharing() throws Exception {
+    clearIndex();
+    for (int i=0; i<20; i++) {
+      for (int repeat=0; repeat < (i%5==0 ? 2 : 1); repeat++) {
+        assertU(adoc("id", Integer.toString(i), "foo_s", "foo", "val_i", Integer.toString(i), "val_s", Character.toString((char)('A' + i))));
+      }
+    }
+    assertU(commit());
+
+    String[] queries = {
+        "foo_s:foo",
+        "foo_s:f*",
+        "*:*",
+        "id:[* TO *]",
+        "id:[0 TO 99]",
+        "val_i:[0 TO 20]",
+        "val_s:[A TO z]"
+    };
+
+    SolrQueryRequest req = req();
+    try {
+      SolrIndexSearcher searcher = req.getSearcher();
+
+      DocSet live = null;
+      for (String qstr : queries) {
+        Query q = QParser.getParser(qstr, null, req).getQuery();
+        // System.out.println("getting set for " + q);
+        DocSet set = searcher.getDocSet(q);
+        if (live == null) {
+          live = searcher.getLiveDocs();
+        }
+        assertTrue( set == live);
+
+        QueryCommand cmd = new QueryCommand();
+        cmd.setQuery( QParser.getParser(qstr, null, req).getQuery() );
+        cmd.setLen(random().nextInt(30));
+        cmd.setNeedDocSet(true);
+        QueryResult res = new QueryResult();
+        searcher.search(res, cmd);
+        set = res.getDocSet();
+        assertTrue( set == live );
+
+        cmd.setQuery( QParser.getParser(qstr + " OR id:0", null, req).getQuery() );
+        cmd.setFilterList( QParser.getParser(qstr + " OR id:1", null, req).getQuery() );
+        res = new QueryResult();
+        searcher.search(res, cmd);
+        set = res.getDocSet();
+        assertTrue( set == live );
+      }
+
+    } finally {
+      req.close();
+    }
+  }
+
   public void testCaching() throws Exception {
     clearIndex();