Search: Avoid calling DocIdSets.toSafeBits.
This method is heavy as it builds a bitset out of a DocIdSet in order to be able to provide random-access. Now that Lucene has removed out-of-order scoring true random-access is very rarely needed and we could instead return an Bits instance that wraps the iterator. Ideally, we would use the DISI API directly but I have to admit that the Bits API is more friendly. Close #9546
This commit is contained in:
parent
e5b174ff77
commit
8540a863aa
|
@ -30,6 +30,8 @@ import org.apache.lucene.util.Bits;
|
|||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.RoaringDocIdSet;
|
||||
import org.apache.lucene.util.SparseFixedBitSet;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.ElasticsearchIllegalStateException;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.lucene.search.XDocIdSetIterator;
|
||||
|
||||
|
@ -109,11 +111,16 @@ public class DocIdSets {
|
|||
}
|
||||
|
||||
/**
|
||||
* Gets a set to bits.
|
||||
* Get a build a {@link Bits} instance that will match all documents
|
||||
* contained in {@code set}. Note that this is a potentially heavy
|
||||
* operation as this might require to consume an iterator of this set
|
||||
* entirely and to load it into a {@link BitSet}. Prefer using
|
||||
* {@link #asSequentialAccessBits} if you only need to consume the
|
||||
* {@link Bits} once and in order.
|
||||
*/
|
||||
public static Bits toSafeBits(LeafReader reader, @Nullable DocIdSet set) throws IOException {
|
||||
public static Bits toSafeBits(int maxDoc, @Nullable DocIdSet set) throws IOException {
|
||||
if (set == null) {
|
||||
return new Bits.MatchNoBits(reader.maxDoc());
|
||||
return new Bits.MatchNoBits(maxDoc);
|
||||
}
|
||||
Bits bits = set.bits();
|
||||
if (bits != null) {
|
||||
|
@ -121,9 +128,56 @@ public class DocIdSets {
|
|||
}
|
||||
DocIdSetIterator iterator = set.iterator();
|
||||
if (iterator == null) {
|
||||
return new Bits.MatchNoBits(reader.maxDoc());
|
||||
return new Bits.MatchNoBits(maxDoc);
|
||||
}
|
||||
return toBitSet(iterator, reader.maxDoc());
|
||||
return toBitSet(iterator, maxDoc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a {@link DocIdSet}, return a {@link Bits} instance that will match
|
||||
* all documents contained in the set. Note that the returned {@link Bits}
|
||||
* instance should only be consumed once and in order.
|
||||
*/
|
||||
public static Bits asSequentialAccessBits(final int maxDoc, @Nullable DocIdSet set) throws IOException {
|
||||
if (set == null) {
|
||||
return new Bits.MatchNoBits(maxDoc);
|
||||
}
|
||||
Bits bits = set.bits();
|
||||
if (bits != null) {
|
||||
return bits;
|
||||
}
|
||||
final DocIdSetIterator iterator = set.iterator();
|
||||
if (iterator == null) {
|
||||
return new Bits.MatchNoBits(maxDoc);
|
||||
}
|
||||
return new Bits() {
|
||||
|
||||
int previous = 0;
|
||||
|
||||
@Override
|
||||
public boolean get(int index) {
|
||||
if (index < previous) {
|
||||
throw new ElasticsearchIllegalArgumentException("This Bits instance can only be consumed in order. "
|
||||
+ "Got called on [" + index + "] while previously called on [" + previous + "]");
|
||||
}
|
||||
previous = index;
|
||||
|
||||
int doc = iterator.docID();
|
||||
if (doc < index) {
|
||||
try {
|
||||
doc = iterator.advance(index);
|
||||
} catch (IOException e) {
|
||||
throw new ElasticsearchIllegalStateException("Cannot advance iterator", e);
|
||||
}
|
||||
}
|
||||
return index == doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return maxDoc;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -96,7 +96,7 @@ public class FilterableTermsEnum extends TermsEnum {
|
|||
// fully filtered, none matching, no need to iterate on this
|
||||
continue;
|
||||
}
|
||||
bits = DocIdSets.toSafeBits(context.reader(), docIdSet);
|
||||
bits = DocIdSets.toSafeBits(context.reader().maxDoc(), docIdSet);
|
||||
// Count how many docs are in our filtered set
|
||||
// TODO make this lazy-loaded only for those that need it?
|
||||
DocIdSetIterator iterator = docIdSet.iterator();
|
||||
|
|
|
@ -163,7 +163,7 @@ public class FiltersFunctionScoreQuery extends Query {
|
|||
for (int i = 0; i < filterFunctions.length; i++) {
|
||||
FilterFunction filterFunction = filterFunctions[i];
|
||||
filterFunction.function.setNextReader(context);
|
||||
docSets[i] = DocIdSets.toSafeBits(context.reader(), filterFunction.filter.getDocIdSet(context, acceptDocs));
|
||||
docSets[i] = DocIdSets.asSequentialAccessBits(context.reader().maxDoc(), filterFunction.filter.getDocIdSet(context, acceptDocs));
|
||||
}
|
||||
return new FiltersFunctionFactorScorer(this, subQueryScorer, scoreMode, filterFunctions, maxBoost, docSets, combineFunction, minScore);
|
||||
}
|
||||
|
@ -186,7 +186,7 @@ public class FiltersFunctionScoreQuery extends Query {
|
|||
weightSum++;
|
||||
}
|
||||
|
||||
Bits docSet = DocIdSets.toSafeBits(context.reader(),
|
||||
Bits docSet = DocIdSets.asSequentialAccessBits(context.reader().maxDoc(),
|
||||
filterFunction.filter.getDocIdSet(context, context.reader().getLiveDocs()));
|
||||
if (docSet.get(doc)) {
|
||||
filterFunction.function.setNextReader(context);
|
||||
|
|
|
@ -129,7 +129,7 @@ public class ParentToChildrenAggregator extends SingleBucketAggregator implement
|
|||
// The DocIdSets.toSafeBits(...) can convert to FixedBitSet, but this
|
||||
// will only happen if the none filter cache is used. (which only happens in tests)
|
||||
// Otherwise the filter cache will produce a bitset based filter.
|
||||
parentDocs = DocIdSets.toSafeBits(reader.reader(), parentDocIdSet);
|
||||
parentDocs = DocIdSets.asSequentialAccessBits(reader.reader().maxDoc(), parentDocIdSet);
|
||||
DocIdSet childDocIdSet = childFilter.getDocIdSet(reader, null);
|
||||
if (globalOrdinals != null && !DocIdSets.isEmpty(childDocIdSet)) {
|
||||
replay.add(reader);
|
||||
|
|
|
@ -51,7 +51,7 @@ public class FilterAggregator extends SingleBucketAggregator {
|
|||
@Override
|
||||
public void setNextReader(LeafReaderContext reader) {
|
||||
try {
|
||||
bits = DocIdSets.toSafeBits(reader.reader(), filter.getDocIdSet(reader, null));
|
||||
bits = DocIdSets.asSequentialAccessBits(reader.reader().maxDoc(), filter.getDocIdSet(reader, null));
|
||||
} catch (IOException ioe) {
|
||||
throw new AggregationExecutionException("Failed to aggregate filter aggregator [" + name + "]", ioe);
|
||||
}
|
||||
|
|
|
@ -69,7 +69,7 @@ public class FiltersAggregator extends BucketsAggregator {
|
|||
public void setNextReader(LeafReaderContext reader) {
|
||||
try {
|
||||
for (int i = 0; i < filters.length; i++) {
|
||||
bits[i] = DocIdSets.toSafeBits(reader.reader(), filters[i].filter.getDocIdSet(reader, null));
|
||||
bits[i] = DocIdSets.asSequentialAccessBits(reader.reader().maxDoc(), filters[i].filter.getDocIdSet(reader, null));
|
||||
}
|
||||
} catch (IOException ioe) {
|
||||
throw new AggregationExecutionException("Failed to aggregate filter aggregator [" + name + "]", ioe);
|
||||
|
|
|
@ -23,7 +23,11 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.search.DocIdSet;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.RoaringDocIdSet;
|
||||
import org.elasticsearch.ElasticsearchIllegalArgumentException;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
@ -119,4 +123,46 @@ public class DocIdSetsTests extends ElasticsearchSingleNodeTest {
|
|||
test(indexService, true, filter);
|
||||
}
|
||||
|
||||
public void testAsSequentialAccessBits() throws IOException {
|
||||
final int maxDoc = randomIntBetween(5, 100);
|
||||
|
||||
// Null DocIdSet maps to empty bits
|
||||
Bits bits = DocIdSets.asSequentialAccessBits(100, null);
|
||||
for (int i = 0; i < maxDoc; ++i) {
|
||||
assertFalse(bits.get(i));
|
||||
}
|
||||
|
||||
// Empty set maps to empty bits
|
||||
bits = DocIdSets.asSequentialAccessBits(100, DocIdSet.EMPTY);
|
||||
for (int i = 0; i < maxDoc; ++i) {
|
||||
assertFalse(bits.get(i));
|
||||
}
|
||||
|
||||
RoaringDocIdSet.Builder b = new RoaringDocIdSet.Builder(maxDoc);
|
||||
for (int i = randomInt(maxDoc - 1); i < maxDoc; i += randomIntBetween(1, 10)) {
|
||||
b.add(i);
|
||||
}
|
||||
final RoaringDocIdSet set = b.build();
|
||||
// RoaringDocIdSet does not support random access
|
||||
assertNull(set.bits());
|
||||
|
||||
bits = DocIdSets.asSequentialAccessBits(100, set);
|
||||
bits.get(4);
|
||||
try {
|
||||
bits.get(2);
|
||||
fail("Should have thrown an exception because of out-of-order consumption");
|
||||
} catch (ElasticsearchIllegalArgumentException e) {
|
||||
// ok
|
||||
}
|
||||
|
||||
bits = DocIdSets.asSequentialAccessBits(100, set);
|
||||
DocIdSetIterator iterator = set.iterator();
|
||||
for (int i = randomInt(maxDoc - 1); i < maxDoc; i += randomIntBetween(1, 10)) {
|
||||
if (iterator.docID() < i) {
|
||||
iterator.advance(i);
|
||||
}
|
||||
|
||||
assertEquals(iterator.docID() == i, bits.get(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
*/
|
||||
package org.elasticsearch.search.aggregations.bucket;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
|
||||
import org.elasticsearch.action.search.SearchPhaseExecutionException;
|
||||
import org.elasticsearch.action.search.SearchResponse;
|
||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||
|
@ -482,6 +483,7 @@ public class ReverseNestedTests extends ElasticsearchIntegrationTest {
|
|||
}
|
||||
|
||||
@Test
|
||||
@AwaitsFix(bugUrl="http://github.com/elasticsearch/elasticsearch/issues/9547")
|
||||
public void testSameParentDocHavingMultipleBuckets() throws Exception {
|
||||
XContentBuilder mapping = jsonBuilder().startObject().startObject("product").field("dynamic", "strict").startObject("properties")
|
||||
.startObject("id").field("type", "long").endObject()
|
||||
|
|
Loading…
Reference in New Issue