Search: Avoid calling DocIdSets.toSafeBits.

This method is heavy as it builds a bitset out of a DocIdSet in order to be
able to provide random access. Now that Lucene has removed out-of-order
scoring, true random access is very rarely needed, and we can instead return
a Bits instance that wraps the iterator. Ideally we would use the DISI API
directly, but I have to admit that the Bits API is more friendly.

Close #9546
commit 8540a863aa
parent e5b174ff77
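
In practice the change swaps an eager bitset build for a lazy wrapper at each
call site. A minimal caller-side sketch of the difference (the `reader` and
`docIdSet` variables here are generic stand-ins, not taken from any one call
site in the diff below):

    // Before this commit: consume the set's iterator and load it into a
    // BitSet, paying O(maxDoc) time and memory up front for random access.
    Bits bits = DocIdSets.toSafeBits(reader.maxDoc(), docIdSet);

    // After: wrap the DocIdSetIterator directly. Creation is cheap, but the
    // returned Bits may only be read once, in increasing doc id order;
    // out-of-order reads throw ElasticsearchIllegalArgumentException.
    Bits sequential = DocIdSets.asSequentialAccessBits(reader.maxDoc(), docIdSet);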
@@ -30,6 +30,8 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.RoaringDocIdSet;
 import org.apache.lucene.util.SparseFixedBitSet;
+import org.elasticsearch.ElasticsearchIllegalArgumentException;
+import org.elasticsearch.ElasticsearchIllegalStateException;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.lucene.search.XDocIdSetIterator;
 
@@ -109,11 +111,16 @@ public class DocIdSets {
     }
 
     /**
-     * Gets a set to bits.
+     * Get a {@link Bits} instance that will match all documents
+     * contained in {@code set}. Note that this is a potentially heavy
+     * operation as this might require to consume an iterator of this set
+     * entirely and to load it into a {@link BitSet}. Prefer using
+     * {@link #asSequentialAccessBits} if you only need to consume the
+     * {@link Bits} once and in order.
      */
-    public static Bits toSafeBits(LeafReader reader, @Nullable DocIdSet set) throws IOException {
+    public static Bits toSafeBits(int maxDoc, @Nullable DocIdSet set) throws IOException {
         if (set == null) {
-            return new Bits.MatchNoBits(reader.maxDoc());
+            return new Bits.MatchNoBits(maxDoc);
         }
         Bits bits = set.bits();
         if (bits != null) {
@@ -121,9 +128,56 @@ public class DocIdSets {
         }
         DocIdSetIterator iterator = set.iterator();
         if (iterator == null) {
-            return new Bits.MatchNoBits(reader.maxDoc());
+            return new Bits.MatchNoBits(maxDoc);
         }
-        return toBitSet(iterator, reader.maxDoc());
+        return toBitSet(iterator, maxDoc);
+    }
+
+    /**
+     * Given a {@link DocIdSet}, return a {@link Bits} instance that will match
+     * all documents contained in the set. Note that the returned {@link Bits}
+     * instance should only be consumed once and in order.
+     */
+    public static Bits asSequentialAccessBits(final int maxDoc, @Nullable DocIdSet set) throws IOException {
+        if (set == null) {
+            return new Bits.MatchNoBits(maxDoc);
+        }
+        Bits bits = set.bits();
+        if (bits != null) {
+            return bits;
+        }
+        final DocIdSetIterator iterator = set.iterator();
+        if (iterator == null) {
+            return new Bits.MatchNoBits(maxDoc);
+        }
+        return new Bits() {
+
+            int previous = 0;
+
+            @Override
+            public boolean get(int index) {
+                if (index < previous) {
+                    throw new ElasticsearchIllegalArgumentException("This Bits instance can only be consumed in order. "
+                            + "Got called on [" + index + "] while previously called on [" + previous + "]");
+                }
+                previous = index;
+
+                int doc = iterator.docID();
+                if (doc < index) {
+                    try {
+                        doc = iterator.advance(index);
+                    } catch (IOException e) {
+                        throw new ElasticsearchIllegalStateException("Cannot advance iterator", e);
+                    }
+                }
+                return index == doc;
+            }
+
+            @Override
+            public int length() {
+                return maxDoc;
+            }
+        };
     }
 
     /**
@@ -96,7 +96,7 @@ public class FilterableTermsEnum extends TermsEnum {
                 // fully filtered, none matching, no need to iterate on this
                 continue;
             }
-            bits = DocIdSets.toSafeBits(context.reader(), docIdSet);
+            bits = DocIdSets.toSafeBits(context.reader().maxDoc(), docIdSet);
             // Count how many docs are in our filtered set
             // TODO make this lazy-loaded only for those that need it?
             DocIdSetIterator iterator = docIdSet.iterator();
@@ -163,7 +163,7 @@ public class FiltersFunctionScoreQuery extends Query {
             for (int i = 0; i < filterFunctions.length; i++) {
                 FilterFunction filterFunction = filterFunctions[i];
                 filterFunction.function.setNextReader(context);
-                docSets[i] = DocIdSets.toSafeBits(context.reader(), filterFunction.filter.getDocIdSet(context, acceptDocs));
+                docSets[i] = DocIdSets.asSequentialAccessBits(context.reader().maxDoc(), filterFunction.filter.getDocIdSet(context, acceptDocs));
             }
             return new FiltersFunctionFactorScorer(this, subQueryScorer, scoreMode, filterFunctions, maxBoost, docSets, combineFunction, minScore);
         }
@@ -186,7 +186,7 @@ public class FiltersFunctionScoreQuery extends Query {
                     weightSum++;
                 }
 
-                Bits docSet = DocIdSets.toSafeBits(context.reader(),
+                Bits docSet = DocIdSets.asSequentialAccessBits(context.reader().maxDoc(),
                         filterFunction.filter.getDocIdSet(context, context.reader().getLiveDocs()));
                 if (docSet.get(doc)) {
                     filterFunction.function.setNextReader(context);
@@ -129,7 +129,7 @@ public class ParentToChildrenAggregator extends SingleBucketAggregator implement
             // The DocIdSets.toSafeBits(...) can convert to FixedBitSet, but this
             // will only happen if the none filter cache is used. (which only happens in tests)
             // Otherwise the filter cache will produce a bitset based filter.
-            parentDocs = DocIdSets.toSafeBits(reader.reader(), parentDocIdSet);
+            parentDocs = DocIdSets.asSequentialAccessBits(reader.reader().maxDoc(), parentDocIdSet);
             DocIdSet childDocIdSet = childFilter.getDocIdSet(reader, null);
             if (globalOrdinals != null && !DocIdSets.isEmpty(childDocIdSet)) {
                 replay.add(reader);
@@ -51,7 +51,7 @@ public class FilterAggregator extends SingleBucketAggregator {
         @Override
         public void setNextReader(LeafReaderContext reader) {
             try {
-                bits = DocIdSets.toSafeBits(reader.reader(), filter.getDocIdSet(reader, null));
+                bits = DocIdSets.asSequentialAccessBits(reader.reader().maxDoc(), filter.getDocIdSet(reader, null));
             } catch (IOException ioe) {
                 throw new AggregationExecutionException("Failed to aggregate filter aggregator [" + name + "]", ioe);
             }
@@ -69,7 +69,7 @@ public class FiltersAggregator extends BucketsAggregator {
         public void setNextReader(LeafReaderContext reader) {
             try {
                 for (int i = 0; i < filters.length; i++) {
-                    bits[i] = DocIdSets.toSafeBits(reader.reader(), filters[i].filter.getDocIdSet(reader, null));
+                    bits[i] = DocIdSets.asSequentialAccessBits(reader.reader().maxDoc(), filters[i].filter.getDocIdSet(reader, null));
                 }
             } catch (IOException ioe) {
                 throw new AggregationExecutionException("Failed to aggregate filter aggregator [" + name + "]", ioe);
@@ -23,7 +23,11 @@ import java.io.IOException;
 
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Filter;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.RoaringDocIdSet;
+import org.elasticsearch.ElasticsearchIllegalArgumentException;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.common.settings.Settings;
@@ -119,4 +123,46 @@ public class DocIdSetsTests extends ElasticsearchSingleNodeTest {
         test(indexService, true, filter);
     }
 
+    public void testAsSequentialAccessBits() throws IOException {
+        final int maxDoc = randomIntBetween(5, 100);
+
+        // Null DocIdSet maps to empty bits
+        Bits bits = DocIdSets.asSequentialAccessBits(100, null);
+        for (int i = 0; i < maxDoc; ++i) {
+            assertFalse(bits.get(i));
+        }
+
+        // Empty set maps to empty bits
+        bits = DocIdSets.asSequentialAccessBits(100, DocIdSet.EMPTY);
+        for (int i = 0; i < maxDoc; ++i) {
+            assertFalse(bits.get(i));
+        }
+
+        RoaringDocIdSet.Builder b = new RoaringDocIdSet.Builder(maxDoc);
+        for (int i = randomInt(maxDoc - 1); i < maxDoc; i += randomIntBetween(1, 10)) {
+            b.add(i);
+        }
+        final RoaringDocIdSet set = b.build();
+        // RoaringDocIdSet does not support random access
+        assertNull(set.bits());
+
+        bits = DocIdSets.asSequentialAccessBits(100, set);
+        bits.get(4);
+        try {
+            bits.get(2);
+            fail("Should have thrown an exception because of out-of-order consumption");
+        } catch (ElasticsearchIllegalArgumentException e) {
+            // ok
+        }
+
+        bits = DocIdSets.asSequentialAccessBits(100, set);
+        DocIdSetIterator iterator = set.iterator();
+        for (int i = randomInt(maxDoc - 1); i < maxDoc; i += randomIntBetween(1, 10)) {
+            if (iterator.docID() < i) {
+                iterator.advance(i);
+            }
+
+            assertEquals(iterator.docID() == i, bits.get(i));
+        }
+    }
 }
@@ -18,6 +18,7 @@
  */
 package org.elasticsearch.search.aggregations.bucket;
 
+import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
 import org.elasticsearch.action.search.SearchPhaseExecutionException;
 import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.common.settings.ImmutableSettings;
@@ -482,6 +483,7 @@ public class ReverseNestedTests extends ElasticsearchIntegrationTest {
     }
 
     @Test
+    @AwaitsFix(bugUrl="http://github.com/elasticsearch/elasticsearch/issues/9547")
     public void testSameParentDocHavingMultipleBuckets() throws Exception {
         XContentBuilder mapping = jsonBuilder().startObject().startObject("product").field("dynamic", "strict").startObject("properties")
                 .startObject("id").field("type", "long").endObject()