mirror of
https://github.com/apache/lucene.git
synced 2025-02-09 03:25:15 +00:00
LUCENE-7262: Leverage index statistics to make DocIdSetBuilder more efficient.
This commit is contained in:
parent
645889f6b2
commit
4fa2b29b20
@ -78,8 +78,8 @@ Optimizations
|
|||||||
* LUCENE-7237: LRUQueryCache now prefers returning an uncached Scorer than
|
* LUCENE-7237: LRUQueryCache now prefers returning an uncached Scorer than
|
||||||
waiting on a lock. (Adrien Grand)
|
waiting on a lock. (Adrien Grand)
|
||||||
|
|
||||||
* LUCENE-7261, LUCENE-7264: Speed up DocIdSetBuilder (which is used by
|
* LUCENE-7261, LUCENE-7262, LUCENE-7264: Speed up DocIdSetBuilder (which is used
|
||||||
TermsQuery, multi-term queries and point queries). (Adrien Grand)
|
by TermsQuery, multi-term queries and point queries). (Adrien Grand)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
|
|
||||||
|
@ -166,7 +166,7 @@ final class MultiTermQueryConstantScoreWrapper<Q extends MultiTermQuery> extends
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Too many terms: go back to the terms we already collected and start building the bit set
|
// Too many terms: go back to the terms we already collected and start building the bit set
|
||||||
DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc());
|
DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc(), terms);
|
||||||
if (collectedTerms.isEmpty() == false) {
|
if (collectedTerms.isEmpty() == false) {
|
||||||
TermsEnum termsEnum2 = terms.iterator();
|
TermsEnum termsEnum2 = terms.iterator();
|
||||||
for (TermAndState t : collectedTerms) {
|
for (TermAndState t : collectedTerms) {
|
||||||
|
@ -130,7 +130,7 @@ public abstract class PointInSetQuery extends Query {
|
|||||||
throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + fieldInfo.getPointNumBytes() + " but this query has bytesPerDim=" + bytesPerDim);
|
throw new IllegalArgumentException("field=\"" + field + "\" was indexed with bytesPerDim=" + fieldInfo.getPointNumBytes() + " but this query has bytesPerDim=" + bytesPerDim);
|
||||||
}
|
}
|
||||||
|
|
||||||
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
|
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
|
||||||
|
|
||||||
if (numDims == 1) {
|
if (numDims == 1) {
|
||||||
|
|
||||||
|
@ -106,7 +106,7 @@ public abstract class PointRangeQuery extends Query {
|
|||||||
return new ConstantScoreWeight(this) {
|
return new ConstantScoreWeight(this) {
|
||||||
|
|
||||||
private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values) throws IOException {
|
private DocIdSet buildMatchingDocIdSet(LeafReader reader, PointValues values) throws IOException {
|
||||||
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
|
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
|
||||||
|
|
||||||
values.intersect(field,
|
values.intersect(field,
|
||||||
new IntersectVisitor() {
|
new IntersectVisitor() {
|
||||||
|
@ -36,6 +36,9 @@ public class BitDocIdSet extends DocIdSet {
|
|||||||
* {@link BitSet} must not be modified afterwards.
|
* {@link BitSet} must not be modified afterwards.
|
||||||
*/
|
*/
|
||||||
public BitDocIdSet(BitSet set, long cost) {
  // Reject negative costs up front; only a non-negative cost is stored.
  if (cost >= 0) {
    this.set = set;
    this.cost = cost;
  } else {
    throw new IllegalArgumentException("cost must be >= 0, got " + cost);
  }
}
|
||||||
|
@ -54,6 +54,9 @@ public class BitSetIterator extends DocIdSetIterator {
|
|||||||
|
|
||||||
/** Sole constructor. */
|
/** Sole constructor. */
|
||||||
public BitSetIterator(BitSet bits, long cost) {
|
public BitSetIterator(BitSet bits, long cost) {
|
||||||
|
if (cost < 0) {
|
||||||
|
throw new IllegalArgumentException("cost must be >= 0, got " + cost);
|
||||||
|
}
|
||||||
this.bits = bits;
|
this.bits = bits;
|
||||||
this.length = bits.length();
|
this.length = bits.length();
|
||||||
this.cost = cost;
|
this.cost = cost;
|
||||||
|
@ -19,6 +19,8 @@ package org.apache.lucene.util;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.PointValues;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
import org.apache.lucene.search.DocIdSet;
|
import org.apache.lucene.search.DocIdSet;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
import org.apache.lucene.util.packed.PackedInts;
|
import org.apache.lucene.util.packed.PackedInts;
|
||||||
@ -65,18 +67,48 @@ public final class DocIdSetBuilder {
|
|||||||
|
|
||||||
private final int maxDoc;
|
private final int maxDoc;
|
||||||
private final int threshold;
|
private final int threshold;
|
||||||
|
// pkg-private for testing
|
||||||
|
final boolean multivalued;
|
||||||
|
final double numValuesPerDoc;
|
||||||
|
|
||||||
private int[] buffer;
|
private int[] buffer;
|
||||||
private int bufferSize;
|
private int bufferSize;
|
||||||
|
|
||||||
private FixedBitSet bitSet;
|
private FixedBitSet bitSet;
|
||||||
|
|
||||||
|
private long counter = -1;
|
||||||
private BulkAdder adder = new BufferAdder();
|
private BulkAdder adder = new BufferAdder();
|
||||||
|
|
||||||
/**
 * Create a builder that can contain doc IDs between {@code 0} and {@code maxDoc}.
 * No index statistics are supplied: both the doc count and the value count are
 * forwarded as {@code -1}.
 */
public DocIdSetBuilder(int maxDoc) {
  this(
      maxDoc,
      -1, // docCount
      -1); // valueCount
}
|
||||||
|
|
||||||
|
/**
 * Create a {@link DocIdSetBuilder} instance that is optimized for
 * accumulating docs that match the given {@link Terms}.
 *
 * <p>The builder is seeded with {@code terms.getDocCount()} and
 * {@code terms.getSumDocFreq()}. A {@code null} {@code terms} is accepted and
 * handled like missing statistics (both forwarded as {@code -1}) instead of
 * failing with a {@link NullPointerException}.
 *
 * @param maxDoc bound on the doc IDs that can be added to the builder
 * @param terms the terms whose statistics are used, may be {@code null}
 * @throws IOException if the statistics cannot be read from {@code terms}
 */
public DocIdSetBuilder(int maxDoc, Terms terms) throws IOException {
  this(
      maxDoc,
      // callers may hold a null Terms when the field has no terms at all
      terms == null ? -1 : terms.getDocCount(),
      terms == null ? -1L : terms.getSumDocFreq());
}
|
||||||
|
|
||||||
|
/**
 * Create a {@link DocIdSetBuilder} instance that is optimized for
 * accumulating docs that match the given {@link PointValues}.
 *
 * <p>The statistics used are {@code values.getDocCount(field)} and
 * {@code values.size(field)}.
 */
public DocIdSetBuilder(int maxDoc, PointValues values, String field) throws IOException {
  this(
      maxDoc,
      values.getDocCount(field), // docCount
      values.size(field)); // valueCount
}
|
||||||
|
|
||||||
|
DocIdSetBuilder(int maxDoc, int docCount, long valueCount) {
|
||||||
this.maxDoc = maxDoc;
|
this.maxDoc = maxDoc;
|
||||||
|
this.multivalued = docCount < 0 || docCount != valueCount;
|
||||||
|
this.numValuesPerDoc = (docCount < 0 || valueCount < 0)
|
||||||
|
// assume one value per doc, this means the cost will be overestimated
|
||||||
|
// if the docs are actually multi-valued
|
||||||
|
? 1
|
||||||
|
// otherwise compute from index stats
|
||||||
|
: (double) valueCount / docCount;
|
||||||
|
assert numValuesPerDoc >= 1;
|
||||||
|
|
||||||
// For ridiculously small sets, we'll just use a sorted int[]
|
// For ridiculously small sets, we'll just use a sorted int[]
|
||||||
// maxDoc >>> 7 is a good value if you want to save memory, lower values
|
// maxDoc >>> 7 is a good value if you want to save memory, lower values
|
||||||
// such as maxDoc >>> 11 should provide faster building but at the expense
|
// such as maxDoc >>> 11 should provide faster building but at the expense
|
||||||
@ -94,6 +126,7 @@ public final class DocIdSetBuilder {
|
|||||||
for (int i = 0; i < bufferSize; ++i) {
|
for (int i = 0; i < bufferSize; ++i) {
|
||||||
bitSet.set(buffer[i]);
|
bitSet.set(buffer[i]);
|
||||||
}
|
}
|
||||||
|
counter = this.bufferSize;
|
||||||
this.buffer = null;
|
this.buffer = null;
|
||||||
this.bufferSize = 0;
|
this.bufferSize = 0;
|
||||||
this.adder = new FixedBitSetAdder(bitSet);
|
this.adder = new FixedBitSetAdder(bitSet);
|
||||||
@ -157,7 +190,10 @@ public final class DocIdSetBuilder {
|
|||||||
growBuffer((int) newLength);
|
growBuffer((int) newLength);
|
||||||
} else {
|
} else {
|
||||||
upgradeToBitSet();
|
upgradeToBitSet();
|
||||||
|
counter += numDocs;
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
counter += numDocs;
|
||||||
}
|
}
|
||||||
return adder;
|
return adder;
|
||||||
}
|
}
|
||||||
@ -179,17 +215,32 @@ public final class DocIdSetBuilder {
|
|||||||
return l;
|
return l;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static boolean noDups(int[] a, int len) {
|
||||||
|
for (int i = 1; i < len; ++i) {
|
||||||
|
assert a[i-1] < a[i];
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Build a {@link DocIdSet} from the accumulated doc IDs.
|
* Build a {@link DocIdSet} from the accumulated doc IDs.
|
||||||
*/
|
*/
|
||||||
public DocIdSet build() {
|
public DocIdSet build() {
|
||||||
try {
|
try {
|
||||||
if (bitSet != null) {
|
if (bitSet != null) {
|
||||||
return new BitDocIdSet(bitSet);
|
assert counter >= 0;
|
||||||
|
final long cost = Math.round(counter / numValuesPerDoc);
|
||||||
|
return new BitDocIdSet(bitSet, cost);
|
||||||
} else {
|
} else {
|
||||||
LSBRadixSorter sorter = new LSBRadixSorter();
|
LSBRadixSorter sorter = new LSBRadixSorter();
|
||||||
sorter.sort(PackedInts.bitsRequired(maxDoc - 1), buffer, bufferSize);
|
sorter.sort(PackedInts.bitsRequired(maxDoc - 1), buffer, bufferSize);
|
||||||
final int l = dedup(buffer, bufferSize);
|
final int l;
|
||||||
|
if (multivalued) {
|
||||||
|
l = dedup(buffer, bufferSize);
|
||||||
|
} else {
|
||||||
|
assert noDups(buffer, bufferSize);
|
||||||
|
l = bufferSize;
|
||||||
|
}
|
||||||
assert l <= bufferSize;
|
assert l <= bufferSize;
|
||||||
buffer = ArrayUtil.grow(buffer, l + 1);
|
buffer = ArrayUtil.grow(buffer, l + 1);
|
||||||
buffer[l] = DocIdSetIterator.NO_MORE_DOCS;
|
buffer[l] = DocIdSetIterator.NO_MORE_DOCS;
|
||||||
|
@ -19,6 +19,9 @@ package org.apache.lucene.util;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.PointValues;
|
||||||
|
import org.apache.lucene.index.Terms;
|
||||||
|
import org.apache.lucene.index.TermsEnum;
|
||||||
import org.apache.lucene.search.DocIdSet;
|
import org.apache.lucene.search.DocIdSet;
|
||||||
import org.apache.lucene.search.DocIdSetIterator;
|
import org.apache.lucene.search.DocIdSetIterator;
|
||||||
|
|
||||||
@ -158,4 +161,180 @@ public class TestDocIdSetBuilder extends LuceneTestCase {
|
|||||||
assertEquals(new BitDocIdSet(expected), builder.build());
|
assertEquals(new BitDocIdSet(expected), builder.build());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testLeverageStats() throws IOException {
|
||||||
|
// single-valued points
|
||||||
|
PointValues values = new DummyPointValues(42, 42);
|
||||||
|
DocIdSetBuilder builder = new DocIdSetBuilder(100, values, "foo");
|
||||||
|
assertEquals(1d, builder.numValuesPerDoc, 0d);
|
||||||
|
assertFalse(builder.multivalued);
|
||||||
|
DocIdSetBuilder.BulkAdder adder = builder.grow(2);
|
||||||
|
adder.add(5);
|
||||||
|
adder.add(7);
|
||||||
|
DocIdSet set = builder.build();
|
||||||
|
assertTrue(set instanceof BitDocIdSet);
|
||||||
|
assertEquals(2, set.iterator().cost());
|
||||||
|
|
||||||
|
// multi-valued points
|
||||||
|
values = new DummyPointValues(42, 63);
|
||||||
|
builder = new DocIdSetBuilder(100, values, "foo");
|
||||||
|
assertEquals(1.5, builder.numValuesPerDoc, 0d);
|
||||||
|
assertTrue(builder.multivalued);
|
||||||
|
adder = builder.grow(2);
|
||||||
|
adder.add(5);
|
||||||
|
adder.add(7);
|
||||||
|
set = builder.build();
|
||||||
|
assertTrue(set instanceof BitDocIdSet);
|
||||||
|
assertEquals(1, set.iterator().cost()); // it thinks the same doc was added twice
|
||||||
|
|
||||||
|
// incomplete stats
|
||||||
|
values = new DummyPointValues(42, -1);
|
||||||
|
builder = new DocIdSetBuilder(100, values, "foo");
|
||||||
|
assertEquals(1d, builder.numValuesPerDoc, 0d);
|
||||||
|
assertTrue(builder.multivalued);
|
||||||
|
|
||||||
|
values = new DummyPointValues(-1, 84);
|
||||||
|
builder = new DocIdSetBuilder(100, values, "foo");
|
||||||
|
assertEquals(1d, builder.numValuesPerDoc, 0d);
|
||||||
|
assertTrue(builder.multivalued);
|
||||||
|
|
||||||
|
// single-valued terms
|
||||||
|
Terms terms = new DummyTerms(42, 42);
|
||||||
|
builder = new DocIdSetBuilder(100, terms);
|
||||||
|
assertEquals(1d, builder.numValuesPerDoc, 0d);
|
||||||
|
assertFalse(builder.multivalued);
|
||||||
|
adder = builder.grow(2);
|
||||||
|
adder.add(5);
|
||||||
|
adder.add(7);
|
||||||
|
set = builder.build();
|
||||||
|
assertTrue(set instanceof BitDocIdSet);
|
||||||
|
assertEquals(2, set.iterator().cost());
|
||||||
|
|
||||||
|
// multi-valued terms
|
||||||
|
terms = new DummyTerms(42, 63);
|
||||||
|
builder = new DocIdSetBuilder(100, terms);
|
||||||
|
assertEquals(1.5, builder.numValuesPerDoc, 0d);
|
||||||
|
assertTrue(builder.multivalued);
|
||||||
|
adder = builder.grow(2);
|
||||||
|
adder.add(5);
|
||||||
|
adder.add(7);
|
||||||
|
set = builder.build();
|
||||||
|
assertTrue(set instanceof BitDocIdSet);
|
||||||
|
assertEquals(1, set.iterator().cost()); // it thinks the same doc was added twice
|
||||||
|
|
||||||
|
// incomplete stats
|
||||||
|
terms = new DummyTerms(42, -1);
|
||||||
|
builder = new DocIdSetBuilder(100, terms);
|
||||||
|
assertEquals(1d, builder.numValuesPerDoc, 0d);
|
||||||
|
assertTrue(builder.multivalued);
|
||||||
|
|
||||||
|
terms = new DummyTerms(-1, 84);
|
||||||
|
builder = new DocIdSetBuilder(100, terms);
|
||||||
|
assertEquals(1d, builder.numValuesPerDoc, 0d);
|
||||||
|
assertTrue(builder.multivalued);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class DummyTerms extends Terms {
|
||||||
|
|
||||||
|
private final int docCount;
|
||||||
|
private final long numValues;
|
||||||
|
|
||||||
|
DummyTerms(int docCount, long numValues) {
|
||||||
|
this.docCount = docCount;
|
||||||
|
this.numValues = numValues;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public TermsEnum iterator() throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long size() throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getSumTotalTermFreq() throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long getSumDocFreq() throws IOException {
|
||||||
|
return numValues;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getDocCount() throws IOException {
|
||||||
|
return docCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasFreqs() {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasOffsets() {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPositions() {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasPayloads() {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private static class DummyPointValues extends PointValues {
|
||||||
|
|
||||||
|
private final int docCount;
|
||||||
|
private final long numPoints;
|
||||||
|
|
||||||
|
DummyPointValues(int docCount, long numPoints) {
|
||||||
|
this.docCount = docCount;
|
||||||
|
this.numPoints = numPoints;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void intersect(String fieldName, IntersectVisitor visitor) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public byte[] getMinPackedValue(String fieldName) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public byte[] getMaxPackedValue(String fieldName) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getNumDimensions(String fieldName) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getBytesPerDimension(String fieldName) throws IOException {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long size(String fieldName) {
|
||||||
|
return numPoints;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int getDocCount(String fieldName) {
|
||||||
|
return docCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -21,6 +21,7 @@ import java.util.ArrayList;
|
|||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
@ -84,6 +85,7 @@ public class TermsQuery extends Query implements Accountable {
|
|||||||
// Same threshold as MultiTermQueryConstantScoreWrapper
|
// Same threshold as MultiTermQueryConstantScoreWrapper
|
||||||
static final int BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD = 16;
|
static final int BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD = 16;
|
||||||
|
|
||||||
|
private final Set<String> fields;
|
||||||
private final PrefixCodedTerms termData;
|
private final PrefixCodedTerms termData;
|
||||||
private final int termDataHashCode; // cached hashcode of termData
|
private final int termDataHashCode; // cached hashcode of termData
|
||||||
|
|
||||||
@ -99,13 +101,16 @@ public class TermsQuery extends Query implements Accountable {
|
|||||||
ArrayUtil.timSort(sortedTerms);
|
ArrayUtil.timSort(sortedTerms);
|
||||||
}
|
}
|
||||||
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
|
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
|
||||||
|
Set<String> fields = new HashSet<>();
|
||||||
Term previous = null;
|
Term previous = null;
|
||||||
for (Term term : sortedTerms) {
|
for (Term term : sortedTerms) {
|
||||||
if (term.equals(previous) == false) {
|
if (term.equals(previous) == false) {
|
||||||
|
fields.add(term.field());
|
||||||
builder.add(term);
|
builder.add(term);
|
||||||
}
|
}
|
||||||
previous = term;
|
previous = term;
|
||||||
}
|
}
|
||||||
|
this.fields = Collections.unmodifiableSet(fields);
|
||||||
termData = builder.finish();
|
termData = builder.finish();
|
||||||
termDataHashCode = termData.hashCode();
|
termDataHashCode = termData.hashCode();
|
||||||
}
|
}
|
||||||
@ -132,6 +137,7 @@ public class TermsQuery extends Query implements Accountable {
|
|||||||
builder.add(field, term);
|
builder.add(field, term);
|
||||||
previous.copyBytes(term);
|
previous.copyBytes(term);
|
||||||
}
|
}
|
||||||
|
fields = Collections.singleton(field);
|
||||||
termData = builder.finish();
|
termData = builder.finish();
|
||||||
termDataHashCode = termData.hashCode();
|
termDataHashCode = termData.hashCode();
|
||||||
}
|
}
|
||||||
@ -301,7 +307,15 @@ public class TermsQuery extends Query implements Accountable {
|
|||||||
matchingTerms.add(new TermAndState(field, termsEnum));
|
matchingTerms.add(new TermAndState(field, termsEnum));
|
||||||
} else {
|
} else {
|
||||||
assert matchingTerms.size() == threshold;
|
assert matchingTerms.size() == threshold;
|
||||||
builder = new DocIdSetBuilder(reader.maxDoc());
|
if (TermsQuery.this.fields.size() == 1) {
|
||||||
|
// common case: all terms are in the same field
|
||||||
|
// use an optimized builder that leverages terms stats to be more efficient
|
||||||
|
builder = new DocIdSetBuilder(reader.maxDoc(), terms);
|
||||||
|
} else {
|
||||||
|
// corner case: different fields
|
||||||
|
// don't make assumptions about the docs we will get
|
||||||
|
builder = new DocIdSetBuilder(reader.maxDoc());
|
||||||
|
}
|
||||||
docs = termsEnum.postings(docs, PostingsEnum.NONE);
|
docs = termsEnum.postings(docs, PostingsEnum.NONE);
|
||||||
builder.add(docs);
|
builder.add(docs);
|
||||||
for (TermAndState t : matchingTerms) {
|
for (TermAndState t : matchingTerms) {
|
||||||
|
@ -163,8 +163,8 @@ public class IntersectsRPTVerifyQuery extends Query {
|
|||||||
// TODO consider if IntersectsPrefixTreeQuery should simply do this and provide both sets
|
// TODO consider if IntersectsPrefixTreeQuery should simply do this and provide both sets
|
||||||
|
|
||||||
class IntersectsDifferentiatingVisitor extends VisitorTemplate {
|
class IntersectsDifferentiatingVisitor extends VisitorTemplate {
|
||||||
DocIdSetBuilder approxBuilder = new DocIdSetBuilder(maxDoc);
|
DocIdSetBuilder approxBuilder = new DocIdSetBuilder(maxDoc, terms);
|
||||||
DocIdSetBuilder exactBuilder = new DocIdSetBuilder(maxDoc);
|
DocIdSetBuilder exactBuilder = new DocIdSetBuilder(maxDoc, terms);
|
||||||
boolean approxIsEmpty = true;
|
boolean approxIsEmpty = true;
|
||||||
boolean exactIsEmpty = true;
|
boolean exactIsEmpty = true;
|
||||||
DocIdSet exactDocIdSet;
|
DocIdSet exactDocIdSet;
|
||||||
|
@ -105,16 +105,20 @@ public abstract class AbstractPrefixTreeQuery extends Query {
|
|||||||
protected final LeafReaderContext context;
|
protected final LeafReaderContext context;
|
||||||
protected final int maxDoc;
|
protected final int maxDoc;
|
||||||
|
|
||||||
protected TermsEnum termsEnum;//remember to check for null!
|
protected final Terms terms;
|
||||||
|
protected final TermsEnum termsEnum;//remember to check for null!
|
||||||
protected PostingsEnum postingsEnum;
|
protected PostingsEnum postingsEnum;
|
||||||
|
|
||||||
public BaseTermsEnumTraverser(LeafReaderContext context) throws IOException {
|
public BaseTermsEnumTraverser(LeafReaderContext context) throws IOException {
|
||||||
this.context = context;
|
this.context = context;
|
||||||
LeafReader reader = context.reader();
|
LeafReader reader = context.reader();
|
||||||
this.maxDoc = reader.maxDoc();
|
this.maxDoc = reader.maxDoc();
|
||||||
Terms terms = reader.terms(fieldName);
|
terms = reader.terms(fieldName);
|
||||||
if (terms != null)
|
if (terms != null) {
|
||||||
this.termsEnum = terms.iterator();
|
this.termsEnum = terms.iterator();
|
||||||
|
} else {
|
||||||
|
this.termsEnum = null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void collectDocs(BitSet bitSet) throws IOException {
|
protected void collectDocs(BitSet bitSet) throws IOException {
|
||||||
|
@ -55,8 +55,8 @@ public class IntersectsPrefixTreeQuery extends AbstractVisitingPrefixTreeQuery {
|
|||||||
private DocIdSetBuilder results;
|
private DocIdSetBuilder results;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void start() {
|
protected void start() throws IOException {
|
||||||
results = new DocIdSetBuilder(maxDoc);
|
results = new DocIdSetBuilder(maxDoc, terms);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -93,7 +93,7 @@ final class GeoPointTermQueryConstantScoreWrapper <Q extends GeoPointMultiTermQu
|
|||||||
|
|
||||||
LeafReader reader = context.reader();
|
LeafReader reader = context.reader();
|
||||||
// approximation (postfiltering has not yet been applied)
|
// approximation (postfiltering has not yet been applied)
|
||||||
DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
|
DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc(), terms);
|
||||||
// subset of documents that need no postfiltering, this is purely an optimization
|
// subset of documents that need no postfiltering, this is purely an optimization
|
||||||
final BitSet preApproved;
|
final BitSet preApproved;
|
||||||
// dumb heuristic: if the field is really sparse, use a sparse impl
|
// dumb heuristic: if the field is really sparse, use a sparse impl
|
||||||
|
@ -97,7 +97,7 @@ final class PointInGeo3DShapeQuery extends Query {
|
|||||||
assert xyzSolid.getRelationship(shape) == GeoArea.WITHIN || xyzSolid.getRelationship(shape) == GeoArea.OVERLAPS: "expected WITHIN (1) or OVERLAPS (2) but got " + xyzSolid.getRelationship(shape) + "; shape="+shape+"; XYZSolid="+xyzSolid;
|
assert xyzSolid.getRelationship(shape) == GeoArea.WITHIN || xyzSolid.getRelationship(shape) == GeoArea.OVERLAPS: "expected WITHIN (1) or OVERLAPS (2) but got " + xyzSolid.getRelationship(shape) + "; shape="+shape+"; XYZSolid="+xyzSolid;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc());
|
DocIdSetBuilder result = new DocIdSetBuilder(reader.maxDoc(), values, field);
|
||||||
|
|
||||||
values.intersect(field, new PointInShapeIntersectVisitor(result, shape, shapeBounds));
|
values.intersect(field, new PointInShapeIntersectVisitor(result, shape, shapeBounds));
|
||||||
|
|
||||||
|
@ -433,7 +433,7 @@ public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetPro
|
|||||||
termsVisited += builder.add(termsEnum, base, liveDocs);
|
termsVisited += builder.add(termsEnum, base, liveDocs);
|
||||||
*/
|
*/
|
||||||
|
|
||||||
DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc());
|
DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc(), terms);
|
||||||
builder.grow((int)Math.min(Integer.MAX_VALUE,count));
|
builder.grow((int)Math.min(Integer.MAX_VALUE,count));
|
||||||
if (collectedTerms.isEmpty() == false) {
|
if (collectedTerms.isEmpty() == false) {
|
||||||
TermsEnum termsEnum2 = terms.iterator();
|
TermsEnum termsEnum2 = terms.iterator();
|
||||||
|
@ -226,7 +226,7 @@ public class GraphTermsQParserPlugin extends QParserPlugin {
|
|||||||
Terms terms = fields.terms(field);
|
Terms terms = fields.terms(field);
|
||||||
TermsEnum termsEnum = terms.iterator();
|
TermsEnum termsEnum = terms.iterator();
|
||||||
PostingsEnum docs = null;
|
PostingsEnum docs = null;
|
||||||
DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
|
DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc(), terms);
|
||||||
for (int i=0; i<finalContexts.size(); i++) {
|
for (int i=0; i<finalContexts.size(); i++) {
|
||||||
TermContext termContext = finalContexts.get(i);
|
TermContext termContext = finalContexts.get(i);
|
||||||
TermState termState = termContext.get(context.ord);
|
TermState termState = termContext.get(context.ord);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user