mirror of https://github.com/apache/lucene.git
LUCENE-6360: Make TermsQuery rewrite as a disjunction when there are few terms.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1680946 13f79535-47bb-0310-9956-ffa450edef68
parent 38876c2209
commit e036005cd6
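In brief: where TermsQuery previously always built a per-segment bit set over its wrapped terms, after this commit it rewrites to a constant-score BooleanQuery when the total term count is at most BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD (16, capped by BooleanQuery.getMaxClauseCount()), and otherwise decides per segment based on how many terms actually match. A minimal sketch of the few-terms path, assuming 5.x-era lucene-core, lucene-queries and lucene-analyzers-common on the classpath; the class name, field names and values are illustrative, not part of the commit:

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.RAMDirectory;

public class TermsQueryRewriteDemo {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new WhitespaceAnalyzer()))) {
      Document doc = new Document();
      doc.add(new StringField("color", "blue", Store.NO));
      w.addDocument(doc);
    }
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      // Two terms, well under the 16-term threshold: after this commit the
      // query rewrites to a ConstantScoreQuery over a BooleanQuery of
      // TermQuery clauses instead of eagerly building a bit set.
      Query few = new TermsQuery(new Term("color", "blue"), new Term("color", "red"));
      Query rewritten = new IndexSearcher(reader).rewrite(few);
      System.out.println(rewritten); // e.g. a constant-score disjunction
    }
  }
}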
lucene/core/src/java/org/apache/lucene/index/CoalescedUpdates.java
@@ -34,7 +34,7 @@ class CoalescedUpdates {
   final List<PrefixCodedTerms> terms = new ArrayList<>();
   final List<NumericDocValuesUpdate> numericDVUpdates = new ArrayList<>();
   final List<BinaryDocValuesUpdate> binaryDVUpdates = new ArrayList<>();
-  int totalTermCount;
+  long totalTermCount;

   @Override
   public String toString() {
@@ -46,7 +46,7 @@ class CoalescedUpdates {
   }

   void update(FrozenBufferedUpdates in) {
-    totalTermCount += in.termCount;
+    totalTermCount += in.terms.size();
     terms.add(in.terms);

     for (int queryIdx = 0; queryIdx < in.queries.length; queryIdx++) {
lucene/core/src/java/org/apache/lucene/index/FrozenBufferedUpdates.java
@@ -44,7 +44,6 @@ class FrozenBufferedUpdates {

   // Terms, in sorted order:
   final PrefixCodedTerms terms;
-  int termCount; // just for debugging

   // Parallel array of deleted query, and the docIDUpto for each
   final Query[] queries;
@@ -68,7 +67,6 @@ class FrozenBufferedUpdates {
     this.isSegmentPrivate = isSegmentPrivate;
     assert !isSegmentPrivate || deletes.terms.size() == 0 : "segment private package should only have del queries";
     Term termsArray[] = deletes.terms.keySet().toArray(new Term[deletes.terms.size()]);
-    termCount = termsArray.length;
     ArrayUtil.timSort(termsArray);
     PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
     for (Term term : termsArray) {
@@ -167,7 +165,7 @@ class FrozenBufferedUpdates {
   public String toString() {
     String s = "";
     if (numTermDeletes != 0) {
-      s += " " + numTermDeletes + " deleted terms (unique count=" + termCount + ")";
+      s += " " + numTermDeletes + " deleted terms (unique count=" + terms.size() + ")";
     }
     if (queries.length != 0) {
       s += " " + queries.length + " deleted queries";
@@ -180,6 +178,6 @@ class FrozenBufferedUpdates {
   }

   boolean any() {
-    return termCount > 0 || queries.length > 0 || numericDVUpdates.length > 0 || binaryDVUpdates.length > 0;
+    return terms.size() > 0 || queries.length > 0 || numericDVUpdates.length > 0 || binaryDVUpdates.length > 0;
   }
 }
lucene/core/src/java/org/apache/lucene/index/PrefixCodedTerms.java
@@ -27,6 +27,7 @@ import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
+import org.apache.lucene.util.RamUsageEstimator;

 /**
  * Prefix codes term instances (prefixes are shared)
@@ -34,15 +35,17 @@ import org.apache.lucene.util.BytesRefBuilder;
  */
 public class PrefixCodedTerms implements Accountable {
   final RAMFile buffer;
+  private final long size;
   private long delGen;

-  private PrefixCodedTerms(RAMFile buffer) {
+  private PrefixCodedTerms(RAMFile buffer, long size) {
     this.buffer = Objects.requireNonNull(buffer);
+    this.size = size;
   }

   @Override
   public long ramBytesUsed() {
-    return buffer.ramBytesUsed();
+    return buffer.ramBytesUsed() + 2 * RamUsageEstimator.NUM_BYTES_LONG;
   }

   /** Records del gen for this packet. */
@@ -56,6 +59,7 @@ public class PrefixCodedTerms implements Accountable {
     private RAMOutputStream output = new RAMOutputStream(buffer, false);
     private Term lastTerm = new Term("");
     private BytesRefBuilder lastTermBytes = new BytesRefBuilder();
+    private long size;

     /** Sole constructor. */
     public Builder() {}
@@ -78,6 +82,7 @@ public class PrefixCodedTerms implements Accountable {
         lastTermBytes.copyBytes(term.bytes);
         lastTerm.bytes = lastTermBytes.get();
         lastTerm.field = term.field;
+        size += 1;
       } catch (IOException e) {
         throw new RuntimeException(e);
       }
@@ -87,7 +92,7 @@ public class PrefixCodedTerms implements Accountable {
     public PrefixCodedTerms finish() {
       try {
         output.close();
-        return new PrefixCodedTerms(buffer);
+        return new PrefixCodedTerms(buffer, size);
       } catch (IOException e) {
         throw new RuntimeException(e);
       }
@@ -172,6 +177,11 @@ public class PrefixCodedTerms implements Accountable {
     return new TermIterator(delGen, buffer);
   }

+  /** Return the number of terms stored in this {@link PrefixCodedTerms}. */
+  public long size() {
+    return size;
+  }
+
   @Override
   public int hashCode() {
     int h = buffer.hashCode();
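The new size() makes the term count a first-class property of the packet rather than a side counter kept in FrozenBufferedUpdates. A hypothetical sketch of the Builder contract, assuming the public 5.x API shown above (terms must be added in sorted order, which FrozenBufferedUpdates guarantees by tim-sorting first):

import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.Term;

public class PrefixCodedTermsSizeDemo {
  public static void main(String[] args) {
    PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
    builder.add(new Term("field", "a")); // add() now also bumps the size counter
    builder.add(new Term("field", "b")); // terms must arrive in sorted order
    PrefixCodedTerms packet = builder.finish();
    System.out.println(packet.size()); // 2
  }
}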
lucene/core/src/java/org/apache/lucene/search/Weight.java
@@ -145,10 +145,12 @@ public abstract class Weight {
     return new DefaultBulkScorer(scorer);
   }

-  /** Just wraps a Scorer and performs top scoring using it. */
-  static class DefaultBulkScorer extends BulkScorer {
+  /** Just wraps a Scorer and performs top scoring using it.
+   * @lucene.internal */
+  protected static class DefaultBulkScorer extends BulkScorer {
     private final Scorer scorer;

+    /** Sole constructor. */
     public DefaultBulkScorer(Scorer scorer) {
       if (scorer == null) {
         throw new NullPointerException();
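DefaultBulkScorer goes from package-private to protected (and @lucene.internal) so that Weight subclasses outside org.apache.lucene.search, such as the TermsQuery weight below, can reuse it instead of duplicating it. A sketch under that assumption; the match-all scorer body and the DocIdSetIterator.all(...) call are illustrative, not part of this commit:

import java.io.IOException;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.Bits;

// Hypothetical weight that matches every doc at a constant score and
// delegates bulk scoring to the now-protected Weight.DefaultBulkScorer.
class MatchAllConstantWeight extends ConstantScoreWeight {

  MatchAllConstantWeight(Query query) {
    super(query);
  }

  @Override
  public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
    // Illustrative only: ignores acceptDocs for brevity.
    return new ConstantScoreScorer(this, score(), DocIdSetIterator.all(context.reader().maxDoc()));
  }

  @Override
  public BulkScorer bulkScorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
    Scorer scorer = scorer(context, acceptDocs);
    // Reuse the default implementation rather than re-implementing it.
    return scorer == null ? null : new DefaultBulkScorer(scorer);
  }
}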
lucene/core/src/test/org/apache/lucene/index/TestDocumentsWriterDeleteQueue.java
@@ -136,7 +136,7 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
       assertTrue(queue.anyChanges());
       if (random().nextInt(5) == 0) {
         FrozenBufferedUpdates freezeGlobalBuffer = queue.freezeGlobalBuffer(null);
-        assertEquals(termsSinceFreeze, freezeGlobalBuffer.termCount);
+        assertEquals(termsSinceFreeze, freezeGlobalBuffer.terms.size());
         assertEquals(queriesSinceFreeze, freezeGlobalBuffer.queries.length);
         queriesSinceFreeze = 0;
         termsSinceFreeze = 0;
@@ -168,7 +168,7 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
     assertTrue("changes in global buffer", queue.anyChanges());
     FrozenBufferedUpdates freezeGlobalBuffer = queue.freezeGlobalBuffer(null);
     assertTrue(freezeGlobalBuffer.any());
-    assertEquals(1, freezeGlobalBuffer.termCount);
+    assertEquals(1, freezeGlobalBuffer.terms.size());
     assertFalse("all changes applied", queue.anyChanges());
   }

lucene/core/src/test/org/apache/lucene/index/TestPrefixCodedTerms.java
@@ -64,6 +64,7 @@ public class TestPrefixCodedTerms extends LuceneTestCase {

     TermIterator iter = pb.iterator();
     Iterator<Term> expected = terms.iterator();
+    assertEquals(terms.size(), pb.size());
     //System.out.println("TEST: now iter");
     while (iter.next() != null) {
       assertTrue(expected.hasNext());
lucene/queries/src/java/org/apache/lucene/queries/TermsQuery.java
@@ -18,22 +18,29 @@ package org.apache.lucene.queries;
  */

 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
+import java.util.Objects;
 import java.util.Set;

 import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.PostingsEnum;
 import org.apache.lucene.index.PrefixCodedTerms;
 import org.apache.lucene.index.PrefixCodedTerms.TermIterator;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermContext;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.BulkScorer;
 import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.ConstantScoreScorer;
 import org.apache.lucene.search.ConstantScoreWeight;
@@ -41,6 +48,7 @@ import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.Accountable;
 import org.apache.lucene.util.ArrayUtil;
@@ -64,15 +72,17 @@ import org.apache.lucene.util.ToStringUtils;
  * bq.add(new TermQuery(new Term("field", "bar")), Occur.SHOULD);
  * Query q2 = new ConstantScoreQuery(bq);
  * </pre>
- * <p>This query creates a bit set and sets bits that match any of the
- * wrapped terms. While this might help performance when there are many terms,
- * it would be slower than a {@link BooleanQuery} when there are few terms to
- * match.
+ * <p>When there are few terms, this query executes like a regular disjunction.
+ * However, when there are many terms, instead of merging iterators on the fly,
+ * it will populate a bit set with matching docs and return a {@link Scorer}
+ * over this bit set.
  * <p>NOTE: This query produces scores that are equal to its boost
  */
 public class TermsQuery extends Query implements Accountable {

   private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(TermsQuery.class);
+  // Same threshold as MultiTermQueryConstantScoreWrapper
+  static final int BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD = 16;

   private final PrefixCodedTerms termData;
   private final int termDataHashCode; // cached hashcode of termData
@@ -130,6 +140,23 @@ public class TermsQuery extends Query implements Accountable {
     this(Arrays.asList(terms));
   }

+  @Override
+  public Query rewrite(IndexReader reader) throws IOException {
+    final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
+    if (termData.size() <= threshold) {
+      BooleanQuery bq = new BooleanQuery();
+      TermIterator iterator = termData.iterator();
+      for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
+        bq.add(new TermQuery(new Term(iterator.field(), BytesRef.deepCopyOf(term))), Occur.SHOULD);
+      }
+      assert bq.clauses().size() == termData.size();
+      ConstantScoreQuery csq = new ConstantScoreQuery(bq);
+      csq.setBoost(getBoost());
+      return csq;
+    }
+    return super.rewrite(reader);
+  }
+
   @Override
   public boolean equals(Object obj) {
     if (this == obj) {
@@ -177,6 +204,39 @@ public class TermsQuery extends Query implements Accountable {
     return Collections.emptyList();
   }

+  private static class TermAndState {
+    final String field;
+    final TermsEnum termsEnum;
+    final BytesRef term;
+    final TermState state;
+    final int docFreq;
+    final long totalTermFreq;
+
+    TermAndState(String field, TermsEnum termsEnum) throws IOException {
+      this.field = field;
+      this.termsEnum = termsEnum;
+      this.term = BytesRef.deepCopyOf(termsEnum.term());
+      this.state = termsEnum.termState();
+      this.docFreq = termsEnum.docFreq();
+      this.totalTermFreq = termsEnum.totalTermFreq();
+    }
+  }
+
+  private static class WeightOrBitSet {
+    final Weight weight;
+    final BitDocIdSet bitset;
+
+    WeightOrBitSet(Weight weight) {
+      this.weight = Objects.requireNonNull(weight);
+      this.bitset = null;
+    }
+
+    WeightOrBitSet(BitDocIdSet bitset) {
+      this.bitset = bitset;
+      this.weight = null;
+    }
+  }
+
   @Override
   public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
     return new ConstantScoreWeight(this) {
@@ -189,10 +249,20 @@ public class TermsQuery extends Query implements Accountable {
         // order to protect highlighters
       }

-      @Override
-      public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
+      /**
+       * On the given leaf context, try to either rewrite to a disjunction if
+       * there are few matching terms, or build a bitset containing matching docs.
+       */
+      private WeightOrBitSet rewrite(LeafReaderContext context, Bits acceptDocs) throws IOException {
         final LeafReader reader = context.reader();
-        BitDocIdSet.Builder builder = new BitDocIdSet.Builder(reader.maxDoc());
+
+        // We will first try to collect up to 'threshold' terms into 'matchingTerms'
+        // if there are too many terms, we will fall back to building the 'builder'
+        final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
+        assert termData.size() > threshold : "Query should have been rewritten";
+        List<TermAndState> matchingTerms = new ArrayList<>(threshold);
+        BitDocIdSet.Builder builder = null;
+
         final Fields fields = reader.fields();
         String lastField = null;
         Terms terms = null;
@@ -209,24 +279,79 @@ public class TermsQuery extends Query implements Accountable {
             } else {
               termsEnum = terms.iterator();
             }
+            lastField = field;
           }
           if (termsEnum != null && termsEnum.seekExact(term)) {
+            if (matchingTerms == null) {
               docs = termsEnum.postings(acceptDocs, docs, PostingsEnum.NONE);
               builder.or(docs);
+            } else if (matchingTerms.size() < threshold) {
+              matchingTerms.add(new TermAndState(field, termsEnum));
+            } else {
+              assert matchingTerms.size() == threshold;
+              builder = new BitDocIdSet.Builder(reader.maxDoc());
+              docs = termsEnum.postings(acceptDocs, docs, PostingsEnum.NONE);
+              builder.or(docs);
+              for (TermAndState t : matchingTerms) {
+                t.termsEnum.seekExact(t.term, t.state);
+                docs = t.termsEnum.postings(acceptDocs, docs, PostingsEnum.NONE);
+                builder.or(docs);
+              }
+              matchingTerms = null;
+            }
           }
         }
-        BitDocIdSet result = builder.build();
-        if (result == null) {
-          return null;
+        if (matchingTerms != null) {
+          assert builder == null;
+          BooleanQuery bq = new BooleanQuery();
+          for (TermAndState t : matchingTerms) {
+            final TermContext termContext = new TermContext(searcher.getTopReaderContext());
+            termContext.register(t.state, context.ord, t.docFreq, t.totalTermFreq);
+            bq.add(new TermQuery(new Term(t.field, t.term), termContext), Occur.SHOULD);
+          }
+          Query q = new ConstantScoreQuery(bq);
+          q.setBoost(score());
+          return new WeightOrBitSet(searcher.rewrite(q).createWeight(searcher, needsScores));
+        } else {
+          assert builder != null;
+          return new WeightOrBitSet(builder.build());
+        }
       }

-        final DocIdSetIterator disi = result.iterator();
+      private Scorer scorer(BitDocIdSet set) {
+        if (set == null) {
+          return null;
+        }
+        final DocIdSetIterator disi = set.iterator();
         if (disi == null) {
           return null;
         }

         return new ConstantScoreScorer(this, score(), disi);
       }
+
+      @Override
+      public BulkScorer bulkScorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
+        final WeightOrBitSet weightOrBitSet = rewrite(context, acceptDocs);
+        if (weightOrBitSet.weight != null) {
+          return weightOrBitSet.weight.bulkScorer(context, acceptDocs);
+        } else {
+          final Scorer scorer = scorer(weightOrBitSet.bitset);
+          if (scorer == null) {
+            return null;
+          }
+          return new DefaultBulkScorer(scorer);
+        }
+      }
+
+      @Override
+      public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
+        final WeightOrBitSet weightOrBitSet = rewrite(context, acceptDocs);
+        if (weightOrBitSet.weight != null) {
+          return weightOrBitSet.weight.scorer(context, acceptDocs);
+        } else {
+          return scorer(weightOrBitSet.bitset);
+        }
+      }
     };
   }
 }
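The complementary case to the rewrite() hunk above, as a hypothetical sketch with the same classpath assumptions as the first demo: past the threshold, rewrite() returns the TermsQuery unchanged, and the weight's per-leaf logic chooses between a disjunction and a bit set depending on how many of the terms actually occur in that segment.

import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.RAMDirectory;

public class TermsQueryManyTermsDemo {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new WhitespaceAnalyzer()))) {
      Document doc = new Document();
      doc.add(new StringField("id", "42", Store.NO));
      w.addDocument(doc);
    }
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      List<Term> terms = new ArrayList<>();
      for (int i = 0; i < 100; ++i) { // well past the 16-term threshold
        terms.add(new Term("id", Integer.toString(i)));
      }
      Query many = new TermsQuery(terms);
      // No eager rewrite: the disjunction-vs-bitset decision is deferred
      // to TermsQuery's Weight, one leaf at a time.
      System.out.println(new IndexSearcher(reader).rewrite(many) == many); // true
    }
  }
}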
lucene/queries/src/test/org/apache/lucene/queries/TermsQueryTest.java
@@ -25,13 +25,21 @@ import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
+import java.util.concurrent.atomic.AtomicInteger;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FilterDirectoryReader;
+import org.apache.lucene.index.FilterLeafReader;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.ConstantScoreQuery;
@@ -48,6 +56,7 @@ import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.RamUsageTester;
 import org.apache.lucene.util.TestUtil;

+import com.carrotsearch.randomizedtesting.generators.RandomPicks;
 import com.carrotsearch.randomizedtesting.generators.RandomStrings;

 public class TermsQueryTest extends LuceneTestCase {
@@ -223,4 +232,94 @@ public class TermsQueryTest extends LuceneTestCase {
     // error margin within 5%
     assertEquals(actualRamBytesUsed, expectedRamBytesUsed, actualRamBytesUsed / 20);
   }
+
+  private static class TermsCountingDirectoryReaderWrapper extends FilterDirectoryReader {
+
+    private final AtomicInteger counter;
+
+    public TermsCountingDirectoryReaderWrapper(DirectoryReader in, AtomicInteger counter) throws IOException {
+      super(in, new TermsCountingSubReaderWrapper(counter));
+      this.counter = counter;
+    }
+
+    private static class TermsCountingSubReaderWrapper extends SubReaderWrapper {
+      private final AtomicInteger counter;
+
+      public TermsCountingSubReaderWrapper(AtomicInteger counter) {
+        this.counter = counter;
+      }
+
+      @Override
+      public LeafReader wrap(LeafReader reader) {
+        return new TermsCountingLeafReaderWrapper(reader, counter);
+      }
+    }
+
+    private static class TermsCountingLeafReaderWrapper extends FilterLeafReader {
+
+      private final AtomicInteger counter;
+
+      public TermsCountingLeafReaderWrapper(LeafReader in, AtomicInteger counter) {
+        super(in);
+        this.counter = counter;
+      }
+
+      @Override
+      public Fields fields() throws IOException {
+        return new FilterFields(in.fields()) {
+          @Override
+          public Terms terms(String field) throws IOException {
+            final Terms in = this.in.terms(field);
+            if (in == null) {
+              return null;
+            }
+            return new FilterTerms(in) {
+              @Override
+              public TermsEnum iterator() throws IOException {
+                counter.incrementAndGet();
+                return super.iterator();
+              }
+            };
+          }
+        };
+      }
+
+    }
+
+    @Override
+    protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
+      return new TermsCountingDirectoryReaderWrapper(in, counter);
+    }
+
+  }
+
+  public void testPullOneTermsEnumPerField() throws Exception {
+    Directory dir = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random(), dir);
+    Document doc = new Document();
+    doc.add(new StringField("foo", "1", Store.NO));
+    doc.add(new StringField("bar", "2", Store.NO));
+    doc.add(new StringField("baz", "3", Store.NO));
+    w.addDocument(doc);
+    DirectoryReader reader = w.getReader();
+    w.close();
+    final AtomicInteger counter = new AtomicInteger();
+    DirectoryReader wrapped = new TermsCountingDirectoryReaderWrapper(reader, counter);
+
+    final List<Term> terms = new ArrayList<>();
+    final Set<String> fields = new HashSet<>();
+    // enough terms to avoid the rewrite
+    final int numTerms = TestUtil.nextInt(random(), TermsQuery.BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD + 1, 100);
+    for (int i = 0; i < numTerms; ++i) {
+      final String field = RandomPicks.randomFrom(random(), new String[] {"foo", "bar", "baz"});
+      final BytesRef term = new BytesRef(RandomStrings.randomUnicodeOfCodepointLength(random(), 10));
+      fields.add(field);
+      terms.add(new Term(field, term));
+    }
+
+    new IndexSearcher(wrapped).count(new TermsQuery(terms));
+    assertEquals(fields.size(), counter.get());
+    wrapped.close();
+    dir.close();
+  }
 }