Speedup sandbox/DocValuesTermsQuery (#12129)

* Optimize the common case where docs have only a single value for the field
* In the multivalued case, stop reading a document's ordinals once one exceeds the maximum ordinal in the query's set (ordinals come back in sorted order)
* Implement ScorerSupplier, so that a (potentially large) number of ordinal lookups isn't performed just to get the cost()
* Graduate to Sorted(Set)DocValuesField.newSlowSetQuery, complementing newSlowRangeQuery and newSlowExactQuery

Like the other slow queries in these classes, it's currently recommended only in combination with points, e.g. IndexOrDocValuesQuery(new PointInSetQuery, newSlowSetQuery); see the usage sketch below.
Robert Muir 2023-02-06 12:47:53 -05:00 committed by GitHub
parent 10d9c7440b
commit 0bc4135695
7 changed files with 312 additions and 339 deletions
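
A usage sketch of that recommended pattern (hedged: the field name "id", the values, and the assumption that the field is indexed both as a BinaryPoint and as a SortedDocValuesField are illustrative, not part of this commit):

import java.nio.charset.StandardCharsets;
import org.apache.lucene.document.BinaryPoint;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;

// Points-based set query: can lead iteration efficiently when it is the selective clause.
Query indexQuery =
    BinaryPoint.newSetQuery(
        "id", "4".getBytes(StandardCharsets.UTF_8), "7".getBytes(StandardCharsets.UTF_8));
// Doc-values set query from this commit: cheap to verify a small candidate set.
Query dvQuery = SortedDocValuesField.newSlowSetQuery("id", new BytesRef("4"), new BytesRef("7"));
// IndexOrDocValuesQuery picks whichever execution strategy is cheaper per segment.
Query query = new IndexOrDocValuesQuery(indexQuery, dvQuery);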

SortedDocValuesField.java

@@ -88,4 +88,16 @@ public class SortedDocValuesField extends Field {
public static Query newSlowExactQuery(String field, BytesRef value) {
return newSlowRangeQuery(field, value, value, true, true);
}
+ /**
+  * Create a query matching any of the specified values.
+  *
+  * <p><b>NOTE</b>: Such queries cannot efficiently advance to the next match, which makes them
+  * slow if they are not ANDed with a selective query. As a consequence, they are best used wrapped
+  * in an {@link IndexOrDocValuesQuery}, alongside a set query that executes on points, such as
+  * {@link BinaryPoint#newSetQuery}.
+  */
+ public static Query newSlowSetQuery(String field, BytesRef... values) {
+   return new SortedSetDocValuesSetQuery(field, values.clone());
+ }
}
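
A note on why the single-valued field can share one query class (a sketch of the assumed mechanics, not code from this commit): DocValues.getSortedSet exposes a SORTED field as a singleton SORTED_SET, and DocValues.unwrapSingleton recovers the single-valued view — exactly the fast path the new query takes.

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;

// leafReader and "field" are placeholders for illustration.
SortedSetDocValues values = DocValues.getSortedSet(leafReader, "field");
SortedDocValues singleton = DocValues.unwrapSingleton(values); // non-null when the field is single-valued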

SortedSetDocValuesField.java

@@ -90,4 +90,18 @@ public class SortedSetDocValuesField extends Field {
public static Query newSlowExactQuery(String field, BytesRef value) {
return newSlowRangeQuery(field, value, value, true, true);
}
+ /**
+  * Create a query matching any of the specified values.
+  *
+  * <p>This query also works with fields that have indexed {@link SortedDocValuesField}s.
+  *
+  * <p><b>NOTE</b>: Such queries cannot efficiently advance to the next match, which makes them
+  * slow if they are not ANDed with a selective query. As a consequence, they are best used wrapped
+  * in an {@link IndexOrDocValuesQuery}, alongside a set query that executes on points, such as
+  * {@link BinaryPoint#newSetQuery}.
+  */
+ public static Query newSlowSetQuery(String field, BytesRef... values) {
+   return new SortedSetDocValuesSetQuery(field, values.clone());
+ }
}
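
A minimal indexing-plus-search sketch for the multivalued case (the "tags" field, its values, and the writer are illustrative assumptions):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;

Document doc = new Document();
doc.add(new SortedSetDocValuesField("tags", new BytesRef("red")));
doc.add(new SortedSetDocValuesField("tags", new BytesRef("blue")));
writer.addDocument(doc); // writer: a hypothetical IndexWriter

// Matches any document whose "tags" contains at least one of the given values.
Query q = SortedSetDocValuesField.newSlowSetQuery("tags", new BytesRef("blue"), new BytesRef("green"));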

SortedSetDocValuesSetQuery.java (new file)

@@ -0,0 +1,209 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.PrefixCodedTerms.TermIterator;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.RamUsageEstimator;
/** Similar to SortedSetDocValuesRangeQuery but for a set */
final class SortedSetDocValuesSetQuery extends Query implements Accountable {
private static final long BASE_RAM_BYTES =
RamUsageEstimator.shallowSizeOfInstance(SortedSetDocValuesSetQuery.class);
private final String field;
private final PrefixCodedTerms termData;
private final int termDataHashCode; // cached hashcode of termData
SortedSetDocValuesSetQuery(String field, BytesRef terms[]) {
this.field = Objects.requireNonNull(field);
Objects.requireNonNull(terms);
ArrayUtil.timSort(terms);
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
BytesRef previous = null;
for (BytesRef term : terms) {
if (term.equals(previous) == false) {
builder.add(field, term);
}
previous = term;
}
termData = builder.finish();
termDataHashCode = termData.hashCode();
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) && equalsTo(getClass().cast(other));
}
private boolean equalsTo(SortedSetDocValuesSetQuery other) {
// termData might be heavy to compare so check the hash code first
return termDataHashCode == other.termDataHashCode && termData.equals(other.termData);
}
@Override
public int hashCode() {
return Objects.hash(classHash(), termDataHashCode);
}
@Override
public String toString(String defaultField) {
StringBuilder builder = new StringBuilder();
boolean first = true;
TermIterator iterator = termData.iterator();
for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
if (!first) {
builder.append(' ');
}
first = false;
builder.append(new Term(iterator.field(), term).toString());
}
return builder.toString();
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES
+ RamUsageEstimator.sizeOfObject(field)
+ RamUsageEstimator.sizeOfObject(termData);
}
@Override
public void visit(QueryVisitor visitor) {
if (visitor.acceptField(field)) {
visitor.visitLeaf(this);
}
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
ScorerSupplier scorerSupplier = scorerSupplier(context);
if (scorerSupplier == null) {
return null;
}
return scorerSupplier.get(Long.MAX_VALUE);
}
@Override
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
final Weight weight = this;
if (context.reader().getFieldInfos().fieldInfo(field) == null) {
return null;
}
final SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);
// implement ScorerSupplier, since we do some expensive stuff to make a scorer
return new ScorerSupplier() {
@Override
public Scorer get(long leadCost) throws IOException {
final LongBitSet bits = new LongBitSet(values.getValueCount());
long maxOrd = -1;
TermIterator termIterator = termData.iterator();
for (BytesRef term = termIterator.next(); term != null; term = termIterator.next()) {
final long ord = values.lookupTerm(term);
if (ord >= 0) {
maxOrd = ord;
bits.set(ord);
}
}
// no terms matched in this segment
if (maxOrd < 0) {
return new ConstantScoreScorer(weight, score(), scoreMode, DocIdSetIterator.empty());
}
final SortedDocValues singleton = DocValues.unwrapSingleton(values);
final TwoPhaseIterator iterator;
final long max = maxOrd;
if (singleton != null) {
iterator =
new TwoPhaseIterator(singleton) {
@Override
public boolean matches() throws IOException {
return bits.get(singleton.ordValue());
}
@Override
public float matchCost() {
return 3; // lookup in a bitset
}
};
} else {
iterator =
new TwoPhaseIterator(values) {
@Override
public boolean matches() throws IOException {
for (int i = 0; i < values.docValueCount(); i++) {
long value = values.nextOrd();
if (value > max) {
return false; // values are sorted, terminate
} else if (bits.get(value)) {
return true;
}
}
return false;
}
@Override
public float matchCost() {
return 3; // lookup in a bitset
}
};
}
return new ConstantScoreScorer(weight, score(), scoreMode, iterator);
}
@Override
public long cost() {
return values.cost();
}
};
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return DocValues.isCacheable(ctx, field);
}
};
}
}
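
What the ScorerSupplier buys here, in sketch form (weight, leafContext, and leadCost are placeholders; this is how a consumer such as IndexOrDocValuesQuery is expected to use it, not code from this commit): cost() returns values.cost() immediately, and the per-term lookupTerm calls only run inside get(), so a caller that rejects this clause on cost never pays for the ordinal lookups.

ScorerSupplier supplier = weight.scorerSupplier(leafContext);
if (supplier != null) {
  long cost = supplier.cost(); // cheap: no ordinal lookups have happened yet
  if (cost <= leadCost) {
    Scorer scorer = supplier.get(leadCost); // terms are looked up against the dictionary only now
  }
}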

PrefixCodedTerms.java

@@ -200,6 +200,6 @@ public class PrefixCodedTerms implements Accountable {
}
PrefixCodedTerms other = (PrefixCodedTerms) obj;
- return delGen == other.delGen && this.content.equals(other.content);
+ return delGen == other.delGen && size() == other.size() && this.content.equals(other.content);
}
}

TestDocValuesTermsQuery.java → TestSortedSetDocValuesSetQuery.java (moved and renamed)

@@ -14,15 +14,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
- package org.apache.lucene.sandbox.search;
+ package org.apache.lucene.document;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field.Store;
- import org.apache.lucene.document.SortedDocValuesField;
- import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
@@ -31,6 +27,7 @@ import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
+ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
@@ -41,18 +38,74 @@ import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
- public class TestDocValuesTermsQuery extends LuceneTestCase {
+ public class TestSortedSetDocValuesSetQuery extends LuceneTestCase {
+ public void testMissingTerms() throws Exception {
+   String fieldName = "field1";
+   Directory rd = newDirectory();
+   RandomIndexWriter w = new RandomIndexWriter(random(), rd);
+   for (int i = 0; i < 100; i++) {
+     Document doc = new Document();
+     int term = i * 10; // terms are units of 10;
+     doc.add(newStringField(fieldName, "" + term, Field.Store.YES));
+     doc.add(new SortedDocValuesField(fieldName, new BytesRef("" + term)));
+     w.addDocument(doc);
+   }
+   IndexReader reader = w.getReader();
+   w.close();
+   IndexSearcher searcher = newSearcher(reader);
+   int numDocs = reader.numDocs();
+   ScoreDoc[] results;
+   List<BytesRef> terms = new ArrayList<>();
+   terms.add(new BytesRef("5"));
+   results =
+       searcher.search(
+               SortedDocValuesField.newSlowSetQuery(fieldName, terms.toArray(new BytesRef[0])),
+               numDocs)
+           .scoreDocs;
+   assertEquals("Must match nothing", 0, results.length);
+   terms = new ArrayList<>();
+   terms.add(new BytesRef("10"));
+   results =
+       searcher.search(
+               SortedDocValuesField.newSlowSetQuery(fieldName, terms.toArray(new BytesRef[0])),
+               numDocs)
+           .scoreDocs;
+   assertEquals("Must match 1", 1, results.length);
+   terms = new ArrayList<>();
+   terms.add(new BytesRef("10"));
+   terms.add(new BytesRef("20"));
+   results =
+       searcher.search(
+               SortedDocValuesField.newSlowSetQuery(fieldName, terms.toArray(new BytesRef[0])),
+               numDocs)
+           .scoreDocs;
+   assertEquals("Must match 2", 2, results.length);
+   reader.close();
+   rd.close();
+ }
public void testEquals() {
- assertEquals(new DocValuesTermsQuery("foo", "bar"), new DocValuesTermsQuery("foo", "bar"));
assertEquals(
-     new DocValuesTermsQuery("foo", "bar"), new DocValuesTermsQuery("foo", "bar", "bar"));
+     SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar")),
+     SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar")));
assertEquals(
-     new DocValuesTermsQuery("foo", "bar", "baz"), new DocValuesTermsQuery("foo", "baz", "bar"));
+     SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar")),
+     SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar"), new BytesRef("bar")));
+ assertEquals(
+     SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar"), new BytesRef("baz")),
+     SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("baz"), new BytesRef("bar")));
assertFalse(
-     new DocValuesTermsQuery("foo", "bar").equals(new DocValuesTermsQuery("foo2", "bar")));
+     SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar"))
+         .equals(SortedDocValuesField.newSlowSetQuery("foo2", new BytesRef("bar"))));
assertFalse(
-     new DocValuesTermsQuery("foo", "bar").equals(new DocValuesTermsQuery("foo", "baz")));
+     SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("bar"))
+         .equals(SortedDocValuesField.newSlowSetQuery("foo", new BytesRef("baz"))));
}
public void testDuelTermsQuery() throws IOException {
@@ -70,7 +123,7 @@ public class TestDocValuesTermsQuery extends LuceneTestCase {
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
final Term term = allTerms.get(random().nextInt(allTerms.size()));
- doc.add(new StringField(term.field(), term.text(), Store.NO));
+ doc.add(new StringField(term.field(), term.text(), Field.Store.NO));
doc.add(new SortedDocValuesField(term.field(), new BytesRef(term.text())));
iw.addDocument(doc);
}
@@ -101,12 +154,14 @@ public class TestDocValuesTermsQuery extends LuceneTestCase {
bq.add(new TermQuery(term), Occur.SHOULD);
}
Query q1 = new BoostQuery(new ConstantScoreQuery(bq.build()), boost);
- List<String> bytesTerms = new ArrayList<>();
+ List<BytesRef> bytesTerms = new ArrayList<>();
for (Term term : queryTerms) {
-   bytesTerms.add(term.text());
+   bytesTerms.add(term.bytes());
}
final Query q2 =
-     new BoostQuery(new DocValuesTermsQuery("f", bytesTerms.toArray(new String[0])), boost);
+     new BoostQuery(
+         SortedDocValuesField.newSlowSetQuery("f", bytesTerms.toArray(new BytesRef[0])),
+         boost);
assertSameMatches(searcher, q1, q2, true);
}
@@ -130,7 +185,7 @@ public class TestDocValuesTermsQuery extends LuceneTestCase {
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
final Term term = allTerms.get(random().nextInt(allTerms.size()));
- doc.add(new StringField(term.field(), term.text(), Store.NO));
+ doc.add(new StringField(term.field(), term.text(), Field.Store.NO));
doc.add(new SortedDocValuesField(term.field(), new BytesRef(term.text())));
iw.addDocument(doc);
}
@@ -161,12 +216,14 @@ public class TestDocValuesTermsQuery extends LuceneTestCase {
bq.add(new TermQuery(term), Occur.SHOULD);
}
Query q1 = new BoostQuery(new ConstantScoreQuery(bq.build()), boost);
- List<String> bytesTerms = new ArrayList<>();
+ List<BytesRef> bytesTerms = new ArrayList<>();
for (Term term : queryTerms) {
-   bytesTerms.add(term.text());
+   bytesTerms.add(term.bytes());
}
final Query q2 =
-     new BoostQuery(new DocValuesTermsQuery("f", bytesTerms.toArray(new String[0])), boost);
+     new BoostQuery(
+         SortedDocValuesField.newSlowSetQuery("f", bytesTerms.toArray(new BytesRef[0])),
+         boost);
BooleanQuery.Builder bq1 = new BooleanQuery.Builder();
bq1.add(q1, Occur.MUST);

DocValuesTermsQuery.java (deleted)

@@ -1,238 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.search;
import java.io.IOException;
import java.util.AbstractList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Objects;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PrefixCodedTerms;
import org.apache.lucene.index.PrefixCodedTerms.TermIterator;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.RamUsageEstimator;
/**
* A {@link Query} that only accepts documents whose term value in the specified field is contained
* in the provided set of allowed terms.
*
* <p>This is the same functionality as TermsQuery (from queries/), but because of drastically
* different implementations, they also have different performance characteristics, as described
* below.
*
* <p><b>NOTE</b>: be very careful using this query: it is typically much slower than using {@code
* TermsQuery}, but in certain specialized cases may be faster.
*
* <p>With each search, this query translates the specified set of Terms into a private {@link
* LongBitSet} keyed by term number per unique {@link IndexReader} (normally one reader per
* segment). Then, during matching, the term number for each docID is retrieved from the cache and
* then checked for inclusion using the {@link LongBitSet}. Since all testing is done using RAM
* resident data structures, performance should be very fast, most likely fast enough to not require
* further caching of the DocIdSet for each possible combination of terms. However, because docIDs
* are simply scanned linearly, an index with a great many small documents may find this linear scan
* too costly.
*
* <p>In contrast, TermsQuery builds up an {@link FixedBitSet}, keyed by docID, every time it's
* created, by enumerating through all matching docs using {@link
* org.apache.lucene.index.PostingsEnum} to seek and scan through each term's docID list. While
* there is no linear scan of all docIDs, besides the allocation of the underlying array in the
* {@link FixedBitSet}, this approach requires a number of "disk seeks" in proportion to the number
* of terms, which can be exceptionally costly when there are cache misses in the OS's IO cache.
*
* <p>Generally, this filter will be slower on the first invocation for a given field, but
* subsequent invocations, even if you change the allowed set of Terms, should be faster than
* TermsQuery, especially as the number of Terms being matched increases. If you are matching only a
* very small number of terms, and those terms in turn match a very small number of documents,
* TermsQuery may perform faster.
*
* <p>Which query is best is very application dependent.
*
* @lucene.experimental
*/
public class DocValuesTermsQuery extends Query implements Accountable {
private static final long BASE_RAM_BYTES =
RamUsageEstimator.shallowSizeOfInstance(DocValuesTermsQuery.class);
private final String field;
private final PrefixCodedTerms termData;
private final int termDataHashCode; // cached hashcode of termData
public DocValuesTermsQuery(String field, Collection<BytesRef> terms) {
this.field = Objects.requireNonNull(field);
Objects.requireNonNull(terms, "Collection of terms must not be null");
BytesRef[] sortedTerms = terms.toArray(new BytesRef[terms.size()]);
ArrayUtil.timSort(sortedTerms);
PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
BytesRef previous = null;
for (BytesRef term : sortedTerms) {
if (term.equals(previous) == false) {
builder.add(field, term);
}
previous = term;
}
termData = builder.finish();
termDataHashCode = termData.hashCode();
}
public DocValuesTermsQuery(String field, BytesRef... terms) {
this(field, Arrays.asList(terms));
}
public DocValuesTermsQuery(String field, String... terms) {
this(
field,
new AbstractList<BytesRef>() {
@Override
public BytesRef get(int index) {
return new BytesRef(terms[index]);
}
@Override
public int size() {
return terms.length;
}
});
}
@Override
public boolean equals(Object other) {
return sameClassAs(other) && equalsTo(getClass().cast(other));
}
private boolean equalsTo(DocValuesTermsQuery other) {
// termData might be heavy to compare so check the hash code first
return termDataHashCode == other.termDataHashCode && termData.equals(other.termData);
}
@Override
public int hashCode() {
return 31 * classHash() + termDataHashCode;
}
@Override
public String toString(String defaultField) {
StringBuilder builder = new StringBuilder();
boolean first = true;
TermIterator iterator = termData.iterator();
for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
if (!first) {
builder.append(' ');
}
first = false;
builder.append(new Term(iterator.field(), term).toString());
}
return builder.toString();
}
/**
* @return the name of the field searched by this query.
*/
public String getField() {
return field;
}
/**
* @return the terms looked up by this query, prefix-encoded.
*/
public PrefixCodedTerms getTerms() {
return termData;
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES
+ RamUsageEstimator.sizeOfObject(field)
+ RamUsageEstimator.sizeOfObject(termData);
}
@Override
public void visit(QueryVisitor visitor) {
if (visitor.acceptField(field)) {
visitor.visitLeaf(this);
}
}
@Override
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
throws IOException {
return new ConstantScoreWeight(this, boost) {
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
final SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);
final LongBitSet bits = new LongBitSet(values.getValueCount());
boolean matchesAtLeastOneTerm = false;
TermIterator iterator = termData.iterator();
for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
final long ord = values.lookupTerm(term);
if (ord >= 0) {
matchesAtLeastOneTerm = true;
bits.set(ord);
}
}
if (matchesAtLeastOneTerm == false) {
return null;
}
return new ConstantScoreScorer(
this,
score(),
scoreMode,
new TwoPhaseIterator(values) {
@Override
public boolean matches() throws IOException {
for (int i = 0; i < values.docValueCount(); i++) {
if (bits.get(values.nextOrd())) {
return true;
}
}
return false;
}
@Override
public float matchCost() {
return 3; // lookup in a bitset
}
});
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return DocValues.isCacheable(ctx, field);
}
};
}
}

TestFieldCacheTermsFilter.java (deleted)

@@ -1,81 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.search;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
/**
* A basic unit test for FieldCacheTermsFilter
*
* @see DocValuesTermsQuery
*/
public class TestFieldCacheTermsFilter extends LuceneTestCase {
public void testMissingTerms() throws Exception {
String fieldName = "field1";
Directory rd = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), rd);
for (int i = 0; i < 100; i++) {
Document doc = new Document();
int term = i * 10; // terms are units of 10;
doc.add(newStringField(fieldName, "" + term, Field.Store.YES));
doc.add(new SortedDocValuesField(fieldName, new BytesRef("" + term)));
w.addDocument(doc);
}
IndexReader reader = w.getReader();
w.close();
IndexSearcher searcher = newSearcher(reader);
int numDocs = reader.numDocs();
ScoreDoc[] results;
List<String> terms = new ArrayList<>();
terms.add("5");
results =
searcher.search(new DocValuesTermsQuery(fieldName, terms.toArray(new String[0])), numDocs)
.scoreDocs;
assertEquals("Must match nothing", 0, results.length);
terms = new ArrayList<>();
terms.add("10");
results =
searcher.search(new DocValuesTermsQuery(fieldName, terms.toArray(new String[0])), numDocs)
.scoreDocs;
assertEquals("Must match 1", 1, results.length);
terms = new ArrayList<>();
terms.add("10");
terms.add("20");
results =
searcher.search(new DocValuesTermsQuery(fieldName, terms.toArray(new String[0])), numDocs)
.scoreDocs;
assertEquals("Must match 2", 2, results.length);
reader.close();
rd.close();
}
}