LUCENE-6289: Replace DocValuesTermsFilter with DocValuesTermsQuery.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1662244 13f79535-47bb-0310-9956-ffa450edef68
Adrien Grand 2015-02-25 15:26:04 +00:00
parent 7642c8fa3b
commit 0e505bcc00
12 changed files with 520 additions and 237 deletions

lucene/CHANGES.txt

@@ -150,6 +150,9 @@ API Changes
* LUCENE-6268: Replace FieldValueFilter and DocValuesRangeFilter with equivalent
queries that support approximations. (Adrien Grand)
+ * LUCENE-6289: Replace DocValuesTermsFilter with DocValuesTermsQuery which
+ supports approximations. (Adrien Grand)
* LUCENE-6266: Remove unnecessary Directory params from SegmentInfo.toString,
SegmentInfos.files/toString, and SegmentCommitInfo.toString. (Robert Muir)
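Both entries describe the same migration pattern: a stand-alone Filter that had to be wrapped in a FilteredQuery becomes a first-class Query that can score and expose a two-phase ("approximation") iterator. A minimal sketch against the 5.x API; the field name "price" and the bounds are hypothetical:

import org.apache.lucene.search.DocValuesRangeQuery;
import org.apache.lucene.search.Query;

class RangeMigrationSketch {
  Query priceRange() {
    // Before: new FilteredQuery(other, DocValuesRangeFilter.newLongRange(...))
    // After: the range is a Query in its own right (bounds inclusive here).
    return DocValuesRangeQuery.newLongRange("price", 10L, 20L, true, true);
  }
}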

ConstantScoreWeight.java (new file)

@@ -0,0 +1,73 @@
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.Bits;
abstract class ConstantScoreWeight extends Weight {
private float queryNorm;
private float queryWeight;
protected ConstantScoreWeight(Query query) {
super(query);
}
@Override
public final float getValueForNormalization() throws IOException {
queryWeight = getQuery().getBoost();
return queryWeight * queryWeight;
}
@Override
public final void normalize(float norm, float topLevelBoost) {
queryNorm = norm * topLevelBoost;
queryWeight *= queryNorm;
}
@Override
public final Explanation explain(LeafReaderContext context, int doc) throws IOException {
final Scorer s = scorer(context, context.reader().getLiveDocs());
final boolean exists = (s != null && s.advance(doc) == doc);
final ComplexExplanation result = new ComplexExplanation();
if (exists) {
result.setDescription(getQuery().toString() + ", product of:");
result.setValue(queryWeight);
result.setMatch(Boolean.TRUE);
result.addDetail(new Explanation(getQuery().getBoost(), "boost"));
result.addDetail(new Explanation(queryNorm, "queryNorm"));
} else {
result.setDescription(getQuery().toString() + " doesn't match id " + doc);
result.setValue(0);
result.setMatch(Boolean.FALSE);
}
return result;
}
@Override
public final Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
return scorer(context, acceptDocs, queryWeight);
}
abstract Scorer scorer(LeafReaderContext context, Bits acceptDocs, float score) throws IOException;
}
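The division of labor, illustrated below: ConstantScoreWeight keeps the boost/queryNorm bookkeeping and the constant-score explain() in one place, so each query only implements the package-private three-argument scorer(), receiving the precomputed constant score. A hedged sketch, not part of this patch; the weight class and its match-all behavior are invented for illustration, and deleted docs are ignored for brevity:

package org.apache.lucene.search; // ConstantScoreWeight is package-private

import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.Bits;

// Hypothetical subclass that constant-scores every document in a segment.
final class MatchAllConstantWeight extends ConstantScoreWeight {

  MatchAllConstantWeight(Query query) {
    super(query);
  }

  @Override
  Scorer scorer(LeafReaderContext context, Bits acceptDocs, float score) throws IOException {
    // `score` already folds in getBoost() and the query norm; the base
    // class computed it and passed it through its final scorer() method.
    final DocIdSetIterator disi = DocIdSetIterator.all(context.reader().maxDoc());
    return new Scorer(this) {
      @Override public float score() { return score; } // the constant
      @Override public int freq() { return 1; }
      @Override public int docID() { return disi.docID(); }
      @Override public int nextDoc() throws IOException { return disi.nextDoc(); }
      @Override public int advance(int target) throws IOException { return disi.advance(target); }
      @Override public long cost() { return disi.cost(); }
    };
  }
}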

DocValuesRangeQuery.java

@@ -123,45 +123,10 @@ public final class DocValuesRangeQuery extends Query {
if (lowerVal == null && upperVal == null) {
throw new IllegalStateException("Both min and max values cannot be null, call rewrite first");
}
- return new Weight(DocValuesRangeQuery.this) {
- private float queryNorm;
- private float queryWeight;
+ return new ConstantScoreWeight(DocValuesRangeQuery.this) {
- @Override
- public float getValueForNormalization() throws IOException {
- queryWeight = getBoost();
- return queryWeight * queryWeight;
- }
- @Override
- public void normalize(float norm, float topLevelBoost) {
- queryNorm = norm * topLevelBoost;
- queryWeight *= queryNorm;
- }
- @Override
- public Explanation explain(LeafReaderContext context, int doc) throws IOException {
- final Scorer s = scorer(context, context.reader().getLiveDocs());
- final boolean exists = (s != null && s.advance(doc) == doc);
- final ComplexExplanation result = new ComplexExplanation();
- if (exists) {
- result.setDescription(DocValuesRangeQuery.this.toString() + ", product of:");
- result.setValue(queryWeight);
- result.setMatch(Boolean.TRUE);
- result.addDetail(new Explanation(getBoost(), "boost"));
- result.addDetail(new Explanation(queryNorm, "queryNorm"));
- } else {
- result.setDescription(DocValuesRangeQuery.this.toString() + " doesn't match id " + doc);
- result.setValue(0);
- result.setMatch(Boolean.FALSE);
- }
- return result;
- }
@Override
- public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(LeafReaderContext context, Bits acceptDocs, float score) throws IOException {
final Bits docsWithField = context.reader().getDocsWithField(field);
if (docsWithField == null || docsWithField instanceof MatchNoBits) {
@@ -240,7 +205,7 @@ public final class DocValuesRangeQuery extends Query {
throw new AssertionError();
}
- return new RangeScorer(this, twoPhaseRange, queryWeight);
+ return new RangeScorer(this, twoPhaseRange, score);
}
};

DocValuesTermsFilter.java (deleted)

@@ -1,135 +0,0 @@
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
/**
* A {@link Filter} that only accepts documents whose single
* term value in the specified field is contained in the
* provided set of allowed terms.
*
* <p>
* This is the same functionality as TermsFilter (from
* queries/), except this filter requires that the
* field contains only a single term for all documents.
* Because of drastically different implementations, they
* also have different performance characteristics, as
* described below.
*
* <p>
* With each search, this filter translates the specified
* set of Terms into a private {@link FixedBitSet} keyed by
* term number per unique {@link IndexReader} (normally one
* reader per segment). Then, during matching, the term
* number for each docID is retrieved from the cache and
* then checked for inclusion using the {@link FixedBitSet}.
* Since all testing is done using RAM resident data
* structures, performance should be very fast, most likely
* fast enough to not require further caching of the
* DocIdSet for each possible combination of terms.
* However, because docIDs are simply scanned linearly, an
* index with a great many small documents may find this
* linear scan too costly.
*
* <p>
* In contrast, TermsFilter builds up a {@link FixedBitSet},
* keyed by docID, every time it's created, by enumerating
* through all matching docs using {@link org.apache.lucene.index.PostingsEnum} to seek
* and scan through each term's docID list. While there is
* no linear scan of all docIDs, besides the allocation of
* the underlying array in the {@link FixedBitSet}, this
* approach requires a number of "disk seeks" in proportion
* to the number of terms, which can be exceptionally costly
* when there are cache misses in the OS's IO cache.
*
* <p>
* Generally, this filter will be slower on the first
* invocation for a given field, but subsequent invocations,
* even if you change the allowed set of Terms, should be
* faster than TermsFilter, especially as the number of
* Terms being matched increases. If you are matching only
* a very small number of terms, and those terms in turn
* match a very small number of documents, TermsFilter may
* perform faster.
*
* <p>
* Which filter is best is very application dependent.
*/
public class DocValuesTermsFilter extends Filter {
private String field;
private BytesRef[] terms;
public DocValuesTermsFilter(String field, BytesRef... terms) {
this.field = field;
this.terms = terms;
}
public DocValuesTermsFilter(String field, String... terms) {
this.field = field;
this.terms = new BytesRef[terms.length];
for (int i = 0; i < terms.length; i++)
this.terms[i] = new BytesRef(terms[i]);
}
@Override
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
final SortedDocValues fcsi = DocValues.getSorted(context.reader(), field);
final FixedBitSet bits = new FixedBitSet(fcsi.getValueCount());
for (int i=0;i<terms.length;i++) {
int ord = fcsi.lookupTerm(terms[i]);
if (ord >= 0) {
bits.set(ord);
}
}
return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) {
@Override
protected final boolean matchDoc(int doc) {
int ord = fcsi.getOrd(doc);
if (ord == -1) {
// missing
return false;
} else {
return bits.get(ord);
}
}
};
}
@Override
public String toString(String defaultField) {
StringBuilder sb = new StringBuilder();
sb.append(field).append(": [");
for (BytesRef term : terms) {
sb.append(term).append(", ");
}
if (terms.length > 0) {
sb.setLength(sb.length() - 2);
}
return sb.append(']').toString();
}
}
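Call sites that still need a Filter (like the Solr TermsQParserPlugin change at the end of this commit) wrap the replacement query in a QueryWrapperFilter. A minimal migration sketch; the field and terms are made up:

import org.apache.lucene.search.DocValuesTermsQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.QueryWrapperFilter;

class TermsFilterMigrationSketch {
  Filter categoryFilter() {
    // Before: new DocValuesTermsFilter("category", "books", "music")
    return new QueryWrapperFilter(new DocValuesTermsQuery("category", "books", "music"));
  }
}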

DocValuesTermsQuery.java (new file)

@@ -0,0 +1,227 @@
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.AbstractList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Objects;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LongBitSet;
/**
* A {@link Query} that only accepts documents whose
* term value in the specified field is contained in the
* provided set of allowed terms.
*
* <p>
* This is the same functionality as TermsQuery (from
* queries/), but because of drastically different
* implementations, they also have different performance
* characteristics, as described below.
*
* <p>
* With each search, this query translates the specified
* set of Terms into a private {@link LongBitSet} keyed by
* term number per unique {@link IndexReader} (normally one
* reader per segment). Then, during matching, the term
* number for each docID is retrieved from the cache and
* then checked for inclusion using the {@link LongBitSet}.
* Since all testing is done using RAM resident data
* structures, performance should be very fast, most likely
* fast enough to not require further caching of the
* DocIdSet for each possible combination of terms.
* However, because docIDs are simply scanned linearly, an
* index with a great many small documents may find this
* linear scan too costly.
*
* <p>
* In contrast, TermsQuery builds up a {@link FixedBitSet},
* keyed by docID, every time it's created, by enumerating
* through all matching docs using {@link org.apache.lucene.index.PostingsEnum} to seek
* and scan through each term's docID list. While there is
* no linear scan of all docIDs, besides the allocation of
* the underlying array in the {@link FixedBitSet}, this
* approach requires a number of "disk seeks" in proportion
* to the number of terms, which can be exceptionally costly
* when there are cache misses in the OS's IO cache.
*
* <p>
* Generally, this query will be slower on the first
* invocation for a given field, but subsequent invocations,
* even if you change the allowed set of Terms, should be
* faster than TermsQuery, especially as the number of
* Terms being matched increases. If you are matching only
* a very small number of terms, and those terms in turn
* match a very small number of documents, TermsQuery may
* perform faster.
*
* <p>
* Which query is best is very application dependent.
*/
public class DocValuesTermsQuery extends Query {
private final String field;
private final BytesRef[] terms;
public DocValuesTermsQuery(String field, Collection<BytesRef> terms) {
this.field = Objects.requireNonNull(field);
this.terms = terms.toArray(new BytesRef[terms.size()]);
ArrayUtil.timSort(this.terms, BytesRef.getUTF8SortedAsUnicodeComparator());
}
public DocValuesTermsQuery(String field, BytesRef... terms) {
this(field, Arrays.asList(terms));
}
public DocValuesTermsQuery(String field, String... terms) {
this(field, new AbstractList<BytesRef>() {
@Override
public BytesRef get(int index) {
return new BytesRef(terms[index]);
}
@Override
public int size() {
return terms.length;
}
});
}
@Override
public boolean equals(Object obj) {
if (obj instanceof DocValuesTermsQuery == false) {
return false;
}
DocValuesTermsQuery that = (DocValuesTermsQuery) obj;
if (!field.equals(that.field)) {
return false;
}
if (getBoost() != that.getBoost()) {
return false;
}
return Arrays.equals(terms, that.terms);
}
@Override
public int hashCode() {
return Objects.hash(field, Arrays.asList(terms), getBoost());
}
@Override
public String toString(String defaultField) {
StringBuilder sb = new StringBuilder();
sb.append(field).append(": [");
for (BytesRef term : terms) {
sb.append(term).append(", ");
}
if (terms.length > 0) {
sb.setLength(sb.length() - 2);
}
return sb.append(']').toString();
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new ConstantScoreWeight(this) {
@Override
Scorer scorer(LeafReaderContext context, Bits acceptDocs, float score) throws IOException {
final SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);
final LongBitSet bits = new LongBitSet(values.getValueCount());
for (BytesRef term : terms) {
final long ord = values.lookupTerm(term);
if (ord >= 0) {
bits.set(ord);
}
}
final DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
final TwoPhaseDocIdSetIterator twoPhaseIterator = new TwoPhaseDocIdSetIterator() {
@Override
public DocIdSetIterator approximation() {
return approximation;
}
@Override
public boolean matches() throws IOException {
final int doc = approximation.docID();
if (acceptDocs != null && acceptDocs.get(doc) == false) {
return false;
}
values.setDocument(doc);
for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
if (bits.get(ord)) {
return true;
}
}
return false;
}
};
final DocIdSetIterator disi = TwoPhaseDocIdSetIterator.asDocIdSetIterator(twoPhaseIterator);
return new Scorer(this) {
@Override
public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
return twoPhaseIterator;
}
@Override
public float score() throws IOException {
return score;
}
@Override
public int freq() throws IOException {
return 1;
}
@Override
public int docID() {
return disi.docID();
}
@Override
public int nextDoc() throws IOException {
return disi.nextDoc();
}
@Override
public int advance(int target) throws IOException {
return disi.advance(target);
}
@Override
public long cost() {
return disi.cost();
}
};
}
};
}
}
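As the javadoc above explains, matching happens per segment against SortedSetDocValues ordinals. Typical stand-alone usage, sketched under the assumption that "category" was indexed with SortedDocValuesField or SortedSetDocValuesField (index and field names hypothetical):

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.DocValuesTermsQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;

class DocValuesTermsQueryUsage {
  TopDocs topCategories(Directory dir) throws Exception {
    DirectoryReader reader = DirectoryReader.open(dir);
    try {
      IndexSearcher searcher = new IndexSearcher(reader);
      // Matches docs whose doc-values ord for "category" is one of the terms.
      Query q = new DocValuesTermsQuery("category", "books", "music");
      return searcher.search(q, 10);
    } finally {
      reader.close();
    }
  }
}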

FieldValueQuery.java

@@ -61,45 +61,10 @@ public final class FieldValueQuery extends Query {
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
- return new Weight(this) {
- private float queryNorm;
- private float queryWeight;
+ return new ConstantScoreWeight(this) {
- @Override
- public float getValueForNormalization() throws IOException {
- queryWeight = getBoost();
- return queryWeight * queryWeight;
- }
- @Override
- public void normalize(float norm, float topLevelBoost) {
- queryNorm = norm * topLevelBoost;
- queryWeight *= queryNorm;
- }
- @Override
- public Explanation explain(LeafReaderContext context, int doc) throws IOException {
- final Scorer s = scorer(context, context.reader().getLiveDocs());
- final boolean exists = (s != null && s.advance(doc) == doc);
- final ComplexExplanation result = new ComplexExplanation();
- if (exists) {
- result.setDescription(FieldValueQuery.this.toString() + ", product of:");
- result.setValue(queryWeight);
- result.setMatch(Boolean.TRUE);
- result.addDetail(new Explanation(getBoost(), "boost"));
- result.addDetail(new Explanation(queryNorm, "queryNorm"));
- } else {
- result.setDescription(FieldValueQuery.this.toString() + " doesn't match id " + doc);
- result.setValue(0);
- result.setMatch(Boolean.FALSE);
- }
- return result;
- }
@Override
- public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
+ public Scorer scorer(LeafReaderContext context, Bits acceptDocs, float score) throws IOException {
final Bits docsWithField = context.reader().getDocsWithField(field);
if (docsWithField == null || docsWithField instanceof MatchNoBits) {
return null;
@@ -161,7 +126,7 @@ public final class FieldValueQuery extends Query {
@Override
public float score() throws IOException {
- return queryWeight;
+ return score;
}
};
}
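FieldValueQuery (added by LUCENE-6268, per the CHANGES entry above) gets the same treatment; it matches every document that has a value for the given doc-values field. A one-line usage sketch with a hypothetical field name:

import org.apache.lucene.search.FieldValueQuery;
import org.apache.lucene.search.Query;

class FieldValueQuerySketch {
  Query hasPrice() {
    // Replaces the removed FieldValueFilter: matches all docs that have
    // a doc-values entry for "price".
    return new FieldValueQuery("price");
  }
}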

TestComplexExplanations.java

@@ -71,13 +71,13 @@ public class TestComplexExplanations extends BaseExplanationTestCase {
Occur.SHOULD);
q.add(snear(sf("w3",2), st("w2"), st("w3"), 5, true),
Occur.SHOULD);
Query t = new FilteredQuery(new TermQuery(new Term(FIELD, "xx")),
- new ItemizedFilter(new int[] {1,3}));
+ new QueryWrapperFilter(new ItemizedQuery(new int[] {1,3})));
t.setBoost(1000);
q.add(t, Occur.SHOULD);
- t = new ConstantScoreQuery(new ItemizedFilter(new int[] {0,2}));
+ t = new ConstantScoreQuery(new ItemizedQuery(new int[] {0,2}));
t.setBoost(30);
q.add(t, Occur.SHOULD);
@@ -136,11 +136,11 @@ public class TestComplexExplanations extends BaseExplanationTestCase {
Occur.SHOULD);
Query t = new FilteredQuery(new TermQuery(new Term(FIELD, "xx")),
- new ItemizedFilter(new int[] {1,3}));
+ new QueryWrapperFilter(new ItemizedQuery(new int[] {1,3})));
t.setBoost(1000);
q.add(t, Occur.SHOULD);
- t = new ConstantScoreQuery(new ItemizedFilter(new int[] {0,2}));
+ t = new ConstantScoreQuery(new ItemizedQuery(new int[] {0,2}));
t.setBoost(-20.0f);
q.add(t, Occur.SHOULD);
@@ -207,13 +207,11 @@ public class TestComplexExplanations extends BaseExplanationTestCase {
public void testFQ5() throws Exception {
TermQuery query = new TermQuery(new Term(FIELD, "xx"));
query.setBoost(0);
- bqtest(new FilteredQuery(query,
- new ItemizedFilter(new int[] {1,3})),
- new int[] {3});
+ bqtest(new FilteredQuery(query, new QueryWrapperFilter(new ItemizedQuery(new int[] {1,3}))), new int[] {3});
}
public void testCSQ4() throws Exception {
- Query q = new ConstantScoreQuery(new ItemizedFilter(new int[] {3}));
+ Query q = new ConstantScoreQuery(new ItemizedQuery(new int[] {3}));
q.setBoost(0);
bqtest(q, new int[] {3});
}

TestDocValuesTermsQuery.java (new file)

@@ -0,0 +1,188 @@
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;
public class TestDocValuesTermsQuery extends LuceneTestCase {
public void testEquals() {
assertEquals(new DocValuesTermsQuery("foo", "bar"), new DocValuesTermsQuery("foo", "bar"));
assertEquals(new DocValuesTermsQuery("foo", "bar", "baz"), new DocValuesTermsQuery("foo", "baz", "bar"));
assertFalse(new DocValuesTermsQuery("foo", "bar").equals(new DocValuesTermsQuery("foo2", "bar")));
assertFalse(new DocValuesTermsQuery("foo", "bar").equals(new DocValuesTermsQuery("foo", "baz")));
}
public void testDuelTermsQuery() throws IOException {
final int iters = atLeast(2);
for (int iter = 0; iter < iters; ++iter) {
final List<Term> allTerms = new ArrayList<>();
final int numTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 10));
for (int i = 0; i < numTerms; ++i) {
final String value = TestUtil.randomAnalysisString(random(), 10, true);
allTerms.add(new Term("f", value));
}
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
final int numDocs = atLeast(100);
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
final Term term = allTerms.get(random().nextInt(allTerms.size()));
doc.add(new StringField(term.field(), term.text(), Store.NO));
doc.add(new SortedDocValuesField(term.field(), new BytesRef(term.text())));
iw.addDocument(doc);
}
if (numTerms > 1 && random().nextBoolean()) {
iw.deleteDocuments(new TermQuery(allTerms.get(0)));
}
iw.commit();
final IndexReader reader = iw.getReader();
final IndexSearcher searcher = newSearcher(reader);
iw.close();
if (reader.numDocs() == 0) {
// may occasionally happen if all documents got the same term
IOUtils.close(reader, dir);
continue;
}
for (int i = 0; i < 100; ++i) {
final float boost = random().nextFloat() * 10;
final int numQueryTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 8));
List<Term> queryTerms = new ArrayList<>();
for (int j = 0; j < numQueryTerms; ++j) {
queryTerms.add(allTerms.get(random().nextInt(allTerms.size())));
}
final BooleanQuery bq = new BooleanQuery();
for (Term term : queryTerms) {
bq.add(new TermQuery(term), Occur.SHOULD);
}
Query q1 = new ConstantScoreQuery(bq);
q1.setBoost(boost);
List<String> bytesTerms = new ArrayList<>();
for (Term term : queryTerms) {
bytesTerms.add(term.text());
}
final Query q2 = new DocValuesTermsQuery("f", bytesTerms.toArray(new String[0]));
q2.setBoost(boost);
assertSameMatches(searcher, q1, q2, true);
}
reader.close();
dir.close();
}
}
public void testApproximation() throws IOException {
final int iters = atLeast(2);
for (int iter = 0; iter < iters; ++iter) {
final List<Term> allTerms = new ArrayList<>();
final int numTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 10));
for (int i = 0; i < numTerms; ++i) {
final String value = TestUtil.randomAnalysisString(random(), 10, true);
allTerms.add(new Term("f", value));
}
Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
final int numDocs = atLeast(100);
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
final Term term = allTerms.get(random().nextInt(allTerms.size()));
doc.add(new StringField(term.field(), term.text(), Store.NO));
doc.add(new SortedDocValuesField(term.field(), new BytesRef(term.text())));
iw.addDocument(doc);
}
if (numTerms > 1 && random().nextBoolean()) {
iw.deleteDocuments(new TermQuery(allTerms.get(0)));
}
iw.commit();
final IndexReader reader = iw.getReader();
final IndexSearcher searcher = newSearcher(reader);
iw.close();
if (reader.numDocs() == 0) {
// may occasionally happen if all documents got the same term
IOUtils.close(reader, dir);
continue;
}
for (int i = 0; i < 100; ++i) {
final float boost = random().nextFloat() * 10;
final int numQueryTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 8));
List<Term> queryTerms = new ArrayList<>();
for (int j = 0; j < numQueryTerms; ++j) {
queryTerms.add(allTerms.get(random().nextInt(allTerms.size())));
}
final BooleanQuery bq = new BooleanQuery();
for (Term term : queryTerms) {
bq.add(new TermQuery(term), Occur.SHOULD);
}
Query q1 = new ConstantScoreQuery(bq);
q1.setBoost(boost);
List<String> bytesTerms = new ArrayList<>();
for (Term term : queryTerms) {
bytesTerms.add(term.text());
}
final Query q2 = new DocValuesTermsQuery("f", bytesTerms.toArray(new String[0]));
q2.setBoost(boost);
BooleanQuery bq1 = new BooleanQuery();
bq1.add(q1, Occur.MUST);
bq1.add(new TermQuery(allTerms.get(0)), Occur.FILTER);
BooleanQuery bq2 = new BooleanQuery();
bq2.add(q2, Occur.MUST);
bq2.add(new TermQuery(allTerms.get(0)), Occur.FILTER);
assertSameMatches(searcher, bq1, bq2, true);
}
reader.close();
dir.close();
}
}
private void assertSameMatches(IndexSearcher searcher, Query q1, Query q2, boolean scores) throws IOException {
final int maxDoc = searcher.getIndexReader().maxDoc();
final TopDocs td1 = searcher.search(q1, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER);
final TopDocs td2 = searcher.search(q2, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER);
assertEquals(td1.totalHits, td2.totalHits);
for (int i = 0; i < td1.scoreDocs.length; ++i) {
assertEquals(td1.scoreDocs[i].doc, td2.scoreDocs[i].doc);
if (scores) {
assertEquals(td1.scoreDocs[i].score, td2.scoreDocs[i].score, 10e-7);
}
}
}
}

TestFieldCacheTermsFilter.java

@@ -32,7 +32,7 @@ import java.util.List;
/**
* A basic unit test for FieldCacheTermsFilter
*
- * @see org.apache.lucene.search.DocValuesTermsFilter
+ * @see org.apache.lucene.search.DocValuesTermsQuery
*/
public class TestFieldCacheTermsFilter extends LuceneTestCase {
public void testMissingTerms() throws Exception {
@@ -52,22 +52,21 @@ public class TestFieldCacheTermsFilter extends LuceneTestCase {
IndexSearcher searcher = newSearcher(reader);
int numDocs = reader.numDocs();
ScoreDoc[] results;
- MatchAllDocsQuery q = new MatchAllDocsQuery();
List<String> terms = new ArrayList<>();
terms.add("5");
- results = searcher.search(new FilteredQuery(q, new DocValuesTermsFilter(fieldName, terms.toArray(new String[0]))), numDocs).scoreDocs;
+ results = searcher.search(new DocValuesTermsQuery(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs;
assertEquals("Must match nothing", 0, results.length);
terms = new ArrayList<>();
terms.add("10");
- results = searcher.search(new FilteredQuery(q, new DocValuesTermsFilter(fieldName, terms.toArray(new String[0]))), numDocs).scoreDocs;
+ results = searcher.search(new DocValuesTermsQuery(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs;
assertEquals("Must match 1", 1, results.length);
terms = new ArrayList<>();
terms.add("10");
terms.add("20");
- results = searcher.search(new FilteredQuery(q, new DocValuesTermsFilter(fieldName, terms.toArray(new String[0]))), numDocs).scoreDocs;
+ results = searcher.search(new DocValuesTermsQuery(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs;
assertEquals("Must match 2", 2, results.length);
reader.close();

TestSimpleExplanations.java

@@ -105,28 +105,28 @@ public class TestSimpleExplanations extends BaseExplanationTestCase {
public void testFQ1() throws Exception {
qtest(new FilteredQuery(new TermQuery(new Term(FIELD, "w1")),
- new ItemizedFilter(new int[] {0,1,2,3})),
+ new QueryWrapperFilter(new ItemizedQuery(new int[] {0,1,2,3}))),
new int[] {0,1,2,3});
}
public void testFQ2() throws Exception {
qtest(new FilteredQuery(new TermQuery(new Term(FIELD, "w1")),
- new ItemizedFilter(new int[] {0,2,3})),
+ new QueryWrapperFilter(new ItemizedQuery(new int[] {0,2,3}))),
new int[] {0,2,3});
}
public void testFQ3() throws Exception {
qtest(new FilteredQuery(new TermQuery(new Term(FIELD, "xx")),
- new ItemizedFilter(new int[] {1,3})),
+ new QueryWrapperFilter(new ItemizedQuery(new int[] {1,3}))),
new int[] {3});
}
public void testFQ4() throws Exception {
TermQuery termQuery = new TermQuery(new Term(FIELD, "xx"));
termQuery.setBoost(1000);
- qtest(new FilteredQuery(termQuery, new ItemizedFilter(new int[] {1,3})),
+ qtest(new FilteredQuery(termQuery, new QueryWrapperFilter(new ItemizedQuery(new int[] {1,3}))),
new int[] {3});
}
public void testFQ6() throws Exception {
Query q = new FilteredQuery(new TermQuery(new Term(FIELD, "xx")),
- new ItemizedFilter(new int[] {1,3}));
+ new QueryWrapperFilter(new ItemizedQuery(new int[] {1,3})));
q.setBoost(1000);
qtest(q, new int[] {3});
}
@@ -134,15 +134,15 @@ public class TestSimpleExplanations extends BaseExplanationTestCase {
/* ConstantScoreQueries */
public void testCSQ1() throws Exception {
- Query q = new ConstantScoreQuery(new ItemizedFilter(new int[] {0,1,2,3}));
+ Query q = new ConstantScoreQuery(new ItemizedQuery(new int[] {0,1,2,3}));
qtest(q, new int[] {0,1,2,3});
}
public void testCSQ2() throws Exception {
- Query q = new ConstantScoreQuery(new ItemizedFilter(new int[] {1,3}));
+ Query q = new ConstantScoreQuery(new ItemizedQuery(new int[] {1,3}));
qtest(q, new int[] {1,3});
}
public void testCSQ3() throws Exception {
- Query q = new ConstantScoreQuery(new ItemizedFilter(new int[] {0,2}));
+ Query q = new ConstantScoreQuery(new ItemizedQuery(new int[] {0,2}));
q.setBoost(1000);
qtest(q, new int[] {0,2});
}

BaseExplanationTestCase.java

@@ -112,7 +112,7 @@ public abstract class BaseExplanationTestCase extends LuceneTestCase {
/**
* Convenience subclass of FieldCacheTermsFilter
*/
- public static class ItemizedFilter extends DocValuesTermsFilter {
+ public static class ItemizedQuery extends DocValuesTermsQuery {
private static String[] int2str(int [] terms) {
String [] out = new String[terms.length];
for (int i = 0; i < terms.length; i++) {
@@ -120,7 +120,7 @@ public abstract class BaseExplanationTestCase extends LuceneTestCase {
}
return out;
}
- public ItemizedFilter(int [] keys) {
+ public ItemizedQuery(int [] keys) {
super(KEY, int2str(keys));
}
}

TermsQParserPlugin.java

@@ -25,7 +25,7 @@ import org.apache.lucene.queries.TermsQuery;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
- import org.apache.lucene.search.DocValuesTermsFilter;
+ import org.apache.lucene.search.DocValuesTermsQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.MultiTermQueryWrapperFilter;
import org.apache.lucene.search.Query;
@@ -93,7 +93,7 @@ public class TermsQParserPlugin extends QParserPlugin {
//note: limited to one val per doc
@Override
Filter makeFilter(String fname, BytesRef[] byteRefs) {
- return new DocValuesTermsFilter(fname, byteRefs);
+ return new QueryWrapperFilter(new DocValuesTermsQuery(fname, byteRefs));
}
};