mirror of https://github.com/apache/lucene.git
LUCENE-6289: Replace DocValuesRangeFilter with DocValuesRangeQuery.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1662244 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7642c8fa3b
commit
0e505bcc00
|
@ -150,6 +150,9 @@ API Changes
|
|||
* LUCENE-6268: Replace FieldValueFilter and DocValuesRangeFilter with equivalent
|
||||
queries that support approximations. (Adrien Grand)
|
||||
|
||||
* LUCENE-6289: Replace DocValuesTermsFilter with DocValuesTermsQuery which
|
||||
supports approximations. (Adrien Grand)
|
||||
|
||||
* LUCENE-6266: Remove unnecessary Directory params from SegmentInfo.toString,
|
||||
SegmentInfos.files/toString, and SegmentCommitInfo.toString. (Robert Muir)
|
||||
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.util.Bits;
|
||||
|
||||
abstract class ConstantScoreWeight extends Weight {
|
||||
|
||||
private float queryNorm;
|
||||
private float queryWeight;
|
||||
|
||||
protected ConstantScoreWeight(Query query) {
|
||||
super(query);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final float getValueForNormalization() throws IOException {
|
||||
queryWeight = getQuery().getBoost();
|
||||
return queryWeight * queryWeight;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final void normalize(float norm, float topLevelBoost) {
|
||||
queryNorm = norm * topLevelBoost;
|
||||
queryWeight *= queryNorm;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||
final Scorer s = scorer(context, context.reader().getLiveDocs());
|
||||
final boolean exists = (s != null && s.advance(doc) == doc);
|
||||
|
||||
final ComplexExplanation result = new ComplexExplanation();
|
||||
if (exists) {
|
||||
result.setDescription(getQuery().toString() + ", product of:");
|
||||
result.setValue(queryWeight);
|
||||
result.setMatch(Boolean.TRUE);
|
||||
result.addDetail(new Explanation(getQuery().getBoost(), "boost"));
|
||||
result.addDetail(new Explanation(queryNorm, "queryNorm"));
|
||||
} else {
|
||||
result.setDescription(getQuery().toString() + " doesn't match id " + doc);
|
||||
result.setValue(0);
|
||||
result.setMatch(Boolean.FALSE);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public final Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
return scorer(context, acceptDocs, queryWeight);
|
||||
}
|
||||
|
||||
abstract Scorer scorer(LeafReaderContext context, Bits acceptDocs, float score) throws IOException;
|
||||
|
||||
}
|
|
@ -123,45 +123,10 @@ public final class DocValuesRangeQuery extends Query {
|
|||
if (lowerVal == null && upperVal == null) {
|
||||
throw new IllegalStateException("Both min and max values cannot be null, call rewrite first");
|
||||
}
|
||||
return new Weight(DocValuesRangeQuery.this) {
|
||||
|
||||
private float queryNorm;
|
||||
private float queryWeight;
|
||||
return new ConstantScoreWeight(DocValuesRangeQuery.this) {
|
||||
|
||||
@Override
|
||||
public float getValueForNormalization() throws IOException {
|
||||
queryWeight = getBoost();
|
||||
return queryWeight * queryWeight;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void normalize(float norm, float topLevelBoost) {
|
||||
queryNorm = norm * topLevelBoost;
|
||||
queryWeight *= queryNorm;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||
final Scorer s = scorer(context, context.reader().getLiveDocs());
|
||||
final boolean exists = (s != null && s.advance(doc) == doc);
|
||||
|
||||
final ComplexExplanation result = new ComplexExplanation();
|
||||
if (exists) {
|
||||
result.setDescription(DocValuesRangeQuery.this.toString() + ", product of:");
|
||||
result.setValue(queryWeight);
|
||||
result.setMatch(Boolean.TRUE);
|
||||
result.addDetail(new Explanation(getBoost(), "boost"));
|
||||
result.addDetail(new Explanation(queryNorm, "queryNorm"));
|
||||
} else {
|
||||
result.setDescription(DocValuesRangeQuery.this.toString() + " doesn't match id " + doc);
|
||||
result.setValue(0);
|
||||
result.setMatch(Boolean.FALSE);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
public Scorer scorer(LeafReaderContext context, Bits acceptDocs, float score) throws IOException {
|
||||
|
||||
final Bits docsWithField = context.reader().getDocsWithField(field);
|
||||
if (docsWithField == null || docsWithField instanceof MatchNoBits) {
|
||||
|
@ -240,7 +205,7 @@ public final class DocValuesRangeQuery extends Query {
|
|||
throw new AssertionError();
|
||||
}
|
||||
|
||||
return new RangeScorer(this, twoPhaseRange, queryWeight);
|
||||
return new RangeScorer(this, twoPhaseRange, score);
|
||||
}
|
||||
|
||||
};
|
||||
|
|
|
@ -1,135 +0,0 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
||||
/**
|
||||
* A {@link Filter} that only accepts documents whose single
|
||||
* term value in the specified field is contained in the
|
||||
* provided set of allowed terms.
|
||||
*
|
||||
* <p>
|
||||
* This is the same functionality as TermsFilter (from
|
||||
* queries/), except this filter requires that the
|
||||
* field contains only a single term for all documents.
|
||||
* Because of drastically different implementations, they
|
||||
* also have different performance characteristics, as
|
||||
* described below.
|
||||
*
|
||||
* <p>
|
||||
* With each search, this filter translates the specified
|
||||
* set of Terms into a private {@link FixedBitSet} keyed by
|
||||
* term number per unique {@link IndexReader} (normally one
|
||||
* reader per segment). Then, during matching, the term
|
||||
* number for each docID is retrieved from the cache and
|
||||
* then checked for inclusion using the {@link FixedBitSet}.
|
||||
* Since all testing is done using RAM resident data
|
||||
* structures, performance should be very fast, most likely
|
||||
* fast enough to not require further caching of the
|
||||
* DocIdSet for each possible combination of terms.
|
||||
* However, because docIDs are simply scanned linearly, an
|
||||
* index with a great many small documents may find this
|
||||
* linear scan too costly.
|
||||
*
|
||||
* <p>
|
||||
* In contrast, TermsFilter builds up an {@link FixedBitSet},
|
||||
* keyed by docID, every time it's created, by enumerating
|
||||
* through all matching docs using {@link org.apache.lucene.index.PostingsEnum} to seek
|
||||
* and scan through each term's docID list. While there is
|
||||
* no linear scan of all docIDs, besides the allocation of
|
||||
* the underlying array in the {@link FixedBitSet}, this
|
||||
* approach requires a number of "disk seeks" in proportion
|
||||
* to the number of terms, which can be exceptionally costly
|
||||
* when there are cache misses in the OS's IO cache.
|
||||
*
|
||||
* <p>
|
||||
* Generally, this filter will be slower on the first
|
||||
* invocation for a given field, but subsequent invocations,
|
||||
* even if you change the allowed set of Terms, should be
|
||||
* faster than TermsFilter, especially as the number of
|
||||
* Terms being matched increases. If you are matching only
|
||||
* a very small number of terms, and those terms in turn
|
||||
* match a very small number of documents, TermsFilter may
|
||||
* perform faster.
|
||||
*
|
||||
* <p>
|
||||
* Which filter is best is very application dependent.
|
||||
*/
|
||||
|
||||
public class DocValuesTermsFilter extends Filter {
|
||||
private String field;
|
||||
private BytesRef[] terms;
|
||||
|
||||
public DocValuesTermsFilter(String field, BytesRef... terms) {
|
||||
this.field = field;
|
||||
this.terms = terms;
|
||||
}
|
||||
|
||||
public DocValuesTermsFilter(String field, String... terms) {
|
||||
this.field = field;
|
||||
this.terms = new BytesRef[terms.length];
|
||||
for (int i = 0; i < terms.length; i++)
|
||||
this.terms[i] = new BytesRef(terms[i]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
final SortedDocValues fcsi = DocValues.getSorted(context.reader(), field);
|
||||
final FixedBitSet bits = new FixedBitSet(fcsi.getValueCount());
|
||||
for (int i=0;i<terms.length;i++) {
|
||||
int ord = fcsi.lookupTerm(terms[i]);
|
||||
if (ord >= 0) {
|
||||
bits.set(ord);
|
||||
}
|
||||
}
|
||||
return new DocValuesDocIdSet(context.reader().maxDoc(), acceptDocs) {
|
||||
@Override
|
||||
protected final boolean matchDoc(int doc) {
|
||||
int ord = fcsi.getOrd(doc);
|
||||
if (ord == -1) {
|
||||
// missing
|
||||
return false;
|
||||
} else {
|
||||
return bits.get(ord);
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String defaultField) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(field).append(": [");
|
||||
for (BytesRef term : terms) {
|
||||
sb.append(term).append(", ");
|
||||
}
|
||||
if (terms.length > 0) {
|
||||
sb.setLength(sb.length() - 2);
|
||||
}
|
||||
return sb.append(']').toString();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,227 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.AbstractList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.LongBitSet;
|
||||
|
||||
/**
|
||||
* A {@link Query} that only accepts documents whose
|
||||
* term value in the specified field is contained in the
|
||||
* provided set of allowed terms.
|
||||
*
|
||||
* <p>
|
||||
* This is the same functionality as TermsQuery (from
|
||||
* queries/), but because of drastically different
|
||||
* implementations, they also have different performance
|
||||
* characteristics, as described below.
|
||||
*
|
||||
* <p>
|
||||
* With each search, this query translates the specified
|
||||
* set of Terms into a private {@link LongBitSet} keyed by
|
||||
* term number per unique {@link IndexReader} (normally one
|
||||
* reader per segment). Then, during matching, the term
|
||||
* number for each docID is retrieved from the cache and
|
||||
* then checked for inclusion using the {@link LongBitSet}.
|
||||
* Since all testing is done using RAM resident data
|
||||
* structures, performance should be very fast, most likely
|
||||
* fast enough to not require further caching of the
|
||||
* DocIdSet for each possible combination of terms.
|
||||
* However, because docIDs are simply scanned linearly, an
|
||||
* index with a great many small documents may find this
|
||||
* linear scan too costly.
|
||||
*
|
||||
* <p>
|
||||
* In contrast, TermsQuery builds up an {@link FixedBitSet},
|
||||
* keyed by docID, every time it's created, by enumerating
|
||||
* through all matching docs using {@link org.apache.lucene.index.PostingsEnum} to seek
|
||||
* and scan through each term's docID list. While there is
|
||||
* no linear scan of all docIDs, besides the allocation of
|
||||
* the underlying array in the {@link FixedBitSet}, this
|
||||
* approach requires a number of "disk seeks" in proportion
|
||||
* to the number of terms, which can be exceptionally costly
|
||||
* when there are cache misses in the OS's IO cache.
|
||||
*
|
||||
* <p>
|
||||
* Generally, this query will be slower on the first
|
||||
* invocation for a given field, but subsequent invocations,
|
||||
* even if you change the allowed set of Terms, should be
|
||||
* faster than TermsQuery, especially as the number of
|
||||
* Terms being matched increases. If you are matching only
|
||||
* a very small number of terms, and those terms in turn
|
||||
* match a very small number of documents, TermsQuery may
|
||||
* perform faster.
|
||||
*
|
||||
* <p>
|
||||
* Which query is best is very application dependent.
|
||||
*/
|
||||
public class DocValuesTermsQuery extends Query {
|
||||
|
||||
private final String field;
|
||||
private final BytesRef[] terms;
|
||||
|
||||
public DocValuesTermsQuery(String field, Collection<BytesRef> terms) {
|
||||
this.field = Objects.requireNonNull(field);
|
||||
this.terms = terms.toArray(new BytesRef[terms.size()]);
|
||||
ArrayUtil.timSort(this.terms, BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
}
|
||||
|
||||
public DocValuesTermsQuery(String field, BytesRef... terms) {
|
||||
this(field, Arrays.asList(terms));
|
||||
}
|
||||
|
||||
public DocValuesTermsQuery(String field, String... terms) {
|
||||
this(field, new AbstractList<BytesRef>() {
|
||||
@Override
|
||||
public BytesRef get(int index) {
|
||||
return new BytesRef(terms[index]);
|
||||
}
|
||||
@Override
|
||||
public int size() {
|
||||
return terms.length;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (obj instanceof DocValuesTermsQuery == false) {
|
||||
return false;
|
||||
}
|
||||
DocValuesTermsQuery that = (DocValuesTermsQuery) obj;
|
||||
if (!field.equals(that.field)) {
|
||||
return false;
|
||||
}
|
||||
if (getBoost() != that.getBoost()) {
|
||||
return false;
|
||||
}
|
||||
return Arrays.equals(terms, that.terms);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(field, Arrays.asList(terms), getBoost());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString(String defaultField) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append(field).append(": [");
|
||||
for (BytesRef term : terms) {
|
||||
sb.append(term).append(", ");
|
||||
}
|
||||
if (terms.length > 0) {
|
||||
sb.setLength(sb.length() - 2);
|
||||
}
|
||||
return sb.append(']').toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
return new ConstantScoreWeight(this) {
|
||||
|
||||
@Override
|
||||
Scorer scorer(LeafReaderContext context, Bits acceptDocs, float score) throws IOException {
|
||||
final SortedSetDocValues values = DocValues.getSortedSet(context.reader(), field);
|
||||
final LongBitSet bits = new LongBitSet(values.getValueCount());
|
||||
for (BytesRef term : terms) {
|
||||
final long ord = values.lookupTerm(term);
|
||||
if (ord >= 0) {
|
||||
bits.set(ord);
|
||||
}
|
||||
}
|
||||
|
||||
final DocIdSetIterator approximation = DocIdSetIterator.all(context.reader().maxDoc());
|
||||
final TwoPhaseDocIdSetIterator twoPhaseIterator = new TwoPhaseDocIdSetIterator() {
|
||||
@Override
|
||||
public DocIdSetIterator approximation() {
|
||||
return approximation;
|
||||
}
|
||||
@Override
|
||||
public boolean matches() throws IOException {
|
||||
final int doc = approximation.docID();
|
||||
if (acceptDocs != null && acceptDocs.get(doc) == false) {
|
||||
return false;
|
||||
}
|
||||
values.setDocument(doc);
|
||||
for (long ord = values.nextOrd(); ord != SortedSetDocValues.NO_MORE_ORDS; ord = values.nextOrd()) {
|
||||
if (bits.get(ord)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
};
|
||||
final DocIdSetIterator disi = TwoPhaseDocIdSetIterator.asDocIdSetIterator(twoPhaseIterator);
|
||||
return new Scorer(this) {
|
||||
|
||||
@Override
|
||||
public TwoPhaseDocIdSetIterator asTwoPhaseIterator() {
|
||||
return twoPhaseIterator;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return score;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int freq() throws IOException {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return disi.docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return disi.nextDoc();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
return disi.advance(target);
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return disi.cost();
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
}
|
|
@ -61,45 +61,10 @@ public final class FieldValueQuery extends Query {
|
|||
|
||||
@Override
|
||||
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
|
||||
return new Weight(this) {
|
||||
|
||||
private float queryNorm;
|
||||
private float queryWeight;
|
||||
return new ConstantScoreWeight(this) {
|
||||
|
||||
@Override
|
||||
public float getValueForNormalization() throws IOException {
|
||||
queryWeight = getBoost();
|
||||
return queryWeight * queryWeight;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void normalize(float norm, float topLevelBoost) {
|
||||
queryNorm = norm * topLevelBoost;
|
||||
queryWeight *= queryNorm;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Explanation explain(LeafReaderContext context, int doc) throws IOException {
|
||||
final Scorer s = scorer(context, context.reader().getLiveDocs());
|
||||
final boolean exists = (s != null && s.advance(doc) == doc);
|
||||
|
||||
final ComplexExplanation result = new ComplexExplanation();
|
||||
if (exists) {
|
||||
result.setDescription(FieldValueQuery.this.toString() + ", product of:");
|
||||
result.setValue(queryWeight);
|
||||
result.setMatch(Boolean.TRUE);
|
||||
result.addDetail(new Explanation(getBoost(), "boost"));
|
||||
result.addDetail(new Explanation(queryNorm, "queryNorm"));
|
||||
} else {
|
||||
result.setDescription(FieldValueQuery.this.toString() + " doesn't match id " + doc);
|
||||
result.setValue(0);
|
||||
result.setMatch(Boolean.FALSE);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(LeafReaderContext context, Bits acceptDocs) throws IOException {
|
||||
public Scorer scorer(LeafReaderContext context, Bits acceptDocs, float score) throws IOException {
|
||||
final Bits docsWithField = context.reader().getDocsWithField(field);
|
||||
if (docsWithField == null || docsWithField instanceof MatchNoBits) {
|
||||
return null;
|
||||
|
@ -161,7 +126,7 @@ public final class FieldValueQuery extends Query {
|
|||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return queryWeight;
|
||||
return score;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -71,13 +71,13 @@ public class TestComplexExplanations extends BaseExplanationTestCase {
|
|||
Occur.SHOULD);
|
||||
q.add(snear(sf("w3",2), st("w2"), st("w3"), 5, true),
|
||||
Occur.SHOULD);
|
||||
|
||||
|
||||
Query t = new FilteredQuery(new TermQuery(new Term(FIELD, "xx")),
|
||||
new ItemizedFilter(new int[] {1,3}));
|
||||
new QueryWrapperFilter(new ItemizedQuery(new int[] {1,3})));
|
||||
t.setBoost(1000);
|
||||
q.add(t, Occur.SHOULD);
|
||||
|
||||
t = new ConstantScoreQuery(new ItemizedFilter(new int[] {0,2}));
|
||||
t = new ConstantScoreQuery(new ItemizedQuery(new int[] {0,2}));
|
||||
t.setBoost(30);
|
||||
q.add(t, Occur.SHOULD);
|
||||
|
||||
|
@ -136,11 +136,11 @@ public class TestComplexExplanations extends BaseExplanationTestCase {
|
|||
Occur.SHOULD);
|
||||
|
||||
Query t = new FilteredQuery(new TermQuery(new Term(FIELD, "xx")),
|
||||
new ItemizedFilter(new int[] {1,3}));
|
||||
new QueryWrapperFilter(new ItemizedQuery(new int[] {1,3})));
|
||||
t.setBoost(1000);
|
||||
q.add(t, Occur.SHOULD);
|
||||
|
||||
t = new ConstantScoreQuery(new ItemizedFilter(new int[] {0,2}));
|
||||
t = new ConstantScoreQuery(new ItemizedQuery(new int[] {0,2}));
|
||||
t.setBoost(-20.0f);
|
||||
q.add(t, Occur.SHOULD);
|
||||
|
||||
|
@ -207,13 +207,11 @@ public class TestComplexExplanations extends BaseExplanationTestCase {
|
|||
public void testFQ5() throws Exception {
|
||||
TermQuery query = new TermQuery(new Term(FIELD, "xx"));
|
||||
query.setBoost(0);
|
||||
bqtest(new FilteredQuery(query,
|
||||
new ItemizedFilter(new int[] {1,3})),
|
||||
new int[] {3});
|
||||
bqtest(new FilteredQuery(query, new QueryWrapperFilter(new ItemizedQuery(new int[] {1,3}))), new int[] {3});
|
||||
}
|
||||
|
||||
public void testCSQ4() throws Exception {
|
||||
Query q = new ConstantScoreQuery(new ItemizedFilter(new int[] {3}));
|
||||
Query q = new ConstantScoreQuery(new ItemizedQuery(new int[] {3}));
|
||||
q.setBoost(0);
|
||||
bqtest(q, new int[] {3});
|
||||
}
|
||||
|
|
|
@ -0,0 +1,188 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.SortedDocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.BooleanClause.Occur;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
public class TestDocValuesTermsQuery extends LuceneTestCase {
|
||||
|
||||
public void testEquals() {
|
||||
assertEquals(new DocValuesTermsQuery("foo", "bar"), new DocValuesTermsQuery("foo", "bar"));
|
||||
assertEquals(new DocValuesTermsQuery("foo", "bar", "baz"), new DocValuesTermsQuery("foo", "baz", "bar"));
|
||||
assertFalse(new DocValuesTermsQuery("foo", "bar").equals(new DocValuesTermsQuery("foo2", "bar")));
|
||||
assertFalse(new DocValuesTermsQuery("foo", "bar").equals(new DocValuesTermsQuery("foo", "baz")));
|
||||
}
|
||||
|
||||
public void testDuelTermsQuery() throws IOException {
|
||||
final int iters = atLeast(2);
|
||||
for (int iter = 0; iter < iters; ++iter) {
|
||||
final List<Term> allTerms = new ArrayList<>();
|
||||
final int numTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 10));
|
||||
for (int i = 0; i < numTerms; ++i) {
|
||||
final String value = TestUtil.randomAnalysisString(random(), 10, true);
|
||||
allTerms.add(new Term("f", value));
|
||||
}
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
final int numDocs = atLeast(100);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
Document doc = new Document();
|
||||
final Term term = allTerms.get(random().nextInt(allTerms.size()));
|
||||
doc.add(new StringField(term.field(), term.text(), Store.NO));
|
||||
doc.add(new SortedDocValuesField(term.field(), new BytesRef(term.text())));
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
if (numTerms > 1 && random().nextBoolean()) {
|
||||
iw.deleteDocuments(new TermQuery(allTerms.get(0)));
|
||||
}
|
||||
iw.commit();
|
||||
final IndexReader reader = iw.getReader();
|
||||
final IndexSearcher searcher = newSearcher(reader);
|
||||
iw.close();
|
||||
|
||||
if (reader.numDocs() == 0) {
|
||||
// may occasionally happen if all documents got the same term
|
||||
IOUtils.close(reader, dir);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
final float boost = random().nextFloat() * 10;
|
||||
final int numQueryTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 8));
|
||||
List<Term> queryTerms = new ArrayList<>();
|
||||
for (int j = 0; j < numQueryTerms; ++j) {
|
||||
queryTerms.add(allTerms.get(random().nextInt(allTerms.size())));
|
||||
}
|
||||
final BooleanQuery bq = new BooleanQuery();
|
||||
for (Term term : queryTerms) {
|
||||
bq.add(new TermQuery(term), Occur.SHOULD);
|
||||
}
|
||||
Query q1 = new ConstantScoreQuery(bq);
|
||||
q1.setBoost(boost);
|
||||
List<String> bytesTerms = new ArrayList<>();
|
||||
for (Term term : queryTerms) {
|
||||
bytesTerms.add(term.text());
|
||||
}
|
||||
final Query q2 = new DocValuesTermsQuery("f", bytesTerms.toArray(new String[0]));
|
||||
q2.setBoost(boost);
|
||||
assertSameMatches(searcher, q1, q2, true);
|
||||
}
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
||||
public void testApproximation() throws IOException {
|
||||
final int iters = atLeast(2);
|
||||
for (int iter = 0; iter < iters; ++iter) {
|
||||
final List<Term> allTerms = new ArrayList<>();
|
||||
final int numTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 10));
|
||||
for (int i = 0; i < numTerms; ++i) {
|
||||
final String value = TestUtil.randomAnalysisString(random(), 10, true);
|
||||
allTerms.add(new Term("f", value));
|
||||
}
|
||||
Directory dir = newDirectory();
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir);
|
||||
final int numDocs = atLeast(100);
|
||||
for (int i = 0; i < numDocs; ++i) {
|
||||
Document doc = new Document();
|
||||
final Term term = allTerms.get(random().nextInt(allTerms.size()));
|
||||
doc.add(new StringField(term.field(), term.text(), Store.NO));
|
||||
doc.add(new SortedDocValuesField(term.field(), new BytesRef(term.text())));
|
||||
iw.addDocument(doc);
|
||||
}
|
||||
if (numTerms > 1 && random().nextBoolean()) {
|
||||
iw.deleteDocuments(new TermQuery(allTerms.get(0)));
|
||||
}
|
||||
iw.commit();
|
||||
final IndexReader reader = iw.getReader();
|
||||
final IndexSearcher searcher = newSearcher(reader);
|
||||
iw.close();
|
||||
|
||||
if (reader.numDocs() == 0) {
|
||||
// may occasionally happen if all documents got the same term
|
||||
IOUtils.close(reader, dir);
|
||||
continue;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 100; ++i) {
|
||||
final float boost = random().nextFloat() * 10;
|
||||
final int numQueryTerms = TestUtil.nextInt(random(), 1, 1 << TestUtil.nextInt(random(), 1, 8));
|
||||
List<Term> queryTerms = new ArrayList<>();
|
||||
for (int j = 0; j < numQueryTerms; ++j) {
|
||||
queryTerms.add(allTerms.get(random().nextInt(allTerms.size())));
|
||||
}
|
||||
final BooleanQuery bq = new BooleanQuery();
|
||||
for (Term term : queryTerms) {
|
||||
bq.add(new TermQuery(term), Occur.SHOULD);
|
||||
}
|
||||
Query q1 = new ConstantScoreQuery(bq);
|
||||
q1.setBoost(boost);
|
||||
List<String> bytesTerms = new ArrayList<>();
|
||||
for (Term term : queryTerms) {
|
||||
bytesTerms.add(term.text());
|
||||
}
|
||||
final Query q2 = new DocValuesTermsQuery("f", bytesTerms.toArray(new String[0]));
|
||||
q2.setBoost(boost);
|
||||
|
||||
BooleanQuery bq1 = new BooleanQuery();
|
||||
bq1.add(q1, Occur.MUST);
|
||||
bq1.add(new TermQuery(allTerms.get(0)), Occur.FILTER);
|
||||
|
||||
BooleanQuery bq2 = new BooleanQuery();
|
||||
bq2.add(q2, Occur.MUST);
|
||||
bq2.add(new TermQuery(allTerms.get(0)), Occur.FILTER);
|
||||
|
||||
assertSameMatches(searcher, bq1, bq2, true);
|
||||
}
|
||||
|
||||
reader.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
||||
private void assertSameMatches(IndexSearcher searcher, Query q1, Query q2, boolean scores) throws IOException {
|
||||
final int maxDoc = searcher.getIndexReader().maxDoc();
|
||||
final TopDocs td1 = searcher.search(q1, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER);
|
||||
final TopDocs td2 = searcher.search(q2, maxDoc, scores ? Sort.RELEVANCE : Sort.INDEXORDER);
|
||||
assertEquals(td1.totalHits, td2.totalHits);
|
||||
for (int i = 0; i < td1.scoreDocs.length; ++i) {
|
||||
assertEquals(td1.scoreDocs[i].doc, td2.scoreDocs[i].doc);
|
||||
if (scores) {
|
||||
assertEquals(td1.scoreDocs[i].score, td2.scoreDocs[i].score, 10e-7);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -32,7 +32,7 @@ import java.util.List;
|
|||
/**
|
||||
* A basic unit test for FieldCacheTermsFilter
|
||||
*
|
||||
* @see org.apache.lucene.search.DocValuesTermsFilter
|
||||
* @see org.apache.lucene.search.DocValuesTermsQuery
|
||||
*/
|
||||
public class TestFieldCacheTermsFilter extends LuceneTestCase {
|
||||
public void testMissingTerms() throws Exception {
|
||||
|
@ -52,22 +52,21 @@ public class TestFieldCacheTermsFilter extends LuceneTestCase {
|
|||
IndexSearcher searcher = newSearcher(reader);
|
||||
int numDocs = reader.numDocs();
|
||||
ScoreDoc[] results;
|
||||
MatchAllDocsQuery q = new MatchAllDocsQuery();
|
||||
|
||||
List<String> terms = new ArrayList<>();
|
||||
terms.add("5");
|
||||
results = searcher.search(new FilteredQuery(q, new DocValuesTermsFilter(fieldName, terms.toArray(new String[0]))), numDocs).scoreDocs;
|
||||
results = searcher.search(new DocValuesTermsQuery(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs;
|
||||
assertEquals("Must match nothing", 0, results.length);
|
||||
|
||||
terms = new ArrayList<>();
|
||||
terms.add("10");
|
||||
results = searcher.search(new FilteredQuery(q, new DocValuesTermsFilter(fieldName, terms.toArray(new String[0]))), numDocs).scoreDocs;
|
||||
results = searcher.search(new DocValuesTermsQuery(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs;
|
||||
assertEquals("Must match 1", 1, results.length);
|
||||
|
||||
terms = new ArrayList<>();
|
||||
terms.add("10");
|
||||
terms.add("20");
|
||||
results = searcher.search(new FilteredQuery(q, new DocValuesTermsFilter(fieldName, terms.toArray(new String[0]))), numDocs).scoreDocs;
|
||||
results = searcher.search(new DocValuesTermsQuery(fieldName, terms.toArray(new String[0])), numDocs).scoreDocs;
|
||||
assertEquals("Must match 2", 2, results.length);
|
||||
|
||||
reader.close();
|
||||
|
|
|
@ -105,28 +105,28 @@ public class TestSimpleExplanations extends BaseExplanationTestCase {
|
|||
|
||||
public void testFQ1() throws Exception {
|
||||
qtest(new FilteredQuery(new TermQuery(new Term(FIELD, "w1")),
|
||||
new ItemizedFilter(new int[] {0,1,2,3})),
|
||||
new QueryWrapperFilter(new ItemizedQuery(new int[] {0,1,2,3}))),
|
||||
new int[] {0,1,2,3});
|
||||
}
|
||||
public void testFQ2() throws Exception {
|
||||
qtest(new FilteredQuery(new TermQuery(new Term(FIELD, "w1")),
|
||||
new ItemizedFilter(new int[] {0,2,3})),
|
||||
new QueryWrapperFilter(new ItemizedQuery(new int[] {0,2,3}))),
|
||||
new int[] {0,2,3});
|
||||
}
|
||||
public void testFQ3() throws Exception {
|
||||
qtest(new FilteredQuery(new TermQuery(new Term(FIELD, "xx")),
|
||||
new ItemizedFilter(new int[] {1,3})),
|
||||
new QueryWrapperFilter(new ItemizedQuery(new int[] {1,3}))),
|
||||
new int[] {3});
|
||||
}
|
||||
public void testFQ4() throws Exception {
|
||||
TermQuery termQuery = new TermQuery(new Term(FIELD, "xx"));
|
||||
termQuery.setBoost(1000);
|
||||
qtest(new FilteredQuery(termQuery, new ItemizedFilter(new int[] {1,3})),
|
||||
qtest(new FilteredQuery(termQuery, new QueryWrapperFilter(new ItemizedQuery(new int[] {1,3}))),
|
||||
new int[] {3});
|
||||
}
|
||||
public void testFQ6() throws Exception {
|
||||
Query q = new FilteredQuery(new TermQuery(new Term(FIELD, "xx")),
|
||||
new ItemizedFilter(new int[] {1,3}));
|
||||
new QueryWrapperFilter(new ItemizedQuery(new int[] {1,3})));
|
||||
q.setBoost(1000);
|
||||
qtest(q, new int[] {3});
|
||||
}
|
||||
|
@ -134,15 +134,15 @@ public class TestSimpleExplanations extends BaseExplanationTestCase {
|
|||
/* ConstantScoreQueries */
|
||||
|
||||
public void testCSQ1() throws Exception {
|
||||
Query q = new ConstantScoreQuery(new ItemizedFilter(new int[] {0,1,2,3}));
|
||||
Query q = new ConstantScoreQuery(new ItemizedQuery(new int[] {0,1,2,3}));
|
||||
qtest(q, new int[] {0,1,2,3});
|
||||
}
|
||||
public void testCSQ2() throws Exception {
|
||||
Query q = new ConstantScoreQuery(new ItemizedFilter(new int[] {1,3}));
|
||||
Query q = new ConstantScoreQuery(new ItemizedQuery(new int[] {1,3}));
|
||||
qtest(q, new int[] {1,3});
|
||||
}
|
||||
public void testCSQ3() throws Exception {
|
||||
Query q = new ConstantScoreQuery(new ItemizedFilter(new int[] {0,2}));
|
||||
Query q = new ConstantScoreQuery(new ItemizedQuery(new int[] {0,2}));
|
||||
q.setBoost(1000);
|
||||
qtest(q, new int[] {0,2});
|
||||
}
|
||||
|
|
|
@ -112,7 +112,7 @@ public abstract class BaseExplanationTestCase extends LuceneTestCase {
|
|||
/**
|
||||
* Convenience subclass of FieldCacheTermsFilter
|
||||
*/
|
||||
public static class ItemizedFilter extends DocValuesTermsFilter {
|
||||
public static class ItemizedQuery extends DocValuesTermsQuery {
|
||||
private static String[] int2str(int [] terms) {
|
||||
String [] out = new String[terms.length];
|
||||
for (int i = 0; i < terms.length; i++) {
|
||||
|
@ -120,7 +120,7 @@ public abstract class BaseExplanationTestCase extends LuceneTestCase {
|
|||
}
|
||||
return out;
|
||||
}
|
||||
public ItemizedFilter(int [] keys) {
|
||||
public ItemizedQuery(int [] keys) {
|
||||
super(KEY, int2str(keys));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,7 +25,7 @@ import org.apache.lucene.queries.TermsQuery;
|
|||
import org.apache.lucene.search.AutomatonQuery;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.DocValuesTermsFilter;
|
||||
import org.apache.lucene.search.DocValuesTermsQuery;
|
||||
import org.apache.lucene.search.Filter;
|
||||
import org.apache.lucene.search.MultiTermQueryWrapperFilter;
|
||||
import org.apache.lucene.search.Query;
|
||||
|
@ -93,7 +93,7 @@ public class TermsQParserPlugin extends QParserPlugin {
|
|||
//note: limited to one val per doc
|
||||
@Override
|
||||
Filter makeFilter(String fname, BytesRef[] byteRefs) {
|
||||
return new DocValuesTermsFilter(fname, byteRefs);
|
||||
return new QueryWrapperFilter(new DocValuesTermsQuery(fname, byteRefs));
|
||||
}
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in New Issue