mirror of https://github.com/apache/lucene.git

LUCENE-5938: Add a new sparse fixed bit set and remove ConstantScoreAutoRewrite.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1628402 13f79535-47bb-0310-9956-ffa450edef68

parent 672fc08ec3
commit 8f96faf3d3
@@ -136,6 +136,9 @@ API Changes
 * LUCENE-5569: *AtomicReader/AtomicReaderContext have been renamed to *LeafReader/LeafReaderContext.
   (Ryan Ernst)
 
+* LUCENE-5938: Removed MultiTermQuery.ConstantScoreAutoRewrite as
+  MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE is usually better. (Adrien Grand)
+
 Bug Fixes
 
 * LUCENE-5650: Enforce read-only access to any path outside the temporary
@@ -178,6 +181,10 @@ Optimizations
 * LUCENE-5963: Reduce memory allocations in
   AnalyzingSuggester. (Markus Heiden via Mike McCandless)
 
+* LUCENE-5938: MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE is now faster on
+  queries that match few documents by using a sparse bit set implementation.
+  (Adrien Grand)
+
 Build
 
 * LUCENE-5909: Smoke tester now has better command line parsing and
@@ -1,220 +0,0 @@
-package org.apache.lucene.search;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermContext;
-import org.apache.lucene.index.TermState;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.ByteBlockPool;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefHash;
-import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
-
-class ConstantScoreAutoRewrite extends TermCollectingRewrite<BooleanQuery> {
-
-  // Defaults derived from rough tests with a 20.0 million
-  // doc Wikipedia index. With more than 350 terms in the
-  // query, the filter method is fastest:
-  public static int DEFAULT_TERM_COUNT_CUTOFF = 350;
-
-  // If the query will hit more than 1 in 1000 of the docs
-  // in the index (0.1%), the filter method is fastest:
-  public static double DEFAULT_DOC_COUNT_PERCENT = 0.1;
-
-  private int termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF;
-  private double docCountPercent = DEFAULT_DOC_COUNT_PERCENT;
-
-  /** If the number of terms in this query is equal to or
-   *  larger than this setting then {@link
-   *  MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} is used. */
-  public void setTermCountCutoff(int count) {
-    termCountCutoff = count;
-  }
-
-  /** @see #setTermCountCutoff */
-  public int getTermCountCutoff() {
-    return termCountCutoff;
-  }
-
-  /** If the number of documents to be visited in the
-   *  postings exceeds this specified percentage of the
-   *  maxDoc() for the index, then {@link
-   *  MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} is used.
-   *  @param percent 0.0 to 100.0 */
-  public void setDocCountPercent(double percent) {
-    docCountPercent = percent;
-  }
-
-  /** @see #setDocCountPercent */
-  public double getDocCountPercent() {
-    return docCountPercent;
-  }
-
-  @Override
-  protected BooleanQuery getTopLevelQuery() {
-    return new BooleanQuery(true);
-  }
-
-  @Override
-  protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost /*ignored*/, TermContext states) {
-    topLevel.add(new TermQuery(term, states), BooleanClause.Occur.SHOULD);
-  }
-
-  @Override
-  public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
-
-    // Get the enum and start visiting terms. If we
-    // exhaust the enum before hitting either of the
-    // cutoffs, we use ConstantBooleanQueryRewrite; else,
-    // ConstantFilterRewrite:
-    final int docCountCutoff = (int) ((docCountPercent / 100.) * reader.maxDoc());
-    final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff);
-
-    final CutOffTermCollector col = new CutOffTermCollector(docCountCutoff, termCountLimit);
-    collectTerms(reader, query, col);
-    final int size = col.pendingTerms.size();
-    if (col.hasCutOff) {
-      return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query);
-    } else {
-      final BooleanQuery bq = getTopLevelQuery();
-      if (size > 0) {
-        final BytesRefHash pendingTerms = col.pendingTerms;
-        final int sort[] = pendingTerms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
-        for(int i = 0; i < size; i++) {
-          final int pos = sort[i];
-          // docFreq is not used for constant score here, we pass 1
-          // to explicitly set a fake value, so it's not calculated
-          addClause(bq, new Term(query.field, pendingTerms.get(pos, new BytesRef())), 1, 1.0f, col.array.termState[pos]);
-        }
-      }
-      // Strip scores
-      final Query result = new ConstantScoreQuery(bq);
-      result.setBoost(query.getBoost());
-      return result;
-    }
-  }
-
-  static final class CutOffTermCollector extends TermCollector {
-    CutOffTermCollector(int docCountCutoff, int termCountLimit) {
-      this.docCountCutoff = docCountCutoff;
-      this.termCountLimit = termCountLimit;
-    }
-
-    @Override
-    public void setNextEnum(TermsEnum termsEnum) {
-      this.termsEnum = termsEnum;
-    }
-
-    @Override
-    public boolean collect(BytesRef bytes) throws IOException {
-      int pos = pendingTerms.add(bytes);
-      docVisitCount += termsEnum.docFreq();
-      if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) {
-        hasCutOff = true;
-        return false;
-      }
-
-      final TermState termState = termsEnum.termState();
-      assert termState != null;
-      if (pos < 0) {
-        pos = (-pos)-1;
-        array.termState[pos].register(termState, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
-      } else {
-        array.termState[pos] = new TermContext(topReaderContext, termState, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
-      }
-      return true;
-    }
-
-    int docVisitCount = 0;
-    boolean hasCutOff = false;
-    TermsEnum termsEnum;
-
-    final int docCountCutoff, termCountLimit;
-    final TermStateByteStart array = new TermStateByteStart(16);
-    final BytesRefHash pendingTerms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array);
-  }
-
-  @Override
-  public int hashCode() {
-    final int prime = 1279;
-    return (int) (prime * termCountCutoff + Double.doubleToLongBits(docCountPercent));
-  }
-
-  @Override
-  public boolean equals(Object obj) {
-    if (this == obj)
-      return true;
-    if (obj == null)
-      return false;
-    if (getClass() != obj.getClass())
-      return false;
-
-    ConstantScoreAutoRewrite other = (ConstantScoreAutoRewrite) obj;
-    if (other.termCountCutoff != termCountCutoff) {
-      return false;
-    }
-
-    if (Double.doubleToLongBits(other.docCountPercent) != Double.doubleToLongBits(docCountPercent)) {
-      return false;
-    }
-
-    return true;
-  }
-
-  /** Special implementation of BytesStartArray that keeps parallel arrays for {@link TermContext} */
-  static final class TermStateByteStart extends DirectBytesStartArray {
-    TermContext[] termState;
-
-    public TermStateByteStart(int initSize) {
-      super(initSize);
-    }
-
-    @Override
-    public int[] init() {
-      final int[] ord = super.init();
-      termState = new TermContext[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
-      assert termState.length >= ord.length;
-      return ord;
-    }
-
-    @Override
-    public int[] grow() {
-      final int[] ord = super.grow();
-      if (termState.length < ord.length) {
-        TermContext[] tmpTermState = new TermContext[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_OBJECT_REF)];
-        System.arraycopy(termState, 0, tmpTermState, 0, termState.length);
-        termState = tmpTermState;
-      }
-      assert termState.length >= ord.length;
-      return ord;
-    }
-
-    @Override
-    public int[] clear() {
-      termState = null;
-      return super.clear();
-    }
-
-  }
-}
@@ -49,8 +49,8 @@ import org.apache.lucene.util.AttributeSource;
  * prevents this.
  *
  * <p>The recommended rewrite method is {@link
- * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}: it doesn't spend CPU
- * computing unhelpful scores, and it tries to pick the most
+ * #CONSTANT_SCORE_FILTER_REWRITE}: it doesn't spend CPU
+ * computing unhelpful scores, and is the most
  * performant rewrite method given the query. If you
  * need scoring (like {@link FuzzyQuery}, use
  * {@link TopTermsScoringBooleanQueryRewrite} which uses
@@ -58,12 +58,12 @@ import org.apache.lucene.util.AttributeSource;
  * and not hit this limitation.
  *
  * Note that org.apache.lucene.queryparser.classic.QueryParser produces
- * MultiTermQueries using {@link
- * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} by default.
+ * MultiTermQueries using {@link #CONSTANT_SCORE_FILTER_REWRITE}
+ * by default.
  */
 public abstract class MultiTermQuery extends Query {
   protected final String field;
-  protected RewriteMethod rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
+  protected RewriteMethod rewriteMethod = CONSTANT_SCORE_FILTER_REWRITE;
 
   /** Abstract class that defines how the query is rewritten. */
   public static abstract class RewriteMethod {
@@ -104,7 +104,7 @@ public abstract class MultiTermQuery extends Query {
    *  query. Note that typically such scores are
    *  meaningless to the user, and require non-trivial CPU
    *  to compute, so it's almost always better to use {@link
-   *  #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} instead.
+   *  #CONSTANT_SCORE_FILTER_REWRITE} instead.
    *
    *  <p><b>NOTE</b>: This rewrite method will hit {@link
    *  BooleanQuery.TooManyClauses} if the number of terms
@@ -208,40 +208,6 @@ public abstract class MultiTermQuery extends Query {
       topLevel.add(q, BooleanClause.Occur.SHOULD);
     }
   }
-
-  /** A rewrite method that tries to pick the best
-   *  constant-score rewrite method based on term and
-   *  document counts from the query. If both the number of
-   *  terms and documents is small enough, then {@link
-   *  #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} is used.
-   *  Otherwise, {@link #CONSTANT_SCORE_FILTER_REWRITE} is
-   *  used.
-   */
-  public static class ConstantScoreAutoRewrite extends org.apache.lucene.search.ConstantScoreAutoRewrite {}
-
-  /** Read-only default instance of {@link
-   *  ConstantScoreAutoRewrite}, with {@link
-   *  ConstantScoreAutoRewrite#setTermCountCutoff} set to
-   *  {@link
-   *  ConstantScoreAutoRewrite#DEFAULT_TERM_COUNT_CUTOFF}
-   *  and {@link
-   *  ConstantScoreAutoRewrite#setDocCountPercent} set to
-   *  {@link
-   *  ConstantScoreAutoRewrite#DEFAULT_DOC_COUNT_PERCENT}.
-   *  Note that you cannot alter the configuration of this
-   *  instance; you'll need to create a private instance
-   *  instead. */
-  public final static RewriteMethod CONSTANT_SCORE_AUTO_REWRITE_DEFAULT = new ConstantScoreAutoRewrite() {
-    @Override
-    public void setTermCountCutoff(int count) {
-      throw new UnsupportedOperationException("Please create a private instance");
-    }
-
-    @Override
-    public void setDocCountPercent(double percent) {
-      throw new UnsupportedOperationException("Please create a private instance");
-    }
-  };
 
   /**
    * Constructs a query matching terms that cannot be represented with a single
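
For callers that configured the removed auto rewrite, a hypothetical migration sketch (user code, not part of this patch; PrefixQuery is just an example MultiTermQuery, and the commented-out setters are the ones the deleted ConstantScoreAutoRewrite exposed above):

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.MultiTermQuery;
    import org.apache.lucene.search.PrefixQuery;

    public class RewriteMigration {
      public static void main(String[] args) {
        PrefixQuery query = new PrefixQuery(new Term("body", "luc"));

        // Before this commit: tune the auto rewrite's term/doc cutoffs.
        // MultiTermQuery.ConstantScoreAutoRewrite rewrite = new MultiTermQuery.ConstantScoreAutoRewrite();
        // rewrite.setTermCountCutoff(350);
        // rewrite.setDocCountPercent(0.1);
        // query.setRewriteMethod(rewrite);

        // After this commit: the filter rewrite is the default, so this
        // explicit call is only needed if another method was set earlier.
        query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
      }
    }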
@@ -19,14 +19,14 @@ package org.apache.lucene.search;
 
 import java.io.IOException;
 
-import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.util.FixedBitSet;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.DocIdSetBuilder;
 
 /**
  * A wrapper for {@link MultiTermQuery}, that exposes its
@@ -43,7 +43,7 @@ import org.apache.lucene.util.Bits;
  * this is why it is not abstract.
  */
 public class MultiTermQueryWrapperFilter<Q extends MultiTermQuery> extends Filter {
-
+
   protected final Q query;
 
   /**
@@ -52,7 +52,7 @@ public class MultiTermQueryWrapperFilter<Q extends MultiTermQuery> extends Filter {
   protected MultiTermQueryWrapperFilter(Q query) {
     this.query = query;
   }
-
+
   @Override
   public String toString() {
     // query.toString should be ok for the filter, too, if the query boost is 1.0f
@@ -77,7 +77,7 @@ public class MultiTermQueryWrapperFilter<Q extends MultiTermQuery> extends Filter {
 
   /** Returns the field name for this query */
   public final String getField() { return query.getField(); }
-
+
   /**
    * Returns a DocIdSet with documents that should be permitted in search
    * results.
@@ -99,24 +99,13 @@ public class MultiTermQueryWrapperFilter<Q extends MultiTermQuery> extends Filter {
 
     final TermsEnum termsEnum = query.getTermsEnum(terms);
     assert termsEnum != null;
-    if (termsEnum.next() != null) {
-      // fill into a FixedBitSet
-      final FixedBitSet bitSet = new FixedBitSet(context.reader().maxDoc());
-      DocsEnum docsEnum = null;
-      do {
-        // System.out.println(" iter termCount=" + termCount + " term=" +
-        // enumerator.term().toBytesString());
-        docsEnum = termsEnum.docs(acceptDocs, docsEnum, DocsEnum.FLAG_NONE);
-        int docid;
-        while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
-          bitSet.set(docid);
-        }
-      } while (termsEnum.next() != null);
-      // System.out.println(" done termCount=" + termCount);
-
-      return bitSet;
-    } else {
-      return null;
-    }
+
+    DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc());
+    DocsEnum docs = null;
+    while (termsEnum.next() != null) {
+      docs = termsEnum.docs(acceptDocs, docs, DocsEnum.FLAG_NONE);
+      builder.or(docs);
+    }
+    return builder.build();
   }
 }
@@ -72,7 +72,7 @@ import org.apache.lucene.index.Term; // for javadocs
  * details.
  *
  * <p>This query defaults to {@linkplain
- * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}.
+ * MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}.
  * With precision steps of ≤4, this query can be run with
 * one of the BooleanQuery rewrite methods without changing
 * BooleanQuery's default max clause count.
@@ -29,7 +29,7 @@ import org.apache.lucene.util.ToStringUtils;
  * is built by QueryParser for input like <code>app*</code>.
  *
  * <p>This query uses the {@link
- * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
+ * MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}
  * rewrite method. */
 public class PrefixQuery extends MultiTermQuery {
   private Term prefix;
@@ -45,7 +45,7 @@ public abstract class ScoringRewrite<Q extends Query> extends TermCollectingRewrite<Q>
    *  query. Note that typically such scores are
    *  meaningless to the user, and require non-trivial CPU
    *  to compute, so it's almost always better to use {@link
-   *  MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} instead.
+   *  MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} instead.
    *
    *  <p><b>NOTE</b>: This rewrite method will hit {@link
    *  BooleanQuery.TooManyClauses} if the number of terms
@@ -35,7 +35,7 @@ import org.apache.lucene.util.ToStringUtils;
  * for numerical ranges; use {@link NumericRangeQuery} instead.
  *
  * <p>This query uses the {@link
- * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
+ * MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}
  * rewrite method.
  * @since 2.9
  */
@@ -35,7 +35,7 @@ import org.apache.lucene.util.automaton.Automaton;
  * a Wildcard term should not start with the wildcard <code>*</code>
  *
  * <p>This query uses the {@link
- * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
+ * MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}
  * rewrite method.
  *
  * @see AutomatonQuery
@@ -0,0 +1,94 @@
+package org.apache.lucene.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+
+/**
+ * A builder of {@link DocIdSet}s that supports random access.
+ * @lucene.internal
+ */
+public final class DocIdSetBuilder {
+
+  private final int maxDoc;
+  private final int threshold;
+  private SparseFixedBitSet sparseSet;
+  private FixedBitSet denseSet;
+
+  // we cache an upper bound of the cost of this builder so that we don't have
+  // to re-compute approximateCardinality on the sparse set every time
+  private long costUpperBound;
+
+  /** Sole constructor. */
+  public DocIdSetBuilder(int maxDoc) {
+    this.maxDoc = maxDoc;
+    threshold = maxDoc >>> 14;
+  }
+
+  /**
+   * Add the content of the provided {@link DocIdSetIterator} to this builder.
+   */
+  public void or(DocIdSetIterator it) throws IOException {
+    if (denseSet != null) {
+      // already upgraded
+      denseSet.or(it);
+      return;
+    }
+
+    final long itCost = it.cost();
+    costUpperBound += itCost;
+    if (costUpperBound >= threshold) {
+      costUpperBound = (sparseSet == null ? 0 : sparseSet.approximateCardinality()) + itCost;
+
+      if (costUpperBound >= threshold) {
+        // upgrade
+        denseSet = new FixedBitSet(maxDoc);
+        denseSet.or(it);
+        if (sparseSet != null) {
+          denseSet.or(sparseSet.iterator());
+        }
+        return;
+      }
+    }
+
+    // we are still sparse
+    if (sparseSet == null) {
+      sparseSet = new SparseFixedBitSet(maxDoc);
+    }
+    sparseSet.or(it);
+  }
+
+  /**
+   * Build a {@link DocIdSet} that contains all doc ids that have been added.
+   * This method may return <tt>null</tt> if no documents were added to this
+   * builder.
+   * NOTE: this is a destructive operation, the builder should not be used
+   * anymore after this method has been called.
+   */
+  public DocIdSet build() {
+    final DocIdSet result = denseSet != null ? denseSet : sparseSet;
+    denseSet = null;
+    sparseSet = null;
+    costUpperBound = 0;
+    return result;
+  }
+
+}
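
A hypothetical usage sketch of the builder above (illustration, not part of this patch): with maxDoc = 1,000,000 the upgrade threshold is maxDoc >>> 14 = 61 expected documents, so a couple of tiny iterators keep the result sparse.

    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.util.DocIdSetBuilder;
    import org.apache.lucene.util.SparseFixedBitSet;

    public class DocIdSetBuilderDemo {
      public static void main(String[] args) throws Exception {
        int maxDoc = 1_000_000;                  // threshold = maxDoc >>> 14 = 61 docs
        DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);

        SparseFixedBitSet a = new SparseFixedBitSet(maxDoc);
        a.set(3);
        a.set(141_592);
        builder.or(a.iterator());                // tiny cost: stays sparse

        SparseFixedBitSet b = new SparseFixedBitSet(maxDoc);
        b.set(653_589);
        builder.or(b.iterator());

        DocIdSet result = builder.build();       // a SparseFixedBitSet here; it would
        System.out.println(result.getClass());   // be a FixedBitSet past the threshold
      }
    }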
@@ -0,0 +1,341 @@
+package org.apache.lucene.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+
+/**
+ * A bit set that only stores longs that have at least one bit which is set.
+ * The way it works is that the space of bits is divided into blocks of
+ * 4096 bits, which is 64 longs. Then for each block, we have:<ul>
+ * <li>a long[] which stores the non-zero longs for that block</li>
+ * <li>a long so that bit <tt>i</tt> being set means that the <code>i-th</code>
+ *     long of the block is non-null, and its offset in the array of longs is
+ *     the number of one bits on the right of the <code>i-th</code> bit.</li></ul>
+ *
+ * @lucene.internal
+ */
+public class SparseFixedBitSet extends DocIdSet implements Bits {
+
+  private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SparseFixedBitSet.class);
+  private static final long SINGLE_ELEMENT_ARRAY_BYTES_USED = RamUsageEstimator.sizeOf(new long[1]);
+
+  private static int blockCount(int length) {
+    int blockCount = length >>> 12;
+    if ((blockCount << 12) < length) {
+      ++blockCount;
+    }
+    assert (blockCount << 12) >= length;
+    return blockCount;
+  }
+
+  final long[] indices;
+  final long[][] bits;
+  final int length;
+  int nonZeroLongCount;
+  long ramBytesUsed;
+
+  /** Create a {@link SparseFixedBitSet} that can contain bits between
+   *  <code>0</code> included and <code>length</code> excluded. */
+  public SparseFixedBitSet(int length) {
+    if (length < 1) {
+      throw new IllegalArgumentException("length needs to be >= 1");
+    }
+    this.length = length;
+    final int blockCount = blockCount(length);
+    indices = new long[blockCount];
+    bits = new long[blockCount][];
+    ramBytesUsed = BASE_RAM_BYTES_USED
+        + RamUsageEstimator.shallowSizeOf(indices)
+        + RamUsageEstimator.shallowSizeOf(bits);
+  }
+
+  @Override
+  public boolean isCacheable() {
+    return true;
+  }
+
+  @Override
+  public int length() {
+    return length;
+  }
+
+  @Override
+  public Bits bits() throws IOException {
+    return this;
+  }
+
+  private boolean consistent(int index) {
+    assert index >= 0 && index < length : "index=" + index + ",length=" + length;
+    return true;
+  }
+
+  /**
+   * Compute the cardinality of this set.
+   * NOTE: this operation runs in linear time.
+   */
+  public int cardinality() {
+    int cardinality = 0;
+    for (long[] bitArray : bits) {
+      if (bitArray != null) {
+        for (long bits : bitArray) {
+          cardinality += Long.bitCount(bits);
+        }
+      }
+    }
+    return cardinality;
+  }
+
+  /**
+   * Return an approximation of the cardinality of this set, assuming that bits
+   * are uniformly distributed. This operation runs in constant time.
+   */
+  public int approximateCardinality() {
+    // this is basically the linear counting algorithm
+    final int totalLongs = (length + 63) >>> 6; // total number of longs in the space
+    assert totalLongs >= nonZeroLongCount;
+    final int zeroLongs = totalLongs - nonZeroLongCount; // number of longs that are zeros
+    // No need to guard against division by zero, it will return +Infinity and things will work as expected
+    final long estimate = Math.round(totalLongs * Math.log((double) totalLongs / zeroLongs));
+    return (int) Math.min(length, estimate);
+  }
+
+  @Override
+  public boolean get(int i) {
+    assert consistent(i);
+    final int i4096 = i >>> 12;
+    final long index = indices[i4096];
+    final int i64 = i >>> 6;
+    // first check the index, if the i64-th bit is not set, then i is not set
+    // note: this relies on the fact that shifts are mod 64 in java
+    if ((index & (1L << i64)) == 0) {
+      return false;
+    }
+
+    // if it is set, then we count the number of bits that are set on the right
+    // of i64, and that gives us the index of the long that stores the bits we
+    // are interested in
+    final long bits = this.bits[i4096][Long.bitCount(index & ((1L << i64) - 1))];
+    return (bits & (1L << i)) != 0;
+  }
+
+  private static int oversize(int s) {
+    int newSize = s + (s >>> 1);
+    if (newSize > 50) {
+      newSize = 64;
+    }
+    return newSize;
+  }
+
+  /**
+   * Set the bit at index <tt>i</tt>.
+   */
+  public void set(int i) {
+    assert consistent(i);
+    final int i4096 = i >>> 12;
+    final long index = indices[i4096];
+    final int i64 = i >>> 6;
+    if (index == 0) {
+      // if the index is 0, it means that we just found a block of 4096 bits
+      // that has no bit that is set yet. So let's initialize a new block:
+      indices[i4096] = 1L << i64; // shifts are mod 64 in java
+      assert bits[i4096] == null;
+      bits[i4096] = new long[] { 1L << i }; // shifts are mod 64 in java
+      ++nonZeroLongCount;
+      ramBytesUsed += SINGLE_ELEMENT_ARRAY_BYTES_USED;
+    } else if ((index & (1L << i64)) == 0) {
+      // in that case we found a block of 4096 bits that has some values, but
+      // the sub-block of 64 bits that we are interested in has no value yet,
+      // so we need to insert a new long
+      insertLong(i4096, i64, i, index);
+    } else {
+      // in that case the sub 64-bits block we are interested in already exists,
+      // we just need to set a bit in an existing long: the number of ones on
+      // the right of i64 gives us the index of the long we need to update
+      bits[i4096][Long.bitCount(index & ((1L << i64) - 1))] |= 1L << i; // shifts are mod 64 in java
+    }
+  }
+
+  private void insertLong(int i4096, int i64, int i, long index) {
+    indices[i4096] |= 1L << i64; // shifts are mod 64 in java
+    // we count the number of bits that are set on the right of i64
+    // this gives us the index at which to perform the insertion
+    final int o = Long.bitCount(index & ((1L << i64) - 1));
+    final long[] bitArray = bits[i4096];
+    if (bitArray[bitArray.length - 1] == 0) {
+      // since we only store non-zero longs, if the last value is 0, it means
+      // that we already have extra space, make use of it
+      System.arraycopy(bitArray, o, bitArray, o + 1, bitArray.length - o - 1);
+      bitArray[o] = 1L << i;
+    } else {
+      // we don't have extra space so we need to resize to insert the new long
+      final int newSize = oversize(bitArray.length + 1);
+      final long[] newBitArray = new long[newSize];
+      System.arraycopy(bitArray, 0, newBitArray, 0, o);
+      newBitArray[o] = 1L << i;
+      System.arraycopy(bitArray, o, newBitArray, o + 1, bitArray.length - o);
+      bits[i4096] = newBitArray;
+      ramBytesUsed += (newSize - bitArray.length) * RamUsageEstimator.NUM_BYTES_LONG;
+    }
+    ++nonZeroLongCount;
+  }
+
+  /**
+   * Add the documents contained in the provided {@link DocIdSetIterator} to
+   * this bit set.
+   */
+  public void or(DocIdSetIterator it) throws IOException {
+    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
+      set(doc);
+    }
+  }
+
+  @Override
+  public long ramBytesUsed() {
+    return ramBytesUsed;
+  }
+
+  @Override
+  public DocIdSetIterator iterator() throws IOException {
+    return new Iterator();
+  }
+
+  class Iterator extends DocIdSetIterator {
+
+    private int doc = -1;
+    private int cost = -1;
+
+    @Override
+    public int docID() {
+      return doc;
+    }
+
+    /** Return the first document that occurs on or after the provided block index. */
+    private int firstDoc(int i4096) {
+      long index = 0;
+      while (i4096 < indices.length) {
+        index = indices[i4096];
+        if (index != 0) {
+          final int i64 = Long.numberOfTrailingZeros(index);
+          return doc = (i4096 << 12) | (i64 << 6) | Long.numberOfTrailingZeros(bits[i4096][0]);
+        }
+        i4096 += 1;
+      }
+      return doc = NO_MORE_DOCS;
+    }
+
+    @Override
+    public int nextDoc() throws IOException {
+      if (doc == NO_MORE_DOCS || ++doc >= length) {
+        return doc = NO_MORE_DOCS;
+      }
+      return currentOrNextDoc();
+    }
+
+    private int currentOrNextDoc() {
+      final int i4096 = doc >>> 12;
+      final long index = indices[i4096];
+      if (index == 0) {
+        // if the index is zero, it means that there is no value in the
+        // current block, so return the first document of the next block
+        return firstDoc(i4096 + 1);
+      } else {
+        // now we are on a block that contains at least one document
+        assert Long.bitCount(index) <= bits[i4096].length;
+        int i64 = doc >>> 6;
+        long indexBits = index >>> i64; // shifts are mod 64 in java
+        if (indexBits == 0) {
+          // if neither the i64-th bit or any other bit on its left is set then
+          // it means that there are no more documents in this block, go to the
+          // next one
+          return firstDoc(i4096 + 1);
+        }
+
+        // We know we still have some 64-bits blocks that have bits set, let's
+        // advance to the next one by skipping trailing zeros of the index
+        int i1 = doc & 0x3F;
+        int trailingZeros = Long.numberOfTrailingZeros(indexBits);
+        if (trailingZeros != 0) {
+          // no bits in the current long, go to the next one
+          i64 += trailingZeros;
+          i1 = 0;
+        }
+
+        // So now we are on a sub 64-bits block that has values
+        assert (index & (1L << i64)) != 0;
+        // we count the number of ones on the left of i64 to figure out the
+        // index of the long that contains the bits we are interested in
+        int longIndex = Long.bitCount(index & ((1L << i64) - 1)); // shifts are mod 64 in java
+        final long[] longArray = bits[i4096];
+        assert longArray[longIndex] != 0;
+        long bits = SparseFixedBitSet.this.bits[i4096][longIndex] >>> i1; // shifts are mod 64 in java
+        if (bits != 0L) {
+          // hurray, we found some non-zero bits, this gives us the next document:
+          i1 += Long.numberOfTrailingZeros(bits);
+          return doc = (i4096 << 12) | ((i64 & 0x3F) << 6) | i1;
+        }
+
+        // otherwise it means that although we were on a sub-64 block that contains
+        // documents, all documents of this sub-block have already been consumed
+        // so two cases:
+        indexBits = index >>> i64 >>> 1; // we don't shift by (i64+1) otherwise we might shift by a multiple of 64 which is a no-op
+        if (indexBits == 0) {
+          // Case 1: this was the last long of the block of 4096 bits, then go
+          // to the next block
+          return firstDoc(i4096 + 1);
+        }
+        // Case 2: go to the next sub 64-bits block in the current block of 4096 bits
+        // by skipping trailing zeros of the index
+        trailingZeros = Long.numberOfTrailingZeros(indexBits);
+        i64 += 1 + trailingZeros;
+        bits = longArray[longIndex + 1];
+        assert bits != 0;
+        i1 = Long.numberOfTrailingZeros(bits);
+        return doc = (i4096 << 12) | ((i64 & 0x3F) << 6) | i1;
+      }
+    }
+
+    @Override
+    public int advance(int target) throws IOException {
+      if (target >= length) {
+        return doc = NO_MORE_DOCS;
+      } else {
+        doc = target;
+      }
+      return currentOrNextDoc();
+    }
+
+    @Override
+    public long cost() {
+      // although constant-time, approximateCardinality is a bit expensive so
+      // we cache it to avoid performance traps eg. when sorting iterators by
+      // cost
+      if (cost < 0) {
+        cost = approximateCardinality();
+      }
+      assert cost >= 0;
+      return cost;
+    }
+
+  }
+
+}
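
A standalone sketch of the addressing arithmetic used by get() and set() above (illustration, not patch code; the patch computes i64 without the mask and relies on Java shifts being mod 64): bit i maps to a 4096-bit block, a 64-bit word within it, and a slot found by a popcount on the block's index word.

    public class SparseFixedBitSetAddressing {
      public static void main(String[] args) {
        int i = 150_000;
        int i4096 = i >>> 12;        // 4096-bit block number       -> 36
        int i64 = (i >>> 6) & 63;    // long index inside the block -> 39
        int bit = i & 63;            // bit index inside the long   -> 48

        // Suppose the block's index word says longs 5, 39 and 61 are non-zero:
        long index = (1L << 5) | (1L << 39) | (1L << 61);
        // The stored long for i64=39 sits after the one non-zero long below it:
        int slot = Long.bitCount(index & ((1L << i64) - 1)); // -> 1
        System.out.println(i4096 + " " + i64 + " " + bit + " " + slot);
      }
    }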
@@ -17,6 +17,8 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
 
+import java.util.concurrent.ExecutionException;
+
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
@@ -81,7 +83,14 @@ public class TestReaderClosed extends LuceneTestCase {
     reader.close(); // close original child reader
     try {
       searcher.search(query, 5);
-    } catch (AlreadyClosedException ace) {
+    } catch (Exception e) {
+      AlreadyClosedException ace = null;
+      for (Throwable t = e; t != null; t = t.getCause()) {
+        if (t instanceof AlreadyClosedException) {
+          ace = (AlreadyClosedException) t;
+        }
+      }
+      assertNotNull("Query failed, but not due to an AlreadyClosedException", ace);
       assertEquals(
           "this IndexReader cannot be used anymore as one of its child readers was closed",
           ace.getMessage()
@@ -98,9 +98,6 @@ public class TestAutomatonQuery extends LuceneTestCase {
 
     query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
     assertEquals(expected, automatonQueryNrHits(query));
-
-    query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
-    assertEquals(expected, automatonQueryNrHits(query));
   }
 
   /**
@@ -110,9 +110,6 @@ public class TestAutomatonQueryUnicode extends LuceneTestCase {
 
     query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
     assertEquals(expected, automatonQueryNrHits(query));
-
-    query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
-    assertEquals(expected, automatonQueryNrHits(query));
   }
 
   /**
@@ -160,7 +160,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
           result[i].score, SCORE_COMP_THRESH);
     }
 
-    result = search.search(csrq("data", "1", "6", T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, 1000).scoreDocs;
+    result = search.search(csrq("data", "1", "6", T, T, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE), null, 1000).scoreDocs;
     numHits = result.length;
     assertEquals("wrong number of results", 6, numHits);
     for (int i = 0; i < numHits; i++) {
@@ -204,7 +204,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
 
     bq = new BooleanQuery();
     bq.add(dummyTerm, BooleanClause.Occur.SHOULD); // hits one doc
-    bq.add(csrq("data", "#", "#", T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), BooleanClause.Occur.SHOULD); // hits no docs
+    bq.add(csrq("data", "#", "#", T, T, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE), BooleanClause.Occur.SHOULD); // hits no docs
     result = search.search(bq, null, 1000).scoreDocs;
     numHits = result.length;
     assertEquals("wrong number of results", 1, numHits);
@@ -346,37 +346,37 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
     result = search.search(csrq("id", minIP, maxIP, T, T), null, numDocs).scoreDocs;
     assertEquals("find all", numDocs, result.length);
 
-    result = search.search(csrq("id", minIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+    result = search.search(csrq("id", minIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE), null, numDocs).scoreDocs;
     assertEquals("find all", numDocs, result.length);
 
     result = search.search(csrq("id", minIP, maxIP, T, F), null, numDocs).scoreDocs;
     assertEquals("all but last", numDocs - 1, result.length);
 
-    result = search.search(csrq("id", minIP, maxIP, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+    result = search.search(csrq("id", minIP, maxIP, T, F, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE), null, numDocs).scoreDocs;
     assertEquals("all but last", numDocs - 1, result.length);
 
     result = search.search(csrq("id", minIP, maxIP, F, T), null, numDocs).scoreDocs;
     assertEquals("all but first", numDocs - 1, result.length);
 
-    result = search.search(csrq("id", minIP, maxIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+    result = search.search(csrq("id", minIP, maxIP, F, T, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE), null, numDocs).scoreDocs;
     assertEquals("all but first", numDocs - 1, result.length);
 
     result = search.search(csrq("id", minIP, maxIP, F, F), null, numDocs).scoreDocs;
     assertEquals("all but ends", numDocs - 2, result.length);
 
-    result = search.search(csrq("id", minIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+    result = search.search(csrq("id", minIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE), null, numDocs).scoreDocs;
     assertEquals("all but ends", numDocs - 2, result.length);
 
     result = search.search(csrq("id", medIP, maxIP, T, T), null, numDocs).scoreDocs;
     assertEquals("med and up", 1 + maxId - medId, result.length);
 
-    result = search.search(csrq("id", medIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+    result = search.search(csrq("id", medIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE), null, numDocs).scoreDocs;
     assertEquals("med and up", 1 + maxId - medId, result.length);
 
     result = search.search(csrq("id", minIP, medIP, T, T), null, numDocs).scoreDocs;
     assertEquals("up to med", 1 + medId - minId, result.length);
 
-    result = search.search(csrq("id", minIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+    result = search.search(csrq("id", minIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE), null, numDocs).scoreDocs;
     assertEquals("up to med", 1 + medId - minId, result.length);
 
     // unbounded id
@@ -404,49 +404,49 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
     result = search.search(csrq("id", minIP, minIP, F, F), null, numDocs).scoreDocs;
     assertEquals("min,min,F,F", 0, result.length);
 
-    result = search.search(csrq("id", minIP, minIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+    result = search.search(csrq("id", minIP, minIP, F, F, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE), null, numDocs).scoreDocs;
     assertEquals("min,min,F,F", 0, result.length);
 
     result = search.search(csrq("id", medIP, medIP, F, F), null, numDocs).scoreDocs;
     assertEquals("med,med,F,F", 0, result.length);
 
-    result = search.search(csrq("id", medIP, medIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+    result = search.search(csrq("id", medIP, medIP, F, F, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE), null, numDocs).scoreDocs;
     assertEquals("med,med,F,F", 0, result.length);
 
     result = search.search(csrq("id", maxIP, maxIP, F, F), null, numDocs).scoreDocs;
     assertEquals("max,max,F,F", 0, result.length);
 
-    result = search.search(csrq("id", maxIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+    result = search.search(csrq("id", maxIP, maxIP, F, F, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE), null, numDocs).scoreDocs;
     assertEquals("max,max,F,F", 0, result.length);
 
     result = search.search(csrq("id", minIP, minIP, T, T), null, numDocs).scoreDocs;
     assertEquals("min,min,T,T", 1, result.length);
 
-    result = search.search(csrq("id", minIP, minIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+    result = search.search(csrq("id", minIP, minIP, T, T, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE), null, numDocs).scoreDocs;
     assertEquals("min,min,T,T", 1, result.length);
 
     result = search.search(csrq("id", null, minIP, F, T), null, numDocs).scoreDocs;
     assertEquals("nul,min,F,T", 1, result.length);
 
-    result = search.search(csrq("id", null, minIP, F, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+    result = search.search(csrq("id", null, minIP, F, T, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE), null, numDocs).scoreDocs;
     assertEquals("nul,min,F,T", 1, result.length);
 
     result = search.search(csrq("id", maxIP, maxIP, T, T), null, numDocs).scoreDocs;
     assertEquals("max,max,T,T", 1, result.length);
 
-    result = search.search(csrq("id", maxIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+    result = search.search(csrq("id", maxIP, maxIP, T, T, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE), null, numDocs).scoreDocs;
     assertEquals("max,max,T,T", 1, result.length);
 
     result = search.search(csrq("id", maxIP, null, T, F), null, numDocs).scoreDocs;
     assertEquals("max,nul,T,T", 1, result.length);
 
-    result = search.search(csrq("id", maxIP, null, T, F, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+    result = search.search(csrq("id", maxIP, null, T, F, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE), null, numDocs).scoreDocs;
     assertEquals("max,nul,T,T", 1, result.length);
 
     result = search.search(csrq("id", medIP, medIP, T, T), null, numDocs).scoreDocs;
     assertEquals("med,med,T,T", 1, result.length);
 
-    result = search.search(csrq("id", medIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT), null, numDocs).scoreDocs;
+    result = search.search(csrq("id", medIP, medIP, T, T, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE), null, numDocs).scoreDocs;
     assertEquals("med,med,T,T", 1, result.length);
   }
@@ -138,12 +138,6 @@ public class TestMultiTermQueryRewrites extends LuceneTestCase {
     // use a large PQ here to only test duplicate terms and dont mix up when all scores are equal
     checkDuplicateTerms(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(1024));
     checkDuplicateTerms(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(1024));
-
-    // Test auto rewrite (but only boolean mode), so we set the limits to large values to always get a BQ
-    final MultiTermQuery.ConstantScoreAutoRewrite rewrite = new MultiTermQuery.ConstantScoreAutoRewrite();
-    rewrite.setTermCountCutoff(Integer.MAX_VALUE);
-    rewrite.setDocCountPercent(100.);
-    checkDuplicateTerms(rewrite);
   }
 
   private void checkBooleanQueryBoosts(BooleanQuery bq) {
@@ -235,7 +229,6 @@ public class TestMultiTermQueryRewrites extends LuceneTestCase {
     checkMaxClauseLimitation(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
 
     checkNoMaxClauseLimitation(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
-    checkNoMaxClauseLimitation(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
     checkNoMaxClauseLimitation(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(1024));
     checkNoMaxClauseLimitation(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(1024));
   }
@@ -87,12 +87,6 @@ public class TestWildcard
     assertTrue(q instanceof ConstantScoreQuery);
     assertEquals(q.getBoost(), wq.getBoost(), 0.1);
 
-    wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
-    wq.setBoost(0.3F);
-    q = searcher.rewrite(wq);
-    assertTrue(q instanceof ConstantScoreQuery);
-    assertEquals(q.getBoost(), wq.getBoost(), 0.1);
-
     wq.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
     wq.setBoost(0.4F);
     q = searcher.rewrite(wq);
@@ -0,0 +1,92 @@
+package org.apache.lucene.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+
+public class TestDocIdSetBuilder extends LuceneTestCase {
+
+  public void testEmpty() throws IOException {
+    assertEquals(null, new DocIdSetBuilder(1 + random().nextInt(1000)).build());
+  }
+
+  private void assertEquals(DocIdSet d1, DocIdSet d2) throws IOException {
+    if (d1 == null) {
+      if (d2 != null) {
+        assertEquals(DocIdSetIterator.NO_MORE_DOCS, d2.iterator().nextDoc());
+      }
+    } else if (d2 == null) {
+      assertEquals(DocIdSetIterator.NO_MORE_DOCS, d1.iterator().nextDoc());
+    } else {
+      DocIdSetIterator i1 = d1.iterator();
+      DocIdSetIterator i2 = d2.iterator();
+      for (int doc = i1.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = i1.nextDoc()) {
+        assertEquals(doc, i2.nextDoc());
+      }
+      assertEquals(DocIdSetIterator.NO_MORE_DOCS, i2.nextDoc());
+    }
+  }
+
+  public void testSparse() throws IOException {
+    final int maxDoc = 1000000 + random().nextInt(1000000);
+    DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
+    final int numIterators = 1 + random().nextInt(10);
+    final FixedBitSet ref = new FixedBitSet(maxDoc);
+    for (int i = 0; i < numIterators; ++i) {
+      final int baseInc = 200000 + random().nextInt(10000);
+      WAH8DocIdSet.Builder b = new WAH8DocIdSet.Builder();
+      for (int doc = random().nextInt(100); doc < maxDoc; doc += baseInc + random().nextInt(10000)) {
+        b.add(doc);
+        ref.set(doc);
+      }
+      builder.or(b.build().iterator());
+    }
+    DocIdSet result = builder.build();
+    assertTrue(result instanceof SparseFixedBitSet);
+    assertEquals(ref, result);
+  }
+
+  public void testDense() throws IOException {
+    final int maxDoc = 1000000 + random().nextInt(1000000);
+    DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
+    final int numIterators = 1 + random().nextInt(10);
+    final FixedBitSet ref = new FixedBitSet(maxDoc);
+    if (random().nextBoolean()) {
+      // try upgrades
+      final int doc = random().nextInt(maxDoc);
+      ref.set(doc);
+      builder.or(new WAH8DocIdSet.Builder().add(doc).build().iterator());
+    }
+    for (int i = 0; i < numIterators; ++i) {
+      final int baseInc = 2 + random().nextInt(10000);
+      WAH8DocIdSet.Builder b = new WAH8DocIdSet.Builder();
+      for (int doc = random().nextInt(10000); doc < maxDoc; doc += baseInc + random().nextInt(2000)) {
+        b.add(doc);
+        ref.set(doc);
+      }
+      builder.or(b.build().iterator());
+    }
+    DocIdSet result = builder.build();
+    assertTrue(result instanceof FixedBitSet);
+    assertEquals(ref, result);
+  }
+
+}
@@ -0,0 +1,81 @@
+package org.apache.lucene.util;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collections;
+import java.util.List;
+
+public class TestSparseFixedBitSet extends BaseDocIdSetTestCase<SparseFixedBitSet> {
+
+  @Override
+  public SparseFixedBitSet copyOf(BitSet bs, int length) throws IOException {
+    final SparseFixedBitSet set = new SparseFixedBitSet(length);
+    // SparseFixedBitSet can be sensitive to the order of insertion so
+    // randomize insertion a bit
+    List<Integer> buffer = new ArrayList<>();
+    for (int doc = bs.nextSetBit(0); doc != -1; doc = bs.nextSetBit(doc + 1)) {
+      buffer.add(doc);
+      if (buffer.size() >= 100000) {
+        Collections.shuffle(buffer);
+        for (int i : buffer) {
+          set.set(i);
+        }
+        buffer.clear();
+      }
+    }
+    Collections.shuffle(buffer);
+    for (int i : buffer) {
+      set.set(i);
+    }
+    return set;
+  }
+
+  @Override
+  public void assertEquals(int numBits, BitSet ds1, SparseFixedBitSet ds2) throws IOException {
+    for (int i = 0; i < numBits; ++i) {
+      assertEquals(ds1.get(i), ds2.get(i));
+    }
+    assertEquals(ds1.cardinality(), ds2.cardinality());
+    super.assertEquals(numBits, ds1, ds2);
+  }
+
+  public void testApproximateCardinality() {
+    final SparseFixedBitSet set = new SparseFixedBitSet(10000);
+    final int first = random().nextInt(1000);
+    final int interval = 200 + random().nextInt(1000);
+    for (int i = first; i < set.length(); i += interval) {
+      set.set(i);
+    }
+    assertEquals(set.cardinality(), set.approximateCardinality(), 20);
+  }
+
+  public void testApproximateCardinalityOnDenseSet() {
+    // this tests that things work as expected in approximateCardinality when
+    // all longs are different from 0, in which case we divide by zero
+    final int numDocs = 70;//TestUtil.nextInt(random(), 1, 10000);
+    final SparseFixedBitSet set = new SparseFixedBitSet(numDocs);
+    for (int i = 0; i < set.length(); ++i) {
+      set.set(i);
+    }
+    assertEquals(numDocs, set.approximateCardinality());
+  }
+
+}
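
The testApproximateCardinality expectations above follow from the linear-counting estimate in SparseFixedBitSet: with m total 64-bit words of which z are zero, the estimated cardinality is round(m * ln(m / z)). A standalone numeric check (illustrative values, not patch code):

    public class LinearCountingCheck {
      public static void main(String[] args) {
        int totalLongs = 157;   // a 10000-bit set spans ceil(10000/64) = 157 longs
        int nonZeroLongs = 20;  // e.g. ~20 sparse non-zero words
        int zeroLongs = totalLongs - nonZeroLongs;
        long estimate = Math.round(totalLongs * Math.log((double) totalLongs / zeroLongs));
        System.out.println(estimate); // ~21: close to one set bit per non-zero word
      }
    }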
@@ -17,15 +17,6 @@ package org.apache.lucene.queries;
  * limitations under the License.
  */
 
-import org.apache.lucene.index.*;
-import org.apache.lucene.search.DocIdSet;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.Filter;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.FixedBitSet;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -33,6 +24,20 @@ import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.Filter;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.DocIdSetBuilder;
+
 /**
  * Constructs a filter for docs matching any of the terms added to this class.
  * Unlike a RangeFilter this can be used for filtering on multiple terms that are not necessarily in
@@ -178,11 +183,11 @@ public final class TermsFilter extends Filter {
   @Override
   public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptDocs) throws IOException {
     final LeafReader reader = context.reader();
-    FixedBitSet result = null; // lazy init if needed - no need to create a big bitset ahead of time
+    DocIdSetBuilder builder = new DocIdSetBuilder(reader.maxDoc());
     final Fields fields = reader.fields();
     final BytesRef spare = new BytesRef(this.termsBytes);
     if (fields == null) {
-      return result;
+      return builder.build();
     }
     Terms terms = null;
     TermsEnum termsEnum = null;
@ -195,21 +200,12 @@ public final class TermsFilter extends Filter {
|
|||
spare.length = offsets[i+1] - offsets[i];
|
||||
if (termsEnum.seekExact(spare)) {
|
||||
docs = termsEnum.docs(acceptDocs, docs, DocsEnum.FLAG_NONE); // no freq since we don't need them
|
||||
if (result == null) {
|
||||
if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
result = new FixedBitSet(reader.maxDoc());
|
||||
// lazy init but don't do it in the hot loop since we could read many docs
|
||||
result.set(docs.docID());
|
||||
}
|
||||
}
|
||||
while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
result.set(docs.docID());
|
||||
}
|
||||
builder.or(docs);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
return builder.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
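Note: the rewrite above replaces the hand-rolled lazily-initialized FixedBitSet with DocIdSetBuilder, which chooses a sparse or dense representation internally as documents accumulate. A sketch of the same collection loop under those assumptions; the class, method and queryTerms names are illustrative, while the Lucene calls (seekExact, docs, or, build) are the ones visible in this hunk:

    import java.io.IOException;
    import java.util.List;

    import org.apache.lucene.index.DocsEnum;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.util.Bits;
    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.DocIdSetBuilder;

    class DocIdSetBuilderSketch {
      // Union the postings of every matching term into one DocIdSet.
      static DocIdSet union(TermsEnum termsEnum, List<BytesRef> queryTerms,
                            Bits acceptDocs, int maxDoc) throws IOException {
        DocIdSetBuilder builder = new DocIdSetBuilder(maxDoc);
        DocsEnum docs = null; // reused across terms, as in the filter above
        for (BytesRef term : queryTerms) {
          if (termsEnum.seekExact(term)) {
            docs = termsEnum.docs(acceptDocs, docs, DocsEnum.FLAG_NONE);
            builder.or(docs); // starts sparse, upgrades to dense when warranted
          }
        }
        return builder.build(); // replaces the old, possibly-null FixedBitSet result
      }
    }
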
@@ -28,19 +28,19 @@ import java.util.Set;

 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.MultiReader;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.SlowCompositeReaderWrapper;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.ConstantScoreQuery;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;

@@ -61,7 +61,7 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQueryParserConfiguration {
   Operator operator = OR_OPERATOR;

   boolean lowercaseExpandedTerms = true;
-  MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
+  MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;
   boolean allowLeadingWildcard = false;

   protected String field;
@@ -274,7 +274,7 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQueryParserConfiguration {
   }

   /**
-   * By default QueryParser uses {@link org.apache.lucene.search.MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
+   * By default QueryParser uses {@link org.apache.lucene.search.MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}
    * when creating a {@link PrefixQuery}, {@link WildcardQuery} or {@link TermRangeQuery}. This implementation is generally preferable because it
    * a) Runs faster b) Does not have the scarcity of terms unduly influence score
    * c) avoids any {@link TooManyClauses} exception.

@@ -73,7 +73,7 @@ public interface CommonQueryParserConfiguration {

   /**
    * By default, it uses
-   * {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} when creating a
+   * {@link MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} when creating a
    * prefix, wildcard and range queries. This implementation is generally
    * preferable because it a) Runs faster b) Does not have the scarcity of terms
    * unduly influence score c) avoids any {@link TooManyListenersException}

@@ -265,7 +265,7 @@ public class StandardQueryParser extends QueryParserHelper implements CommonQueryParserConfiguration {

   /**
    * By default, it uses
-   * {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} when creating a
+   * {@link MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} when creating a
    * prefix, wildcard and range queries. This implementation is generally
    * preferable because it a) Runs faster b) Does not have the scarcity of terms
    * unduly influence score c) avoids any {@link TooManyListenersException}

@@ -208,7 +208,7 @@ public class StandardQueryConfigHandler extends QueryConfigHandler {
     set(ConfigurationKeys.FIELD_BOOST_MAP, new LinkedHashMap<String, Float>());
     set(ConfigurationKeys.FUZZY_CONFIG, new FuzzyConfig());
     set(ConfigurationKeys.LOCALE, Locale.getDefault());
-    set(ConfigurationKeys.MULTI_TERM_REWRITE_METHOD, MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
+    set(ConfigurationKeys.MULTI_TERM_REWRITE_METHOD, MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
     set(ConfigurationKeys.FIELD_DATE_RESOLUTION_MAP, new HashMap<CharSequence, DateTools.Resolution>());

   }

@@ -30,7 +30,7 @@ import org.apache.lucene.search.MultiTermQuery;
 /**
  * This processor instates the default
  * {@link org.apache.lucene.search.MultiTermQuery.RewriteMethod},
- * {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}, for multi-term
+ * {@link MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}, for multi-term
  * query nodes.
  */
 public class MultiTermRewriteMethodProcessor extends QueryNodeProcessorImpl {

@@ -321,15 +321,15 @@ public class TestQPHelper extends LuceneTestCase {
     StandardQueryParser qp = new StandardQueryParser(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false));
     Query q = qp.parse("foo*bar", "field");
     assertTrue(q instanceof WildcardQuery);
-    assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((MultiTermQuery) q).getRewriteMethod());
+    assertEquals(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE, ((MultiTermQuery) q).getRewriteMethod());

     q = qp.parse("foo*", "field");
     assertTrue(q instanceof PrefixQuery);
-    assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((MultiTermQuery) q).getRewriteMethod());
+    assertEquals(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE, ((MultiTermQuery) q).getRewriteMethod());

     q = qp.parse("[a TO z]", "field");
     assertTrue(q instanceof TermRangeQuery);
-    assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((MultiTermQuery) q).getRewriteMethod());
+    assertEquals(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE, ((MultiTermQuery) q).getRewriteMethod());
   }

   public void testCJK() throws Exception {

@@ -659,7 +659,7 @@ public class TestQPHelper extends LuceneTestCase {

   public void testRange() throws Exception {
     assertQueryEquals("[ a TO z]", null, "[a TO z]");
-    assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((TermRangeQuery)getQuery("[ a TO z]", null)).getRewriteMethod());
+    assertEquals(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE, ((TermRangeQuery)getQuery("[ a TO z]", null)).getRewriteMethod());

     StandardQueryParser qp = new StandardQueryParser();

@@ -1152,7 +1152,7 @@ public class TestQPHelper extends LuceneTestCase {
     assertTrue(qp.parse("/[A-Z][123]/^0.5", df) instanceof RegexpQuery);
     assertEquals(q, qp.parse("/[A-Z][123]/^0.5", df));
     assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, ((RegexpQuery)qp.parse("/[A-Z][123]/^0.5", df)).getRewriteMethod());
-    qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
+    qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);

     Query escaped = new RegexpQuery(new Term("field", "[a-z]\\/[123]"));
     assertEquals(escaped, qp.parse("/[a-z]\\/[123]/", df));

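Note: the parser hunks in this commit only change a default. Callers that want per-term scoring semantics can still opt back into a boolean rewrite; a sketch using StandardQueryParser, whose constructor, parse and setMultiTermRewriteMethod calls all appear in the tests above (the analyzer, field name and class name are illustrative):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
    import org.apache.lucene.search.MultiTermQuery;
    import org.apache.lucene.search.Query;

    class RewriteOverrideSketch {
      // Parse with scoring semantics instead of the new constant-score default.
      static Query parseScoring(Analyzer analyzer, String userQuery) throws Exception {
        StandardQueryParser qp = new StandardQueryParser(analyzer);
        qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
        return qp.parse(userQuery, "field");
      }
    }
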
@@ -571,7 +571,7 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
     assertQueryEquals("[ a TO z}", null, "[a TO z}");
     assertQueryEquals("{ a TO z]", null, "{a TO z]");

-    assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((TermRangeQuery)getQuery("[ a TO z]")).getRewriteMethod());
+    assertEquals(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE, ((TermRangeQuery)getQuery("[ a TO z]")).getRewriteMethod());

     CommonQueryParserConfiguration qp = getParserConfig( new MockAnalyzer(random(), MockTokenizer.SIMPLE, true));

@@ -987,7 +987,7 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
     assertTrue(getQuery("/[A-Z][123]/^0.5",qp) instanceof RegexpQuery);
     assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE, ((RegexpQuery)getQuery("/[A-Z][123]/^0.5",qp)).getRewriteMethod());
     assertEquals(q, getQuery("/[A-Z][123]/^0.5",qp));
-    qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
+    qp.setMultiTermRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);

     Query escaped = new RegexpQuery(new Term("field", "[a-z]\\/[123]"));
     assertEquals(escaped, getQuery("/[a-z]\\/[123]/",qp));

@@ -16,15 +16,21 @@ package org.apache.lucene.sandbox.queries;
  * limitations under the License.
  */

-import org.apache.lucene.index.*;
+import java.io.IOException;
+
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.SlowCompositeReaderWrapper;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.DocIdSet;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
-
-import java.io.IOException;
+import org.apache.lucene.util.SparseFixedBitSet;

 /**
  * Filter to remove duplicate values from search results.

@@ -87,8 +93,8 @@ public class DuplicateFilter extends Filter {
     }
   }

-  private FixedBitSet correctBits(LeafReader reader, Bits acceptDocs) throws IOException {
-    FixedBitSet bits = new FixedBitSet(reader.maxDoc()); //assume all are INvalid
+  private SparseFixedBitSet correctBits(LeafReader reader, Bits acceptDocs) throws IOException {
+    SparseFixedBitSet bits = new SparseFixedBitSet(reader.maxDoc()); //assume all are INvalid
     Terms terms = reader.fields().terms(fieldName);

     if (terms == null) {

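Note: correctBits sets at most one bit per unique term of the field, so the result is usually far sparser than maxDoc, which is what makes SparseFixedBitSet a good fit here. A sketch of that access pattern with the 4.x/5.0-era postings API this file imports; the class and method names are illustrative, and the keep-first/keep-last mode handling is omitted:

    import java.io.IOException;

    import org.apache.lucene.index.DocsEnum;
    import org.apache.lucene.index.LeafReader;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.search.DocIdSetIterator;
    import org.apache.lucene.util.Bits;
    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.SparseFixedBitSet;

    class LastOccurrenceSketch {
      // Keep only the last live doc for each unique value of fieldName.
      static SparseFixedBitSet lastOccurrences(LeafReader reader, String fieldName,
                                               Bits acceptDocs) throws IOException {
        SparseFixedBitSet bits = new SparseFixedBitSet(reader.maxDoc());
        Terms terms = reader.fields().terms(fieldName);
        if (terms == null) {
          return bits; // field absent: nothing to keep
        }
        TermsEnum te = terms.iterator(null);
        DocsEnum docs = null;
        for (BytesRef term = te.next(); term != null; term = te.next()) {
          docs = te.docs(acceptDocs, docs, DocsEnum.FLAG_NONE);
          int last = DocIdSetIterator.NO_MORE_DOCS;
          for (int d = docs.nextDoc(); d != DocIdSetIterator.NO_MORE_DOCS; d = docs.nextDoc()) {
            last = d;
          }
          if (last != DocIdSetIterator.NO_MORE_DOCS) {
            bits.set(last); // at most one bit per unique term keeps the set sparse
          }
        }
        return bits;
      }
    }
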
@@ -36,7 +36,7 @@ import org.apache.lucene.util.ToStringUtils;
  * for numerical ranges; use {@link NumericRangeQuery} instead.
  *
  * <p>This query uses the {@link
- * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
+ * MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}
  * rewrite method.
  * @deprecated Index collation keys with CollationKeyAnalyzer or ICUCollationKeyAnalyzer instead.
  *  This class will be removed in Lucene 5.0

@@ -57,7 +57,7 @@ public abstract class BaseDocIdSetTestCase<T extends DocIdSet> extends LuceneTestCase {
   /** Test length=0. */
   public void testNoBit() throws IOException {
     final BitSet bs = new BitSet(1);
-    final T copy = copyOf(bs, 0);
+    final T copy = copyOf(bs, TestUtil.nextInt(random(), 1, 10000));
     assertEquals(0, bs, copy);
   }

@@ -67,7 +67,7 @@ public abstract class BaseDocIdSetTestCase<T extends DocIdSet> extends LuceneTestCase {
     if (random().nextBoolean()) {
       bs.set(0);
     }
-    final T copy = copyOf(bs, 1);
+    final T copy = copyOf(bs, TestUtil.nextInt(random(), 1, 10000));
     assertEquals(1, bs, copy);
   }

@@ -80,7 +80,7 @@ public abstract class BaseDocIdSetTestCase<T extends DocIdSet> extends LuceneTestCase {
     if (random().nextBoolean()) {
       bs.set(1);
     }
-    final T copy = copyOf(bs, 2);
+    final T copy = copyOf(bs, TestUtil.nextInt(random(), 1, 10000));
     assertEquals(2, bs, copy);
   }

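Note: the three hunks above make the same point: the declared length of a DocIdSet is independent of its highest set bit, and a sparse implementation must tolerate a length far beyond the last bit. A small illustration using the class this commit introduces (the values are arbitrary):

    import org.apache.lucene.util.SparseFixedBitSet;

    public class LengthVsBitsDemo {
      public static void main(String[] args) {
        // length is 10000 even though only bit 1 is set, so the structure
        // cannot size itself from the last set bit alone.
        SparseFixedBitSet set = new SparseFixedBitSet(10000);
        set.set(1);
        System.out.println(set.length());      // 10000
        System.out.println(set.cardinality()); // 1
      }
    }
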
@@ -78,7 +78,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
   /** The default operator that parser uses to combine query terms */
   Operator operator = OR_OPERATOR;

-  MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
+  MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;
   boolean allowLeadingWildcard = true;

   String defaultField;
@@ -294,7 +294,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {


   /**
-   * By default QueryParser uses {@link org.apache.lucene.search.MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
+   * By default QueryParser uses {@link org.apache.lucene.search.MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}
    * when creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable because it
    * a) Runs faster b) Does not have the scarcity of terms unduly influence score
    * c) avoids any "TooManyBooleanClauses" exception.

@@ -746,7 +746,7 @@ public abstract class FieldType extends FieldProperties {
     if (!field.indexed() && field.hasDocValues()) {
       return field.multiValued() ? new DocTermOrdsRewriteMethod() : new DocValuesRewriteMethod();
     } else {
-      return MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
+      return MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE;
     }
   }

@@ -5,6 +5,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TermRangeFilter;
 import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.join.FixedBitSetCachingWrapperFilter;
 import org.apache.lucene.search.join.ScoreMode;
 import org.apache.lucene.search.join.ToParentBlockJoinQuery;
 import org.apache.solr.SolrTestCaseJ4;
@@ -34,6 +35,7 @@ import javax.xml.stream.XMLStreamReader;



+
 import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.InputStream;
@@ -566,8 +568,8 @@ public class AddBlockUpdateTest extends SolrTestCaseJ4 {

   protected ToParentBlockJoinQuery join(final String childTerm) {
     return new ToParentBlockJoinQuery(
-        new TermQuery(new Term(child, childTerm)), new TermRangeFilter(parent,
-            null, null, false, false), ScoreMode.None);
+        new TermQuery(new Term(child, childTerm)), new FixedBitSetCachingWrapperFilter(new TermRangeFilter(parent,
+            null, null, false, false)), ScoreMode.None);
   }

   private Collection<? extends Callable<Void>> callables(List<Document> blocks) {

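Note: the block-join code of this era expects the parent filter to produce FixedBitSet instances per segment; now that filters such as TermRangeFilter may hand back a sparse or builder-produced DocIdSet, the test wraps the filter explicitly. A sketch of the resulting pattern, with the field and query arguments illustrative:

    import org.apache.lucene.search.Filter;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TermRangeFilter;
    import org.apache.lucene.search.join.FixedBitSetCachingWrapperFilter;
    import org.apache.lucene.search.join.ScoreMode;
    import org.apache.lucene.search.join.ToParentBlockJoinQuery;

    class BlockJoinParentsSketch {
      // An open-ended range filter matches every document with the parent
      // field; caching it guarantees a FixedBitSet-backed DocIdSet.
      static Query join(String parentField, Query childQuery) {
        Filter parents = new FixedBitSetCachingWrapperFilter(
            new TermRangeFilter(parentField, null, null, false, false));
        return new ToParentBlockJoinQuery(childQuery, parents, ScoreMode.None);
      }
    }
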