mirror of https://github.com/apache/lucene.git
LUCENE-1603: improve MultiTermQuery for better sharing, and so Trie(Numeric)RangeQuery can use it
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@765581 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
022d104c38
commit
55c0cc3922
11
CHANGES.txt
11
CHANGES.txt
|
@ -278,11 +278,12 @@ New features
|
|||
turnaround than the normal approach of commiting the changes and
|
||||
then reopening a reader. (Jason Rutherglen via Mike McCandless)
|
||||
|
||||
21. LUCENE-1603: Some improvements to MultiTermQuery: return
|
||||
DocIdSet.EMPTY_DOCIDSET if there are no terms in the enum; track
|
||||
the total number of terms it visited during rewrite
|
||||
(getTotalNumberOfTerms). Also, FilteredTermEnum is now more
|
||||
friendly to subclasses. (Uwe Schindler via Mike McCandless)
|
||||
21. LUCENE-1603: Added new MultiTermQueryWrapperFilter, to wrap any
|
||||
MultiTermQuery as a Filter. Also made some improvements to
|
||||
MultiTermQuery: return DocIdSet.EMPTY_DOCIDSET if there are no
|
||||
terms in the enum; track the total number of terms it visited
|
||||
during rewrite (getTotalNumberOfTerms). FilteredTermEnum is also
|
||||
more friendly to subclassing. (Uwe Schindler via Mike McCandless)
|
||||
|
||||
Optimizations
|
||||
|
||||
|
|
|
@ -18,13 +18,9 @@ package org.apache.lucene.search;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.BitSet;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermDocs;
|
||||
import org.apache.lucene.index.TermEnum;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
|
||||
/**
|
||||
|
@ -97,7 +93,7 @@ public abstract class MultiTermQuery extends Query {
|
|||
}
|
||||
|
||||
protected Filter getFilter() {
|
||||
return new MultiTermFilter(this);
|
||||
return new MultiTermQueryWrapperFilter(this);
|
||||
}
|
||||
|
||||
public Query rewrite(IndexReader reader) throws IOException {
|
||||
|
@ -176,80 +172,4 @@ public abstract class MultiTermQuery extends Query {
|
|||
return term.hashCode() + Float.floatToRawIntBits(getBoost());
|
||||
}
|
||||
|
||||
static class MultiTermFilter extends Filter {
|
||||
MultiTermQuery mtq;
|
||||
|
||||
abstract class TermGenerator {
|
||||
public void generate(IndexReader reader, TermEnum enumerator) throws IOException {
|
||||
TermDocs termDocs = reader.termDocs();
|
||||
try {
|
||||
do {
|
||||
Term term = enumerator.term();
|
||||
if (term == null)
|
||||
break;
|
||||
mtq.numberOfTerms++;
|
||||
termDocs.seek(term);
|
||||
while (termDocs.next()) {
|
||||
handleDoc(termDocs.doc());
|
||||
}
|
||||
} while (enumerator.next());
|
||||
} finally {
|
||||
termDocs.close();
|
||||
}
|
||||
}
|
||||
abstract public void handleDoc(int doc);
|
||||
}
|
||||
|
||||
public MultiTermFilter(MultiTermQuery mtq) {
|
||||
this.mtq = mtq;
|
||||
}
|
||||
|
||||
public BitSet bits(IndexReader reader) throws IOException {
|
||||
final TermEnum enumerator = mtq.getEnum(reader);
|
||||
try {
|
||||
final BitSet bitSet = new BitSet(reader.maxDoc());
|
||||
new TermGenerator() {
|
||||
public void handleDoc(int doc) {
|
||||
bitSet.set(doc);
|
||||
}
|
||||
}.generate(reader, enumerator);
|
||||
return bitSet;
|
||||
} finally {
|
||||
enumerator.close();
|
||||
}
|
||||
}
|
||||
|
||||
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||
final TermEnum enumerator = mtq.getEnum(reader);
|
||||
try {
|
||||
// if current term in enum is null, the enum is empty -> shortcut
|
||||
if (enumerator.term() == null)
|
||||
return DocIdSet.EMPTY_DOCIDSET;
|
||||
// else fill into a OpenBitSet
|
||||
final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
|
||||
new TermGenerator() {
|
||||
public void handleDoc(int doc) {
|
||||
bitSet.set(doc);
|
||||
}
|
||||
}.generate(reader, enumerator);
|
||||
return bitSet;
|
||||
} finally {
|
||||
enumerator.close();
|
||||
}
|
||||
}
|
||||
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (!(o instanceof MultiTermFilter))
|
||||
return false;
|
||||
|
||||
final MultiTermFilter filter = (MultiTermFilter) o;
|
||||
return mtq.equals(filter.mtq);
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
return mtq.hashCode();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,164 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermDocs;
|
||||
import org.apache.lucene.index.TermEnum;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.BitSet;
|
||||
|
||||
/**
|
||||
* A wrapper for {@link MultiTermQuery}, that exposes its
|
||||
* functionality as a {@link Filter}.
|
||||
* <P>
|
||||
* <code>MultiTermQueryWrapperFilter</code> is not designed to
|
||||
* be used by itself. Normally you subclass it to provide a Filter
|
||||
* counterpart for a {@link MultiTermQuery} subclass.
|
||||
* <P>
|
||||
* For example, {@link RangeFilter} and {@link PrefixFilter} extend
|
||||
* <code>MultiTermQueryWrapperFilter</code>.
|
||||
* This class also provides the functionality behind
|
||||
* {@link MultiTermQuery#getFilter}, this is why it is not abstract.
|
||||
*/
|
||||
public class MultiTermQueryWrapperFilter extends Filter {
|
||||
|
||||
protected final MultiTermQuery query;
|
||||
|
||||
/**
|
||||
* Wrap a {@link MultiTermQuery} as a Filter.
|
||||
*/
|
||||
protected MultiTermQueryWrapperFilter(MultiTermQuery query) {
|
||||
this.query = query;
|
||||
}
|
||||
|
||||
//@Override
|
||||
public String toString() {
|
||||
// query.toString should be ok for the filter, too, if the query boost is 1.0f
|
||||
return query.toString();
|
||||
}
|
||||
|
||||
//@Override
|
||||
public final boolean equals(final Object o) {
|
||||
if (o==this) return true;
|
||||
if (o==null) return false;
|
||||
if (this.getClass().equals(o.getClass())) {
|
||||
return this.query.equals( ((MultiTermQueryWrapperFilter)o).query );
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
//@Override
|
||||
public final int hashCode() {
|
||||
return query.hashCode();
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: Return the number of unique terms visited during execution of the filter.
|
||||
* If there are many of them, you may consider using another filter type
|
||||
* or optimize your total term count in index.
|
||||
* <p>This method is not thread safe, be sure to only call it when no filter is running!
|
||||
* If you re-use the same filter instance for another
|
||||
* search, be sure to first reset the term counter
|
||||
* with {@link #clearTotalNumberOfTerms}.
|
||||
* @see #clearTotalNumberOfTerms
|
||||
*/
|
||||
public int getTotalNumberOfTerms() {
|
||||
return query.getTotalNumberOfTerms();
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: Resets the counting of unique terms.
|
||||
* Do this before executing the filter.
|
||||
* @see #getTotalNumberOfTerms
|
||||
*/
|
||||
public void clearTotalNumberOfTerms() {
|
||||
query.clearTotalNumberOfTerms();
|
||||
}
|
||||
|
||||
abstract class TermGenerator {
|
||||
public void generate(IndexReader reader, TermEnum enumerator) throws IOException {
|
||||
TermDocs termDocs = reader.termDocs();
|
||||
try {
|
||||
do {
|
||||
Term term = enumerator.term();
|
||||
if (term == null)
|
||||
break;
|
||||
query.numberOfTerms++;
|
||||
termDocs.seek(term);
|
||||
while (termDocs.next()) {
|
||||
handleDoc(termDocs.doc());
|
||||
}
|
||||
} while (enumerator.next());
|
||||
} finally {
|
||||
termDocs.close();
|
||||
}
|
||||
}
|
||||
abstract public void handleDoc(int doc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a BitSet with true for documents which should be
|
||||
* permitted in search results, and false for those that should
|
||||
* not.
|
||||
* @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
|
||||
*/
|
||||
//@Override
|
||||
public BitSet bits(IndexReader reader) throws IOException {
|
||||
final TermEnum enumerator = query.getEnum(reader);
|
||||
try {
|
||||
final BitSet bitSet = new BitSet(reader.maxDoc());
|
||||
new TermGenerator() {
|
||||
public void handleDoc(int doc) {
|
||||
bitSet.set(doc);
|
||||
}
|
||||
}.generate(reader, enumerator);
|
||||
return bitSet;
|
||||
} finally {
|
||||
enumerator.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a DocIdSet with documents that should be
|
||||
* permitted in search results.
|
||||
*/
|
||||
//@Override
|
||||
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||
final TermEnum enumerator = query.getEnum(reader);
|
||||
try {
|
||||
// if current term in enum is null, the enum is empty -> shortcut
|
||||
if (enumerator.term() == null)
|
||||
return DocIdSet.EMPTY_DOCIDSET;
|
||||
// else fill into a OpenBitSet
|
||||
final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
|
||||
new TermGenerator() {
|
||||
public void handleDoc(int doc) {
|
||||
bitSet.set(doc);
|
||||
}
|
||||
}.generate(reader, enumerator);
|
||||
return bitSet;
|
||||
} finally {
|
||||
enumerator.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -17,48 +17,25 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.BitSet;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
||||
/**
|
||||
* A Filter that restricts search results to values that have a matching prefix in a given
|
||||
* field.
|
||||
*
|
||||
* <p>
|
||||
* This code borrows heavily from {@link PrefixQuery}, but is implemented as a Filter
|
||||
*
|
||||
* </p>
|
||||
*/
|
||||
public class PrefixFilter extends Filter {
|
||||
protected final Term prefix;
|
||||
private PrefixQuery prefixQuery;
|
||||
public class PrefixFilter extends MultiTermQueryWrapperFilter {
|
||||
|
||||
public PrefixFilter(Term prefix) {
|
||||
this.prefix = prefix;
|
||||
this.prefixQuery = new PrefixQuery(prefix);
|
||||
super(new PrefixQuery(prefix));
|
||||
}
|
||||
|
||||
public Term getPrefix() { return prefix; }
|
||||
|
||||
/**
|
||||
* @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
|
||||
*/
|
||||
public BitSet bits(IndexReader reader) throws IOException {
|
||||
return prefixQuery.getFilter().bits(reader);
|
||||
}
|
||||
|
||||
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||
return prefixQuery.getFilter().getDocIdSet(reader);
|
||||
}
|
||||
public Term getPrefix() { return ((PrefixQuery)query).getPrefix(); }
|
||||
|
||||
/** Prints a user-readable version of this query. */
|
||||
public String toString () {
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
buffer.append("PrefixFilter(");
|
||||
buffer.append(prefix.toString());
|
||||
buffer.append(getPrefix().toString());
|
||||
buffer.append(")");
|
||||
return buffer.toString();
|
||||
}
|
||||
|
|
|
@ -17,33 +17,16 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.BitSet;
|
||||
import java.text.Collator;
|
||||
|
||||
/**
|
||||
* A Filter that restricts search results to a range of values in a given
|
||||
* field.
|
||||
*
|
||||
* <p>
|
||||
* This code borrows heavily from {@link RangeQuery}, but is implemented as a Filter
|
||||
*
|
||||
* </p>
|
||||
*
|
||||
* If you construct a large number of range filters with different ranges but on the
|
||||
* same field, {@link FieldCacheRangeFilter} may have significantly better performance.
|
||||
*/
|
||||
public class RangeFilter extends Filter {
|
||||
|
||||
private String fieldName;
|
||||
private String lowerTerm;
|
||||
private String upperTerm;
|
||||
private boolean includeLower;
|
||||
private boolean includeUpper;
|
||||
private Collator collator;
|
||||
private RangeQuery rangeQuery;
|
||||
public class RangeFilter extends MultiTermQueryWrapperFilter {
|
||||
|
||||
/**
|
||||
* @param fieldName The field this range applies to
|
||||
|
@ -57,25 +40,7 @@ public class RangeFilter extends Filter {
|
|||
*/
|
||||
public RangeFilter(String fieldName, String lowerTerm, String upperTerm,
|
||||
boolean includeLower, boolean includeUpper) {
|
||||
this.fieldName = fieldName;
|
||||
this.lowerTerm = lowerTerm;
|
||||
this.upperTerm = upperTerm;
|
||||
this.includeLower = includeLower;
|
||||
this.includeUpper = includeUpper;
|
||||
|
||||
if (null == lowerTerm && null == upperTerm) {
|
||||
throw new IllegalArgumentException
|
||||
("At least one value must be non-null");
|
||||
}
|
||||
if (includeLower && null == lowerTerm) {
|
||||
throw new IllegalArgumentException
|
||||
("The lower bound must be non-null to be inclusive");
|
||||
}
|
||||
if (includeUpper && null == upperTerm) {
|
||||
throw new IllegalArgumentException
|
||||
("The upper bound must be non-null to be inclusive");
|
||||
}
|
||||
initRangeQuery();
|
||||
super(new RangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -98,13 +63,7 @@ public class RangeFilter extends Filter {
|
|||
public RangeFilter(String fieldName, String lowerTerm, String upperTerm,
|
||||
boolean includeLower, boolean includeUpper,
|
||||
Collator collator) {
|
||||
this(fieldName, lowerTerm, upperTerm, includeLower, includeUpper);
|
||||
this.collator = collator;
|
||||
initRangeQuery();
|
||||
}
|
||||
|
||||
private void initRangeQuery() {
|
||||
rangeQuery = new RangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator);
|
||||
super(new RangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -122,66 +81,4 @@ public class RangeFilter extends Filter {
|
|||
public static RangeFilter More(String fieldName, String lowerTerm) {
|
||||
return new RangeFilter(fieldName, lowerTerm, null, true, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a BitSet with true for documents which should be
|
||||
* permitted in search results, and false for those that should
|
||||
* not.
|
||||
* @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
|
||||
*/
|
||||
public BitSet bits(IndexReader reader) throws IOException {
|
||||
return rangeQuery.getFilter().bits(reader);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a DocIdSet with documents that should be
|
||||
* permitted in search results.
|
||||
*/
|
||||
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
|
||||
return rangeQuery.getFilter().getDocIdSet(reader);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuffer buffer = new StringBuffer();
|
||||
buffer.append(fieldName);
|
||||
buffer.append(":");
|
||||
buffer.append(includeLower ? "[" : "{");
|
||||
if (null != lowerTerm) {
|
||||
buffer.append(lowerTerm);
|
||||
}
|
||||
buffer.append("-");
|
||||
if (null != upperTerm) {
|
||||
buffer.append(upperTerm);
|
||||
}
|
||||
buffer.append(includeUpper ? "]" : "}");
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
/** Returns true if <code>o</code> is equal to this. */
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (!(o instanceof RangeFilter)) return false;
|
||||
RangeFilter other = (RangeFilter) o;
|
||||
|
||||
if (!this.fieldName.equals(other.fieldName)
|
||||
|| this.includeLower != other.includeLower
|
||||
|| this.includeUpper != other.includeUpper
|
||||
|| (this.collator != null && ! this.collator.equals(other.collator))
|
||||
) { return false; }
|
||||
if (this.lowerTerm != null ? !this.lowerTerm.equals(other.lowerTerm) : other.lowerTerm != null) return false;
|
||||
if (this.upperTerm != null ? !this.upperTerm.equals(other.upperTerm) : other.upperTerm != null) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Returns a hash code value for this object.*/
|
||||
public int hashCode() {
|
||||
int h = fieldName.hashCode();
|
||||
h ^= lowerTerm != null ? lowerTerm.hashCode() : 0xB6ECE882;
|
||||
h = (h << 1) | (h >>> 31); // rotate to distinguish lower from upper
|
||||
h ^= (upperTerm != null ? (upperTerm.hashCode()) : 0x91BEC2C2);
|
||||
h ^= (includeLower ? 0xD484B933 : 0)
|
||||
^ (includeUpper ? 0x6AE423AC : 0);
|
||||
h ^= collator != null ? collator.hashCode() : 0;
|
||||
return h;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue