LUCENE-1603: improve MultiTermQuery for better sharing, and so Trie(Numeric)RangeQuery can use it

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@765581 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-04-16 11:37:23 +00:00
parent 022d104c38
commit 55c0cc3922
5 changed files with 178 additions and 219 deletions

View File

@ -278,11 +278,12 @@ New features
turnaround than the normal approach of committing the changes and turnaround than the normal approach of committing the changes and
then reopening a reader. (Jason Rutherglen via Mike McCandless) then reopening a reader. (Jason Rutherglen via Mike McCandless)
21. LUCENE-1603: Some improvements to MultiTermQuery: return 21. LUCENE-1603: Added new MultiTermQueryWrapperFilter, to wrap any
DocIdSet.EMPTY_DOCIDSET if there are no terms in the enum; track MultiTermQuery as a Filter. Also made some improvements to
the total number of terms it visited during rewrite MultiTermQuery: return DocIdSet.EMPTY_DOCIDSET if there are no
(getTotalNumberOfTerms). Also, FilteredTermEnum is now more terms in the enum; track the total number of terms it visited
friendly to subclasses. (Uwe Schindler via Mike McCandless) during rewrite (getTotalNumberOfTerms). FilteredTermEnum is also
more friendly to subclassing. (Uwe Schindler via Mike McCandless)
Optimizations Optimizations

View File

@ -18,13 +18,9 @@ package org.apache.lucene.search;
*/ */
import java.io.IOException; import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.ToStringUtils;
/** /**
@ -97,7 +93,7 @@ public abstract class MultiTermQuery extends Query {
} }
protected Filter getFilter() { protected Filter getFilter() {
return new MultiTermFilter(this); return new MultiTermQueryWrapperFilter(this);
} }
public Query rewrite(IndexReader reader) throws IOException { public Query rewrite(IndexReader reader) throws IOException {
@ -176,80 +172,4 @@ public abstract class MultiTermQuery extends Query {
return term.hashCode() + Float.floatToRawIntBits(getBoost()); return term.hashCode() + Float.floatToRawIntBits(getBoost());
} }
/**
 * Filter backed by a {@link MultiTermQuery}. The accepted documents are
 * all document ids the reader's {@link TermDocs} reports for the terms
 * enumerated by the query.
 */
static class MultiTermFilter extends Filter {
  MultiTermQuery mtq;

  /**
   * Walks a term enumeration and passes every document id found for
   * each enumerated term to {@link #handleDoc(int)}.
   */
  abstract class TermGenerator {
    /**
     * Visits all terms of <code>enumerator</code>, starting at its current
     * term, and increments the query's term counter once per visited term.
     */
    public void generate(IndexReader reader, TermEnum enumerator) throws IOException {
      final TermDocs postings = reader.termDocs();
      try {
        Term current = enumerator.term();
        while (current != null) {
          mtq.numberOfTerms++;
          postings.seek(current);
          while (postings.next()) {
            handleDoc(postings.doc());
          }
          // step forward; an exhausted enumeration ends the walk
          current = enumerator.next() ? enumerator.term() : null;
        }
      } finally {
        postings.close();
      }
    }

    /** Receives one document id found for an enumerated term. */
    abstract public void handleDoc(int doc);
  }

  /** Creates a filter over the given query. */
  public MultiTermFilter(MultiTermQuery mtq) {
    this.mtq = mtq;
  }

  /** Collects the matching document ids into a {@link BitSet}. */
  public BitSet bits(IndexReader reader) throws IOException {
    final TermEnum terms = mtq.getEnum(reader);
    try {
      final BitSet accepted = new BitSet(reader.maxDoc());
      final TermGenerator collector = new TermGenerator() {
        public void handleDoc(int doc) {
          accepted.set(doc);
        }
      };
      collector.generate(reader, terms);
      return accepted;
    } finally {
      terms.close();
    }
  }

  /**
   * Collects the matching document ids into an {@link OpenBitSet}, or
   * returns {@link DocIdSet#EMPTY_DOCIDSET} when the enumeration has no
   * current term.
   */
  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final TermEnum terms = mtq.getEnum(reader);
    try {
      if (terms.term() != null) {
        final OpenBitSet accepted = new OpenBitSet(reader.maxDoc());
        final TermGenerator collector = new TermGenerator() {
          public void handleDoc(int doc) {
            accepted.set(doc);
          }
        };
        collector.generate(reader, terms);
        return accepted;
      }
      // no current term means the enumeration is empty
      return DocIdSet.EMPTY_DOCIDSET;
    } finally {
      terms.close();
    }
  }

  /** Two filters are equal when their underlying queries are equal. */
  public boolean equals(Object o) {
    return this == o
        || (o instanceof MultiTermFilter && mtq.equals(((MultiTermFilter) o).mtq));
  }

  /** Hash code of the underlying query. */
  public int hashCode() {
    return mtq.hashCode();
  }
}
} }

View File

@ -0,0 +1,164 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.util.OpenBitSet;
import java.io.IOException;
import java.util.BitSet;
/**
 * Exposes the functionality of a {@link MultiTermQuery} as a
 * {@link Filter}.
 * <P>
 * <code>MultiTermQueryWrapperFilter</code> is not meant to be used
 * directly. The usual pattern is to subclass it in order to offer a
 * Filter counterpart for a particular {@link MultiTermQuery} subclass;
 * {@link RangeFilter} and {@link PrefixFilter} are two such subclasses.
 * <P>
 * The class is not abstract because it also supplies the implementation
 * behind {@link MultiTermQuery#getFilter}.
 */
public class MultiTermQueryWrapperFilter extends Filter {

  protected final MultiTermQuery query;

  /**
   * Wrap a {@link MultiTermQuery} as a Filter.
   */
  protected MultiTermQueryWrapperFilter(MultiTermQuery query) {
    this.query = query;
  }

  //@Override
  public String toString() {
    // the query's own string form is reused; it should also suit the
    // filter as long as the query boost is 1.0f
    return query.toString();
  }

  //@Override
  public final boolean equals(final Object o) {
    if (o == this) {
      return true;
    }
    if (o == null || !this.getClass().equals(o.getClass())) {
      return false;
    }
    final MultiTermQueryWrapperFilter that = (MultiTermQueryWrapperFilter) o;
    return this.query.equals(that.query);
  }

  //@Override
  public final int hashCode() {
    return query.hashCode();
  }

  /**
   * Expert: Return the number of unique terms visited while the filter
   * was executed. A large number may be a reason to consider a different
   * filter type or to reduce the total term count in the index.
   * <p>This method is not thread safe, so call it only while no filter
   * is running! When the same filter instance is re-used for another
   * search, reset the counter first with {@link #clearTotalNumberOfTerms}.
   * @see #clearTotalNumberOfTerms
   */
  public int getTotalNumberOfTerms() {
    return query.getTotalNumberOfTerms();
  }

  /**
   * Expert: Resets the counting of unique terms.
   * Call this before the filter is executed.
   * @see #getTotalNumberOfTerms
   */
  public void clearTotalNumberOfTerms() {
    query.clearTotalNumberOfTerms();
  }

  /**
   * Walks a term enumeration and passes every document id found for
   * each enumerated term to {@link #handleDoc(int)}.
   */
  abstract class TermGenerator {
    /**
     * Visits all terms of <code>enumerator</code>, starting at its current
     * term, and increments the query's term counter once per visited term.
     */
    public void generate(IndexReader reader, TermEnum enumerator) throws IOException {
      final TermDocs postings = reader.termDocs();
      try {
        Term current = enumerator.term();
        while (current != null) {
          query.numberOfTerms++;
          postings.seek(current);
          while (postings.next()) {
            handleDoc(postings.doc());
          }
          // step forward; an exhausted enumeration ends the walk
          current = enumerator.next() ? enumerator.term() : null;
        }
      } finally {
        postings.close();
      }
    }

    /** Receives one document id found for an enumerated term. */
    abstract public void handleDoc(int doc);
  }

  /**
   * Returns a BitSet in which the bit of every document that should be
   * permitted in search results is set, and all other bits are clear.
   * @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
   */
  //@Override
  public BitSet bits(IndexReader reader) throws IOException {
    final TermEnum terms = query.getEnum(reader);
    try {
      final BitSet accepted = new BitSet(reader.maxDoc());
      final TermGenerator collector = new TermGenerator() {
        public void handleDoc(int doc) {
          accepted.set(doc);
        }
      };
      collector.generate(reader, terms);
      return accepted;
    } finally {
      terms.close();
    }
  }

  /**
   * Returns a DocIdSet holding the documents that should be permitted
   * in search results.
   */
  //@Override
  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final TermEnum terms = query.getEnum(reader);
    try {
      if (terms.term() != null) {
        // at least one term: collect its documents into an OpenBitSet
        final OpenBitSet accepted = new OpenBitSet(reader.maxDoc());
        final TermGenerator collector = new TermGenerator() {
          public void handleDoc(int doc) {
            accepted.set(doc);
          }
        };
        collector.generate(reader, terms);
        return accepted;
      }
      // a null current term means the enum is empty -> shortcut
      return DocIdSet.EMPTY_DOCIDSET;
    } finally {
      terms.close();
    }
  }
}

View File

@ -17,48 +17,25 @@ package org.apache.lucene.search;
* limitations under the License. * limitations under the License.
*/ */
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
/** /**
* A Filter that restricts search results to values that have a matching prefix in a given * A Filter that restricts search results to values that have a matching prefix in a given
* field. * field.
*
* <p>
* This code borrows heavily from {@link PrefixQuery}, but is implemented as a Filter
*
* </p>
*/ */
public class PrefixFilter extends Filter { public class PrefixFilter extends MultiTermQueryWrapperFilter {
protected final Term prefix;
private PrefixQuery prefixQuery;
public PrefixFilter(Term prefix) { public PrefixFilter(Term prefix) {
this.prefix = prefix; super(new PrefixQuery(prefix));
this.prefixQuery = new PrefixQuery(prefix);
} }
public Term getPrefix() { return prefix; } public Term getPrefix() { return ((PrefixQuery)query).getPrefix(); }
/**
 * Returns the result of <code>bits</code> on the filter obtained from
 * the prefix query held by this filter.
 * @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
 */
public BitSet bits(IndexReader reader) throws IOException {
  final Filter delegate = prefixQuery.getFilter();
  return delegate.bits(reader);
}
/**
 * Returns the result of <code>getDocIdSet</code> on the filter obtained
 * from the prefix query held by this filter.
 */
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
  final Filter delegate = prefixQuery.getFilter();
  return delegate.getDocIdSet(reader);
}
/** Prints a user-readable version of this query. */ /** Prints a user-readable version of this query. */
public String toString () { public String toString () {
StringBuffer buffer = new StringBuffer(); StringBuffer buffer = new StringBuffer();
buffer.append("PrefixFilter("); buffer.append("PrefixFilter(");
buffer.append(prefix.toString()); buffer.append(getPrefix().toString());
buffer.append(")"); buffer.append(")");
return buffer.toString(); return buffer.toString();
} }

View File

@ -17,33 +17,16 @@ package org.apache.lucene.search;
* limitations under the License. * limitations under the License.
*/ */
import org.apache.lucene.index.IndexReader;
import java.io.IOException;
import java.util.BitSet;
import java.text.Collator; import java.text.Collator;
/** /**
* A Filter that restricts search results to a range of values in a given * A Filter that restricts search results to a range of values in a given
* field. * field.
* *
* <p>
* This code borrows heavily from {@link RangeQuery}, but is implemented as a Filter
*
* </p>
*
* If you construct a large number of range filters with different ranges but on the * If you construct a large number of range filters with different ranges but on the
* same field, {@link FieldCacheRangeFilter} may have significantly better performance. * same field, {@link FieldCacheRangeFilter} may have significantly better performance.
*/ */
public class RangeFilter extends Filter { public class RangeFilter extends MultiTermQueryWrapperFilter {
private String fieldName;
private String lowerTerm;
private String upperTerm;
private boolean includeLower;
private boolean includeUpper;
private Collator collator;
private RangeQuery rangeQuery;
/** /**
* @param fieldName The field this range applies to * @param fieldName The field this range applies to
@ -57,25 +40,7 @@ public class RangeFilter extends Filter {
*/ */
public RangeFilter(String fieldName, String lowerTerm, String upperTerm, public RangeFilter(String fieldName, String lowerTerm, String upperTerm,
boolean includeLower, boolean includeUpper) { boolean includeLower, boolean includeUpper) {
this.fieldName = fieldName; super(new RangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper));
this.lowerTerm = lowerTerm;
this.upperTerm = upperTerm;
this.includeLower = includeLower;
this.includeUpper = includeUpper;
if (null == lowerTerm && null == upperTerm) {
throw new IllegalArgumentException
("At least one value must be non-null");
}
if (includeLower && null == lowerTerm) {
throw new IllegalArgumentException
("The lower bound must be non-null to be inclusive");
}
if (includeUpper && null == upperTerm) {
throw new IllegalArgumentException
("The upper bound must be non-null to be inclusive");
}
initRangeQuery();
} }
/** /**
@ -98,13 +63,7 @@ public class RangeFilter extends Filter {
public RangeFilter(String fieldName, String lowerTerm, String upperTerm, public RangeFilter(String fieldName, String lowerTerm, String upperTerm,
boolean includeLower, boolean includeUpper, boolean includeLower, boolean includeUpper,
Collator collator) { Collator collator) {
this(fieldName, lowerTerm, upperTerm, includeLower, includeUpper); super(new RangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator));
this.collator = collator;
initRangeQuery();
}
private void initRangeQuery() {
rangeQuery = new RangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator);
} }
/** /**
@ -122,66 +81,4 @@ public class RangeFilter extends Filter {
public static RangeFilter More(String fieldName, String lowerTerm) { public static RangeFilter More(String fieldName, String lowerTerm) {
return new RangeFilter(fieldName, lowerTerm, null, true, false); return new RangeFilter(fieldName, lowerTerm, null, true, false);
} }
/**
 * Returns a BitSet in which the bit of every document that should be
 * permitted in search results is set, and all other bits are clear.
 * The work is delegated to the filter of the underlying range query.
 * @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
 */
public BitSet bits(IndexReader reader) throws IOException {
  final Filter delegate = rangeQuery.getFilter();
  return delegate.bits(reader);
}
/**
 * Returns a DocIdSet holding the documents that should be permitted
 * in search results. The work is delegated to the filter of the
 * underlying range query.
 */
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
  final Filter delegate = rangeQuery.getFilter();
  return delegate.getDocIdSet(reader);
}
/**
 * Renders the range as <code>field:[lower-upper]</code>. An inclusive
 * bound is written with a square bracket and an exclusive bound with a
 * curly brace; a null bound is rendered as an empty string.
 */
public String toString() {
  final String open = includeLower ? "[" : "{";
  final String close = includeUpper ? "]" : "}";
  final String lower = (lowerTerm == null) ? "" : lowerTerm;
  final String upper = (upperTerm == null) ? "" : upperTerm;
  return new StringBuffer()
      .append(fieldName)
      .append(":")
      .append(open)
      .append(lower)
      .append("-")
      .append(upper)
      .append(close)
      .toString();
}
/**
 * Returns true if <code>o</code> is equal to this.
 * <p>
 * Two range filters are equal when they have the same field name, the
 * same inclusiveness flags, and equal lower term, upper term and
 * collator. Each of the last three may be null, in which case the other
 * filter's value must be null as well.
 */
public boolean equals(Object o) {
  if (this == o) return true;
  if (!(o instanceof RangeFilter)) return false;
  RangeFilter other = (RangeFilter) o;

  if (!this.fieldName.equals(other.fieldName)
      || this.includeLower != other.includeLower
      || this.includeUpper != other.includeUpper) {
    return false;
  }
  // The collator is compared null-safely in both directions. A one-sided
  // check would let a filter without a collator equal one that has a
  // collator but not vice versa, and would disagree with hashCode().
  if (this.collator != null ? !this.collator.equals(other.collator) : other.collator != null) return false;
  if (this.lowerTerm != null ? !this.lowerTerm.equals(other.lowerTerm) : other.lowerTerm != null) return false;
  if (this.upperTerm != null ? !this.upperTerm.equals(other.upperTerm) : other.upperTerm != null) return false;
  return true;
}
/**
 * Returns a hash code value for this object, combining the field name,
 * both terms, both inclusiveness flags and the collator.
 */
public int hashCode() {
  int hash = fieldName.hashCode();
  hash ^= (lowerTerm == null) ? 0xB6ECE882 : lowerTerm.hashCode();
  // rotate left by one bit so that lower and upper terms contribute differently
  hash = (hash >>> 31) | (hash << 1);
  hash ^= (upperTerm == null) ? 0x91BEC2C2 : upperTerm.hashCode();
  if (includeLower) {
    hash ^= 0xD484B933;
  }
  if (includeUpper) {
    hash ^= 0x6AE423AC;
  }
  if (collator != null) {
    hash ^= collator.hashCode();
  }
  return hash;
}
} }