LUCENE-1603: improve MultiTermQuery for better sharing, and so Trie(Numeric)RangeQuery can use it

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@765581 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-04-16 11:37:23 +00:00
parent 022d104c38
commit 55c0cc3922
5 changed files with 178 additions and 219 deletions

View File

@ -278,11 +278,12 @@ New features
turnaround than the normal approach of commiting the changes and
then reopening a reader. (Jason Rutherglen via Mike McCandless)
21. LUCENE-1603: Some improvements to MultiTermQuery: return
DocIdSet.EMPTY_DOCIDSET if there are no terms in the enum; track
the total number of terms it visited during rewrite
(getTotalNumberOfTerms). Also, FilteredTermEnum is now more
friendly to subclasses. (Uwe Schindler via Mike McCandless)
21. LUCENE-1603: Added new MultiTermQueryWrapperFilter, to wrap any
MultiTermQuery as a Filter. Also made some improvements to
MultiTermQuery: return DocIdSet.EMPTY_DOCIDSET if there are no
terms in the enum; track the total number of terms it visited
during rewrite (getTotalNumberOfTerms). FilteredTermEnum is also
more friendly to subclassing. (Uwe Schindler via Mike McCandless)
Optimizations

View File

@ -18,13 +18,9 @@ package org.apache.lucene.search;
*/
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.ToStringUtils;
/**
@ -97,7 +93,7 @@ public abstract class MultiTermQuery extends Query {
}
protected Filter getFilter() {
return new MultiTermFilter(this);
return new MultiTermQueryWrapperFilter(this);
}
public Query rewrite(IndexReader reader) throws IOException {
@ -176,80 +172,4 @@ public abstract class MultiTermQuery extends Query {
return term.hashCode() + Float.floatToRawIntBits(getBoost());
}
static class MultiTermFilter extends Filter {
MultiTermQuery mtq;
abstract class TermGenerator {
public void generate(IndexReader reader, TermEnum enumerator) throws IOException {
TermDocs termDocs = reader.termDocs();
try {
do {
Term term = enumerator.term();
if (term == null)
break;
mtq.numberOfTerms++;
termDocs.seek(term);
while (termDocs.next()) {
handleDoc(termDocs.doc());
}
} while (enumerator.next());
} finally {
termDocs.close();
}
}
abstract public void handleDoc(int doc);
}
public MultiTermFilter(MultiTermQuery mtq) {
this.mtq = mtq;
}
public BitSet bits(IndexReader reader) throws IOException {
final TermEnum enumerator = mtq.getEnum(reader);
try {
final BitSet bitSet = new BitSet(reader.maxDoc());
new TermGenerator() {
public void handleDoc(int doc) {
bitSet.set(doc);
}
}.generate(reader, enumerator);
return bitSet;
} finally {
enumerator.close();
}
}
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
final TermEnum enumerator = mtq.getEnum(reader);
try {
// if current term in enum is null, the enum is empty -> shortcut
if (enumerator.term() == null)
return DocIdSet.EMPTY_DOCIDSET;
// else fill into a OpenBitSet
final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
new TermGenerator() {
public void handleDoc(int doc) {
bitSet.set(doc);
}
}.generate(reader, enumerator);
return bitSet;
} finally {
enumerator.close();
}
}
public boolean equals(Object o) {
if (this == o)
return true;
if (!(o instanceof MultiTermFilter))
return false;
final MultiTermFilter filter = (MultiTermFilter) o;
return mtq.equals(filter.mtq);
}
public int hashCode() {
return mtq.hashCode();
}
}
}

View File

@ -0,0 +1,164 @@
package org.apache.lucene.search;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.util.OpenBitSet;
import java.io.IOException;
import java.util.BitSet;
/**
* A wrapper for {@link MultiTermQuery}, that exposes its
* functionality as a {@link Filter}.
* <P>
* <code>MultiTermQueryWrapperFilter</code> is not designed to
* be used by itself. Normally you subclass it to provide a Filter
* counterpart for a {@link MultiTermQuery} subclass.
* <P>
* For example, {@link RangeFilter} and {@link PrefixFilter} extend
* <code>MultiTermQueryWrapperFilter</code>.
* This class also provides the functionality behind
* {@link MultiTermQuery#getFilter}, this is why it is not abstract.
*/
public class MultiTermQueryWrapperFilter extends Filter {
protected final MultiTermQuery query;
/**
* Wrap a {@link MultiTermQuery} as a Filter.
*/
protected MultiTermQueryWrapperFilter(MultiTermQuery query) {
this.query = query;
}
//@Override
public String toString() {
// query.toString should be ok for the filter, too, if the query boost is 1.0f
return query.toString();
}
//@Override
public final boolean equals(final Object o) {
if (o==this) return true;
if (o==null) return false;
if (this.getClass().equals(o.getClass())) {
return this.query.equals( ((MultiTermQueryWrapperFilter)o).query );
}
return false;
}
//@Override
public final int hashCode() {
return query.hashCode();
}
/**
* Expert: Return the number of unique terms visited during execution of the filter.
* If there are many of them, you may consider using another filter type
* or optimize your total term count in index.
* <p>This method is not thread safe, be sure to only call it when no filter is running!
* If you re-use the same filter instance for another
* search, be sure to first reset the term counter
* with {@link #clearTotalNumberOfTerms}.
* @see #clearTotalNumberOfTerms
*/
public int getTotalNumberOfTerms() {
return query.getTotalNumberOfTerms();
}
/**
* Expert: Resets the counting of unique terms.
* Do this before executing the filter.
* @see #getTotalNumberOfTerms
*/
public void clearTotalNumberOfTerms() {
query.clearTotalNumberOfTerms();
}
abstract class TermGenerator {
public void generate(IndexReader reader, TermEnum enumerator) throws IOException {
TermDocs termDocs = reader.termDocs();
try {
do {
Term term = enumerator.term();
if (term == null)
break;
query.numberOfTerms++;
termDocs.seek(term);
while (termDocs.next()) {
handleDoc(termDocs.doc());
}
} while (enumerator.next());
} finally {
termDocs.close();
}
}
abstract public void handleDoc(int doc);
}
/**
* Returns a BitSet with true for documents which should be
* permitted in search results, and false for those that should
* not.
* @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
*/
//@Override
public BitSet bits(IndexReader reader) throws IOException {
final TermEnum enumerator = query.getEnum(reader);
try {
final BitSet bitSet = new BitSet(reader.maxDoc());
new TermGenerator() {
public void handleDoc(int doc) {
bitSet.set(doc);
}
}.generate(reader, enumerator);
return bitSet;
} finally {
enumerator.close();
}
}
/**
* Returns a DocIdSet with documents that should be
* permitted in search results.
*/
//@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
final TermEnum enumerator = query.getEnum(reader);
try {
// if current term in enum is null, the enum is empty -> shortcut
if (enumerator.term() == null)
return DocIdSet.EMPTY_DOCIDSET;
// else fill into a OpenBitSet
final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
new TermGenerator() {
public void handleDoc(int doc) {
bitSet.set(doc);
}
}.generate(reader, enumerator);
return bitSet;
} finally {
enumerator.close();
}
}
}

View File

@ -17,48 +17,25 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
/**
* A Filter that restricts search results to values that have a matching prefix in a given
* field.
*
* <p>
* This code borrows heavily from {@link PrefixQuery}, but is implemented as a Filter
*
* </p>
*/
public class PrefixFilter extends Filter {
protected final Term prefix;
private PrefixQuery prefixQuery;
public class PrefixFilter extends MultiTermQueryWrapperFilter {
public PrefixFilter(Term prefix) {
this.prefix = prefix;
this.prefixQuery = new PrefixQuery(prefix);
super(new PrefixQuery(prefix));
}
public Term getPrefix() { return prefix; }
/**
* @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
*/
public BitSet bits(IndexReader reader) throws IOException {
return prefixQuery.getFilter().bits(reader);
}
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
return prefixQuery.getFilter().getDocIdSet(reader);
}
public Term getPrefix() { return ((PrefixQuery)query).getPrefix(); }
/** Prints a user-readable version of this query. */
public String toString () {
StringBuffer buffer = new StringBuffer();
buffer.append("PrefixFilter(");
buffer.append(prefix.toString());
buffer.append(getPrefix().toString());
buffer.append(")");
return buffer.toString();
}

View File

@ -17,34 +17,17 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import org.apache.lucene.index.IndexReader;
import java.io.IOException;
import java.util.BitSet;
import java.text.Collator;
/**
* A Filter that restricts search results to a range of values in a given
* field.
*
* <p>
* This code borrows heavily from {@link RangeQuery}, but is implemented as a Filter
*
* </p>
*
* If you construct a large number of range filters with different ranges but on the
* same field, {@link FieldCacheRangeFilter} may have significantly better performance.
*/
public class RangeFilter extends Filter {
public class RangeFilter extends MultiTermQueryWrapperFilter {
private String fieldName;
private String lowerTerm;
private String upperTerm;
private boolean includeLower;
private boolean includeUpper;
private Collator collator;
private RangeQuery rangeQuery;
/**
* @param fieldName The field this range applies to
* @param lowerTerm The lower bound on this range
@ -57,25 +40,7 @@ public class RangeFilter extends Filter {
*/
public RangeFilter(String fieldName, String lowerTerm, String upperTerm,
boolean includeLower, boolean includeUpper) {
this.fieldName = fieldName;
this.lowerTerm = lowerTerm;
this.upperTerm = upperTerm;
this.includeLower = includeLower;
this.includeUpper = includeUpper;
if (null == lowerTerm && null == upperTerm) {
throw new IllegalArgumentException
("At least one value must be non-null");
}
if (includeLower && null == lowerTerm) {
throw new IllegalArgumentException
("The lower bound must be non-null to be inclusive");
}
if (includeUpper && null == upperTerm) {
throw new IllegalArgumentException
("The upper bound must be non-null to be inclusive");
}
initRangeQuery();
super(new RangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper));
}
/**
@ -98,13 +63,7 @@ public class RangeFilter extends Filter {
public RangeFilter(String fieldName, String lowerTerm, String upperTerm,
boolean includeLower, boolean includeUpper,
Collator collator) {
this(fieldName, lowerTerm, upperTerm, includeLower, includeUpper);
this.collator = collator;
initRangeQuery();
}
private void initRangeQuery() {
rangeQuery = new RangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator);
super(new RangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator));
}
/**
@ -122,66 +81,4 @@ public class RangeFilter extends Filter {
public static RangeFilter More(String fieldName, String lowerTerm) {
return new RangeFilter(fieldName, lowerTerm, null, true, false);
}
/**
* Returns a BitSet with true for documents which should be
* permitted in search results, and false for those that should
* not.
* @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
*/
public BitSet bits(IndexReader reader) throws IOException {
return rangeQuery.getFilter().bits(reader);
}
/**
* Returns a DocIdSet with documents that should be
* permitted in search results.
*/
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
return rangeQuery.getFilter().getDocIdSet(reader);
}
public String toString() {
StringBuffer buffer = new StringBuffer();
buffer.append(fieldName);
buffer.append(":");
buffer.append(includeLower ? "[" : "{");
if (null != lowerTerm) {
buffer.append(lowerTerm);
}
buffer.append("-");
if (null != upperTerm) {
buffer.append(upperTerm);
}
buffer.append(includeUpper ? "]" : "}");
return buffer.toString();
}
/** Returns true if <code>o</code> is equal to this. */
public boolean equals(Object o) {
if (this == o) return true;
if (!(o instanceof RangeFilter)) return false;
RangeFilter other = (RangeFilter) o;
if (!this.fieldName.equals(other.fieldName)
|| this.includeLower != other.includeLower
|| this.includeUpper != other.includeUpper
|| (this.collator != null && ! this.collator.equals(other.collator))
) { return false; }
if (this.lowerTerm != null ? !this.lowerTerm.equals(other.lowerTerm) : other.lowerTerm != null) return false;
if (this.upperTerm != null ? !this.upperTerm.equals(other.upperTerm) : other.upperTerm != null) return false;
return true;
}
/** Returns a hash code value for this object.*/
public int hashCode() {
int h = fieldName.hashCode();
h ^= lowerTerm != null ? lowerTerm.hashCode() : 0xB6ECE882;
h = (h << 1) | (h >>> 31); // rotate to distinguish lower from upper
h ^= (upperTerm != null ? (upperTerm.hashCode()) : 0x91BEC2C2);
h ^= (includeLower ? 0xD484B933 : 0)
^ (includeUpper ? 0x6AE423AC : 0);
h ^= collator != null ? collator.hashCode() : 0;
return h;
}
}