From 55c0cc3922f5501e705f1194ece3191675bf606e Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Thu, 16 Apr 2009 11:37:23 +0000 Subject: [PATCH] LUCENE-1603: improve MultiTermQuery for better sharing, and so Trie(Numeric)RangeQuery can use it git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@765581 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 11 +- .../apache/lucene/search/MultiTermQuery.java | 82 +-------- .../search/MultiTermQueryWrapperFilter.java | 164 ++++++++++++++++++ .../apache/lucene/search/PrefixFilter.java | 31 +--- .../org/apache/lucene/search/RangeFilter.java | 109 +----------- 5 files changed, 178 insertions(+), 219 deletions(-) create mode 100644 src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java diff --git a/CHANGES.txt b/CHANGES.txt index 41855708941..082c6f21fca 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -278,11 +278,12 @@ New features turnaround than the normal approach of commiting the changes and then reopening a reader. (Jason Rutherglen via Mike McCandless) -21. LUCENE-1603: Some improvements to MultiTermQuery: return - DocIdSet.EMPTY_DOCIDSET if there are no terms in the enum; track - the total number of terms it visited during rewrite - (getTotalNumberOfTerms). Also, FilteredTermEnum is now more - friendly to subclasses. (Uwe Schindler via Mike McCandless) +21. LUCENE-1603: Added new MultiTermQueryWrapperFilter, to wrap any + MultiTermQuery as a Filter. Also made some improvements to + MultiTermQuery: return DocIdSet.EMPTY_DOCIDSET if there are no + terms in the enum; track the total number of terms it visited + during rewrite (getTotalNumberOfTerms). FilteredTermEnum is also + more friendly to subclassing. (Uwe Schindler via Mike McCandless) Optimizations diff --git a/src/java/org/apache/lucene/search/MultiTermQuery.java b/src/java/org/apache/lucene/search/MultiTermQuery.java index 1a8ea903cbc..36ef46cf816 100644 --- a/src/java/org/apache/lucene/search/MultiTermQuery.java +++ b/src/java/org/apache/lucene/search/MultiTermQuery.java @@ -18,13 +18,9 @@ package org.apache.lucene.search; */ import java.io.IOException; -import java.util.BitSet; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.TermEnum; -import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util.ToStringUtils; /** @@ -97,7 +93,7 @@ public abstract class MultiTermQuery extends Query { } protected Filter getFilter() { - return new MultiTermFilter(this); + return new MultiTermQueryWrapperFilter(this); } public Query rewrite(IndexReader reader) throws IOException { @@ -176,80 +172,4 @@ public abstract class MultiTermQuery extends Query { return term.hashCode() + Float.floatToRawIntBits(getBoost()); } - static class MultiTermFilter extends Filter { - MultiTermQuery mtq; - - abstract class TermGenerator { - public void generate(IndexReader reader, TermEnum enumerator) throws IOException { - TermDocs termDocs = reader.termDocs(); - try { - do { - Term term = enumerator.term(); - if (term == null) - break; - mtq.numberOfTerms++; - termDocs.seek(term); - while (termDocs.next()) { - handleDoc(termDocs.doc()); - } - } while (enumerator.next()); - } finally { - termDocs.close(); - } - } - abstract public void handleDoc(int doc); - } - - public MultiTermFilter(MultiTermQuery mtq) { - this.mtq = mtq; - } - - public BitSet bits(IndexReader reader) throws IOException { - final TermEnum enumerator = mtq.getEnum(reader); - try { - final BitSet bitSet = new BitSet(reader.maxDoc()); - new TermGenerator() { - public void handleDoc(int doc) { - bitSet.set(doc); - } - }.generate(reader, enumerator); - return bitSet; - } finally { - enumerator.close(); - } - } - - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - final TermEnum enumerator = mtq.getEnum(reader); - try { - // if current term in enum is null, the enum is empty -> shortcut - if (enumerator.term() == null) - return DocIdSet.EMPTY_DOCIDSET; - // else fill into a OpenBitSet - final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); - new TermGenerator() { - public void handleDoc(int doc) { - bitSet.set(doc); - } - }.generate(reader, enumerator); - return bitSet; - } finally { - enumerator.close(); - } - } - - public boolean equals(Object o) { - if (this == o) - return true; - if (!(o instanceof MultiTermFilter)) - return false; - - final MultiTermFilter filter = (MultiTermFilter) o; - return mtq.equals(filter.mtq); - } - - public int hashCode() { - return mtq.hashCode(); - } - } } diff --git a/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java b/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java new file mode 100644 index 00000000000..2e016a9f931 --- /dev/null +++ b/src/java/org/apache/lucene/search/MultiTermQueryWrapperFilter.java @@ -0,0 +1,164 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.TermEnum; +import org.apache.lucene.util.OpenBitSet; + +import java.io.IOException; +import java.util.BitSet; + +/** + * A wrapper for {@link MultiTermQuery}, that exposes its + * functionality as a {@link Filter}. + *

+ * MultiTermQueryWrapperFilter is not designed to + * be used by itself. Normally you subclass it to provide a Filter + * counterpart for a {@link MultiTermQuery} subclass. + *

+ * For example, {@link RangeFilter} and {@link PrefixFilter} extend + * MultiTermQueryWrapperFilter. + * This class also provides the functionality behind + * {@link MultiTermQuery#getFilter}, this is why it is not abstract. + */ +public class MultiTermQueryWrapperFilter extends Filter { + + protected final MultiTermQuery query; + + /** + * Wrap a {@link MultiTermQuery} as a Filter. + */ + protected MultiTermQueryWrapperFilter(MultiTermQuery query) { + this.query = query; + } + + //@Override + public String toString() { + // query.toString should be ok for the filter, too, if the query boost is 1.0f + return query.toString(); + } + + //@Override + public final boolean equals(final Object o) { + if (o==this) return true; + if (o==null) return false; + if (this.getClass().equals(o.getClass())) { + return this.query.equals( ((MultiTermQueryWrapperFilter)o).query ); + } + return false; + } + + //@Override + public final int hashCode() { + return query.hashCode(); + } + + /** + * Expert: Return the number of unique terms visited during execution of the filter. + * If there are many of them, you may consider using another filter type + * or optimize your total term count in index. + *

This method is not thread safe, be sure to only call it when no filter is running! + * If you re-use the same filter instance for another + * search, be sure to first reset the term counter + * with {@link #clearTotalNumberOfTerms}. + * @see #clearTotalNumberOfTerms + */ + public int getTotalNumberOfTerms() { + return query.getTotalNumberOfTerms(); + } + + /** + * Expert: Resets the counting of unique terms. + * Do this before executing the filter. + * @see #getTotalNumberOfTerms + */ + public void clearTotalNumberOfTerms() { + query.clearTotalNumberOfTerms(); + } + + abstract class TermGenerator { + public void generate(IndexReader reader, TermEnum enumerator) throws IOException { + TermDocs termDocs = reader.termDocs(); + try { + do { + Term term = enumerator.term(); + if (term == null) + break; + query.numberOfTerms++; + termDocs.seek(term); + while (termDocs.next()) { + handleDoc(termDocs.doc()); + } + } while (enumerator.next()); + } finally { + termDocs.close(); + } + } + abstract public void handleDoc(int doc); + } + + /** + * Returns a BitSet with true for documents which should be + * permitted in search results, and false for those that should + * not. + * @deprecated Use {@link #getDocIdSet(IndexReader)} instead. + */ + //@Override + public BitSet bits(IndexReader reader) throws IOException { + final TermEnum enumerator = query.getEnum(reader); + try { + final BitSet bitSet = new BitSet(reader.maxDoc()); + new TermGenerator() { + public void handleDoc(int doc) { + bitSet.set(doc); + } + }.generate(reader, enumerator); + return bitSet; + } finally { + enumerator.close(); + } + } + + /** + * Returns a DocIdSet with documents that should be + * permitted in search results. + */ + //@Override + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + final TermEnum enumerator = query.getEnum(reader); + try { + // if current term in enum is null, the enum is empty -> shortcut + if (enumerator.term() == null) + return DocIdSet.EMPTY_DOCIDSET; + // else fill into a OpenBitSet + final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); + new TermGenerator() { + public void handleDoc(int doc) { + bitSet.set(doc); + } + }.generate(reader, enumerator); + return bitSet; + } finally { + enumerator.close(); + } + } + +} diff --git a/src/java/org/apache/lucene/search/PrefixFilter.java b/src/java/org/apache/lucene/search/PrefixFilter.java index 0fd413781cc..8664903618c 100755 --- a/src/java/org/apache/lucene/search/PrefixFilter.java +++ b/src/java/org/apache/lucene/search/PrefixFilter.java @@ -17,48 +17,25 @@ package org.apache.lucene.search; * limitations under the License. */ -import java.io.IOException; -import java.util.BitSet; - -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; /** * A Filter that restricts search results to values that have a matching prefix in a given * field. - * - *

- * This code borrows heavily from {@link PrefixQuery}, but is implemented as a Filter - * - *

*/ -public class PrefixFilter extends Filter { - protected final Term prefix; - private PrefixQuery prefixQuery; +public class PrefixFilter extends MultiTermQueryWrapperFilter { public PrefixFilter(Term prefix) { - this.prefix = prefix; - this.prefixQuery = new PrefixQuery(prefix); + super(new PrefixQuery(prefix)); } - public Term getPrefix() { return prefix; } - - /** - * @deprecated Use {@link #getDocIdSet(IndexReader)} instead. - */ - public BitSet bits(IndexReader reader) throws IOException { - return prefixQuery.getFilter().bits(reader); - } - - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - return prefixQuery.getFilter().getDocIdSet(reader); - } + public Term getPrefix() { return ((PrefixQuery)query).getPrefix(); } /** Prints a user-readable version of this query. */ public String toString () { StringBuffer buffer = new StringBuffer(); buffer.append("PrefixFilter("); - buffer.append(prefix.toString()); + buffer.append(getPrefix().toString()); buffer.append(")"); return buffer.toString(); } diff --git a/src/java/org/apache/lucene/search/RangeFilter.java b/src/java/org/apache/lucene/search/RangeFilter.java index 29ccd359633..de93456814b 100644 --- a/src/java/org/apache/lucene/search/RangeFilter.java +++ b/src/java/org/apache/lucene/search/RangeFilter.java @@ -17,34 +17,17 @@ package org.apache.lucene.search; * limitations under the License. */ -import org.apache.lucene.index.IndexReader; - -import java.io.IOException; -import java.util.BitSet; import java.text.Collator; /** * A Filter that restricts search results to a range of values in a given * field. * - *

- * This code borrows heavily from {@link RangeQuery}, but is implemented as a Filter - * - *

- * * If you construct a large number of range filters with different ranges but on the * same field, {@link FieldCacheRangeFilter} may have significantly better performance. */ -public class RangeFilter extends Filter { +public class RangeFilter extends MultiTermQueryWrapperFilter { - private String fieldName; - private String lowerTerm; - private String upperTerm; - private boolean includeLower; - private boolean includeUpper; - private Collator collator; - private RangeQuery rangeQuery; - /** * @param fieldName The field this range applies to * @param lowerTerm The lower bound on this range @@ -57,25 +40,7 @@ public class RangeFilter extends Filter { */ public RangeFilter(String fieldName, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper) { - this.fieldName = fieldName; - this.lowerTerm = lowerTerm; - this.upperTerm = upperTerm; - this.includeLower = includeLower; - this.includeUpper = includeUpper; - - if (null == lowerTerm && null == upperTerm) { - throw new IllegalArgumentException - ("At least one value must be non-null"); - } - if (includeLower && null == lowerTerm) { - throw new IllegalArgumentException - ("The lower bound must be non-null to be inclusive"); - } - if (includeUpper && null == upperTerm) { - throw new IllegalArgumentException - ("The upper bound must be non-null to be inclusive"); - } - initRangeQuery(); + super(new RangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper)); } /** @@ -98,13 +63,7 @@ public class RangeFilter extends Filter { public RangeFilter(String fieldName, String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper, Collator collator) { - this(fieldName, lowerTerm, upperTerm, includeLower, includeUpper); - this.collator = collator; - initRangeQuery(); - } - - private void initRangeQuery() { - rangeQuery = new RangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator); + super(new RangeQuery(fieldName, lowerTerm, upperTerm, includeLower, includeUpper, collator)); } /** @@ -122,66 +81,4 @@ public class RangeFilter extends Filter { public static RangeFilter More(String fieldName, String lowerTerm) { return new RangeFilter(fieldName, lowerTerm, null, true, false); } - - /** - * Returns a BitSet with true for documents which should be - * permitted in search results, and false for those that should - * not. - * @deprecated Use {@link #getDocIdSet(IndexReader)} instead. - */ - public BitSet bits(IndexReader reader) throws IOException { - return rangeQuery.getFilter().bits(reader); - } - - /** - * Returns a DocIdSet with documents that should be - * permitted in search results. - */ - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - return rangeQuery.getFilter().getDocIdSet(reader); - } - - public String toString() { - StringBuffer buffer = new StringBuffer(); - buffer.append(fieldName); - buffer.append(":"); - buffer.append(includeLower ? "[" : "{"); - if (null != lowerTerm) { - buffer.append(lowerTerm); - } - buffer.append("-"); - if (null != upperTerm) { - buffer.append(upperTerm); - } - buffer.append(includeUpper ? "]" : "}"); - return buffer.toString(); - } - - /** Returns true if o is equal to this. */ - public boolean equals(Object o) { - if (this == o) return true; - if (!(o instanceof RangeFilter)) return false; - RangeFilter other = (RangeFilter) o; - - if (!this.fieldName.equals(other.fieldName) - || this.includeLower != other.includeLower - || this.includeUpper != other.includeUpper - || (this.collator != null && ! this.collator.equals(other.collator)) - ) { return false; } - if (this.lowerTerm != null ? !this.lowerTerm.equals(other.lowerTerm) : other.lowerTerm != null) return false; - if (this.upperTerm != null ? !this.upperTerm.equals(other.upperTerm) : other.upperTerm != null) return false; - return true; - } - - /** Returns a hash code value for this object.*/ - public int hashCode() { - int h = fieldName.hashCode(); - h ^= lowerTerm != null ? lowerTerm.hashCode() : 0xB6ECE882; - h = (h << 1) | (h >>> 31); // rotate to distinguish lower from upper - h ^= (upperTerm != null ? (upperTerm.hashCode()) : 0x91BEC2C2); - h ^= (includeLower ? 0xD484B933 : 0) - ^ (includeUpper ? 0x6AE423AC : 0); - h ^= collator != null ? collator.hashCode() : 0; - return h; - } }