diff --git a/CHANGES.txt b/CHANGES.txt index ef1285cc8a5..bd907a2fb6c 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -15,12 +15,16 @@ API Changes 2. LUCENE-1150: Re-expose StandardTokenizer's constants publicly; this was accidentally lost with LUCENE-966. (Nicolas Lalevée via Mike McCandless) + + 3. LUCENE-584: Changed Filter API to return a DocIdSet instead of a + java.util.BitSet. This allows using more efficient data structures + for Filters and makes them more flexible. (Paul Elschot, Michael Busch) Bug fixes New features -1. LUCENE-1137: Added Token.set/getFlags() accessors for passing more information about a Token through the analysis + 1. LUCENE-1137: Added Token.set/getFlags() accessors for passing more information about a Token through the analysis process. The flag is not indexed/stored and is thus only used by analysis. 2. LUCENE-1147: Add -segment option to CheckIndex tool so you can @@ -28,6 +32,12 @@ New features McCandless) 3. LUCENE-1045: Reopened this issue to add support for short and bytes. + + 4. LUCENE-584: Added new data structures to o.a.l.util, such as + OpenBitSet and SortedVIntList. These extend DocIdSet and can + directly be used for Filters with the new Filter API. Also changed + the core Filters to use OpenBitSet instead of java.util.BitSet. 
+ (Paul Elschot, Michael Busch) Optimizations diff --git a/contrib/miscellaneous/src/test/org/apache/lucene/misc/ChainedFilterTest.java b/contrib/miscellaneous/src/test/org/apache/lucene/misc/ChainedFilterTest.java index 525e8130d9a..734aa66669f 100644 --- a/contrib/miscellaneous/src/test/org/apache/lucene/misc/ChainedFilterTest.java +++ b/contrib/miscellaneous/src/test/org/apache/lucene/misc/ChainedFilterTest.java @@ -37,8 +37,8 @@ public class ChainedFilterTest extends TestCase { private Query query; // private DateFilter dateFilter; DateFilter was deprecated and removed private RangeFilter dateFilter; - private QueryFilter bobFilter; - private QueryFilter sueFilter; + private QueryWrapperFilter bobFilter; + private QueryWrapperFilter sueFilter; public void setUp() throws Exception { directory = new RAMDirectory(); @@ -74,9 +74,9 @@ public class ChainedFilterTest extends TestCase { // just treat dates as strings and select the whole range for now... dateFilter = new RangeFilter("date","","ZZZZ",true,true); - bobFilter = new QueryFilter( + bobFilter = new QueryWrapperFilter( new TermQuery(new Term("owner", "bob"))); - sueFilter = new QueryFilter( + sueFilter = new QueryWrapperFilter( new TermQuery(new Term("owner", "sue"))); } diff --git a/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/CachedFilterBuilder.java b/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/CachedFilterBuilder.java index ce63169ca8d..08e896b4571 100644 --- a/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/CachedFilterBuilder.java +++ b/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/CachedFilterBuilder.java @@ -8,7 +8,7 @@ import java.util.Map.Entry; import org.apache.lucene.search.CachingWrapperFilter; import org.apache.lucene.search.Filter; import org.apache.lucene.search.Query; -import org.apache.lucene.search.QueryFilter; +import org.apache.lucene.search.QueryWrapperFilter; import 
org.apache.lucene.xmlparser.DOMUtils; import org.apache.lucene.xmlparser.FilterBuilder; import org.apache.lucene.xmlparser.FilterBuilderFactory; @@ -105,7 +105,7 @@ public class CachedFilterBuilder implements FilterBuilder { //cache miss if (qb != null) { - cachedFilter = new QueryFilter(q); + cachedFilter = new QueryWrapperFilter(q); } else { cachedFilter = new CachingWrapperFilter(f); diff --git a/src/java/org/apache/lucene/search/CachingSpanFilter.java b/src/java/org/apache/lucene/search/CachingSpanFilter.java index dcd9e3fab12..aac5207dc8c 100644 --- a/src/java/org/apache/lucene/search/CachingSpanFilter.java +++ b/src/java/org/apache/lucene/search/CachingSpanFilter.java @@ -43,11 +43,19 @@ public class CachingSpanFilter extends SpanFilter { this.filter = filter; } + /** + * @deprecated Use {@link #getDocIdSet(IndexReader)} instead. + */ public BitSet bits(IndexReader reader) throws IOException { SpanFilterResult result = getCachedResult(reader); return result != null ? result.getBits() : null; } - + + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + SpanFilterResult result = getCachedResult(reader); + return result != null ? result.getDocIdSet() : null; + } + private SpanFilterResult getCachedResult(IndexReader reader) throws IOException { SpanFilterResult result = null; if (cache == null) { diff --git a/src/java/org/apache/lucene/search/CachingWrapperFilter.java b/src/java/org/apache/lucene/search/CachingWrapperFilter.java index b6c0564fb60..92fe7cd1e3b 100644 --- a/src/java/org/apache/lucene/search/CachingWrapperFilter.java +++ b/src/java/org/apache/lucene/search/CachingWrapperFilter.java @@ -43,6 +43,9 @@ public class CachingWrapperFilter extends Filter { this.filter = filter; } + /** + * @deprecated Use {@link #getDocIdSet(IndexReader)} instead. 
+ */ public BitSet bits(IndexReader reader) throws IOException { if (cache == null) { cache = new WeakHashMap(); @@ -63,6 +66,28 @@ public class CachingWrapperFilter extends Filter { return bits; } + + public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + if (cache == null) { + cache = new WeakHashMap(); + } + + synchronized (cache) { // check cache + DocIdSet cached = (DocIdSet) cache.get(reader); + if (cached != null) { + return cached; + } + } + + final DocIdSet docIdSet = filter.getDocIdSet(reader); + + synchronized (cache) { // update cache + cache.put(reader, docIdSet); + } + + return docIdSet; + + } public String toString() { return "CachingWrapperFilter("+filter+")"; diff --git a/src/java/org/apache/lucene/search/ConstantScoreQuery.java b/src/java/org/apache/lucene/search/ConstantScoreQuery.java index b8658d0dbec..1a28180894c 100644 --- a/src/java/org/apache/lucene/search/ConstantScoreQuery.java +++ b/src/java/org/apache/lucene/search/ConstantScoreQuery.java @@ -85,7 +85,7 @@ public class ConstantScoreQuery extends Query { public Explanation explain(IndexReader reader, int doc) throws IOException { ConstantScorer cs = (ConstantScorer)scorer(reader); - boolean exists = cs.bits.get(doc); + boolean exists = cs.docIdSetIterator.skipTo(doc) && (cs.docIdSetIterator.doc() == doc); ComplexExplanation result = new ComplexExplanation(); @@ -107,23 +107,22 @@ public class ConstantScoreQuery extends Query { } protected class ConstantScorer extends Scorer { - final BitSet bits; + final DocIdSetIterator docIdSetIterator; final float theScore; int doc=-1; public ConstantScorer(Similarity similarity, IndexReader reader, Weight w) throws IOException { super(similarity); theScore = w.getValue(); - bits = filter.bits(reader); + docIdSetIterator = filter.getDocIdSet(reader).iterator(); } public boolean next() throws IOException { - doc = bits.nextSetBit(doc+1); - return doc >= 0; + return docIdSetIterator.next(); } public int doc() { - return doc; + return 
docIdSetIterator.doc(); } public float score() throws IOException { @@ -131,8 +130,7 @@ public class ConstantScoreQuery extends Query { } public boolean skipTo(int target) throws IOException { - doc = bits.nextSetBit(target); // requires JDK 1.4 - return doc >= 0; + return docIdSetIterator.skipTo(target); } public Explanation explain(int doc) throws IOException { @@ -170,3 +168,4 @@ public class ConstantScoreQuery extends Query { } + diff --git a/src/java/org/apache/lucene/search/DocIdSet.java b/src/java/org/apache/lucene/search/DocIdSet.java new file mode 100644 index 00000000000..239c6c0d87f --- /dev/null +++ b/src/java/org/apache/lucene/search/DocIdSet.java @@ -0,0 +1,27 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/** + * A DocIdSet contains a set of doc ids. Implementing classes must provide + * a {@link DocIdSetIterator} to access the set. 
+ */ +public abstract class DocIdSet { + public abstract DocIdSetIterator iterator(); +} diff --git a/src/java/org/apache/lucene/search/DocIdSetIterator.java b/src/java/org/apache/lucene/search/DocIdSetIterator.java new file mode 100644 index 00000000000..5f22ebf85f4 --- /dev/null +++ b/src/java/org/apache/lucene/search/DocIdSetIterator.java @@ -0,0 +1,49 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +/** + * This abstract class defines methods to iterate over a set of + * non-decreasing doc ids. + */ +public abstract class DocIdSetIterator { + /** Returns the current document number.
This is invalid until {@link + #next()} is called for the first time.*/ + public abstract int doc(); + + /** Moves to the next docId in the set. Returns true, iff + * there is such a docId. */ + public abstract boolean next() throws IOException; + + /** Skips entries to the first beyond the current whose document number is + * greater than or equal to target.
Returns true iff there is such + * an entry.
Behaves as if written:
+ * boolean skipTo(int target) { + * do { + * if (!next()) + * return false; + * } while (target > doc()); + * return true; + * } + *+ * Some implementations are considerably more efficient than that. + */ + public abstract boolean skipTo(int target) throws IOException; +} diff --git a/src/java/org/apache/lucene/search/Filter.java b/src/java/org/apache/lucene/search/Filter.java index 008c15d9453..25cffcf8db5 100644 --- a/src/java/org/apache/lucene/search/Filter.java +++ b/src/java/org/apache/lucene/search/Filter.java @@ -20,11 +20,32 @@ package org.apache.lucene.search; import java.util.BitSet; import java.io.IOException; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.DocIdBitSet; -/** Abstract base class providing a mechanism to restrict searches to a subset - of an index. */ +/** Abstract base class providing a mechanism to use a subset of an index + * for restriction or permission of index search results. + *
+ * Note: In Lucene 3.0 {@link #bits(IndexReader)} will be removed
+ * and {@link #getDocIdSet(IndexReader)} will be defined as abstract.
+ * All implementing classes must therefore implement {@link #getDocIdSet(IndexReader)}
+ * in order to work with Lucene 3.0.
+ */
public abstract class Filter implements java.io.Serializable {
- /** Returns a BitSet with true for documents which should be permitted in
- search results, and false for those that should not. */
- public abstract BitSet bits(IndexReader reader) throws IOException;
+ /**
+ * @return A BitSet with true for documents which should be permitted in
+ * search results, and false for those that should not (null in this base class).
+ * @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
+ */
+ public BitSet bits(IndexReader reader) throws IOException {
+ return null; // this base implementation supplies no BitSet
+ }
+
+ /**
+ * @return a DocIdSet that provides the documents which should be
+ * permitted or prohibited in search results; here the result of bits(reader) wrapped in a DocIdBitSet.
+ * @see DocIdBitSet
+ */
+ public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+ return new DocIdBitSet(bits(reader)); // NOTE(review): bits() returns null unless overridden - confirm DocIdBitSet tolerates a null BitSet
+ }
}
diff --git a/src/java/org/apache/lucene/search/FilteredQuery.java b/src/java/org/apache/lucene/search/FilteredQuery.java
index 516f73a9f91..7729bef6044 100644
--- a/src/java/org/apache/lucene/search/FilteredQuery.java
+++ b/src/java/org/apache/lucene/search/FilteredQuery.java
@@ -21,7 +21,6 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.ToStringUtils;
import java.io.IOException;
-import java.util.BitSet;
import java.util.Set;
@@ -47,7 +46,7 @@ extends Query {
/**
* Constructs a new query which applies a filter to the results of the original query.
- * Filter.bits() will be called every time this query is used in a search.
+ * Filter.getDocIdSet() will be called every time this query is used in a search.
* @param query Query to be filtered, cannot be null
.
* @param filter Filter to apply to query results, cannot be null
.
*/
@@ -86,13 +85,15 @@ extends Query {
inner.addDetail(preBoost);
}
Filter f = FilteredQuery.this.filter;
- BitSet matches = f.bits(ir);
- if (matches.get(i))
+ DocIdSetIterator docIdSetIterator = f.getDocIdSet(ir).iterator();
+ if (docIdSetIterator.skipTo(i) && (docIdSetIterator.doc() == i)) {
return inner;
- Explanation result = new Explanation
- (0.0f, "failure to match filter: " + f.toString());
- result.addDetail(inner);
- return result;
+ } else {
+ Explanation result = new Explanation
+ (0.0f, "failure to match filter: " + f.toString());
+ result.addDetail(inner);
+ return result;
+ }
}
// return this query
@@ -100,50 +101,49 @@ extends Query {
// return a filtering scorer
public Scorer scorer (IndexReader indexReader) throws IOException {
- final Scorer scorer = weight.scorer (indexReader);
- final BitSet bitset = filter.bits (indexReader);
- return new Scorer (similarity) {
+ final Scorer scorer = weight.scorer(indexReader);
+ final DocIdSetIterator docIdSetIterator = filter.getDocIdSet(indexReader).iterator();
- public boolean next() throws IOException {
- do {
- if (! scorer.next()) {
+ return new Scorer(similarity) {
+
+ private boolean advanceToCommon() throws IOException {
+ while (scorer.doc() != docIdSetIterator.doc()) {
+ if (scorer.doc() < docIdSetIterator.doc()) {
+ if (!scorer.skipTo(docIdSetIterator.doc())) {
+ return false;
+ }
+ } else if (!docIdSetIterator.skipTo(scorer.doc())) {
return false;
}
- } while (! bitset.get(scorer.doc()));
- /* When skipTo() is allowed on scorer it should be used here
- * in combination with bitset.nextSetBit(...)
- * See the while loop in skipTo() below.
- */
+ }
return true;
}
+
+ public boolean next() throws IOException {
+ return docIdSetIterator.next() && scorer.next() && advanceToCommon();
+ }
+
public int doc() { return scorer.doc(); }
public boolean skipTo(int i) throws IOException {
- if (! scorer.skipTo(i)) {
- return false;
- }
- while (! bitset.get(scorer.doc())) {
- int nextFiltered = bitset.nextSetBit(scorer.doc() + 1);
- if (nextFiltered == -1) {
- return false;
- } else if (! scorer.skipTo(nextFiltered)) {
- return false;
- }
- }
- return true;
- }
+ return docIdSetIterator.skipTo(i)
+ && scorer.skipTo(docIdSetIterator.doc())
+ && advanceToCommon();
+ }
public float score() throws IOException { return getBoost() * scorer.score(); }
// add an explanation about whether the document was filtered
public Explanation explain (int i) throws IOException {
- Explanation exp = scorer.explain (i);
- exp.setValue(getBoost() * exp.getValue());
+ Explanation exp = scorer.explain(i);
- if (bitset.get(i))
+ if (docIdSetIterator.skipTo(i) && (docIdSetIterator.doc() == i)) {
exp.setDescription ("allowed by filter: "+exp.getDescription());
- else
+ exp.setValue(getBoost() * exp.getValue());
+ } else {
exp.setDescription ("removed by filter: "+exp.getDescription());
+ exp.setValue(0.0f);
+ }
return exp;
}
};
diff --git a/src/java/org/apache/lucene/search/IndexSearcher.java b/src/java/org/apache/lucene/search/IndexSearcher.java
index 031a8d55a77..cb9ef692355 100644
--- a/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -128,22 +128,33 @@ public class IndexSearcher extends Searcher {
// inherit javadoc
public void search(Weight weight, Filter filter,
final HitCollector results) throws IOException {
- HitCollector collector = results;
- if (filter != null) {
- final BitSet bits = filter.bits(reader);
- collector = new HitCollector() {
- public final void collect(int doc, float score) {
- if (bits.get(doc)) { // skip docs not in bits
- results.collect(doc, score);
- }
- }
- };
- }
Scorer scorer = weight.scorer(reader);
if (scorer == null)
return;
- scorer.score(collector);
+
+ if (filter == null) { // no filter: let the scorer feed every hit to the collector
+ scorer.score(results);
+ return;
+ }
+
+ DocIdSetIterator docIdSetIterator = filter.getDocIdSet(reader).iterator(); // CHECKME: use ConjunctionScorer here?
+ boolean more = docIdSetIterator.next() && scorer.skipTo(docIdSetIterator.doc()); // position both iterators once up front
+ while (more) {
+ int filterDocId = docIdSetIterator.doc();
+ if (filterDocId > scorer.doc() && !scorer.skipTo(filterDocId)) { // only skip when the filter is ahead: skipTo() on the current doc would step past it
+ more = false; // scorer exhausted
+ } else {
+ int scorerDocId = scorer.doc();
+ if (scorerDocId == filterDocId) { // permitted by filter
+ results.collect(scorerDocId, scorer.score());
+ more = docIdSetIterator.skipTo(scorerDocId + 1); // move the filter past the collected doc
+ } else {
+ more = docIdSetIterator.skipTo(scorerDocId); // scorer is ahead: catch the filter up to it
+ }
+ }
+ }
+
}
public Query rewrite(Query original) throws IOException {
diff --git a/src/java/org/apache/lucene/search/PrefixFilter.java b/src/java/org/apache/lucene/search/PrefixFilter.java
index a624163ce89..cac0ecd9eca 100755
--- a/src/java/org/apache/lucene/search/PrefixFilter.java
+++ b/src/java/org/apache/lucene/search/PrefixFilter.java
@@ -18,6 +18,7 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.search.Filter;
+import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermEnum;
@@ -39,6 +40,9 @@ public class PrefixFilter extends Filter {
public Term getPrefix() { return prefix; }
+ /**
+ * @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
+ */
public BitSet bits(IndexReader reader) throws IOException {
final BitSet bitSet = new BitSet(reader.maxDoc());
new PrefixGenerator(prefix) {
@@ -48,6 +52,16 @@ public class PrefixFilter extends Filter {
}.generate(reader);
return bitSet;
}
+
+ public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+ final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); // bit set sized from reader.maxDoc()
+ new PrefixGenerator(prefix) {
+ public void handleDoc(int doc) { // per-document callback; presumably invoked by generate() for each matching doc
+ bitSet.set(doc);
+ }
+ }.generate(reader);
+ return bitSet; // the OpenBitSet itself is returned as the DocIdSet
+ }
/** Prints a user-readable version of this query. */
public String toString () {
@@ -105,3 +119,4 @@ abstract class PrefixGenerator implements IdGenerator {
}
+
diff --git a/src/java/org/apache/lucene/search/QueryWrapperFilter.java b/src/java/org/apache/lucene/search/QueryWrapperFilter.java
index 2db05572ad8..d36dac7769d 100644
--- a/src/java/org/apache/lucene/search/QueryWrapperFilter.java
+++ b/src/java/org/apache/lucene/search/QueryWrapperFilter.java
@@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.BitSet;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.OpenBitSet;
/**
* Constrains search results to only match those which also match a provided
@@ -44,6 +45,9 @@ public class QueryWrapperFilter extends Filter {
this.query = query;
}
+ /**
+ * @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
+ */
public BitSet bits(IndexReader reader) throws IOException {
final BitSet bits = new BitSet(reader.maxDoc());
@@ -54,6 +58,17 @@ public class QueryWrapperFilter extends Filter {
});
return bits;
}
+
+ public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+ final OpenBitSet bits = new OpenBitSet(reader.maxDoc()); // bit set sized from reader.maxDoc()
+
+ new IndexSearcher(reader).search(query, new HitCollector() { // run the wrapped query against this reader
+ public final void collect(int doc, float score) {
+ bits.set(doc); // set bit for hit; the score is not used
+ }
+ });
+ return bits;
+ }
public String toString() {
return "QueryWrapperFilter(" + query + ")";
diff --git a/src/java/org/apache/lucene/search/RangeFilter.java b/src/java/org/apache/lucene/search/RangeFilter.java
index 75a65d77cbe..049f1fc8c21 100644
--- a/src/java/org/apache/lucene/search/RangeFilter.java
+++ b/src/java/org/apache/lucene/search/RangeFilter.java
@@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.util.OpenBitSet;
import java.io.IOException;
import java.util.BitSet;
@@ -94,6 +95,7 @@ public class RangeFilter extends Filter {
* Returns a BitSet with true for documents which should be
* permitted in search results, and false for those that should
* not.
+ * @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
*/
public BitSet bits(IndexReader reader) throws IOException {
BitSet bits = new BitSet(reader.maxDoc());
@@ -152,6 +154,68 @@ public class RangeFilter extends Filter {
return bits;
}
+ /**
+ * Returns a DocIdSet (an OpenBitSet) with a bit set for every document
+ * listed by a term of this filter's field that lies within the configured range.
+ */
+ public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+ OpenBitSet bits = new OpenBitSet(reader.maxDoc()); // bit set sized from reader.maxDoc()
+
+ TermEnum enumerator =
+ (null != lowerTerm
+ ? reader.terms(new Term(fieldName, lowerTerm))
+ : reader.terms(new Term(fieldName,""))); // no lower bound: start from the empty term text
+
+ try {
+
+ if (enumerator.term() == null) { // nothing to enumerate: return the empty set
+ return bits;
+ }
+
+ boolean checkLower = false;
+ if (!includeLower) // make adjustments to set to exclusive
+ checkLower = true;
+
+ TermDocs termDocs = reader.termDocs();
+ try {
+
+ do {
+ Term term = enumerator.term();
+ if (term != null && term.field().equals(fieldName)) {
+ if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) {
+ checkLower = false; // a term passed the lower bound; later terms skip the comparison
+ if (upperTerm != null) {
+ int compare = upperTerm.compareTo(term.text());
+ /* if beyond the upper term, or is exclusive and
+ * this is equal to the upper term, break out */
+ if ((compare < 0) ||
+ (!includeUpper && compare==0)) {
+ break;
+ }
+ }
+ /* we have a good term, find the docs */
+
+ termDocs.seek(enumerator.term());
+ while (termDocs.next()) {
+ bits.set(termDocs.doc()); // mark each document listed for this term
+ }
+ }
+ } else {
+ break; // term is null or belongs to another field: stop enumerating
+ }
+ }
+ while (enumerator.next());
+
+ } finally {
+ termDocs.close();
+ }
+ } finally {
+ enumerator.close();
+ }
+
+ return bits;
+ }
+
public String toString() {
StringBuffer buffer = new StringBuffer();
buffer.append(fieldName);
diff --git a/src/java/org/apache/lucene/search/RemoteCachingWrapperFilter.java b/src/java/org/apache/lucene/search/RemoteCachingWrapperFilter.java
index 9ac6d40688b..a1dc1856bf3 100644
--- a/src/java/org/apache/lucene/search/RemoteCachingWrapperFilter.java
+++ b/src/java/org/apache/lucene/search/RemoteCachingWrapperFilter.java
@@ -50,9 +50,21 @@ public class RemoteCachingWrapperFilter extends Filter {
* searcher side of a remote connection.
* @param reader the index reader for the Filter
* @return the bitset
+ * @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
*/
public BitSet bits(IndexReader reader) throws IOException {
Filter cachedFilter = FilterManager.getInstance().getFilter(filter);
return cachedFilter.bits(reader);
}
+
+ /**
+ * Uses the {@link FilterManager} to keep the cache for a filter on the
+ * searcher side of a remote connection.
+ * @param reader the index reader for the Filter
+ * @return the DocIdSet produced by the filter instance obtained from the FilterManager
+ */
+ public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+ Filter cachedFilter = FilterManager.getInstance().getFilter(filter); // ask the FilterManager singleton for its instance of this filter
+ return cachedFilter.getDocIdSet(reader);
+ }
}
diff --git a/src/java/org/apache/lucene/search/Scorer.java b/src/java/org/apache/lucene/search/Scorer.java
index 2da2738633a..21cd5986746 100644
--- a/src/java/org/apache/lucene/search/Scorer.java
+++ b/src/java/org/apache/lucene/search/Scorer.java
@@ -33,7 +33,7 @@ import java.io.IOException;
*
- * When this method is used the {@link #explain(int)} method should not - * be used. - *
- * - * @return true iff there is another document matching the query. - * @see BooleanQuery#setAllowDocsOutOfOrder - */ - public abstract boolean next() throws IOException; - - /** Returns the current document number matching the query. - * Initially invalid, until {@link #next()} is called the first time. - */ - public abstract int doc(); - /** Returns the score of the current document matching the query. * Initially invalid, until {@link #next()} or {@link #skipTo(int)} * is called the first time. */ public abstract float score() throws IOException; - /** - * Skips to the document matching this Scorer with the lowest doc Id - * greater than or equal to a given target. - * - *- * The behavior of this method is undefined if the target specified is - * less than or equal to the current value of {@link #doc()}. - *
- * Behaves as if written: - *
- * boolean skipTo(int target) { - * do { - * if (!next()) - * return false; - * } while (target > doc()); - * return true; - * } - *- * Most implementations are considerably more efficient than that. - * - * - *
- * When this method is used the {@link #explain(int)} method should not - * be used. - *
- * - * @param target The target document number. - * @return true iff there is such a match. - * @see BooleanQuery#setAllowDocsOutOfOrder - */ - public abstract boolean skipTo(int target) throws IOException; - /** Returns an explanation of the score for a document. *OpenBitSet
is faster than java.util.BitSet
in most operations
+ * and *much* faster at calculating cardinality of sets and results of set operations.
+ * It can also handle sets of larger cardinality (up to 64 * 2**32-1)
+ *
+ * The goals of <code>OpenBitSet</code> are the fastest implementation possible, and
+ * maximum code reuse. Extra safety and encapsulation
+ * may always be built on top, but if that's built in, the cost can never be removed (and
+ * hence people re-implement their own version in order to get better performance).
+ * If you want a "safe", totally encapsulated (and slower and limited) BitSet
+ * class, use <code>java.util.BitSet</code>.
+ *
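+ * <table border="1">
+ *  <tr>
+ *   <th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th> <th>nextSetBit</th> <th>get</th> <th>iterator</th>
+ *  </tr>
+ *  <tr>
+ *   <th>50% full</th> <td>3.36</td> <td>3.96</td> <td>1.44</td> <td>1.46</td> <td>1.99</td> <td>1.58</td>
+ *  </tr>
+ *  <tr>
+ *   <th>1% full</th> <td>3.31</td> <td>3.90</td> <td>&nbsp;</td> <td>1.04</td> <td>&nbsp;</td> <td>0.99</td>
+ *  </tr>
+ * </table>
+ *
+ * <table border="1">
+ *  <tr>
+ *   <th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th> <th>nextSetBit</th> <th>get</th> <th>iterator</th>
+ *  </tr>
+ *  <tr>
+ *   <th>50% full</th> <td>2.50</td> <td>3.50</td> <td>1.00</td> <td>1.03</td> <td>1.12</td> <td>1.25</td>
+ *  </tr>
+ *  <tr>
+ *   <th>1% full</th> <td>2.51</td> <td>3.49</td> <td>&nbsp;</td> <td>1.00</td> <td>&nbsp;</td> <td>1.02</td>
+ *  </tr>
+ * </table>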
+ * numWords is the number of elements in the array that contain
+ * set bits (non-zero longs).
+ * numWords should be &lt;= bits.length, and
+ * any existing words in the array at position &gt;= numWords should be zero.
+ *
+ */
+ public OpenBitSet(long[] bits, int numWords) {
+ this.bits = bits; // keeps a reference to the caller's array; no copy is made
+ this.wlen = numWords; // number of words recorded as in use
+ }
+
+ /** Returns a new OpenBitSetIterator constructed from this set's backing
+ * long[] and its current number of words in use. */
+ public DocIdSetIterator iterator() {
+ return new OpenBitSetIterator(bits, wlen);
+ }
+
+ /** Returns the current capacity in bits (1 greater than the index of the last bit) */
+ public long capacity() { return bits.length << 6; }
+
+ /**
+ * Returns the current capacity of this set, exactly as {@link #capacity()}
+ * does. Included for compatibility. This is *not* equal to {@link #cardinality}
+ */
+ public long size() {
+ return capacity();
+ }
+
+ /** Returns true if there are no set bits, i.e. if {@link #cardinality} is zero. */
+ public boolean isEmpty() { return cardinality()==0; }
+
+ /** Expert: returns the long[] storing the bits. This is the internal array itself, not a copy. */
+ public long[] getBits() { return bits; }
+
+ /** Expert: sets a new long[] to use as the bit storage. The number of words in use is left unchanged. */
+ public void setBits(long[] bits) { this.bits = bits; }
+
+ /** Expert: gets the number of longs in the array that are in use (the value of wlen). */
+ public int getNumWords() { return wlen; }
+
+ /** Expert: sets the number of longs in the array that are in use. The backing array is neither resized nor cleared. */
+ public void setNumWords(int nWords) { this.wlen=nWords; }
+
+
+
+  /** Returns true or false for the specified bit index. An index whose word
+   * lies at or past the end of the backing array reads as false.
+   */
+  public boolean get(int index) {
+    final int word = index >> 6;   // index / 64
+    // The shift is signed, so a negative index gives a negative word number;
+    // that passes the guard below and fails on the array access itself
+    // (ArrayIndexOutOfBoundsException), so no explicit check is needed.
+    if (word >= bits.length) {
+      return false;
+    }
+    return (bits[word] & (1L << (index & 0x3f))) != 0;   // index % 64 selects the bit
+  }
+
+
+  /** Returns true or false for the specified bit index.
+   * The index should be less than the OpenBitSet size; the only bounds
+   * check is the array access itself.
+   */
+  public boolean fastGet(int index) {
+    // word = index / 64 (signed shift: a negative index gives a negative word
+    // number and therefore an ArrayIndexOutOfBoundsException); bit = index % 64
+    final long word = bits[index >> 6];
+    return (word & (1L << (index & 0x3f))) != 0;
+  }
+
+
+
+ /** Returns true or false for the specified bit index. Allows specifying
+ * an index outside the current size: a word at or past the end of the backing array reads as false.
+ */
+ public boolean get(long index) {
+ int i = (int)(index >> 6); // div 64
+ if (i>=bits.length) return false;
+ int bit = (int)index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ return (bits[i] & bitmask) != 0;
+ }
+
+ /** Returns true or false for the specified bit index.
+ * The index should be less than the OpenBitSet size (no bounds check is made here). */
+ public boolean fastGet(long index) {
+ int i = (int)(index >> 6); // div 64
+ int bit = (int)index & 0x3f; // mod 64
+ long bitmask = 1L << bit;
+ return (bits[i] & bitmask) != 0;
+ }
+
+ /*
+ // alternate implementation of get()
+ public boolean get1(int index) {
+ int i = index >> 6; // div 64
+ int bit = index & 0x3f; // mod 64
+ return ((bits[i]>>>bit) & 0x01) != 0;
+ // this does a long shift and a bittest (on x86) vs
+ // a long shift, and a long AND, (the test for zero is prob a no-op)
+ // testing on a P4 indicates this is slower than (bits[i] & bitmask) != 0;
+ }
+ */
+
+
+  /** Returns 1 if the bit is set, 0 if not.
+   * The index should be less than the OpenBitSet size.
+   */
+  public int getBit(int index) {
+    final long word = bits[index >> 6];             // word holding the bit (index / 64)
+    final long shifted = word >>> (index & 0x3f);   // bring bit (index % 64) down to position 0
+    return ((int) shifted) & 0x01;
+  }
+
+
+ /*
+ public boolean get2(int index) {
+ int word = index >> 6; // div 64
+ int bit = index & 0x0000003f; // mod 64
+ return (bits[word] << bit) < 0; // hmmm, this would work if bit order were reversed
+ // we could right shift and check for parity bit, if it was available to us.
+ }
+ */
+
+  /** Sets a bit, expanding the set size if necessary. */
+  public void set(long index) {
+    // Resolve the word first: expandingWordNum may grow the set before the write.
+    final int word = expandingWordNum(index);
+    bits[word] |= 1L << ((int) index & 0x3f);   // index % 64 selects the bit
+  }
+
+
+  /** Sets the bit at the specified index.
+   * The index should be less than the OpenBitSet size; no expansion is done.
+   */
+  public void fastSet(int index) {
+    // word = index / 64, bit = index % 64
+    bits[index >> 6] |= 1L << (index & 0x3f);
+  }
+
+  /** Sets the bit at the specified index.
+   * The index should be less than the OpenBitSet size; no expansion is done.
+   */
+  public void fastSet(long index) {
+    final int word = (int) (index >> 6);          // index / 64
+    bits[word] |= 1L << ((int) index & 0x3f);     // index % 64
+  }
+
+  /** Sets a range of bits, expanding the set size if necessary.
+   *
+   * @param startIndex lower index
+   * @param endIndex one-past the last bit to set
+   */
+  public void set(long startIndex, long endIndex) {
+    if (startIndex >= endIndex) {
+      return;   // empty or inverted range: nothing to set
+    }
+
+    final int firstWord = (int) (startIndex >> 6);
+    // endIndex is exclusive, so the last word touched is the one holding endIndex-1
+    final int lastWord = expandingWordNum(endIndex - 1);
+
+    // Long shifts use only the low 6 bits of the distance, so the raw indexes
+    // can be used directly; -endIndex acts like 64-(endIndex&0x3f).
+    final long firstMask = -1L << startIndex;
+    final long lastMask = -1L >>> -endIndex;
+
+    if (firstWord != lastWord) {
+      bits[firstWord] |= firstMask;
+      Arrays.fill(bits, firstWord + 1, lastWord, -1L);   // whole words in between
+      bits[lastWord] |= lastMask;
+    } else {
+      bits[firstWord] |= (firstMask & lastMask);         // range lies inside one word
+    }
+  }
+
+
+
+ /** Computes the index of the word holding the given bit. When that word lies at or
+  * beyond wlen, ensureCapacity(index+1) is called and wlen is raised to include it.
+  *
+  * @param index the bit index
+  * @return the word index, index / 64
+  */
+ protected int expandingWordNum(long index) {
+   final int word = (int) (index >> 6);
+   if (word < wlen) {
+     return word;            // already inside the used part of the array
+   }
+   ensureCapacity(index + 1);
+   wlen = word + 1;
+   return word;
+ }
+
+
+ /** Turns off the bit at the given index without any size handling.
+  * The index should be less than the OpenBitSet size.
+  */
+ public void fastClear(int index) {
+   final int word = index >> 6;                // index / 64
+   final long mask = 1L << (index & 0x03f);    // single bit at index % 64
+   // Clearing needs the inverted mask, i.e. one more operation than setting.
+   // Ideas noted for avoiding it: with only 63 usable bits per word a
+   // sign-extending right shift of 1011...1 could position the zero, or
+   // Long.rotateRight()/rotateLeft() could be used *if* the JVM turned them
+   // into a native instruction, e.g.
+   //   bits[word] &= Long.rotateLeft(0xfffffffe,bit);
+   bits[word] &= ~mask;
+ }
+
+ /** Turns off the bit at the given (long) index without any size handling.
+  * The index should be less than the OpenBitSet size.
+  */
+ public void fastClear(long index) {
+   final int word = (int) (index >> 6);             // index / 64
+   final long mask = 1L << ((int) index & 0x3f);    // single bit at index % 64
+   bits[word] &= ~mask;
+ }
+
+ /** Turns off the bit at the given index. An index beyond the current set size is
+  * accepted and ignored; the size is never changed.
+  */
+ public void clear(long index) {
+   final int word = (int) (index >> 6);   // index / 64
+   if (word < wlen) {
+     final long mask = 1L << ((int) index & 0x3f);   // single bit at index % 64
+     bits[word] &= ~mask;
+   }
+   // otherwise the bit lies past the used words and there is nothing to do
+ }
+
+ /** Turns off every bit in the half-open range [startIndex, endIndex).
+  * Clearing past the end does not change the size of the set.
+  *
+  * @param startIndex lower index
+  * @param endIndex one-past the last bit to clear
+  */
+ public void clear(long startIndex, long endIndex) {
+   if (startIndex >= endIndex) return;
+
+   final int firstWord = (int) (startIndex >> 6);
+   if (firstWord >= wlen) return;     // the range starts past the used words
+
+   // endIndex is exclusive, so the last word touched is the one holding endIndex-1
+   final int lastWord = (int) ((endIndex - 1) >> 6);
+
+   // Masks of the bits to KEEP (inverse of the range masks). Java uses only the
+   // low 6 bits of a long shift count, which makes -endIndex act as 64-(endIndex&0x3f).
+   final long keepLow = ~(-1L << startIndex);     // bits below the start bit
+   final long keepHigh = ~(-1L >>> -endIndex);    // bits at and above the end bit
+
+   if (firstWord == lastWord) {
+     // single word: keep everything outside the range
+     bits[firstWord] &= (keepLow | keepHigh);
+     return;
+   }
+
+   bits[firstWord] &= keepLow;
+
+   // zero the fully covered words, but never past the used part of the array
+   final int middleEnd = Math.min(wlen, lastWord);
+   Arrays.fill(bits, firstWord + 1, middleEnd, 0L);
+
+   if (lastWord < wlen) {
+     bits[lastWord] &= keepHigh;
+   }
+ }
+
+
+
+ /** Turns on the bit at the given index and reports whether it was already set.
+  * The index should be less than the OpenBitSet size.
+  */
+ public boolean getAndSet(int index) {
+   final int word = index >> 6;                // index / 64
+   final long mask = 1L << (index & 0x3f);     // single bit at index % 64
+   final long before = bits[word];
+   bits[word] = before | mask;
+   return (before & mask) != 0;                // value prior to the update
+ }
+
+ /** Turns on the bit at the given (long) index and reports whether it was already set.
+  * The index should be less than the OpenBitSet size.
+  */
+ public boolean getAndSet(long index) {
+   final int word = (int) (index >> 6);             // index / 64
+   final long mask = 1L << ((int) index & 0x3f);    // single bit at index % 64
+   final long before = bits[word];
+   bits[word] = before | mask;
+   return (before & mask) != 0;                     // value prior to the update
+ }
+
+ /** Inverts the bit at the given index without any size handling.
+  * The index should be less than the OpenBitSet size.
+  */
+ public void fastFlip(int index) {
+   final int word = index >> 6;                // index / 64
+   final long mask = 1L << (index & 0x3f);     // single bit at index % 64
+   bits[word] ^= mask;
+ }
+
+ /** Inverts the bit at the given (long) index without any size handling.
+  * The index should be less than the OpenBitSet size.
+  */
+ public void fastFlip(long index) {
+   final int word = (int) (index >> 6);             // index / 64
+   final long mask = 1L << ((int) index & 0x3f);    // single bit at index % 64
+   bits[word] ^= mask;
+ }
+
+ /** Inverts the bit at the given index, growing the set first when the index is past its current size. */
+ public void flip(long index) {
+   // Resolve the word first: expandingWordNum may grow the set, so bits must be read afterwards.
+   final int word = expandingWordNum(index);
+   final long mask = 1L << ((int) index & 0x3f);   // single bit at index % 64
+   bits[word] ^= mask;
+ }
+
+ /** Inverts the bit at the given index and reports its value after the flip.
+  * The index should be less than the OpenBitSet size.
+  */
+ public boolean flipAndGet(int index) {
+   final int word = index >> 6;                // index / 64
+   final long mask = 1L << (index & 0x3f);     // single bit at index % 64
+   final long flipped = bits[word] ^ mask;
+   bits[word] = flipped;
+   return (flipped & mask) != 0;               // value after the update
+ }
+
+ /** Inverts the bit at the given (long) index and reports its value after the flip.
+  * The index should be less than the OpenBitSet size.
+  */
+ public boolean flipAndGet(long index) {
+   final int word = (int) (index >> 6);             // index / 64
+   final long mask = 1L << ((int) index & 0x3f);    // single bit at index % 64
+   final long flipped = bits[word] ^ mask;
+   bits[word] = flipped;
+   return (flipped & mask) != 0;                    // value after the update
+ }
+
+ /** Flips a range of bits, expanding the set size if necessary
+ *
+ * @param startIndex lower index
+ * @param endIndex one-past the last bit to flip
+ */
+ public void flip(long startIndex, long endIndex) {
+ if (endIndex <= startIndex) return;
+ int oldlen = wlen;
+ int startWord = (int)(startIndex>>6);
+
+ // since endIndex is one past the end, this is index of the last
+ // word to be changed.
+ int endWord = expandingWordNum(endIndex-1);
+
+ /*** Grrr, java shifting wraps around so -1L>>>64 == -1
+ * for that reason, make sure not to use endmask if the bits to flip will
+ * be zero in the last word (redefine endWord to be the last changed...)
+ long startmask = -1L << (startIndex & 0x3f); // example: 11111...111000
+ long endmask = -1L >>> (64-(endIndex & 0x3f)); // example: 00111...111111
+ ***/
+
+ long startmask = -1L << startIndex;
+ long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+
+ if (startWord == endWord) {
+ bits[startWord] ^= (startmask & endmask);
+ return;
+ }
+
+ bits[startWord] ^= startmask;
+
+ for (int i=startWord+1; i
The code for compressing the differences between ascending integers was
+ * borrowed from {@link org.apache.lucene.store.IndexInput} and
+ * {@link org.apache.lucene.store.IndexOutput}.
+ */
+public class SortedVIntList extends DocIdSet {
+  /** When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set,
+   * a SortedVIntList representing the index numbers of the set bits
+   * will be smaller than that BitSet.
+   */
+  final static int BITS2VINTLIST_SIZE = 8;
+
+  /** Mask selecting the 7 payload bits of an encoded byte. */
+  private static final int VB1 = 0x7F;
+  /** Number of payload bits carried by each encoded byte. */
+  private static final int BIT_SHIFT = 7;
+  /** Largest number of bytes a single encoded difference can occupy. */
+  private final int MAX_BYTES_PER_INT = (31 / BIT_SHIFT) + 1;
+
+  /** Number of integers stored. */
+  private int size;
+  /** The encoded differences between successive integers. */
+  private byte[] bytes;
+  /** Position one past the last encoded byte in {@link #bytes}. */
+  private int lastBytePos;
+
+  /**
+   * Builds the list from every element of a sorted int array.
+   *
+   * @param sortedInts A sorted array of non negative integers.
+   */
+  public SortedVIntList(int[] sortedInts) {
+    this(sortedInts, sortedInts.length);
+  }
+
+  /**
+   * Builds the list from the leading elements of a sorted int array.
+   * @param sortedInts An array of sorted non negative integers.
+   * @param inputSize The number of integers to be used from the array.
+   */
+  public SortedVIntList(int[] sortedInts, int inputSize) {
+    SortedVIntListBuilder encoder = new SortedVIntListBuilder();
+    int pos = 0;
+    while (pos < inputSize) {
+      encoder.addInt(sortedInts[pos]);
+      pos++;
+    }
+    encoder.done();
+  }
+
+  /**
+   * Builds the list from the indexes of the set bits of a BitSet.
+   * @param bits A bit set representing a set of integers.
+   */
+  public SortedVIntList(BitSet bits) {
+    SortedVIntListBuilder encoder = new SortedVIntListBuilder();
+    // nextSetBit yields -1 once no further set bit exists
+    for (int current = bits.nextSetBit(0); current != -1; current = bits.nextSetBit(current + 1)) {
+      encoder.addInt(current);
+    }
+    encoder.done();
+  }
+
+  /**
+   * Builds the list from the indexes of the set bits of an OpenBitSet.
+   * @param bits A bit set representing a set of integers.
+   */
+  public SortedVIntList(OpenBitSet bits) {
+    SortedVIntListBuilder encoder = new SortedVIntListBuilder();
+    // the loop ends when nextSetBit returns -1
+    for (int current = bits.nextSetBit(0); current != -1; current = bits.nextSetBit(current + 1)) {
+      encoder.addInt(current);
+    }
+    encoder.done();
+  }
+
+  /**
+   * Builds the list by draining an iterator.
+   * @param docIdSetIterator An iterator providing document numbers as a set of integers.
+   *                  This DocIdSetIterator is iterated completely when this constructor
+   *                  is called and it must provide the integers in non
+   *                  decreasing order.
+   */
+  public SortedVIntList(DocIdSetIterator docIdSetIterator) throws IOException {
+    SortedVIntListBuilder encoder = new SortedVIntListBuilder();
+    for (boolean more = docIdSetIterator.next(); more; more = docIdSetIterator.next()) {
+      encoder.addInt(docIdSetIterator.doc());
+    }
+    encoder.done();
+  }
+
+
+  /** Appends integers to the enclosing list as variable-length encoded differences. */
+  private class SortedVIntListBuilder {
+    /** The most recently added integer; differences are taken against it. */
+    private int lastInt = 0;
+
+    SortedVIntListBuilder() {
+      initBytes();
+      lastInt = 0;
+    }
+
+    /** Encodes the difference between nextInt and the previously added integer.
+     * @throws IllegalArgumentException if that difference is negative
+     */
+    void addInt(int nextInt) {
+      final int delta = nextInt - lastInt;
+      if (delta < 0) {
+        throw new IllegalArgumentException(
+            "Input not sorted or first element negative.");
+      }
+
+      // grow up front so that even the longest possible encoding fits
+      if (bytes.length < (lastBytePos + MAX_BYTES_PER_INT)) {
+        resizeBytes((bytes.length * 2) + MAX_BYTES_PER_INT);
+      }
+
+      // See org.apache.lucene.store.IndexOutput.writeVInt()
+      int remaining = delta;
+      while ((remaining & ~VB1) != 0) {
+        // more payload follows: emit the low 7 bits with the high bit set
+        bytes[lastBytePos++] = (byte) ((remaining & VB1) | ~VB1);
+        remaining >>>= BIT_SHIFT;
+      }
+      bytes[lastBytePos++] = (byte) remaining; // final byte, high bit clear
+      size++;
+      lastInt = nextInt;
+    }
+
+    /** Trims the byte array to the number of bytes actually written. */
+    void done() {
+      resizeBytes(lastBytePos);
+    }
+  }
+
+
+  /** Resets the list to empty with a fresh 128-byte buffer. */
+  private void initBytes() {
+    size = 0;
+    bytes = new byte[128]; // initial byte size
+    lastBytePos = 0;
+  }
+
+  /** Replaces the buffer by one of newSize bytes, copying the bytes written so far;
+   * does nothing when the buffer already has that length.
+   */
+  private void resizeBytes(int newSize) {
+    if (newSize == bytes.length) {
+      return;
+    }
+    byte[] resized = new byte[newSize];
+    System.arraycopy(bytes, 0, resized, 0, lastBytePos);
+    bytes = resized;
+  }
+
+  /**
+   * @return    The total number of sorted integers.
+   */
+  public int size() {
+    return size;
+  }
+
+  /**
+   * @return The size of the byte array storing the compressed sorted integers.
+   */
+  public int getByteSize() {
+    return bytes.length;
+  }
+
+  /**
+   * @return    An iterator over the sorted integers.
+   */
+  public DocIdSetIterator iterator() {
+    return new DocIdSetIterator() {
+      int bytePos = 0;
+      int lastInt = 0;
+
+      /** Decodes the next difference and adds it to lastInt. */
+      private void advance() {
+        // See org.apache.lucene.store.IndexInput.readVInt()
+        int shift = 0;
+        byte current;
+        do {
+          current = bytes[bytePos++];
+          lastInt += (current & VB1) << shift;
+          shift += BIT_SHIFT;
+        } while ((current & ~VB1) != 0); // a set high bit means another byte follows
+      }
+
+      public int doc() {return lastInt;}
+
+      public boolean next() {
+        if (bytePos < lastBytePos) {
+          advance();
+          return true;
+        }
+        return false;
+      }
+
+      public boolean skipTo(int docNr) {
+        // No skipping to docNr available: decode linearly until reached or exhausted.
+        while (bytePos < lastBytePos) {
+          advance();
+          if (lastInt >= docNr) {
+            return true;
+          }
+        }
+        return false;
+      }
+    };
+  }
+}
+
diff --git a/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java b/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java
index 4783b2f4e99..7019e79f155 100644
--- a/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java
+++ b/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java
@@ -43,13 +43,13 @@ public class CachingWrapperFilterHelper extends CachingWrapperFilter {
this.shouldHaveCache = shouldHaveCache;
}
- public BitSet bits(IndexReader reader) throws IOException {
+ public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
if (cache == null) {
cache = new WeakHashMap();
}
synchronized (cache) { // check cache
- BitSet cached = (BitSet) cache.get(reader);
+ DocIdSet cached = (DocIdSet) cache.get(reader);
if (shouldHaveCache) {
TestCase.assertNotNull("Cache should have data ", cached);
} else {
@@ -60,7 +60,7 @@ public class CachingWrapperFilterHelper extends CachingWrapperFilter {
}
}
- final BitSet bits = filter.bits(reader);
+ final DocIdSet bits = filter.getDocIdSet(reader);
synchronized (cache) { // update cache
cache.put(reader, bits);
diff --git a/src/test/org/apache/lucene/search/MockFilter.java b/src/test/org/apache/lucene/search/MockFilter.java
index 4f36fce01cc..0ec7258e310 100644
--- a/src/test/org/apache/lucene/search/MockFilter.java
+++ b/src/test/org/apache/lucene/search/MockFilter.java
@@ -18,14 +18,15 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.DocIdBitSet;
import java.util.BitSet;
public class MockFilter extends Filter {
private boolean wasCalled;
- public BitSet bits(IndexReader reader) {
+ public DocIdSet getDocIdSet(IndexReader reader) {
wasCalled = true;
- return new BitSet();
+ return new DocIdBitSet(new BitSet());
}
public void clear() {
diff --git a/src/test/org/apache/lucene/search/RemoteCachingWrapperFilterHelper.java b/src/test/org/apache/lucene/search/RemoteCachingWrapperFilterHelper.java
index 859bac0a66f..257a51c1e8a 100644
--- a/src/test/org/apache/lucene/search/RemoteCachingWrapperFilterHelper.java
+++ b/src/test/org/apache/lucene/search/RemoteCachingWrapperFilterHelper.java
@@ -42,7 +42,7 @@ public class RemoteCachingWrapperFilterHelper extends RemoteCachingWrapperFilter
this.shouldHaveCache = shouldHaveCache;
}
- public BitSet bits(IndexReader reader) throws IOException {
+ public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
Filter cachedFilter = FilterManager.getInstance().getFilter(filter);
TestCase.assertNotNull("Filter should not be null", cachedFilter);
@@ -55,6 +55,6 @@ public class RemoteCachingWrapperFilterHelper extends RemoteCachingWrapperFilter
if (filter instanceof CachingWrapperFilterHelper) {
((CachingWrapperFilterHelper)cachedFilter).setShouldHaveCache(shouldHaveCache);
}
- return cachedFilter.bits(reader);
+ return cachedFilter.getDocIdSet(reader);
}
}
diff --git a/src/test/org/apache/lucene/search/SingleDocTestFilter.java b/src/test/org/apache/lucene/search/SingleDocTestFilter.java
index c1d8b1ba093..b429f7025be 100644
--- a/src/test/org/apache/lucene/search/SingleDocTestFilter.java
+++ b/src/test/org/apache/lucene/search/SingleDocTestFilter.java
@@ -18,6 +18,7 @@ package org.apache.lucene.search;
*/
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.DocIdBitSet;
import java.util.BitSet;
import java.io.IOException;
@@ -29,9 +30,9 @@ public class SingleDocTestFilter extends Filter {
this.doc = doc;
}
- public BitSet bits(IndexReader reader) throws IOException {
+ public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
BitSet bits = new BitSet(reader.maxDoc());
bits.set(doc);
- return bits;
+ return new DocIdBitSet(bits);
}
}
diff --git a/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java b/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java
index ae552d76f2a..9a4c7d03b99 100644
--- a/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java
+++ b/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java
@@ -36,12 +36,12 @@ public class TestCachingWrapperFilter extends LuceneTestCase {
CachingWrapperFilter cacher = new CachingWrapperFilter(filter);
// first time, nested filter is called
- cacher.bits(reader);
+ cacher.getDocIdSet(reader);
assertTrue("first time", filter.wasCalled());
// second time, nested filter should not be called
filter.clear();
- cacher.bits(reader);
+ cacher.getDocIdSet(reader);
assertFalse("second time", filter.wasCalled());
reader.close();
diff --git a/src/test/org/apache/lucene/search/TestExplanations.java b/src/test/org/apache/lucene/search/TestExplanations.java
index ed06916ca16..4840325b845 100644
--- a/src/test/org/apache/lucene/search/TestExplanations.java
+++ b/src/test/org/apache/lucene/search/TestExplanations.java
@@ -33,6 +33,7 @@ import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.DocIdBitSet;
import java.util.Random;
import java.util.BitSet;
@@ -122,12 +123,12 @@ public class TestExplanations extends LuceneTestCase {
public ItemizedFilter(int[] docs) {
this.docs = docs;
}
- public BitSet bits(IndexReader r) {
+ public DocIdSet getDocIdSet(IndexReader r) {
BitSet b = new BitSet(r.maxDoc());
for (int i = 0; i < docs.length; i++) {
b.set(docs[i]);
}
- return b;
+ return new DocIdBitSet(b);
}
}
diff --git a/src/test/org/apache/lucene/search/TestFilteredQuery.java b/src/test/org/apache/lucene/search/TestFilteredQuery.java
index b5b1ae8af90..c4be79511fb 100644
--- a/src/test/org/apache/lucene/search/TestFilteredQuery.java
+++ b/src/test/org/apache/lucene/search/TestFilteredQuery.java
@@ -26,6 +26,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.DocIdBitSet;
import java.util.BitSet;
@@ -82,11 +83,11 @@ extends LuceneTestCase {
// must be static for serialization tests
private static Filter newStaticFilterB() {
return new Filter() {
- public BitSet bits (IndexReader reader) {
+ public DocIdSet getDocIdSet (IndexReader reader) {
BitSet bitset = new BitSet(5);
bitset.set (1);
bitset.set (3);
- return bitset;
+ return new DocIdBitSet(bitset);
}
};
}
@@ -150,10 +151,10 @@ extends LuceneTestCase {
// must be static for serialization tests
private static Filter newStaticFilterA() {
return new Filter() {
- public BitSet bits (IndexReader reader) {
+ public DocIdSet getDocIdSet (IndexReader reader) {
BitSet bitset = new BitSet(5);
bitset.set(0, 5);
- return bitset;
+ return new DocIdBitSet(bitset);
}
};
}
@@ -200,3 +201,4 @@ extends LuceneTestCase {
}
+
diff --git a/src/test/org/apache/lucene/search/TestRemoteCachingWrapperFilter.java b/src/test/org/apache/lucene/search/TestRemoteCachingWrapperFilter.java
index 63bf0af9a3e..63779c9b50b 100644
--- a/src/test/org/apache/lucene/search/TestRemoteCachingWrapperFilter.java
+++ b/src/test/org/apache/lucene/search/TestRemoteCachingWrapperFilter.java
@@ -91,7 +91,7 @@ public class TestRemoteCachingWrapperFilter extends LuceneTestCase {
public void testTermRemoteFilter() throws Exception {
- CachingWrapperFilterHelper cwfh = new CachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "a"))));
+ CachingWrapperFilterHelper cwfh = new CachingWrapperFilterHelper(new QueryWrapperFilter(new TermQuery(new Term("type", "a"))));
// This is what we are fixing - if one uses a CachingWrapperFilter(Helper) it will never
// cache the filter on the remote site
@@ -112,16 +112,16 @@ public class TestRemoteCachingWrapperFilter extends LuceneTestCase {
// assert that we get the same cached Filter, even if we create a new instance of RemoteCachingWrapperFilter(Helper)
// this should pass because the Filter parameters are the same, and the cache uses Filter's hashCode() as cache keys,
// and Filters' hashCode() builds on Filter parameters, not the Filter instance itself
- rcwfh = new RemoteCachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "a"))), false);
+ rcwfh = new RemoteCachingWrapperFilterHelper(new QueryWrapperFilter(new TermQuery(new Term("type", "a"))), false);
rcwfh.shouldHaveCache(false);
search(new TermQuery(new Term("test", "test")), rcwfh, 0, "A");
- rcwfh = new RemoteCachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "a"))), false);
+ rcwfh = new RemoteCachingWrapperFilterHelper(new QueryWrapperFilter(new TermQuery(new Term("type", "a"))), false);
rcwfh.shouldHaveCache(true);
search(new TermQuery(new Term("test", "test")), rcwfh, 0, "A");
// assert that we get a non-cached version of the Filter because this is a new Query (type:b)
- rcwfh = new RemoteCachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "b"))), false);
+ rcwfh = new RemoteCachingWrapperFilterHelper(new QueryWrapperFilter(new TermQuery(new Term("type", "b"))), false);
rcwfh.shouldHaveCache(false);
search(new TermQuery(new Term("type", "b")), rcwfh, 0, "B");
}
diff --git a/src/test/org/apache/lucene/search/TestRemoteSearchable.java b/src/test/org/apache/lucene/search/TestRemoteSearchable.java
index 63b463c3304..d1e0f2b1948 100644
--- a/src/test/org/apache/lucene/search/TestRemoteSearchable.java
+++ b/src/test/org/apache/lucene/search/TestRemoteSearchable.java
@@ -116,11 +116,11 @@ public class TestRemoteSearchable extends LuceneTestCase {
Searcher searcher = new MultiSearcher(searchables);
Hits hits = searcher.search(
new TermQuery(new Term("test", "text")),
- new QueryFilter(new TermQuery(new Term("test", "test"))));
+ new QueryWrapperFilter(new TermQuery(new Term("test", "test"))));
assertEquals(1, hits.length());
Hits nohits = searcher.search(
new TermQuery(new Term("test", "text")),
- new QueryFilter(new TermQuery(new Term("test", "non-existent-term"))));
+ new QueryWrapperFilter(new TermQuery(new Term("test", "non-existent-term"))));
assertEquals(0, nohits.length());
}
@@ -129,7 +129,7 @@ public class TestRemoteSearchable extends LuceneTestCase {
Searchable[] searchables = { getRemote() };
Searcher searcher = new MultiSearcher(searchables);
Hits hits = searcher.search(
- new ConstantScoreQuery(new QueryFilter(
+ new ConstantScoreQuery(new QueryWrapperFilter(
new TermQuery(new Term("test", "test")))));
assertEquals(1, hits.length());
}
diff --git a/src/test/org/apache/lucene/search/TestScorerPerf.java b/src/test/org/apache/lucene/search/TestScorerPerf.java
index ad180da8e75..6f5dcdcb259 100755
--- a/src/test/org/apache/lucene/search/TestScorerPerf.java
+++ b/src/test/org/apache/lucene/search/TestScorerPerf.java
@@ -1,5 +1,6 @@
package org.apache.lucene.search;
+import org.apache.lucene.util.DocIdBitSet;
import org.apache.lucene.util.LuceneTestCase;
import java.util.Random;
@@ -95,16 +96,6 @@ public class TestScorerPerf extends LuceneTestCase {
return sets;
}
- public static class BitSetFilter extends Filter {
- public BitSet set;
- public BitSetFilter(BitSet set) {
- this.set = set;
- }
- public BitSet bits(IndexReader reader) throws IOException {
- return set;
- }
- }
-
public static class CountingHitCollector extends HitCollector {
int count=0;
int sum=0;
@@ -137,8 +128,12 @@ public class TestScorerPerf extends LuceneTestCase {
BitSet addClause(BooleanQuery bq, BitSet result) {
- BitSet rnd = sets[r.nextInt(sets.length)];
- Query q = new ConstantScoreQuery(new BitSetFilter(rnd));
+ final BitSet rnd = sets[r.nextInt(sets.length)];
+ Query q = new ConstantScoreQuery(new Filter() {
+ public DocIdSet getDocIdSet(IndexReader reader) {
+ return new DocIdBitSet(rnd);
+ };
+ });
bq.add(q, BooleanClause.Occur.MUST);
if (validate) {
if (result==null) result = (BitSet)rnd.clone();
diff --git a/src/test/org/apache/lucene/search/TestSort.java b/src/test/org/apache/lucene/search/TestSort.java
index bb8ac51a570..8e021cc7cde 100644
--- a/src/test/org/apache/lucene/search/TestSort.java
+++ b/src/test/org/apache/lucene/search/TestSort.java
@@ -28,6 +28,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.DocIdBitSet;
import java.io.IOException;
import java.io.Serializable;
@@ -571,10 +572,10 @@ implements Serializable {
// a filter that only allows through the first hit
Filter filt = new Filter() {
- public BitSet bits(IndexReader reader) throws IOException {
+ public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
BitSet bs = new BitSet(reader.maxDoc());
bs.set(docs1.scoreDocs[0].doc);
- return bs;
+ return new DocIdBitSet(bs);
}
};
diff --git a/src/test/org/apache/lucene/search/TestSpanQueryFilter.java b/src/test/org/apache/lucene/search/TestSpanQueryFilter.java
index 17311bab170..7d8ce1e01a0 100644
--- a/src/test/org/apache/lucene/search/TestSpanQueryFilter.java
+++ b/src/test/org/apache/lucene/search/TestSpanQueryFilter.java
@@ -56,20 +56,36 @@ public class TestSpanQueryFilter extends LuceneTestCase {
SpanTermQuery query = new SpanTermQuery(new Term("field", English.intToEnglish(10).trim()));
SpanQueryFilter filter = new SpanQueryFilter(query);
SpanFilterResult result = filter.bitSpans(reader);
- BitSet bits = result.getBits();
- assertTrue("bits is null and it shouldn't be", bits != null);
- assertTrue("tenth bit is not on", bits.get(10));
+ DocIdSet docIdSet = result.getDocIdSet();
+ assertTrue("docIdSet is null and it shouldn't be", docIdSet != null);
+ assertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10);
List spans = result.getPositions();
assertTrue("spans is null and it shouldn't be", spans != null);
- assertTrue("spans Size: " + spans.size() + " is not: " + bits.cardinality(), spans.size() == bits.cardinality());
+ int size = getDocIdSetSize(docIdSet);
+ assertTrue("spans Size: " + spans.size() + " is not: " + size, spans.size() == size);
for (Iterator iterator = spans.iterator(); iterator.hasNext();) {
SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.next();
assertTrue("info is null and it shouldn't be", info != null);
//The doc should indicate the bit is on
- assertTrue("Bit is not on and it should be", bits.get(info.getDoc()));
+ assertContainsDocId("docIdSet doesn't contain docId " + info.getDoc(), docIdSet, info.getDoc());
//There should be two positions in each
assertTrue("info.getPositions() Size: " + info.getPositions().size() + " is not: " + 2, info.getPositions().size() == 2);
}
reader.close();
}
+
+ /** Counts the entries of a DocIdSet by walking a fresh iterator until next() returns false. */
+ int getDocIdSetSize(DocIdSet docIdSet) throws Exception {
+   int count = 0;
+   for (DocIdSetIterator docs = docIdSet.iterator(); docs.next(); ) {
+     count++;
+   }
+   return count;
+ }
+
+ /** Asserts that skipTo(docId) on a fresh iterator succeeds and lands exactly on docId. */
+ public void assertContainsDocId(String msg, DocIdSet docIdSet, int docId) throws Exception {
+   final DocIdSetIterator docs = docIdSet.iterator();
+   final boolean positioned = docs.skipTo(docId);
+   assertTrue(msg, positioned);
+   // skipTo may stop on a later document, so the position is checked as well
+   assertTrue(msg, docs.doc() == docId);
+ }
}
diff --git a/src/test/org/apache/lucene/util/TestOpenBitSet.java b/src/test/org/apache/lucene/util/TestOpenBitSet.java
new file mode 100644
index 00000000000..33fa7a12b46
--- /dev/null
+++ b/src/test/org/apache/lucene/util/TestOpenBitSet.java
@@ -0,0 +1,203 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util;
+
+import junit.framework.TestCase;
+
+import java.util.Random;
+import java.util.BitSet;
+
+/**
+ * @version $Id$
+ */
+public class TestOpenBitSet extends TestCase {
+ static Random rand = new Random();
+
+ void doGet(BitSet a, OpenBitSet b) {
+ int max = a.size();
+ for (int i=0; i