LUCENE-584: Changed Filter API to return a DocIdSet instead of a java.util.BitSet. This allows using more efficient data structures for Filters and makes them more flexible.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@617859 13f79535-47bb-0310-9956-ffa450edef68
2008-02-02 19:04:03 +00:00 · 2008-02-02 19:04:03 +00:00 · 18b61286fa
parent 1caf5cb9ce
commit 18b61286fa
40 changed files with 2857 additions and 175 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -15,12 +15,16 @@ API Changes
 2. LUCENE-1150: Re-expose StandardTokenizer's constants publicly;
    this was accidentally lost with LUCENE-966.  (Nicolas Lalevée via
    Mike McCandless)
+	
+ 3. LUCENE-584: Changed Filter API to return a DocIdSet instead of a
+    java.util.BitSet. This allows using more efficient data structures
+    for Filters and makes them more flexible. (Paul Elschot, Michael Busch)

 Bug fixes
    
 New features

-1. LUCENE-1137: Added Token.set/getFlags() accessors for passing more information about a Token through the analysis
+ 1. LUCENE-1137: Added Token.set/getFlags() accessors for passing more information about a Token through the analysis
    process.  The flag is not indexed/stored and is thus only used by analysis.

 2. LUCENE-1147: Add -segment option to CheckIndex tool so you can
@ -28,6 +32,12 @@ New features
    McCandless)

 3. LUCENE-1045: Reopened this issue to add support for short and bytes. 
+ 
+ 4. LUCENE-584: Added new data structures to o.a.l.util, such as 
+    OpenBitSet and SortedVIntList. These extend DocIdSet and can 
+    directly be used for Filters with the new Filter API. Also changed
+    the core Filters to use OpenBitSet instead of java.util.BitSet.
+    (Paul Elschot, Michael Busch)

 Optimizations

--- a/contrib/miscellaneous/src/test/org/apache/lucene/misc/ChainedFilterTest.java
+++ b/contrib/miscellaneous/src/test/org/apache/lucene/misc/ChainedFilterTest.java
@ -37,8 +37,8 @@ public class ChainedFilterTest extends TestCase {
  private Query query;
  // private DateFilter dateFilter;   DateFilter was deprecated and removed
  private RangeFilter dateFilter;
-  private QueryFilter bobFilter;
-  private QueryFilter sueFilter;
+  private QueryWrapperFilter bobFilter;
+  private QueryWrapperFilter sueFilter;

  public void setUp() throws Exception {
    directory = new RAMDirectory();
@ -74,9 +74,9 @@ public class ChainedFilterTest extends TestCase {
    // just treat dates as strings and select the whole range for now...
    dateFilter = new RangeFilter("date","","ZZZZ",true,true);

-    bobFilter = new QueryFilter(
+    bobFilter = new QueryWrapperFilter(
        new TermQuery(new Term("owner", "bob")));
-    sueFilter = new QueryFilter(
+    sueFilter = new QueryWrapperFilter(
        new TermQuery(new Term("owner", "sue")));
  }

--- a/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/CachedFilterBuilder.java
+++ b/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/CachedFilterBuilder.java
@ -8,7 +8,7 @@ import java.util.Map.Entry;
 import org.apache.lucene.search.CachingWrapperFilter;
 import org.apache.lucene.search.Filter;
 import org.apache.lucene.search.Query;
-import org.apache.lucene.search.QueryFilter;
+import org.apache.lucene.search.QueryWrapperFilter;
 import org.apache.lucene.xmlparser.DOMUtils;
 import org.apache.lucene.xmlparser.FilterBuilder;
 import org.apache.lucene.xmlparser.FilterBuilderFactory;
@ -105,7 +105,7 @@ public class CachedFilterBuilder implements FilterBuilder {
 		//cache miss
 		if (qb != null)
 		{
-			cachedFilter = new QueryFilter(q);
+			cachedFilter = new QueryWrapperFilter(q);
 		} else
 		{
 			cachedFilter = new CachingWrapperFilter(f);
--- a/src/java/org/apache/lucene/search/CachingSpanFilter.java
+++ b/src/java/org/apache/lucene/search/CachingSpanFilter.java
@ -43,11 +43,19 @@ public class CachingSpanFilter extends SpanFilter {
    this.filter = filter;
  }

+  /**
+   * @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
+   */
  public BitSet bits(IndexReader reader) throws IOException {
    SpanFilterResult result = getCachedResult(reader);
    return result != null ? result.getBits() : null;
  }
-
+  
+  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+    SpanFilterResult result = getCachedResult(reader);
+    return result != null ? result.getDocIdSet() : null;
+  }
+  
  private SpanFilterResult getCachedResult(IndexReader reader) throws IOException {
    SpanFilterResult result = null;
    if (cache == null) {
--- a/src/java/org/apache/lucene/search/CachingWrapperFilter.java
+++ b/src/java/org/apache/lucene/search/CachingWrapperFilter.java
@ -43,6 +43,9 @@ public class CachingWrapperFilter extends Filter {
    this.filter = filter;
  }

+  /**
+   * @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
+   */
  public BitSet bits(IndexReader reader) throws IOException {
    if (cache == null) {
      cache = new WeakHashMap();
@ -63,6 +66,28 @@ public class CachingWrapperFilter extends Filter {

    return bits;
  }
+  
+  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+    if (cache == null) {
+      cache = new WeakHashMap();
+    }
+
+    synchronized (cache) {  // check cache
+      DocIdSet cached = (DocIdSet) cache.get(reader);
+      if (cached != null) {
+        return cached;
+      }
+    }
+
+    final DocIdSet docIdSet = filter.getDocIdSet(reader);
+
+    synchronized (cache) {  // update cache
+      cache.put(reader, docIdSet);
+    }
+
+    return docIdSet;
+    
+  }

  public String toString() {
    return "CachingWrapperFilter("+filter+")";
--- a/src/java/org/apache/lucene/search/ConstantScoreQuery.java
+++ b/src/java/org/apache/lucene/search/ConstantScoreQuery.java
@ -85,7 +85,7 @@ public class ConstantScoreQuery extends Query {
    public Explanation explain(IndexReader reader, int doc) throws IOException {

      ConstantScorer cs = (ConstantScorer)scorer(reader);
-      boolean exists = cs.bits.get(doc);
+      boolean exists = cs.docIdSetIterator.skipTo(doc) && (cs.docIdSetIterator.doc() == doc);

      ComplexExplanation result = new ComplexExplanation();

@ -107,23 +107,22 @@ public class ConstantScoreQuery extends Query {
  }

  protected class ConstantScorer extends Scorer {
-    final BitSet bits;
+    final DocIdSetIterator docIdSetIterator;
    final float theScore;
    int doc=-1;

    public ConstantScorer(Similarity similarity, IndexReader reader, Weight w) throws IOException {
      super(similarity);
      theScore = w.getValue();
-      bits = filter.bits(reader);
+      docIdSetIterator = filter.getDocIdSet(reader).iterator();
    }

    public boolean next() throws IOException {
-      doc = bits.nextSetBit(doc+1);
-      return doc >= 0;
+      return docIdSetIterator.next();
    }

    public int doc() {
-      return doc;
+      return docIdSetIterator.doc();
    }

    public float score() throws IOException {
@ -131,8 +130,7 @@ public class ConstantScoreQuery extends Query {
    }

    public boolean skipTo(int target) throws IOException {
-      doc = bits.nextSetBit(target);  // requires JDK 1.4
-      return doc >= 0;
+      return docIdSetIterator.skipTo(target);
    }

    public Explanation explain(int doc) throws IOException {
@ -170,3 +168,4 @@ public class ConstantScoreQuery extends Query {
 }


+
--- a/src/java/org/apache/lucene/search/DocIdSet.java
+++ b/src/java/org/apache/lucene/search/DocIdSet.java
@ -0,0 +1,27 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/**
+ * A DocIdSet contains a set of doc ids. Implementing classes must provide
+ * a {@link DocIdSetIterator} to access the set. 
+ */
+public abstract class DocIdSet {
+	public abstract DocIdSetIterator iterator();
+}
--- a/src/java/org/apache/lucene/search/DocIdSetIterator.java
+++ b/src/java/org/apache/lucene/search/DocIdSetIterator.java
@ -0,0 +1,49 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+/**
+ * This abstract class defines methods to iterate over a set of
+ * non-decreasing doc ids.
+ */
+public abstract class DocIdSetIterator {
+    /** Returns the current document number.  <p> This is invalid until {@link
+    #next()} is called for the first time.*/
+    public abstract int doc();
+    
+    /** Moves to the next docId in the set. Returns true, iff
+     * there is such a docId. */
+    public abstract boolean next() throws IOException;
+    
+    /** Skips entries to the first beyond the current whose document number is
+     * greater than or equal to <i>target</i>. <p>Returns true iff there is such
+     * an entry.  <p>Behaves as if written: <pre>
+     *   boolean skipTo(int target) {
+     *     do {
+     *       if (!next())
+     *         return false;
+     *     } while (target > doc());
+     *     return true;
+     *   }
+     * </pre>
+     * Some implementations are considerably more efficient than that.
+     */
+    public abstract boolean skipTo(int target) throws IOException;
+}
--- a/src/java/org/apache/lucene/search/Filter.java
+++ b/src/java/org/apache/lucene/search/Filter.java
@ -20,11 +20,32 @@ package org.apache.lucene.search;
 import java.util.BitSet;
 import java.io.IOException;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.DocIdBitSet;

-/** Abstract base class providing a mechanism to restrict searches to a subset
- of an index. */
+/** Abstract base class providing a mechanism to use a subset of an index
+ *  for restriction or permission of index search results.
+ *  <p>
+ *  <b>Note:</b> In Lucene 3.0 {@link #bits(IndexReader)} will be removed
+ *  and {@link #getDocIdSet(IndexReader)} will be defined as abstract.
+ *  All implementing classes must therefore implement {@link #getDocIdSet(IndexReader)}
+ *  in order to work with Lucene 3.0.
+ */
 public abstract class Filter implements java.io.Serializable {
-  /** Returns a BitSet with true for documents which should be permitted in
-    search results, and false for those that should not. */
-  public abstract BitSet bits(IndexReader reader) throws IOException;
+  /**
+   * @return A BitSet with true for documents which should be permitted in
+   * search results, and false for those that should not.
+   * @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
+   */
+  public BitSet bits(IndexReader reader) throws IOException {
+    return null;
+  }
+	
+  /**
+   * @return a DocIdSet that provides the documents which should be
+   * permitted or prohibited in search results.
+   * @see DocIdBitSet
+   */
+  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+    return new DocIdBitSet(bits(reader));
+  }
 }
--- a/src/java/org/apache/lucene/search/FilteredQuery.java
+++ b/src/java/org/apache/lucene/search/FilteredQuery.java
@ -21,7 +21,6 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.util.ToStringUtils;

 import java.io.IOException;
-import java.util.BitSet;
 import java.util.Set;


@ -47,7 +46,7 @@ extends Query {

  /**
   * Constructs a new query which applies a filter to the results of the original query.
-   * Filter.bits() will be called every time this query is used in a search.
+   * Filter.getDocIdSet() will be called every time this query is used in a search.
   * @param query  Query to be filtered, cannot be <code>null</code>.
   * @param filter Filter to apply to query results, cannot be <code>null</code>.
   */
@ -86,13 +85,15 @@ extends Query {
          inner.addDetail(preBoost);
        }
        Filter f = FilteredQuery.this.filter;
-        BitSet matches = f.bits(ir);
-        if (matches.get(i))
+        DocIdSetIterator docIdSetIterator = f.getDocIdSet(ir).iterator();
+        if (docIdSetIterator.skipTo(i) && (docIdSetIterator.doc() == i)) {
          return inner;
-        Explanation result = new Explanation
-          (0.0f, "failure to match filter: " + f.toString());
-        result.addDetail(inner);
-        return result;
+        } else {
+          Explanation result = new Explanation
+            (0.0f, "failure to match filter: " + f.toString());
+          result.addDetail(inner);
+          return result;
+        }
      }

      // return this query
@ -100,50 +101,49 @@ extends Query {

      // return a filtering scorer
       public Scorer scorer (IndexReader indexReader) throws IOException {
-        final Scorer scorer = weight.scorer (indexReader);
-        final BitSet bitset = filter.bits (indexReader);
-        return new Scorer (similarity) {
+        final Scorer scorer = weight.scorer(indexReader);
+        final DocIdSetIterator docIdSetIterator = filter.getDocIdSet(indexReader).iterator();

-          public boolean next() throws IOException {
-            do {
-              if (! scorer.next()) {
+        return new Scorer(similarity) {
+
+          private boolean advanceToCommon() throws IOException {
+            while (scorer.doc() != docIdSetIterator.doc()) {
+              if (scorer.doc() < docIdSetIterator.doc()) {
+                if (!scorer.skipTo(docIdSetIterator.doc())) {
+                  return false;
+                }
+              } else if (!docIdSetIterator.skipTo(scorer.doc())) {
                return false;
              }
-            } while (! bitset.get(scorer.doc()));
-            /* When skipTo() is allowed on scorer it should be used here
-             * in combination with bitset.nextSetBit(...)
-             * See the while loop in skipTo() below.
-             */
+            }
            return true;
          }
+
+          public boolean next() throws IOException {
+            return docIdSetIterator.next() && scorer.next() && advanceToCommon();
+          }
+
          public int doc() { return scorer.doc(); }

          public boolean skipTo(int i) throws IOException {
-            if (! scorer.skipTo(i)) {
-              return false;
-            }
-            while (! bitset.get(scorer.doc())) {
-              int nextFiltered = bitset.nextSetBit(scorer.doc() + 1);
-              if (nextFiltered == -1) {
-                return false;
-              } else if (! scorer.skipTo(nextFiltered)) {
-                return false;
-              }
-            }
-            return true;
-           }
+            return docIdSetIterator.skipTo(i)
+                && scorer.skipTo(docIdSetIterator.doc())
+                && advanceToCommon();
+          }

          public float score() throws IOException { return getBoost() * scorer.score(); }

          // add an explanation about whether the document was filtered
          public Explanation explain (int i) throws IOException {
-            Explanation exp = scorer.explain (i);
-            exp.setValue(getBoost() * exp.getValue());
+            Explanation exp = scorer.explain(i);
            
-            if (bitset.get(i))
+            if (docIdSetIterator.skipTo(i) && (docIdSetIterator.doc() == i)) {
              exp.setDescription ("allowed by filter: "+exp.getDescription());
-            else
+              exp.setValue(getBoost() * exp.getValue());
+            } else {
              exp.setDescription ("removed by filter: "+exp.getDescription());
+              exp.setValue(0.0f);
+            }
            return exp;
          }
        };
--- a/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/src/java/org/apache/lucene/search/IndexSearcher.java
@ -128,22 +128,33 @@ public class IndexSearcher extends Searcher {
  // inherit javadoc
  public void search(Weight weight, Filter filter,
                     final HitCollector results) throws IOException {
-    HitCollector collector = results;
-    if (filter != null) {
-      final BitSet bits = filter.bits(reader);
-      collector = new HitCollector() {
-          public final void collect(int doc, float score) {
-            if (bits.get(doc)) {                  // skip docs not in bits
-              results.collect(doc, score);
-            }
-          }
-        };
-    }

    Scorer scorer = weight.scorer(reader);
    if (scorer == null)
      return;
-    scorer.score(collector);
+
+    if (filter == null) {
+      scorer.score(results);
+      return;
+    }
+
+    DocIdSetIterator docIdSetIterator = filter.getDocIdSet(reader).iterator(); // CHECKME: use ConjunctionScorer here?
+    boolean more = docIdSetIterator.next();
+    while (more) {
+      int filterDocId = docIdSetIterator.doc();
+      if (! scorer.skipTo(filterDocId)) {
+        more = false;
+      } else {
+        int scorerDocId = scorer.doc();
+        if (scorerDocId == filterDocId) { // permitted by filter
+          results.collect(scorerDocId, scorer.score());
+          more = docIdSetIterator.skipTo(scorerDocId + 1);
+        } else {
+          more = docIdSetIterator.skipTo(scorerDocId);
+        }
+      }
+    }
+
  }

  public Query rewrite(Query original) throws IOException {
--- a/src/java/org/apache/lucene/search/PrefixFilter.java
+++ b/src/java/org/apache/lucene/search/PrefixFilter.java
@ -18,6 +18,7 @@ package org.apache.lucene.search;
 */

 import org.apache.lucene.search.Filter;
+import org.apache.lucene.util.OpenBitSet;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.TermEnum;
@ -39,6 +40,9 @@ public class PrefixFilter extends Filter {

  public Term getPrefix() { return prefix; }

+  /**
+   * @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
+   */  
  public BitSet bits(IndexReader reader) throws IOException {
    final BitSet bitSet = new BitSet(reader.maxDoc());
    new PrefixGenerator(prefix) {
@ -48,6 +52,16 @@ public class PrefixFilter extends Filter {
    }.generate(reader);
    return bitSet;
  }
+  
+  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+    final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
+    new PrefixGenerator(prefix) {
+      public void handleDoc(int doc) {
+        bitSet.set(doc);
+      }
+    }.generate(reader);
+    return bitSet;
+  }

  /** Prints a user-readable version of this query. */
  public String toString () {
@ -105,3 +119,4 @@ abstract class PrefixGenerator implements IdGenerator {
 }


+
--- a/src/java/org/apache/lucene/search/QueryWrapperFilter.java
+++ b/src/java/org/apache/lucene/search/QueryWrapperFilter.java
@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.BitSet;

 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.OpenBitSet;

 /** 
 * Constrains search results to only match those which also match a provided
@ -44,6 +45,9 @@ public class QueryWrapperFilter extends Filter {
    this.query = query;
  }

+  /**
+   * @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
+   */
  public BitSet bits(IndexReader reader) throws IOException {
    final BitSet bits = new BitSet(reader.maxDoc());

@ -54,6 +58,17 @@ public class QueryWrapperFilter extends Filter {
    });
    return bits;
  }
+  
+  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+    final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
+
+    new IndexSearcher(reader).search(query, new HitCollector() {
+      public final void collect(int doc, float score) {
+        bits.set(doc);  // set bit for hit
+      }
+    });
+    return bits;
+  }

  public String toString() {
    return "QueryWrapperFilter(" + query + ")";
--- a/src/java/org/apache/lucene/search/RangeFilter.java
+++ b/src/java/org/apache/lucene/search/RangeFilter.java
@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermDocs;
 import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.util.OpenBitSet;

 import java.io.IOException;
 import java.util.BitSet;
@ -94,6 +95,7 @@ public class RangeFilter extends Filter {
     * Returns a BitSet with true for documents which should be
     * permitted in search results, and false for those that should
     * not.
+     * @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
     */
    public BitSet bits(IndexReader reader) throws IOException {
        BitSet bits = new BitSet(reader.maxDoc());
@ -152,6 +154,68 @@ public class RangeFilter extends Filter {
        return bits;
    }
    
+    /**
+     * Returns a DocIdSet with documents that should be
+     * permitted in search results.
+     */
+    public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+        OpenBitSet bits = new OpenBitSet(reader.maxDoc());
+        
+        TermEnum enumerator =
+            (null != lowerTerm
+             ? reader.terms(new Term(fieldName, lowerTerm))
+             : reader.terms(new Term(fieldName,"")));
+        
+        try {
+            
+            if (enumerator.term() == null) {
+                return bits;
+            }
+            
+            boolean checkLower = false;
+            if (!includeLower) // make adjustments to set to exclusive
+                checkLower = true;
+        
+            TermDocs termDocs = reader.termDocs();
+            try {
+                
+                do {
+                    Term term = enumerator.term();
+                    if (term != null && term.field().equals(fieldName)) {
+                        if (!checkLower || null==lowerTerm || term.text().compareTo(lowerTerm) > 0) {
+                            checkLower = false;
+                            if (upperTerm != null) {
+                                int compare = upperTerm.compareTo(term.text());
+                                /* if beyond the upper term, or is exclusive and
+                                 * this is equal to the upper term, break out */
+                                if ((compare < 0) ||
+                                    (!includeUpper && compare==0)) {
+                                    break;
+                                }
+                            }
+                            /* we have a good term, find the docs */
+                            
+                            termDocs.seek(enumerator.term());
+                            while (termDocs.next()) {
+                                bits.set(termDocs.doc());
+                            }
+                        }
+                    } else {
+                        break;
+                    }
+                }
+                while (enumerator.next());
+                
+            } finally {
+                termDocs.close();
+            }
+        } finally {
+            enumerator.close();
+        }
+
+        return bits;
+    }
+    
    public String toString() {
        StringBuffer buffer = new StringBuffer();
        buffer.append(fieldName);
--- a/src/java/org/apache/lucene/search/RemoteCachingWrapperFilter.java
+++ b/src/java/org/apache/lucene/search/RemoteCachingWrapperFilter.java
@ -50,9 +50,21 @@ public class RemoteCachingWrapperFilter extends Filter {
   * searcher side of a remote connection.
   * @param reader the index reader for the Filter
   * @return the bitset
+   * @deprecated Use {@link #getDocIdSet(IndexReader)} instead.
   */
  public BitSet bits(IndexReader reader) throws IOException {
    Filter cachedFilter = FilterManager.getInstance().getFilter(filter);
    return cachedFilter.bits(reader);
  }
+  
+  /**
+   * Uses the {@link FilterManager} to keep the cache for a filter on the 
+   * searcher side of a remote connection.
+   * @param reader the index reader for the Filter
+   * @return the DocIdSet
+   */
+  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
+    Filter cachedFilter = FilterManager.getInstance().getFilter(filter);
+    return cachedFilter.getDocIdSet(reader);
+  }
 }
--- a/src/java/org/apache/lucene/search/Scorer.java
+++ b/src/java/org/apache/lucene/search/Scorer.java
@ -33,7 +33,7 @@ import java.io.IOException;
 * </p>
 * @see BooleanQuery#setAllowDocsOutOfOrder
 */
-public abstract class Scorer {
+public abstract class Scorer extends DocIdSetIterator {
  private Similarity similarity;

  /** Constructs a Scorer.
@ -76,65 +76,12 @@ public abstract class Scorer {
    return true;
  }

-  /**
-   * Advances to the document matching this Scorer with the lowest doc Id
-   * greater than the current value of {@link #doc()} (or to the matching
-   * document with the lowest doc Id if next has never been called on
-   * this Scorer).
-   *
-   * <p>
-   * When this method is used the {@link #explain(int)} method should not
-   * be used.
-   * </p>
-   *
-   * @return true iff there is another document matching the query.
-   * @see BooleanQuery#setAllowDocsOutOfOrder
-   */
-  public abstract boolean next() throws IOException;
-
-  /** Returns the current document number matching the query.
-   * Initially invalid, until {@link #next()} is called the first time.
-   */
-  public abstract int doc();
-
  /** Returns the score of the current document matching the query.
   * Initially invalid, until {@link #next()} or {@link #skipTo(int)}
   * is called the first time.
   */
  public abstract float score() throws IOException;

-  /**
-   * Skips to the document matching this Scorer with the lowest doc Id
-   * greater than or equal to a given target.
-   *
-   * <p>
-   * The behavior of this method is undefined if the target specified is
-   * less than or equal to the current value of {@link #doc()}.
-   * <p>
-   * Behaves as if written:
-   * <pre>
-   *   boolean skipTo(int target) {
-   *     do {
-   *       if (!next())
-   * 	     return false;
-   *     } while (target > doc());
-   *     return true;
-   *   }
-   * </pre>
-   * Most implementations are considerably more efficient than that.
-   * </p>
-   *
-   * <p>
-   * When this method is used the {@link #explain(int)} method should not
-   * be used.
-   * </p>
-   *
-   * @param target The target document number.
-   * @return true iff there is such a match.
-   * @see BooleanQuery#setAllowDocsOutOfOrder
-   */
-  public abstract boolean skipTo(int target) throws IOException;
-
  /** Returns an explanation of the score for a document.
   * <br>When this method is used, the {@link #next()}, {@link #skipTo(int)} and
   * {@link #score(HitCollector)} methods should not be used.
--- a/src/java/org/apache/lucene/search/Searchable.java
+++ b/src/java/org/apache/lucene/search/Searchable.java
@ -48,7 +48,7 @@ public interface Searchable extends java.rmi.Remote {
   * non-high-scoring hits.
   *
   * @param weight to match documents
-   * @param filter if non-null, a bitset used to eliminate some documents
+   * @param filter if non-null, used to permit documents to be collected.
   * @param results to receive hits
   * @throws BooleanQuery.TooManyClauses
   */
--- a/src/java/org/apache/lucene/search/Searcher.java
+++ b/src/java/org/apache/lucene/search/Searcher.java
@ -109,7 +109,7 @@ public abstract class Searcher implements Searchable {
   * non-high-scoring hits.
   *
   * @param query to match documents
-   * @param filter if non-null, a bitset used to eliminate some documents
+   * @param filter if non-null, used to permit documents to be collected.
   * @param results to receive hits
   * @throws BooleanQuery.TooManyClauses
   */
--- a/src/java/org/apache/lucene/search/SpanFilter.java
+++ b/src/java/org/apache/lucene/search/SpanFilter.java
@ -30,7 +30,7 @@ import java.io.IOException;
 public abstract class SpanFilter extends Filter{
  /** Returns a SpanFilterResult with true for documents which should be permitted in
    search results, and false for those that should not and Spans for where the true docs match.
-   * @param reader The {@link org.apache.lucene.index.IndexReader} to load position and bitset information from
+   * @param reader The {@link org.apache.lucene.index.IndexReader} to load position and DocIdSet information from
   * @return A {@link SpanFilterResult}
   * @throws java.io.IOException if there was an issue accessing the necessary information
   * */
--- a/src/java/org/apache/lucene/search/SpanFilterResult.java
+++ b/src/java/org/apache/lucene/search/SpanFilterResult.java
@ -28,19 +28,33 @@ import java.util.List;
 *
 **/
 public class SpanFilterResult {
+  /** @deprecated */
  private BitSet bits;
+  
+  private DocIdSet docIdSet;
  private List positions;//Spans spans;

  /**
   *
   * @param bits The bits for the Filter
   * @param positions A List of {@link org.apache.lucene.search.SpanFilterResult.PositionInfo} objects
+   * @deprecated Use {@link #SpanFilterResult(DocIdSet, List)} instead
   */
  public SpanFilterResult(BitSet bits, List positions) {
    this.bits = bits;
    this.positions = positions;
  }
-
+  
+  /**
+  *
+  * @param docIdSet The DocIdSet for the Filter
+  * @param positions A List of {@link org.apache.lucene.search.SpanFilterResult.PositionInfo} objects
+  */
+  public SpanFilterResult(DocIdSet docIdSet, List positions) {
+    this.docIdSet = docIdSet;
+    this.positions = positions;
+  }
+  
  /**
   * The first entry in the array corresponds to the first "on" bit.
   * Entries are increasing by document order
@ -50,11 +64,17 @@ public class SpanFilterResult {
    return positions;
  }

+  /** 
+   * @deprecated Use {@link #getDocIdSet()}
+   */
  public BitSet getBits() {
    return bits;
  }
-
  
+  /** Returns the docIdSet */
+  public DocIdSet getDocIdSet() {
+    return docIdSet;
+  }

  public static class PositionInfo {
    private int doc;
@ -115,3 +135,4 @@ public class SpanFilterResult {
 }


+
--- a/src/java/org/apache/lucene/search/SpanQueryFilter.java
+++ b/src/java/org/apache/lucene/search/SpanQueryFilter.java
@ -19,6 +19,7 @@ package org.apache.lucene.search;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.OpenBitSet;

 import java.io.IOException;
 import java.util.ArrayList;
@ -54,15 +55,14 @@ public class SpanQueryFilter extends SpanFilter {
    this.query = query;
  }

-  public BitSet bits(IndexReader reader) throws IOException {
+  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    SpanFilterResult result = bitSpans(reader);
-    return result.getBits();
+    return result.getDocIdSet();
  }

-
  public SpanFilterResult bitSpans(IndexReader reader) throws IOException {

-    final BitSet bits = new BitSet(reader.maxDoc());
+    final OpenBitSet bits = new OpenBitSet(reader.maxDoc());
    Spans spans = query.getSpans(reader);
    List tmp = new ArrayList(20);
    int currentDoc = -1;
--- a/src/java/org/apache/lucene/util/BitUtil.java
+++ b/src/java/org/apache/lucene/util/BitUtil.java
@ -0,0 +1,799 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util; // from org.apache.solr.util rev 555343
+
+/**  A variety of high efficiencly bit twiddling routines.
+ *
+ * @version $Id$
+ */
+public class BitUtil {
+
+  /** Returns the number of bits set in the long */
+  public static int pop(long x) {
+  /* Hacker's Delight 32 bit pop function:
+   * http://www.hackersdelight.org/HDcode/newCode/pop_arrayHS.cc
+   *
+  int pop(unsigned x) {
+     x = x - ((x >> 1) & 0x55555555);
+     x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
+     x = (x + (x >> 4)) & 0x0F0F0F0F;
+     x = x + (x >> 8);
+     x = x + (x >> 16);
+     return x & 0x0000003F;
+    }
+  ***/
+
+    // 64 bit java version of the C function from above
+    x = x - ((x >>> 1) & 0x5555555555555555L);
+    x = (x & 0x3333333333333333L) + ((x >>>2 ) & 0x3333333333333333L);
+    x = (x + (x >>> 4)) & 0x0F0F0F0F0F0F0F0FL;
+    x = x + (x >>> 8);
+    x = x + (x >>> 16);
+    x = x + (x >>> 32);
+    return ((int)x) & 0x7F;
+  }
+
+  /*** Returns the number of set bits in an array of longs. */
+  public static long pop_array(long A[], int wordOffset, int numWords) {
+    /*
+    * Robert Harley and David Seal's bit counting algorithm, as documented
+    * in the revisions of Hacker's Delight
+    * http://www.hackersdelight.org/revisions.pdf
+    * http://www.hackersdelight.org/HDcode/newCode/pop_arrayHS.cc
+    *
+    * This function was adapted to Java, and extended to use 64 bit words.
+    * if only we had access to wider registers like SSE from java...
+    *
+    * This function can be transformed to compute the popcount of other functions
+    * on bitsets via something like this:
+    * sed 's/A\[\([^]]*\)\]/\(A[\1] \& B[\1]\)/g'
+    *
+    */
+    int n = wordOffset+numWords;
+    long tot=0, tot8=0;
+    long ones=0, twos=0, fours=0;
+
+    int i;
+    for (i = wordOffset; i <= n - 8; i+=8) {
+      /***  C macro from Hacker's Delight
+       #define CSA(h,l, a,b,c) \
+       {unsigned u = a ^ b; unsigned v = c; \
+       h = (a & b) | (u & v); l = u ^ v;}
+       ***/
+
+      long twosA,twosB,foursA,foursB,eights;
+
+      // CSA(twosA, ones, ones, A[i], A[i+1])
+      {
+        long b=A[i], c=A[i+1];
+        long u=ones ^ b;
+        twosA=(ones & b)|( u & c);
+        ones=u^c;
+      }
+      // CSA(twosB, ones, ones, A[i+2], A[i+3])
+      {
+        long b=A[i+2], c=A[i+3];
+        long u=ones^b;
+        twosB =(ones&b)|(u&c);
+        ones=u^c;
+      }
+      //CSA(foursA, twos, twos, twosA, twosB)
+      {
+        long u=twos^twosA;
+        foursA=(twos&twosA)|(u&twosB);
+        twos=u^twosB;
+      }
+      //CSA(twosA, ones, ones, A[i+4], A[i+5])
+      {
+        long b=A[i+4], c=A[i+5];
+        long u=ones^b;
+        twosA=(ones&b)|(u&c);
+        ones=u^c;
+      }
+      // CSA(twosB, ones, ones, A[i+6], A[i+7])
+      {
+        long b=A[i+6], c=A[i+7];
+        long u=ones^b;
+        twosB=(ones&b)|(u&c);
+        ones=u^c;
+      }
+      //CSA(foursB, twos, twos, twosA, twosB)
+      {
+        long u=twos^twosA;
+        foursB=(twos&twosA)|(u&twosB);
+        twos=u^twosB;
+      }
+
+      //CSA(eights, fours, fours, foursA, foursB)
+      {
+        long u=fours^foursA;
+        eights=(fours&foursA)|(u&foursB);
+        fours=u^foursB;
+      }
+      tot8 += pop(eights);
+    }
+
+    // handle trailing words in a binary-search manner...
+    // derived from the loop above by setting specific elements to 0.
+    // the original method in Hackers Delight used a simple for loop:
+    //   for (i = i; i < n; i++)      // Add in the last elements
+    //  tot = tot + pop(A[i]);
+
+    if (i<=n-4) {
+      long twosA, twosB, foursA, eights;
+      {
+        long b=A[i], c=A[i+1];
+        long u=ones ^ b;
+        twosA=(ones & b)|( u & c);
+        ones=u^c;
+      }
+      {
+        long b=A[i+2], c=A[i+3];
+        long u=ones^b;
+        twosB =(ones&b)|(u&c);
+        ones=u^c;
+      }
+      {
+        long u=twos^twosA;
+        foursA=(twos&twosA)|(u&twosB);
+        twos=u^twosB;
+      }
+      eights=fours&foursA;
+      fours=fours^foursA;
+
+      tot8 += pop(eights);
+      i+=4;
+    }
+
+    if (i<=n-2) {
+      long b=A[i], c=A[i+1];
+      long u=ones ^ b;
+      long twosA=(ones & b)|( u & c);
+      ones=u^c;
+
+      long foursA=twos&twosA;
+      twos=twos^twosA;
+
+      long eights=fours&foursA;
+      fours=fours^foursA;
+
+      tot8 += pop(eights);
+      i+=2;
+    }
+
+    if (i<n) {
+      tot += pop(A[i]);
+    }
+
+    tot += (pop(fours)<<2)
+            + (pop(twos)<<1)
+            + pop(ones)
+            + (tot8<<3);
+
+    return tot;
+  }
+
+  /** Returns the popcount or cardinality of the two sets after an intersection.
+   * Neither array is modified.
+   */
+  public static long pop_intersect(long A[], long B[], int wordOffset, int numWords) {
+    // generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \& B[\1]\)/g'
+    int n = wordOffset+numWords;
+    long tot=0, tot8=0;
+    long ones=0, twos=0, fours=0;
+
+    int i;
+    for (i = wordOffset; i <= n - 8; i+=8) {
+      long twosA,twosB,foursA,foursB,eights;
+
+      // CSA(twosA, ones, ones, (A[i] & B[i]), (A[i+1] & B[i+1]))
+      {
+        long b=(A[i] & B[i]), c=(A[i+1] & B[i+1]);
+        long u=ones ^ b;
+        twosA=(ones & b)|( u & c);
+        ones=u^c;
+      }
+      // CSA(twosB, ones, ones, (A[i+2] & B[i+2]), (A[i+3] & B[i+3]))
+      {
+        long b=(A[i+2] & B[i+2]), c=(A[i+3] & B[i+3]);
+        long u=ones^b;
+        twosB =(ones&b)|(u&c);
+        ones=u^c;
+      }
+      //CSA(foursA, twos, twos, twosA, twosB)
+      {
+        long u=twos^twosA;
+        foursA=(twos&twosA)|(u&twosB);
+        twos=u^twosB;
+      }
+      //CSA(twosA, ones, ones, (A[i+4] & B[i+4]), (A[i+5] & B[i+5]))
+      {
+        long b=(A[i+4] & B[i+4]), c=(A[i+5] & B[i+5]);
+        long u=ones^b;
+        twosA=(ones&b)|(u&c);
+        ones=u^c;
+      }
+      // CSA(twosB, ones, ones, (A[i+6] & B[i+6]), (A[i+7] & B[i+7]))
+      {
+        long b=(A[i+6] & B[i+6]), c=(A[i+7] & B[i+7]);
+        long u=ones^b;
+        twosB=(ones&b)|(u&c);
+        ones=u^c;
+      }
+      //CSA(foursB, twos, twos, twosA, twosB)
+      {
+        long u=twos^twosA;
+        foursB=(twos&twosA)|(u&twosB);
+        twos=u^twosB;
+      }
+
+      //CSA(eights, fours, fours, foursA, foursB)
+      {
+        long u=fours^foursA;
+        eights=(fours&foursA)|(u&foursB);
+        fours=u^foursB;
+      }
+      tot8 += pop(eights);
+    }
+
+
+    if (i<=n-4) {
+      long twosA, twosB, foursA, eights;
+      {
+        long b=(A[i] & B[i]), c=(A[i+1] & B[i+1]);
+        long u=ones ^ b;
+        twosA=(ones & b)|( u & c);
+        ones=u^c;
+      }
+      {
+        long b=(A[i+2] & B[i+2]), c=(A[i+3] & B[i+3]);
+        long u=ones^b;
+        twosB =(ones&b)|(u&c);
+        ones=u^c;
+      }
+      {
+        long u=twos^twosA;
+        foursA=(twos&twosA)|(u&twosB);
+        twos=u^twosB;
+      }
+      eights=fours&foursA;
+      fours=fours^foursA;
+
+      tot8 += pop(eights);
+      i+=4;
+    }
+
+    if (i<=n-2) {
+      long b=(A[i] & B[i]), c=(A[i+1] & B[i+1]);
+      long u=ones ^ b;
+      long twosA=(ones & b)|( u & c);
+      ones=u^c;
+
+      long foursA=twos&twosA;
+      twos=twos^twosA;
+
+      long eights=fours&foursA;
+      fours=fours^foursA;
+
+      tot8 += pop(eights);
+      i+=2;
+    }
+
+    if (i<n) {
+      tot += pop((A[i] & B[i]));
+    }
+
+    tot += (pop(fours)<<2)
+            + (pop(twos)<<1)
+            + pop(ones)
+            + (tot8<<3);
+
+    return tot;
+  }
+
+  /** Returns the popcount or cardinality of the union of two sets.
+    * Neither array is modified.
+    */
+   public static long pop_union(long A[], long B[], int wordOffset, int numWords) {
+     // generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \| B[\1]\)/g'
+     int n = wordOffset+numWords;
+     long tot=0, tot8=0;
+     long ones=0, twos=0, fours=0;
+
+     int i;
+     for (i = wordOffset; i <= n - 8; i+=8) {
+       /***  C macro from Hacker's Delight
+        #define CSA(h,l, a,b,c) \
+        {unsigned u = a ^ b; unsigned v = c; \
+        h = (a & b) | (u & v); l = u ^ v;}
+        ***/
+
+       long twosA,twosB,foursA,foursB,eights;
+
+       // CSA(twosA, ones, ones, (A[i] | B[i]), (A[i+1] | B[i+1]))
+       {
+         long b=(A[i] | B[i]), c=(A[i+1] | B[i+1]);
+         long u=ones ^ b;
+         twosA=(ones & b)|( u & c);
+         ones=u^c;
+       }
+       // CSA(twosB, ones, ones, (A[i+2] | B[i+2]), (A[i+3] | B[i+3]))
+       {
+         long b=(A[i+2] | B[i+2]), c=(A[i+3] | B[i+3]);
+         long u=ones^b;
+         twosB =(ones&b)|(u&c);
+         ones=u^c;
+       }
+       //CSA(foursA, twos, twos, twosA, twosB)
+       {
+         long u=twos^twosA;
+         foursA=(twos&twosA)|(u&twosB);
+         twos=u^twosB;
+       }
+       //CSA(twosA, ones, ones, (A[i+4] | B[i+4]), (A[i+5] | B[i+5]))
+       {
+         long b=(A[i+4] | B[i+4]), c=(A[i+5] | B[i+5]);
+         long u=ones^b;
+         twosA=(ones&b)|(u&c);
+         ones=u^c;
+       }
+       // CSA(twosB, ones, ones, (A[i+6] | B[i+6]), (A[i+7] | B[i+7]))
+       {
+         long b=(A[i+6] | B[i+6]), c=(A[i+7] | B[i+7]);
+         long u=ones^b;
+         twosB=(ones&b)|(u&c);
+         ones=u^c;
+       }
+       //CSA(foursB, twos, twos, twosA, twosB)
+       {
+         long u=twos^twosA;
+         foursB=(twos&twosA)|(u&twosB);
+         twos=u^twosB;
+       }
+
+       //CSA(eights, fours, fours, foursA, foursB)
+       {
+         long u=fours^foursA;
+         eights=(fours&foursA)|(u&foursB);
+         fours=u^foursB;
+       }
+       tot8 += pop(eights);
+     }
+
+
+     if (i<=n-4) {
+       long twosA, twosB, foursA, eights;
+       {
+         long b=(A[i] | B[i]), c=(A[i+1] | B[i+1]);
+         long u=ones ^ b;
+         twosA=(ones & b)|( u & c);
+         ones=u^c;
+       }
+       {
+         long b=(A[i+2] | B[i+2]), c=(A[i+3] | B[i+3]);
+         long u=ones^b;
+         twosB =(ones&b)|(u&c);
+         ones=u^c;
+       }
+       {
+         long u=twos^twosA;
+         foursA=(twos&twosA)|(u&twosB);
+         twos=u^twosB;
+       }
+       eights=fours&foursA;
+       fours=fours^foursA;
+
+       tot8 += pop(eights);
+       i+=4;
+     }
+
+     if (i<=n-2) {
+       long b=(A[i] | B[i]), c=(A[i+1] | B[i+1]);
+       long u=ones ^ b;
+       long twosA=(ones & b)|( u & c);
+       ones=u^c;
+
+       long foursA=twos&twosA;
+       twos=twos^twosA;
+
+       long eights=fours&foursA;
+       fours=fours^foursA;
+
+       tot8 += pop(eights);
+       i+=2;
+     }
+
+     if (i<n) {
+       tot += pop((A[i] | B[i]));
+     }
+
+     tot += (pop(fours)<<2)
+             + (pop(twos)<<1)
+             + pop(ones)
+             + (tot8<<3);
+
+     return tot;
+   }
+
+  /** Returns the popcount or cardinality of A & ~B
+   * Neither array is modified.
+   */
+  public static long pop_andnot(long A[], long B[], int wordOffset, int numWords) {
+    // generated from pop_array via sed 's/A\[\([^]]*\)\]/\(A[\1] \& ~B[\1]\)/g'
+    int n = wordOffset+numWords;
+    long tot=0, tot8=0;
+    long ones=0, twos=0, fours=0;
+
+    int i;
+    for (i = wordOffset; i <= n - 8; i+=8) {
+      /***  C macro from Hacker's Delight
+       #define CSA(h,l, a,b,c) \
+       {unsigned u = a ^ b; unsigned v = c; \
+       h = (a & b) | (u & v); l = u ^ v;}
+       ***/
+
+      long twosA,twosB,foursA,foursB,eights;
+
+      // CSA(twosA, ones, ones, (A[i] & ~B[i]), (A[i+1] & ~B[i+1]))
+      {
+        long b=(A[i] & ~B[i]), c=(A[i+1] & ~B[i+1]);
+        long u=ones ^ b;
+        twosA=(ones & b)|( u & c);
+        ones=u^c;
+      }
+      // CSA(twosB, ones, ones, (A[i+2] & ~B[i+2]), (A[i+3] & ~B[i+3]))
+      {
+        long b=(A[i+2] & ~B[i+2]), c=(A[i+3] & ~B[i+3]);
+        long u=ones^b;
+        twosB =(ones&b)|(u&c);
+        ones=u^c;
+      }
+      //CSA(foursA, twos, twos, twosA, twosB)
+      {
+        long u=twos^twosA;
+        foursA=(twos&twosA)|(u&twosB);
+        twos=u^twosB;
+      }
+      //CSA(twosA, ones, ones, (A[i+4] & ~B[i+4]), (A[i+5] & ~B[i+5]))
+      {
+        long b=(A[i+4] & ~B[i+4]), c=(A[i+5] & ~B[i+5]);
+        long u=ones^b;
+        twosA=(ones&b)|(u&c);
+        ones=u^c;
+      }
+      // CSA(twosB, ones, ones, (A[i+6] & ~B[i+6]), (A[i+7] & ~B[i+7]))
+      {
+        long b=(A[i+6] & ~B[i+6]), c=(A[i+7] & ~B[i+7]);
+        long u=ones^b;
+        twosB=(ones&b)|(u&c);
+        ones=u^c;
+      }
+      //CSA(foursB, twos, twos, twosA, twosB)
+      {
+        long u=twos^twosA;
+        foursB=(twos&twosA)|(u&twosB);
+        twos=u^twosB;
+      }
+
+      //CSA(eights, fours, fours, foursA, foursB)
+      {
+        long u=fours^foursA;
+        eights=(fours&foursA)|(u&foursB);
+        fours=u^foursB;
+      }
+      tot8 += pop(eights);
+    }
+
+
+    if (i<=n-4) {
+      long twosA, twosB, foursA, eights;
+      {
+        long b=(A[i] & ~B[i]), c=(A[i+1] & ~B[i+1]);
+        long u=ones ^ b;
+        twosA=(ones & b)|( u & c);
+        ones=u^c;
+      }
+      {
+        long b=(A[i+2] & ~B[i+2]), c=(A[i+3] & ~B[i+3]);
+        long u=ones^b;
+        twosB =(ones&b)|(u&c);
+        ones=u^c;
+      }
+      {
+        long u=twos^twosA;
+        foursA=(twos&twosA)|(u&twosB);
+        twos=u^twosB;
+      }
+      eights=fours&foursA;
+      fours=fours^foursA;
+
+      tot8 += pop(eights);
+      i+=4;
+    }
+
+    if (i<=n-2) {
+      long b=(A[i] & ~B[i]), c=(A[i+1] & ~B[i+1]);
+      long u=ones ^ b;
+      long twosA=(ones & b)|( u & c);
+      ones=u^c;
+
+      long foursA=twos&twosA;
+      twos=twos^twosA;
+
+      long eights=fours&foursA;
+      fours=fours^foursA;
+
+      tot8 += pop(eights);
+      i+=2;
+    }
+
+    if (i<n) {
+      tot += pop((A[i] & ~B[i]));
+    }
+
+    tot += (pop(fours)<<2)
+            + (pop(twos)<<1)
+            + pop(ones)
+            + (tot8<<3);
+
+    return tot;
+  }
+
+  public static long pop_xor(long A[], long B[], int wordOffset, int numWords) {
+    int n = wordOffset+numWords;
+    long tot=0, tot8=0;
+    long ones=0, twos=0, fours=0;
+
+    int i;
+    for (i = wordOffset; i <= n - 8; i+=8) {
+      /***  C macro from Hacker's Delight
+       #define CSA(h,l, a,b,c) \
+       {unsigned u = a ^ b; unsigned v = c; \
+       h = (a & b) | (u & v); l = u ^ v;}
+       ***/
+
+      long twosA,twosB,foursA,foursB,eights;
+
+      // CSA(twosA, ones, ones, (A[i] ^ B[i]), (A[i+1] ^ B[i+1]))
+      {
+        long b=(A[i] ^ B[i]), c=(A[i+1] ^ B[i+1]);
+        long u=ones ^ b;
+        twosA=(ones & b)|( u & c);
+        ones=u^c;
+      }
+      // CSA(twosB, ones, ones, (A[i+2] ^ B[i+2]), (A[i+3] ^ B[i+3]))
+      {
+        long b=(A[i+2] ^ B[i+2]), c=(A[i+3] ^ B[i+3]);
+        long u=ones^b;
+        twosB =(ones&b)|(u&c);
+        ones=u^c;
+      }
+      //CSA(foursA, twos, twos, twosA, twosB)
+      {
+        long u=twos^twosA;
+        foursA=(twos&twosA)|(u&twosB);
+        twos=u^twosB;
+      }
+      //CSA(twosA, ones, ones, (A[i+4] ^ B[i+4]), (A[i+5] ^ B[i+5]))
+      {
+        long b=(A[i+4] ^ B[i+4]), c=(A[i+5] ^ B[i+5]);
+        long u=ones^b;
+        twosA=(ones&b)|(u&c);
+        ones=u^c;
+      }
+      // CSA(twosB, ones, ones, (A[i+6] ^ B[i+6]), (A[i+7] ^ B[i+7]))
+      {
+        long b=(A[i+6] ^ B[i+6]), c=(A[i+7] ^ B[i+7]);
+        long u=ones^b;
+        twosB=(ones&b)|(u&c);
+        ones=u^c;
+      }
+      //CSA(foursB, twos, twos, twosA, twosB)
+      {
+        long u=twos^twosA;
+        foursB=(twos&twosA)|(u&twosB);
+        twos=u^twosB;
+      }
+
+      //CSA(eights, fours, fours, foursA, foursB)
+      {
+        long u=fours^foursA;
+        eights=(fours&foursA)|(u&foursB);
+        fours=u^foursB;
+      }
+      tot8 += pop(eights);
+    }
+
+
+    if (i<=n-4) {
+      long twosA, twosB, foursA, eights;
+      {
+        long b=(A[i] ^ B[i]), c=(A[i+1] ^ B[i+1]);
+        long u=ones ^ b;
+        twosA=(ones & b)|( u & c);
+        ones=u^c;
+      }
+      {
+        long b=(A[i+2] ^ B[i+2]), c=(A[i+3] ^ B[i+3]);
+        long u=ones^b;
+        twosB =(ones&b)|(u&c);
+        ones=u^c;
+      }
+      {
+        long u=twos^twosA;
+        foursA=(twos&twosA)|(u&twosB);
+        twos=u^twosB;
+      }
+      eights=fours&foursA;
+      fours=fours^foursA;
+
+      tot8 += pop(eights);
+      i+=4;
+    }
+
+    if (i<=n-2) {
+      long b=(A[i] ^ B[i]), c=(A[i+1] ^ B[i+1]);
+      long u=ones ^ b;
+      long twosA=(ones & b)|( u & c);
+      ones=u^c;
+
+      long foursA=twos&twosA;
+      twos=twos^twosA;
+
+      long eights=fours&foursA;
+      fours=fours^foursA;
+
+      tot8 += pop(eights);
+      i+=2;
+    }
+
+    if (i<n) {
+      tot += pop((A[i] ^ B[i]));
+    }
+
+    tot += (pop(fours)<<2)
+            + (pop(twos)<<1)
+            + pop(ones)
+            + (tot8<<3);
+
+    return tot;
+  }
+
+  /* python code to generate ntzTable
+  def ntz(val):
+    if val==0: return 8
+    i=0
+    while (val&0x01)==0:
+      i = i+1
+      val >>= 1
+    return i
+  print ','.join([ str(ntz(i)) for i in range(256) ])
+  ***/
+  /** table of number of trailing zeros in a byte */
+  public static final byte[] ntzTable = {8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0};
+
+
+  /** Returns number of trailing zeros in the 64 bit long value. */
+  public static int ntz(long val) {
+    // A full binary search to determine the low byte was slower than
+    // a linear search for nextSetBit().  This is most likely because
+    // the implementation of nextSetBit() shifts bits to the right, increasing
+    // the probability that the first non-zero byte is in the rhs.
+    //
+    // This implementation does a single binary search at the top level only
+    // so that all other bit shifting can be done on ints instead of longs to
+    // remain friendly to 32 bit architectures.  In addition, the case of a
+    // non-zero first byte is checked for first because it is the most common
+    // in dense bit arrays.
+
+    int lower = (int)val;
+    int lowByte = lower & 0xff;
+    if (lowByte != 0) return ntzTable[lowByte];
+
+    if (lower!=0) {
+      lowByte = (lower>>>8) & 0xff;
+      if (lowByte != 0) return ntzTable[lowByte] + 8;
+      lowByte = (lower>>>16) & 0xff;
+      if (lowByte != 0) return ntzTable[lowByte] + 16;
+      // no need to mask off low byte for the last byte in the 32 bit word
+      // no need to check for zero on the last byte either.
+      return ntzTable[lower>>>24] + 24;
+    } else {
+      // grab upper 32 bits
+      int upper=(int)(val>>32);
+      lowByte = upper & 0xff;
+      if (lowByte != 0) return ntzTable[lowByte] + 32;
+      lowByte = (upper>>>8) & 0xff;
+      if (lowByte != 0) return ntzTable[lowByte] + 40;
+      lowByte = (upper>>>16) & 0xff;
+      if (lowByte != 0) return ntzTable[lowByte] + 48;
+      // no need to mask off low byte for the last byte in the 32 bit word
+      // no need to check for zero on the last byte either.
+      return ntzTable[upper>>>24] + 56;
+    }
+  }
+
+  /** returns 0 based index of first set bit
+   * (only works for x!=0)
+   * <br/> This is an alternate implementation of ntz()
+   */
+  public static int ntz2(long x) {
+   int n = 0;
+   int y = (int)x;
+   if (y==0) {n+=32; y = (int)(x>>>32); }   // the only 64 bit shift necessary
+   if ((y & 0x0000FFFF) == 0) { n+=16; y>>>=16; }
+   if ((y & 0x000000FF) == 0) { n+=8; y>>>=8; }
+   return (ntzTable[ y & 0xff ]) + n;
+  }
+
+  /** returns 0 based index of first set bit
+   * <br/> This is an alternate implementation of ntz()
+   */
+  public static int ntz3(long x) {
+   // another implementation taken from Hackers Delight, extended to 64 bits
+   // and converted to Java.
+   // Many 32 bit ntz algorithms are at http://www.hackersdelight.org/HDcode/ntz.cc
+   int n = 1;
+
+   // do the first step as a long, all others as ints.
+   int y = (int)x;
+   if (y==0) {n+=32; y = (int)(x>>>32); }
+   if ((y & 0x0000FFFF) == 0) { n+=16; y>>>=16; }
+   if ((y & 0x000000FF) == 0) { n+=8; y>>>=8; }
+   if ((y & 0x0000000F) == 0) { n+=4; y>>>=4; }
+   if ((y & 0x00000003) == 0) { n+=2; y>>>=2; }
+   return n - (y & 1);
+  }
+
+
+  /** returns true if v is a power of two or zero*/
+  public static boolean isPowerOfTwo(int v) {
+    return ((v & (v-1)) == 0);
+  }
+
+  /** returns true if v is a power of two or zero*/
+  public static boolean isPowerOfTwo(long v) {
+    return ((v & (v-1)) == 0);
+  }
+
+  /** returns the next highest power of two, or the current value if it's already a power of two or zero*/
+  public static int nextHighestPowerOfTwo(int v) {
+    v--;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v++;
+    return v;
+  }
+
+  /** returns the next highest power of two, or the current value if it's already a power of two or zero*/
+   public static long nextHighestPowerOfTwo(long v) {
+    v--;
+    v |= v >> 1;
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v |= v >> 32;
+    v++;
+    return v;
+  }
+
+}
--- a/src/java/org/apache/lucene/util/DocIdBitSet.java
+++ b/src/java/org/apache/lucene/util/DocIdBitSet.java
@ -0,0 +1,77 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.BitSet;
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+
+
+/** Simple DocIdSet and DocIdSetIterator backed by a BitSet */
+public class DocIdBitSet extends DocIdSet {
+  private BitSet bitSet;
+    
+  public DocIdBitSet(BitSet bitSet) {
+    this.bitSet = bitSet;
+  }
+
+  public DocIdSetIterator iterator() {
+    return new DocIdBitSetIterator(bitSet);
+  }
+  
+  /**
+   * Returns the underlying BitSet. 
+   */
+  public BitSet getBitSet() {
+	return this.bitSet;
+  }
+  
+  private static class DocIdBitSetIterator extends DocIdSetIterator {
+    private int docId;
+    private BitSet bitSet;
+    
+    DocIdBitSetIterator(BitSet bitSet) {
+      this.bitSet = bitSet;
+      this.docId = -1;
+    }
+    
+    public int doc() {
+      assert docId != -1;
+      return docId;
+    }
+    
+    public boolean next() {
+      // (docId + 1) on next line requires -1 initial value for docNr:
+      return checkNextDocId(bitSet.nextSetBit(docId + 1));
+    }
+  
+    public boolean skipTo(int skipDocNr) {
+      return checkNextDocId( bitSet.nextSetBit(skipDocNr));
+    }
+  
+    private boolean checkNextDocId(int d) {
+      if (d == -1) { // -1 returned by BitSet.nextSetBit() when exhausted
+        docId = Integer.MAX_VALUE;
+        return false;
+      } else {
+        docId = d;
+        return true;
+      }
+    }
+  }
+}
--- a/src/java/org/apache/lucene/util/OpenBitSet.java
+++ b/src/java/org/apache/lucene/util/OpenBitSet.java
@ -0,0 +1,773 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util;
+
+import java.util.Arrays;
+import java.io.Serializable;
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+
+/** An "open" BitSet implementation that allows direct access to the array of words
+ * storing the bits.
+ * <p/>
+ * Unlike java.util.bitet, the fact that bits are packed into an array of longs
+ * is part of the interface.  This allows efficient implementation of other algorithms
+ * by someone other than the author.  It also allows one to efficiently implement
+ * alternate serialization or interchange formats.
+ * <p/>
+ * <code>OpenBitSet</code> is faster than <code>java.util.BitSet</code> in most operations
+ * and *much* faster at calculating cardinality of sets and results of set operations.
+ * It can also handle sets of larger cardinality (up to 64 * 2**32-1)
+ * <p/>
+ * The goals of <code>OpenBitSet</code> are the fastest implementation possible, and
+ * maximum code reuse.  Extra safety and encapsulation
+ * may always be built on top, but if that's built in, the cost can never be removed (and
+ * hence people re-implement their own version in order to get better performance).
+ * If you want a "safe", totally encapsulated (and slower and limited) BitSet
+ * class, use <code>java.util.BitSet</code>.
+ * <p/>
+ * <h3>Performance Results</h3>
+ *
+ Test system: Pentium 4, Sun Java 1.5_06 -server -Xbatch -Xmx64M
+<br/>BitSet size = 1,000,000
+<br/>Results are java.util.BitSet time divided by OpenBitSet time.
+<table border="1">
+ <tr>
+  <th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th> <th>nextSetBit</th> <th>get</th> <th>iterator</th>
+ </tr>
+ <tr>
+  <th>50% full</th> <td>3.36</td> <td>3.96</td> <td>1.44</td> <td>1.46</td> <td>1.99</td> <td>1.58</td>
+ </tr>
+ <tr>
+   <th>1% full</th> <td>3.31</td> <td>3.90</td> <td>&nbsp;</td> <td>1.04</td> <td>&nbsp;</td> <td>0.99</td>
+ </tr>
+</table>
+<br/>
+Test system: AMD Opteron, 64 bit linux, Sun Java 1.5_06 -server -Xbatch -Xmx64M
+<br/>BitSet size = 1,000,000
+<br/>Results are java.util.BitSet time divided by OpenBitSet time.
+<table border="1">
+ <tr>
+  <th></th> <th>cardinality</th> <th>intersect_count</th> <th>union</th> <th>nextSetBit</th> <th>get</th> <th>iterator</th>
+ </tr>
+ <tr>
+  <th>50% full</th> <td>2.50</td> <td>3.50</td> <td>1.00</td> <td>1.03</td> <td>1.12</td> <td>1.25</td>
+ </tr>
+ <tr>
+   <th>1% full</th> <td>2.51</td> <td>3.49</td> <td>&nbsp;</td> <td>1.00</td> <td>&nbsp;</td> <td>1.02</td>
+ </tr>
+</table>
+
+ * @version $Id$
+ */
+
+public class OpenBitSet extends DocIdSet implements Cloneable, Serializable {
+  protected long[] bits;
+  protected int wlen;   // number of words (elements) used in the array
+
+  /** Constructs an OpenBitSet large enough to hold numBits.
+   *
+   * @param numBits
+   */
+  public OpenBitSet(long numBits) {
+    bits = new long[bits2words(numBits)];
+    wlen = bits.length;
+  }
+
+  public OpenBitSet() {
+    this(64);
+  }
+
+  /** Constructs an OpenBitSet from an existing long[].
+   * <br/>
+   * The first 64 bits are in long[0],
+   * with bit index 0 at the least significant bit, and bit index 63 at the most significant.
+   * Given a bit index,
+   * the word containing it is long[index/64], and it is at bit number index%64 within that word.
+   * <p>
+   * numWords are the number of elements in the array that contain
+   * set bits (non-zero longs).
+   * numWords should be &lt= bits.length, and
+   * any existing words in the array at position &gt= numWords should be zero.
+   *
+   */
+  public OpenBitSet(long[] bits, int numWords) {
+    this.bits = bits;
+    this.wlen = numWords;
+  }
+  
+  public DocIdSetIterator iterator() {
+    return new OpenBitSetIterator(bits, wlen);
+  }
+
+  /** Returns the current capacity in bits (1 greater than the index of the last bit) */
+  public long capacity() { return bits.length << 6; }
+
+ /**
+  * Returns the current capacity of this set.  Included for
+  * compatibility.  This is *not* equal to {@link #cardinality}
+  */
+  public long size() {
+      return capacity();
+  }
+
+  /** Returns true if there are no set bits */
+  public boolean isEmpty() { return cardinality()==0; }
+
+  /** Expert: returns the long[] storing the bits */
+  public long[] getBits() { return bits; }
+
+  /** Expert: sets a new long[] to use as the bit storage */
+  public void setBits(long[] bits) { this.bits = bits; }
+
+  /** Expert: gets the number of longs in the array that are in use */
+  public int getNumWords() { return wlen; }
+
+  /** Expert: sets the number of longs in the array that are in use */
+  public void setNumWords(int nWords) { this.wlen=nWords; }
+
+
+
+  /** Returns true or false for the specified bit index. */
+  public boolean get(int index) {
+    int i = index >> 6;               // div 64
+    // signed shift will keep a negative index and force an
+    // array-index-out-of-bounds-exception, removing the need for an explicit check.
+    if (i>=bits.length) return false;
+
+    int bit = index & 0x3f;           // mod 64
+    long bitmask = 1L << bit;
+    return (bits[i] & bitmask) != 0;
+  }
+
+
+ /** Returns true or false for the specified bit index.
+   * The index should be less than the OpenBitSet size
+   */
+  public boolean fastGet(int index) {
+    int i = index >> 6;               // div 64
+    // signed shift will keep a negative index and force an
+    // array-index-out-of-bounds-exception, removing the need for an explicit check.
+    int bit = index & 0x3f;           // mod 64
+    long bitmask = 1L << bit;
+    return (bits[i] & bitmask) != 0;
+  }
+
+
+
+ /** Returns true or false for the specified bit index
+  * The index should be less than the OpenBitSet size
+  */
+  public boolean get(long index) {
+    int i = (int)(index >> 6);             // div 64
+    if (i>=bits.length) return false;
+    int bit = (int)index & 0x3f;           // mod 64
+    long bitmask = 1L << bit;
+    return (bits[i] & bitmask) != 0;
+  }
+
+  /** Returns true or false for the specified bit index.  Allows specifying
+   * an index outside the current size. */
+  public boolean fastGet(long index) {
+    int i = (int)(index >> 6);               // div 64
+    int bit = (int)index & 0x3f;           // mod 64
+    long bitmask = 1L << bit;
+    return (bits[i] & bitmask) != 0;
+  }
+
+  /*
+  // alternate implementation of get()
+  public boolean get1(int index) {
+    int i = index >> 6;                // div 64
+    int bit = index & 0x3f;            // mod 64
+    return ((bits[i]>>>bit) & 0x01) != 0;
+    // this does a long shift and a bittest (on x86) vs
+    // a long shift, and a long AND, (the test for zero is prob a no-op)
+    // testing on a P4 indicates this is slower than (bits[i] & bitmask) != 0;
+  }
+  */
+
+
+  /** returns 1 if the bit is set, 0 if not.
+   * The index should be less than the OpenBitSet size
+   */
+  public int getBit(int index) {
+    int i = index >> 6;                // div 64
+    int bit = index & 0x3f;            // mod 64
+    return ((int)(bits[i]>>>bit)) & 0x01;
+  }
+
+
+  /*
+  public boolean get2(int index) {
+    int word = index >> 6;            // div 64
+    int bit = index & 0x0000003f;     // mod 64
+    return (bits[word] << bit) < 0;   // hmmm, this would work if bit order were reversed
+    // we could right shift and check for parity bit, if it was available to us.
+  }
+  */
+
+  /** sets a bit, expanding the set size if necessary */
+  public void set(long index) {
+    int wordNum = expandingWordNum(index);
+    int bit = (int)index & 0x3f;
+    long bitmask = 1L << bit;
+    bits[wordNum] |= bitmask;
+  }
+
+
+ /** Sets the bit at the specified index.
+  * The index should be less than the OpenBitSet size.
+  */
+  public void fastSet(int index) {
+    int wordNum = index >> 6;      // div 64
+    int bit = index & 0x3f;     // mod 64
+    long bitmask = 1L << bit;
+    bits[wordNum] |= bitmask;
+  }
+
+ /** Sets the bit at the specified index.
+  * The index should be less than the OpenBitSet size.
+  */
+  public void fastSet(long index) {
+    int wordNum = (int)(index >> 6);
+    int bit = (int)index & 0x3f;
+    long bitmask = 1L << bit;
+    bits[wordNum] |= bitmask;
+  }
+
+  /** Sets a range of bits, expanding the set size if necessary
+   *
+   * @param startIndex lower index
+   * @param endIndex one-past the last bit to set
+   */
+  public void set(long startIndex, long endIndex) {
+    if (endIndex <= startIndex) return;
+
+    int startWord = (int)(startIndex>>6);
+
+    // since endIndex is one past the end, this is index of the last
+    // word to be changed.
+    int endWord   = expandingWordNum(endIndex-1);
+
+    long startmask = -1L << startIndex;
+    long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+
+    if (startWord == endWord) {
+      bits[startWord] |= (startmask & endmask);
+      return;
+    }
+
+    bits[startWord] |= startmask;
+    Arrays.fill(bits, startWord+1, endWord, -1L);
+    bits[endWord] |= endmask;
+  }
+
+
+
+  protected int expandingWordNum(long index) {
+    int wordNum = (int)(index >> 6);
+    if (wordNum>=wlen) {
+      ensureCapacity(index+1);
+      wlen = wordNum+1;
+    }
+    return wordNum;
+  }
+
+
+  /** clears a bit.
+   * The index should be less than the OpenBitSet size.
+   */
+  public void fastClear(int index) {
+    int wordNum = index >> 6;
+    int bit = index & 0x03f;
+    long bitmask = 1L << bit;
+    bits[wordNum] &= ~bitmask;
+    // hmmm, it takes one more instruction to clear than it does to set... any
+    // way to work around this?  If there were only 63 bits per word, we could
+    // use a right shift of 10111111...111 in binary to position the 0 in the
+    // correct place (using sign extension).
+    // Could also use Long.rotateRight() or rotateLeft() *if* they were converted
+    // by the JVM into a native instruction.
+    // bits[word] &= Long.rotateLeft(0xfffffffe,bit);
+  }
+
+  /** clears a bit.
+   * The index should be less than the OpenBitSet size.
+   */
+  public void fastClear(long index) {
+    int wordNum = (int)(index >> 6); // div 64
+    int bit = (int)index & 0x3f;     // mod 64
+    long bitmask = 1L << bit;
+    bits[wordNum] &= ~bitmask;
+  }
+
+  /** clears a bit, allowing access beyond the current set size without changing the size.*/
+  public void clear(long index) {
+    int wordNum = (int)(index >> 6); // div 64
+    if (wordNum>=wlen) return;
+    int bit = (int)index & 0x3f;     // mod 64
+    long bitmask = 1L << bit;
+    bits[wordNum] &= ~bitmask;
+  }
+
+  /** Clears a range of bits.  Clearing past the end does not change the size of the set.
+   *
+   * @param startIndex lower index
+   * @param endIndex one-past the last bit to clear
+   */
+  public void clear(long startIndex, long endIndex) {
+    if (endIndex <= startIndex) return;
+
+    int startWord = (int)(startIndex>>6);
+    if (startWord >= wlen) return;
+
+    // since endIndex is one past the end, this is index of the last
+    // word to be changed.
+    int endWord   = (int)((endIndex-1)>>6);
+
+    long startmask = -1L << startIndex;
+    long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+
+    // invert masks since we are clearing
+    startmask = ~startmask;
+    endmask = ~endmask;
+
+    if (startWord == endWord) {
+      bits[startWord] &= (startmask | endmask);
+      return;
+    }
+
+    bits[startWord] &= startmask;
+
+    int middle = Math.min(wlen, endWord);
+    Arrays.fill(bits, startWord+1, middle, 0L);
+    if (endWord < wlen) {
+      bits[endWord] &= endmask;
+    }
+  }
+
+
+
+  /** Sets a bit and returns the previous value.
+   * The index should be less than the OpenBitSet size.
+   */
+  public boolean getAndSet(int index) {
+    int wordNum = index >> 6;      // div 64
+    int bit = index & 0x3f;     // mod 64
+    long bitmask = 1L << bit;
+    boolean val = (bits[wordNum] & bitmask) != 0;
+    bits[wordNum] |= bitmask;
+    return val;
+  }
+
+  /** Sets a bit and returns the previous value.
+   * The index should be less than the OpenBitSet size.
+   */
+  public boolean getAndSet(long index) {
+    int wordNum = (int)(index >> 6);      // div 64
+    int bit = (int)index & 0x3f;     // mod 64
+    long bitmask = 1L << bit;
+    boolean val = (bits[wordNum] & bitmask) != 0;
+    bits[wordNum] |= bitmask;
+    return val;
+  }
+
+  /** flips a bit.
+   * The index should be less than the OpenBitSet size.
+   */
+  public void fastFlip(int index) {
+    int wordNum = index >> 6;      // div 64
+    int bit = index & 0x3f;     // mod 64
+    long bitmask = 1L << bit;
+    bits[wordNum] ^= bitmask;
+  }
+
+  /** flips a bit.
+   * The index should be less than the OpenBitSet size.
+   */
+  public void fastFlip(long index) {
+    int wordNum = (int)(index >> 6);   // div 64
+    int bit = (int)index & 0x3f;       // mod 64
+    long bitmask = 1L << bit;
+    bits[wordNum] ^= bitmask;
+  }
+
+  /** flips a bit, expanding the set size if necessary */
+  public void flip(long index) {
+    int wordNum = expandingWordNum(index);
+    int bit = (int)index & 0x3f;       // mod 64
+    long bitmask = 1L << bit;
+    bits[wordNum] ^= bitmask;
+  }
+
+  /** flips a bit and returns the resulting bit value.
+   * The index should be less than the OpenBitSet size.
+   */
+  public boolean flipAndGet(int index) {
+    int wordNum = index >> 6;      // div 64
+    int bit = index & 0x3f;     // mod 64
+    long bitmask = 1L << bit;
+    bits[wordNum] ^= bitmask;
+    return (bits[wordNum] & bitmask) != 0;
+  }
+
+  /** flips a bit and returns the resulting bit value.
+   * The index should be less than the OpenBitSet size.
+   */
+  public boolean flipAndGet(long index) {
+    int wordNum = (int)(index >> 6);   // div 64
+    int bit = (int)index & 0x3f;       // mod 64
+    long bitmask = 1L << bit;
+    bits[wordNum] ^= bitmask;
+    return (bits[wordNum] & bitmask) != 0;
+  }
+
+  /** Flips a range of bits, expanding the set size if necessary
+   *
+   * @param startIndex lower index
+   * @param endIndex one-past the last bit to flip
+   */
+  public void flip(long startIndex, long endIndex) {
+    if (endIndex <= startIndex) return;
+    int oldlen = wlen;
+    int startWord = (int)(startIndex>>6);
+
+    // since endIndex is one past the end, this is index of the last
+    // word to be changed.
+    int endWord   = expandingWordNum(endIndex-1);
+
+    /*** Grrr, java shifting wraps around so -1L>>>64 == -1
+     * for that reason, make sure not to use endmask if the bits to flip will
+     * be zero in the last word (redefine endWord to be the last changed...)
+    long startmask = -1L << (startIndex & 0x3f);     // example: 11111...111000
+    long endmask = -1L >>> (64-(endIndex & 0x3f));   // example: 00111...111111
+    ***/
+
+    long startmask = -1L << startIndex;
+    long endmask = -1L >>> -endIndex;  // 64-(endIndex&0x3f) is the same as -endIndex due to wrap
+
+    if (startWord == endWord) {
+      bits[startWord] ^= (startmask & endmask);
+      return;
+    }
+
+    bits[startWord] ^= startmask;
+
+    for (int i=startWord+1; i<endWord; i++) {
+      bits[i] = ~bits[i];
+    }
+
+    bits[endWord] ^= endmask;
+  }
+
+
+  /*
+  public static int pop(long v0, long v1, long v2, long v3) {
+    // derived from pop_array by setting last four elems to 0.
+    // exchanges one pop() call for 10 elementary operations
+    // saving about 7 instructions... is there a better way?
+      long twosA=v0 & v1;
+      long ones=v0^v1;
+
+      long u2=ones^v2;
+      long twosB =(ones&v2)|(u2&v3);
+      ones=u2^v3;
+
+      long fours=(twosA&twosB);
+      long twos=twosA^twosB;
+
+      return (pop(fours)<<2)
+             + (pop(twos)<<1)
+             + pop(ones);
+
+  }
+  */
+
+
+  /** @return the number of set bits */
+  public long cardinality() {
+    return BitUtil.pop_array(bits,0,wlen);
+  }
+
+ /** Returns the popcount or cardinality of the intersection of the two sets.
+   * Neither set is modified.
+   */
+  public static long intersectionCount(OpenBitSet a, OpenBitSet b) {
+    return BitUtil.pop_intersect(a.bits, b.bits, 0, Math.min(a.wlen, b.wlen));
+ }
+
+  /** Returns the popcount or cardinality of the union of the two sets.
+    * Neither set is modified.
+    */
+  public static long unionCount(OpenBitSet a, OpenBitSet b) {
+    long tot = BitUtil.pop_union(a.bits, b.bits, 0, Math.min(a.wlen, b.wlen));
+    if (a.wlen < b.wlen) {
+      tot += BitUtil.pop_array(b.bits, a.wlen, b.wlen-a.wlen);
+    } else if (a.wlen > b.wlen) {
+      tot += BitUtil.pop_array(a.bits, b.wlen, a.wlen-b.wlen);
+    }
+    return tot;
+  }
+
+  /** Returns the popcount or cardinality of "a and not b"
+   * or "intersection(a, not(b))".
+   * Neither set is modified.
+   */
+  public static long andNotCount(OpenBitSet a, OpenBitSet b) {
+    long tot = BitUtil.pop_andnot(a.bits, b.bits, 0, Math.min(a.wlen, b.wlen));
+    if (a.wlen > b.wlen) {
+      tot += BitUtil.pop_array(a.bits, b.wlen, a.wlen-b.wlen);
+    }
+    return tot;
+  }
+
+ /** Returns the popcount or cardinality of the exclusive-or of the two sets.
+  * Neither set is modified.
+  */
+  public static long xorCount(OpenBitSet a, OpenBitSet b) {
+    long tot = BitUtil.pop_xor(a.bits, b.bits, 0, Math.min(a.wlen, b.wlen));
+    if (a.wlen < b.wlen) {
+      tot += BitUtil.pop_array(b.bits, a.wlen, b.wlen-a.wlen);
+    } else if (a.wlen > b.wlen) {
+      tot += BitUtil.pop_array(a.bits, b.wlen, a.wlen-b.wlen);
+    }
+    return tot;
+  }
+
+
+  /** Returns the index of the first set bit starting at the index specified.
+   *  -1 is returned if there are no more set bits.
+   */
+  public int nextSetBit(int index) {
+    int i = index>>6;
+    if (i>=wlen) return -1;
+    int subIndex = index & 0x3f;      // index within the word
+    long word = bits[i] >> subIndex;  // skip all the bits to the right of index
+
+    if (word!=0) {
+      return (i<<6) + subIndex + BitUtil.ntz(word);
+    }
+
+    while(++i < wlen) {
+      word = bits[i];
+      if (word!=0) return (i<<6) + BitUtil.ntz(word);
+    }
+
+    return -1;
+  }
+
+  /** Returns the index of the first set bit starting at the index specified.
+   *  -1 is returned if there are no more set bits.
+   */
+  public long nextSetBit(long index) {
+    int i = (int)(index>>>6);
+    if (i>=wlen) return -1;
+    int subIndex = (int)index & 0x3f; // index within the word
+    long word = bits[i] >>> subIndex;  // skip all the bits to the right of index
+
+    if (word!=0) {
+      return (((long)i)<<6) + (subIndex + BitUtil.ntz(word));
+    }
+
+    while(++i < wlen) {
+      word = bits[i];
+      if (word!=0) return (((long)i)<<6) + BitUtil.ntz(word);
+    }
+
+    return -1;
+  }
+
+
+
+
+  public Object clone() {
+    try {
+      OpenBitSet obs = (OpenBitSet)super.clone();
+      obs.bits = (long[]) obs.bits.clone();  // hopefully an array clone is as fast(er) than arraycopy
+      return obs;
+    } catch (CloneNotSupportedException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /** this = this AND other */
+  public void intersect(OpenBitSet other) {
+    int newLen= Math.min(this.wlen,other.wlen);
+    long[] thisArr = this.bits;
+    long[] otherArr = other.bits;
+    // testing against zero can be more efficient
+    int pos=newLen;
+    while(--pos>=0) {
+      thisArr[pos] &= otherArr[pos];
+    }
+    if (this.wlen > newLen) {
+      // fill zeros from the new shorter length to the old length
+      Arrays.fill(bits,newLen,this.wlen,0);
+    }
+    this.wlen = newLen;
+  }
+
+  /** this = this OR other */
+  public void union(OpenBitSet other) {
+    int newLen = Math.max(wlen,other.wlen);
+    ensureCapacityWords(newLen);
+
+    long[] thisArr = this.bits;
+    long[] otherArr = other.bits;
+    int pos=Math.min(wlen,other.wlen);
+    while(--pos>=0) {
+      thisArr[pos] |= otherArr[pos];
+    }
+    if (this.wlen < newLen) {
+      System.arraycopy(otherArr, this.wlen, thisArr, this.wlen, newLen-this.wlen);
+    }
+    this.wlen = newLen;
+  }
+
+
+  /** Remove all elements set in other. this = this AND_NOT other */
+  public void remove(OpenBitSet other) {
+    int idx = Math.min(wlen,other.wlen);
+    long[] thisArr = this.bits;
+    long[] otherArr = other.bits;
+    while(--idx>=0) {
+      thisArr[idx] &= ~otherArr[idx];
+    }
+  }
+
+  /** this = this XOR other */
+  public void xor(OpenBitSet other) {
+    int newLen = Math.max(wlen,other.wlen);
+    ensureCapacityWords(newLen);
+
+    long[] thisArr = this.bits;
+    long[] otherArr = other.bits;
+    int pos=Math.min(wlen,other.wlen);
+    while(--pos>=0) {
+      thisArr[pos] ^= otherArr[pos];
+    }
+    if (this.wlen < newLen) {
+      System.arraycopy(otherArr, this.wlen, thisArr, this.wlen, newLen-this.wlen);
+    }
+    this.wlen = newLen;
+  }
+
+
+  // some BitSet compatability methods
+
+  //** see {@link intersect} */
+  public void and(OpenBitSet other) {
+    intersect(other);
+  }
+
+  //** see {@link union} */
+  public void or(OpenBitSet other) {
+    union(other);
+  }
+
+  //** see {@link andNot} */
+  public void andNot(OpenBitSet other) {
+    remove(other);
+  }
+
+  /** returns true if the sets have any elements in common */
+  public boolean intersects(OpenBitSet other) {
+    int pos = Math.min(this.wlen, other.wlen);
+    long[] thisArr = this.bits;
+    long[] otherArr = other.bits;
+    while (--pos>=0) {
+      if ((thisArr[pos] & otherArr[pos])!=0) return true;
+    }
+    return false;
+  }
+
+
+
+  /** Expand the long[] with the size given as a number of words (64 bit longs).
+   * getNumWords() is unchanged by this call.
+   */
+  public void ensureCapacityWords(int numWords) {
+    if (bits.length < numWords) {
+      long[] newBits = new long[numWords];
+      System.arraycopy(bits,0,newBits,0,wlen);
+      bits = newBits;
+    }
+  }
+
+  /** Ensure that the long[] is big enough to hold numBits, expanding it if necessary.
+   * getNumWords() is unchanged by this call.
+   */
+  public void ensureCapacity(long numBits) {
+    ensureCapacityWords(bits2words(numBits));
+  }
+
+  /** Lowers numWords, the number of words in use,
+   * by checking for trailing zero words.
+   */
+  public void trimTrailingZeros() {
+    int idx = wlen-1;
+    while (idx>=0 && bits[idx]==0) idx--;
+    wlen = idx+1;
+  }
+
+  /** returns the number of 64 bit words it would take to hold numBits */
+  public static int bits2words(long numBits) {
+   return (int)(((numBits-1)>>>6)+1);
+  }
+
+
+  /** returns true if both sets have the same bits set */
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    if (!(o instanceof OpenBitSet)) return false;
+    OpenBitSet a;
+    OpenBitSet b = (OpenBitSet)o;
+    // make a the larger set.
+    if (b.wlen > this.wlen) {
+      a = b; b=this;
+    } else {
+      a=this;
+    }
+
+    // check for any set bits out of the range of b
+    for (int i=a.wlen-1; i>=b.wlen; i--) {
+      if (a.bits[i]!=0) return false;
+    }
+
+    for (int i=b.wlen-1; i>=0; i--) {
+      if (a.bits[i] != b.bits[i]) return false;
+    }
+
+    return true;
+  }
+
+
+  public int hashCode() {
+      long h = 0x98761234;  // something non-zero for length==0
+      for (int i = bits.length; --i>=0;) {
+      h ^= bits[i];
+      h = (h << 1) | (h >>> 31); // rotate left
+    }
+    return (int)((h>>32) ^ h);  // fold leftmost bits into right
+  }
+
+}
+
+
--- a/src/java/org/apache/lucene/util/OpenBitSetIterator.java
+++ b/src/java/org/apache/lucene/util/OpenBitSetIterator.java
@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util;
+
+import java.io.IOException;
+
+import org.apache.lucene.search.DocIdSetIterator;
+
+/** An iterator to iterate over set bits in an OpenBitSet.
+ * This is faster than nextSetBit() for iterating over the complete set of bits,
+ * especially when the density of the bits set is high.
+ *
+ * @version $Id$
+ */
+public class OpenBitSetIterator extends DocIdSetIterator {
+
+  // The General Idea: instead of having an array per byte that has
+  // the offsets of the next set bit, that array could be
+  // packed inside a 32 bit integer (8 4 bit numbers).  That
+  // should be faster than accessing an array for each index, and
+  // the total array size is kept smaller (256*sizeof(int))=1K
+  protected final static int[] bitlist={
+    0x0,0x1,0x2,0x21,0x3,0x31,0x32,0x321,0x4,0x41,0x42,0x421,0x43,0x431,0x432,0x4321,0x5,0x51,0x52,0x521,0x53,0x531,0x532,0x5321,0x54,0x541,0x542,0x5421,0x543,0x5431,0x5432,0x54321,0x6,0x61,0x62,0x621,0x63,0x631,0x632,0x6321,0x64,0x641,0x642,0x6421,0x643,0x6431,0x6432,0x64321,0x65,0x651,0x652,0x6521,0x653,0x6531,0x6532,0x65321,0x654,0x6541,0x6542,0x65421,0x6543,0x65431,0x65432,0x654321,0x7,0x71,0x72,0x721,0x73,0x731,0x732,0x7321,0x74,0x741,0x742,0x7421,0x743,0x7431,0x7432,0x74321,0x75,0x751,0x752,0x7521,0x753,0x7531,0x7532,0x75321,0x754,0x7541,0x7542,0x75421,0x7543,0x75431,0x75432,0x754321,0x76,0x761,0x762,0x7621,0x763,0x7631,0x7632,0x76321,0x764,0x7641,0x7642,0x76421,0x7643,0x76431,0x76432,0x764321,0x765,0x7651,0x7652,0x76521,0x7653,0x76531,0x76532,0x765321,0x7654,0x76541,0x76542,0x765421,0x76543,0x765431,0x765432,0x7654321,0x8,0x81,0x82,0x821,0x83,0x831,0x832,0x8321,0x84,0x841,0x842,0x8421,0x843,0x8431,0x8432,0x84321,0x85,0x851,0x852,0x8521,0x853,0x8531,0x8532,0x85321,0x854,0x8541,0x8542,0x85421,0x8543,0x85431,0x85432,0x854321,0x86,0x861,0x862,0x8621,0x863,0x8631,0x8632,0x86321,0x864,0x8641,0x8642,0x86421,0x8643,0x86431,0x86432,0x864321,0x865,0x8651,0x8652,0x86521,0x8653,0x86531,0x86532,0x865321,0x8654,0x86541,0x86542,0x865421,0x86543,0x865431,0x865432,0x8654321,0x87,0x871,0x872,0x8721,0x873,0x8731,0x8732,0x87321,0x874,0x8741,0x8742,0x87421,0x8743,0x87431,0x87432,0x874321,0x875,0x8751,0x8752,0x87521,0x8753,0x87531,0x87532,0x875321,0x8754,0x87541,0x87542,0x875421,0x87543,0x875431,0x875432,0x8754321,0x876,0x8761,0x8762,0x87621,0x8763,0x87631,0x87632,0x876321,0x8764,0x87641,0x87642,0x876421,0x87643,0x876431,0x876432,0x8764321,0x8765,0x87651,0x87652,0x876521,0x87653,0x876531,0x876532,0x8765321,0x87654,0x876541,0x876542,0x8765421,0x876543,0x8765431,0x8765432,0x87654321
+  };
+  /***** the python code that generated bitlist
+  def bits2int(val):
+  arr=0
+  for shift in range(8,0,-1):
+    if val & 0x80:
+      arr = (arr << 4) | shift
+    val = val << 1
+  return arr
+
+  def int_table():
+    tbl = [ hex(bits2int(val)).strip('L') for val in range(256) ]
+    return ','.join(tbl)
+  ******/
+
+  // hmmm, what about an iterator that finds zeros though,
+  // or a reverse iterator... should they be separate classes
+  // for efficiency, or have a common root interface?  (or
+  // maybe both?  could ask for a SetBitsIterator, etc...
+
+
+  private final long[] arr;
+  private final int words;
+  private int i=-1;
+  private long word;
+  private int wordShift;
+  private int indexArray;
+  private int curDocId;
+
+  public OpenBitSetIterator(OpenBitSet obs) {
+    this(obs.getBits(), obs.getNumWords());
+  }
+
+  public OpenBitSetIterator(long[] bits, int numWords) {
+    arr = bits;
+    words = numWords;
+  }
+
+  // 64 bit shifts
+  private void shift() {
+    if ((int)word ==0) {wordShift +=32; word = word >>>32; }
+    if ((word & 0x0000FFFF) == 0) { wordShift +=16; word >>>=16; }
+    if ((word & 0x000000FF) == 0) { wordShift +=8; word >>>=8; }
+    indexArray = bitlist[(int)word & 0xff];
+  }
+
+  /***** alternate shift implementations
+  // 32 bit shifts, but a long shift needed at the end
+  private void shift2() {
+    int y = (int)word;
+    if (y==0) {wordShift +=32; y = (int)(word >>>32); }
+    if ((y & 0x0000FFFF) == 0) { wordShift +=16; y>>>=16; }
+    if ((y & 0x000000FF) == 0) { wordShift +=8; y>>>=8; }
+    indexArray = bitlist[y & 0xff];
+    word >>>= (wordShift +1);
+  }
+
+  private void shift3() {
+    int lower = (int)word;
+    int lowByte = lower & 0xff;
+    if (lowByte != 0) {
+      indexArray=bitlist[lowByte];
+      return;
+    }
+    shift();
+  }
+  ******/
+
+  public boolean next() {
+    if (indexArray==0) {
+      if (word!=0) {
+        word >>>= 8;
+        wordShift += 8;
+      }
+
+      while (word==0) {
+        if (++i >= words) {
+          curDocId = -1;
+          return false;
+        }
+        word = arr[i];
+        wordShift =-1;  // loop invariant code motion should move this
+      }
+
+      // after the first time, should I go with a linear search, or
+      // stick with the binary search in shift?
+      shift();
+    }
+
+    int bitIndex = (indexArray & 0x0f) + wordShift;
+    indexArray >>>= 4;
+    // should i<<6 be cached as a separate variable?
+    // it would only save one cycle in the best circumstances.
+    curDocId = (i<<6) + bitIndex;
+    return true;
+  }
+
+  public boolean skipTo(int target) {
+    indexArray=0;
+    i = target >> 6;
+    if (i>=words) {
+      word =0; // setup so next() will also return -1
+      curDocId = -1;
+      return false;
+    }
+    wordShift = target & 0x3f;
+    word = arr[i] >>> wordShift;
+    if (word !=0) {
+      wordShift--; // compensate for 1 based arrIndex
+    } else {
+      while (word ==0) {
+        if (++i >= words) {
+          curDocId = -1;
+          return false;
+        }
+        word = arr[i];
+      }
+      wordShift =-1;
+    }
+
+    shift();
+
+    int bitIndex = (indexArray & 0x0f) + wordShift;
+    indexArray >>>= 4;
+    // should i<<6 be cached as a separate variable?
+    // it would only save one cycle in the best circumstances.
+    curDocId = (i<<6) + bitIndex;
+    return true;
+  }
+  
+  public int doc() {
+    return this.curDocId;
+  }
+
+}
--- a/src/java/org/apache/lucene/util/SortedVIntList.java
+++ b/src/java/org/apache/lucene/util/SortedVIntList.java
@ -0,0 +1,218 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.BitSet;
+
+import org.apache.lucene.search.DocIdSet;
+import org.apache.lucene.search.DocIdSetIterator;
+
+/**
+ *  Store and iterate sorted integers in compressed form in RAM.
+ *  <br>The code for compressing the differences between ascending integers was
+ *  borrowed from {@link org.apache.lucene.store.IndexInput} and
+ *  {@link org.apache.lucene.store.IndexOutput}.
+ */
+public class SortedVIntList extends DocIdSet {
+  /** When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set,
+   * a SortedVIntList representing the index numbers of the set bits
+   * will be smaller than that BitSet.
+   */
+  final static int BITS2VINTLIST_SIZE = 8;
+
+  private int size;
+  private byte[] bytes;
+  private int lastBytePos;
+    
+  /**
+   *  Create a SortedVIntList from all elements of an array of integers.
+   *
+   * @param  sortedInts  A sorted array of non negative integers.
+   */
+  public SortedVIntList(int[] sortedInts) {
+    this(sortedInts, sortedInts.length);
+  }
+
+  /**
+   * Create a SortedVIntList from an array of integers.
+   * @param  sortedInts  An array of sorted non negative integers.
+   * @param  inputSize   The number of integers to be used from the array.
+   */
+  public SortedVIntList(int[] sortedInts, int inputSize) {
+    SortedVIntListBuilder builder = new SortedVIntListBuilder();
+    for (int i = 0; i < inputSize; i++) {
+      builder.addInt(sortedInts[i]);
+    }
+    builder.done();
+  }
+
+  /**
+   * Create a SortedVIntList from a BitSet.
+   * @param  bits  A bit set representing a set of integers.
+   */
+  public SortedVIntList(BitSet bits) {
+    SortedVIntListBuilder builder = new SortedVIntListBuilder();
+    int nextInt = bits.nextSetBit(0);
+    while (nextInt != -1) {
+      builder.addInt(nextInt);
+      nextInt = bits.nextSetBit(nextInt + 1);
+    }
+    builder.done();
+  }
+
+  /**
+   * Create a SortedVIntList from an OpenBitSet.
+   * @param  bits  A bit set representing a set of integers.
+   */
+  public SortedVIntList(OpenBitSet bits) {
+    SortedVIntListBuilder builder = new SortedVIntListBuilder();
+    int nextInt = bits.nextSetBit(0);
+    while (nextInt != -1) {
+      builder.addInt(nextInt);
+      nextInt = bits.nextSetBit(nextInt + 1);
+    }
+    builder.done();
+  }
+
+  /**
+   * Create a SortedVIntList.
+   * @param  docIdSetIterator  An iterator providing document numbers as a set of integers.
+   *                  This DocIdSetIterator is iterated completely when this constructor
+   *                  is called and it must provide the integers in non
+   *                  decreasing order.
+   */
+  public SortedVIntList(DocIdSetIterator docIdSetIterator) throws IOException {
+    SortedVIntListBuilder builder = new SortedVIntListBuilder();
+    while (docIdSetIterator.next()) {
+      builder.addInt(docIdSetIterator.doc());
+    }
+    builder.done();
+  }
+
+
+  private class SortedVIntListBuilder {
+    private int lastInt = 0;
+    
+    SortedVIntListBuilder() {
+      initBytes();
+      lastInt = 0;
+    }
+
+    void addInt(int nextInt) {
+      int diff = nextInt - lastInt;
+      if (diff < 0) {
+        throw new IllegalArgumentException(
+            "Input not sorted or first element negative.");
+      }
+  
+      if ((lastBytePos + MAX_BYTES_PER_INT) > bytes.length) {
+        // biggest possible int does not fit
+        resizeBytes((bytes.length * 2) + MAX_BYTES_PER_INT);
+      }
+  
+      // See org.apache.lucene.store.IndexOutput.writeVInt()
+      while ((diff & ~VB1) != 0) { // The high bit of the next byte needs to be set.
+        bytes[lastBytePos++] = (byte) ((diff & VB1) | ~VB1);
+        diff >>>= BIT_SHIFT;
+      }
+      bytes[lastBytePos++] = (byte) diff; // Last byte, high bit not set.
+      size++;
+      lastInt = nextInt;
+    }
+    
+    void done() {
+      resizeBytes(lastBytePos);
+    }
+  }
+
+
+  private void initBytes() {
+    size = 0;
+    bytes = new byte[128]; // initial byte size
+    lastBytePos = 0;
+  }
+
+  private void resizeBytes(int newSize) {
+    if (newSize != bytes.length) {
+      byte[] newBytes = new byte[newSize];
+      System.arraycopy(bytes, 0, newBytes, 0, lastBytePos);
+      bytes = newBytes;
+    }
+  }
+
+  private static final int VB1 = 0x7F;
+  private static final int BIT_SHIFT = 7;
+  private final int MAX_BYTES_PER_INT = (31 / BIT_SHIFT) + 1;
+
+  /**
+   * @return    The total number of sorted integers.
+   */
+  public int size() {
+    return size;
+  }
+
+  /**
+   * @return The size of the byte array storing the compressed sorted integers.
+   */
+  public int getByteSize() {
+    return bytes.length;
+  }
+
+  /**
+   * @return    An iterator over the sorted integers.
+   */
+  public DocIdSetIterator iterator() {
+    return new DocIdSetIterator() {
+      int bytePos = 0;
+      int lastInt = 0;
+      
+      private void advance() {
+        // See org.apache.lucene.store.IndexInput.readVInt()
+        byte b = bytes[bytePos++];
+        lastInt += b & VB1;
+        for (int s = BIT_SHIFT; (b & ~VB1) != 0; s += BIT_SHIFT) {
+          b = bytes[bytePos++];
+          lastInt += (b & VB1) << s;
+        }
+      }
+      
+      public int doc() {return lastInt;}
+      
+      public boolean next() {
+        if (bytePos >= lastBytePos) {
+          return false;
+        } else {
+          advance();
+          return true;
+        }
+      }
+
+      public boolean skipTo(int docNr) {
+        while (bytePos < lastBytePos) {
+          advance();
+          if (lastInt >= docNr) { // No skipping to docNr available.
+            return true;
+          }
+        }
+        return false;
+      }
+    };
+  }
+}
+
--- a/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java
+++ b/src/test/org/apache/lucene/search/CachingWrapperFilterHelper.java
@ -43,13 +43,13 @@ public class CachingWrapperFilterHelper extends CachingWrapperFilter {
    this.shouldHaveCache = shouldHaveCache;
  }
  
-  public BitSet bits(IndexReader reader) throws IOException {
+  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    if (cache == null) {
      cache = new WeakHashMap();
    }
    
    synchronized (cache) {  // check cache
-      BitSet cached = (BitSet) cache.get(reader);
+      DocIdSet cached = (DocIdSet) cache.get(reader);
      if (shouldHaveCache) {
        TestCase.assertNotNull("Cache should have data ", cached);
      } else {
@ -60,7 +60,7 @@ public class CachingWrapperFilterHelper extends CachingWrapperFilter {
      }
    }

-    final BitSet bits = filter.bits(reader);
+    final DocIdSet bits = filter.getDocIdSet(reader);

    synchronized (cache) {  // update cache
      cache.put(reader, bits);
--- a/src/test/org/apache/lucene/search/MockFilter.java
+++ b/src/test/org/apache/lucene/search/MockFilter.java
@ -18,14 +18,15 @@ package org.apache.lucene.search;
 */

 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.DocIdBitSet;
 import java.util.BitSet;

 public class MockFilter extends Filter {
  private boolean wasCalled;

-  public BitSet bits(IndexReader reader) {
+  public DocIdSet getDocIdSet(IndexReader reader) {
    wasCalled = true;
-    return new BitSet();
+    return new DocIdBitSet(new BitSet());
  }

  public void clear() {
--- a/src/test/org/apache/lucene/search/RemoteCachingWrapperFilterHelper.java
+++ b/src/test/org/apache/lucene/search/RemoteCachingWrapperFilterHelper.java
@ -42,7 +42,7 @@ public class RemoteCachingWrapperFilterHelper extends RemoteCachingWrapperFilter
    this.shouldHaveCache = shouldHaveCache;
  }

-  public BitSet bits(IndexReader reader) throws IOException {
+  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    Filter cachedFilter = FilterManager.getInstance().getFilter(filter);
    
    TestCase.assertNotNull("Filter should not be null", cachedFilter);
@ -55,6 +55,6 @@ public class RemoteCachingWrapperFilterHelper extends RemoteCachingWrapperFilter
    if (filter instanceof CachingWrapperFilterHelper) {
      ((CachingWrapperFilterHelper)cachedFilter).setShouldHaveCache(shouldHaveCache);
    }
-    return cachedFilter.bits(reader);
+    return cachedFilter.getDocIdSet(reader);
  }
 }
--- a/src/test/org/apache/lucene/search/SingleDocTestFilter.java
+++ b/src/test/org/apache/lucene/search/SingleDocTestFilter.java
@ -18,6 +18,7 @@ package org.apache.lucene.search;
 */

 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.DocIdBitSet;

 import java.util.BitSet;
 import java.io.IOException;
@ -29,9 +30,9 @@ public class SingleDocTestFilter extends Filter {
    this.doc = doc;
  }

-  public BitSet bits(IndexReader reader) throws IOException {
+  public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    BitSet bits = new BitSet(reader.maxDoc());
    bits.set(doc);
-    return bits;
+    return new DocIdBitSet(bits);
  }
 }
--- a/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java
+++ b/src/test/org/apache/lucene/search/TestCachingWrapperFilter.java
@ -36,12 +36,12 @@ public class TestCachingWrapperFilter extends LuceneTestCase {
    CachingWrapperFilter cacher = new CachingWrapperFilter(filter);

    // first time, nested filter is called
-    cacher.bits(reader);
+    cacher.getDocIdSet(reader);
    assertTrue("first time", filter.wasCalled());

    // second time, nested filter should not be called
    filter.clear();
-    cacher.bits(reader);
+    cacher.getDocIdSet(reader);
    assertFalse("second time", filter.wasCalled());

    reader.close();
--- a/src/test/org/apache/lucene/search/TestExplanations.java
+++ b/src/test/org/apache/lucene/search/TestExplanations.java
@ -33,6 +33,7 @@ import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.queryParser.ParseException;

 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.DocIdBitSet;

 import java.util.Random;
 import java.util.BitSet;
@ -122,12 +123,12 @@ public class TestExplanations extends LuceneTestCase {
    public ItemizedFilter(int[] docs) {
      this.docs = docs;
    }
-    public BitSet bits(IndexReader r) {
+    public DocIdSet getDocIdSet(IndexReader r) {
      BitSet b = new BitSet(r.maxDoc());
      for (int i = 0; i < docs.length; i++) {
        b.set(docs[i]);
      }
-      return b;
+      return new DocIdBitSet(b);
    }
  }

--- a/src/test/org/apache/lucene/search/TestFilteredQuery.java
+++ b/src/test/org/apache/lucene/search/TestFilteredQuery.java
@ -26,6 +26,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.DocIdBitSet;

 import java.util.BitSet;

@ -82,11 +83,11 @@ extends LuceneTestCase {
  // must be static for serialization tests
  private static Filter newStaticFilterB() {
    return new Filter() {
-      public BitSet bits (IndexReader reader) {
+      public DocIdSet getDocIdSet (IndexReader reader) {
        BitSet bitset = new BitSet(5);
        bitset.set (1);
        bitset.set (3);
-        return bitset;
+        return new DocIdBitSet(bitset);
      }
    };
  }
@ -150,10 +151,10 @@ extends LuceneTestCase {
  // must be static for serialization tests 
  private static Filter newStaticFilterA() {
    return new Filter() {
-      public BitSet bits (IndexReader reader) {
+      public DocIdSet getDocIdSet (IndexReader reader) {
        BitSet bitset = new BitSet(5);
        bitset.set(0, 5);
-        return bitset;
+        return new DocIdBitSet(bitset);
      }
    };
  }
@ -200,3 +201,4 @@ extends LuceneTestCase {
 }


+
--- a/src/test/org/apache/lucene/search/TestRemoteCachingWrapperFilter.java
+++ b/src/test/org/apache/lucene/search/TestRemoteCachingWrapperFilter.java
@ -91,7 +91,7 @@ public class TestRemoteCachingWrapperFilter extends LuceneTestCase {


  public void testTermRemoteFilter() throws Exception {
-    CachingWrapperFilterHelper cwfh = new CachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "a"))));
+    CachingWrapperFilterHelper cwfh = new CachingWrapperFilterHelper(new QueryWrapperFilter(new TermQuery(new Term("type", "a"))));
    
    // This is what we are fixing - if one uses a CachingWrapperFilter(Helper) it will never 
    // cache the filter on the remote site
@ -112,16 +112,16 @@ public class TestRemoteCachingWrapperFilter extends LuceneTestCase {
    // assert that we get the same cached Filter, even if we create a new instance of RemoteCachingWrapperFilter(Helper)
    // this should pass because the Filter parameters are the same, and the cache uses Filter's hashCode() as cache keys,
    // and Filters' hashCode() builds on Filter parameters, not the Filter instance itself
-    rcwfh = new RemoteCachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "a"))), false);
+    rcwfh = new RemoteCachingWrapperFilterHelper(new QueryWrapperFilter(new TermQuery(new Term("type", "a"))), false);
    rcwfh.shouldHaveCache(false);
    search(new TermQuery(new Term("test", "test")), rcwfh, 0, "A");

-    rcwfh = new RemoteCachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "a"))), false);
+    rcwfh = new RemoteCachingWrapperFilterHelper(new QueryWrapperFilter(new TermQuery(new Term("type", "a"))), false);
    rcwfh.shouldHaveCache(true);
    search(new TermQuery(new Term("test", "test")), rcwfh, 0, "A");

    // assert that we get a non-cached version of the Filter because this is a new Query (type:b)
-    rcwfh = new RemoteCachingWrapperFilterHelper(new QueryFilter(new TermQuery(new Term("type", "b"))), false);
+    rcwfh = new RemoteCachingWrapperFilterHelper(new QueryWrapperFilter(new TermQuery(new Term("type", "b"))), false);
    rcwfh.shouldHaveCache(false);
    search(new TermQuery(new Term("type", "b")), rcwfh, 0, "B");
  }
--- a/src/test/org/apache/lucene/search/TestRemoteSearchable.java
+++ b/src/test/org/apache/lucene/search/TestRemoteSearchable.java
@ -116,11 +116,11 @@ public class TestRemoteSearchable extends LuceneTestCase {
    Searcher searcher = new MultiSearcher(searchables);
    Hits hits = searcher.search(
          new TermQuery(new Term("test", "text")),
-          new QueryFilter(new TermQuery(new Term("test", "test"))));
+          new QueryWrapperFilter(new TermQuery(new Term("test", "test"))));
    assertEquals(1, hits.length());
    Hits nohits = searcher.search(
          new TermQuery(new Term("test", "text")),
-          new QueryFilter(new TermQuery(new Term("test", "non-existent-term"))));
+          new QueryWrapperFilter(new TermQuery(new Term("test", "non-existent-term"))));
    assertEquals(0, nohits.length());
  }

@ -129,7 +129,7 @@ public class TestRemoteSearchable extends LuceneTestCase {
    Searchable[] searchables = { getRemote() };
    Searcher searcher = new MultiSearcher(searchables);
    Hits hits = searcher.search(
-          new ConstantScoreQuery(new QueryFilter(
+          new ConstantScoreQuery(new QueryWrapperFilter(
                                   new TermQuery(new Term("test", "test")))));
    assertEquals(1, hits.length());
  }
--- a/src/test/org/apache/lucene/search/TestScorerPerf.java
+++ b/src/test/org/apache/lucene/search/TestScorerPerf.java
@ -1,5 +1,6 @@
 package org.apache.lucene.search;

+import org.apache.lucene.util.DocIdBitSet;
 import org.apache.lucene.util.LuceneTestCase;

 import java.util.Random;
@ -95,16 +96,6 @@ public class TestScorerPerf extends LuceneTestCase {
    return sets;
  }

-  public static class BitSetFilter extends Filter {
-    public BitSet set;
-    public BitSetFilter(BitSet set) {
-      this.set = set;
-    }
-    public BitSet bits(IndexReader reader) throws IOException {
-      return set;
-    }
-  }
-
  public static class CountingHitCollector extends HitCollector {
    int count=0;
    int sum=0;
@ -137,8 +128,12 @@ public class TestScorerPerf extends LuceneTestCase {


  BitSet addClause(BooleanQuery bq, BitSet result) {
-    BitSet rnd = sets[r.nextInt(sets.length)];
-    Query q = new ConstantScoreQuery(new BitSetFilter(rnd));
+    final BitSet rnd = sets[r.nextInt(sets.length)];
+    Query q = new ConstantScoreQuery(new Filter() {
+      public DocIdSet getDocIdSet(IndexReader reader) {
+        return new DocIdBitSet(rnd);
+      };
+    });
    bq.add(q, BooleanClause.Occur.MUST);
    if (validate) {
      if (result==null) result = (BitSet)rnd.clone();
--- a/src/test/org/apache/lucene/search/TestSort.java
+++ b/src/test/org/apache/lucene/search/TestSort.java
@ -28,6 +28,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.DocIdBitSet;

 import java.io.IOException;
 import java.io.Serializable;
@ -571,10 +572,10 @@ implements Serializable {

    // a filter that only allows through the first hit
    Filter filt = new Filter() {
-      public BitSet bits(IndexReader reader) throws IOException {
+      public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
        BitSet bs = new BitSet(reader.maxDoc());
        bs.set(docs1.scoreDocs[0].doc);
-        return bs;
+        return new DocIdBitSet(bs);
      }
    };

--- a/src/test/org/apache/lucene/search/TestSpanQueryFilter.java
+++ b/src/test/org/apache/lucene/search/TestSpanQueryFilter.java
@ -56,20 +56,36 @@ public class TestSpanQueryFilter extends LuceneTestCase {
    SpanTermQuery query = new SpanTermQuery(new Term("field", English.intToEnglish(10).trim()));
    SpanQueryFilter filter = new SpanQueryFilter(query);
    SpanFilterResult result = filter.bitSpans(reader);
-    BitSet bits = result.getBits();
-    assertTrue("bits is null and it shouldn't be", bits != null);
-    assertTrue("tenth bit is not on", bits.get(10));
+    DocIdSet docIdSet = result.getDocIdSet();
+    assertTrue("docIdSet is null and it shouldn't be", docIdSet != null);
+    assertContainsDocId("docIdSet doesn't contain docId 10", docIdSet, 10);
    List spans = result.getPositions();
    assertTrue("spans is null and it shouldn't be", spans != null);
-    assertTrue("spans Size: " + spans.size() + " is not: " + bits.cardinality(), spans.size() == bits.cardinality());
+    int size = getDocIdSetSize(docIdSet);
+    assertTrue("spans Size: " + spans.size() + " is not: " + size, spans.size() == size);
    for (Iterator iterator = spans.iterator(); iterator.hasNext();) {
       SpanFilterResult.PositionInfo info = (SpanFilterResult.PositionInfo) iterator.next();
      assertTrue("info is null and it shouldn't be", info != null);
      //The doc should indicate the bit is on
-      assertTrue("Bit is not on and it should be", bits.get(info.getDoc()));
+      assertContainsDocId("docIdSet doesn't contain docId " + info.getDoc(), docIdSet, info.getDoc());
      //There should be two positions in each
      assertTrue("info.getPositions() Size: " + info.getPositions().size() + " is not: " + 2, info.getPositions().size() == 2);
    }
    reader.close();
  }
+  
+  int getDocIdSetSize(DocIdSet docIdSet) throws Exception {
+    int size = 0;
+    DocIdSetIterator it = docIdSet.iterator();
+    while (it.next()) {
+      size++;
+    }
+    return size;
+  }
+  
+  public void assertContainsDocId(String msg, DocIdSet docIdSet, int docId) throws Exception {
+    DocIdSetIterator it = docIdSet.iterator();
+    assertTrue(msg, it.skipTo(docId));
+    assertTrue(msg, it.doc() == docId);
+  }
 }
--- a/src/test/org/apache/lucene/util/TestOpenBitSet.java
+++ b/src/test/org/apache/lucene/util/TestOpenBitSet.java
@ -0,0 +1,203 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.util;
+
+import junit.framework.TestCase;
+
+import java.util.Random;
+import java.util.BitSet;
+
+/**
+ * @version $Id$
+ */
+public class TestOpenBitSet extends TestCase {
+  static Random rand = new Random();
+
+  void doGet(BitSet a, OpenBitSet b) {
+    int max = a.size();
+    for (int i=0; i<max; i++) {
+      if (a.get(i) != b.get(i)) {
+        fail("mismatch: BitSet=["+i+"]="+a.get(i));
+      }
+    }
+  }
+
+  void doNextSetBit(BitSet a, OpenBitSet b) {
+    int aa=-1,bb=-1;
+    do {
+      aa = a.nextSetBit(aa+1);
+      bb = b.nextSetBit(bb+1);
+      assertEquals(aa,bb);
+    } while (aa>=0);
+  }
+
+  // test interleaving different BitSetIterator.next()
+  void doIterate(BitSet a, OpenBitSet b) {
+    int aa=-1,bb=-1;
+    OpenBitSetIterator iterator = new OpenBitSetIterator(b);
+    do {
+      aa = a.nextSetBit(aa+1);
+      if (rand.nextBoolean())
+        iterator.next();
+      else
+        iterator.skipTo(bb+1);
+      bb = iterator.doc();
+      assertEquals(aa,bb);
+    } while (aa>=0);
+  }
+
+
+  void doRandomSets(int maxSize, int iter) {
+    BitSet a0=null;
+    OpenBitSet b0=null;
+
+    for (int i=0; i<iter; i++) {
+      int sz = rand.nextInt(maxSize);
+      BitSet a = new BitSet(sz);
+      OpenBitSet b = new OpenBitSet(sz);
+
+      // test the various ways of setting bits
+      if (sz>0) {
+        int nOper = rand.nextInt(sz);
+        for (int j=0; j<nOper; j++) {
+          int idx;         
+
+          idx = rand.nextInt(sz);
+          a.set(idx);
+          b.fastSet(idx);
+          idx = rand.nextInt(sz);
+          a.clear(idx);
+          b.fastClear(idx);
+          idx = rand.nextInt(sz);
+          a.flip(idx);
+          b.fastFlip(idx);
+
+          boolean val = b.flipAndGet(idx);
+          boolean val2 = b.flipAndGet(idx);
+          assertTrue(val != val2);
+
+          val = b.getAndSet(idx);
+          assertTrue(val2 == val);
+          assertTrue(b.get(idx));
+          
+          if (!val) b.fastClear(idx);
+          assertTrue(b.get(idx) == val);
+        }
+      }
+
+      // test that the various ways of accessing the bits are equivalent
+      doGet(a,b);
+
+      // test ranges, including possible extension
+      int fromIndex, toIndex;
+      fromIndex = rand.nextInt(sz+80);
+      toIndex = fromIndex + rand.nextInt((sz>>1)+1);
+      BitSet aa = (BitSet)a.clone(); aa.flip(fromIndex,toIndex);
+      OpenBitSet bb = (OpenBitSet)b.clone(); bb.flip(fromIndex,toIndex);
+
+      doIterate(aa,bb);   // a problem here is from flip or doIterate
+
+      fromIndex = rand.nextInt(sz+80);
+      toIndex = fromIndex + rand.nextInt((sz>>1)+1);
+      aa = (BitSet)a.clone(); aa.clear(fromIndex,toIndex);
+      bb = (OpenBitSet)b.clone(); bb.clear(fromIndex,toIndex);
+
+      doNextSetBit(aa,bb);  // a problem here is from clear() or nextSetBit
+
+      fromIndex = rand.nextInt(sz+80);
+      toIndex = fromIndex + rand.nextInt((sz>>1)+1);
+      aa = (BitSet)a.clone(); aa.set(fromIndex,toIndex);
+      bb = (OpenBitSet)b.clone(); bb.set(fromIndex,toIndex);
+
+      doNextSetBit(aa,bb);  // a problem here is from set() or nextSetBit     
+
+
+      if (a0 != null) {
+        assertEquals( a.equals(a0), b.equals(b0));
+
+        assertEquals(a.cardinality(), b.cardinality());
+
+        BitSet a_and = (BitSet)a.clone(); a_and.and(a0);
+        BitSet a_or = (BitSet)a.clone(); a_or.or(a0);
+        BitSet a_xor = (BitSet)a.clone(); a_xor.xor(a0);
+        BitSet a_andn = (BitSet)a.clone(); a_andn.andNot(a0);
+
+        OpenBitSet b_and = (OpenBitSet)b.clone(); assertEquals(b,b_and); b_and.and(b0);
+        OpenBitSet b_or = (OpenBitSet)b.clone(); b_or.or(b0);
+        OpenBitSet b_xor = (OpenBitSet)b.clone(); b_xor.xor(b0);
+        OpenBitSet b_andn = (OpenBitSet)b.clone(); b_andn.andNot(b0);
+
+        doIterate(a_and,b_and);
+        doIterate(a_or,b_or);
+        doIterate(a_xor,b_xor);
+        doIterate(a_andn,b_andn);
+
+        assertEquals(a_and.cardinality(), b_and.cardinality());
+        assertEquals(a_or.cardinality(), b_or.cardinality());
+        assertEquals(a_xor.cardinality(), b_xor.cardinality());
+        assertEquals(a_andn.cardinality(), b_andn.cardinality());
+
+        // test non-mutating popcounts
+        assertEquals(b_and.cardinality(), OpenBitSet.intersectionCount(b,b0));
+        assertEquals(b_or.cardinality(), OpenBitSet.unionCount(b,b0));
+        assertEquals(b_xor.cardinality(), OpenBitSet.xorCount(b,b0));
+        assertEquals(b_andn.cardinality(), OpenBitSet.andNotCount(b,b0));
+      }
+
+      a0=a;
+      b0=b;
+    }
+  }
+
+  // large enough to flush obvious bugs, small enough to run in <.5 sec as part of a
+  // larger testsuite.
+  public void testSmall() {
+    doRandomSets(1200,1000);
+  }
+
+  public void testBig() {
+    // uncomment to run a bigger test (~2 minutes).
+    // doRandomSets(2000,200000);
+  }
+
+  public void testEquals() {
+    OpenBitSet b1 = new OpenBitSet(1111);
+    OpenBitSet b2 = new OpenBitSet(2222);
+    assertTrue(b1.equals(b2));
+    assertTrue(b2.equals(b1));
+    b1.set(10);
+    assertFalse(b1.equals(b2));
+    assertFalse(b2.equals(b1));
+    b2.set(10);
+    assertTrue(b1.equals(b2));
+    assertTrue(b2.equals(b1));
+    b2.set(2221);
+    assertFalse(b1.equals(b2));
+    assertFalse(b2.equals(b1));
+    b1.set(2221);
+    assertTrue(b1.equals(b2));
+    assertTrue(b2.equals(b1));
+
+    // try different type of object
+    assertFalse(b1.equals(new Object()));
+  }
+
+}
+
+
+
--- a/src/test/org/apache/lucene/util/TestSortedVIntList.java
+++ b/src/test/org/apache/lucene/util/TestSortedVIntList.java
@ -0,0 +1,198 @@
+package org.apache.lucene.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.BitSet;
+
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+import junit.textui.TestRunner;
+
+import org.apache.lucene.search.DocIdSetIterator;
+
+public class TestSortedVIntList extends TestCase {
+  /** Main for running test case by itself. */
+  public static void main(String args[]) {
+    TestRunner.run(new TestSuite(TestSortedVIntList.class));
+  }
+  
+  void tstIterator (
+          SortedVIntList vintList,
+          int[] ints) throws IOException {
+    for (int i = 0; i < ints.length; i++) {
+      if ((i > 0) && (ints[i-1] == ints[i])) {
+        return; // DocNrSkipper should not skip to same document.
+      }
+    }
+    DocIdSetIterator m = vintList.iterator();
+    for (int i = 0; i < ints.length; i++) {
+      assertTrue("No end of Matcher at: " + i, m.next());
+      assertEquals(ints[i], m.doc());
+    }
+    assertTrue("End of Matcher", (! m.next()));
+  }
+
+  void tstVIntList(
+          SortedVIntList vintList,
+          int[] ints,
+          int expectedByteSize) throws IOException {
+    assertEquals("Size", ints.length, vintList.size());
+    assertEquals("Byte size", expectedByteSize, vintList.getByteSize());
+    tstIterator(vintList, ints);
+  }
+
+  public void tstViaBitSet(int [] ints, int expectedByteSize) throws IOException {
+    final int MAX_INT_FOR_BITSET = 1024 * 1024;
+    BitSet bs = new BitSet();
+    for (int i = 0; i < ints.length; i++) {
+      if (ints[i] > MAX_INT_FOR_BITSET) {
+        return; // BitSet takes too much memory
+      }
+      if ((i > 0) && (ints[i-1] == ints[i])) {
+        return; // BitSet cannot store duplicate.
+      }
+      bs.set(ints[i]);
+    }
+    SortedVIntList svil = new SortedVIntList(bs);
+    tstVIntList(svil, ints, expectedByteSize);
+    tstVIntList(new SortedVIntList(svil.iterator()), ints, expectedByteSize);
+  }
+  
+  private static final int VB1 = 0x7F;
+  private static final int BIT_SHIFT = 7;
+  private static final int VB2 = (VB1 << BIT_SHIFT) | VB1;
+  private static final int VB3 = (VB2 << BIT_SHIFT) | VB1;
+  private static final int VB4 = (VB3 << BIT_SHIFT) | VB1;
+
+  private int vIntByteSize(int i) {
+    assert i >= 0;
+    if (i <= VB1) return 1;
+    if (i <= VB2) return 2;
+    if (i <= VB3) return 3;
+    if (i <= VB4) return 4;
+    return 5;
+  }
+
+  private int vIntListByteSize(int [] ints) {
+    int byteSize = 0;
+    int last = 0;
+    for (int i = 0; i < ints.length; i++) {
+      byteSize += vIntByteSize(ints[i] - last);
+      last = ints[i];
+    }
+    return byteSize;
+  }
+  
+  public void tstInts(int [] ints) {
+    int expectedByteSize = vIntListByteSize(ints);
+    try {
+      tstVIntList(new SortedVIntList(ints), ints, expectedByteSize);
+      tstViaBitSet(ints, expectedByteSize);
+    } catch (IOException ioe) {
+      throw new Error(ioe);
+    }
+  }
+
+  public void tstIllegalArgExc(int [] ints) {
+    try {
+      new SortedVIntList(ints);
+    }
+    catch (IllegalArgumentException e) {
+      return;
+    }
+    fail("Expected IllegalArgumentException");    
+  }
+
+  private int[] fibArray(int a, int b, int size) {
+    final int[] fib = new int[size];
+    fib[0] = a;
+    fib[1] = b;
+    for (int i = 2; i < size; i++) {
+      fib[i] = fib[i-1] + fib[i-2];
+    }
+    return fib;
+  }
+
+  private int[] reverseDiffs(int []ints) { // reverse the order of the successive differences
+    final int[] res = new int[ints.length];
+    for (int i = 0; i < ints.length; i++) {
+      res[i] = ints[ints.length - 1] + (ints[0] - ints[ints.length - 1 - i]);
+    }
+    return res;
+  }
+
+  public void test01() {
+    tstInts(new int[] {});
+  }
+  public void test02() {
+    tstInts(new int[] {0});
+  }
+  public void test03() {
+    tstInts(new int[] {0,Integer.MAX_VALUE});
+  }
+  public void test04a() {
+    tstInts(new int[] {0, VB2 - 1});
+  }
+  public void test04b() {
+    tstInts(new int[] {0, VB2});
+  }
+  public void test04c() {
+    tstInts(new int[] {0, VB2 + 1});
+  }
+  public void test05() {
+    tstInts(fibArray(0,1,7)); // includes duplicate value 1
+  }
+  public void test05b() {
+    tstInts(reverseDiffs(fibArray(0,1,7)));
+  }
+  public void test06() {
+    tstInts(fibArray(1,2,45)); // no duplicates, size 46 exceeds max int.
+  }
+  public void test06b() {
+    tstInts(reverseDiffs(fibArray(1,2,45)));
+  }
+  public void test07a() {
+    tstInts(new int[] {0, VB3});
+  }
+  public void test07b() {
+    tstInts(new int[] {1, VB3 + 2});
+  }
+  public void test07c() {
+    tstInts(new int[] {2, VB3 + 4});
+  }
+  public void test08a() {
+    tstInts(new int[] {0, VB4 + 1});
+  }
+  public void test08b() {
+    tstInts(new int[] {1, VB4 + 1});
+  }
+  public void test08c() {
+    tstInts(new int[] {2, VB4 + 1});
+  }
+
+  public void test10() {
+    tstIllegalArgExc(new int[] {-1});
+  }
+  public void test11() {
+    tstIllegalArgExc(new int[] {1,0});
+  }
+  public void test12() {
+   tstIllegalArgExc(new int[] {0,1,1,2,3,5,8,0});
+  }
+}