SOLR-8037: speed up term range queries, use filter cache for embedded ranges

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1702661 13f79535-47bb-0310-9956-ffa450edef68
Yonik Seeley 2015-09-12 18:51:36 +00:00
parent f213b3bfdc
commit 996a3fb117
9 changed files with 845 additions and 25 deletions

lucene/core/src/java/org/apache/lucene/util/LSBRadixSorter.java

@@ -21,8 +21,9 @@ import java.util.Arrays;
 /**
  * A LSB Radix sorter for unsigned int values.
+ * @lucene.internal
  */
-final class LSBRadixSorter {
+public final class LSBRadixSorter {
 
   private static final int INSERTION_SORT_THRESHOLD = 30;
 
   private static final int HISTOGRAM_SIZE = 256;
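
The class is made public so code outside org.apache.lucene.util (Solr's new DocSetBuilder below) can reuse it. A minimal standalone sketch, using only the sort(int[], int, int) entry point that DocSetBuilder calls:

    // Hypothetical caller: sorts the [0, len) prefix of the array in place.
    int[] docs = {42, 7, 42, 3};
    new LSBRadixSorter().sort(docs, 0, docs.length);
    // docs is now {3, 7, 42, 42}; duplicates are preserved.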

solr/CHANGES.txt

@@ -72,6 +72,11 @@ Optimizations
 * SOLR-7876: Speed up queries and operations that use many terms when timeAllowed has not been
   specified.  Speedups of up to 8% were observed.  (yonik)
 
+* SOLR-8037: Speed up creation of filters from term range queries (i.e. non-numeric range queries)
+  and use the filter cache for term range queries that are part of larger queries.  Some observed
+  speedups were up to 2.5x for production of filters, and up to 10x for query evaluation with
+  embedded term range queries that resulted in filter cache hits.  (yonik)
+
 Other Changes
 ----------------------
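
To illustrate the "embedded" case (the field names here are invented, not part of the commit): in a request such as

    q=inStock:true AND title:[apple TO banana}

the term range is only one clause of a larger query, so it previously had to be re-enumerated for each request; with this change the range clause becomes a SolrRangeQuery whose matching doc set can be cached in, and served from, the filter cache.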

solr/core/src/java/org/apache/solr/query/SolrRangeQuery.java (new file)

@@ -0,0 +1,502 @@
package org.apache.solr.query;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.DocSetBuilder;
import org.apache.solr.search.DocSetProducer;
import org.apache.solr.search.ExtendedQueryBase;
import org.apache.solr.search.SolrConstantScoreQuery;
import org.apache.solr.search.SolrIndexSearcher;
/** @lucene.experimental */
public final class SolrRangeQuery extends ExtendedQueryBase implements DocSetProducer {
private final String field;
private final BytesRef lower;
private final BytesRef upper;
private byte flags;
private static final byte FLAG_INC_LOWER = 0x01;
private static final byte FLAG_INC_UPPER = 0x02;
public SolrRangeQuery(String field, BytesRef lower, BytesRef upper, boolean includeLower, boolean includeUpper) {
this.field = field;
this.lower = lower;
this.upper = upper;
this.flags = (byte)((this.lower != null && includeLower ? FLAG_INC_LOWER : 0) | (this.upper != null && includeUpper ? FLAG_INC_UPPER : 0));
}
public String getField() {
return field;
}
public boolean includeLower() {
return (flags & FLAG_INC_LOWER) != 0;
}
public boolean includeUpper() {
return (flags & FLAG_INC_UPPER) != 0;
}
@Override
public int hashCode() {
int hash = 0x8f2c9ba7 * (flags+1); // avoid multiplying by 0
hash = hash * 29 + ((lower == null) ? 0 : lower.hashCode()); // TODO: simpler hash code here?
hash = hash * 29 + ((upper == null) ? 0 : upper.hashCode());
return hash;
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (!(obj instanceof SolrRangeQuery)) {
return false;
}
SolrRangeQuery other = (SolrRangeQuery)obj;
return (this.flags == other.flags)
&& (this.field.equals(other.field))
&& (this.lower == other.lower || (this.lower != null && other.lower != null && this.lower.equals(other.lower)))
&& (this.upper == other.upper || (this.upper != null && other.upper != null && this.upper.equals(other.upper)))
;
}
@Override
public String toString(String field) {
StringBuilder buffer = new StringBuilder();
if (!getField().equals(field)) {
buffer.append(getField());
buffer.append(":");
}
// TODO: use our schema?
buffer.append(includeLower() ? '[' : '{');
buffer.append(endpoint(lower));
buffer.append(" TO ");
buffer.append(endpoint(upper));
buffer.append(includeUpper() ? ']' : '}');
return buffer.toString();
}
private String endpoint(BytesRef ref) {
if (ref == null) return "*";
String toStr = Term.toString(ref);
if ("*".equals(toStr)) {
toStr = "\\*";
}
// TODO: other escaping
return toStr;
}
@Override
public Query rewrite(IndexReader reader) throws IOException {
return this;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needScores) throws IOException {
return new ConstWeight(searcher, needScores);
/*
DocSet docs = createDocSet(searcher.getIndexReader().leaves(), searcher.getIndexReader().maxDoc());
SolrConstantScoreQuery csq = new SolrConstantScoreQuery( docs.getTopFilter() );
return csq.createWeight(searcher, needScores);
*/
}
@Override
public DocSet createDocSet(SolrIndexSearcher searcher) throws IOException {
return createDocSet( searcher, Math.min(64,(searcher.maxDoc()>>>10)+4) );
}
private DocSet createDocSet(SolrIndexSearcher searcher, long cost) throws IOException {
int maxDoc = searcher.maxDoc();
BitDocSet liveDocs = searcher.getLiveDocs();
FixedBitSet liveBits = liveDocs.size() == maxDoc ? null : liveDocs.getBits();
DocSetBuilder builder = new DocSetBuilder(maxDoc, cost);
List<LeafReaderContext> leaves = searcher.getTopReaderContext().leaves();
int maxTermsPerSegment = 0;
for (LeafReaderContext ctx : leaves) {
TermsEnum te = getTermsEnum(ctx);
int termsVisited = builder.add(te, ctx.docBase);
maxTermsPerSegment = Math.max(maxTermsPerSegment, termsVisited);
}
return maxTermsPerSegment <= 1 ? builder.buildUniqueInOrder(liveBits) : builder.build(liveBits);
}
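/**
 * A TermsEnum restricted to this query's range: the constructor seeks the wrapped
 * enum to the lower bound, and next() returns null once a term past the upper bound
 * is reached. The "positioned" flag records that the constructor already advanced
 * the wrapped enum, so the first call to next() must not advance it again.
 */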
private class RangeTermsEnum extends TermsEnum {
TermsEnum te;
BytesRef curr;
boolean positioned;
public RangeTermsEnum(Terms terms) throws IOException {
if (terms == null) {
positioned = true;
} else {
te = terms.iterator();
if (lower != null) {
TermsEnum.SeekStatus status = te.seekCeil(lower);
if (status == TermsEnum.SeekStatus.END) {
positioned = true;
curr = null;
} else if (status == SeekStatus.FOUND) {
positioned = includeLower();
curr = te.term();
} else {
// lower bound not found, so includeLower is irrelevant
positioned = true;
curr = te.term();
}
}
}
}
@Override
public SeekStatus seekCeil(BytesRef text) throws IOException {
return te.seekCeil(text);
}
@Override
public void seekExact(long ord) throws IOException {
te.seekExact(ord);
}
@Override
public BytesRef term() throws IOException {
return te.term(); // should be equal to curr, except if we went past the end
}
@Override
public long ord() throws IOException {
return te.ord();
}
@Override
public int docFreq() throws IOException {
return te.docFreq();
}
@Override
public long totalTermFreq() throws IOException {
return te.totalTermFreq();
}
@Override
public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {
return te.postings(reuse, flags);
}
@Override
public BytesRef next() throws IOException {
if (positioned) {
positioned = false;
} else {
curr = te.next();
}
if (curr == null) return null;
if (upper != null) {
int cmp = curr.compareTo(upper);
if (cmp < 0 || cmp == 0 && includeUpper()) {
return curr;
} else {
curr = null;
}
}
return curr;
}
@Override
public AttributeSource attributes() {
return te.attributes();
}
@Override
public boolean seekExact(BytesRef text) throws IOException {
return te.seekExact(text);
}
@Override
public void seekExact(BytesRef term, TermState state) throws IOException {
te.seekExact(term, state);
}
@Override
public TermState termState() throws IOException {
return te.termState();
}
}
public TermsEnum getTermsEnum(LeafReaderContext ctx) throws IOException {
return new RangeTermsEnum( ctx.reader().terms(getField()) );
}
private static class TermAndState {
final BytesRef term;
final TermState state;
final int docFreq;
final long totalTermFreq;
TermAndState(BytesRef term, TermState state, int docFreq, long totalTermFreq) {
this.term = term;
this.state = state;
this.docFreq = docFreq;
this.totalTermFreq = totalTermFreq;
}
}
private static class SegState {
final Weight weight;
final DocIdSet set;
SegState(Weight weight) {
this.weight = weight;
this.set = null;
}
SegState(DocIdSet set) {
this.set = set;
this.weight = null;
}
}
// adapted from MultiTermQueryConstantScoreWrapper
class ConstWeight extends ConstantScoreWeight {
private static final int BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD = 16;
final IndexSearcher searcher;
final boolean needScores;
boolean checkedFilterCache;
Filter filter;
final SegState[] segStates;
protected ConstWeight(IndexSearcher searcher, boolean needScores) {
super( SolrRangeQuery.this );
this.searcher = searcher;
this.segStates = new SegState[ searcher.getIndexReader().leaves().size() ];
this.needScores = needScores;
}
/** Try to collect terms from the given terms enum and return count=sum(df) for the terms visited so far,
 * or (-count - 1) if the enum was exhausted (in which case this should be rewritten into a boolean query).
 * The termsEnum will already be positioned on the next term if not exhausted.
 */
private long collectTerms(LeafReaderContext context, TermsEnum termsEnum, List<TermAndState> terms) throws IOException {
long count = 0;
final int threshold = Math.min(BOOLEAN_REWRITE_TERM_COUNT_THRESHOLD, BooleanQuery.getMaxClauseCount());
for (int i = 0; i < threshold; ++i) {
final BytesRef term = termsEnum.next();
if (term == null) {
return -count - 1;
}
TermState state = termsEnum.termState();
if (state.isRealTerm() == false) {
// TermQuery does not accept fake terms for now
return count;
}
int df = termsEnum.docFreq();
count += df;
terms.add(new TermAndState(BytesRef.deepCopyOf(term), state, df, termsEnum.totalTermFreq()));
}
return termsEnum.next() == null ? (-count - 1) : count;
}
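// Compute (and memoize) how to execute against this segment:
// - if the range matches few enough terms (collectTerms exhausts the enum below the
//   threshold), rewrite to a constant-score BooleanQuery of TermQuery clauses;
// - else, on the first segment only, consult the Solr filter cache and, on a miss,
//   build a full DocSet, cache it, and serve every segment from it;
// - otherwise build a DocIdSet for just this segment directly from the postings.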
private SegState getSegState(LeafReaderContext context) throws IOException {
SegState segState = segStates[context.ord];
if (segState != null) return segState;
// first time, check our filter cache
boolean doCheck = !checkedFilterCache && context.ord == 0;
checkedFilterCache = true;
SolrIndexSearcher solrSearcher = null;
if (doCheck && searcher instanceof SolrIndexSearcher) {
solrSearcher = (SolrIndexSearcher)searcher;
if (solrSearcher.getFilterCache() == null) {
doCheck = false;
} else {
DocSet answer = solrSearcher.getFilterCache().get(SolrRangeQuery.this);
if (answer != null) {
filter = answer.getTopFilter();
}
}
} else {
// not a SolrIndexSearcher (or no filter cache): never try to build or consult the cache
doCheck = false;
}
if (filter != null) {
return segStates[context.ord] = new SegState(filter.getDocIdSet(context, null));
}
final Terms terms = context.reader().terms(SolrRangeQuery.this.getField());
if (terms == null) {
return segStates[context.ord] = new SegState((DocIdSet) null);
}
final TermsEnum termsEnum = SolrRangeQuery.this.getTermsEnum(context);
PostingsEnum docs = null;
final List<TermAndState> collectedTerms = new ArrayList<>();
long count = collectTerms(context, termsEnum, collectedTerms);
if (count < 0) {
BooleanQuery.Builder bq = new BooleanQuery.Builder();
for (TermAndState t : collectedTerms) {
final TermContext termContext = new TermContext(searcher.getTopReaderContext());
termContext.register(t.state, context.ord, t.docFreq, t.totalTermFreq);
bq.add(new TermQuery(new Term( SolrRangeQuery.this.getField(), t.term), termContext), BooleanClause.Occur.SHOULD);
}
Query q = new ConstantScoreQuery(bq.build());
final Weight weight = searcher.rewrite(q).createWeight(searcher, needScores);
weight.normalize(1f, score());
return segStates[context.ord] = new SegState(weight);
}
// Too many terms for boolean query...
if (doCheck) {
DocSet answer = createDocSet(solrSearcher, count);
solrSearcher.getFilterCache().put(SolrRangeQuery.this, answer);
filter = answer.getTopFilter();
return segStates[context.ord] = new SegState(filter.getDocIdSet(context, null));
}
/* FUTURE: reuse term states in the future to help build DocSet, use collected count so far...
Bits liveDocs = context.reader().getLiveDocs();
int base = context.docBase;
int termsVisited = collectedTerms.size();
DocSetBuilder builder = new DocSetBuilder(searcher.getIndexReader().maxDoc());
if (!collectedTerms.isEmpty()) {
TermsEnum termsEnum2 = terms.iterator();
for (TermAndState t : collectedTerms) {
termsEnum2.seekExact(t.term, t.state);
docs = termsEnum2.postings(docs, PostingsEnum.NONE);
builder.add(docs, context.docBase, liveDocs);
}
}
termsVisited += builder.add(termsEnum, base, liveDocs);
*/
DocIdSetBuilder builder = new DocIdSetBuilder(context.reader().maxDoc());
builder.grow((int)Math.min(Integer.MAX_VALUE,count));
if (collectedTerms.isEmpty() == false) {
TermsEnum termsEnum2 = terms.iterator();
for (TermAndState t : collectedTerms) {
termsEnum2.seekExact(t.term, t.state);
docs = termsEnum2.postings(docs, PostingsEnum.NONE);
builder.add(docs);
}
}
do {
// already positioned on the next term, so don't call next() here...
docs = termsEnum.postings(docs, PostingsEnum.NONE);
builder.add(docs);
} while (termsEnum.next() != null);
DocIdSet segSet = builder.build();
return segStates[context.ord] = new SegState(segSet);
}
private Scorer scorer(DocIdSet set) throws IOException {
if (set == null) {
return null;
}
final DocIdSetIterator disi = set.iterator();
if (disi == null) {
return null;
}
return new ConstantScoreScorer(this, score(), disi);
}
@Override
public BulkScorer bulkScorer(LeafReaderContext context) throws IOException {
final SegState weightOrBitSet = getSegState(context);
if (weightOrBitSet.weight != null) {
return weightOrBitSet.weight.bulkScorer(context);
} else {
final Scorer scorer = scorer(weightOrBitSet.set);
if (scorer == null) {
return null;
}
return new DefaultBulkScorer(scorer);
}
}
@Override
public Scorer scorer(LeafReaderContext context) throws IOException {
final SegState weightOrBitSet = getSegState(context);
if (weightOrBitSet.weight != null) {
return weightOrBitSet.weight.scorer(context);
} else {
return scorer(weightOrBitSet.set);
}
}
}
}
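
A usage sketch (assumed, not part of the commit; the field name and bounds are invented) showing how the constructor arguments map onto range-query syntax:

    // Builds title:[apple TO banana} -- inclusive lower bound, exclusive upper bound.
    SolrRangeQuery range = new SolrRangeQuery(
        "title",
        new BytesRef("apple"),    // lower bound; null would mean open-ended ("*")
        new BytesRef("banana"),   // upper bound; null would mean open-ended ("*")
        true,                     // includeLower -> '['
        false);                   // includeUpper -> '}'
    assert "title:[apple TO banana}".equals(range.toString("someOtherField"));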

solr/core/src/java/org/apache/solr/schema/FieldType.java

@@ -63,6 +63,7 @@ import org.apache.solr.common.SolrException.ErrorCode;
 import org.apache.solr.common.util.Base64;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.query.SolrRangeQuery;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.Sorting;
@@ -720,12 +721,11 @@ public abstract class FieldType extends FieldProperties {
           part2 == null ? null : new BytesRef(toInternal(part2)),
           minInclusive, maxInclusive);
     } else {
-      MultiTermQuery rangeQuery = TermRangeQuery.newStringRange(
+      SolrRangeQuery rangeQuery = new SolrRangeQuery(
             field.getName(),
-            part1 == null ? null : toInternal(part1),
-            part2 == null ? null : toInternal(part2),
+            part1 == null ? null : new BytesRef(toInternal(part1)),
+            part2 == null ? null : new BytesRef(toInternal(part2)),
             minInclusive, maxInclusive);
-      rangeQuery.setRewriteMethod(getRewriteMethod(parser, field));
       return rangeQuery;
     }
   }
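
With this change a non-numeric range no longer goes through TermRangeQuery plus a rewrite method: for example (hypothetical field), cat:[book TO electronics] on a plain string field now builds new SolrRangeQuery("cat", new BytesRef("book"), new BytesRef("electronics"), true, true) directly.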

solr/core/src/java/org/apache/solr/schema/TextField.java

@@ -28,6 +28,7 @@ import org.apache.lucene.uninverting.UninvertingReader.Type;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.QueryBuilder;
 import org.apache.solr.common.SolrException;
+import org.apache.solr.query.SolrRangeQuery;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.Sorting;
@@ -136,7 +137,7 @@ public class TextField extends FieldType {
     Analyzer multiAnalyzer = getMultiTermAnalyzer();
     BytesRef lower = analyzeMultiTerm(field.getName(), part1, multiAnalyzer);
     BytesRef upper = analyzeMultiTerm(field.getName(), part2, multiAnalyzer);
-    return new TermRangeQuery(field.getName(), lower, upper, minInclusive, maxInclusive);
+    return new SolrRangeQuery(field.getName(), lower, upper, minInclusive, maxInclusive);
   }
 
   public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) {

solr/core/src/java/org/apache/solr/search/BitDocSet.java

@@ -285,8 +285,7 @@ public class BitDocSet extends DocSetBase {
     }
 
     final int base = context.docBase;
-    final int maxDoc = reader.maxDoc();
-    final int max = base + maxDoc;   // one past the max doc in this segment.
+    final int max = base + reader.maxDoc();   // one past the max doc in this segment.
 
     return BitsFilteredDocIdSet.wrap(new DocIdSet() {
       @Override
@@ -302,10 +301,11 @@ public class BitDocSet extends DocSetBase {
           @Override
           public int nextDoc() {
-            if (pos >= bs.length() - 1) {
+            int next = pos+1;
+            if (next >= max) {
               return adjustedDoc = NO_MORE_DOCS;
             } else {
-              pos = bs.nextSetBit(pos + 1);
+              pos = bs.nextSetBit(next);
               return adjustedDoc = pos < max ? pos - base : NO_MORE_DOCS;
             }
           }
@@ -314,7 +314,7 @@ public class BitDocSet extends DocSetBase {
           public int advance(int target) {
             if (target == NO_MORE_DOCS) return adjustedDoc = NO_MORE_DOCS;
             int adjusted = target + base;
-            if (adjusted >= bs.length()) {
+            if (adjusted >= max) {
               return adjustedDoc = NO_MORE_DOCS;
             } else {
               pos = bs.nextSetBit(adjusted);
@@ -326,6 +326,7 @@ public class BitDocSet extends DocSetBase {
           public long cost() {
             // we don't want to actually compute cardinality, but
             // if it's already been computed, we use it (pro-rated for the segment)
+            int maxDoc = max-base;
             if (size != -1) {
               return (long)(size * ((FixedBitSet.bits2words(maxDoc)<<6) / (float)bs.length()));
             } else {
@@ -350,7 +351,7 @@ public class BitDocSet extends DocSetBase {
         @Override
         public int length() {
-          return maxDoc;
+          return max-base;
         }
       };
     }
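
A concrete reading of the iterator change (numbers invented for illustration): for a segment with docBase=100 and reader.maxDoc()=50, max is 150. A nextDoc() call at pos=149 now returns NO_MORE_DOCS immediately; the old code tested against bs.length() (the size of the whole top-level bitset) and so could scan into set bits belonging to later segments before discarding them against max.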

solr/core/src/java/org/apache/solr/search/DocSetBuilder.java (new file)

@@ -0,0 +1,222 @@
package org.apache.solr.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LSBRadixSorter;
import org.apache.lucene.util.RamUsageEstimator;
/**
* Adapted from DocIdSetBuilder to build DocSets
*
* @lucene.internal
*/
public final class DocSetBuilder {
private final int maxDoc;
private final int threshold;
private int[] buffer;
private int pos;
private FixedBitSet bitSet;
public DocSetBuilder(int maxDoc, long costEst) {
this.maxDoc = maxDoc;
// For ridiculously small sets, we'll just use a sorted int[]
// maxDoc >>> 7 is a good value if you want to save memory, lower values
// such as maxDoc >>> 11 should provide faster building but at the expense
// of using a full bitset even for quite sparse data
this.threshold = (maxDoc >>> 7) + 4; // the +4 is for better testing on small indexes
if (costEst > threshold) {
bitSet = new FixedBitSet(maxDoc);
} else {
this.buffer = new int[Math.max((int)costEst,1)];
}
}
private void upgradeToBitSet() {
assert bitSet == null;
bitSet = new FixedBitSet(maxDoc);
for (int i = 0; i < pos; ++i) {
bitSet.set(buffer[i]);
}
this.buffer = null;
this.pos = 0;
}
private void growBuffer(int minSize) {
if (minSize < buffer.length) return;
int newSize = buffer.length;
while (newSize < minSize) {
newSize = newSize << 1;
}
newSize = Math.min(newSize, threshold);
int[] newBuffer = new int[newSize];
System.arraycopy(buffer, 0, newBuffer, 0, pos);
buffer = newBuffer;
}
public void add(DocIdSetIterator iter, int base) throws IOException {
grow((int) Math.min(Integer.MAX_VALUE, iter.cost()));
if (bitSet != null) {
add(bitSet, iter, base);
} else {
while (true) {
for (int i = pos; i < buffer.length; ++i) {
final int doc = iter.nextDoc();
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
pos = i; // update pos
return;
}
buffer[i] = doc + base; // using the loop counter may help with removal of bounds checking
}
pos = buffer.length; // update pos
if (pos + 1 >= threshold) {
break;
}
growBuffer(pos + 1);
}
upgradeToBitSet();
add(bitSet, iter, base);
}
}
public static void add(FixedBitSet bitSet, DocIdSetIterator iter, int base) throws IOException {
for (int doc = iter.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = iter.nextDoc()) {
bitSet.set(doc + base);
}
}
/** Returns the number of terms visited */
public int add(TermsEnum te, int base) throws IOException {
PostingsEnum postings = null;
int termCount = 0;
for(;;) {
BytesRef term = te.next();
if (term == null) break;
termCount++;
postings = te.postings(postings, PostingsEnum.NONE);
add(postings, base);
}
return termCount;
}
public void grow(int numDocs) {
if (bitSet == null) {
final long newLength = (long) pos + numDocs; // widen before adding to avoid int overflow
if (newLength < threshold) {
growBuffer((int) newLength);
} else {
upgradeToBitSet();
}
}
}
public void add(int doc) {
if (bitSet != null) {
bitSet.set(doc);
} else {
if (pos >= buffer.length) {
if (pos + 1 >= threshold) {
upgradeToBitSet();
bitSet.set(doc);
return;
}
growBuffer(pos + 1);
}
buffer[pos++] = doc;
}
}
private static int dedup(int[] arr, int length, FixedBitSet acceptDocs) {
if (length == 0) {
return 0;
}
int l = 1;
int previous = arr[0];
for (int i = 1; i < length; ++i) {
final int value = arr[i];
assert value >= previous;
if (value != previous) {
if (acceptDocs == null || acceptDocs.get(value)) {
arr[l++] = value;
previous = value;
}
}
}
return l;
}
public DocSet build(FixedBitSet filter) {
if (bitSet != null) {
if (filter != null) {
bitSet.and(filter);
}
return new BitDocSet(bitSet);
// TODO - if this set will be cached, should we make it smaller if it's below DocSetUtil.smallSetSize?
} else {
LSBRadixSorter sorter = new LSBRadixSorter();
sorter.sort(buffer, 0, pos);
final int l = dedup(buffer, pos, filter);
assert l <= pos;
return new SortedIntDocSet(buffer, l); // TODO: have option to not shrink in the future if it will be a temporary set
}
}
/** Only use this if you know there were no duplicates and that docs were collected in-order! */
public DocSet buildUniqueInOrder(FixedBitSet filter) {
if (bitSet != null) {
if (filter != null) {
bitSet.and(filter);
}
return new BitDocSet(bitSet);
} else {
// don't need to sort, but still need to remove non accepted docs
int l = pos;
if (filter != null) {
l = dedup(buffer, pos, filter);
}
return new SortedIntDocSet(buffer, l); // TODO: have option to not shrink in the future if it will be a temporary set
}
}
}
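
A minimal behavioral sketch (standalone usage assumed, not from the commit):

    // A small cost estimate starts the builder on an int[] buffer; it upgrades to a
    // FixedBitSet only once more than (maxDoc >>> 7) + 4 docs have been buffered.
    DocSetBuilder builder = new DocSetBuilder(1_000_000, 10 /* cost estimate */);
    builder.add(42);
    builder.add(7);
    builder.add(42);                    // out-of-order and duplicate adds are fine
    DocSet set = builder.build(null);   // null filter: accept all docs
    // build() radix-sorts and dedups the buffer, yielding a SortedIntDocSet of {7, 42}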

solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java

@@ -462,6 +462,10 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
     return fieldNames;
   }
 
+  public SolrCache<Query,DocSet> getFilterCache() {
+    return filterCache;
+  }
+
   /**
    * Returns a collection of the names of all stored fields which can be
    * highlighted the index reader knows about.
@@ -919,12 +923,12 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
       DocSet absAnswer = filterCache.get(absQ);
       if (absAnswer!=null) {
         if (positive) return absAnswer;
-        else return getPositiveDocSet(matchAllDocsQuery).andNot(absAnswer);
+        else return getLiveDocs().andNot(absAnswer);
       }
     }
 
     DocSet absAnswer = getDocSetNC(absQ, null);
-    DocSet answer = positive ? absAnswer : getPositiveDocSet(matchAllDocsQuery).andNot(absAnswer);
+    DocSet answer = positive ? absAnswer : getLiveDocs().andNot(absAnswer);
 
     if (filterCache != null) {
       // cache negative queries as positive
@@ -948,7 +952,15 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
   }
 
   private static Query matchAllDocsQuery = new MatchAllDocsQuery();
+  private BitDocSet liveDocs;
+
+  public BitDocSet getLiveDocs() throws IOException {
+    // going through the filter cache will provide thread safety here
+    if (liveDocs == null) {
+      liveDocs = getDocSetBits(matchAllDocsQuery);
+    }
+    return liveDocs;
+  }
 
   public static class ProcessedFilter {
     public DocSet answer;  // the answer, if non-null
@@ -1129,7 +1141,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
     // Are all of our normal cached filters negative?
     if (end > 0 && answer==null) {
-      answer = getPositiveDocSet(matchAllDocsQuery);
+      answer = getLiveDocs();
     }
 
     // do negative queries first to shrink set size
@@ -1152,7 +1164,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
     } else {
       if (postFilters == null) {
         if (answer == null) {
-          answer = getPositiveDocSet(matchAllDocsQuery);
+          answer = getLiveDocs();
         }
         // "answer" is the only part of the filter, so set it.
         pf.answer = answer;
@@ -2150,7 +2162,7 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable,SolrIn
     // if both negative, we need to create a temp DocSet since we
     // don't have a counting method that takes three.
-    DocSet all = getPositiveDocSet(matchAllDocsQuery);
+    DocSet all = getLiveDocs();
 
     // -a -b == *:*.andNot(a).andNotSize(b) == *.*.andNotSize(a.union(b))
     // we use the last form since the intermediate DocSet should normally be smaller.
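
Net effect of the getLiveDocs() additions above: the *:* doc set is computed once per searcher and memoized (the first call still goes through getDocSetBits, hence the filter cache, for thread safety), and negative filters, e.g. a hypothetical -inStock:false, are evaluated as getLiveDocs().andNot(cachedPositiveSet) instead of re-fetching the match-all set each time.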

solr/core/src/test/org/apache/solr/search/TestFiltering.java

@@ -21,6 +21,7 @@ package org.apache.solr.search;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.FixedBitSet;
 import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.request.SolrQueryRequest;
@@ -34,7 +35,7 @@ public class TestFiltering extends SolrTestCaseJ4 {
   @BeforeClass
   public static void beforeTests() throws Exception {
     System.setProperty("enable.update.log", "false"); // schema12 doesn't support _version_
-    initCore("solrconfig.xml","schema12.xml");
+    initCore("solrconfig.xml","schema_latest.xml");
   }
@@ -132,8 +133,79 @@ public class TestFiltering extends SolrTestCaseJ4 {
   }
 
   static String f = "val_i";
+  static String f_s = "val_s";
+
+  static String f_s(int i) {
+    return String.format(Locale.ROOT, "%05d", i);
+  }
 
-  String frangeStr(boolean negative, int l, int u, boolean cache, int cost, boolean exclude) {
+  String rangeStr(String field, boolean negative, int l, int u, boolean cache, int cost, boolean exclude) {
+    String topLev="";
+    if (!cache || exclude) {
+      topLev = "{!" + (cache || random().nextBoolean() ? " cache=" + cache : "")
+          + (cost != 0 ? " cost=" + cost : "")
+          + ((exclude) ? " tag=t" : "") + "}";
+    }
+
+    String q = field + ":";
+    String q2 = q;
+    String lower1 = "[" + f_s(l);
+    String lower2 = l<=0 ? lower1 : ("{" + f_s(l-1));
+    String upper1 = f_s(u) + "]";
+    String upper2 = f_s(u+1) + "}";
+
+    if (random().nextBoolean()) {
+      q += lower1;
+      q2 += lower2;
+    } else {
+      q += lower2;
+      q2 += lower1;
+    }
+    q += " TO ";
+    q2 += " TO ";
+    if (random().nextBoolean()) {
+      q += upper1;
+      q2 += upper2;
+    } else {
+      q += upper2;
+      q2 += upper1;
+    }
+
+    // String q = field + ":[" + f_s(l) + " TO " + f_s(u) + "]";
+
+    if (negative) {
+      q = "-_query_:\"" + q + "\"";
+      // q = "-" + q;  // TODO: need to be encapsulated for some reason?
+    } else {
+      if (random().nextBoolean()) {
+        // try some different query structures - important for testing different code paths
+        switch (random().nextInt(5)) {
+          case 0:
+            q = q + " OR id:RAND"+random().nextInt();
+            break;
+          case 1:
+            q = "id:RAND"+random().nextInt() + " OR " + q;
+            break;
+          case 2:
+            q = "*:* AND " + q;
+            break;
+          case 3:
+            q = q + " AND " + q2;
+            break;
+          case 4:
+            q = q + " OR " + q2;
+            break;
+        }
+      }
+    }
+    return topLev + q;
+  }
+
+  String frangeStr(String field, boolean negative, int l, int u, boolean cache, int cost, boolean exclude) {
     String topLev="";
     if (!cache || exclude) {
@@ -142,7 +214,7 @@ public class TestFiltering extends SolrTestCaseJ4 {
           + ((exclude) ? " tag=t" : "");
     }
 
-    String ret = "{!frange v="+f+" l="+l+" u="+u;
+    String ret = "{!frange v="+field+" l="+l+" u="+u;
     if (negative) {
       ret = "-_query_:\"" + ret + "}\"";
       if (topLev.length()>0) {
@@ -165,7 +237,7 @@ public class TestFiltering extends SolrTestCaseJ4 {
     FixedBitSet[] sets = facetQuery ? new FixedBitSet[]{model.facetQuery} :
         (exclude ? new FixedBitSet[]{model.answer, model.facetQuery} : new FixedBitSet[]{model.answer, model.multiSelect, model.facetQuery});
 
-    if (random().nextInt(100) < 50) {
+    if (random().nextInt(100) < 60) {
       // frange
       int l=0;
       int u=0;
@@ -201,7 +273,10 @@ public class TestFiltering extends SolrTestCaseJ4 {
         }
       }
 
-      return frangeStr(!positive, l, u, cache, cost, exclude);
+      String whichField = random().nextBoolean() ? f : f_s;
+      return random().nextBoolean() ?
+          frangeStr(f, !positive, l, u, cache, cost, exclude)  // todo: frange doesn't work on the string field?
+          : rangeStr(whichField, !positive, l, u, cache, cost, exclude);
     } else {
       // term or boolean query
       int numWords = FixedBitSet.bits2words(model.indexSize);
@@ -256,16 +331,17 @@ public class TestFiltering extends SolrTestCaseJ4 {
     Model model = new Model();
 
     for (int iiter = 0; iiter<indexIter; iiter++) {
-      model.indexSize = random().nextInt(20 * RANDOM_MULTIPLIER) + 1;
+      model.indexSize = random().nextInt(40 * RANDOM_MULTIPLIER) + 1;
       clearIndex();
 
       for (int i=0; i<model.indexSize; i++) {
         String val = Integer.toString(i);
-        assertU(adoc("id",val,f,val));
+        SolrInputDocument doc = sdoc("id", val, f,val, f_s, f_s(i) );
+        updateJ(jsonAdd(doc), null);
         if (random().nextInt(100) < 20) {
           // duplicate doc 20% of the time (makes deletions)
-          assertU(adoc("id",val,f,val));
+          updateJ(jsonAdd(doc), null);
         }
         if (random().nextInt(100) < 10) {
           // commit 10% of the time (forces a new segment)