LUCENE-2690: MultiTermQuery boolean rewrites per segment

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1022934 13f79535-47bb-0310-9956-ffa450edef68
2010-10-15 14:25:48 +00:00 · 2010-10-15 14:25:48 +00:00 · dd1c7a8585
parent 2a8c9dfa3f
commit dd1c7a8585
21 changed files with 676 additions and 191 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -147,8 +147,10 @@ API Changes
  you also override this method on upgrade.  (Robert Muir, Mike
  McCandless)

-* LUCENE-2691: IndexWriter.getReader() has been made package local and is now exposed via open and reopen methods on
-  IndexReader.  The semantics of the call is the same as it was prior to the API change.  (Grant Ingersoll, Mike McCandless)
+* LUCENE-2691: IndexWriter.getReader() has been made package local and is now
+  exposed via open and reopen methods on IndexReader.  The semantics of the
+  call is the same as it was prior to the API change.
+  (Grant Ingersoll, Mike McCandless)

 New features

@ -265,6 +267,9 @@ New features
 * LUCENE-2692: Added several new SpanQuery classes for positional checking
  (match is in a range, payload is a specific value) (Grant Ingersoll)  
  
+* LUCENE-2690: MultiTermQuery boolean rewrites per segment.
+  (Uwe Schindler, Robert Muir, Mike McCandless)
+
 Optimizations

 * LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching.
--- a/lucene/MIGRATE.txt
+++ b/lucene/MIGRATE.txt
@ -316,3 +316,11 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing
 * LUCENE-2691: The near-real-time API has moved from IndexWriter to
  IndexReader.  Instead of IndexWriter.getReader(), call
  IndexReader.open(IndexWriter) or IndexReader.reopen(IndexWriter).
+
+* LUCENE-2690: MultiTermQuery boolean rewrites per segment.
+  Also MultiTermQuery.getTermsEnum() now takes an AttributeSource. FuzzyTermsEnum
+  is both consumer and producer of attributes: MTQ.BoostAttribute is
+  added to the FuzzyTermsEnum and MTQ's rewrite mode consumes it.
+  The other way round, MTQ.TopTermsBooleanQueryRewrite supplies a
+  global AttributeSource to each segment's TermsEnum. The TermsEnum is the consumer
+  and gets the current minimum competitive boosts (MTQ.MaxNonCompetitiveBoostAttribute).
--- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@ -886,7 +886,7 @@ public class MemoryIndex implements Serializable {

      @Override
      public int docFreq() {
-        return info.sortedTerms[termUpto].getValue().size();
+        return 1;
      }

      @Override
--- a/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java
+++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java
@ -29,6 +29,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.PriorityQueue;

@ -199,7 +200,10 @@ public class FuzzyLikeThisQuery extends Query
                  ScoreTermQueue variantsQ=new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term
                  float minScore=0;
                  Term startTerm=internSavingTemplateTerm.createTerm(term);
-                  FuzzyTermsEnum fe = new FuzzyTermsEnum(reader, startTerm, f.minSimilarity, f.prefixLength);
+                  AttributeSource atts = new AttributeSource();
+                  MultiTermQuery.MaxNonCompetitiveBoostAttribute maxBoostAtt =
+                    atts.addAttribute(MultiTermQuery.MaxNonCompetitiveBoostAttribute.class);
+                  FuzzyTermsEnum fe = new FuzzyTermsEnum(reader, atts, startTerm, f.minSimilarity, f.prefixLength);
                  //store the df so all variants use same idf
                  int df = reader.docFreq(startTerm);
                  int numVariants=0;
@ -217,7 +221,7 @@ public class FuzzyLikeThisQuery extends Query
                          variantsQ.insertWithOverflow(st);
                          minScore = variantsQ.top().score; // maintain minScore
                        }
-                        boostAtt.setMaxNonCompetitiveBoost(variantsQ.size() >= MAX_VARIANTS_PER_TERM ? minScore : Float.NEGATIVE_INFINITY);
+                        maxBoostAtt.setMaxNonCompetitiveBoost(variantsQ.size() >= MAX_VARIANTS_PER_TERM ? minScore : Float.NEGATIVE_INFINITY);
                      }
                    }

--- a/lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java
+++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java
@ -21,6 +21,7 @@ import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.FilteredTermsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.ToStringUtils;

 import java.io.IOException;
@ -60,7 +61,7 @@ public class RegexQuery extends MultiTermQuery implements RegexQueryCapable {
  }

  @Override
-  protected FilteredTermsEnum getTermsEnum(IndexReader reader) throws IOException {
+  protected FilteredTermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException {
    return new RegexTermsEnum(reader, term, regexImpl);
  }

--- a/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java
+++ b/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java
@ -28,6 +28,7 @@ import org.apache.lucene.index.TermsEnum;

 import org.apache.lucene.search.spans.SpanNearQuery;
 import org.apache.lucene.search.spans.SpanQuery;
+import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.LuceneTestCase;

 public class TestRegexQuery extends LuceneTestCase {
@ -78,7 +79,7 @@ public class TestRegexQuery extends LuceneTestCase {
  }

  public void testMatchAll() throws Exception {
-    TermsEnum terms = new RegexQuery(new Term(FN, "jum.")).getTermsEnum(searcher.getIndexReader());
+    TermsEnum terms = new RegexQuery(new Term(FN, "jum.")).getTermsEnum(searcher.getIndexReader(), new AttributeSource() /*dummy*/);
    // no term should match
    assertNull(terms.next());
  }
--- a/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java
+++ b/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java
@ -30,6 +30,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.FuzzyTermsEnum;
 import org.apache.lucene.search.MultiTermQuery;
+import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.automaton.LevenshteinAutomata;

@ -387,7 +388,10 @@ public class DirectSpellChecker {
  private Collection<ScoreTerm> suggestSimilar(Term term, int numSug, 
      IndexReader ir, int docfreq, int editDistance, float accuracy) throws IOException {
    
-    FuzzyTermsEnum e = new FuzzyTermsEnum(ir, term, editDistance, Math.max(minPrefix, editDistance-1));
+    AttributeSource atts = new AttributeSource();
+    MultiTermQuery.MaxNonCompetitiveBoostAttribute maxBoostAtt =
+      atts.addAttribute(MultiTermQuery.MaxNonCompetitiveBoostAttribute.class);
+    FuzzyTermsEnum e = new FuzzyTermsEnum(ir, atts, term, editDistance, Math.max(minPrefix, editDistance-1));
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
    
    BytesRef queryTerm = new BytesRef(term.text());
@ -435,7 +439,7 @@ public class DirectSpellChecker {
      stQueue.offer(st);
      // possibly drop entries from queue
      st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm();
-      boostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
+      maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
    }
      
    return stQueue;
--- a/lucene/src/java/org/apache/lucene/search/AutomatonQuery.java
+++ b/lucene/src/java/org/apache/lucene/search/AutomatonQuery.java
@ -24,6 +24,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.util.ToStringUtils;
+import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.ByteRunAutomaton;
@ -85,7 +86,7 @@ public class AutomatonQuery extends MultiTermQuery {
  }

  @Override
-  protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
+  protected TermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException {
    // matches nothing
    if (BasicOperations.isEmpty(automaton)) {
      return TermsEnum.EMPTY;
--- a/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java
+++ b/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java
@ -20,6 +20,7 @@ package org.apache.lucene.search;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.util.automaton.LevenshteinAutomata;

@ -135,11 +136,11 @@ public class FuzzyQuery extends MultiTermQuery {
  }

  @Override
-  protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
+  protected TermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException {
    if (!termLongEnough) {  // can only match if it's exact
      return new SingleTermsEnum(reader, term);
    }
-    return new FuzzyTermsEnum(reader, getTerm(), minimumSimilarity, prefixLength);
+    return new FuzzyTermsEnum(reader, atts, getTerm(), minimumSimilarity, prefixLength);
  }
  
  /**
--- a/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
+++ b/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java
@ -22,6 +22,7 @@ import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IntsRef;
@ -51,7 +52,12 @@ public final class FuzzyTermsEnum extends TermsEnum {
  private final MultiTermQuery.BoostAttribute boostAtt =
    attributes().addAttribute(MultiTermQuery.BoostAttribute.class);
  
-  private float bottom = boostAtt.getMaxNonCompetitiveBoost();
+  private final MultiTermQuery.MaxNonCompetitiveBoostAttribute maxBoostAtt;
+  
+  private float bottom;
+  private BytesRef bottomTerm;
+  // nocommit: chicken-and-egg
+  private final Comparator<BytesRef> termComparator = BytesRef.getUTF8SortedAsUnicodeComparator();
  
  private final float minSimilarity;
  private final float scale_factor;
@ -82,7 +88,7 @@ public final class FuzzyTermsEnum extends TermsEnum {
   * @param prefixLength Length of required common prefix. Default value is 0.
   * @throws IOException
   */
-  public FuzzyTermsEnum(IndexReader reader, Term term, 
+  public FuzzyTermsEnum(IndexReader reader, AttributeSource atts, Term term, 
      final float minSimilarity, final int prefixLength) throws IOException {
    if (minSimilarity >= 1.0f && minSimilarity != (int)minSimilarity)
      throw new IllegalArgumentException("fractional edit distances are not allowed");
@ -116,9 +122,10 @@ public final class FuzzyTermsEnum extends TermsEnum {
    }
    this.scale_factor = 1.0f / (1.0f - this.minSimilarity);

-    TermsEnum subEnum = getAutomatonEnum(maxEdits, null);
-    setEnum(subEnum != null ? subEnum : 
-      new LinearFuzzyTermsEnum());
+    this.maxBoostAtt = atts.addAttribute(MultiTermQuery.MaxNonCompetitiveBoostAttribute.class);
+    bottom = maxBoostAtt.getMaxNonCompetitiveBoost();
+    bottomTerm = maxBoostAtt.getCompetitiveTerm();
+    bottomChanged(null, true);
  }
  
  /**
@ -169,19 +176,24 @@ public final class FuzzyTermsEnum extends TermsEnum {
   * fired when the max non-competitive boost has changed. this is the hook to
   * swap in a smarter actualEnum
   */
-  private void bottomChanged(float boostValue, BytesRef lastTerm)
+  private void bottomChanged(BytesRef lastTerm, boolean init)
      throws IOException {
    int oldMaxEdits = maxEdits;
    
+    // true if the last term encountered is lexicographically equal or after the bottom term in the PQ
+    boolean termAfter = bottomTerm == null || (lastTerm != null && termComparator.compare(lastTerm, bottomTerm) >= 0);
+
    // as long as the max non-competitive boost is >= the max boost
    // for some edit distance, keep dropping the max edit distance.
-    while (maxEdits > 0 && boostValue >= calculateMaxBoost(maxEdits))
+    while (maxEdits > 0 && (termAfter ? bottom >= calculateMaxBoost(maxEdits) : bottom > calculateMaxBoost(maxEdits)))
      maxEdits--;
    
-    if (oldMaxEdits != maxEdits) { // the maximum n has changed
+    if (oldMaxEdits != maxEdits || init) { // the maximum n has changed
      TermsEnum newEnum = getAutomatonEnum(maxEdits, lastTerm);
      if (newEnum != null) {
        setEnum(newEnum);
+      } else if (init) {
+        setEnum(new LinearFuzzyTermsEnum());      
      }
    }
  }
@ -202,16 +214,18 @@ public final class FuzzyTermsEnum extends TermsEnum {
  @Override
  public BytesRef next() throws IOException {
    if (queuedBottom != null) {
-      bottomChanged(bottom, queuedBottom);
+      bottomChanged(queuedBottom, false);
      queuedBottom = null;
    }
    
    BytesRef term = actualEnum.next();
    boostAtt.setBoost(actualBoostAtt.getBoost());
    
-    final float bottom = boostAtt.getMaxNonCompetitiveBoost();
-    if (bottom != this.bottom && term != null) {
+    final float bottom = maxBoostAtt.getMaxNonCompetitiveBoost();
+    final BytesRef bottomTerm = maxBoostAtt.getCompetitiveTerm();
+    if (term != null && (bottom != this.bottom || bottomTerm != this.bottomTerm)) {
      this.bottom = bottom;
+      this.bottomTerm = bottomTerm;
      // clone the term before potentially doing something with it
      // this is a rare but wonderful occurrence anyway
      queuedBottom = new BytesRef(term);
--- a/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java
+++ b/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java
@ -19,19 +19,30 @@ package org.apache.lucene.search;

 import java.io.IOException;
 import java.io.Serializable;
+import java.util.Arrays;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 import java.util.PriorityQueue;
+import java.util.Comparator;

+import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.MultiFields;
-import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.Terms;
-import org.apache.lucene.queryParser.QueryParser; // for javadoc
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeImpl;
-import org.apache.lucene.util.PagedBytes;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.ByteBlockPool;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.RamUsageEstimator;
+import org.apache.lucene.util.ReaderUtil;
+import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;

 /**
 * An abstract {@link Query} that matches documents
@ -39,7 +50,7 @@ import org.apache.lucene.util.PagedBytes;
 * FilteredTermsEnum} enumeration.
 *
 * <p>This query cannot be used directly; you must subclass
- * it and define {@link #getTermsEnum} to provide a {@link
+ * it and define {@link #getTermsEnum(IndexReader,AttributeSource)} to provide a {@link
 * FilteredTermsEnum} that iterates through the terms to be
 * matched.
 *
@ -71,34 +82,25 @@ public abstract class MultiTermQuery extends Query {
  protected RewriteMethod rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
  transient int numberOfTerms = 0;
  
-  /** Add this {@link Attribute} to a {@link TermsEnum} returned by {@link #getTermsEnum}
+  /** Add this {@link Attribute} to a {@link TermsEnum} returned by {@link #getTermsEnum(IndexReader,AttributeSource)}
   * and update the boost on each returned term. This enables to control the boost factor
   * for each matching term in {@link #SCORING_BOOLEAN_QUERY_REWRITE} or
   * {@link TopTermsBooleanQueryRewrite} mode.
   * {@link FuzzyQuery} is using this to take the edit distance into account.
+   * <p><b>Please note:</b> This attribute is intended to be added only by the TermsEnum
+   * to itself in its constructor and consumed by the {@link RewriteMethod}.
+   * @lucene.internal
   */
  public static interface BoostAttribute extends Attribute {
    /** Sets the boost in this attribute */
    public void setBoost(float boost);
    /** Retrieves the boost, default is {@code 1.0f}. */
    public float getBoost();
-    /** Sets the maximum boost for terms that would never get
-     * into the priority queue of {@link MultiTermQuery.TopTermsBooleanQueryRewrite}.
-     * This value is not changed by {@link AttributeImpl#clear}
-     * and not used in {@code equals()} and {@code hashCode()}.
-     * Do not change the value in the {@link TermsEnum}!
-     */
-    public void setMaxNonCompetitiveBoost(float maxNonCompetitiveBoost);
-    /** Retrieves the maximum boost that is not competitive,
-     * default is megative infinity. You can use this boost value
-     * as a hint when writing the {@link TermsEnum}.
-     */
-    public float getMaxNonCompetitiveBoost();
  }

  /** Implementation class for {@link BoostAttribute}. */
  public static final class BoostAttributeImpl extends AttributeImpl implements BoostAttribute {
-    private float boost = 1.0f, maxNonCompetitiveBoost = Float.NEGATIVE_INFINITY;
+    private float boost = 1.0f;
  
    public void setBoost(float boost) {
      this.boost = boost;
@ -107,14 +109,6 @@ public abstract class MultiTermQuery extends Query {
    public float getBoost() {
      return boost;
    }
-  
-    public void setMaxNonCompetitiveBoost(float maxNonCompetitiveBoost) {
-      this.maxNonCompetitiveBoost = maxNonCompetitiveBoost;
-    }
-    
-    public float getMaxNonCompetitiveBoost() {
-      return maxNonCompetitiveBoost;
-    }

    @Override
    public void clear() {
@ -141,6 +135,83 @@ public abstract class MultiTermQuery extends Query {
    }
  }

+  /** Add this {@link Attribute} to a fresh {@link AttributeSource} before calling
+   * {@link #getTermsEnum(IndexReader,AttributeSource)}.
+   * {@link FuzzyQuery} is using this to control its internal behaviour
+   * to only return competitive terms.
+   * <p><b>Please note:</b> This attribute is intended to be added by the {@link RewriteMethod}
+   * to an empty {@link AttributeSource} that is shared for all segments
+   * during query rewrite. This attribute source is passed to all segment enums
+   * on {@link #getTermsEnum(IndexReader,AttributeSource)}.
+   * {@link TopTermsBooleanQueryRewrite} uses this attribute to
+   * inform all enums about the current boost that is not competitive.
+   * @lucene.internal
+   */
+  public static interface MaxNonCompetitiveBoostAttribute extends Attribute {
+    /** This is the maximum boost that would not be competitive. */
+    public void setMaxNonCompetitiveBoost(float maxNonCompetitiveBoost);
+    /** This is the maximum boost that would not be competitive. Default is negative infinity, which means every term is competitive. */
+    public float getMaxNonCompetitiveBoost();
+    /** This is the term that triggered the boost change, or <code>null</code>. */
+    public void setCompetitiveTerm(BytesRef competitiveTerm);
+    /** This is the term that triggered the boost change, or <code>null</code>. Default is <code>null</code>, which means every term is competitive. */
+    public BytesRef getCompetitiveTerm();
+  }
+
+  /** Implementation class for {@link MaxNonCompetitiveBoostAttribute}. */
+  public static final class MaxNonCompetitiveBoostAttributeImpl extends AttributeImpl implements MaxNonCompetitiveBoostAttribute {
+    private float maxNonCompetitiveBoost = Float.NEGATIVE_INFINITY;
+    private BytesRef competitiveTerm = null;
+  
+    public void setMaxNonCompetitiveBoost(final float maxNonCompetitiveBoost) {
+      this.maxNonCompetitiveBoost = maxNonCompetitiveBoost;
+    }
+    
+    public float getMaxNonCompetitiveBoost() {
+      return maxNonCompetitiveBoost;
+    }
+
+    public void setCompetitiveTerm(final BytesRef competitiveTerm) {
+      this.competitiveTerm = competitiveTerm;
+    }
+    
+    public BytesRef getCompetitiveTerm() {
+      return competitiveTerm;
+    }
+
+    @Override
+    public void clear() {
+      maxNonCompetitiveBoost = Float.NEGATIVE_INFINITY;
+      competitiveTerm = null;
+    }
+
+    @Override
+    public boolean equals(Object other) {
+      if (this == other)
+        return true;
+      if (other instanceof MaxNonCompetitiveBoostAttributeImpl) {
+        final MaxNonCompetitiveBoostAttributeImpl o = (MaxNonCompetitiveBoostAttributeImpl) other;
+        return (o.maxNonCompetitiveBoost == maxNonCompetitiveBoost)
+          && (o.competitiveTerm == null ? competitiveTerm == null : o.competitiveTerm.equals(competitiveTerm));
+      }
+      return false;
+    }
+
+    @Override
+    public int hashCode() {
+      int hash = Float.floatToIntBits(maxNonCompetitiveBoost);
+      if (competitiveTerm != null) hash = 31 * hash + competitiveTerm.hashCode();
+      return hash;
+    }
+    
+    @Override
+    public void copyTo(AttributeImpl target) {
+      final MaxNonCompetitiveBoostAttributeImpl t = (MaxNonCompetitiveBoostAttributeImpl) target;
+      t.setMaxNonCompetitiveBoost(maxNonCompetitiveBoost);
+      t.setCompetitiveTerm(competitiveTerm);
+    }
+  }
+
  /** Abstract class that defines how the query is rewritten. */
  public static abstract class RewriteMethod implements Serializable {
    public abstract Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException;
@ -177,69 +248,85 @@ public abstract class MultiTermQuery extends Query {
  private abstract static class BooleanQueryRewrite extends RewriteMethod {
  
    protected final int collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException {
-      final Fields fields = MultiFields.getFields(reader);
-      if (fields == null) {
-        // reader has no fields
-        return 0;
-      }
-
-      final Terms terms = fields.terms(query.field);
-      if (terms == null) {
-        // field does not exist
-        return 0;
-      }
-
-      final TermsEnum termsEnum = query.getTermsEnum(reader);
-      assert termsEnum != null;
-
-      if (termsEnum == TermsEnum.EMPTY)
-        return 0;
-      final BoostAttribute boostAtt =
-        termsEnum.attributes().addAttribute(BoostAttribute.class);
-      collector.boostAtt = boostAtt;
+      final List<IndexReader> subReaders = new ArrayList<IndexReader>();
+      ReaderUtil.gatherSubReaders(subReaders, reader);
      int count = 0;
-      BytesRef bytes;
-      while ((bytes = termsEnum.next()) != null) {
-        if (collector.collect(termsEnum, bytes, boostAtt.getBoost())) {
-          termsEnum.cacheCurrentTerm();
-          count++;
-        } else {
-          break;
+      Comparator<BytesRef> lastTermComp = null;
+      
+      for (IndexReader r : subReaders) {
+        final Fields fields = r.fields();
+        if (fields == null) {
+          // reader has no fields
+          continue;
+        }
+
+        final Terms terms = fields.terms(query.field);
+        if (terms == null) {
+          // field does not exist
+          continue;
+        }
+
+        final TermsEnum termsEnum = query.getTermsEnum(r, collector.attributes);
+        assert termsEnum != null;
+
+        if (termsEnum == TermsEnum.EMPTY)
+          continue;
+        
+        // Check comparator compatibility:
+        final Comparator<BytesRef> newTermComp = termsEnum.getComparator();
+        if (lastTermComp != null && newTermComp != lastTermComp)
+          throw new RuntimeException("term comparator should not change between segments: "+lastTermComp+" != "+newTermComp);
+        lastTermComp = newTermComp;
+        
+        collector.setNextEnum(termsEnum);
+        BytesRef bytes;
+        while ((bytes = termsEnum.next()) != null) {
+          if (collector.collect(bytes)) {
+            termsEnum.cacheCurrentTerm();
+            count++;
+          } else {
+            return count; // interrupt whole term collection, so also don't iterate other subReaders
+          }
        }
      }
-      collector.boostAtt = null;
      return count;
    }
    
    protected static abstract class TermCollector {
-      private BoostAttribute boostAtt = null;
+      /** attributes used for communication with the enum */
+      public final AttributeSource attributes = new AttributeSource();
    
      /** return false to stop collecting */
-      public abstract boolean collect(TermsEnum termsEnum, BytesRef bytes, float boost) throws IOException;
+      public abstract boolean collect(BytesRef bytes) throws IOException;
      
-      /** set the minimum boost as a hint for the term producer */
-      protected final void setMaxNonCompetitiveBoost(float maxNonCompetitiveBoost) {
-        assert boostAtt != null;
-        boostAtt.setMaxNonCompetitiveBoost(maxNonCompetitiveBoost);
-      }
+      /** the next segment's {@link TermsEnum} that is used to collect terms */
+      public abstract void setNextEnum(TermsEnum termsEnum) throws IOException;
    }
  }
  
  private static class ScoringBooleanQueryRewrite extends BooleanQueryRewrite {
    @Override
    public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException {
-      final BooleanQuery result = new BooleanQuery(true);
+      final ParallelArraysTermCollector col = new ParallelArraysTermCollector();
+      collectTerms(reader, query, col);
+      
      final Term placeholderTerm = new Term(query.field);
-      query.incTotalNumberOfTerms(collectTerms(reader, query, new TermCollector() {
-        @Override
-        public boolean collect(TermsEnum termsEnum, BytesRef bytes, float boost) {
-          // add new TQ, we must clone the term, else it may get overwritten!
-          TermQuery tq = new TermQuery(placeholderTerm.createTerm(new BytesRef(bytes)), termsEnum.docFreq());
-          tq.setBoost(query.getBoost() * boost); // set the boost
-          result.add(tq, BooleanClause.Occur.SHOULD); // add to query
-          return true;
+      final BooleanQuery result = new BooleanQuery(true);
+      final int size = col.terms.size();
+      if (size > 0) {
+        final int sort[] = col.terms.sort(col.termsEnum.getComparator());
+        final int[] docFreq = col.array.docFreq;
+        final float[] boost = col.array.boost;
+        for (int i = 0; i < size; i++) {
+          final int pos = sort[i];
+          final Term term = placeholderTerm.createTerm(col.terms.get(pos, new BytesRef()));
+          assert reader.docFreq(term) == docFreq[pos];
+          final TermQuery tq = new TermQuery(term, docFreq[pos]);
+          tq.setBoost(query.getBoost() * boost[pos]);
+          result.add(tq, BooleanClause.Occur.SHOULD);
        }
-      }));
+      }
+      query.incTotalNumberOfTerms(size);
      return result;
    }

@ -247,6 +334,75 @@ public abstract class MultiTermQuery extends Query {
    protected Object readResolve() {
      return SCORING_BOOLEAN_QUERY_REWRITE;
    }
+    
+    static final class ParallelArraysTermCollector extends TermCollector {
+      final TermFreqBoostByteStart array = new TermFreqBoostByteStart(16);
+      final BytesRefHash terms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array);
+      TermsEnum termsEnum;
+
+      private BoostAttribute boostAtt;
+    
+      @Override
+      public void setNextEnum(TermsEnum termsEnum) throws IOException {
+        this.termsEnum = termsEnum;
+        this.boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
+      }
+    
+      @Override
+      public boolean collect(BytesRef bytes) {
+        final int e = terms.add(bytes);
+        if (e < 0 ) {
+          // duplicate term: update docFreq
+          final int pos = (-e)-1;
+          array.docFreq[pos] += termsEnum.docFreq();
+          assert array.boost[pos] == boostAtt.getBoost() : "boost should be equal in all segment TermsEnums";
+        } else {
+          // new entry: we populate the entry initially
+          array.docFreq[e] = termsEnum.docFreq();
+          array.boost[e] = boostAtt.getBoost();
+        }
+        // if the new entry reaches the max clause count, we exit early
+        if (e >= BooleanQuery.getMaxClauseCount())
+          throw new BooleanQuery.TooManyClauses();
+        return true;
+      }
+    }
+    
+    /** Special implementation of BytesStartArray that keeps parallel arrays for boost and docFreq */
+    static final class TermFreqBoostByteStart extends DirectBytesStartArray  {
+      int[] docFreq;
+      float[] boost;
+      
+      public TermFreqBoostByteStart(int initSize) {
+        super(initSize);
+      }
+
+      @Override
+      public int[] init() {
+        final int[] ord = super.init();
+        boost = new float[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_FLOAT)];
+        docFreq = new int[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_INT)];
+        assert boost.length >= ord.length && docFreq.length >= ord.length;
+        return ord;
+      }
+
+      @Override
+      public int[] grow() {
+        final int[] ord = super.grow();
+        docFreq = ArrayUtil.grow(docFreq, ord.length);
+        boost = ArrayUtil.grow(boost, ord.length);
+        assert boost.length >= ord.length && docFreq.length >= ord.length;
+        return ord;
+      }
+
+      @Override
+      public int[] clear() {
+       boost = null;
+       docFreq = null;
+       return super.clear();
+      }
+      
+    }
  }

  /** A rewrite method that first translates each term into
@ -291,44 +447,92 @@ public abstract class MultiTermQuery extends Query {
      final int maxSize = Math.min(size, BooleanQuery.getMaxClauseCount());
      final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
      collectTerms(reader, query, new TermCollector() {
+        private final MaxNonCompetitiveBoostAttribute maxBoostAtt =
+          attributes.addAttribute(MaxNonCompetitiveBoostAttribute.class);
+        
+        private final Map<BytesRef,ScoreTerm> visitedTerms = new HashMap<BytesRef,ScoreTerm>();
+        
+        private TermsEnum termsEnum;
+        private Comparator<BytesRef> termComp;
+        private BoostAttribute boostAtt;        
+        private ScoreTerm st;
+        
        @Override
-        public boolean collect(TermsEnum termsEnum, BytesRef bytes, float boost) {
+        public void setNextEnum(TermsEnum termsEnum) throws IOException {
+          this.termsEnum = termsEnum;
+          this.termComp = termsEnum.getComparator();
+          // lazy init the initial ScoreTerm because comparator is not known on ctor:
+          if (st == null)
+            st = new ScoreTerm(this.termComp);
+          boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
+        }
+      
+        @Override
+        public boolean collect(BytesRef bytes) {
+          final float boost = boostAtt.getBoost();
          // ignore uncompetetive hits
-          if (stQueue.size() >= maxSize && boost <= stQueue.peek().boost)
-            return true;
-          // add new entry in PQ, we must clone the term, else it may get overwritten!
-          st.bytes.copy(bytes);
-          st.boost = boost;
-          st.docFreq = termsEnum.docFreq();
-          stQueue.offer(st);
-          // possibly drop entries from queue
-          st = (stQueue.size() > maxSize) ? stQueue.poll() : new ScoreTerm();
-          setMaxNonCompetitiveBoost((stQueue.size() >= maxSize) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY);
+          if (stQueue.size() == maxSize) {
+            final ScoreTerm t = stQueue.peek();
+            if (boost < t.boost)
+              return true;
+            if (boost == t.boost && termComp.compare(bytes, t.bytes) > 0)
+              return true;
+          }
+          ScoreTerm t = visitedTerms.get(bytes);
+          if (t != null) {
+            // if the term is already in the PQ, only update docFreq of term in PQ
+            t.docFreq += termsEnum.docFreq();
+            assert t.boost == boost : "boost should be equal in all segment TermsEnums";
+          } else {
+            // add new entry in PQ, we must clone the term, else it may get overwritten!
+            st.bytes.copy(bytes);
+            st.boost = boost;
+            st.docFreq = termsEnum.docFreq();
+            visitedTerms.put(st.bytes, st);
+            stQueue.offer(st);
+            // possibly drop entries from queue
+            if (stQueue.size() > maxSize) {
+              st = stQueue.poll();
+              visitedTerms.remove(st.bytes);
+            } else {
+              st = new ScoreTerm(termComp);
+            }
+            assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize";
+            // set maxBoostAtt with values to help FuzzyTermsEnum to optimize
+            if (stQueue.size() == maxSize) {
+              t = stQueue.peek();
+              maxBoostAtt.setMaxNonCompetitiveBoost(t.boost);
+              maxBoostAtt.setCompetitiveTerm(t.bytes);
+            }
+          }
          return true;
        }
-        
-        // reusable instance
-        private ScoreTerm st = new ScoreTerm();
      });
      
      final Term placeholderTerm = new Term(query.field);
      final BooleanQuery bq = new BooleanQuery(true);
-      for (final ScoreTerm st : stQueue) {
-        // add new query, we must clone the term, else it may get overwritten!
-        Query tq = getQuery(placeholderTerm.createTerm(st.bytes), st.docFreq);
+      final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
+      Arrays.sort(scoreTerms, new Comparator<ScoreTerm>() {
+        public int compare(ScoreTerm st1, ScoreTerm st2) {
+          assert st1.termComp == st2.termComp :
+            "term comparator should not change between segments";
+          return st1.termComp.compare(st1.bytes, st2.bytes);
+        }
+      });
+      for (final ScoreTerm st : scoreTerms) {
+        final Term term = placeholderTerm.createTerm(st.bytes);
+        assert reader.docFreq(term) == st.docFreq;
+        Query tq = getQuery(term, st.docFreq);
        tq.setBoost(query.getBoost() * st.boost); // set the boost
        bq.add(tq, BooleanClause.Occur.SHOULD);   // add to query
      }
-      query.incTotalNumberOfTerms(bq.clauses().size());
+      query.incTotalNumberOfTerms(scoreTerms.length);
      return bq;
    }
  
    @Override
    public int hashCode() {
-      final int prime = 17;
-      int result = 1;
-      result = prime * result + size;
-      return result;
+      return 31 * size;
    }

    @Override
@ -341,15 +545,20 @@ public abstract class MultiTermQuery extends Query {
      return true;
    }
  
-    private static class ScoreTerm implements Comparable<ScoreTerm> {
+    static final class ScoreTerm implements Comparable<ScoreTerm> {
+      public final Comparator<BytesRef> termComp;
+
      public final BytesRef bytes = new BytesRef();
      public float boost;
      public int docFreq;
      
+      public ScoreTerm(Comparator<BytesRef> termComp) {
+        this.termComp = termComp;
+      }
+      
      public int compareTo(ScoreTerm other) {
        if (this.boost == other.boost)
-          // TODO: is it OK to use default compare here?
-          return other.bytes.compareTo(this.bytes);
+          return termComp.compare(other.bytes, this.bytes);
        else
          return Float.compare(this.boost, other.boost);
      }
@ -362,8 +571,8 @@ public abstract class MultiTermQuery extends Query {
   * scores as computed by the query.
   * 
   * <p>
-   * This rewrite mode only uses the top scoring terms so it will not overflow
-   * the boolean max clause count. It is the default rewrite mode for
+   * This rewrite method only uses the top scoring terms so it will not overflow
+   * the boolean max clause count. It is the default rewrite method for
   * {@link FuzzyQuery}.
   * 
   * @see #setRewriteMethod
@ -510,63 +719,61 @@ public abstract class MultiTermQuery extends Query {
      final int docCountCutoff = (int) ((docCountPercent / 100.) * reader.maxDoc());
      final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff);

-      final CutOffTermCollector col = new CutOffTermCollector(reader, query.field, docCountCutoff, termCountLimit);
+      final CutOffTermCollector col = new CutOffTermCollector(docCountCutoff, termCountLimit);
      collectTerms(reader, query, col);
-      
+      final int size = col.pendingTerms.size();
      if (col.hasCutOff) {
        return CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query);
-      } else if (col.termCount == 0) {
+      } else if (size == 0) {
        return new BooleanQuery(true);
      } else {
-        final PagedBytes.Reader bytesReader = col.pendingTerms.freeze(false);
-        try {
-          final BooleanQuery bq = new BooleanQuery(true);
-          final Term placeholderTerm = new Term(query.field);
-          long start = col.startOffset;
-          for(int i = 0; i < col.termCount; i++) {
-            final BytesRef bytes = new BytesRef();
-            start = bytesReader.fillUsingLengthPrefix3(bytes, start);
-            bq.add(new TermQuery(placeholderTerm.createTerm(bytes)), BooleanClause.Occur.SHOULD);
-          }
-          // Strip scores
-          final Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq));
-          result.setBoost(query.getBoost());
-          query.incTotalNumberOfTerms(col.termCount);
-          return result;
-        } finally {
-          bytesReader.close();
+        final BooleanQuery bq = new BooleanQuery(true);
+        final Term placeholderTerm = new Term(query.field);
+        final BytesRefHash pendingTerms = col.pendingTerms;
+        final int sort[] = pendingTerms.sort(col.termsEnum.getComparator());
+        for(int i = 0; i < size; i++) {
+          // docFreq is not used for constant score here, we pass 1
+          // to explicitly set a fake value, so it's not calculated
+          bq.add(new TermQuery(
+            placeholderTerm.createTerm(pendingTerms.get(sort[i], new BytesRef())), 1
+          ), BooleanClause.Occur.SHOULD);
        }
+        // Strip scores
+        final Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq));
+        result.setBoost(query.getBoost());
+        query.incTotalNumberOfTerms(size);
+        return result;
      }
    }
    
-    private static final class CutOffTermCollector extends TermCollector {
-      CutOffTermCollector(IndexReader reader, String field, int docCountCutoff, int termCountLimit) {
-        this.reader = reader;
-        this.field = field;
+    static final class CutOffTermCollector extends TermCollector {
+      CutOffTermCollector(int docCountCutoff, int termCountLimit) {
        this.docCountCutoff = docCountCutoff;
        this.termCountLimit = termCountLimit;
      }
    
-      public boolean collect(TermsEnum termsEnum, BytesRef bytes, float boost) throws IOException {
-        termCount++;
-        if (termCount >= termCountLimit || docVisitCount >= docCountCutoff) {
+      @Override
+      public void setNextEnum(TermsEnum termsEnum) throws IOException {
+        this.termsEnum = termsEnum;
+      }
+        
+      @Override
+      public boolean collect(BytesRef bytes) throws IOException {
+        if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) {
          hasCutOff = true;
          return false;
        }
-        pendingTerms.copyUsingLengthPrefix(bytes);
+        pendingTerms.add(bytes);
        docVisitCount += termsEnum.docFreq();
        return true;
      }
      
      int docVisitCount = 0;
      boolean hasCutOff = false;
-      int termCount = 0;
-      
-      final IndexReader reader;
-      final String field;
+      TermsEnum termsEnum;
+
      final int docCountCutoff, termCountLimit;
-      final PagedBytes pendingTerms = new PagedBytes(15); // max term size is 32 KiB
-      final long startOffset = pendingTerms.getPointer();
+      final BytesRefHash pendingTerms = new BytesRefHash();
    }

    @Override
@ -644,8 +851,20 @@ public abstract class MultiTermQuery extends Query {
   *  field does exist).  This method should not return null
   *  (should instead return {@link TermsEnum#EMPTY} if no
   *  terms match).  The TermsEnum must already be
-   *  positioned to the first matching term. */
-  protected abstract TermsEnum getTermsEnum(IndexReader reader) throws IOException;
+   *  positioned to the first matching term.
+   * The given {@link AttributeSource} is passed by the {@link RewriteMethod} to
+   * provide attributes that the rewrite method uses to report information such as
+   * maximum competitive boosts.
+   * This is currently only used by {@link TopTermsBooleanQueryRewrite}.
+   */
+  protected abstract TermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException;
+
+  /**
+   * Convenience variant for callers that do not need attributes. It delegates to
+   * {@link #getTermsEnum(IndexReader, AttributeSource)} with a newly created
+   * {@link AttributeSource}, i.e. it is equal to
+   * <code>getTermsEnum(reader, new AttributeSource())</code>.
+   */
+  protected final TermsEnum getTermsEnum(IndexReader reader) throws IOException {
+    final AttributeSource emptyAtts = new AttributeSource();
+    return getTermsEnum(reader, emptyAtts);
+  }

  /**
   * Expert: Return the number of unique terms visited during execution of the query.
--- a/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java
+++ b/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java
@ -26,6 +26,7 @@ import org.apache.lucene.document.NumericField; // for javadocs
 import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.util.ToStringUtils;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.index.TermsEnum;

@ -301,7 +302,7 @@ public final class NumericRangeQuery<T extends Number> extends MultiTermQuery {
  }

  @Override @SuppressWarnings("unchecked")
-  protected TermsEnum getTermsEnum(final IndexReader reader) throws IOException {
+  protected TermsEnum getTermsEnum(final IndexReader reader, AttributeSource atts) throws IOException {
    // very strange: java.lang.Number itsself is not Comparable, but all subclasses used here are
    return (min != null && max != null && ((Comparable<T>) min).compareTo(max) > 0) ?
      TermsEnum.EMPTY :
--- a/lucene/src/java/org/apache/lucene/search/PrefixQuery.java
+++ b/lucene/src/java/org/apache/lucene/search/PrefixQuery.java
@ -24,6 +24,7 @@ import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.ToStringUtils;

 /** A Query that matches documents containing terms with a specified prefix. A PrefixQuery
@ -45,7 +46,7 @@ public class PrefixQuery extends MultiTermQuery {
  public Term getPrefix() { return prefix; }
  
  @Override  
-  protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
+  protected TermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException {
    if (prefix.bytes().length == 0) {
      // no prefix -- match all terms for this field:
      // NOTE: for now, MultiTermQuery enums terms at the
--- a/lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java
+++ b/lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java
@ -33,7 +33,7 @@ import org.apache.lucene.index.IndexReader;
 * once per day.
 */
 public class QueryWrapperFilter extends Filter {
-  private Query query;
+  private final Query query;

  /** Constructs a filter which only matches documents matching
   * <code>query</code>.
@ -41,6 +41,11 @@ public class QueryWrapperFilter extends Filter {
  public QueryWrapperFilter(Query query) {
    this.query = query;
  }
+  
+  /** Returns the {@link Query} this filter was constructed with. */
+  public final Query getQuery() {
+    return this.query;
+  }

  @Override
  public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
--- a/lucene/src/java/org/apache/lucene/search/TermRangeQuery.java
+++ b/lucene/src/java/org/apache/lucene/search/TermRangeQuery.java
@ -25,6 +25,7 @@ import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.util.ToStringUtils;
+import org.apache.lucene.util.AttributeSource;

 /**
 * A Query that matches documents within an range of terms.
@ -130,7 +131,7 @@ public class TermRangeQuery extends MultiTermQuery {
  public Collator getCollator() { return collator; }
  
  @Override
-  protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
+  protected TermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException {
    if (collator == null && lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) {
      return TermsEnum.EMPTY;
    }
--- a/lucene/src/java/org/apache/lucene/util/ArrayUtil.java
+++ b/lucene/src/java/org/apache/lucene/util/ArrayUtil.java
@ -27,14 +27,7 @@ import java.util.Collection;

 public final class ArrayUtil {

-  /**
-   * @deprecated This constructor was not intended to be public and should not be used.
-   *  This class contains solely a static utility methods.
-   *  It will be made private in Lucene 4.0
-   */
-  // make private in 4.0!
-  @Deprecated
-  public ArrayUtil() {} // no instance
+  private ArrayUtil() {} // no instance

  /*
     Begin Apache Harmony code
@ -247,6 +240,19 @@ public final class ArrayUtil {
  public static short[] grow(short[] array) {
    return grow(array, 1 + array.length);
  }
+  
+  /**
+   * Returns an array with at least <code>minSize</code> elements. If the given
+   * array is already long enough it is returned as is; otherwise a new array of
+   * length <code>oversize(minSize, RamUsageEstimator.NUM_BYTES_FLOAT)</code> is
+   * allocated and the existing elements are copied to its start.
+   */
+  public static float[] grow(float[] array, int minSize) {
+    if (array.length >= minSize) {
+      return array;
+    }
+    final float[] grown = new float[oversize(minSize, RamUsageEstimator.NUM_BYTES_FLOAT)];
+    System.arraycopy(array, 0, grown, 0, array.length);
+    return grown;
+  }
+
+  /** Grows the given array by at least one element; see {@link #grow(float[], int)}. */
+  public static float[] grow(float[] array) {
+    final int minSize = array.length + 1;
+    return grow(array, minSize);
+  }

  public static short[] shrink(short[] array, int targetSize) {
    final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_SHORT);
--- a/lucene/src/java/org/apache/lucene/util/ByteBlockPool.java
+++ b/lucene/src/java/org/apache/lucene/util/ByteBlockPool.java
@ -16,8 +16,12 @@ package org.apache.lucene.util;
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
+import java.util.Arrays;
+import java.util.List;
+import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;

-/* Class that Posting and PostingVector use to write byte
+/** 
+ * Class that Posting and PostingVector use to write byte
 * streams into shared fixed-size byte[] arrays.  The idea
 * is to allocate slices of increasing lengths For
 * example, the first slice is 5 bytes, the next slice is
@ -31,14 +35,10 @@ package org.apache.lucene.util;
 * the end with a non-zero byte.  This way the methods
 * that are writing into the slice don't need to record
 * its length and instead allocate a new slice once they
- * hit a non-zero byte. */
-
-import java.util.Arrays;
-
-
-import java.util.List;
-import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF;
-
+ * hit a non-zero byte. 
+ * 
+ * @lucene.internal
+ **/
 public final class ByteBlockPool {
  public final static int BYTE_BLOCK_SHIFT = 15;
  public final static int BYTE_BLOCK_SIZE = 1 << BYTE_BLOCK_SHIFT;
@ -62,6 +62,22 @@ public final class ByteBlockPool {
      return new byte[blockSize];
    }
  }
+  
+  /**
+   * An {@link Allocator} whose {@link #recycleByteBlocks(byte[][], int, int)}
+   * implementation is empty.
+   */
+  public static final class DirectAllocator extends Allocator {
+
+    /** Creates an allocator with a block size of {@link ByteBlockPool#BYTE_BLOCK_SIZE}. */
+    public DirectAllocator() {
+      this(BYTE_BLOCK_SIZE);
+    }
+
+    /** Creates an allocator that hands <code>blockSize</code> to the {@link Allocator} constructor. */
+    public DirectAllocator(int blockSize) {
+      super(blockSize);
+    }
+
+    /** No-op: the passed blocks are neither modified nor retained. */
+    @Override
+    public void recycleByteBlocks(byte[][] blocks, int start, int end) {
+      // nothing to do
+    }
+
+  }

  public byte[][] buffers = new byte[10][];

--- a/lucene/src/java/org/apache/lucene/util/BytesRefHash.java
+++ b/lucene/src/java/org/apache/lucene/util/BytesRefHash.java
@ -17,13 +17,15 @@ package org.apache.lucene.util;
 * limitations under the License.
 */

+import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK;
+import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SHIFT;
+import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
+
 import java.util.Arrays;
 import java.util.Comparator;
 import java.util.concurrent.atomic.AtomicLong;

-import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK;
-import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE;
-import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SHIFT;
+import org.apache.lucene.util.ByteBlockPool.DirectAllocator;

 /**
 * {@link BytesRefHash} is a special purpose hash-map like data-structure
@ -54,6 +56,14 @@ public final class BytesRefHash {
  public static final int DEFAULT_CAPACITY = 16;
  private final BytesStartArray bytesStartArray;
  private AtomicLong bytesUsed;
+  
+  /**
+   * Creates a new {@link BytesRefHash} with a {@link ByteBlockPool} using a
+   * {@link DirectAllocator}.
+   */
+  public BytesRefHash() { 
+    this(new ByteBlockPool(new DirectAllocator()));
+  }

  /**
   * Creates a new {@link BytesRefHash}
@ -75,7 +85,7 @@ public final class BytesRefHash {
    Arrays.fill(ords, -1);
    this.bytesStartArray = bytesStartArray;
    bytesStart = bytesStartArray.init();
-    bytesUsed = bytesStartArray.bytesUsed();
+    // Query the start array once; if it supplies no counter (null), fall back to
+    // a private AtomicLong so the accounting below always has a target.
+    final AtomicLong startArrayBytesUsed = bytesStartArray.bytesUsed();
+    bytesUsed = startArrayBytesUsed == null ? new AtomicLong(0) : startArrayBytesUsed;
    bytesUsed.addAndGet(hashSize * RamUsageEstimator.NUM_BYTES_INT);
  }

@ -143,7 +153,6 @@ public final class BytesRefHash {
   *          the {@link Comparator} used for sorting
   */
  public int[] sort(Comparator<BytesRef> comp) {
-    assert bytesStart != null : "Bytesstart is null - not initialized";
    final int[] compact = compact();
    quickSort(comp, compact, 0, count - 1);
    return compact;
@ -536,13 +545,13 @@ public final class BytesRefHash {
    public abstract AtomicLong bytesUsed();
  }

-  static class DirectBytesStartArray extends BytesStartArray {
+  public static class DirectBytesStartArray extends BytesStartArray {

-    private final int initSize;
+    protected final int initSize;
    private int[] bytesStart;
    private final AtomicLong bytesUsed = new AtomicLong(0);

-    DirectBytesStartArray(int initSize) {
+    public DirectBytesStartArray(int initSize) {
      this.initSize = initSize;
    }

--- a/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java
+++ b/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java
@ -0,0 +1,186 @@
+package org.apache.lucene.search;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.MultiReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.RandomIndexWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+
+import java.io.IOException;
+
+public class TestMultiTermQueryRewrites extends LuceneTestCase {
+
+  static Directory dir, sdir1, sdir2;
+  static IndexReader reader, multiReader;
+  static IndexSearcher searcher, multiSearcher;
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    dir = newDirectory();
+    sdir1 = newDirectory();
+    sdir2 = newDirectory();
+    final RandomIndexWriter writer = new RandomIndexWriter(random, dir, new MockAnalyzer());
+    final RandomIndexWriter swriter1 = new RandomIndexWriter(random, sdir1, new MockAnalyzer());
+    final RandomIndexWriter swriter2 = new RandomIndexWriter(random, sdir2, new MockAnalyzer());
+
+    for (int i = 0; i < 10; i++) {
+      Document doc = new Document();
+      doc.add(newField("data", Integer.toString(i), Field.Store.NO, Field.Index.NOT_ANALYZED));
+      writer.addDocument(doc);
+      ((i % 2 == 0) ? swriter1 : swriter2).addDocument(doc);
+    }
+    writer.optimize(); swriter1.optimize(); swriter2.optimize();
+    writer.close(); swriter1.close(); swriter2.close();
+    
+    reader = IndexReader.open(dir, true);
+    searcher = new IndexSearcher(reader);
+    multiReader = new MultiReader(new IndexReader[] {
+      IndexReader.open(sdir1, true), IndexReader.open(sdir2, true) 
+    }, true);
+    multiSearcher = new IndexSearcher(multiReader);
+  }
+
+  @AfterClass
+  public static void afterClass() throws Exception {
+    reader.close();
+    multiReader.close();
+    dir.close(); sdir1.close(); sdir2.close();
+    reader = multiReader = null;
+    searcher = multiSearcher = null;
+    dir = sdir1 = sdir2 = null;
+  }
+  
+  private Query extractInnerQuery(Query q) {
+    if (q instanceof ConstantScoreQuery) {
+      // wrapped as ConstantScoreQuery using QueryWrapperFilter
+      q = ((QueryWrapperFilter) ((ConstantScoreQuery) q).getFilter()).getQuery();
+    }
+    return q;
+  }
+  
+  private Term extractTerm(Query q) {
+    q = extractInnerQuery(q);
+    return ((TermQuery) q).getTerm();
+  }
+  
+  private void checkBooleanQueryOrder(Query q) {
+    q = extractInnerQuery(q);
+    final BooleanQuery bq = (BooleanQuery) q;
+    Term last = null, act;
+    for (BooleanClause clause : bq.clauses()) {
+      act = extractTerm(clause.getQuery());
+      if (last != null) {
+        assertTrue("sort order of terms in BQ violated", last.compareTo(act) < 0);
+      }
+      last = act;
+    }
+  }
+  
+  private void checkDuplicateTerms(MultiTermQuery.RewriteMethod method) throws Exception {
+    final MultiTermQuery mtq = new TermRangeQuery("data", "2", "7", true, true);
+    mtq.setRewriteMethod(method);
+    final Query q1 = searcher.rewrite(mtq);
+    final Query q2 = multiSearcher.rewrite(mtq);
+    if (VERBOSE) {
+      System.out.println();
+      System.out.println("single segment: " + q1);
+      System.out.println(" multi segment: " + q2);
+    }
+    assertEquals("The multi-segment case must produce same rewritten query", q1, q2);
+    checkBooleanQueryOrder(q1);
+    checkBooleanQueryOrder(q2);
+  }
+  
+  public void testRewritesWithDuplicateTerms() throws Exception {
+    checkDuplicateTerms(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+    
+    checkDuplicateTerms(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE);
+    
+    // use a large PQ here to only test duplicate terms and don't mix up when all scores are equal
+    checkDuplicateTerms(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(1024));
+    checkDuplicateTerms(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(1024));
+    
+    // Test auto rewrite (but only boolean mode), so we set the limits to large values to always get a BQ
+    final MultiTermQuery.ConstantScoreAutoRewrite rewrite = new MultiTermQuery.ConstantScoreAutoRewrite();
+    rewrite.setTermCountCutoff(Integer.MAX_VALUE);
+    rewrite.setDocCountPercent(100.);
+    checkDuplicateTerms(rewrite);
+  }
+  
+  private void checkBooleanQueryBoosts(BooleanQuery bq) {
+    for (BooleanClause clause : bq.clauses()) {
+      final TermQuery mtq = (TermQuery) clause.getQuery();
+      assertEquals("Parallel sorting of boosts in rewrite mode broken",
+        Float.parseFloat(mtq.getTerm().text()), mtq.getBoost());
+    }
+  }
+  
+  private void checkBoosts(MultiTermQuery.RewriteMethod method) throws Exception {
+    final MultiTermQuery mtq = new MultiTermQuery("data") {
+      @Override
+      protected TermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException {
+        return new TermRangeTermsEnum(reader, field, "2", "7", true, true, null) {
+          final MultiTermQuery.BoostAttribute boostAtt =
+            attributes().addAttribute(MultiTermQuery.BoostAttribute.class);
+        
+          @Override
+          protected AcceptStatus accept(BytesRef term) {
+            boostAtt.setBoost(Float.parseFloat(term.utf8ToString()));
+            return super.accept(term);
+          }
+        };
+      }
+      
+      @Override
+      public String toString(String field) {
+        return "dummy";
+      }
+    };
+    mtq.setRewriteMethod(method);
+    final Query q1 = searcher.rewrite(mtq);
+    final Query q2 = multiSearcher.rewrite(mtq);
+    if (VERBOSE) {
+      System.out.println();
+      System.out.println("single segment: " + q1);
+      System.out.println(" multi segment: " + q2);
+    }
+    assertEquals("The multi-segment case must produce same rewritten query", q1, q2);
+    checkBooleanQueryBoosts((BooleanQuery) q1);
+    checkBooleanQueryBoosts((BooleanQuery) q2);
+  }
+  
+  public void testBoosts() throws Exception {
+    checkBoosts(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
+
+    // use a large PQ here to only test boosts and don't mix up when all scores are equal
+    checkBoosts(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(1024));
+  }
+  
+}
--- a/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java
+++ b/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java
@ -29,6 +29,7 @@ import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.RandomIndexWriter;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
@ -85,7 +86,7 @@ public class TestPrefixRandom extends LuceneTestCase {
    }
    
    @Override
-    protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
+    protected TermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException {
      return new SimplePrefixTermsEnum(reader, field, prefix);
    }

--- a/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java
+++ b/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java
@ -36,6 +36,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.AutomatonTestUtil;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
@ -103,7 +104,7 @@ public class TestRegexpRandom2 extends LuceneTestCase {
    }
    
    @Override
-    protected TermsEnum getTermsEnum(IndexReader reader) throws IOException {
+    protected TermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException {
      return new SimpleAutomatonTermsEnum(reader, field);
    }