From dd1c7a858538b2716d3844bcb8f6edbe2e702cb3 Mon Sep 17 00:00:00 2001 From: Uwe Schindler Date: Fri, 15 Oct 2010 14:25:48 +0000 Subject: [PATCH] LUCENE-2690: MultiTermQuery boolean rewrites per segment git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1022934 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 9 +- lucene/MIGRATE.txt | 8 + .../lucene/index/memory/MemoryIndex.java | 2 +- .../lucene/search/FuzzyLikeThisQuery.java | 8 +- .../lucene/search/regex/RegexQuery.java | 3 +- .../lucene/search/regex/TestRegexQuery.java | 3 +- .../search/spell/DirectSpellChecker.java | 8 +- .../apache/lucene/search/AutomatonQuery.java | 3 +- .../org/apache/lucene/search/FuzzyQuery.java | 5 +- .../apache/lucene/search/FuzzyTermsEnum.java | 36 +- .../apache/lucene/search/MultiTermQuery.java | 493 +++++++++++++----- .../lucene/search/NumericRangeQuery.java | 3 +- .../org/apache/lucene/search/PrefixQuery.java | 3 +- .../lucene/search/QueryWrapperFilter.java | 7 +- .../apache/lucene/search/TermRangeQuery.java | 3 +- .../org/apache/lucene/util/ArrayUtil.java | 22 +- .../org/apache/lucene/util/ByteBlockPool.java | 34 +- .../org/apache/lucene/util/BytesRefHash.java | 25 +- .../search/TestMultiTermQueryRewrites.java | 186 +++++++ .../lucene/search/TestPrefixRandom.java | 3 +- .../lucene/search/TestRegexpRandom2.java | 3 +- 21 files changed, 676 insertions(+), 191 deletions(-) create mode 100644 lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index b22cc2190e5..35700af4477 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -147,8 +147,10 @@ API Changes you also override this method on upgrade. (Robert Muir, Mike McCandless) -* LUCENE-2691: IndexWriter.getReader() has been made package local and is now exposed via open and reopen methods on - IndexReader. The semantics of the call is the same as it was prior to the API change. 
(Grant Ingersoll, Mike McCandless) +* LUCENE-2691: IndexWriter.getReader() has been made package local and is now + exposed via open and reopen methods on IndexReader. The semantics of the + call is the same as it was prior to the API change. + (Grant Ingersoll, Mike McCandless) New features @@ -265,6 +267,9 @@ New features * LUCENE-2692: Added several new SpanQuery classes for positional checking (match is in a range, payload is a specific value) (Grant Ingersoll) +* LUCENE-2690: MultiTermQuery boolean rewrites per segment. + (Uwe Schindler, Robert Muir, Mike McCandless) + Optimizations * LUCENE-2410: ~20% speedup on exact (slop=0) PhraseQuery matching. diff --git a/lucene/MIGRATE.txt b/lucene/MIGRATE.txt index 3e4d166a657..592ed610139 100644 --- a/lucene/MIGRATE.txt +++ b/lucene/MIGRATE.txt @@ -316,3 +316,11 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing * LUCENE-2691: The near-real-time API has moved from IndexWriter to IndexReader. Instead of IndexWriter.getReader(), call IndexReader.open(IndexWriter) or IndexReader.reopen(IndexWriter). + +* LUCENE-2690: MultiTermQuery boolean rewrites per segment. + Also MultiTermQuery.getTermsEnum() now takes an AttributeSource. FuzzyTermsEnum + is both consumer and producer of attributes: MTQ.BoostAttribute is + added to the FuzzyTermsEnum and MTQ's rewrite mode consumes it. + The other way round MTQ.TopTermsBooleanQueryRewrite supplies a + global AttributeSource to each segment's TermsEnum. The TermsEnum is consumer + and gets the current minimum competitive boosts (MTQ.MaxNonCompetitiveBoostAttribute). 
diff --git a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java index 37e95004652..11dd692ff83 100644 --- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java +++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java @@ -886,7 +886,7 @@ public class MemoryIndex implements Serializable { @Override public int docFreq() { - return info.sortedTerms[termUpto].getValue().size(); + return 1; } @Override diff --git a/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java b/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java index 9b0f72b3439..5b4996a51a3 100644 --- a/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java +++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java @@ -29,6 +29,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PriorityQueue; @@ -199,7 +200,10 @@ public class FuzzyLikeThisQuery extends Query ScoreTermQueue variantsQ=new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term float minScore=0; Term startTerm=internSavingTemplateTerm.createTerm(term); - FuzzyTermsEnum fe = new FuzzyTermsEnum(reader, startTerm, f.minSimilarity, f.prefixLength); + AttributeSource atts = new AttributeSource(); + MultiTermQuery.MaxNonCompetitiveBoostAttribute maxBoostAtt = + atts.addAttribute(MultiTermQuery.MaxNonCompetitiveBoostAttribute.class); + FuzzyTermsEnum fe = new FuzzyTermsEnum(reader, atts, startTerm, f.minSimilarity, f.prefixLength); //store the df so all variants use same idf int df = 
reader.docFreq(startTerm); int numVariants=0; @@ -217,7 +221,7 @@ public class FuzzyLikeThisQuery extends Query variantsQ.insertWithOverflow(st); minScore = variantsQ.top().score; // maintain minScore } - boostAtt.setMaxNonCompetitiveBoost(variantsQ.size() >= MAX_VARIANTS_PER_TERM ? minScore : Float.NEGATIVE_INFINITY); + maxBoostAtt.setMaxNonCompetitiveBoost(variantsQ.size() >= MAX_VARIANTS_PER_TERM ? minScore : Float.NEGATIVE_INFINITY); } } diff --git a/lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java b/lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java index 61e26c73ada..950beded5f7 100644 --- a/lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java +++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/regex/RegexQuery.java @@ -21,6 +21,7 @@ import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.FilteredTermsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.ToStringUtils; import java.io.IOException; @@ -60,7 +61,7 @@ public class RegexQuery extends MultiTermQuery implements RegexQueryCapable { } @Override - protected FilteredTermsEnum getTermsEnum(IndexReader reader) throws IOException { + protected FilteredTermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException { return new RegexTermsEnum(reader, term, regexImpl); } diff --git a/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java b/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java index 0b03d38d369..512db84025a 100644 --- a/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java +++ b/lucene/contrib/queries/src/test/org/apache/lucene/search/regex/TestRegexQuery.java @@ -28,6 +28,7 @@ import org.apache.lucene.index.TermsEnum; import 
org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.LuceneTestCase; public class TestRegexQuery extends LuceneTestCase { @@ -78,7 +79,7 @@ public class TestRegexQuery extends LuceneTestCase { } public void testMatchAll() throws Exception { - TermsEnum terms = new RegexQuery(new Term(FN, "jum.")).getTermsEnum(searcher.getIndexReader()); + TermsEnum terms = new RegexQuery(new Term(FN, "jum.")).getTermsEnum(searcher.getIndexReader(), new AttributeSource() /*dummy*/); // no term should match assertNull(terms.next()); } diff --git a/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java b/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java index 3e4f8ee0306..147d269edb8 100644 --- a/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java +++ b/lucene/contrib/spellchecker/src/java/org/apache/lucene/search/spell/DirectSpellChecker.java @@ -30,6 +30,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.FuzzyTermsEnum; import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.automaton.LevenshteinAutomata; @@ -387,7 +388,10 @@ public class DirectSpellChecker { private Collection suggestSimilar(Term term, int numSug, IndexReader ir, int docfreq, int editDistance, float accuracy) throws IOException { - FuzzyTermsEnum e = new FuzzyTermsEnum(ir, term, editDistance, Math.max(minPrefix, editDistance-1)); + AttributeSource atts = new AttributeSource(); + MultiTermQuery.MaxNonCompetitiveBoostAttribute maxBoostAtt = + atts.addAttribute(MultiTermQuery.MaxNonCompetitiveBoostAttribute.class); + FuzzyTermsEnum e = new FuzzyTermsEnum(ir, atts, term, editDistance, Math.max(minPrefix, 
editDistance-1)); final PriorityQueue stQueue = new PriorityQueue(); BytesRef queryTerm = new BytesRef(term.text()); @@ -435,7 +439,7 @@ public class DirectSpellChecker { stQueue.offer(st); // possibly drop entries from queue st = (stQueue.size() > numSug) ? stQueue.poll() : new ScoreTerm(); - boostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY); + maxBoostAtt.setMaxNonCompetitiveBoost((stQueue.size() >= numSug) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY); } return stQueue; diff --git a/lucene/src/java/org/apache/lucene/search/AutomatonQuery.java b/lucene/src/java/org/apache/lucene/search/AutomatonQuery.java index 0d2d5ee2b20..e67230b6a5c 100644 --- a/lucene/src/java/org/apache/lucene/search/AutomatonQuery.java +++ b/lucene/src/java/org/apache/lucene/search/AutomatonQuery.java @@ -24,6 +24,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.ToStringUtils; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.ByteRunAutomaton; @@ -85,7 +86,7 @@ public class AutomatonQuery extends MultiTermQuery { } @Override - protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { + protected TermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException { // matches nothing if (BasicOperations.isEmpty(automaton)) { return TermsEnum.EMPTY; diff --git a/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java b/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java index f559728b759..b153dd9e719 100644 --- a/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java +++ b/lucene/src/java/org/apache/lucene/search/FuzzyQuery.java @@ -20,6 +20,7 @@ package org.apache.lucene.search; import org.apache.lucene.index.IndexReader; import 
org.apache.lucene.index.Term; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.util.automaton.LevenshteinAutomata; @@ -135,11 +136,11 @@ public class FuzzyQuery extends MultiTermQuery { } @Override - protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { + protected TermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException { if (!termLongEnough) { // can only match if it's exact return new SingleTermsEnum(reader, term); } - return new FuzzyTermsEnum(reader, getTerm(), minimumSimilarity, prefixLength); + return new FuzzyTermsEnum(reader, atts, getTerm(), minimumSimilarity, prefixLength); } /** diff --git a/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java b/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java index ee1ad083a25..49861a0f2f4 100644 --- a/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java +++ b/lucene/src/java/org/apache/lucene/search/FuzzyTermsEnum.java @@ -22,6 +22,7 @@ import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IntsRef; @@ -51,7 +52,12 @@ public final class FuzzyTermsEnum extends TermsEnum { private final MultiTermQuery.BoostAttribute boostAtt = attributes().addAttribute(MultiTermQuery.BoostAttribute.class); - private float bottom = boostAtt.getMaxNonCompetitiveBoost(); + private final MultiTermQuery.MaxNonCompetitiveBoostAttribute maxBoostAtt; + + private float bottom; + private BytesRef bottomTerm; + // nocommit: chicken-and-egg + private final Comparator termComparator = BytesRef.getUTF8SortedAsUnicodeComparator(); private final float minSimilarity; private final float scale_factor; @@ -82,7 
+88,7 @@ public final class FuzzyTermsEnum extends TermsEnum { * @param prefixLength Length of required common prefix. Default value is 0. * @throws IOException */ - public FuzzyTermsEnum(IndexReader reader, Term term, + public FuzzyTermsEnum(IndexReader reader, AttributeSource atts, Term term, final float minSimilarity, final int prefixLength) throws IOException { if (minSimilarity >= 1.0f && minSimilarity != (int)minSimilarity) throw new IllegalArgumentException("fractional edit distances are not allowed"); @@ -116,9 +122,10 @@ public final class FuzzyTermsEnum extends TermsEnum { } this.scale_factor = 1.0f / (1.0f - this.minSimilarity); - TermsEnum subEnum = getAutomatonEnum(maxEdits, null); - setEnum(subEnum != null ? subEnum : - new LinearFuzzyTermsEnum()); + this.maxBoostAtt = atts.addAttribute(MultiTermQuery.MaxNonCompetitiveBoostAttribute.class); + bottom = maxBoostAtt.getMaxNonCompetitiveBoost(); + bottomTerm = maxBoostAtt.getCompetitiveTerm(); + bottomChanged(null, true); } /** @@ -169,19 +176,24 @@ public final class FuzzyTermsEnum extends TermsEnum { * fired when the max non-competitive boost has changed. this is the hook to * swap in a smarter actualEnum */ - private void bottomChanged(float boostValue, BytesRef lastTerm) + private void bottomChanged(BytesRef lastTerm, boolean init) throws IOException { int oldMaxEdits = maxEdits; + // true if the last term encountered is lexicographically equal or after the bottom term in the PQ + boolean termAfter = bottomTerm == null || (lastTerm != null && termComparator.compare(lastTerm, bottomTerm) >= 0); + // as long as the max non-competitive boost is >= the max boost // for some edit distance, keep dropping the max edit distance. - while (maxEdits > 0 && boostValue >= calculateMaxBoost(maxEdits)) + while (maxEdits > 0 && (termAfter ? 
bottom >= calculateMaxBoost(maxEdits) : bottom > calculateMaxBoost(maxEdits))) maxEdits--; - if (oldMaxEdits != maxEdits) { // the maximum n has changed + if (oldMaxEdits != maxEdits || init) { // the maximum n has changed TermsEnum newEnum = getAutomatonEnum(maxEdits, lastTerm); if (newEnum != null) { setEnum(newEnum); + } else if (init) { + setEnum(new LinearFuzzyTermsEnum()); } } } @@ -202,16 +214,18 @@ public final class FuzzyTermsEnum extends TermsEnum { @Override public BytesRef next() throws IOException { if (queuedBottom != null) { - bottomChanged(bottom, queuedBottom); + bottomChanged(queuedBottom, false); queuedBottom = null; } BytesRef term = actualEnum.next(); boostAtt.setBoost(actualBoostAtt.getBoost()); - final float bottom = boostAtt.getMaxNonCompetitiveBoost(); - if (bottom != this.bottom && term != null) { + final float bottom = maxBoostAtt.getMaxNonCompetitiveBoost(); + final BytesRef bottomTerm = maxBoostAtt.getCompetitiveTerm(); + if (term != null && (bottom != this.bottom || bottomTerm != this.bottomTerm)) { this.bottom = bottom; + this.bottomTerm = bottomTerm; // clone the term before potentially doing something with it // this is a rare but wonderful occurrence anyway queuedBottom = new BytesRef(term); diff --git a/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java b/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java index ac8b713a739..2af261b39af 100644 --- a/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java +++ b/lucene/src/java/org/apache/lucene/search/MultiTermQuery.java @@ -19,19 +19,30 @@ package org.apache.lucene.search; import java.io.IOException; import java.io.Serializable; +import java.util.Arrays; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.PriorityQueue; +import java.util.Comparator; +import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; -import 
org.apache.lucene.util.BytesRef; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.index.MultiFields; -import org.apache.lucene.index.Fields; import org.apache.lucene.index.Terms; -import org.apache.lucene.queryParser.QueryParser; // for javadoc +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.Attribute; import org.apache.lucene.util.AttributeImpl; -import org.apache.lucene.util.PagedBytes; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.ByteBlockPool; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.ReaderUtil; +import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray; /** * An abstract {@link Query} that matches documents @@ -39,7 +50,7 @@ import org.apache.lucene.util.PagedBytes; * FilteredTermsEnum} enumeration. * *

This query cannot be used directly; you must subclass - * it and define {@link #getTermsEnum} to provide a {@link + * it and define {@link #getTermsEnum(IndexReader,AttributeSource)} to provide a {@link * FilteredTermsEnum} that iterates through the terms to be * matched. * @@ -71,34 +82,25 @@ public abstract class MultiTermQuery extends Query { protected RewriteMethod rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; transient int numberOfTerms = 0; - /** Add this {@link Attribute} to a {@link TermsEnum} returned by {@link #getTermsEnum} + /** Add this {@link Attribute} to a {@link TermsEnum} returned by {@link #getTermsEnum(IndexReader,AttributeSource)} * and update the boost on each returned term. This enables to control the boost factor * for each matching term in {@link #SCORING_BOOLEAN_QUERY_REWRITE} or * {@link TopTermsBooleanQueryRewrite} mode. * {@link FuzzyQuery} is using this to take the edit distance into account. + *

Please note: This attribute is intended to be added only by the TermsEnum + * to itself in its constructor and consumed by the {@link RewriteMethod}. + * @lucene.internal */ public static interface BoostAttribute extends Attribute { /** Sets the boost in this attribute */ public void setBoost(float boost); /** Retrieves the boost, default is {@code 1.0f}. */ public float getBoost(); - /** Sets the maximum boost for terms that would never get - * into the priority queue of {@link MultiTermQuery.TopTermsBooleanQueryRewrite}. - * This value is not changed by {@link AttributeImpl#clear} - * and not used in {@code equals()} and {@code hashCode()}. - * Do not change the value in the {@link TermsEnum}! - */ - public void setMaxNonCompetitiveBoost(float maxNonCompetitiveBoost); - /** Retrieves the maximum boost that is not competitive, - * default is megative infinity. You can use this boost value - * as a hint when writing the {@link TermsEnum}. - */ - public float getMaxNonCompetitiveBoost(); } /** Implementation class for {@link BoostAttribute}. */ public static final class BoostAttributeImpl extends AttributeImpl implements BoostAttribute { - private float boost = 1.0f, maxNonCompetitiveBoost = Float.NEGATIVE_INFINITY; + private float boost = 1.0f; public void setBoost(float boost) { this.boost = boost; @@ -107,14 +109,6 @@ public abstract class MultiTermQuery extends Query { public float getBoost() { return boost; } - - public void setMaxNonCompetitiveBoost(float maxNonCompetitiveBoost) { - this.maxNonCompetitiveBoost = maxNonCompetitiveBoost; - } - - public float getMaxNonCompetitiveBoost() { - return maxNonCompetitiveBoost; - } @Override public void clear() { @@ -141,6 +135,83 @@ public abstract class MultiTermQuery extends Query { } } + /** Add this {@link Attribute} to a fresh {@link AttributeSource} before calling + * {@link #getTermsEnum(IndexReader,AttributeSource)}. 
+ * {@link FuzzyQuery} is using this to control its internal behaviour + * to only return competitive terms. + *

Please note: This attribute is intended to be added by the {@link RewriteMethod} + * to an empty {@link AttributeSource} that is shared for all segments + * during query rewrite. This attribute source is passed to all segment enums + * on {@link #getTermsEnum(IndexReader,AttributeSource)}. + * {@link TopTermsBooleanQueryRewrite} uses this attribute to + * inform all enums about the current boost, that is not competitive. + * @lucene.internal + */ + public static interface MaxNonCompetitiveBoostAttribute extends Attribute { + /** This is the maximum boost that would not be competitive. */ + public void setMaxNonCompetitiveBoost(float maxNonCompetitiveBoost); + /** This is the maximum boost that would not be competitive. Default is negative infinity, which means every term is competitive. */ + public float getMaxNonCompetitiveBoost(); + /** This is the term (or null) that triggered the boost change. */ + public void setCompetitiveTerm(BytesRef competitiveTerm); + /** This is the term (or null) that triggered the boost change. Default is null, which means every term is competitive. */ + public BytesRef getCompetitiveTerm(); + } + + /** Implementation class for {@link MaxNonCompetitiveBoostAttribute}. 
*/ + public static final class MaxNonCompetitiveBoostAttributeImpl extends AttributeImpl implements MaxNonCompetitiveBoostAttribute { + private float maxNonCompetitiveBoost = Float.NEGATIVE_INFINITY; + private BytesRef competitiveTerm = null; + + public void setMaxNonCompetitiveBoost(final float maxNonCompetitiveBoost) { + this.maxNonCompetitiveBoost = maxNonCompetitiveBoost; + } + + public float getMaxNonCompetitiveBoost() { + return maxNonCompetitiveBoost; + } + + public void setCompetitiveTerm(final BytesRef competitiveTerm) { + this.competitiveTerm = competitiveTerm; + } + + public BytesRef getCompetitiveTerm() { + return competitiveTerm; + } + + @Override + public void clear() { + maxNonCompetitiveBoost = Float.NEGATIVE_INFINITY; + competitiveTerm = null; + } + + @Override + public boolean equals(Object other) { + if (this == other) + return true; + if (other instanceof MaxNonCompetitiveBoostAttributeImpl) { + final MaxNonCompetitiveBoostAttributeImpl o = (MaxNonCompetitiveBoostAttributeImpl) other; + return (o.maxNonCompetitiveBoost == maxNonCompetitiveBoost) + && (o.competitiveTerm == null ? competitiveTerm == null : o.competitiveTerm.equals(competitiveTerm)); + } + return false; + } + + @Override + public int hashCode() { + int hash = Float.floatToIntBits(maxNonCompetitiveBoost); + if (competitiveTerm != null) hash = 31 * hash + competitiveTerm.hashCode(); + return hash; + } + + @Override + public void copyTo(AttributeImpl target) { + final MaxNonCompetitiveBoostAttributeImpl t = (MaxNonCompetitiveBoostAttributeImpl) target; + t.setMaxNonCompetitiveBoost(maxNonCompetitiveBoost); + t.setCompetitiveTerm(competitiveTerm); + } + } + /** Abstract class that defines how the query is rewritten. 
*/ public static abstract class RewriteMethod implements Serializable { public abstract Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException; @@ -177,69 +248,85 @@ public abstract class MultiTermQuery extends Query { private abstract static class BooleanQueryRewrite extends RewriteMethod { protected final int collectTerms(IndexReader reader, MultiTermQuery query, TermCollector collector) throws IOException { - final Fields fields = MultiFields.getFields(reader); - if (fields == null) { - // reader has no fields - return 0; - } - - final Terms terms = fields.terms(query.field); - if (terms == null) { - // field does not exist - return 0; - } - - final TermsEnum termsEnum = query.getTermsEnum(reader); - assert termsEnum != null; - - if (termsEnum == TermsEnum.EMPTY) - return 0; - final BoostAttribute boostAtt = - termsEnum.attributes().addAttribute(BoostAttribute.class); - collector.boostAtt = boostAtt; + final List subReaders = new ArrayList(); + ReaderUtil.gatherSubReaders(subReaders, reader); int count = 0; - BytesRef bytes; - while ((bytes = termsEnum.next()) != null) { - if (collector.collect(termsEnum, bytes, boostAtt.getBoost())) { - termsEnum.cacheCurrentTerm(); - count++; - } else { - break; + Comparator lastTermComp = null; + + for (IndexReader r : subReaders) { + final Fields fields = r.fields(); + if (fields == null) { + // reader has no fields + continue; + } + + final Terms terms = fields.terms(query.field); + if (terms == null) { + // field does not exist + continue; + } + + final TermsEnum termsEnum = query.getTermsEnum(r, collector.attributes); + assert termsEnum != null; + + if (termsEnum == TermsEnum.EMPTY) + continue; + + // Check comparator compatibility: + final Comparator newTermComp = termsEnum.getComparator(); + if (lastTermComp != null && newTermComp != lastTermComp) + throw new RuntimeException("term comparator should not change between segments: "+lastTermComp+" != "+newTermComp); + lastTermComp = newTermComp; + + 
collector.setNextEnum(termsEnum); + BytesRef bytes; + while ((bytes = termsEnum.next()) != null) { + if (collector.collect(bytes)) { + termsEnum.cacheCurrentTerm(); + count++; + } else { + return count; // interrupt whole term collection, so also don't iterate other subReaders + } } } - collector.boostAtt = null; return count; } protected static abstract class TermCollector { - private BoostAttribute boostAtt = null; + /** attributes used for communication with the enum */ + public final AttributeSource attributes = new AttributeSource(); /** return false to stop collecting */ - public abstract boolean collect(TermsEnum termsEnum, BytesRef bytes, float boost) throws IOException; + public abstract boolean collect(BytesRef bytes) throws IOException; - /** set the minimum boost as a hint for the term producer */ - protected final void setMaxNonCompetitiveBoost(float maxNonCompetitiveBoost) { - assert boostAtt != null; - boostAtt.setMaxNonCompetitiveBoost(maxNonCompetitiveBoost); - } + /** the next segment's {@link TermsEnum} that is used to collect terms */ + public abstract void setNextEnum(TermsEnum termsEnum) throws IOException; } } private static class ScoringBooleanQueryRewrite extends BooleanQueryRewrite { @Override public Query rewrite(final IndexReader reader, final MultiTermQuery query) throws IOException { - final BooleanQuery result = new BooleanQuery(true); + final ParallelArraysTermCollector col = new ParallelArraysTermCollector(); + collectTerms(reader, query, col); + final Term placeholderTerm = new Term(query.field); - query.incTotalNumberOfTerms(collectTerms(reader, query, new TermCollector() { - @Override - public boolean collect(TermsEnum termsEnum, BytesRef bytes, float boost) { - // add new TQ, we must clone the term, else it may get overwritten! 
- TermQuery tq = new TermQuery(placeholderTerm.createTerm(new BytesRef(bytes)), termsEnum.docFreq()); - tq.setBoost(query.getBoost() * boost); // set the boost - result.add(tq, BooleanClause.Occur.SHOULD); // add to query - return true; + final BooleanQuery result = new BooleanQuery(true); + final int size = col.terms.size(); + if (size > 0) { + final int sort[] = col.terms.sort(col.termsEnum.getComparator()); + final int[] docFreq = col.array.docFreq; + final float[] boost = col.array.boost; + for (int i = 0; i < size; i++) { + final int pos = sort[i]; + final Term term = placeholderTerm.createTerm(col.terms.get(pos, new BytesRef())); + assert reader.docFreq(term) == docFreq[pos]; + final TermQuery tq = new TermQuery(term, docFreq[pos]); + tq.setBoost(query.getBoost() * boost[pos]); + result.add(tq, BooleanClause.Occur.SHOULD); } - })); + } + query.incTotalNumberOfTerms(size); return result; } @@ -247,6 +334,75 @@ public abstract class MultiTermQuery extends Query { protected Object readResolve() { return SCORING_BOOLEAN_QUERY_REWRITE; } + + static final class ParallelArraysTermCollector extends TermCollector { + final TermFreqBoostByteStart array = new TermFreqBoostByteStart(16); + final BytesRefHash terms = new BytesRefHash(new ByteBlockPool(new ByteBlockPool.DirectAllocator()), 16, array); + TermsEnum termsEnum; + + private BoostAttribute boostAtt; + + @Override + public void setNextEnum(TermsEnum termsEnum) throws IOException { + this.termsEnum = termsEnum; + this.boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class); + } + + @Override + public boolean collect(BytesRef bytes) { + final int e = terms.add(bytes); + if (e < 0 ) { + // duplicate term: update docFreq + final int pos = (-e)-1; + array.docFreq[pos] += termsEnum.docFreq(); + assert array.boost[pos] == boostAtt.getBoost() : "boost should be equal in all segment TermsEnums"; + } else { + // new entry: we populate the entry initially + array.docFreq[e] = termsEnum.docFreq(); + 
array.boost[e] = boostAtt.getBoost(); + } + // if the new entry reaches the max clause count, we exit early + if (e >= BooleanQuery.getMaxClauseCount()) + throw new BooleanQuery.TooManyClauses(); + return true; + } + } + + /** Special implementation of BytesStartArray that keeps parallel arrays for boost and docFreq */ + static final class TermFreqBoostByteStart extends DirectBytesStartArray { + int[] docFreq; + float[] boost; + + public TermFreqBoostByteStart(int initSize) { + super(initSize); + } + + @Override + public int[] init() { + final int[] ord = super.init(); + boost = new float[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_FLOAT)]; + docFreq = new int[ArrayUtil.oversize(ord.length, RamUsageEstimator.NUM_BYTES_INT)]; + assert boost.length >= ord.length && docFreq.length >= ord.length; + return ord; + } + + @Override + public int[] grow() { + final int[] ord = super.grow(); + docFreq = ArrayUtil.grow(docFreq, ord.length); + boost = ArrayUtil.grow(boost, ord.length); + assert boost.length >= ord.length && docFreq.length >= ord.length; + return ord; + } + + @Override + public int[] clear() { + boost = null; + docFreq = null; + return super.clear(); + } + + } } /** A rewrite method that first translates each term into @@ -291,44 +447,92 @@ public abstract class MultiTermQuery extends Query { final int maxSize = Math.min(size, BooleanQuery.getMaxClauseCount()); final PriorityQueue stQueue = new PriorityQueue(); collectTerms(reader, query, new TermCollector() { + private final MaxNonCompetitiveBoostAttribute maxBoostAtt = + attributes.addAttribute(MaxNonCompetitiveBoostAttribute.class); + + private final Map visitedTerms = new HashMap(); + + private TermsEnum termsEnum; + private Comparator termComp; + private BoostAttribute boostAtt; + private ScoreTerm st; + @Override - public boolean collect(TermsEnum termsEnum, BytesRef bytes, float boost) { + public void setNextEnum(TermsEnum termsEnum) throws IOException { + this.termsEnum = termsEnum; + 
this.termComp = termsEnum.getComparator(); + // lazy init the initial ScoreTerm because comparator is not known on ctor: + if (st == null) + st = new ScoreTerm(this.termComp); + boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class); + } + + @Override + public boolean collect(BytesRef bytes) { + final float boost = boostAtt.getBoost(); // ignore uncompetetive hits - if (stQueue.size() >= maxSize && boost <= stQueue.peek().boost) - return true; - // add new entry in PQ, we must clone the term, else it may get overwritten! - st.bytes.copy(bytes); - st.boost = boost; - st.docFreq = termsEnum.docFreq(); - stQueue.offer(st); - // possibly drop entries from queue - st = (stQueue.size() > maxSize) ? stQueue.poll() : new ScoreTerm(); - setMaxNonCompetitiveBoost((stQueue.size() >= maxSize) ? stQueue.peek().boost : Float.NEGATIVE_INFINITY); + if (stQueue.size() == maxSize) { + final ScoreTerm t = stQueue.peek(); + if (boost < t.boost) + return true; + if (boost == t.boost && termComp.compare(bytes, t.bytes) > 0) + return true; + } + ScoreTerm t = visitedTerms.get(bytes); + if (t != null) { + // if the term is already in the PQ, only update docFreq of term in PQ + t.docFreq += termsEnum.docFreq(); + assert t.boost == boost : "boost should be equal in all segment TermsEnums"; + } else { + // add new entry in PQ, we must clone the term, else it may get overwritten! 
+ st.bytes.copy(bytes); + st.boost = boost; + st.docFreq = termsEnum.docFreq(); + visitedTerms.put(st.bytes, st); + stQueue.offer(st); + // possibly drop entries from queue + if (stQueue.size() > maxSize) { + st = stQueue.poll(); + visitedTerms.remove(st.bytes); + } else { + st = new ScoreTerm(termComp); + } + assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize"; + // set maxBoostAtt with values to help FuzzyTermsEnum to optimize + if (stQueue.size() == maxSize) { + t = stQueue.peek(); + maxBoostAtt.setMaxNonCompetitiveBoost(t.boost); + maxBoostAtt.setCompetitiveTerm(t.bytes); + } + } return true; } - - // reusable instance - private ScoreTerm st = new ScoreTerm(); }); final Term placeholderTerm = new Term(query.field); final BooleanQuery bq = new BooleanQuery(true); - for (final ScoreTerm st : stQueue) { - // add new query, we must clone the term, else it may get overwritten! - Query tq = getQuery(placeholderTerm.createTerm(st.bytes), st.docFreq); + final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]); + Arrays.sort(scoreTerms, new Comparator() { + public int compare(ScoreTerm st1, ScoreTerm st2) { + assert st1.termComp == st2.termComp : + "term comparator should not change between segments"; + return st1.termComp.compare(st1.bytes, st2.bytes); + } + }); + for (final ScoreTerm st : scoreTerms) { + final Term term = placeholderTerm.createTerm(st.bytes); + assert reader.docFreq(term) == st.docFreq; + Query tq = getQuery(term, st.docFreq); tq.setBoost(query.getBoost() * st.boost); // set the boost bq.add(tq, BooleanClause.Occur.SHOULD); // add to query } - query.incTotalNumberOfTerms(bq.clauses().size()); + query.incTotalNumberOfTerms(scoreTerms.length); return bq; } @Override public int hashCode() { - final int prime = 17; - int result = 1; - result = prime * result + size; - return result; + return 31 * size; } @Override @@ -341,15 +545,20 @@ public abstract class MultiTermQuery extends Query { return true; } - 
private static class ScoreTerm implements Comparable { + static final class ScoreTerm implements Comparable { + public final Comparator termComp; + public final BytesRef bytes = new BytesRef(); public float boost; public int docFreq; + public ScoreTerm(Comparator termComp) { + this.termComp = termComp; + } + public int compareTo(ScoreTerm other) { if (this.boost == other.boost) - // TODO: is it OK to use default compare here? - return other.bytes.compareTo(this.bytes); + return termComp.compare(other.bytes, this.bytes); else return Float.compare(this.boost, other.boost); } @@ -362,8 +571,8 @@ public abstract class MultiTermQuery extends Query { * scores as computed by the query. * *

- * This rewrite mode only uses the top scoring terms so it will not overflow - * the boolean max clause count. It is the default rewrite mode for + * This rewrite method only uses the top scoring terms so it will not overflow + * the boolean max clause count. It is the default rewrite method for * {@link FuzzyQuery}. * * @see #setRewriteMethod @@ -510,63 +719,61 @@ public abstract class MultiTermQuery extends Query { final int docCountCutoff = (int) ((docCountPercent / 100.) * reader.maxDoc()); final int termCountLimit = Math.min(BooleanQuery.getMaxClauseCount(), termCountCutoff); - final CutOffTermCollector col = new CutOffTermCollector(reader, query.field, docCountCutoff, termCountLimit); + final CutOffTermCollector col = new CutOffTermCollector(docCountCutoff, termCountLimit); collectTerms(reader, query, col); - + final int size = col.pendingTerms.size(); if (col.hasCutOff) { return CONSTANT_SCORE_FILTER_REWRITE.rewrite(reader, query); - } else if (col.termCount == 0) { + } else if (size == 0) { return new BooleanQuery(true); } else { - final PagedBytes.Reader bytesReader = col.pendingTerms.freeze(false); - try { - final BooleanQuery bq = new BooleanQuery(true); - final Term placeholderTerm = new Term(query.field); - long start = col.startOffset; - for(int i = 0; i < col.termCount; i++) { - final BytesRef bytes = new BytesRef(); - start = bytesReader.fillUsingLengthPrefix3(bytes, start); - bq.add(new TermQuery(placeholderTerm.createTerm(bytes)), BooleanClause.Occur.SHOULD); - } - // Strip scores - final Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq)); - result.setBoost(query.getBoost()); - query.incTotalNumberOfTerms(col.termCount); - return result; - } finally { - bytesReader.close(); + final BooleanQuery bq = new BooleanQuery(true); + final Term placeholderTerm = new Term(query.field); + final BytesRefHash pendingTerms = col.pendingTerms; + final int sort[] = pendingTerms.sort(col.termsEnum.getComparator()); + for(int i = 0; i < size; i++) { 
+ // docFreq is not used for constant score here, we pass 1 + // to explicitely set a fake value, so it's not calculated + bq.add(new TermQuery( + placeholderTerm.createTerm(pendingTerms.get(sort[i], new BytesRef())), 1 + ), BooleanClause.Occur.SHOULD); } + // Strip scores + final Query result = new ConstantScoreQuery(new QueryWrapperFilter(bq)); + result.setBoost(query.getBoost()); + query.incTotalNumberOfTerms(size); + return result; } } - private static final class CutOffTermCollector extends TermCollector { - CutOffTermCollector(IndexReader reader, String field, int docCountCutoff, int termCountLimit) { - this.reader = reader; - this.field = field; + static final class CutOffTermCollector extends TermCollector { + CutOffTermCollector(int docCountCutoff, int termCountLimit) { this.docCountCutoff = docCountCutoff; this.termCountLimit = termCountLimit; } - public boolean collect(TermsEnum termsEnum, BytesRef bytes, float boost) throws IOException { - termCount++; - if (termCount >= termCountLimit || docVisitCount >= docCountCutoff) { + @Override + public void setNextEnum(TermsEnum termsEnum) throws IOException { + this.termsEnum = termsEnum; + } + + @Override + public boolean collect(BytesRef bytes) throws IOException { + if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) { hasCutOff = true; return false; } - pendingTerms.copyUsingLengthPrefix(bytes); + pendingTerms.add(bytes); docVisitCount += termsEnum.docFreq(); return true; } int docVisitCount = 0; boolean hasCutOff = false; - int termCount = 0; - - final IndexReader reader; - final String field; + TermsEnum termsEnum; + final int docCountCutoff, termCountLimit; - final PagedBytes pendingTerms = new PagedBytes(15); // max term size is 32 KiB - final long startOffset = pendingTerms.getPointer(); + final BytesRefHash pendingTerms = new BytesRefHash(); } @Override @@ -644,8 +851,20 @@ public abstract class MultiTermQuery extends Query { * field does exist). 
This method should not return null * (should instead return {@link TermsEnum#EMPTY} if no * terms match). The TermsEnum must already be - * positioned to the first matching term. */ - protected abstract TermsEnum getTermsEnum(IndexReader reader) throws IOException; + * positioned to the first matching term. + * The given {@link AttributeSource} is passed by the {@link RewriteMethod} to + * provide attributes, the rewrite method uses to inform about e.g. maximum competitive boosts. + * This is currently only used by {@link TopTermsBooleanQueryRewrite} + */ + protected abstract TermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException; + + /** Convenience method, if no attributes are needed: + * This simply passes empty attributes and is equal to: + * getTermsEnum(reader, new AttributeSource()) + */ + protected final TermsEnum getTermsEnum(IndexReader reader) throws IOException { + return getTermsEnum(reader, new AttributeSource()); + } /** * Expert: Return the number of unique terms visited during execution of the query. 
diff --git a/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java b/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java index 89eff58fb6b..ad5d631a607 100644 --- a/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java +++ b/lucene/src/java/org/apache/lucene/search/NumericRangeQuery.java @@ -26,6 +26,7 @@ import org.apache.lucene.document.NumericField; // for javadocs import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; import org.apache.lucene.index.TermsEnum; @@ -301,7 +302,7 @@ public final class NumericRangeQuery extends MultiTermQuery { } @Override @SuppressWarnings("unchecked") - protected TermsEnum getTermsEnum(final IndexReader reader) throws IOException { + protected TermsEnum getTermsEnum(final IndexReader reader, AttributeSource atts) throws IOException { // very strange: java.lang.Number itsself is not Comparable, but all subclasses used here are return (min != null && max != null && ((Comparable) min).compareTo(max) > 0) ? TermsEnum.EMPTY : diff --git a/lucene/src/java/org/apache/lucene/search/PrefixQuery.java b/lucene/src/java/org/apache/lucene/search/PrefixQuery.java index e3b08bd2050..03caee8ab10 100644 --- a/lucene/src/java/org/apache/lucene/search/PrefixQuery.java +++ b/lucene/src/java/org/apache/lucene/search/PrefixQuery.java @@ -24,6 +24,7 @@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.MultiFields; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.ToStringUtils; /** A Query that matches documents containing terms with a specified prefix. 
A PrefixQuery @@ -45,7 +46,7 @@ public class PrefixQuery extends MultiTermQuery { public Term getPrefix() { return prefix; } @Override - protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { + protected TermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException { if (prefix.bytes().length == 0) { // no prefix -- match all terms for this field: // NOTE: for now, MultiTermQuery enums terms at the diff --git a/lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java b/lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java index 7001dd4a732..3aa6d4d2245 100644 --- a/lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java +++ b/lucene/src/java/org/apache/lucene/search/QueryWrapperFilter.java @@ -33,7 +33,7 @@ import org.apache.lucene.index.IndexReader; * once per day. */ public class QueryWrapperFilter extends Filter { - private Query query; + private final Query query; /** Constructs a filter which only matches documents matching * query. @@ -41,6 +41,11 @@ public class QueryWrapperFilter extends Filter { public QueryWrapperFilter(Query query) { this.query = query; } + + /** returns the inner Query */ + public final Query getQuery() { + return query; + } @Override public DocIdSet getDocIdSet(final IndexReader reader) throws IOException { diff --git a/lucene/src/java/org/apache/lucene/search/TermRangeQuery.java b/lucene/src/java/org/apache/lucene/search/TermRangeQuery.java index 394145a1a48..9e4a01b367f 100644 --- a/lucene/src/java/org/apache/lucene/search/TermRangeQuery.java +++ b/lucene/src/java/org/apache/lucene/search/TermRangeQuery.java @@ -25,6 +25,7 @@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.Terms; import org.apache.lucene.index.MultiFields; import org.apache.lucene.util.ToStringUtils; +import org.apache.lucene.util.AttributeSource; /** * A Query that matches documents within an range of terms. 
@@ -130,7 +131,7 @@ public class TermRangeQuery extends MultiTermQuery { public Collator getCollator() { return collator; } @Override - protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { + protected TermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException { if (collator == null && lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) { return TermsEnum.EMPTY; } diff --git a/lucene/src/java/org/apache/lucene/util/ArrayUtil.java b/lucene/src/java/org/apache/lucene/util/ArrayUtil.java index 151ae1a95db..8797ca1df00 100644 --- a/lucene/src/java/org/apache/lucene/util/ArrayUtil.java +++ b/lucene/src/java/org/apache/lucene/util/ArrayUtil.java @@ -27,14 +27,7 @@ import java.util.Collection; public final class ArrayUtil { - /** - * @deprecated This constructor was not intended to be public and should not be used. - * This class contains solely a static utility methods. - * It will be made private in Lucene 4.0 - */ - // make private in 4.0! 
- @Deprecated - public ArrayUtil() {} // no instance + private ArrayUtil() {} // no instance /* Begin Apache Harmony code @@ -247,6 +240,19 @@ public final class ArrayUtil { public static short[] grow(short[] array) { return grow(array, 1 + array.length); } + + public static float[] grow(float[] array, int minSize) { + if (array.length < minSize) { + float[] newArray = new float[oversize(minSize, RamUsageEstimator.NUM_BYTES_FLOAT)]; + System.arraycopy(array, 0, newArray, 0, array.length); + return newArray; + } else + return array; + } + + public static float[] grow(float[] array) { + return grow(array, 1 + array.length); + } public static short[] shrink(short[] array, int targetSize) { final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_SHORT); diff --git a/lucene/src/java/org/apache/lucene/util/ByteBlockPool.java b/lucene/src/java/org/apache/lucene/util/ByteBlockPool.java index a34e9fe7a3c..552340eca6f 100644 --- a/lucene/src/java/org/apache/lucene/util/ByteBlockPool.java +++ b/lucene/src/java/org/apache/lucene/util/ByteBlockPool.java @@ -16,8 +16,12 @@ package org.apache.lucene.util; * See the License for the specific language governing permissions and * limitations under the License. */ +import java.util.Arrays; +import java.util.List; +import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF; -/* Class that Posting and PostingVector use to write byte +/** + * Class that Posting and PostingVector use to write byte * streams into shared fixed-size byte[] arrays. The idea * is to allocate slices of increasing lengths For * example, the first slice is 5 bytes, the next slice is @@ -31,14 +35,10 @@ package org.apache.lucene.util; * the end with a non-zero byte. This way the methods * that are writing into the slice don't need to record * its length and instead allocate a new slice once they - * hit a non-zero byte. 
*/ - -import java.util.Arrays; - - -import java.util.List; -import static org.apache.lucene.util.RamUsageEstimator.NUM_BYTES_OBJECT_REF; - + * hit a non-zero byte. + * + * @lucene.internal + **/ public final class ByteBlockPool { public final static int BYTE_BLOCK_SHIFT = 15; public final static int BYTE_BLOCK_SIZE = 1 << BYTE_BLOCK_SHIFT; @@ -62,6 +62,22 @@ public final class ByteBlockPool { return new byte[blockSize]; } } + + public static final class DirectAllocator extends Allocator { + + public DirectAllocator() { + this(BYTE_BLOCK_SIZE); + } + + public DirectAllocator(int blockSize) { + super(blockSize); + } + + @Override + public void recycleByteBlocks(byte[][] blocks, int start, int end) { + } + + } public byte[][] buffers = new byte[10][]; diff --git a/lucene/src/java/org/apache/lucene/util/BytesRefHash.java b/lucene/src/java/org/apache/lucene/util/BytesRefHash.java index 0976c8c61a0..a5940edc764 100644 --- a/lucene/src/java/org/apache/lucene/util/BytesRefHash.java +++ b/lucene/src/java/org/apache/lucene/util/BytesRefHash.java @@ -17,13 +17,15 @@ package org.apache.lucene.util; * limitations under the License. 
*/ +import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK; +import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SHIFT; +import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE; + import java.util.Arrays; import java.util.Comparator; import java.util.concurrent.atomic.AtomicLong; -import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_MASK; -import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SIZE; -import static org.apache.lucene.util.ByteBlockPool.BYTE_BLOCK_SHIFT; +import org.apache.lucene.util.ByteBlockPool.DirectAllocator; /** * {@link BytesRefHash} is a special purpose hash-map like data-structure @@ -54,6 +56,14 @@ public final class BytesRefHash { public static final int DEFAULT_CAPACITY = 16; private final BytesStartArray bytesStartArray; private AtomicLong bytesUsed; + + /** + * Creates a new {@link BytesRefHash} with a {@link ByteBlockPool} using a + * {@link DirectAllocator}. + */ + public BytesRefHash() { + this(new ByteBlockPool(new DirectAllocator())); + } /** * Creates a new {@link BytesRefHash} @@ -75,7 +85,7 @@ public final class BytesRefHash { Arrays.fill(ords, -1); this.bytesStartArray = bytesStartArray; bytesStart = bytesStartArray.init(); - bytesUsed = bytesStartArray.bytesUsed(); + bytesUsed = bytesStartArray.bytesUsed() == null? 
new AtomicLong(0) : bytesStartArray.bytesUsed();; bytesUsed.addAndGet(hashSize * RamUsageEstimator.NUM_BYTES_INT); } @@ -143,7 +153,6 @@ public final class BytesRefHash { * the {@link Comparator} used for sorting */ public int[] sort(Comparator comp) { - assert bytesStart != null : "Bytesstart is null - not initialized"; final int[] compact = compact(); quickSort(comp, compact, 0, count - 1); return compact; @@ -536,13 +545,13 @@ public final class BytesRefHash { public abstract AtomicLong bytesUsed(); } - static class DirectBytesStartArray extends BytesStartArray { + public static class DirectBytesStartArray extends BytesStartArray { - private final int initSize; + protected final int initSize; private int[] bytesStart; private final AtomicLong bytesUsed = new AtomicLong(0); - DirectBytesStartArray(int initSize) { + public DirectBytesStartArray(int initSize) { this.initSize = initSize; } diff --git a/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java b/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java new file mode 100644 index 00000000000..8b9f5c94d06 --- /dev/null +++ b/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java @@ -0,0 +1,186 @@ +package org.apache.lucene.search; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.junit.AfterClass; +import org.junit.BeforeClass; + +import java.io.IOException; + +public class TestMultiTermQueryRewrites extends LuceneTestCase { + + static Directory dir, sdir1, sdir2; + static IndexReader reader, multiReader; + static IndexSearcher searcher, multiSearcher; + + @BeforeClass + public static void beforeClass() throws Exception { + dir = newDirectory(); + sdir1 = newDirectory(); + sdir2 = newDirectory(); + final RandomIndexWriter writer = new RandomIndexWriter(random, dir, new MockAnalyzer()); + final RandomIndexWriter swriter1 = new RandomIndexWriter(random, sdir1, new MockAnalyzer()); + final RandomIndexWriter swriter2 = new RandomIndexWriter(random, sdir2, new MockAnalyzer()); + + for (int i = 0; i < 10; i++) { + Document doc = new Document(); + doc.add(newField("data", Integer.toString(i), Field.Store.NO, Field.Index.NOT_ANALYZED)); + writer.addDocument(doc); + ((i % 2 == 0) ? 
swriter1 : swriter2).addDocument(doc); + } + writer.optimize(); swriter1.optimize(); swriter2.optimize(); + writer.close(); swriter1.close(); swriter2.close(); + + reader = IndexReader.open(dir, true); + searcher = new IndexSearcher(reader); + multiReader = new MultiReader(new IndexReader[] { + IndexReader.open(sdir1, true), IndexReader.open(sdir2, true) + }, true); + multiSearcher = new IndexSearcher(multiReader); + } + + @AfterClass + public static void afterClass() throws Exception { + reader.close(); + multiReader.close(); + dir.close(); sdir1.close(); sdir2.close(); + reader = multiReader = null; + searcher = multiSearcher = null; + dir = sdir1 = sdir2 = null; + } + + private Query extractInnerQuery(Query q) { + if (q instanceof ConstantScoreQuery) { + // wrapped as ConstantScoreQuery using QueryWrapperFilter + q = ((QueryWrapperFilter) ((ConstantScoreQuery) q).getFilter()).getQuery(); + } + return q; + } + + private Term extractTerm(Query q) { + q = extractInnerQuery(q); + return ((TermQuery) q).getTerm(); + } + + private void checkBooleanQueryOrder(Query q) { + q = extractInnerQuery(q); + final BooleanQuery bq = (BooleanQuery) q; + Term last = null, act; + for (BooleanClause clause : bq.clauses()) { + act = extractTerm(clause.getQuery()); + if (last != null) { + assertTrue("sort order of terms in BQ violated", last.compareTo(act) < 0); + } + last = act; + } + } + + private void checkDuplicateTerms(MultiTermQuery.RewriteMethod method) throws Exception { + final MultiTermQuery mtq = new TermRangeQuery("data", "2", "7", true, true); + mtq.setRewriteMethod(method); + final Query q1 = searcher.rewrite(mtq); + final Query q2 = multiSearcher.rewrite(mtq); + if (VERBOSE) { + System.out.println(); + System.out.println("single segment: " + q1); + System.out.println(" multi segment: " + q2); + } + assertEquals("The multi-segment case must produce same rewritten query", q1, q2); + checkBooleanQueryOrder(q1); + checkBooleanQueryOrder(q2); + } + + public void 
testRewritesWithDuplicateTerms() throws Exception { + checkDuplicateTerms(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + + checkDuplicateTerms(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); + + // use a large PQ here to only test duplicate terms and dont mix up when all scores are equal + checkDuplicateTerms(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(1024)); + checkDuplicateTerms(new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(1024)); + + // Test auto rewrite (but only boolean mode), so we set the limits to large values to always get a BQ + final MultiTermQuery.ConstantScoreAutoRewrite rewrite = new MultiTermQuery.ConstantScoreAutoRewrite(); + rewrite.setTermCountCutoff(Integer.MAX_VALUE); + rewrite.setDocCountPercent(100.); + checkDuplicateTerms(rewrite); + } + + private void checkBooleanQueryBoosts(BooleanQuery bq) { + for (BooleanClause clause : bq.clauses()) { + final TermQuery mtq = (TermQuery) clause.getQuery(); + assertEquals("Parallel sorting of boosts in rewrite mode broken", + Float.parseFloat(mtq.getTerm().text()), mtq.getBoost()); + } + } + + private void checkBoosts(MultiTermQuery.RewriteMethod method) throws Exception { + final MultiTermQuery mtq = new MultiTermQuery("data") { + @Override + protected TermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException { + return new TermRangeTermsEnum(reader, field, "2", "7", true, true, null) { + final MultiTermQuery.BoostAttribute boostAtt = + attributes().addAttribute(MultiTermQuery.BoostAttribute.class); + + @Override + protected AcceptStatus accept(BytesRef term) { + boostAtt.setBoost(Float.parseFloat(term.utf8ToString())); + return super.accept(term); + } + }; + } + + @Override + public String toString(String field) { + return "dummy"; + } + }; + mtq.setRewriteMethod(method); + final Query q1 = searcher.rewrite(mtq); + final Query q2 = multiSearcher.rewrite(mtq); + if (VERBOSE) { + System.out.println(); + System.out.println("single segment: " + q1); + 
System.out.println(" multi segment: " + q2); + } + assertEquals("The multi-segment case must produce same rewritten query", q1, q2); + checkBooleanQueryBoosts((BooleanQuery) q1); + checkBooleanQueryBoosts((BooleanQuery) q2); + } + + public void testBoosts() throws Exception { + checkBoosts(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE); + + // use a large PQ here to only test boosts and dont mix up when all scores are equal + checkBoosts(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(1024)); + } + +} diff --git a/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java b/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java index edf912e1c24..1c402ead66c 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java +++ b/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java @@ -29,6 +29,7 @@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; @@ -85,7 +86,7 @@ public class TestPrefixRandom extends LuceneTestCase { } @Override - protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { + protected TermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException { return new SimplePrefixTermsEnum(reader, field, prefix); } diff --git a/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java b/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java index 836ab46b84d..bce9eedb061 100644 --- a/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java +++ b/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java @@ -36,6 +36,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.UnicodeUtil; 
import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.AutomatonTestUtil; import org.apache.lucene.util.automaton.CharacterRunAutomaton; @@ -103,7 +104,7 @@ public class TestRegexpRandom2 extends LuceneTestCase { } @Override - protected TermsEnum getTermsEnum(IndexReader reader) throws IOException { + protected TermsEnum getTermsEnum(IndexReader reader, AttributeSource atts) throws IOException { return new SimpleAutomatonTermsEnum(reader, field); }