Mirror of https://github.com/apache/lucene.git

Commit 54a2d7aab4: merged with trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/docvalues@1102677 13f79535-47bb-0310-9956-ffa450edef68
@@ -7,6 +7,7 @@ modules/ is shared code
To compile the sources run 'ant compile'
To run all the tests run 'ant test'
To setup your ide run 'ant idea' or 'ant eclipse'
For Maven info, see dev-tools/maven/README.maven.

For more information on how to contribute see:
http://wiki.apache.org/lucene-java/HowToContribute
@@ -95,7 +95,7 @@
  <classpathentry kind="lib" path="modules/benchmark/lib/commons-digester-1.7.jar"/>
  <classpathentry kind="lib" path="modules/benchmark/lib/commons-logging-1.0.4.jar"/>
  <classpathentry kind="lib" path="modules/benchmark/lib/xercesImpl-2.9.1-patched-XERCESJ-1257.jar"/>
  <classpathentry kind="lib" path="solr/lib/apache-solr-noggit-r944541.jar"/>
  <classpathentry kind="lib" path="solr/lib/apache-solr-noggit-r1099557.jar"/>
  <classpathentry kind="lib" path="solr/lib/commons-beanutils-1.7.0.jar"/>
  <classpathentry kind="lib" path="solr/lib/commons-codec-1.4.jar"/>
  <classpathentry kind="lib" path="solr/lib/commons-collections-3.2.1.jar"/>
@@ -0,0 +1,131 @@
====================================
Lucene/Solr Maven build instructions
====================================

Contents:

A. How to use nightly Jenkins-built Lucene/Solr Maven artifacts
B. How to generate Lucene Maven artifacts
C. How to generate Solr Maven artifacts
D. How to use Maven to build Lucene/Solr

-----

A. How to use nightly Jenkins-built Lucene/Solr Maven artifacts

   The most recently produced nightly Jenkins-built Lucene and Solr Maven
   artifacts are available in Maven repository layout here:

   <https://builds.apache.org/hudson/job/Lucene-Solr-Maven-trunk/lastSuccessfulBuild/artifact/maven_artifacts/>


B. How to generate Lucene Maven artifacts

   1. Prerequisites: JDK 1.5+, Ant 1.7.X, and maven-ant-tasks-2.1.1.jar

      In order to generate Maven artifacts for Lucene/Solr, you must first
      download the Maven ant tasks JAR (maven-ant-tasks-2.1.1.jar), e.g.
      from <http://maven.apache.org/ant-tasks/download.html>, and add it
      to any one of the following:

      a. Your $HOME/.ant/lib/ directory (C:\Users\username\.ant\lib\ under
         Windows Vista/7); or
      b. Your $ANT_HOME/lib/ directory (%ANT_HOME%\lib\ under Windows); or
      c. Your $CLASSPATH (%CLASSPATH% under Windows); or
      d. Your ant command line: "-lib /path/to/maven-ant-tasks-2.1.1.jar".

   2. Run the following command from the lucene/ directory:

         ant generate-maven-artifacts

      The above command will create an internal Maven repository under
      lucene/dist/maven/, including POMs, binary .jars, source .jars,
      and javadoc .jars, for Lucene Core, for the Lucene test framework,
      for each contrib, and for each module under the top-level modules/
      directory.


C. How to generate Solr Maven artifacts

   1. Prerequisites: JDK 1.6+; Ant 1.7.X; and maven-ant-tasks-2.1.1.jar
      (see item B.1. above for where to put the Maven ant tasks jar).

   2. Run the following from the solr/ directory:

         ant generate-maven-artifacts

      The above command will create an internal Maven repository under
      solr/package/maven/, including POMs, binary .jars, source .jars,
      and javadoc .jars, for Solr Core, for the Solr test framework,
      for each contrib, and for the Solr .war (for which there are no
      source or javadoc .jars).


D. How to use Maven to build Lucene/Solr

   In summary, to enable Maven builds, perform the following:

      svn update
      ant get-maven-poms
      mvn -N -Pbootstrap install

   The details, followed by some example Maven commands:

   1. Prerequisites: JDK 1.5+ (for Lucene); JDK 1.6+ (for Solr);
      Maven 2.2.1 or 3.0.X

   2. Make sure your sources are up to date.  If you checked your sources out
      from the Apache Subversion repository, run "svn update" from the top
      level.

   3. Copy the Maven POM templates from under dev-tools/maven/ to where they
      need to go in order to drive the Maven build, using the following
      command from the top-level directory:

         ant get-maven-poms

      Note that you will need to do this whenever changes to the POM
      templates are committed.  It's a good idea to follow every "svn update"
      with "ant get-maven-poms" for this reason.

      The above command copies all of the POM templates from dev-tools/maven/,
      filling in the project version with the default "X.X-SNAPSHOT".  If you
      want the POMs and the Maven-built artifacts to have a version other than
      the default, you can supply an alternate version on the command line
      with the above command, e.g.:

         ant -Dversion=4.0-my-special-version get-maven-poms

   4. Populate your local repository with .jars & POMs for dependencies that
      are not available from public Maven repositories (a.k.a. "non-mavenized
      dependencies"):

         mvn -N -Pbootstrap install

      Note that you will need to do this whenever changes to the non-Mavenized
      dependencies are committed.  It's a good idea to follow every
      "svn update" with "ant get-maven-poms" and "mvn -N -Pbootstrap install"
      for this reason.


   Some example Maven commands you can use after you perform the above
   preparatory steps:

   - Compile, package, and install all artifacts to your local repository:

         mvn install

     After compiling and packaging, but before installing each module's
     artifact, the above command will also run all the module's tests.

     To compile, package and install all artifacts without running any tests:

         mvn -DskipTests install

   - Run tests:

         mvn test

     To run all test methods defined in a test class:

         mvn -Dtest=TestClassName test
@@ -699,7 +699,7 @@
              <artifactId>solr-noggit</artifactId>
              <version>${project.version}</version>
              <packaging>jar</packaging>
              <file>solr/lib/apache-solr-noggit-r944541.jar</file>
              <file>solr/lib/apache-solr-noggit-r1099557.jar</file>
            </configuration>
          </execution>
          <execution>

@@ -103,8 +103,8 @@
  </dependencies>
  <build>
    <directory>${build-directory}</directory>
    <outputDirectory>${build-directory}/extras/classes</outputDirectory>
    <testOutputDirectory>${build-directory}/extras/test-classes</testOutputDirectory>
    <outputDirectory>${build-directory}/classes</outputDirectory>
    <testOutputDirectory>${build-directory}/test-classes</testOutputDirectory>
    <sourceDirectory>main/java</sourceDirectory>
    <testSourceDirectory>test/java</testSourceDirectory>
    <testResources>

@@ -159,7 +159,6 @@
    <dependency>
      <groupId>com.google.guava</groupId>
      <artifactId>guava</artifactId>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>junit</groupId>

@@ -85,7 +85,7 @@
  </dependencies>
  <build>
    <directory>${build-directory}</directory>
    <outputDirectory>${build-directory}</outputDirectory>
    <outputDirectory>${build-directory}/classes</outputDirectory>
    <sourceDirectory>.</sourceDirectory>
    <testResources/>
    <plugins>
@@ -0,0 +1,61 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import subprocess
import sys

# recursive, unified output format, treat missing files as present but empty
DIFF_FLAGS = '-ruN'

if '-skipWhitespace' in sys.argv:
  sys.argv.remove('-skipWhitespace')
  # ignores only whitespace changes
  DIFF_FLAGS += 'bBw'

if len(sys.argv) != 3:
  print
  print 'Usage: python -u diffSources.py <dir1> <dir2> [-skipWhitespace]'
  print
  print '''This tool creates an applying patch between two directories.

While you could use this to make a committable patch from a branch, that approach loses
the svn history from the branch (better to use "svn merge --reintegrate", for example).  This
diff output should not be considered "authoritative" from a merging standpoint as it does
not reflect what svn will do on merge.
'''
  print
  sys.exit(0)

p = subprocess.Popen(['diff', DIFF_FLAGS, '-x', '.svn', '-x', 'build', sys.argv[1], sys.argv[2]], shell=False, stdout=subprocess.PIPE)

keep = False
while True:
  l = p.stdout.readline()
  if l == '':
    break
  if l.endswith('\r\n'):
    l = l[:-2]
  elif l.endswith('\n'):
    l = l[:-1]
  if l.startswith('diff ') or l.startswith('Binary files '):
    keep = l.lower().find('/build/') == -1 and (l.lower().startswith('Only in') or ((l.lower().endswith('.java') or l.lower().endswith('.txt') or l.lower().endswith('.xml') or l.lower().endswith('.iml')) and l.find('/.svn/') == -1))
    if keep:
      print
      print
      print l.strip()
  elif keep:
    print l
  elif l.startswith('Only in'):
    print l.strip()
@@ -472,13 +472,63 @@ Changes in backwards compatibility policy
  a method getHeapArray() was added to retrieve the internal heap array as a
  non-generic Object[].  (Uwe Schindler, Yonik Seeley)

* LUCENE-1076: IndexWriter.setInfoStream now throws IOException
  (Mike McCandless, Shai Erera)

* LUCENE-3084: MergePolicy.OneMerge.segments was changed from
  SegmentInfos to a List<SegmentInfo>; this is actually a minor change
  because SegmentInfos itself extends Vector<SegmentInfo>.  (Uwe
  Schindler, Mike McCandless)

Changes in runtime behavior

* LUCENE-3065: When a NumericField is retrieved from a Document loaded
  from IndexReader (or IndexSearcher), it will now come back as a
  NumericField, not as a Field holding a string-ified version of the
  numeric value you had indexed.  Note that this only applies to
  newly-indexed Documents; older indices will still return a Field
  with the string-ified numeric value.  If you call Document.get(),
  the value still comes back as a String, but Document.getFieldable()
  returns NumericField instances.  (Uwe Schindler, Ryan McKinley,
  Mike McCandless)
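
  For illustration, a minimal sketch of the difference (assuming the 3.x
  Document/NumericField API; the "price" field name and the surrounding
  class are hypothetical):

    import java.io.IOException;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Fieldable;
    import org.apache.lucene.document.NumericField;
    import org.apache.lucene.index.IndexReader;

    class NumericFieldExample {
      static void show(IndexReader reader, int docID) throws IOException {
        Document doc = reader.document(docID);
        String asString = doc.get("price");       // Document.get() still returns a String
        Fieldable f = doc.getFieldable("price");  // a NumericField for newly-indexed docs
        if (f instanceof NumericField) {
          Number value = ((NumericField) f).getNumericValue();  // typed numeric value
        }
      }
    }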

New features

* LUCENE-3082: Added index upgrade tool oal.index.IndexUpgrader
  that upgrades all segments to the most recent supported index
  format without fully optimizing.  (Uwe Schindler, Mike McCandless)
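
  A minimal usage sketch (assuming the Directory-based constructor and the
  upgrade() method; the index path is hypothetical). The tool is also
  designed to be run from the command line via its main() method.

    import java.io.File;
    import org.apache.lucene.index.IndexUpgrader;
    import org.apache.lucene.store.FSDirectory;

    class UpgradeExample {
      public static void main(String[] args) throws Exception {
        // Rewrites all older-format segments of the index to the current format.
        new IndexUpgrader(FSDirectory.open(new File("/path/to/index"))).upgrade();
      }
    }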

* LUCENE-1076: Added TieredMergePolicy which is able to merge non-contiguous
  segments, which means docIDs no longer necessarily stay "in order".
  (Mike McCandless, Shai Erera)
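
  For example, a sketch of switching an IndexWriter over to the new policy
  (assuming the 3.x IndexWriterConfig API; the Version constant and analyzer
  choice are placeholders):

    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.TieredMergePolicy;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.util.Version;

    class TieredMergePolicyExample {
      static IndexWriter open(Directory dir) throws Exception {
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_31,
            new WhitespaceAnalyzer(Version.LUCENE_31));
        conf.setMergePolicy(new TieredMergePolicy());  // may merge non-adjacent segments
        return new IndexWriter(dir, conf);
      }
    }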

* LUCENE-3071: Added ReversePathHierarchyTokenizer, and added a skip parameter
  to PathHierarchyTokenizer.  (Olivier Favre via ryan)

API Changes

* LUCENE-3061: IndexWriter's getNextMerge() and merge(OneMerge) are now public
  (though @lucene.experimental), allowing for custom MergeScheduler
  implementations.  (Shai Erera)
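
  As an illustration, a scheduler along the lines of Lucene's own
  SerialMergeScheduler can now be written outside the index package (a
  sketch, assuming MergeScheduler's merge(IndexWriter)/close() contract):

    import java.io.IOException;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.MergePolicy;
    import org.apache.lucene.index.MergeScheduler;

    class MySerialMergeScheduler extends MergeScheduler {
      @Override
      public synchronized void merge(IndexWriter writer) throws IOException {
        MergePolicy.OneMerge merge;
        while ((merge = writer.getNextMerge()) != null) {  // public as of LUCENE-3061
          writer.merge(merge);                             // public as of LUCENE-3061
        }
      }

      @Override
      public void close() {}
    }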

* LUCENE-3065: Document.getField() was deprecated, as it throws
  ClassCastException when loading lazy fields or NumericFields.
  (Uwe Schindler, Ryan McKinley, Mike McCandless)

Optimizations

* LUCENE-2990: ArrayUtil/CollectionUtil.*Sort() methods now exit early
  on empty or one-element lists/arrays.  (Uwe Schindler)

* LUCENE-2897: Apply deleted terms while flushing a segment.  We still
  buffer deleted terms to later apply to past segments.  (Mike McCandless)

Bug fixes

* LUCENE-2996: addIndexes(IndexReader) did not flush before adding the new
  indexes, causing existing deletions to be applied on the incoming indexes
  as well.  (Shai Erera, Mike McCandless)

* LUCENE-3024: Index with more than 2.1B terms was hitting AIOOBE when
  seeking TermEnum (e.g. used by Solr's faceting).  (Tom Burton-West, Mike
  McCandless)

@@ -491,6 +541,17 @@ Bug fixes
  very special use cases of the TokenStream API; most users would not
  have recognized it.  (Uwe Schindler, Robert Muir)

* LUCENE-3054: PhraseQuery can in some cases stack overflow in
  SorterTemplate.quickSort().  This fix also adds an optimization to
  PhraseQuery, as a term with lower doc freq will also have fewer positions.
  (Uwe Schindler, Robert Muir, Otis Gospodnetic)

Test Cases

* LUCENE-3002: added 'tests.iter.min' to control 'tests.iter' by allowing
  iteration to stop once at least 'tests.iter.min' iterations ran and a
  failure occurred.  (Shai Erera, Chris Hostetter)

======================= Lucene 3.1.0 =======================

Changes in backwards compatibility policy
@@ -1472,6 +1533,10 @@ Bug fixes
  that warming is free to do whatever it needs to.  (Earwin Burrfoot
  via Mike McCandless)

* LUCENE-3029: Fix corner case when MultiPhraseQuery is used with zero
  position-increment tokens that would sometimes assign different
  scores to identical docs.  (Mike McCandless)

* LUCENE-2486: Fixed intermittent FileNotFoundException on doc store
  files when a mergedSegmentWarmer is set on IndexWriter.  (Mike
  McCandless)
@@ -312,6 +312,8 @@ LUCENE-1458, LUCENE-2111: Flexible Indexing
  - o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
  - o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
  - o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
  - o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer
  - o.a.l.util.CharacterUtils -> o.a.l.analysis.util.CharacterUtils

* LUCENE-2514: The option to use a Collator's order (instead of binary order) for
  sorting and range queries has been moved to contrib/queries.
@@ -73,6 +73,7 @@
  </condition>
  <property name="tests.multiplier" value="1" />
  <property name="tests.codec" value="randomPerField" />
  <property name="tests.codecprovider" value="random" />
  <property name="tests.locale" value="random" />
  <property name="tests.timezone" value="random" />
  <property name="tests.directory" value="random" />

@@ -499,6 +500,8 @@
  <sysproperty key="tests.verbose" value="${tests.verbose}"/>
  <!-- set the codec tests should run with -->
  <sysproperty key="tests.codec" value="${tests.codec}"/>
  <!-- set the codec provider tests should run with -->
  <sysproperty key="tests.codecprovider" value="${tests.codecprovider}"/>
  <!-- set the locale tests should run with -->
  <sysproperty key="tests.locale" value="${tests.locale}"/>
  <!-- set the timezone tests should run with -->
@@ -50,6 +50,11 @@ Bug Fixes

======================= Lucene 3.x (not yet released) =======================

Changes in runtime behavior

* LUCENE-3086: ItalianAnalyzer now uses ElisionFilter with a set of Italian
  contractions by default.  (Robert Muir)
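
  A small sketch of the new default behavior (assuming the contrib analyzers
  module on the classpath and a matchVersion recent enough to enable the new
  behavior; the field name and sample text are hypothetical). It also shows
  the reset()/incrementToken()/end()/close() consumption pattern that this
  commit adds in several other places:

    import java.io.StringReader;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.it.ItalianAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    class ElisionExample {
      public static void main(String[] args) throws Exception {
        ItalianAnalyzer analyzer = new ItalianAnalyzer(Version.LUCENE_32);
        TokenStream ts = analyzer.tokenStream("f", new StringReader("dell'arte"));
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          // the elided article ("dell'") is now stripped before stemming
          System.out.println(termAtt.toString());
        }
        ts.end();
        ts.close();
      }
    }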

Bug Fixes

* LUCENE-3045: fixed QueryNodeImpl.containsTag(String key) that was

@@ -183,6 +188,10 @@ Bug fixes
* LUCENE-2943: Fix thread-safety issues with ICUCollationKeyFilter.
  (Robert Muir)

* LUCENE-3087: Highlighter: fix a case that was preventing highlighting
  of an exact phrase when tokens overlap.  (Pierre Gossé via Mike
  McCandless)

API Changes

* LUCENE-2867: Some contrib queryparser methods that receive CharSequence as
@ -355,6 +355,7 @@ public class Highlighter
|
|||
{
|
||||
try
|
||||
{
|
||||
tokenStream.end();
|
||||
tokenStream.close();
|
||||
}
|
||||
catch (Exception e)
|
||||
|
|
|
@ -30,6 +30,7 @@ import org.apache.lucene.analysis.Token;
|
|||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.TermFreqVector;
|
||||
|
@ -158,10 +159,13 @@ public class TokenSources {
|
|||
|
||||
OffsetAttribute offsetAtt;
|
||||
|
||||
PositionIncrementAttribute posincAtt;
|
||||
|
||||
StoredTokenStream(Token tokens[]) {
|
||||
this.tokens = tokens;
|
||||
termAtt = addAttribute(CharTermAttribute.class);
|
||||
offsetAtt = addAttribute(OffsetAttribute.class);
|
||||
posincAtt = (PositionIncrementAttribute) addAttribute(PositionIncrementAttribute.class);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -173,6 +177,10 @@ public class TokenSources {
|
|||
clearAttributes();
|
||||
termAtt.setEmpty().append(token);
|
||||
offsetAtt.setOffset(token.startOffset(), token.endOffset());
|
||||
posincAtt
|
||||
.setPositionIncrement(currentToken <= 1
|
||||
|| tokens[currentToken - 1].startOffset() > tokens[currentToken - 2]
|
||||
.startOffset() ? 1 : 0);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -180,7 +188,6 @@ public class TokenSources {
|
|||
BytesRef[] terms = tpv.getTerms();
|
||||
int[] freq = tpv.getTermFrequencies();
|
||||
int totalTokens = 0;
|
||||
|
||||
for (int t = 0; t < freq.length; t++) {
|
||||
totalTokens += freq[t];
|
||||
}
|
||||
|
@ -189,7 +196,8 @@ public class TokenSources {
|
|||
for (int t = 0; t < freq.length; t++) {
|
||||
TermVectorOffsetInfo[] offsets = tpv.getOffsets(t);
|
||||
if (offsets == null) {
|
||||
throw new IllegalArgumentException("Required TermVector Offset information was not found");
|
||||
throw new IllegalArgumentException(
|
||||
"Required TermVector Offset information was not found");
|
||||
}
|
||||
|
||||
int[] pos = null;
|
||||
|
@ -205,8 +213,8 @@ public class TokenSources {
|
|||
unsortedTokens = new ArrayList<Token>();
|
||||
}
|
||||
for (int tp = 0; tp < offsets.length; tp++) {
|
||||
Token token = new Token(terms[t].utf8ToString(), offsets[tp].getStartOffset(), offsets[tp]
|
||||
.getEndOffset());
|
||||
Token token = new Token(terms[t].utf8ToString(),
|
||||
offsets[tp].getStartOffset(), offsets[tp].getEndOffset());
|
||||
unsortedTokens.add(token);
|
||||
}
|
||||
} else {
|
||||
|
@ -221,8 +229,8 @@ public class TokenSources {
|
|||
// tokens stored with positions - can use this to index straight into
|
||||
// sorted array
|
||||
for (int tp = 0; tp < pos.length; tp++) {
|
||||
Token token = new Token(terms[t].utf8ToString(), offsets[tp].getStartOffset(),
|
||||
offsets[tp].getEndOffset());
|
||||
Token token = new Token(terms[t].utf8ToString(),
|
||||
offsets[tp].getStartOffset(), offsets[tp].getEndOffset());
|
||||
tokensInOriginalOrder[pos[tp]] = token;
|
||||
}
|
||||
}
|
||||
|
@ -231,12 +239,11 @@ public class TokenSources {
|
|||
if (unsortedTokens != null) {
|
||||
tokensInOriginalOrder = unsortedTokens.toArray(new Token[unsortedTokens
|
||||
.size()]);
|
||||
ArrayUtil.quickSort(tokensInOriginalOrder, new Comparator<Token>() {
|
||||
ArrayUtil.mergeSort(tokensInOriginalOrder, new Comparator<Token>() {
|
||||
public int compare(Token t1, Token t2) {
|
||||
if (t1.startOffset() == t2.startOffset())
|
||||
return t1.endOffset() - t2.endOffset();
|
||||
else
|
||||
return t1.startOffset() - t2.startOffset();
|
||||
if (t1.startOffset() == t2.startOffset()) return t1.endOffset()
|
||||
- t2.endOffset();
|
||||
else return t1.startOffset() - t2.startOffset();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
|
|
@ -1093,6 +1093,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
}
|
||||
|
||||
public void testMaxSizeHighlight() throws Exception {
|
||||
final MockAnalyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
|
||||
// we disable MockTokenizer checks because we will forcefully limit the
|
||||
// tokenstream and call end() before incrementToken() returns false.
|
||||
analyzer.setEnableChecks(false);
|
||||
TestHighlightRunner helper = new TestHighlightRunner() {
|
||||
|
||||
@Override
|
||||
|
@ -1122,7 +1126,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
public void run() throws Exception {
|
||||
String goodWord = "goodtoken";
|
||||
CharacterRunAutomaton stopWords = new CharacterRunAutomaton(BasicAutomata.makeString("stoppedtoken"));
|
||||
|
||||
// we disable MockTokenizer checks because we will forcefully limit the
|
||||
// tokenstream and call end() before incrementToken() returns false.
|
||||
final MockAnalyzer analyzer = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true);
|
||||
analyzer.setEnableChecks(false);
|
||||
TermQuery query = new TermQuery(new Term("data", goodWord));
|
||||
|
||||
String match;
|
||||
|
@ -1134,13 +1141,13 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
sb.append("stoppedtoken");
|
||||
}
|
||||
SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
|
||||
Highlighter hg = getHighlighter(query, "data", new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true).tokenStream(
|
||||
Highlighter hg = getHighlighter(query, "data", analyzer.tokenStream(
|
||||
"data", new StringReader(sb.toString())), fm);// new Highlighter(fm,
|
||||
// new
|
||||
// QueryTermScorer(query));
|
||||
hg.setTextFragmenter(new NullFragmenter());
|
||||
hg.setMaxDocCharsToAnalyze(100);
|
||||
match = hg.getBestFragment(new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true), "data", sb.toString());
|
||||
match = hg.getBestFragment(analyzer, "data", sb.toString());
|
||||
assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
|
||||
.getMaxDocCharsToAnalyze());
|
||||
|
||||
|
@ -1151,7 +1158,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
|
|||
// + whitespace)
|
||||
sb.append(" ");
|
||||
sb.append(goodWord);
|
||||
match = hg.getBestFragment(new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true), "data", sb.toString());
|
||||
match = hg.getBestFragment(analyzer, "data", sb.toString());
|
||||
assertTrue("Matched text should be no more than 100 chars in length ", match.length() < hg
|
||||
.getMaxDocCharsToAnalyze());
|
||||
}
|
||||
|
@ -1726,6 +1733,11 @@ final class SynonymAnalyzer extends Analyzer {
|
|||
stream.addAttribute(CharTermAttribute.class);
|
||||
stream.addAttribute(PositionIncrementAttribute.class);
|
||||
stream.addAttribute(OffsetAttribute.class);
|
||||
try {
|
||||
stream.reset();
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return new SynonymTokenizer(stream, synonyms);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,32 +28,38 @@ import org.apache.lucene.analysis.TokenStream;
|
|||
public class OffsetLimitTokenFilterTest extends BaseTokenStreamTestCase {
|
||||
|
||||
public void testFilter() throws Exception {
|
||||
TokenStream stream = new MockTokenizer(new StringReader(
|
||||
// we disable MockTokenizer checks because we will forcefully limit the
|
||||
// tokenstream and call end() before incrementToken() returns false.
|
||||
MockTokenizer stream = new MockTokenizer(new StringReader(
|
||||
"short toolong evenmuchlongertext a ab toolong foo"),
|
||||
MockTokenizer.WHITESPACE, false);
|
||||
stream.setEnableChecks(false);
|
||||
OffsetLimitTokenFilter filter = new OffsetLimitTokenFilter(stream, 10);
|
||||
assertTokenStreamContents(filter, new String[] {"short", "toolong"});
|
||||
|
||||
stream = new MockTokenizer(new StringReader(
|
||||
"short toolong evenmuchlongertext a ab toolong foo"),
|
||||
MockTokenizer.WHITESPACE, false);
|
||||
stream.setEnableChecks(false);
|
||||
filter = new OffsetLimitTokenFilter(stream, 12);
|
||||
assertTokenStreamContents(filter, new String[] {"short", "toolong"});
|
||||
|
||||
stream = new MockTokenizer(new StringReader(
|
||||
"short toolong evenmuchlongertext a ab toolong foo"),
|
||||
MockTokenizer.WHITESPACE, false);
|
||||
stream.setEnableChecks(false);
|
||||
filter = new OffsetLimitTokenFilter(stream, 30);
|
||||
assertTokenStreamContents(filter, new String[] {"short", "toolong",
|
||||
"evenmuchlongertext"});
|
||||
|
||||
|
||||
// TODO: This is not actually testing reuse! (reusableTokenStream is not implemented)
|
||||
checkOneTermReuse(new Analyzer() {
|
||||
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return new OffsetLimitTokenFilter(new MockTokenizer(reader,
|
||||
MockTokenizer.WHITESPACE, false), 10);
|
||||
MockTokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.WHITESPACE, false);
|
||||
tokenizer.setEnableChecks(false);
|
||||
return new OffsetLimitTokenFilter(tokenizer, 10);
|
||||
}
|
||||
}, "llenges", "llenges");
|
||||
}
|
||||
|
|
|
@ -36,7 +36,10 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.index.TermPositionVector;
|
||||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||
import org.apache.lucene.search.spans.SpanQuery;
|
||||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.LockObtainFailedException;
|
||||
|
@ -86,12 +89,12 @@ public class TokenSourcesTest extends LuceneTestCase {
|
|||
public void reset() {
|
||||
this.i = -1;
|
||||
this.tokens = new Token[] {
|
||||
new Token(new char[] { 't', 'h', 'e' }, 0, 3, 0, 3),
|
||||
new Token(new char[] { '{', 'f', 'o', 'x', '}' }, 0, 5, 0, 7),
|
||||
new Token(new char[] { 'f', 'o', 'x' }, 0, 3, 4, 7),
|
||||
new Token(new char[] { 'd', 'i', 'd' }, 0, 3, 8, 11),
|
||||
new Token(new char[] { 'n', 'o', 't' }, 0, 3, 12, 15),
|
||||
new Token(new char[] { 'j', 'u', 'm', 'p' }, 0, 4, 16, 20) };
|
||||
new Token(new char[] {'t', 'h', 'e'}, 0, 3, 0, 3),
|
||||
new Token(new char[] {'{', 'f', 'o', 'x', '}'}, 0, 5, 0, 7),
|
||||
new Token(new char[] {'f', 'o', 'x'}, 0, 3, 4, 7),
|
||||
new Token(new char[] {'d', 'i', 'd'}, 0, 3, 8, 11),
|
||||
new Token(new char[] {'n', 'o', 't'}, 0, 3, 12, 15),
|
||||
new Token(new char[] {'j', 'u', 'm', 'p'}, 0, 4, 16, 20)};
|
||||
this.tokens[1].setPositionIncrement(0);
|
||||
}
|
||||
}
|
||||
|
@ -188,4 +191,97 @@ public class TokenSourcesTest extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testOverlapWithOffsetExactPhrase() throws CorruptIndexException,
|
||||
LockObtainFailedException, IOException, InvalidTokenOffsetsException {
|
||||
final String TEXT = "the fox did not jump";
|
||||
final Directory directory = newDirectory();
|
||||
final IndexWriter indexWriter = new IndexWriter(directory,
|
||||
newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
|
||||
try {
|
||||
final Document document = new Document();
|
||||
document.add(new Field(FIELD, new TokenStreamOverlap(),
|
||||
TermVector.WITH_OFFSETS));
|
||||
indexWriter.addDocument(document);
|
||||
} finally {
|
||||
indexWriter.close();
|
||||
}
|
||||
final IndexReader indexReader = IndexReader.open(directory, true);
|
||||
try {
|
||||
assertEquals(1, indexReader.numDocs());
|
||||
final IndexSearcher indexSearcher = newSearcher(indexReader);
|
||||
try {
|
||||
// final DisjunctionMaxQuery query = new DisjunctionMaxQuery(1);
|
||||
// query.add(new SpanTermQuery(new Term(FIELD, "{fox}")));
|
||||
// query.add(new SpanTermQuery(new Term(FIELD, "fox")));
|
||||
final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
|
||||
new SpanTermQuery(new Term(FIELD, "the")),
|
||||
new SpanTermQuery(new Term(FIELD, "fox"))}, 0, true);
|
||||
|
||||
TopDocs hits = indexSearcher.search(phraseQuery, 1);
|
||||
assertEquals(1, hits.totalHits);
|
||||
final Highlighter highlighter = new Highlighter(
|
||||
new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
|
||||
new QueryScorer(phraseQuery));
|
||||
final TokenStream tokenStream = TokenSources
|
||||
.getTokenStream(
|
||||
(TermPositionVector) indexReader.getTermFreqVector(0, FIELD),
|
||||
false);
|
||||
assertEquals("<B>the fox</B> did not jump",
|
||||
highlighter.getBestFragment(tokenStream, TEXT));
|
||||
} finally {
|
||||
indexSearcher.close();
|
||||
}
|
||||
} finally {
|
||||
indexReader.close();
|
||||
directory.close();
|
||||
}
|
||||
}
|
||||
|
||||
public void testOverlapWithPositionsAndOffsetExactPhrase()
|
||||
throws CorruptIndexException, LockObtainFailedException, IOException,
|
||||
InvalidTokenOffsetsException {
|
||||
final String TEXT = "the fox did not jump";
|
||||
final Directory directory = newDirectory();
|
||||
final IndexWriter indexWriter = new IndexWriter(directory,
|
||||
newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
|
||||
try {
|
||||
final Document document = new Document();
|
||||
document.add(new Field(FIELD, new TokenStreamOverlap(),
|
||||
TermVector.WITH_POSITIONS_OFFSETS));
|
||||
indexWriter.addDocument(document);
|
||||
} finally {
|
||||
indexWriter.close();
|
||||
}
|
||||
final IndexReader indexReader = IndexReader.open(directory, true);
|
||||
try {
|
||||
assertEquals(1, indexReader.numDocs());
|
||||
final IndexSearcher indexSearcher = newSearcher(indexReader);
|
||||
try {
|
||||
// final DisjunctionMaxQuery query = new DisjunctionMaxQuery(1);
|
||||
// query.add(new SpanTermQuery(new Term(FIELD, "the")));
|
||||
// query.add(new SpanTermQuery(new Term(FIELD, "fox")));
|
||||
final Query phraseQuery = new SpanNearQuery(new SpanQuery[] {
|
||||
new SpanTermQuery(new Term(FIELD, "the")),
|
||||
new SpanTermQuery(new Term(FIELD, "fox"))}, 0, true);
|
||||
|
||||
TopDocs hits = indexSearcher.search(phraseQuery, 1);
|
||||
assertEquals(1, hits.totalHits);
|
||||
final Highlighter highlighter = new Highlighter(
|
||||
new SimpleHTMLFormatter(), new SimpleHTMLEncoder(),
|
||||
new QueryScorer(phraseQuery));
|
||||
final TokenStream tokenStream = TokenSources
|
||||
.getTokenStream(
|
||||
(TermPositionVector) indexReader.getTermFreqVector(0, FIELD),
|
||||
false);
|
||||
assertEquals("<B>the fox</B> did not jump",
|
||||
highlighter.getBestFragment(tokenStream, TEXT));
|
||||
} finally {
|
||||
indexSearcher.close();
|
||||
}
|
||||
} finally {
|
||||
indexReader.close();
|
||||
directory.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -192,6 +192,7 @@ public class FuzzyLikeThisQuery extends Query
|
|||
int corpusNumDocs=reader.numDocs();
|
||||
Term internSavingTemplateTerm =new Term(f.fieldName); //optimization to avoid constructing new Term() objects
|
||||
HashSet<String> processedTerms=new HashSet<String>();
|
||||
ts.reset();
|
||||
while (ts.incrementToken())
|
||||
{
|
||||
String term = termAtt.toString();
|
||||
|
@ -213,17 +214,15 @@ public class FuzzyLikeThisQuery extends Query
|
|||
BoostAttribute boostAtt =
|
||||
fe.attributes().addAttribute(BoostAttribute.class);
|
||||
while ((possibleMatch = fe.next()) != null) {
|
||||
if (possibleMatch!=null) {
|
||||
numVariants++;
|
||||
totalVariantDocFreqs+=fe.docFreq();
|
||||
float score=boostAtt.getBoost();
|
||||
if (variantsQ.size() < MAX_VARIANTS_PER_TERM || score > minScore){
|
||||
ScoreTerm st=new ScoreTerm(new Term(startTerm.field(), new BytesRef(possibleMatch)),score,startTerm);
|
||||
variantsQ.insertWithOverflow(st);
|
||||
minScore = variantsQ.top().score; // maintain minScore
|
||||
}
|
||||
maxBoostAtt.setMaxNonCompetitiveBoost(variantsQ.size() >= MAX_VARIANTS_PER_TERM ? minScore : Float.NEGATIVE_INFINITY);
|
||||
numVariants++;
|
||||
totalVariantDocFreqs+=fe.docFreq();
|
||||
float score=boostAtt.getBoost();
|
||||
if (variantsQ.size() < MAX_VARIANTS_PER_TERM || score > minScore){
|
||||
ScoreTerm st=new ScoreTerm(new Term(startTerm.field(), new BytesRef(possibleMatch)),score,startTerm);
|
||||
variantsQ.insertWithOverflow(st);
|
||||
minScore = variantsQ.top().score; // maintain minScore
|
||||
}
|
||||
maxBoostAtt.setMaxNonCompetitiveBoost(variantsQ.size() >= MAX_VARIANTS_PER_TERM ? minScore : Float.NEGATIVE_INFINITY);
|
||||
}
|
||||
|
||||
if(numVariants>0)
|
||||
|
@ -247,6 +246,8 @@ public class FuzzyLikeThisQuery extends Query
|
|||
}
|
||||
}
|
||||
}
|
||||
ts.end();
|
||||
ts.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -885,7 +885,7 @@ public final class MoreLikeThis {
|
|||
int tokenCount=0;
|
||||
// for every token
|
||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||
|
||||
ts.reset();
|
||||
while (ts.incrementToken()) {
|
||||
String word = termAtt.toString();
|
||||
tokenCount++;
|
||||
|
@ -906,6 +906,8 @@ public final class MoreLikeThis {
|
|||
cnt.x++;
|
||||
}
|
||||
}
|
||||
ts.end();
|
||||
ts.close();
|
||||
}
|
||||
|
||||
|
||||
|
|
|
@ -110,6 +110,11 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
|||
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
|
||||
int countTokens = 0;
|
||||
try {
|
||||
source.reset();
|
||||
} catch (IOException e1) {
|
||||
throw new RuntimeException(e1);
|
||||
}
|
||||
while (true) {
|
||||
try {
|
||||
if (!source.incrementToken()) break;
|
||||
|
@ -126,6 +131,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
|||
}
|
||||
}
|
||||
try {
|
||||
source.end();
|
||||
source.close();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
|
@ -191,7 +197,11 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
|||
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
|
||||
List<String> tlist = new ArrayList<String>();
|
||||
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
|
||||
try {
|
||||
source.reset();
|
||||
} catch (IOException e1) {
|
||||
throw new RuntimeException(e1);
|
||||
}
|
||||
while (true) {
|
||||
try {
|
||||
if (!source.incrementToken()) break;
|
||||
|
@ -202,6 +212,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
|||
}
|
||||
|
||||
try {
|
||||
source.end();
|
||||
source.close();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
|
@ -242,6 +253,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
|||
boolean multipleTokens = false;
|
||||
|
||||
try {
|
||||
source.reset();
|
||||
if (source.incrementToken()) {
|
||||
nextToken = termAtt.toString();
|
||||
}
|
||||
|
@ -251,6 +263,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
|||
}
|
||||
|
||||
try {
|
||||
source.end();
|
||||
source.close();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
|
@ -281,6 +294,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
|||
try {
|
||||
source = getAnalyzer().tokenStream(field, new StringReader(part1));
|
||||
termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
source.reset();
|
||||
multipleTokens = false;
|
||||
|
||||
|
||||
|
@ -292,6 +306,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
|||
// ignore
|
||||
}
|
||||
try {
|
||||
source.end();
|
||||
source.close();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
|
@ -308,6 +323,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
|||
termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
|
||||
try {
|
||||
source.reset();
|
||||
if (source.incrementToken()) {
|
||||
part2 = termAtt.toString();
|
||||
}
|
||||
|
@ -316,6 +332,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
|
|||
// ignore
|
||||
}
|
||||
try {
|
||||
source.end();
|
||||
source.close();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
|
|
|
@ -123,6 +123,11 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
|
|||
|
||||
TokenStream source = this.analyzer.tokenStream(field, new StringReader(
|
||||
text));
|
||||
try {
|
||||
source.reset();
|
||||
} catch (IOException e1) {
|
||||
throw new RuntimeException(e1);
|
||||
}
|
||||
CachingTokenFilter buffer = new CachingTokenFilter(source);
|
||||
|
||||
PositionIncrementAttribute posIncrAtt = null;
|
||||
|
|
|
@ -118,12 +118,14 @@ public final class SynExpand {
|
|||
// [1] Parse query into separate words so that when we expand we can avoid dups
|
||||
TokenStream ts = a.tokenStream( field, new StringReader( query));
|
||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||
|
||||
ts.reset();
|
||||
while (ts.incrementToken()) {
|
||||
String word = termAtt.toString();
|
||||
if ( already.add( word))
|
||||
top.add( word);
|
||||
}
|
||||
ts.end();
|
||||
ts.close();
|
||||
final BooleanQuery tmp = new BooleanQuery();
|
||||
|
||||
// [2] form query
|
||||
|
|
|
@ -111,7 +111,6 @@ public class TestSynonymTokenFilter extends BaseTokenStreamTestCase {
|
|||
setPreviousTokenStream(streams);
|
||||
} else {
|
||||
streams.source.reset(reader);
|
||||
streams.result.reset(); // reset the SynonymTokenFilter
|
||||
}
|
||||
return streams.result;
|
||||
}
|
||||
|
|
|
@ -80,9 +80,12 @@ public class LikeThisQueryBuilder implements QueryBuilder {
|
|||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||
try
|
||||
{
|
||||
ts.reset();
|
||||
while(ts.incrementToken()) {
|
||||
stopWordsSet.add(termAtt.toString());
|
||||
}
|
||||
ts.end();
|
||||
ts.close();
|
||||
}
|
||||
catch(IOException ioe)
|
||||
{
|
||||
|
|
|
@ -59,11 +59,14 @@ public class SpanOrTermsBuilder extends SpanBuilderBase
|
|||
TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value));
|
||||
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
||||
BytesRef bytes = termAtt.getBytesRef();
|
||||
ts.reset();
|
||||
while (ts.incrementToken()) {
|
||||
termAtt.fillBytesRef();
|
||||
SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, new BytesRef(bytes)));
|
||||
clausesList.add(stq);
|
||||
}
|
||||
ts.end();
|
||||
ts.close();
|
||||
SpanOrQuery soq=new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
|
||||
soq.setBoost(DOMUtils.getAttribute(e,"boost",1.0f));
|
||||
return soq;
|
||||
|
|
|
@ -64,6 +64,7 @@ public class TermsFilterBuilder implements FilterBuilder
|
|||
{
|
||||
Term term = null;
|
||||
BytesRef bytes = termAtt.getBytesRef();
|
||||
ts.reset();
|
||||
while (ts.incrementToken()) {
|
||||
termAtt.fillBytesRef();
|
||||
if (term == null)
|
||||
|
@ -76,6 +77,8 @@ public class TermsFilterBuilder implements FilterBuilder
|
|||
}
|
||||
tf.addTerm(term);
|
||||
}
|
||||
ts.end();
|
||||
ts.close();
|
||||
}
|
||||
catch (IOException ioe)
|
||||
{
|
||||
|
|
|
@ -61,6 +61,7 @@ public class TermsQueryBuilder implements QueryBuilder {
|
|||
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
||||
Term term = null;
|
||||
BytesRef bytes = termAtt.getBytesRef();
|
||||
ts.reset();
|
||||
while (ts.incrementToken()) {
|
||||
termAtt.fillBytesRef();
|
||||
if (term == null)
|
||||
|
@ -73,6 +74,8 @@ public class TermsQueryBuilder implements QueryBuilder {
|
|||
}
|
||||
bq.add(new BooleanClause(new TermQuery(term),BooleanClause.Occur.SHOULD));
|
||||
}
|
||||
ts.end();
|
||||
ts.close();
|
||||
}
|
||||
catch (IOException ioe)
|
||||
{
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
<head>
|
||||
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta content="Apache Forrest" name="Generator">
|
||||
<meta name="Forrest-version" content="0.8">
|
||||
<meta name="Forrest-version" content="0.9">
|
||||
<meta name="Forrest-skin-name" content="lucene">
|
||||
<title>
|
||||
Apache Lucene - Contributions
|
||||
|
@ -275,7 +275,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
<a href="#PDFTextStream -- PDF text and metadata extraction">PDFTextStream -- PDF text and metadata extraction</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="#PJ Classic & PJ Professional - PDF Document Conversion">PJ Classic & PJ Professional - PDF Document Conversion</a>
|
||||
<a href="#PJ Classic & PJ Professional - PDF Document Conversion">PJ Classic & PJ Professional - PDF Document Conversion</a>
|
||||
</li>
|
||||
</ul>
|
||||
</li>
|
||||
|
@ -403,7 +403,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
URL
|
||||
</th>
|
||||
<td>
|
||||
<a href="http://marc.theaimsgroup.com/?l=lucene-dev&m=100723333506246&w=2">
|
||||
<a href="http://marc.theaimsgroup.com/?l=lucene-dev&m=100723333506246&w=2">
|
||||
http://marc.theaimsgroup.com/?l=lucene-dev&m=100723333506246&w=2
|
||||
</a>
|
||||
</td>
|
||||
|
@ -538,7 +538,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
</tr>
|
||||
|
||||
</table>
|
||||
<a name="N10124"></a><a name="PJ Classic & PJ Professional - PDF Document Conversion"></a>
|
||||
<a name="N10124"></a><a name="PJ Classic & PJ Professional - PDF Document Conversion"></a>
|
||||
<h3 class="boxed">PJ Classic & PJ Professional - PDF Document Conversion</h3>
|
||||
<table class="ForrestTable" cellspacing="1" cellpadding="4">
|
||||
|
||||
|
|
Binary file not shown.
|
@ -3,7 +3,7 @@
|
|||
<head>
|
||||
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta content="Apache Forrest" name="Generator">
|
||||
<meta name="Forrest-version" content="0.8">
|
||||
<meta name="Forrest-version" content="0.9">
|
||||
<meta name="Forrest-skin-name" content="lucene">
|
||||
<title>
|
||||
Apache Lucene - Building and Installing the Basic Demo
|
||||
|
|
Binary file not shown.
|
@ -3,7 +3,7 @@
|
|||
<head>
|
||||
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta content="Apache Forrest" name="Generator">
|
||||
<meta name="Forrest-version" content="0.8">
|
||||
<meta name="Forrest-version" content="0.9">
|
||||
<meta name="Forrest-skin-name" content="lucene">
|
||||
<title>
|
||||
Apache Lucene - Basic Demo Sources Walk-through
|
||||
|
|
Binary file not shown.
|
@ -3,7 +3,7 @@
|
|||
<head>
|
||||
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta content="Apache Forrest" name="Generator">
|
||||
<meta name="Forrest-version" content="0.8">
|
||||
<meta name="Forrest-version" content="0.9">
|
||||
<meta name="Forrest-skin-name" content="lucene">
|
||||
<title>
|
||||
Apache Lucene - Index File Formats
|
||||
|
@ -425,11 +425,19 @@ document.write("Last Published: " + document.lastModified);
|
|||
<p>
|
||||
In version 3.1, segments records the code version
|
||||
that created them. See LUCENE-2720 for details.
|
||||
|
||||
Additionally segments track explicitly whether or
|
||||
not they have term vectors. See LUCENE-2811 for details.
|
||||
</p>
|
||||
<p>
|
||||
In version 3.2, numeric fields are written as natively
|
||||
to stored fields file, previously they were stored in
|
||||
text format only.
|
||||
</p>
|
||||
</div>
|
||||
|
||||
|
||||
<a name="N10037"></a><a name="Definitions"></a>
|
||||
<a name="N1003A"></a><a name="Definitions"></a>
|
||||
<h2 class="boxed">Definitions</h2>
|
||||
<div class="section">
|
||||
<p>
|
||||
|
@ -470,7 +478,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
strings, the first naming the field, and the second naming text
|
||||
within the field.
|
||||
</p>
|
||||
<a name="N10057"></a><a name="Inverted Indexing"></a>
|
||||
<a name="N1005A"></a><a name="Inverted Indexing"></a>
|
||||
<h3 class="boxed">Inverted Indexing</h3>
|
||||
<p>
|
||||
The index stores statistics about terms in order
|
||||
|
@ -480,7 +488,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
it. This is the inverse of the natural relationship, in which
|
||||
documents list terms.
|
||||
</p>
|
||||
<a name="N10063"></a><a name="Types of Fields"></a>
|
||||
<a name="N10066"></a><a name="Types of Fields"></a>
|
||||
<h3 class="boxed">Types of Fields</h3>
|
||||
<p>
|
||||
In Lucene, fields may be <i>stored</i>, in which
|
||||
|
@ -494,7 +502,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
to be indexed literally.
|
||||
</p>
|
||||
<p>See the <a href="api/core/org/apache/lucene/document/Field.html">Field</a> java docs for more information on Fields.</p>
|
||||
<a name="N10080"></a><a name="Segments"></a>
|
||||
<a name="N10083"></a><a name="Segments"></a>
|
||||
<h3 class="boxed">Segments</h3>
|
||||
<p>
|
||||
Lucene indexes may be composed of multiple sub-indexes, or
|
||||
|
@ -520,7 +528,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
Searches may involve multiple segments and/or multiple indexes, each
|
||||
index potentially composed of a set of segments.
|
||||
</p>
|
||||
<a name="N1009E"></a><a name="Document Numbers"></a>
|
||||
<a name="N100A1"></a><a name="Document Numbers"></a>
|
||||
<h3 class="boxed">Document Numbers</h3>
|
||||
<p>
|
||||
Internally, Lucene refers to documents by an integer <i>document
|
||||
|
@ -575,7 +583,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
</div>
|
||||
|
||||
|
||||
<a name="N100C5"></a><a name="Overview"></a>
|
||||
<a name="N100C8"></a><a name="Overview"></a>
|
||||
<h2 class="boxed">Overview</h2>
|
||||
<div class="section">
|
||||
<p>
|
||||
|
@ -674,7 +682,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
</div>
|
||||
|
||||
|
||||
<a name="N10108"></a><a name="File Naming"></a>
|
||||
<a name="N1010B"></a><a name="File Naming"></a>
|
||||
<h2 class="boxed">File Naming</h2>
|
||||
<div class="section">
|
||||
<p>
|
||||
|
@ -701,7 +709,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
</p>
|
||||
</div>
|
||||
|
||||
<a name="N10117"></a><a name="file-names"></a>
|
||||
<a name="N1011A"></a><a name="file-names"></a>
|
||||
<h2 class="boxed">Summary of File Extensions</h2>
|
||||
<div class="section">
|
||||
<p>The following table summarizes the names and extensions of the files in Lucene:
|
||||
|
@ -843,10 +851,10 @@ document.write("Last Published: " + document.lastModified);
|
|||
</div>
|
||||
|
||||
|
||||
<a name="N10201"></a><a name="Primitive Types"></a>
|
||||
<a name="N10204"></a><a name="Primitive Types"></a>
|
||||
<h2 class="boxed">Primitive Types</h2>
|
||||
<div class="section">
|
||||
<a name="N10206"></a><a name="Byte"></a>
|
||||
<a name="N10209"></a><a name="Byte"></a>
|
||||
<h3 class="boxed">Byte</h3>
|
||||
<p>
|
||||
The most primitive type
|
||||
|
@ -854,7 +862,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
other data types are defined as sequences
|
||||
of bytes, so file formats are byte-order independent.
|
||||
</p>
|
||||
<a name="N1020F"></a><a name="UInt32"></a>
|
||||
<a name="N10212"></a><a name="UInt32"></a>
|
||||
<h3 class="boxed">UInt32</h3>
|
||||
<p>
|
||||
32-bit unsigned integers are written as four
|
||||
|
@ -864,7 +872,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
UInt32 --> <Byte><sup>4</sup>
|
||||
|
||||
</p>
|
||||
<a name="N1021E"></a><a name="Uint64"></a>
|
||||
<a name="N10221"></a><a name="Uint64"></a>
|
||||
<h3 class="boxed">Uint64</h3>
|
||||
<p>
|
||||
64-bit unsigned integers are written as eight
|
||||
|
@ -873,7 +881,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
<p>UInt64 --> <Byte><sup>8</sup>
|
||||
|
||||
</p>
|
||||
<a name="N1022D"></a><a name="VInt"></a>
|
||||
<a name="N10230"></a><a name="VInt"></a>
|
||||
<h3 class="boxed">VInt</h3>
|
||||
<p>
|
||||
A variable-length format for positive integers is
|
||||
|
@ -1423,13 +1431,13 @@ document.write("Last Published: " + document.lastModified);
|
|||
This provides compression while still being
|
||||
efficient to decode.
|
||||
</p>
|
||||
<a name="N10512"></a><a name="Chars"></a>
|
||||
<a name="N10515"></a><a name="Chars"></a>
|
||||
<h3 class="boxed">Chars</h3>
|
||||
<p>
|
||||
Lucene writes unicode
|
||||
character sequences as UTF-8 encoded bytes.
|
||||
</p>
|
||||
<a name="N1051B"></a><a name="String"></a>
|
||||
<a name="N1051E"></a><a name="String"></a>
|
||||
<h3 class="boxed">String</h3>
|
||||
<p>
|
||||
Lucene writes strings as UTF-8 encoded bytes.
|
||||
|
@ -1442,10 +1450,10 @@ document.write("Last Published: " + document.lastModified);
|
|||
</div>
|
||||
|
||||
|
||||
<a name="N10528"></a><a name="Compound Types"></a>
|
||||
<a name="N1052B"></a><a name="Compound Types"></a>
|
||||
<h2 class="boxed">Compound Types</h2>
|
||||
<div class="section">
|
||||
<a name="N1052D"></a><a name="MapStringString"></a>
|
||||
<a name="N10530"></a><a name="MapStringString"></a>
|
||||
<h3 class="boxed">Map<String,String></h3>
|
||||
<p>
|
||||
In a couple places Lucene stores a Map
|
||||
|
@ -1458,13 +1466,13 @@ document.write("Last Published: " + document.lastModified);
|
|||
</div>
|
||||
|
||||
|
||||
<a name="N1053D"></a><a name="Per-Index Files"></a>
|
||||
<a name="N10540"></a><a name="Per-Index Files"></a>
|
||||
<h2 class="boxed">Per-Index Files</h2>
|
||||
<div class="section">
|
||||
<p>
|
||||
The files in this section exist one-per-index.
|
||||
</p>
|
||||
<a name="N10545"></a><a name="Segments File"></a>
|
||||
<a name="N10548"></a><a name="Segments File"></a>
|
||||
<h3 class="boxed">Segments File</h3>
|
||||
<p>
|
||||
The active segments in the index are stored in the
|
||||
|
@ -1508,7 +1516,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
<b>3.1</b>
|
||||
Segments --> Format, Version, NameCounter, SegCount, <SegVersion, SegName, SegSize, DelGen, DocStoreOffset, [DocStoreSegment, DocStoreIsCompoundFile], HasSingleNormFile, NumField,
|
||||
NormGen<sup>NumField</sup>,
|
||||
IsCompoundFile, DeletionCount, HasProx, Diagnostics><sup>SegCount</sup>, CommitUserData, Checksum
|
||||
IsCompoundFile, DeletionCount, HasProx, Diagnostics, HasVectors><sup>SegCount</sup>, CommitUserData, Checksum
|
||||
</p>
|
||||
<p>
|
||||
Format, NameCounter, SegCount, SegSize, NumField,
|
||||
|
@ -1525,7 +1533,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
</p>
|
||||
<p>
|
||||
IsCompoundFile, HasSingleNormFile,
|
||||
DocStoreIsCompoundFile, HasProx --> Int8
|
||||
DocStoreIsCompoundFile, HasProx, HasVectors --> Int8
|
||||
</p>
|
||||
<p>
|
||||
CommitUserData --> Map<String,String>
|
||||
|
@ -1634,7 +1642,10 @@ document.write("Last Published: " + document.lastModified);
|
|||
Lucene version, OS, Java version, why the segment
|
||||
was created (merge, flush, addIndexes), etc.
|
||||
</p>
|
||||
<a name="N105CD"></a><a name="Lock File"></a>
|
||||
<p> HasVectors is 1 if this segment stores term vectors,
|
||||
else it's 0.
|
||||
</p>
|
||||
<a name="N105D3"></a><a name="Lock File"></a>
|
||||
<h3 class="boxed">Lock File</h3>
|
||||
<p>
|
||||
The write lock, which is stored in the index
|
||||
|
@ -1648,14 +1659,14 @@ document.write("Last Published: " + document.lastModified);
|
|||
documents). This lock file ensures that only one
|
||||
writer is modifying the index at a time.
|
||||
</p>
|
||||
<a name="N105D6"></a><a name="Deletable File"></a>
|
||||
<a name="N105DC"></a><a name="Deletable File"></a>
|
||||
<h3 class="boxed">Deletable File</h3>
|
||||
<p>
|
||||
A writer dynamically computes
|
||||
the files that are deletable, instead, so no file
|
||||
is written.
|
||||
</p>
|
||||
<a name="N105DF"></a><a name="Compound Files"></a>
|
||||
<a name="N105E5"></a><a name="Compound Files"></a>
|
||||
<h3 class="boxed">Compound Files</h3>
|
||||
<p>Starting with Lucene 1.4 the compound file format became default. This
|
||||
is simply a container for all files described in the next section
|
||||
|
@ -1682,14 +1693,14 @@ document.write("Last Published: " + document.lastModified);
|
|||
</div>
|
||||
|
||||
|
||||
<a name="N10607"></a><a name="Per-Segment Files"></a>
|
||||
<a name="N1060D"></a><a name="Per-Segment Files"></a>
|
||||
<h2 class="boxed">Per-Segment Files</h2>
|
||||
<div class="section">
|
||||
<p>
|
||||
The remaining files are all per-segment, and are
|
||||
thus defined by suffix.
|
||||
</p>
|
||||
<a name="N1060F"></a><a name="Fields"></a>
|
||||
<a name="N10615"></a><a name="Fields"></a>
|
||||
<h3 class="boxed">Fields</h3>
|
||||
<p>
|
||||
|
||||
|
@ -1863,12 +1874,28 @@ document.write("Last Published: " + document.lastModified);
|
|||
(if compression is enabled, the algorithm used is ZLIB),
|
||||
only available for indexes until Lucene version 2.9.x</li>
|
||||
|
||||
<li>4th to 6th bits (mask: 0x7<<3) define the type of a
|
||||
numeric field: <ul>
|
||||
|
||||
<li>all bits in mask are cleared if no numeric field at all</li>
|
||||
|
||||
<li>1<<3: Value is Int</li>
|
||||
|
||||
<li>2<<3: Value is Long</li>
|
||||
|
||||
<li>3<<3: Value is Int as Float (as of Integer.intBitsToFloat)</li>
|
||||
|
||||
<li>4<<3: Value is Long as Double (as of Double.longBitsToDouble)</li>
|
||||
|
||||
</ul>
|
||||
</li>
|
||||
|
||||
</ul>
|
||||
|
||||
</p>
|
||||
|
||||
<p>Value -->
|
||||
String | BinaryValue (depending on Bits)
|
||||
String | BinaryValue | Int | Long (depending on Bits)
|
||||
</p>
|
||||
|
||||
<p>BinaryValue -->
|
||||
|
@ -1883,7 +1910,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
</li>
|
||||
|
||||
</ol>
|
||||
<a name="N106B6"></a><a name="Term Dictionary"></a>
|
||||
<a name="N106D0"></a><a name="Term Dictionary"></a>
|
||||
<h3 class="boxed">Term Dictionary</h3>
|
||||
<p>
|
||||
The term dictionary is represented as two files:
|
||||
|
@ -2075,7 +2102,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
</li>
|
||||
|
||||
</ol>
|
||||
<a name="N1073A"></a><a name="Frequencies"></a>
|
||||
<a name="N10754"></a><a name="Frequencies"></a>
|
||||
<h3 class="boxed">Frequencies</h3>
|
||||
<p>
|
||||
The .frq file contains the lists of documents
|
||||
|
@ -2203,7 +2230,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
entry in level-1. In the example has entry 15 on level 1 a pointer to entry 15 on level 0 and entry 31 on level 1 a pointer
|
||||
to entry 31 on level 0.
|
||||
</p>
|
||||
<a name="N107C2"></a><a name="Positions"></a>
|
||||
<a name="N107DC"></a><a name="Positions"></a>
|
||||
<h3 class="boxed">Positions</h3>
|
||||
<p>
|
||||
The .prx file contains the lists of positions that
|
||||
|
@ -2273,7 +2300,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
Payload. If PayloadLength is not stored, then this Payload has the same
|
||||
length as the Payload at the previous position.
|
||||
</p>
|
||||
<a name="N107FE"></a><a name="Normalization Factors"></a>
|
||||
<a name="N10818"></a><a name="Normalization Factors"></a>
|
||||
<h3 class="boxed">Normalization Factors</h3>
|
||||
<p>There's a single .nrm file containing all norms:
|
||||
</p>
|
||||
|
@ -2353,7 +2380,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
</p>
|
||||
<p>Separate norm files are created (when adequate) for both compound and non compound segments.
|
||||
</p>
|
||||
<a name="N1084F"></a><a name="Term Vectors"></a>
|
||||
<a name="N10869"></a><a name="Term Vectors"></a>
|
||||
<h3 class="boxed">Term Vectors</h3>
|
||||
<p>
|
||||
Term Vector support is optional on a field by
|
||||
|
@ -2489,7 +2516,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
</li>
|
||||
|
||||
</ol>
|
||||
<a name="N108EB"></a><a name="Deleted Documents"></a>
|
||||
<a name="N10905"></a><a name="Deleted Documents"></a>
|
||||
<h3 class="boxed">Deleted Documents</h3>
|
||||
<p>The .del file is
|
||||
optional, and only exists when a segment contains deletions.
|
||||
|
@ -2553,7 +2580,7 @@ document.write("Last Published: " + document.lastModified);
|
|||
</div>
|
||||
|
||||
|
||||
<a name="N10925"></a><a name="Limitations"></a>
|
||||
<a name="N1093F"></a><a name="Limitations"></a>
|
||||
<h2 class="boxed">Limitations</h2>
|
||||
<div class="section">
|
||||
<p>
|
||||
|
|
Binary file not shown.
|
@ -3,7 +3,7 @@
|
|||
<head>
|
||||
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta content="Apache Forrest" name="Generator">
|
||||
<meta name="Forrest-version" content="0.8">
|
||||
<meta name="Forrest-version" content="0.9">
|
||||
<meta name="Forrest-skin-name" content="lucene">
|
||||
<title>
|
||||
Apache Lucene - Getting Started Guide
|
||||
|
@ -269,14 +269,12 @@ may wish to skip sections.
|
|||
<li>
|
||||
<a href="demo.html">About the command-line Lucene demo and its usage</a>. This section
|
||||
is intended for anyone who wants to use the command-line Lucene demo.</li>
|
||||
<p></p>
|
||||
|
||||
|
||||
<li>
|
||||
<a href="demo2.html">About the sources and implementation for the command-line Lucene
|
||||
demo</a>. This section walks through the implementation details (sources) of the
|
||||
command-line Lucene demo. This section is intended for developers.</li>
|
||||
<p></p>
|
||||
|
||||
</ul>
|
||||
</div>
|
||||
|
|
Binary file not shown.
|
@ -3,7 +3,7 @@
|
|||
<head>
|
||||
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta content="Apache Forrest" name="Generator">
|
||||
<meta name="Forrest-version" content="0.8">
|
||||
<meta name="Forrest-version" content="0.9">
|
||||
<meta name="Forrest-skin-name" content="lucene">
|
||||
<title>Lucene Java Documentation</title>
|
||||
<link type="text/css" href="skin/basic.css" rel="stylesheet">
|
||||
|
|
Binary file not shown.
|
@ -3,7 +3,7 @@
|
|||
<head>
|
||||
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta content="Apache Forrest" name="Generator">
|
||||
<meta name="Forrest-version" content="0.8">
|
||||
<meta name="Forrest-version" content="0.9">
|
||||
<meta name="Forrest-skin-name" content="lucene">
|
||||
<title>Site Linkmap Table of Contents</title>
|
||||
<link type="text/css" href="skin/basic.css" rel="stylesheet">
|
||||
|
|
Binary file not shown.
|
@ -3,7 +3,7 @@
|
|||
<head>
|
||||
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta content="Apache Forrest" name="Generator">
|
||||
<meta name="Forrest-version" content="0.8">
|
||||
<meta name="Forrest-version" content="0.9">
|
||||
<meta name="Forrest-skin-name" content="lucene">
|
||||
<title>
|
||||
Apache Lucene - Lucene Contrib
|
||||
|
|
Binary file not shown.
|
@ -3,7 +3,7 @@
|
|||
<head>
|
||||
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta content="Apache Forrest" name="Generator">
|
||||
<meta name="Forrest-version" content="0.8">
|
||||
<meta name="Forrest-version" content="0.9">
|
||||
<meta name="Forrest-skin-name" content="lucene">
|
||||
<title>
|
||||
Apache Lucene - Query Parser Syntax
|
||||
|
|
Binary file not shown.
|
@ -3,7 +3,7 @@
|
|||
<head>
|
||||
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta content="Apache Forrest" name="Generator">
|
||||
<meta name="Forrest-version" content="0.8">
|
||||
<meta name="Forrest-version" content="0.9">
|
||||
<meta name="Forrest-skin-name" content="lucene">
|
||||
<title>
|
||||
Apache Lucene - Scoring
|
||||
|
|
Binary file not shown.
Binary file not shown.
After Width: | Height: | Size: 4.7 KiB |
Binary file not shown.
After Width: | Height: | Size: 2.2 KiB |
|
@ -3,7 +3,7 @@
|
|||
<head>
|
||||
<META http-equiv="Content-Type" content="text/html; charset=UTF-8">
|
||||
<meta content="Apache Forrest" name="Generator">
|
||||
<meta name="Forrest-version" content="0.8">
|
||||
<meta name="Forrest-version" content="0.9">
|
||||
<meta name="Forrest-skin-name" content="lucene">
|
||||
<title>Apache Lucene - System Requirements</title>
|
||||
<link type="text/css" href="skin/basic.css" rel="stylesheet">
|
||||
|
|
Binary file not shown.
|
@ -131,8 +131,13 @@ public final class Document {
|
|||
/** Returns a field with the given name if any exist in this document, or
|
||||
* null. If multiple fields exists with this name, this method returns the
|
||||
* first value added.
|
||||
* Do not use this method with lazy loaded fields.
|
||||
* Do not use this method with lazy loaded fields or {@link NumericField}.
|
||||
* @deprecated use {@link #getFieldable} instead and cast depending on
|
||||
* data type.
|
||||
* @throws ClassCastException if you try to retrieve a numerical or
|
||||
* lazy loaded field.
|
||||
*/
|
||||
@Deprecated
|
||||
public final Field getField(String name) {
|
||||
return (Field) getFieldable(name);
|
||||
}
|
||||
|
@ -154,6 +159,8 @@ public final class Document {
|
|||
* this document, or null. If multiple fields exist with this name, this
|
||||
* method returns the first value added. If only binary fields with this name
|
||||
* exist, returns null.
|
||||
* For {@link NumericField} it returns the string value of the number. If you want
|
||||
* the actual {@code NumericField} instance back, use {@link #getFieldable}.
|
||||
*/
|
||||
public final String get(String name) {
|
||||
for (Fieldable field : fields) {
|
||||
|
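A short, hedged sketch of the retrieval behaviour described in the hunk above; "price" is an illustrative field name, and the searcher/hit arguments stand in for a prior search.

    import java.io.IOException;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.NumericField;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.ScoreDoc;

    // Hypothetical helper class; "price" is an illustrative field name.
    public final class NumericFieldRetrievalExample {
      public static Number loadPrice(IndexSearcher searcher, ScoreDoc hit) throws IOException {
        Document doc = searcher.doc(hit.doc);
        String asString = doc.get("price");                         // string form of the number, e.g. "9.99"
        NumericField nf = (NumericField) doc.getFieldable("price"); // the NumericField instance itself
        return nf.getNumericValue();                                // the stored numeric value
      }
    }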
@ -177,13 +184,18 @@ public final class Document {
|
|||
|
||||
/**
|
||||
* Returns an array of {@link Field}s with the given name.
|
||||
* Do not use with lazy loaded fields.
|
||||
* This method returns an empty array when there are no
|
||||
* matching fields. It never returns null.
|
||||
* Do not use this method with lazy loaded fields or {@link NumericField}.
|
||||
*
|
||||
* @param name the name of the field
|
||||
* @return a <code>Field[]</code> array
|
||||
* @deprecated use {@link #getFieldable} instead and cast depending on
|
||||
* data type.
|
||||
* @throws ClassCastException if you try to retrieve a numerical or
|
||||
* lazy loaded field.
|
||||
*/
|
||||
@Deprecated
|
||||
public final Field[] getFields(String name) {
|
||||
List<Field> result = new ArrayList<Field>();
|
||||
for (Fieldable field : fields) {
|
||||
|
@ -230,6 +242,8 @@ public final class Document {
|
|||
* Returns an array of values of the field specified as the method parameter.
|
||||
* This method returns an empty array when there are no
|
||||
* matching fields. It never returns null.
|
||||
* For {@link NumericField}s it returns the string value of the number. If you want
|
||||
* the actual {@code NumericField} instances back, use {@link #getFieldables}.
|
||||
* @param name the name of the field
|
||||
* @return a <code>String[]</code> of field values
|
||||
*/
|
||||
|
|
|
@ -127,18 +127,18 @@ import org.apache.lucene.search.FieldCache; // javadocs
|
|||
* class is a wrapper around this token stream type for
|
||||
* easier, more intuitive usage.</p>
|
||||
*
|
||||
* <p><b>NOTE:</b> This class is only used during
|
||||
* indexing. When retrieving the stored field value from a
|
||||
* {@link Document} instance after search, you will get a
|
||||
* conventional {@link Fieldable} instance where the numeric
|
||||
* values are returned as {@link String}s (according to
|
||||
* <code>toString(value)</code> of the used data type).
|
||||
*
|
||||
* @since 2.9
|
||||
*/
|
||||
public final class NumericField extends AbstractField {
|
||||
|
||||
private final NumericTokenStream numericTS;
|
||||
/** Data type of the value in {@link NumericField}.
|
||||
* @since 3.2
|
||||
*/
|
||||
public static enum DataType { INT, LONG, FLOAT, DOUBLE }
|
||||
|
||||
private transient NumericTokenStream numericTS;
|
||||
private DataType type;
|
||||
private final int precisionStep;
|
||||
|
||||
/**
|
||||
* Creates a field for numeric values using the default <code>precisionStep</code>
|
||||
|
@ -158,8 +158,8 @@ public final class NumericField extends AbstractField {
|
|||
* a numeric value, before indexing a document containing this field,
|
||||
* set a value using the various set<em>???</em>Value() methods.
|
||||
* @param name the field name
|
||||
* @param store if the field should be stored in plain text form
|
||||
* (according to <code>toString(value)</code> of the used data type)
|
||||
* @param store if the field should be stored, {@link Document#getFieldable}
|
||||
* then returns {@code NumericField} instances on search results.
|
||||
* @param index if the field should be indexed using {@link NumericTokenStream}
|
||||
*/
|
||||
public NumericField(String name, Field.Store store, boolean index) {
|
||||
|
@ -186,19 +186,43 @@ public final class NumericField extends AbstractField {
|
|||
* set a value using the various set<em>???</em>Value() methods.
|
||||
* @param name the field name
|
||||
* @param precisionStep the used <a href="../search/NumericRangeQuery.html#precisionStepDesc">precision step</a>
|
||||
* @param store if the field should be stored in plain text form
|
||||
* (according to <code>toString(value)</code> of the used data type)
|
||||
* @param store if the field should be stored, {@link Document#getFieldable}
|
||||
* then returns {@code NumericField} instances on search results.
|
||||
* @param index if the field should be indexed using {@link NumericTokenStream}
|
||||
*/
|
||||
public NumericField(String name, int precisionStep, Field.Store store, boolean index) {
|
||||
super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO);
|
||||
this.precisionStep = precisionStep;
|
||||
setOmitTermFreqAndPositions(true);
|
||||
numericTS = new NumericTokenStream(precisionStep);
|
||||
}
|
||||
|
||||
/** Returns a {@link NumericTokenStream} for indexing the numeric value. */
|
||||
public TokenStream tokenStreamValue() {
|
||||
return isIndexed() ? numericTS : null;
|
||||
if (!isIndexed())
|
||||
return null;
|
||||
if (numericTS == null) {
|
||||
// lazy init the TokenStream as it is heavy to instantiate (attributes,...),
|
||||
// if not needed (stored field loading)
|
||||
numericTS = new NumericTokenStream(precisionStep);
|
||||
// initialize value in TokenStream
|
||||
if (fieldsData != null) {
|
||||
assert type != null;
|
||||
final Number val = (Number) fieldsData;
|
||||
switch (type) {
|
||||
case INT:
|
||||
numericTS.setIntValue(val.intValue()); break;
|
||||
case LONG:
|
||||
numericTS.setLongValue(val.longValue()); break;
|
||||
case FLOAT:
|
||||
numericTS.setFloatValue(val.floatValue()); break;
|
||||
case DOUBLE:
|
||||
numericTS.setDoubleValue(val.doubleValue()); break;
|
||||
default:
|
||||
assert false : "Should never get here";
|
||||
}
|
||||
}
|
||||
}
|
||||
return numericTS;
|
||||
}
|
||||
|
||||
/** Returns always <code>null</code> for numeric fields */
|
||||
|
@ -212,7 +236,10 @@ public final class NumericField extends AbstractField {
|
|||
return null;
|
||||
}
|
||||
|
||||
/** Returns the numeric value as a string (how it is stored, when {@link Field.Store#YES} is chosen). */
|
||||
/** Returns the numeric value as a string. This format is also returned if you call {@link Document#get(String)}
|
||||
* on search results. It is recommended to use {@link Document#getFieldable} instead
|
||||
* that returns {@code NumericField} instances. You can then use {@link #getNumericValue}
|
||||
* to return the stored value. */
|
||||
public String stringValue() {
|
||||
return (fieldsData == null) ? null : fieldsData.toString();
|
||||
}
|
||||
|
@ -224,7 +251,14 @@ public final class NumericField extends AbstractField {
|
|||
|
||||
/** Returns the precision step. */
|
||||
public int getPrecisionStep() {
|
||||
return numericTS.getPrecisionStep();
|
||||
return precisionStep;
|
||||
}
|
||||
|
||||
/** Returns the data type of the current value, {@code null} if not yet set.
|
||||
* @since 3.2
|
||||
*/
|
||||
public DataType getDataType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -234,8 +268,9 @@ public final class NumericField extends AbstractField {
|
|||
* <code>document.add(new NumericField(name, precisionStep).setLongValue(value))</code>
|
||||
*/
|
||||
public NumericField setLongValue(final long value) {
|
||||
numericTS.setLongValue(value);
|
||||
if (numericTS != null) numericTS.setLongValue(value);
|
||||
fieldsData = Long.valueOf(value);
|
||||
type = DataType.LONG;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -246,8 +281,9 @@ public final class NumericField extends AbstractField {
|
|||
* <code>document.add(new NumericField(name, precisionStep).setIntValue(value))</code>
|
||||
*/
|
||||
public NumericField setIntValue(final int value) {
|
||||
numericTS.setIntValue(value);
|
||||
if (numericTS != null) numericTS.setIntValue(value);
|
||||
fieldsData = Integer.valueOf(value);
|
||||
type = DataType.INT;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -258,8 +294,9 @@ public final class NumericField extends AbstractField {
|
|||
* <code>document.add(new NumericField(name, precisionStep).setDoubleValue(value))</code>
|
||||
*/
|
||||
public NumericField setDoubleValue(final double value) {
|
||||
numericTS.setDoubleValue(value);
|
||||
if (numericTS != null) numericTS.setDoubleValue(value);
|
||||
fieldsData = Double.valueOf(value);
|
||||
type = DataType.DOUBLE;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
@ -270,8 +307,9 @@ public final class NumericField extends AbstractField {
|
|||
* <code>document.add(new NumericField(name, precisionStep).setFloatValue(value))</code>
|
||||
*/
|
||||
public NumericField setFloatValue(final float value) {
|
||||
numericTS.setFloatValue(value);
|
||||
if (numericTS != null) numericTS.setFloatValue(value);
|
||||
fieldsData = Float.valueOf(value);
|
||||
type = DataType.FLOAT;
|
||||
return this;
|
||||
}
|
||||
|
||||
|
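The constructors and chained set???Value() methods above combine into the usual indexing idiom. A minimal sketch, assuming hypothetical field names, values, and a precision step of 8:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.NumericField;

    // Illustrative only; field names, values and the precision step are made up.
    public final class NumericFieldIndexingExample {
      public static Document exampleDocument() {
        Document doc = new Document();
        doc.add(new NumericField("price", Field.Store.YES, true).setDoubleValue(9.99));
        doc.add(new NumericField("timestamp", 8, Field.Store.NO, true).setLongValue(1305000000000L));
        doc.add(new NumericField("count", Field.Store.YES, true).setIntValue(42));
        doc.add(new NumericField("weight", Field.Store.YES, true).setFloatValue(1.5f));
        return doc;
      }
    }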
|
|
@ -132,9 +132,9 @@ class BufferedDeletesStream {
|
|||
public final long gen;
|
||||
|
||||
// If non-null, contains segments that are 100% deleted
|
||||
public final SegmentInfos allDeleted;
|
||||
public final List<SegmentInfo> allDeleted;
|
||||
|
||||
ApplyDeletesResult(boolean anyDeletes, long gen, SegmentInfos allDeleted) {
|
||||
ApplyDeletesResult(boolean anyDeletes, long gen, List<SegmentInfo> allDeleted) {
|
||||
this.anyDeletes = anyDeletes;
|
||||
this.gen = gen;
|
||||
this.allDeleted = allDeleted;
|
||||
|
@ -164,7 +164,7 @@ class BufferedDeletesStream {
|
|||
/** Resolves the buffered deleted Term/Query/docIDs, into
|
||||
* actual deleted docIDs in the deletedDocs BitVector for
|
||||
* each SegmentReader. */
|
||||
public synchronized ApplyDeletesResult applyDeletes(IndexWriter.ReaderPool readerPool, SegmentInfos infos) throws IOException {
|
||||
public synchronized ApplyDeletesResult applyDeletes(IndexWriter.ReaderPool readerPool, List<SegmentInfo> infos) throws IOException {
|
||||
final long t0 = System.currentTimeMillis();
|
||||
|
||||
if (infos.size() == 0) {
|
||||
|
@ -182,7 +182,7 @@ class BufferedDeletesStream {
|
|||
message("applyDeletes: infos=" + infos + " packetCount=" + deletes.size());
|
||||
}
|
||||
|
||||
SegmentInfos infos2 = new SegmentInfos();
|
||||
List<SegmentInfo> infos2 = new ArrayList<SegmentInfo>();
|
||||
infos2.addAll(infos);
|
||||
Collections.sort(infos2, sortSegInfoByDelGen);
|
||||
|
||||
|
@ -192,7 +192,7 @@ class BufferedDeletesStream {
|
|||
int infosIDX = infos2.size()-1;
|
||||
int delIDX = deletes.size()-1;
|
||||
|
||||
SegmentInfos allDeleted = null;
|
||||
List<SegmentInfo> allDeleted = null;
|
||||
|
||||
while (infosIDX >= 0) {
|
||||
//System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX);
|
||||
|
@ -245,7 +245,7 @@ class BufferedDeletesStream {
|
|||
|
||||
if (segAllDeletes) {
|
||||
if (allDeleted == null) {
|
||||
allDeleted = new SegmentInfos();
|
||||
allDeleted = new ArrayList<SegmentInfo>();
|
||||
}
|
||||
allDeleted.add(info);
|
||||
}
|
||||
|
@ -287,7 +287,7 @@ class BufferedDeletesStream {
|
|||
|
||||
if (segAllDeletes) {
|
||||
if (allDeleted == null) {
|
||||
allDeleted = new SegmentInfos();
|
||||
allDeleted = new ArrayList<SegmentInfo>();
|
||||
}
|
||||
allDeleted.add(info);
|
||||
}
|
||||
|
|
|
@ -46,8 +46,10 @@ import org.apache.lucene.util.IOUtils;
|
|||
* file. The {directory} that follows has that many entries. Each directory entry
|
||||
* contains a long pointer to the start of this file's data section, and a String
|
||||
* with that file's name.
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
final class CompoundFileWriter {
|
||||
public final class CompoundFileWriter {
|
||||
|
||||
static final class FileEntry {
|
||||
|
||||
|
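The directory layout described in the comment above can be walked with the plain IndexInput primitives. A rough sketch, assuming the entry count is stored as a VInt ahead of the entries (an assumption, since the header is not shown in this hunk):

    import java.io.IOException;
    import java.util.LinkedHashMap;
    import java.util.Map;
    import org.apache.lucene.store.IndexInput;

    // Hypothetical reader for the directory described above: a count followed by
    // (long data pointer, String file name) pairs. Header details are assumed.
    public final class CompoundDirectoryExample {
      public static Map<String, Long> readDirectory(IndexInput in) throws IOException {
        int count = in.readVInt();                 // "has that many entries"
        Map<String, Long> offsets = new LinkedHashMap<String, Long>();
        for (int i = 0; i < count; i++) {
          long dataOffset = in.readLong();         // pointer to this file's data section
          String fileName = in.readString();       // the file's name
          offsets.put(fileName, dataOffset);
        }
        return offsets;
      }
    }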
@ -137,8 +139,7 @@ final class CompoundFileWriter {
|
|||
|
||||
/** Merge files with the extensions added up to now.
|
||||
* All files with these extensions are combined sequentially into the
|
||||
* compound stream. After successful merge, the source files
|
||||
* are deleted.
|
||||
* compound stream.
|
||||
* @throws IllegalStateException if close() had been called before or
|
||||
* if no file has been added to this object
|
||||
*/
|
||||
|
|
|
@ -135,8 +135,8 @@ public class ConcurrentMergeScheduler extends MergeScheduler {
|
|||
final MergePolicy.OneMerge m1 = t1.getCurrentMerge();
|
||||
final MergePolicy.OneMerge m2 = t2.getCurrentMerge();
|
||||
|
||||
final int c1 = m1 == null ? Integer.MAX_VALUE : m1.segments.totalDocCount();
|
||||
final int c2 = m2 == null ? Integer.MAX_VALUE : m2.segments.totalDocCount();
|
||||
final int c1 = m1 == null ? Integer.MAX_VALUE : m1.totalDocCount;
|
||||
final int c2 = m2 == null ? Integer.MAX_VALUE : m2.totalDocCount;
|
||||
|
||||
return c2 - c1;
|
||||
}
|
||||
|
|
|
@ -263,9 +263,10 @@ final class DocFieldProcessor extends DocConsumer {
|
|||
// enabled; we could save [small amount of] CPU
|
||||
// here.
|
||||
ArrayUtil.quickSort(fields, 0, fieldCount, fieldsComp);
|
||||
|
||||
for(int i=0;i<fieldCount;i++)
|
||||
fields[i].consumer.processFields(fields[i].fields, fields[i].fieldCount);
|
||||
for(int i=0;i<fieldCount;i++) {
|
||||
final DocFieldProcessorPerField perField = fields[i];
|
||||
perField.consumer.processFields(perField.fields, perField.fieldCount);
|
||||
}
|
||||
|
||||
if (docState.maxTermPrefix != null && docState.infoStream != null) {
|
||||
docState.infoStream.println("WARNING: document contains at least one immense term (whose UTF8 encoding is longer than the max length " + DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8 + "), all of which were skipped. Please correct the analyzer to not produce such terms. The prefix of the first immense term is: '" + docState.maxTermPrefix + "...'");
|
||||
|
|
|
@ -188,7 +188,7 @@ final class DocumentsWriter {
|
|||
this.infoStream = infoStream;
|
||||
final Iterator<ThreadState> it = perThreadPool.getAllPerThreadsIterator();
|
||||
while (it.hasNext()) {
|
||||
it.next().perThread.docState.infoStream = infoStream;
|
||||
it.next().perThread.setInfoStream(infoStream);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -63,9 +63,10 @@ import org.apache.lucene.search.Query;
|
|||
*/
|
||||
final class DocumentsWriterDeleteQueue {
|
||||
|
||||
private volatile Node tail;
|
||||
private volatile Node<?> tail;
|
||||
|
||||
private static final AtomicReferenceFieldUpdater<DocumentsWriterDeleteQueue, Node> tailUpdater = AtomicReferenceFieldUpdater
|
||||
@SuppressWarnings("rawtypes")
|
||||
private static final AtomicReferenceFieldUpdater<DocumentsWriterDeleteQueue,Node> tailUpdater = AtomicReferenceFieldUpdater
|
||||
.newUpdater(DocumentsWriterDeleteQueue.class, Node.class, "tail");
|
||||
|
||||
private final DeleteSlice globalSlice;
|
||||
|
@ -90,7 +91,7 @@ final class DocumentsWriterDeleteQueue {
|
|||
* we use a sentinel instance as our initial tail. No slice will ever try to
|
||||
* apply this tail since the head is always omitted.
|
||||
*/
|
||||
tail = new Node(null); // sentinel
|
||||
tail = new Node<Object>(null); // sentinel
|
||||
globalSlice = new DeleteSlice(tail);
|
||||
}
|
||||
|
||||
|
@ -126,14 +127,14 @@ final class DocumentsWriterDeleteQueue {
|
|||
// we can do it just every n times or so?
|
||||
}
|
||||
|
||||
void add(Node item) {
|
||||
void add(Node<?> item) {
|
||||
/*
|
||||
* this non-blocking / 'wait-free' linked list add was inspired by Apache
|
||||
* Harmony's ConcurrentLinkedQueue Implementation.
|
||||
*/
|
||||
while (true) {
|
||||
final Node currentTail = this.tail;
|
||||
final Node tailNext = currentTail.next;
|
||||
final Node<?> currentTail = this.tail;
|
||||
final Node<?> tailNext = currentTail.next;
|
||||
if (tail == currentTail) {
|
||||
if (tailNext != null) {
|
||||
/*
|
||||
|
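The CAS loop in add() above follows the classic non-blocking queue append. A stripped-down, self-contained sketch of the same idea (an illustration, not the Lucene class itself; it uses AtomicReference instead of the field updaters used above):

    import java.util.concurrent.atomic.AtomicReference;

    // Minimal illustration of the wait-free append idea used above.
    final class TinyAppendOnlyQueue<T> {
      static final class Node<T> {
        final T item;
        final AtomicReference<Node<T>> next = new AtomicReference<Node<T>>(null);
        Node(T item) { this.item = item; }
      }

      private final AtomicReference<Node<T>> tail =
          new AtomicReference<Node<T>>(new Node<T>(null)); // sentinel, never applied

      void add(T item) {
        final Node<T> newNode = new Node<T>(item);
        while (true) {
          final Node<T> currentTail = tail.get();
          final Node<T> tailNext = currentTail.next.get();
          if (tailNext != null) {
            // tail is lagging behind; help advance it and retry
            tail.compareAndSet(currentTail, tailNext);
          } else if (currentTail.next.compareAndSet(null, newNode)) {
            // linked the new node; if advancing tail fails, another thread will help
            tail.compareAndSet(currentTail, newNode);
            return;
          }
        }
      }
    }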
@ -196,7 +197,7 @@ final class DocumentsWriterDeleteQueue {
|
|||
* deletes in the queue and reset the global slice to let the GC prune the
|
||||
* queue.
|
||||
*/
|
||||
final Node currentTail = tail; // take the current tail make this local any
|
||||
final Node<?> currentTail = tail; // take the current tail make this local any
|
||||
// Changes after this call are applied later
|
||||
// and not relevant here
|
||||
if (callerSlice != null) {
|
||||
|
@ -232,10 +233,10 @@ final class DocumentsWriterDeleteQueue {
|
|||
|
||||
static class DeleteSlice {
|
||||
// No need to be volatile, slices are thread captive (only accessed by one thread)!
|
||||
Node sliceHead; // we don't apply this one
|
||||
Node sliceTail;
|
||||
Node<?> sliceHead; // we don't apply this one
|
||||
Node<?> sliceTail;
|
||||
|
||||
DeleteSlice(Node currentTail) {
|
||||
DeleteSlice(Node<?> currentTail) {
|
||||
assert currentTail != null;
|
||||
/*
|
||||
* Initially this is a 0 length slice pointing to the 'current' tail of
|
||||
|
@ -256,7 +257,7 @@ final class DocumentsWriterDeleteQueue {
|
|||
* tail in this slice are not equal then there will be at least one more
|
||||
* non-null node in the slice!
|
||||
*/
|
||||
Node current = sliceHead;
|
||||
Node<?> current = sliceHead;
|
||||
do {
|
||||
current = current.next;
|
||||
assert current != null : "slice property violated between the head on the tail must not be a null node";
|
||||
|
@ -290,7 +291,7 @@ final class DocumentsWriterDeleteQueue {
|
|||
void clear() {
|
||||
globalBufferLock.lock();
|
||||
try {
|
||||
final Node currentTail = tail;
|
||||
final Node<?> currentTail = tail;
|
||||
globalSlice.sliceHead = globalSlice.sliceTail = currentTail;
|
||||
globalBufferedDeletes.clear();
|
||||
} finally {
|
||||
|
@ -298,27 +299,28 @@ final class DocumentsWriterDeleteQueue {
|
|||
}
|
||||
}
|
||||
|
||||
private static class Node {
|
||||
volatile Node next;
|
||||
final Object item;
|
||||
private static class Node<T> {
|
||||
volatile Node<?> next;
|
||||
final T item;
|
||||
|
||||
private Node(Object item) {
|
||||
Node(T item) {
|
||||
this.item = item;
|
||||
}
|
||||
|
||||
static final AtomicReferenceFieldUpdater<Node, Node> nextUpdater = AtomicReferenceFieldUpdater
|
||||
@SuppressWarnings("rawtypes")
|
||||
static final AtomicReferenceFieldUpdater<Node,Node> nextUpdater = AtomicReferenceFieldUpdater
|
||||
.newUpdater(Node.class, Node.class, "next");
|
||||
|
||||
void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
|
||||
assert false : "sentinel item must never be applied";
|
||||
}
|
||||
|
||||
boolean casNext(Node cmp, Node val) {
|
||||
boolean casNext(Node<?> cmp, Node<?> val) {
|
||||
return nextUpdater.compareAndSet(this, cmp, val);
|
||||
}
|
||||
}
|
||||
|
||||
private static final class TermNode extends Node {
|
||||
private static final class TermNode extends Node<Term> {
|
||||
|
||||
TermNode(Term term) {
|
||||
super(term);
|
||||
|
@ -326,33 +328,31 @@ final class DocumentsWriterDeleteQueue {
|
|||
|
||||
@Override
|
||||
void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
|
||||
bufferedDeletes.addTerm((Term) item, docIDUpto);
|
||||
bufferedDeletes.addTerm(item, docIDUpto);
|
||||
}
|
||||
}
|
||||
|
||||
private static final class QueryArrayNode extends Node {
|
||||
private static final class QueryArrayNode extends Node<Query[]> {
|
||||
QueryArrayNode(Query[] query) {
|
||||
super(query);
|
||||
}
|
||||
|
||||
@Override
|
||||
void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
|
||||
final Query[] queries = (Query[]) item;
|
||||
for (Query query : queries) {
|
||||
for (Query query : item) {
|
||||
bufferedDeletes.addQuery(query, docIDUpto);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static final class TermArrayNode extends Node {
|
||||
private static final class TermArrayNode extends Node<Term[]> {
|
||||
TermArrayNode(Term[] term) {
|
||||
super(term);
|
||||
}
|
||||
|
||||
@Override
|
||||
void apply(BufferedDeletes bufferedDeletes, int docIDUpto) {
|
||||
final Term[] terms = (Term[]) item;
|
||||
for (Term term : terms) {
|
||||
for (Term term : item) {
|
||||
bufferedDeletes.addTerm(term, docIDUpto);
|
||||
}
|
||||
}
|
||||
|
@ -361,7 +361,7 @@ final class DocumentsWriterDeleteQueue {
|
|||
|
||||
private boolean forceApplyGlobalSlice() {
|
||||
globalBufferLock.lock();
|
||||
final Node currentTail = tail;
|
||||
final Node<?> currentTail = tail;
|
||||
try {
|
||||
if (globalSlice.sliceTail != currentTail) {
|
||||
globalSlice.sliceTail = currentTail;
|
||||
|
|
|
@ -122,13 +122,13 @@ public final class DocumentsWriterFlushControl {
|
|||
// is super important since we can not address more than 2048 MB per DWPT
|
||||
setFlushPending(perThread);
|
||||
if (fullFlush) {
|
||||
DocumentsWriterPerThread toBlock = internalTryCheckOutForFlush(perThread, false);
|
||||
DocumentsWriterPerThread toBlock = internalTryCheckOutForFlush(perThread);
|
||||
assert toBlock != null;
|
||||
blockedFlushes.add(toBlock);
|
||||
}
|
||||
}
|
||||
}
|
||||
final DocumentsWriterPerThread flushingDWPT = tryCheckoutForFlush(perThread, false);
|
||||
final DocumentsWriterPerThread flushingDWPT = tryCheckoutForFlush(perThread);
|
||||
healthiness.updateStalled(this);
|
||||
return flushingDWPT;
|
||||
}
|
||||
|
@ -189,18 +189,15 @@ public final class DocumentsWriterFlushControl {
|
|||
}
|
||||
|
||||
synchronized DocumentsWriterPerThread tryCheckoutForFlush(
|
||||
ThreadState perThread, boolean setPending) {
|
||||
ThreadState perThread) {
|
||||
if (fullFlush) {
|
||||
return null;
|
||||
}
|
||||
return internalTryCheckOutForFlush(perThread, setPending);
|
||||
return internalTryCheckOutForFlush(perThread);
|
||||
}
|
||||
|
||||
private DocumentsWriterPerThread internalTryCheckOutForFlush(
|
||||
ThreadState perThread, boolean setPending) {
|
||||
if (setPending && !perThread.flushPending) {
|
||||
setFlushPending(perThread);
|
||||
}
|
||||
ThreadState perThread) {
|
||||
if (perThread.flushPending) {
|
||||
// We are pending so all memory is already moved to flushBytes
|
||||
if (perThread.tryLock()) {
|
||||
|
@ -245,7 +242,7 @@ public final class DocumentsWriterFlushControl {
|
|||
while (allActiveThreads.hasNext() && numPending > 0) {
|
||||
ThreadState next = allActiveThreads.next();
|
||||
if (next.flushPending) {
|
||||
final DocumentsWriterPerThread dwpt = tryCheckoutForFlush(next, false);
|
||||
final DocumentsWriterPerThread dwpt = tryCheckoutForFlush(next);
|
||||
if (dwpt != null) {
|
||||
return dwpt;
|
||||
}
|
||||
|
@ -330,7 +327,12 @@ public final class DocumentsWriterFlushControl {
|
|||
}
|
||||
if (next.perThread.getNumDocsInRAM() > 0 ) {
|
||||
final DocumentsWriterPerThread dwpt = next.perThread; // just for assert
|
||||
final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next, true);
|
||||
synchronized (this) {
|
||||
if (!next.flushPending) {
|
||||
setFlushPending(next);
|
||||
}
|
||||
}
|
||||
final DocumentsWriterPerThread flushingDWPT = internalTryCheckOutForFlush(next);
|
||||
assert flushingDWPT != null : "DWPT must never be null here since we hold the lock and it holds documents";
|
||||
assert dwpt == flushingDWPT : "flushControl returned different DWPT";
|
||||
toFlush.add(flushingDWPT);
|
||||
|
|
|
@ -163,7 +163,7 @@ public class DocumentsWriterPerThread {
|
|||
boolean hasAborted = false; // True if the last exception throws by #updateDocument was aborting
|
||||
|
||||
private FieldInfos fieldInfos;
|
||||
private final PrintStream infoStream;
|
||||
private PrintStream infoStream;
|
||||
private int numDocsInRAM;
|
||||
private int flushedDocCount;
|
||||
DocumentsWriterDeleteQueue deleteQueue;
|
||||
|
@ -235,6 +235,7 @@ public class DocumentsWriterPerThread {
|
|||
// mark document as deleted
|
||||
deleteDocID(docState.docID);
|
||||
numDocsInRAM++;
|
||||
fieldInfos.revertUncommitted();
|
||||
} else {
|
||||
abort();
|
||||
}
|
||||
|
@ -377,15 +378,12 @@ public class DocumentsWriterPerThread {
|
|||
boolean success = false;
|
||||
|
||||
try {
|
||||
|
||||
SegmentInfo newSegment = new SegmentInfo(segment, flushState.numDocs, directory, false, fieldInfos.hasProx(), flushState.segmentCodecs, false, fieldInfos);
|
||||
consumer.flush(flushState);
|
||||
pendingDeletes.terms.clear();
|
||||
newSegment.setHasVectors(flushState.hasVectors);
|
||||
|
||||
final SegmentInfo newSegment = new SegmentInfo(segment, flushState.numDocs, directory, false, flushState.segmentCodecs, fieldInfos.asReadOnly());
|
||||
if (infoStream != null) {
|
||||
message("new segment has " + (flushState.deletedDocs == null ? 0 : flushState.deletedDocs.count()) + " deleted docs");
|
||||
message("new segment has " + (flushState.hasVectors ? "vectors" : "no vectors"));
|
||||
message("new segment has " + (newSegment.getHasVectors() ? "vectors" : "no vectors"));
|
||||
message("flushedFiles=" + newSegment.files());
|
||||
message("flushed codecs=" + newSegment.getSegmentCodecs());
|
||||
}
|
||||
|
@ -435,10 +433,6 @@ public class DocumentsWriterPerThread {
|
|||
return bytesUsed.get() + pendingDeletes.bytesUsed.get();
|
||||
}
|
||||
|
||||
FieldInfos getFieldInfos() {
|
||||
return fieldInfos;
|
||||
}
|
||||
|
||||
void message(String message) {
|
||||
writer.message("DWPT: " + message);
|
||||
}
|
||||
|
@ -498,4 +492,9 @@ public class DocumentsWriterPerThread {
|
|||
assert segment != null;
|
||||
return new PerDocWriteState(infoStream, directory, segment, fieldInfos, bytesUsed, codecId);
|
||||
}
|
||||
|
||||
void setInfoStream(PrintStream infoStream) {
|
||||
this.infoStream = infoStream;
|
||||
docState.infoStream = infoStream;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,7 +22,6 @@ import org.apache.lucene.index.values.Type;
|
|||
/** @lucene.experimental */
|
||||
public final class FieldInfo {
|
||||
public static final int UNASSIGNED_CODEC_ID = -1;
|
||||
|
||||
public final String name;
|
||||
public final int number;
|
||||
|
||||
|
@ -113,7 +112,6 @@ public final class FieldInfo {
|
|||
}
|
||||
assert !this.omitTermFreqAndPositions || !this.storePayloads;
|
||||
}
|
||||
|
||||
void setDocValues(Type v) {
|
||||
if (docValues == null) {
|
||||
docValues = v;
|
||||
|
@ -127,4 +125,29 @@ public final class FieldInfo {
|
|||
public Type getDocValues() {
|
||||
return docValues;
|
||||
}
|
||||
|
||||
private boolean vectorsCommitted;
|
||||
|
||||
/**
|
||||
* Reverts all uncommitted changes on this {@link FieldInfo}
|
||||
* @see #commitVectors()
|
||||
*/
|
||||
void revertUncommitted() {
|
||||
if (storeTermVector && !vectorsCommitted) {
|
||||
storeOffsetWithTermVector = false;
|
||||
storePositionWithTermVector = false;
|
||||
storeTermVector = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Commits term vector modifications. Changes to term-vectors must be
|
||||
* explicitly committed once the necessary files are created. If those changes
|
||||
* are not committed, a subsequent {@link #revertUncommitted()} will reset
|
||||
* all term-vector flags before the next document.
|
||||
*/
|
||||
void commitVectors() {
|
||||
assert storeTermVector;
|
||||
vectorsCommitted = true;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -220,6 +220,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
static final byte OMIT_TERM_FREQ_AND_POSITIONS = 0x40;
|
||||
|
||||
private int format;
|
||||
private boolean hasProx; // only set if readonly
|
||||
private boolean hasVectors; // only set if readonly
|
||||
private long version; // internal use to track changes
|
||||
|
||||
|
||||
/**
|
||||
* Creates a new {@link FieldInfos} instance with a private
|
||||
|
@ -267,7 +271,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
*/
|
||||
public FieldInfos(Directory d, String name) throws IOException {
|
||||
this((FieldNumberBiMap)null, null); // use null here to make this FIs Read-Only
|
||||
IndexInput input = d.openInput(name);
|
||||
final IndexInput input = d.openInput(name);
|
||||
try {
|
||||
read(input, name);
|
||||
} finally {
|
||||
|
@ -303,6 +307,9 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
@Override
|
||||
synchronized public Object clone() {
|
||||
FieldInfos fis = new FieldInfos(globalFieldNumbers, segmentCodecsBuilder);
|
||||
fis.format = format;
|
||||
fis.hasProx = hasProx;
|
||||
fis.hasVectors = hasVectors;
|
||||
for (FieldInfo fi : this) {
|
||||
FieldInfo clone = (FieldInfo) (fi).clone();
|
||||
fis.putInternal(clone);
|
||||
|
@ -312,6 +319,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
|
||||
/** Returns true if any fields do not omitTermFreqAndPositions */
|
||||
public boolean hasProx() {
|
||||
if (isReadOnly()) {
|
||||
return hasProx;
|
||||
}
|
||||
// mutable FIs must check!
|
||||
for (FieldInfo fi : this) {
|
||||
if (fi.isIndexed && !fi.omitTermFreqAndPositions) {
|
||||
return true;
|
||||
|
@ -445,6 +456,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
if ((fi.isIndexed || fi.hasDocValues()) && fi.getCodecId() == FieldInfo.UNASSIGNED_CODEC_ID) {
|
||||
segmentCodecsBuilder.tryAddAndSet(fi);
|
||||
}
|
||||
version++;
|
||||
return fi;
|
||||
}
|
||||
|
||||
|
@ -514,6 +526,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
}
|
||||
|
||||
public boolean hasVectors() {
|
||||
if (isReadOnly()) {
|
||||
return hasVectors;
|
||||
}
|
||||
// mutable FIs must check
|
||||
for (FieldInfo fi : this) {
|
||||
if (fi.storeTermVector) {
|
||||
return true;
|
||||
|
@ -567,6 +583,10 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
return globalFieldNumbers == null;
|
||||
}
|
||||
|
||||
synchronized final long getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
public void write(IndexOutput output) throws IOException {
|
||||
output.writeVInt(FORMAT_CURRENT);
|
||||
output.writeVInt(size());
|
||||
|
@ -658,7 +678,8 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
if (omitTermFreqAndPositions) {
|
||||
storePayloads = false;
|
||||
}
|
||||
|
||||
hasVectors |= storeTermVector;
|
||||
hasProx |= isIndexed && !omitTermFreqAndPositions;
|
||||
Type docValuesType = null;
|
||||
if (format <= FORMAT_INDEX_VALUES) {
|
||||
final byte b = input.readByte();
|
||||
|
@ -706,4 +727,28 @@ public final class FieldInfos implements Iterable<FieldInfo> {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reverts all uncommitted changes
|
||||
* @see FieldInfo#revertUncommitted()
|
||||
*/
|
||||
void revertUncommitted() {
|
||||
for (FieldInfo fieldInfo : this) {
|
||||
fieldInfo.revertUncommitted();
|
||||
}
|
||||
}
|
||||
|
||||
final FieldInfos asReadOnly() {
|
||||
if (isReadOnly()) {
|
||||
return this;
|
||||
}
|
||||
final FieldInfos roFis = new FieldInfos((FieldNumberBiMap)null, null);
|
||||
for (FieldInfo fieldInfo : this) {
|
||||
FieldInfo clone = (FieldInfo) (fieldInfo).clone();
|
||||
roFis.putInternal(clone);
|
||||
roFis.hasVectors |= clone.storeTermVector;
|
||||
roFis.hasProx |= clone.isIndexed && !clone.omitTermFreqAndPositions;
|
||||
}
|
||||
return roFis;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -24,10 +24,11 @@ import org.apache.lucene.document.Field;
|
|||
import org.apache.lucene.document.FieldSelector;
|
||||
import org.apache.lucene.document.FieldSelectorResult;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.document.NumericField;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.lucene.store.BufferedIndexInput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.CloseableThreadLocal;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -212,40 +213,39 @@ public final class FieldsReader implements Cloneable {
|
|||
|
||||
Document doc = new Document();
|
||||
int numFields = fieldsStream.readVInt();
|
||||
for (int i = 0; i < numFields; i++) {
|
||||
out: for (int i = 0; i < numFields; i++) {
|
||||
int fieldNumber = fieldsStream.readVInt();
|
||||
FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
|
||||
FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
|
||||
|
||||
byte bits = fieldsStream.readByte();
|
||||
assert bits <= FieldsWriter.FIELD_IS_TOKENIZED + FieldsWriter.FIELD_IS_BINARY;
|
||||
int bits = fieldsStream.readByte() & 0xFF;
|
||||
assert bits <= (FieldsWriter.FIELD_IS_NUMERIC_MASK | FieldsWriter.FIELD_IS_TOKENIZED | FieldsWriter.FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
|
||||
|
||||
boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
|
||||
boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
|
||||
//TODO: Find an alternative approach here if this list continues to grow beyond the
|
||||
//list of 5 or 6 currently here. See Lucene 762 for discussion
|
||||
if (acceptField.equals(FieldSelectorResult.LOAD)) {
|
||||
addField(doc, fi, binary, tokenize);
|
||||
}
|
||||
else if (acceptField.equals(FieldSelectorResult.LOAD_AND_BREAK)){
|
||||
addField(doc, fi, binary, tokenize);
|
||||
break;//Get out of this loop
|
||||
}
|
||||
else if (acceptField.equals(FieldSelectorResult.LAZY_LOAD)) {
|
||||
addFieldLazy(doc, fi, binary, tokenize, true);
|
||||
}
|
||||
else if (acceptField.equals(FieldSelectorResult.LATENT)) {
|
||||
addFieldLazy(doc, fi, binary, tokenize, false);
|
||||
}
|
||||
else if (acceptField.equals(FieldSelectorResult.SIZE)){
|
||||
skipField(addFieldSize(doc, fi, binary));
|
||||
}
|
||||
else if (acceptField.equals(FieldSelectorResult.SIZE_AND_BREAK)){
|
||||
addFieldSize(doc, fi, binary);
|
||||
break;
|
||||
}
|
||||
else {
|
||||
skipField();
|
||||
final int numeric = bits & FieldsWriter.FIELD_IS_NUMERIC_MASK;
|
||||
|
||||
switch (acceptField) {
|
||||
case LOAD:
|
||||
addField(doc, fi, binary, tokenize, numeric);
|
||||
break;
|
||||
case LOAD_AND_BREAK:
|
||||
addField(doc, fi, binary, tokenize, numeric);
|
||||
break out; //Get out of this loop
|
||||
case LAZY_LOAD:
|
||||
addFieldLazy(doc, fi, binary, tokenize, true, numeric);
|
||||
break;
|
||||
case LATENT:
|
||||
addFieldLazy(doc, fi, binary, tokenize, false, numeric);
|
||||
break;
|
||||
case SIZE:
|
||||
skipFieldBytes(addFieldSize(doc, fi, binary, numeric));
|
||||
break;
|
||||
case SIZE_AND_BREAK:
|
||||
addFieldSize(doc, fi, binary, numeric);
|
||||
break out; //Get out of this loop
|
||||
default:
|
||||
skipField(numeric);
|
||||
}
|
||||
}
|
||||
|
||||
|
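The FieldSelectorResult cases handled above map onto the usual field-selector idiom when loading stored documents. A brief sketch, with a hypothetical field name:

    import java.io.IOException;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.FieldSelector;
    import org.apache.lucene.document.FieldSelectorResult;
    import org.apache.lucene.index.IndexReader;

    // Illustrative selector: load "title" eagerly, everything else lazily.
    public final class FieldSelectorExample {
      public static Document loadTitleEagerly(IndexReader reader, int docID) throws IOException {
        FieldSelector selector = new FieldSelector() {
          public FieldSelectorResult accept(String fieldName) {
            return "title".equals(fieldName) ? FieldSelectorResult.LOAD
                                             : FieldSelectorResult.LAZY_LOAD;
          }
        };
        return reader.document(docID, selector);
      }
    }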
@ -282,72 +282,121 @@ public final class FieldsReader implements Cloneable {
|
|||
* Skip the field. We still have to read some of the information about the field, but can skip past the actual content.
|
||||
* This will have the most payoff on large fields.
|
||||
*/
|
||||
private void skipField() throws IOException {
|
||||
skipField(fieldsStream.readVInt());
|
||||
private void skipField(int numeric) throws IOException {
|
||||
final int numBytes;
|
||||
switch(numeric) {
|
||||
case 0:
|
||||
numBytes = fieldsStream.readVInt();
|
||||
break;
|
||||
case FieldsWriter.FIELD_IS_NUMERIC_INT:
|
||||
case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
|
||||
numBytes = 4;
|
||||
break;
|
||||
case FieldsWriter.FIELD_IS_NUMERIC_LONG:
|
||||
case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
|
||||
numBytes = 8;
|
||||
break;
|
||||
default:
|
||||
throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
|
||||
}
|
||||
|
||||
skipFieldBytes(numBytes);
|
||||
}
|
||||
|
||||
private void skipField(int toRead) throws IOException {
|
||||
private void skipFieldBytes(int toRead) throws IOException {
|
||||
fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
|
||||
}
|
||||
|
||||
private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize, boolean cacheResult) throws IOException {
|
||||
private NumericField loadNumericField(FieldInfo fi, int numeric) throws IOException {
|
||||
assert numeric != 0;
|
||||
switch(numeric) {
|
||||
case FieldsWriter.FIELD_IS_NUMERIC_INT:
|
||||
return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setIntValue(fieldsStream.readInt());
|
||||
case FieldsWriter.FIELD_IS_NUMERIC_LONG:
|
||||
return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setLongValue(fieldsStream.readLong());
|
||||
case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
|
||||
return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setFloatValue(Float.intBitsToFloat(fieldsStream.readInt()));
|
||||
case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
|
||||
return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setDoubleValue(Double.longBitsToDouble(fieldsStream.readLong()));
|
||||
default:
|
||||
throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
|
||||
}
|
||||
}
|
||||
|
||||
private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize, boolean cacheResult, int numeric) throws IOException {
|
||||
final AbstractField f;
|
||||
if (binary) {
|
||||
int toRead = fieldsStream.readVInt();
|
||||
long pointer = fieldsStream.getFilePointer();
|
||||
//was: doc.add(new Fieldable(fi.name, b, Fieldable.Store.YES));
|
||||
doc.add(new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, cacheResult));
|
||||
f = new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, cacheResult);
|
||||
//Need to move the pointer ahead by toRead positions
|
||||
fieldsStream.seek(pointer + toRead);
|
||||
} else if (numeric != 0) {
|
||||
f = loadNumericField(fi, numeric);
|
||||
} else {
|
||||
Field.Store store = Field.Store.YES;
|
||||
Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
|
||||
Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
|
||||
|
||||
AbstractField f;
|
||||
int length = fieldsStream.readVInt();
|
||||
long pointer = fieldsStream.getFilePointer();
|
||||
//Skip ahead of where we are by the length of what is stored
|
||||
fieldsStream.seek(pointer+length);
|
||||
f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, cacheResult);
|
||||
f.setOmitNorms(fi.omitNorms);
|
||||
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
|
||||
|
||||
doc.add(f);
|
||||
}
|
||||
|
||||
f.setOmitNorms(fi.omitNorms);
|
||||
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
|
||||
doc.add(f);
|
||||
}
|
||||
|
||||
private void addField(Document doc, FieldInfo fi, boolean binary, boolean tokenize) throws CorruptIndexException, IOException {
|
||||
private void addField(Document doc, FieldInfo fi, boolean binary, boolean tokenize, int numeric) throws CorruptIndexException, IOException {
|
||||
final AbstractField f;
|
||||
|
||||
if (binary) {
|
||||
int toRead = fieldsStream.readVInt();
|
||||
final byte[] b = new byte[toRead];
|
||||
fieldsStream.readBytes(b, 0, b.length);
|
||||
doc.add(new Field(fi.name, b));
|
||||
f = new Field(fi.name, b);
|
||||
} else if (numeric != 0) {
|
||||
f = loadNumericField(fi, numeric);
|
||||
} else {
|
||||
Field.Store store = Field.Store.YES;
|
||||
Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
|
||||
Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
|
||||
|
||||
AbstractField f;
|
||||
f = new Field(fi.name, // name
|
||||
false,
|
||||
fieldsStream.readString(), // read value
|
||||
store,
|
||||
index,
|
||||
termVector);
|
||||
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
|
||||
f.setOmitNorms(fi.omitNorms);
|
||||
|
||||
doc.add(f);
|
||||
false,
|
||||
fieldsStream.readString(), // read value
|
||||
Field.Store.YES,
|
||||
index,
|
||||
termVector);
|
||||
}
|
||||
|
||||
f.setOmitTermFreqAndPositions(fi.omitTermFreqAndPositions);
|
||||
f.setOmitNorms(fi.omitNorms);
|
||||
doc.add(f);
|
||||
}
|
||||
|
||||
// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
|
||||
// Read just the size -- caller must skip the field content to continue reading fields
|
||||
// Return the size in bytes or chars, depending on field type
|
||||
private int addFieldSize(Document doc, FieldInfo fi, boolean binary) throws IOException {
|
||||
int size = fieldsStream.readVInt(), bytesize = binary ? size : 2*size;
|
||||
private int addFieldSize(Document doc, FieldInfo fi, boolean binary, int numeric) throws IOException {
|
||||
final int bytesize, size;
|
||||
switch(numeric) {
|
||||
case 0:
|
||||
size = fieldsStream.readVInt();
|
||||
bytesize = binary ? size : 2*size;
|
||||
break;
|
||||
case FieldsWriter.FIELD_IS_NUMERIC_INT:
|
||||
case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
|
||||
size = bytesize = 4;
|
||||
break;
|
||||
case FieldsWriter.FIELD_IS_NUMERIC_LONG:
|
||||
case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
|
||||
size = bytesize = 8;
|
||||
break;
|
||||
default:
|
||||
throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
|
||||
}
|
||||
byte[] sizebytes = new byte[4];
|
||||
sizebytes[0] = (byte) (bytesize>>>24);
|
||||
sizebytes[1] = (byte) (bytesize>>>16);
|
||||
|
@ -358,7 +407,7 @@ public final class FieldsReader implements Cloneable {
|
|||
}
|
||||
|
||||
/**
|
||||
* A Lazy implementation of Fieldable that differs loading of fields until asked for, instead of when the Document is
|
||||
* A Lazy implementation of Fieldable that defers loading of fields until asked for, instead of when the Document is
|
||||
* loaded.
|
||||
*/
|
||||
private class LazyField extends AbstractField implements Fieldable {
|
||||
|
|
|
@ -21,22 +21,40 @@ import java.util.List;
|
|||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Fieldable;
|
||||
import org.apache.lucene.document.NumericField;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
|
||||
final class FieldsWriter {
|
||||
static final byte FIELD_IS_TOKENIZED = 0x1;
|
||||
static final byte FIELD_IS_BINARY = 0x2;
|
||||
static final int FIELD_IS_TOKENIZED = 1 << 0;
|
||||
static final int FIELD_IS_BINARY = 1 << 1;
|
||||
|
||||
// the old bit 1 << 2 was compressed, is now left out
|
||||
|
||||
private static final int _NUMERIC_BIT_SHIFT = 3;
|
||||
static final int FIELD_IS_NUMERIC_MASK = 0x07 << _NUMERIC_BIT_SHIFT;
|
||||
|
||||
static final int FIELD_IS_NUMERIC_INT = 1 << _NUMERIC_BIT_SHIFT;
|
||||
static final int FIELD_IS_NUMERIC_LONG = 2 << _NUMERIC_BIT_SHIFT;
|
||||
static final int FIELD_IS_NUMERIC_FLOAT = 3 << _NUMERIC_BIT_SHIFT;
|
||||
static final int FIELD_IS_NUMERIC_DOUBLE = 4 << _NUMERIC_BIT_SHIFT;
|
||||
// currently unused: static final int FIELD_IS_NUMERIC_SHORT = 5 << _NUMERIC_BIT_SHIFT;
|
||||
// currently unused: static final int FIELD_IS_NUMERIC_BYTE = 6 << _NUMERIC_BIT_SHIFT;
|
||||
|
||||
// the next possible bits are: 1 << 6; 1 << 7
|
||||
|
||||
// Lucene 3.0: Removal of compressed fields
|
||||
static final int FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2;
|
||||
|
||||
// Lucene 3.2: NumericFields are stored in binary format
|
||||
static final int FORMAT_LUCENE_3_2_NUMERIC_FIELDS = 3;
|
||||
|
||||
// NOTE: if you introduce a new format, make it 1 higher
|
||||
// than the current one, and always change this if you
|
||||
// switch to a new format!
|
||||
static final int FORMAT_CURRENT = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
|
||||
static final int FORMAT_CURRENT = FORMAT_LUCENE_3_2_NUMERIC_FIELDS;
|
||||
|
||||
// when removing support for old versions, leave the last supported version here
|
||||
static final int FORMAT_MINIMUM = FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS;
|
||||
|
@ -121,13 +139,26 @@ final class FieldsWriter {
|
|||
|
||||
final void writeField(int fieldNumber, Fieldable field) throws IOException {
|
||||
fieldsStream.writeVInt(fieldNumber);
|
||||
byte bits = 0;
|
||||
int bits = 0;
|
||||
if (field.isTokenized())
|
||||
bits |= FieldsWriter.FIELD_IS_TOKENIZED;
|
||||
bits |= FIELD_IS_TOKENIZED;
|
||||
if (field.isBinary())
|
||||
bits |= FieldsWriter.FIELD_IS_BINARY;
|
||||
|
||||
fieldsStream.writeByte(bits);
|
||||
bits |= FIELD_IS_BINARY;
|
||||
if (field instanceof NumericField) {
|
||||
switch (((NumericField) field).getDataType()) {
|
||||
case INT:
|
||||
bits |= FIELD_IS_NUMERIC_INT; break;
|
||||
case LONG:
|
||||
bits |= FIELD_IS_NUMERIC_LONG; break;
|
||||
case FLOAT:
|
||||
bits |= FIELD_IS_NUMERIC_FLOAT; break;
|
||||
case DOUBLE:
|
||||
bits |= FIELD_IS_NUMERIC_DOUBLE; break;
|
||||
default:
|
||||
assert false : "Should never get here";
|
||||
}
|
||||
}
|
||||
fieldsStream.writeByte((byte) bits);
|
||||
|
||||
if (field.isBinary()) {
|
||||
final byte[] data;
|
||||
|
@ -139,8 +170,22 @@ final class FieldsWriter {
|
|||
|
||||
fieldsStream.writeVInt(len);
|
||||
fieldsStream.writeBytes(data, offset, len);
|
||||
}
|
||||
else {
|
||||
} else if (field instanceof NumericField) {
|
||||
final NumericField nf = (NumericField) field;
|
||||
final Number n = nf.getNumericValue();
|
||||
switch (nf.getDataType()) {
|
||||
case INT:
|
||||
fieldsStream.writeInt(n.intValue()); break;
|
||||
case LONG:
|
||||
fieldsStream.writeLong(n.longValue()); break;
|
||||
case FLOAT:
|
||||
fieldsStream.writeInt(Float.floatToIntBits(n.floatValue())); break;
|
||||
case DOUBLE:
|
||||
fieldsStream.writeLong(Double.doubleToLongBits(n.doubleValue())); break;
|
||||
default:
|
||||
assert false : "Should never get here";
|
||||
}
|
||||
} else {
|
||||
fieldsStream.writeString(field.stringValue());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.io.FilenameFilter;
|
|||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
|
@ -196,7 +197,31 @@ final class IndexFileDeleter {
|
|||
}
|
||||
}
|
||||
if (sis != null) {
|
||||
CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis);
|
||||
final SegmentInfos infos = sis;
|
||||
for (SegmentInfo segmentInfo : infos) {
|
||||
try {
|
||||
/*
|
||||
* Force FI to load for each segment since we could see a
|
||||
* segments file and load successfully above if the files are
|
||||
* still referenced when they are deleted and the os doesn't let
|
||||
* you delete them. Yet it's likely that fnm files are removed
|
||||
* while the seg file is still around. Since LUCENE-2984 we need FI
|
||||
* to find out if a seg has vectors and prox so we need those
|
||||
* files to be opened for a commit point.
|
||||
*/
|
||||
segmentInfo.getFieldInfos();
|
||||
} catch (FileNotFoundException e) {
|
||||
refresh(segmentInfo.name);
|
||||
sis = null;
|
||||
if (infoStream != null) {
|
||||
message("init: hit FileNotFoundException when loading commit \"" + fileName + "\"; skipping this commit point");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
if (sis != null) {
|
||||
final CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis);
|
||||
if (sis.getGeneration() == segmentInfos.getGeneration()) {
|
||||
currentCommitPoint = commitPoint;
|
||||
}
|
||||
|
|
|
@ -1428,7 +1428,7 @@ public abstract class IndexReader implements Cloneable,Closeable {
|
|||
cfr = new CompoundFileReader(dir, filename);
|
||||
|
||||
String [] files = cfr.listAll();
|
||||
ArrayUtil.quickSort(files); // sort the array of filename so that the output is more readable
|
||||
ArrayUtil.mergeSort(files); // sort the array of filename so that the output is more readable
|
||||
|
||||
for (int i = 0; i < files.length; ++i) {
|
||||
long len = cfr.fileLength(files[i]);
|
||||
|
|
|
@ -0,0 +1,129 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/**
|
||||
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.Version;

import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Collection;

/**
 * This is an easy-to-use tool that upgrades all segments of an index from previous Lucene versions
 * to the current segment file format. It can be used from the command line:
 * <pre>
 *   java -cp lucene-core.jar org.apache.lucene.index.IndexUpgrader [-delete-prior-commits] [-verbose] indexDir
 * </pre>
 * Alternatively this class can be instantiated and {@link #upgrade} invoked. It uses {@link UpgradeIndexMergePolicy}
 * and triggers the upgrade via an optimize request to {@link IndexWriter}.
 * <p>This tool keeps only the last commit in an index; for this
 * reason, if the incoming index has more than one commit, the tool
 * refuses to run by default. Specify {@code -delete-prior-commits}
 * to override this, allowing the tool to delete all but the last commit.
 * From Java code this can be enabled by passing {@code true} to
 * {@link #IndexUpgrader(Directory,PrintStream,boolean)}.
 */
public final class IndexUpgrader {

  private static void printUsage() {
    System.err.println("Upgrades an index so all segments created with a previous Lucene version are rewritten.");
    System.err.println("Usage:");
    System.err.println("  java " + IndexUpgrader.class.getName() + " [-delete-prior-commits] [-verbose] indexDir");
    System.err.println("This tool keeps only the last commit in an index; for this");
    System.err.println("reason, if the incoming index has more than one commit, the tool");
    System.err.println("refuses to run by default. Specify -delete-prior-commits to override");
    System.err.println("this, allowing the tool to delete all but the last commit.");
    System.exit(1);
  }

  public static void main(String[] args) throws IOException {
    String dir = null;
    boolean deletePriorCommits = false;
    PrintStream out = null;
    for (String arg : args) {
      if ("-delete-prior-commits".equals(arg)) {
        deletePriorCommits = true;
      } else if ("-verbose".equals(arg)) {
        out = System.out;
      } else if (dir == null) {
        dir = arg;
      } else {
        printUsage();
      }
    }
    if (dir == null) {
      printUsage();
    }

    new IndexUpgrader(FSDirectory.open(new File(dir)), out, deletePriorCommits).upgrade();
  }

  private final Directory dir;
  private final PrintStream infoStream;
  private final IndexWriterConfig iwc;
  private final boolean deletePriorCommits;

  @SuppressWarnings("deprecation")
  public IndexUpgrader(Directory dir) {
    this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), null, false);
  }

  @SuppressWarnings("deprecation")
  public IndexUpgrader(Directory dir, PrintStream infoStream, boolean deletePriorCommits) {
    this(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, null), infoStream, deletePriorCommits);
  }

  public IndexUpgrader(Directory dir, IndexWriterConfig iwc, PrintStream infoStream, boolean deletePriorCommits) {
    this.dir = dir;
    this.iwc = iwc;
    this.infoStream = infoStream;
    this.deletePriorCommits = deletePriorCommits;
  }

  public void upgrade() throws IOException {
    if (!IndexReader.indexExists(dir)) {
      throw new IndexNotFoundException(dir.toString());
    }

    if (!deletePriorCommits) {
      final Collection<IndexCommit> commits = IndexReader.listCommits(dir);
      if (commits.size() > 1) {
        throw new IllegalArgumentException("This tool was invoked to not delete prior commit points, but the following commits were found: " + commits);
      }
    }

    final IndexWriterConfig c = (IndexWriterConfig) iwc.clone();
    c.setMergePolicy(new UpgradeIndexMergePolicy(c.getMergePolicy()));
    c.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());

    final IndexWriter w = new IndexWriter(dir, c);
    try {
      w.setInfoStream(infoStream);
      w.message("Upgrading all pre-" + Constants.LUCENE_MAIN_VERSION + " segments of index directory '" + dir + "' to version " + Constants.LUCENE_MAIN_VERSION + "...");
      w.optimize();
      w.message("All segments upgraded to version " + Constants.LUCENE_MAIN_VERSION);
    } finally {
      w.close();
    }
  }

}

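For callers that prefer to drive the upgrade from Java rather than the command line, a minimal sketch using only the constructors shown above could look like the following (the index path and wrapper class name are placeholders, not part of this commit):

import java.io.File;

import org.apache.lucene.index.IndexUpgrader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class UpgradeMyIndex {
  public static void main(String[] args) throws Exception {
    // Placeholder path; point this at the index to upgrade.
    Directory dir = FSDirectory.open(new File("/path/to/index"));
    try {
      // Verbose output to stdout; refuse to run if the index has more than one commit.
      new IndexUpgrader(dir, System.out, false).upgrade();
    } finally {
      dir.close();
    }
  }
}
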
@ -421,7 +421,7 @@ public class IndexWriter implements Closeable {
|
|||
private final Map<SegmentInfo,SegmentReader> readerMap = new HashMap<SegmentInfo,SegmentReader>();
|
||||
|
||||
/** Forcefully clear changes for the specified segments. This is called on successful merge. */
|
||||
synchronized void clear(SegmentInfos infos) throws IOException {
|
||||
synchronized void clear(List<SegmentInfo> infos) throws IOException {
|
||||
if (infos == null) {
|
||||
for (Map.Entry<SegmentInfo,SegmentReader> ent: readerMap.entrySet()) {
|
||||
ent.getValue().hasChanges = false;
|
||||
|
@ -511,7 +511,7 @@ public class IndexWriter implements Closeable {
|
|||
return false;
|
||||
}
|
||||
|
||||
public synchronized void drop(SegmentInfos infos) throws IOException {
|
||||
public synchronized void drop(List<SegmentInfo> infos) throws IOException {
|
||||
for(SegmentInfo info : infos) {
|
||||
drop(info);
|
||||
}
|
||||
|
@ -2355,7 +2355,7 @@ public class IndexWriter implements Closeable {
|
|||
|
||||
String mergedName = newSegmentName();
|
||||
SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(),
|
||||
mergedName, null, codecs, payloadProcessorProvider,
|
||||
mergedName, null, payloadProcessorProvider,
|
||||
globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
|
||||
|
||||
for (IndexReader reader : readers) // add new indexes
|
||||
|
@ -2365,8 +2365,7 @@ public class IndexWriter implements Closeable {
|
|||
|
||||
final FieldInfos fieldInfos = merger.fieldInfos();
|
||||
SegmentInfo info = new SegmentInfo(mergedName, docCount, directory,
|
||||
false, fieldInfos.hasProx(), merger.getSegmentCodecs(),
|
||||
fieldInfos.hasVectors(),
|
||||
false, merger.getSegmentCodecs(),
|
||||
fieldInfos);
|
||||
setDiagnostics(info, "addIndexes(IndexReader...)");
|
||||
|
||||
|
@ -2729,7 +2728,7 @@ public class IndexWriter implements Closeable {
|
|||
|
||||
assert testPoint("startCommitMergeDeletes");
|
||||
|
||||
final SegmentInfos sourceSegments = merge.segments;
|
||||
final List<SegmentInfo> sourceSegments = merge.segments;
|
||||
|
||||
if (infoStream != null)
|
||||
message("commitMergeDeletes " + merge.segString(directory));
|
||||
|
@ -2741,7 +2740,7 @@ public class IndexWriter implements Closeable {
|
|||
long minGen = Long.MAX_VALUE;
|
||||
|
||||
for(int i=0; i < sourceSegments.size(); i++) {
|
||||
SegmentInfo info = sourceSegments.info(i);
|
||||
SegmentInfo info = sourceSegments.get(i);
|
||||
minGen = Math.min(info.getBufferedDeletesGen(), minGen);
|
||||
int docCount = info.docCount;
|
||||
final SegmentReader previousReader = merge.readerClones.get(i);
|
||||
|
@ -3041,7 +3040,16 @@ public class IndexWriter implements Closeable {
|
|||
// is running (while synchronized) to avoid race
|
||||
// condition where two conflicting merges from different
|
||||
// threads, start
|
||||
message("registerMerge merging=" + mergingSegments);
|
||||
if (infoStream != null) {
|
||||
StringBuilder builder = new StringBuilder("registerMerge merging= [");
|
||||
for (SegmentInfo info : mergingSegments) {
|
||||
builder.append(info.name).append(", ");
|
||||
}
|
||||
builder.append("]");
|
||||
// don't call mergingSegments.toString() here: it could lead to a
// ConcurrentModificationException, since the merge updates the segments' FieldInfos
|
||||
message(builder.toString());
|
||||
}
|
||||
for(SegmentInfo info : merge.segments) {
|
||||
message("registerMerge info=" + info);
|
||||
mergingSegments.add(info);
|
||||
|
@ -3094,7 +3102,7 @@ public class IndexWriter implements Closeable {
|
|||
// Bind a new segment name here so even with
|
||||
// ConcurrentMergePolicy we keep deterministic segment
|
||||
// names.
|
||||
merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, false, null, false, globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
|
||||
merge.info = new SegmentInfo(newSegmentName(), 0, directory, false, null, globalFieldNumberMap.newFieldInfos(SegmentCodecsBuilder.create(codecs)));
|
||||
|
||||
// Lock order: IW -> BD
|
||||
final BufferedDeletesStream.ApplyDeletesResult result = bufferedDeletesStream.applyDeletes(readerPool, merge.segments);
|
||||
|
@ -3133,6 +3141,16 @@ public class IndexWriter implements Closeable {
|
|||
message("merge seg=" + merge.info.name);
|
||||
}
|
||||
|
||||
assert merge.estimatedMergeBytes == 0;
|
||||
for(SegmentInfo info : merge.segments) {
|
||||
if (info.docCount > 0) {
|
||||
final int delCount = numDeletedDocs(info);
|
||||
assert delCount <= info.docCount;
|
||||
final double delRatio = ((double) delCount)/info.docCount;
|
||||
merge.estimatedMergeBytes += info.sizeInBytes(true) * (1.0 - delRatio);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: I think this should no longer be needed (we
|
||||
// now build CFS before adding segment to the infos);
|
||||
// however, on removing it, tests fail for some reason!
|
||||
|
@ -3174,7 +3192,7 @@ public class IndexWriter implements Closeable {
|
|||
// It's possible we are called twice, eg if there was an
|
||||
// exception inside mergeInit
|
||||
if (merge.registerDone) {
|
||||
final SegmentInfos sourceSegments = merge.segments;
|
||||
final List<SegmentInfo> sourceSegments = merge.segments;
|
||||
for(SegmentInfo info : sourceSegments) {
|
||||
mergingSegments.remove(info);
|
||||
}
|
||||
|
@ -3245,21 +3263,17 @@ public class IndexWriter implements Closeable {
|
|||
|
||||
int mergedDocCount = 0;
|
||||
|
||||
SegmentInfos sourceSegments = merge.segments;
|
||||
List<SegmentInfo> sourceSegments = merge.segments;
|
||||
|
||||
SegmentMerger merger = new SegmentMerger(directory, config.getTermIndexInterval(), mergedName, merge,
|
||||
codecs, payloadProcessorProvider,
|
||||
merge.info.getFieldInfos());
|
||||
payloadProcessorProvider, merge.info.getFieldInfos());
|
||||
|
||||
if (infoStream != null) {
|
||||
message("merging " + merge.segString(directory) + " mergeVectors=" + merger.fieldInfos().hasVectors());
|
||||
message("merging " + merge.segString(directory) + " mergeVectors=" + merge.info.getFieldInfos().hasVectors());
|
||||
}
|
||||
|
||||
merge.readers = new ArrayList<SegmentReader>();
|
||||
merge.readerClones = new ArrayList<SegmentReader>();
|
||||
|
||||
merge.estimatedMergeBytes = 0;
|
||||
|
||||
// This is try/finally to make sure merger's readers are
|
||||
// closed:
|
||||
boolean success = false;
|
||||
|
@ -3268,7 +3282,7 @@ public class IndexWriter implements Closeable {
|
|||
int segUpto = 0;
|
||||
while(segUpto < sourceSegments.size()) {
|
||||
|
||||
final SegmentInfo info = sourceSegments.info(segUpto);
|
||||
final SegmentInfo info = sourceSegments.get(segUpto);
|
||||
|
||||
// Hold onto the "live" reader; we will use this to
|
||||
// commit merged deletes
|
||||
|
@ -3277,13 +3291,6 @@ public class IndexWriter implements Closeable {
|
|||
-config.getReaderTermsIndexDivisor());
|
||||
merge.readers.add(reader);
|
||||
|
||||
final int readerMaxDoc = reader.maxDoc();
|
||||
if (readerMaxDoc > 0) {
|
||||
final int delCount = reader.numDeletedDocs();
|
||||
final double delRatio = ((double) delCount)/readerMaxDoc;
|
||||
merge.estimatedMergeBytes += info.sizeInBytes(true) * (1.0 - delRatio);
|
||||
}
|
||||
|
||||
// We clone the segment readers because other
|
||||
// deletes may come in while we're merging so we
|
||||
// need readers that will not change
|
||||
|
@ -3308,8 +3315,6 @@ public class IndexWriter implements Closeable {
|
|||
|
||||
// Record which codec was used to write the segment
|
||||
merge.info.setSegmentCodecs(merger.getSegmentCodecs());
|
||||
// Record if we have merged vectors
|
||||
merge.info.setHasVectors(merger.fieldInfos().hasVectors());
|
||||
|
||||
if (infoStream != null) {
|
||||
message("merge segmentCodecs=" + merger.getSegmentCodecs());
|
||||
|
@ -3323,8 +3328,6 @@ public class IndexWriter implements Closeable {
|
|||
// because codec must know if prox was written for
|
||||
// this segment:
|
||||
//System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name);
|
||||
merge.info.setHasProx(merger.fieldInfos().hasProx());
|
||||
|
||||
boolean useCompoundFile;
|
||||
synchronized (this) { // Guard segmentInfos
|
||||
useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, merge.info);
|
||||
|
@ -3469,14 +3472,14 @@ public class IndexWriter implements Closeable {
|
|||
}
|
||||
|
||||
/** @lucene.internal */
|
||||
public synchronized String segString(SegmentInfos infos) throws IOException {
|
||||
public synchronized String segString(List<SegmentInfo> infos) throws IOException {
|
||||
StringBuilder buffer = new StringBuilder();
|
||||
final int count = infos.size();
|
||||
for(int i = 0; i < count; i++) {
|
||||
if (i > 0) {
|
||||
buffer.append(' ');
|
||||
}
|
||||
buffer.append(segString(infos.info(i)));
|
||||
buffer.append(segString(infos.get(i)));
|
||||
}
|
||||
|
||||
return buffer.toString();
|
||||
|
@ -3531,6 +3534,7 @@ public class IndexWriter implements Closeable {
|
|||
|
||||
// called only from assert
|
||||
private boolean filesExist(SegmentInfos toSync) throws IOException {
|
||||
|
||||
Collection<String> files = toSync.files(directory, false);
|
||||
for(final String fileName: files) {
|
||||
assert directory.fileExists(fileName): "file " + fileName + " does not exist";
|
||||
|
|
|
@ -20,7 +20,6 @@ package org.apache.lucene.index;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
|
@ -595,7 +594,7 @@ public abstract class LogMergePolicy extends MergePolicy {
|
|||
} else if (!anyTooLarge) {
|
||||
if (spec == null)
|
||||
spec = new MergeSpecification();
|
||||
final SegmentInfos mergeInfos = new SegmentInfos();
|
||||
final List<SegmentInfo> mergeInfos = new ArrayList<SegmentInfo>();
|
||||
for(int i=start;i<end;i++) {
|
||||
mergeInfos.add(levels.get(i).info);
|
||||
assert infos.contains(levels.get(i).info);
|
||||
|
|
|
@ -32,7 +32,7 @@ final class MergeDocIDRemapper {
|
|||
|
||||
public MergeDocIDRemapper(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergedDocCount) {
|
||||
this.docMaps = docMaps;
|
||||
SegmentInfo firstSegment = merge.segments.info(0);
|
||||
SegmentInfo firstSegment = merge.segments.get(0);
|
||||
int i = 0;
|
||||
while(true) {
|
||||
SegmentInfo info = infos.info(i);
|
||||
|
@ -45,7 +45,7 @@ final class MergeDocIDRemapper {
|
|||
int numDocs = 0;
|
||||
for(int j=0;j<docMaps.length;i++,j++) {
|
||||
numDocs += infos.info(i).docCount;
|
||||
assert infos.info(i).equals(merge.segments.info(j));
|
||||
assert infos.info(i).equals(merge.segments.get(j));
|
||||
}
|
||||
maxDocID = minDocID + numDocs;
|
||||
|
||||
|
@ -55,7 +55,7 @@ final class MergeDocIDRemapper {
|
|||
starts[0] = minDocID;
|
||||
newStarts[0] = minDocID;
|
||||
for(i=1;i<docMaps.length;i++) {
|
||||
final int lastDocCount = merge.segments.info(i-1).docCount;
|
||||
final int lastDocCount = merge.segments.get(i-1).docCount;
|
||||
starts[i] = starts[i-1] + lastDocCount;
|
||||
newStarts[i] = newStarts[i-1] + lastDocCount - delCounts[i-1];
|
||||
}
|
||||
|
@ -69,7 +69,7 @@ final class MergeDocIDRemapper {
|
|||
// assert docShift > 0;
|
||||
|
||||
// Make sure it all adds up:
|
||||
assert docShift == maxDocID - (newStarts[docMaps.length-1] + merge.segments.info(docMaps.length-1).docCount - delCounts[docMaps.length-1]);
|
||||
assert docShift == maxDocID - (newStarts[docMaps.length-1] + merge.segments.get(docMaps.length-1).docCount - delCounts[docMaps.length-1]);
|
||||
}
|
||||
|
||||
public int remap(int oldDocID) {
|
||||
|
|
|
@ -75,15 +75,21 @@ public abstract class MergePolicy implements java.io.Closeable {
|
|||
long estimatedMergeBytes; // used by IndexWriter
|
||||
List<SegmentReader> readers; // used by IndexWriter
|
||||
List<SegmentReader> readerClones; // used by IndexWriter
|
||||
public final SegmentInfos segments;
|
||||
public final List<SegmentInfo> segments;
|
||||
public final int totalDocCount;
|
||||
boolean aborted;
|
||||
Throwable error;
|
||||
boolean paused;
|
||||
|
||||
public OneMerge(SegmentInfos segments) {
|
||||
public OneMerge(List<SegmentInfo> segments) {
|
||||
if (0 == segments.size())
|
||||
throw new RuntimeException("segments must include at least one segment");
|
||||
this.segments = segments;
|
||||
int count = 0;
|
||||
for(SegmentInfo info : segments) {
|
||||
count += info.docCount;
|
||||
}
|
||||
totalDocCount = count;
|
||||
}
|
||||
|
||||
/** Record that an exception occurred while executing
|
||||
|
@ -147,7 +153,7 @@ public abstract class MergePolicy implements java.io.Closeable {
|
|||
final int numSegments = segments.size();
|
||||
for(int i=0;i<numSegments;i++) {
|
||||
if (i > 0) b.append(' ');
|
||||
b.append(segments.info(i).toString(dir, 0));
|
||||
b.append(segments.get(i).toString(dir, 0));
|
||||
}
|
||||
if (info != null)
|
||||
b.append(" into ").append(info.name);
|
||||
|
|
|
@ -43,7 +43,8 @@ import org.apache.lucene.util.Constants;
|
|||
* @lucene.experimental
|
||||
*/
|
||||
public final class SegmentInfo {
|
||||
|
||||
// TODO: remove with hasVector and hasProx
|
||||
private static final int CHECK_FIELDINFO = -2;
|
||||
static final int NO = -1; // e.g. no norms; no deletes;
|
||||
static final int YES = 1; // e.g. have norms; have deletes;
|
||||
static final int WITHOUT_GEN = 0; // a file name that has no GEN in it.
|
||||
|
@ -86,9 +87,11 @@ public final class SegmentInfo {
|
|||
|
||||
private int delCount; // How many deleted docs in this segment
|
||||
|
||||
private boolean hasProx; // True if this segment has any fields with omitTermFreqAndPositions==false
|
||||
//TODO: remove when we don't have to support old indexes anymore that had this field
|
||||
private int hasVectors = CHECK_FIELDINFO;
|
||||
//TODO: remove when we don't have to support old indexes anymore that had this field
|
||||
private int hasProx = CHECK_FIELDINFO; // True if this segment has any fields with omitTermFreqAndPositions==false
|
||||
|
||||
private boolean hasVectors; // True if this segment wrote term vectors
|
||||
|
||||
private FieldInfos fieldInfos;
|
||||
|
||||
|
@ -107,8 +110,11 @@ public final class SegmentInfo {
|
|||
// this is never written to/read from the Directory
|
||||
private long bufferedDeletesGen;
|
||||
|
||||
// holds the fieldInfos Version to refresh files() cache if FI has changed
|
||||
private long fieldInfosVersion;
|
||||
|
||||
public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile,
|
||||
boolean hasProx, SegmentCodecs segmentCodecs, boolean hasVectors, FieldInfos fieldInfos) {
|
||||
SegmentCodecs segmentCodecs, FieldInfos fieldInfos) {
|
||||
this.name = name;
|
||||
this.docCount = docCount;
|
||||
this.dir = dir;
|
||||
|
@ -116,9 +122,7 @@ public final class SegmentInfo {
|
|||
this.isCompoundFile = isCompoundFile;
|
||||
this.docStoreOffset = -1;
|
||||
this.docStoreSegment = name;
|
||||
this.hasProx = hasProx;
|
||||
this.segmentCodecs = segmentCodecs;
|
||||
this.hasVectors = hasVectors;
|
||||
delCount = 0;
|
||||
version = Constants.LUCENE_MAIN_VERSION;
|
||||
this.fieldInfos = fieldInfos;
|
||||
|
@ -213,7 +217,7 @@ public final class SegmentInfo {
|
|||
delCount = input.readInt();
|
||||
assert delCount <= docCount;
|
||||
|
||||
hasProx = input.readByte() == YES;
|
||||
hasProx = input.readByte();
|
||||
|
||||
// System.out.println(Thread.currentThread().getName() + ": si.read hasProx=" + hasProx + " seg=" + name);
|
||||
if (format <= DefaultSegmentInfosWriter.FORMAT_4_0) {
|
||||
|
@ -226,7 +230,7 @@ public final class SegmentInfo {
|
|||
diagnostics = input.readStringStringMap();
|
||||
|
||||
if (format <= DefaultSegmentInfosWriter.FORMAT_HAS_VECTORS) {
|
||||
hasVectors = input.readByte() == 1;
|
||||
hasVectors = input.readByte();
|
||||
} else {
|
||||
final String storesSegment;
|
||||
final String ext;
|
||||
|
@ -247,7 +251,7 @@ public final class SegmentInfo {
|
|||
dirToTest = dir;
|
||||
}
|
||||
try {
|
||||
hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
||||
hasVectors = dirToTest.fileExists(IndexFileNames.segmentFileName(storesSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION)) ? YES : NO;
|
||||
} finally {
|
||||
if (isCompoundFile) {
|
||||
dirToTest.close();
|
||||
|
@ -311,12 +315,7 @@ public final class SegmentInfo {
|
|||
}
|
||||
|
||||
public boolean getHasVectors() throws IOException {
|
||||
return hasVectors;
|
||||
}
|
||||
|
||||
public void setHasVectors(boolean v) {
|
||||
hasVectors = v;
|
||||
clearFilesCache();
|
||||
return hasVectors == CHECK_FIELDINFO ? getFieldInfos().hasVectors() : hasVectors == YES;
|
||||
}
|
||||
|
||||
public FieldInfos getFieldInfos() throws IOException {
|
||||
|
@ -349,7 +348,7 @@ public final class SegmentInfo {
|
|||
|
||||
@Override
|
||||
public Object clone() {
|
||||
final SegmentInfo si = new SegmentInfo(name, docCount, dir, isCompoundFile, hasProx, segmentCodecs, hasVectors,
|
||||
final SegmentInfo si = new SegmentInfo(name, docCount, dir, isCompoundFile, segmentCodecs,
|
||||
fieldInfos == null ? null : (FieldInfos) fieldInfos.clone());
|
||||
si.docStoreOffset = docStoreOffset;
|
||||
si.docStoreSegment = docStoreSegment;
|
||||
|
@ -364,6 +363,8 @@ public final class SegmentInfo {
|
|||
}
|
||||
}
|
||||
si.version = version;
|
||||
si.hasProx = hasProx;
|
||||
si.hasVectors = hasVectors;
|
||||
return si;
|
||||
}
|
||||
|
||||
|
@ -569,19 +570,14 @@ public final class SegmentInfo {
|
|||
|
||||
output.writeByte((byte) (isCompoundFile ? YES : NO));
|
||||
output.writeInt(delCount);
|
||||
output.writeByte((byte) (hasProx ? 1:0));
|
||||
output.writeByte((byte) (hasProx));
|
||||
segmentCodecs.write(output);
|
||||
output.writeStringStringMap(diagnostics);
|
||||
output.writeByte((byte) (hasVectors ? 1 : 0));
|
||||
output.writeByte((byte) (hasVectors));
|
||||
}
|
||||
|
||||
void setHasProx(boolean hasProx) {
|
||||
this.hasProx = hasProx;
|
||||
clearFilesCache();
|
||||
}
|
||||
|
||||
public boolean getHasProx() {
|
||||
return hasProx;
|
||||
public boolean getHasProx() throws IOException {
|
||||
return hasProx == CHECK_FIELDINFO ? getFieldInfos().hasProx() : hasProx == YES;
|
||||
}
|
||||
|
||||
/** Can only be called once. */
|
||||
|
@ -609,13 +605,14 @@ public final class SegmentInfo {
|
|||
*/
|
||||
|
||||
public List<String> files() throws IOException {
|
||||
|
||||
if (files != null) {
|
||||
final long fisVersion = fieldInfosVersion;
|
||||
if (fisVersion != (fieldInfosVersion = getFieldInfos().getVersion())) {
|
||||
clearFilesCache(); // FIS has modifications - need to recompute
|
||||
} else if (files != null) {
|
||||
// Already cached:
|
||||
return files;
|
||||
}
|
||||
|
||||
Set<String> fileSet = new HashSet<String>();
|
||||
final Set<String> fileSet = new HashSet<String>();
|
||||
|
||||
boolean useCompoundFile = getUseCompoundFile();
|
||||
|
||||
|
@ -637,7 +634,7 @@ public final class SegmentInfo {
|
|||
} else {
|
||||
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.FIELDS_INDEX_EXTENSION));
|
||||
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.FIELDS_EXTENSION));
|
||||
if (hasVectors) {
|
||||
if (getHasVectors()) {
|
||||
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
||||
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
|
||||
fileSet.add(IndexFileNames.segmentFileName(docStoreSegment, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
|
||||
|
@ -646,7 +643,7 @@ public final class SegmentInfo {
|
|||
} else if (!useCompoundFile) {
|
||||
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.FIELDS_INDEX_EXTENSION));
|
||||
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.FIELDS_EXTENSION));
|
||||
if (hasVectors) {
|
||||
if (getHasVectors()) {
|
||||
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_INDEX_EXTENSION));
|
||||
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_DOCUMENTS_EXTENSION));
|
||||
fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.VECTORS_FIELDS_EXTENSION));
|
||||
|
@ -709,8 +706,12 @@ public final class SegmentInfo {
|
|||
if (this.dir != dir) {
|
||||
s.append('x');
|
||||
}
|
||||
if (hasVectors) {
|
||||
s.append('v');
|
||||
try {
|
||||
if (getHasVectors()) {
|
||||
s.append('v');
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
s.append(docCount);
|
||||
|
||||
|
|
|
@ -72,7 +72,7 @@ final class SegmentMerger {
|
|||
|
||||
private PayloadProcessorProvider payloadProcessorProvider;
|
||||
|
||||
SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, CodecProvider codecs, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) {
|
||||
SegmentMerger(Directory dir, int termIndexInterval, String name, MergePolicy.OneMerge merge, PayloadProcessorProvider payloadProcessorProvider, FieldInfos fieldInfos) {
|
||||
this.payloadProcessorProvider = payloadProcessorProvider;
|
||||
directory = dir;
|
||||
segment = name;
|
||||
|
|
|
@ -32,7 +32,6 @@ public class SegmentWriteState {
|
|||
public final String segmentName;
|
||||
public final FieldInfos fieldInfos;
|
||||
public final int numDocs;
|
||||
public boolean hasVectors;
|
||||
|
||||
// Deletes to apply while we are flushing the segment. A
|
||||
// Term is enrolled in here if it was deleted at one
|
||||
|
|
|
@ -63,7 +63,6 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
|
|||
}
|
||||
|
||||
lastDocID = 0;
|
||||
state.hasVectors = hasVectors;
|
||||
hasVectors = false;
|
||||
}
|
||||
|
||||
|
@ -121,8 +120,7 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
|
|||
fill(docState.docID);
|
||||
|
||||
// Append term vectors to the real outputs:
|
||||
long pointer = tvd.getFilePointer();
|
||||
tvx.writeLong(pointer);
|
||||
tvx.writeLong(tvd.getFilePointer());
|
||||
tvx.writeLong(tvf.getFilePointer());
|
||||
tvd.writeVInt(numVectorFields);
|
||||
if (numVectorFields > 0) {
|
||||
|
@ -136,6 +134,8 @@ final class TermVectorsTermsWriter extends TermsHashConsumer {
|
|||
tvd.writeVLong(pos-lastPos);
|
||||
lastPos = pos;
|
||||
perFields[i].finishDocument();
|
||||
// commit the termVectors once successful - FI will otherwise reset them
|
||||
perFields[i].fieldInfo.commitVectors();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -23,6 +23,8 @@ import java.util.Collection;
|
|||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Merges segments of approximately equal size, subject to
|
||||
|
@ -249,7 +251,7 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
|
||||
final Collection<SegmentInfo> toBeMerged = new HashSet<SegmentInfo>();
|
||||
|
||||
final SegmentInfos infosSorted = new SegmentInfos();
|
||||
final List<SegmentInfo> infosSorted = new ArrayList<SegmentInfo>();
|
||||
infosSorted.addAll(infos);
|
||||
|
||||
Collections.sort(infosSorted, segmentByteSizeDescending);
|
||||
|
@ -277,7 +279,7 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
// If we have too-large segments, grace them out
|
||||
// of the maxSegmentCount:
|
||||
int tooBigCount = 0;
|
||||
while (tooBigCount < infosSorted.size() && size(infosSorted.info(tooBigCount)) >= maxMergedSegmentBytes/2.0) {
|
||||
while (tooBigCount < infosSorted.size() && size(infosSorted.get(tooBigCount)) >= maxMergedSegmentBytes/2.0) {
|
||||
totIndexBytes -= size(infosSorted.get(tooBigCount));
|
||||
tooBigCount++;
|
||||
}
|
||||
|
@ -310,7 +312,7 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
// Gather eligible segments for merging, ie segments
|
||||
// not already being merged and not already picked (by
|
||||
// prior iteration of this loop) for merging:
|
||||
final SegmentInfos eligible = new SegmentInfos();
|
||||
final List<SegmentInfo> eligible = new ArrayList<SegmentInfo>();
|
||||
for(int idx = tooBigCount; idx<infosSorted.size(); idx++) {
|
||||
final SegmentInfo info = infosSorted.get(idx);
|
||||
if (merging.contains(info)) {
|
||||
|
@ -332,7 +334,7 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
|
||||
// OK we are over budget -- find best merge!
|
||||
MergeScore bestScore = null;
|
||||
SegmentInfos best = null;
|
||||
List<SegmentInfo> best = null;
|
||||
boolean bestTooLarge = false;
|
||||
long bestMergeBytes = 0;
|
||||
|
||||
|
@ -341,10 +343,10 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
|
||||
long totAfterMergeBytes = 0;
|
||||
|
||||
final SegmentInfos candidate = new SegmentInfos();
|
||||
final List<SegmentInfo> candidate = new ArrayList<SegmentInfo>();
|
||||
boolean hitTooLarge = false;
|
||||
for(int idx = startIdx;idx<eligible.size() && candidate.size() < maxMergeAtOnce;idx++) {
|
||||
final SegmentInfo info = eligible.info(idx);
|
||||
final SegmentInfo info = eligible.get(idx);
|
||||
final long segBytes = size(info);
|
||||
|
||||
if (totAfterMergeBytes + segBytes > maxMergedSegmentBytes) {
|
||||
|
@ -398,7 +400,7 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
}
|
||||
|
||||
/** Expert: scores one merge; subclasses can override. */
|
||||
protected MergeScore score(SegmentInfos candidate, boolean hitTooLarge, long mergingBytes) throws IOException {
|
||||
protected MergeScore score(List<SegmentInfo> candidate, boolean hitTooLarge, long mergingBytes) throws IOException {
|
||||
long totBeforeMergeBytes = 0;
|
||||
long totAfterMergeBytes = 0;
|
||||
long totAfterMergeBytesFloored = 0;
|
||||
|
@ -420,7 +422,7 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
// over time:
|
||||
skew = 1.0/maxMergeAtOnce;
|
||||
} else {
|
||||
skew = ((double) floorSize(size(candidate.info(0))))/totAfterMergeBytesFloored;
|
||||
skew = ((double) floorSize(size(candidate.get(0))))/totAfterMergeBytesFloored;
|
||||
}
|
||||
|
||||
// Strongly favor merges with less skew (smaller
|
||||
|
@ -458,7 +460,8 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
if (verbose()) {
|
||||
message("findMergesForOptimize maxSegmentCount=" + maxSegmentCount + " infos=" + writer.get().segString(infos) + " segmentsToOptimize=" + segmentsToOptimize);
|
||||
}
|
||||
SegmentInfos eligible = new SegmentInfos();
|
||||
|
||||
List<SegmentInfo> eligible = new ArrayList<SegmentInfo>();
|
||||
boolean optimizeMergeRunning = false;
|
||||
final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
|
||||
for(SegmentInfo info : infos) {
|
||||
|
@ -499,7 +502,7 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
if (spec == null) {
|
||||
spec = new MergeSpecification();
|
||||
}
|
||||
final OneMerge merge = new OneMerge(eligible.range(end-maxMergeAtOnceExplicit, end));
|
||||
final OneMerge merge = new OneMerge(eligible.subList(end-maxMergeAtOnceExplicit, end));
|
||||
if (verbose()) {
|
||||
message("add merge=" + writer.get().segString(merge.segments));
|
||||
}
|
||||
|
@ -510,7 +513,7 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
if (spec == null && !optimizeMergeRunning) {
|
||||
// Do final merge
|
||||
final int numToMerge = end - maxSegmentCount + 1;
|
||||
final OneMerge merge = new OneMerge(eligible.range(end-numToMerge, end));
|
||||
final OneMerge merge = new OneMerge(eligible.subList(end-numToMerge, end));
|
||||
if (verbose()) {
|
||||
message("add final merge=" + merge.segString(writer.get().getDirectory()));
|
||||
}
|
||||
|
@ -527,7 +530,7 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
if (verbose()) {
|
||||
message("findMergesToExpungeDeletes infos=" + writer.get().segString(infos) + " expungeDeletesPctAllowed=" + expungeDeletesPctAllowed);
|
||||
}
|
||||
final SegmentInfos eligible = new SegmentInfos();
|
||||
final List<SegmentInfo> eligible = new ArrayList<SegmentInfo>();
|
||||
final Collection<SegmentInfo> merging = writer.get().getMergingSegments();
|
||||
for(SegmentInfo info : infos) {
|
||||
double pctDeletes = 100.*((double) writer.get().numDeletedDocs(info))/info.docCount;
|
||||
|
@ -580,7 +583,7 @@ public class TieredMergePolicy extends MergePolicy {
|
|||
spec = new MergeSpecification();
|
||||
}
|
||||
|
||||
final OneMerge merge = new OneMerge(eligible.range(start, upto));
|
||||
final OneMerge merge = new OneMerge(eligible.subList(start, upto));
|
||||
if (verbose()) {
|
||||
message("add merge=" + writer.get().segString(merge.segments));
|
||||
}
|
||||
|
|
|
@ -0,0 +1,152 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.util.Constants;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/** This {@link MergePolicy} is used for upgrading all existing segments of
|
||||
* an index when calling {@link IndexWriter#optimize()}.
|
||||
* All other methods delegate to the base {@code MergePolicy} given to the constructor.
|
||||
* This allows for an as-cheap-as-possible upgrade of an older index by only upgrading segments that
* were created by previous Lucene versions. Optimize no longer really optimizes;
* it is just used to "optimize" older segment versions away.
* <p>In general one would use {@link IndexUpgrader}, but for a fully customizable upgrade,
|
||||
* you can use this like any other {@code MergePolicy} and call {@link IndexWriter#optimize()}:
|
||||
* <pre class="prettyprint lang-java">
|
||||
* IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_XX, new KeywordAnalyzer());
|
||||
* iwc.setMergePolicy(new UpgradeIndexMergePolicy(iwc.getMergePolicy()));
|
||||
* IndexWriter w = new IndexWriter(dir, iwc);
|
||||
* w.optimize();
|
||||
* w.close();
|
||||
* </pre>
|
||||
* @lucene.experimental
|
||||
* @see IndexUpgrader
|
||||
*/
|
||||
public class UpgradeIndexMergePolicy extends MergePolicy {
|
||||
|
||||
protected final MergePolicy base;
|
||||
|
||||
/** Wrap the given {@link MergePolicy} and intercept optimize requests to
|
||||
* only upgrade segments written with previous Lucene versions. */
|
||||
public UpgradeIndexMergePolicy(MergePolicy base) {
|
||||
this.base = base;
|
||||
}
|
||||
|
||||
/** Returns true iff the given segment should be upgraded. The default implementation
|
||||
* will return {@code !Constants.LUCENE_MAIN_VERSION.equals(si.getVersion())},
|
||||
* so all segments created with a different version number than this Lucene version will
|
||||
* get upgraded.
|
||||
*/
|
||||
protected boolean shouldUpgradeSegment(SegmentInfo si) {
|
||||
return !Constants.LUCENE_MAIN_VERSION.equals(si.getVersion());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setIndexWriter(IndexWriter writer) {
|
||||
super.setIndexWriter(writer);
|
||||
base.setIndexWriter(writer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public MergeSpecification findMerges(SegmentInfos segmentInfos) throws CorruptIndexException, IOException {
|
||||
return base.findMerges(segmentInfos);
|
||||
}
|
||||
|
||||
@Override
|
||||
public MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos, int maxSegmentCount, Set<SegmentInfo> segmentsToOptimize) throws CorruptIndexException, IOException {
|
||||
// first find all old segments
|
||||
final HashSet<SegmentInfo> oldSegments = new HashSet<SegmentInfo>();
|
||||
for (final SegmentInfo si : segmentInfos) {
|
||||
if (segmentsToOptimize.contains(si) && shouldUpgradeSegment(si)) {
|
||||
oldSegments.add(si);
|
||||
}
|
||||
}
|
||||
|
||||
if (verbose()) message("findMergesForOptimize: segmentsToUpgrade=" + oldSegments);
|
||||
|
||||
if (oldSegments.isEmpty())
|
||||
return null;
|
||||
|
||||
MergeSpecification spec = base.findMergesForOptimize(segmentInfos, maxSegmentCount, oldSegments);
|
||||
|
||||
if (spec != null) {
|
||||
// remove all segments that are in merge specification from oldSegments,
|
||||
// the resulting set contains all segments that are left over
|
||||
// and will be merged to one additional segment:
|
||||
for (final OneMerge om : spec.merges) {
|
||||
oldSegments.removeAll(om.segments);
|
||||
}
|
||||
}
|
||||
|
||||
if (!oldSegments.isEmpty()) {
|
||||
if (verbose())
|
||||
message("findMergesForOptimize: " + base.getClass().getSimpleName() +
|
||||
" does not want to merge all old segments, merge remaining ones into new segment: " + oldSegments);
|
||||
final List<SegmentInfo> newInfos = new ArrayList<SegmentInfo>();
|
||||
for (final SegmentInfo si : segmentInfos) {
|
||||
if (oldSegments.contains(si)) {
|
||||
newInfos.add(si);
|
||||
}
|
||||
}
|
||||
// add the final merge
|
||||
if (spec == null) {
|
||||
spec = new MergeSpecification();
|
||||
}
|
||||
spec.add(new OneMerge(newInfos));
|
||||
}
|
||||
|
||||
return spec;
|
||||
}
|
||||
|
||||
@Override
|
||||
public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos) throws CorruptIndexException, IOException {
|
||||
return base.findMergesToExpungeDeletes(segmentInfos);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment) throws IOException {
|
||||
return base.useCompoundFile(segments, newSegment);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
base.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "[" + getClass().getSimpleName() + "->" + base + "]";
|
||||
}
|
||||
|
||||
private boolean verbose() {
|
||||
IndexWriter w = writer.get();
|
||||
return w != null && w.verbose();
|
||||
}
|
||||
|
||||
private void message(String message) {
|
||||
if (verbose())
|
||||
writer.get().message("UPGMP: " + message);
|
||||
}
|
||||
|
||||
}
|
|
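Because shouldUpgradeSegment is protected, the upgrade criterion can be narrowed or widened by subclassing; a hypothetical sketch (the subclass name and version test are illustrative only, not part of this commit):

import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.UpgradeIndexMergePolicy;

public class UpgradeOnly30SegmentsPolicy extends UpgradeIndexMergePolicy {

  public UpgradeOnly30SegmentsPolicy(MergePolicy base) {
    super(base);
  }

  @Override
  protected boolean shouldUpgradeSegment(SegmentInfo si) {
    // Only rewrite segments written by a 3.0.x release; leave newer segments untouched.
    final String version = si.getVersion();
    return version != null && version.startsWith("3.0");
  }
}
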
@ -73,6 +73,11 @@ public class CodecProvider {
|
|||
}
|
||||
}
|
||||
|
||||
/** @lucene.internal */
|
||||
public synchronized Set<String> listAll() {
|
||||
return codecs.keySet();
|
||||
}
|
||||
|
||||
public Collection<String> getAllExtensions() {
|
||||
return knownExtensions;
|
||||
}
|
||||
|
|
|
@ -68,15 +68,8 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
|
||||
@Override
|
||||
public Object clone() {
|
||||
PulsingTermState clone;
|
||||
clone = (PulsingTermState) super.clone();
|
||||
if (postingsSize != -1) {
|
||||
clone.postings = new byte[postingsSize];
|
||||
System.arraycopy(postings, 0, clone.postings, 0, postingsSize);
|
||||
} else {
|
||||
assert wrappedTermState != null;
|
||||
clone.wrappedTermState = (BlockTermState) wrappedTermState.clone();
|
||||
}
|
||||
PulsingTermState clone = new PulsingTermState();
|
||||
clone.copyFrom(this);
|
||||
return clone;
|
||||
}
|
||||
|
||||
|
@ -90,8 +83,10 @@ public class PulsingPostingsReaderImpl extends PostingsReaderBase {
|
|||
postings = new byte[ArrayUtil.oversize(other.postingsSize, 1)];
|
||||
}
|
||||
System.arraycopy(other.postings, 0, postings, 0, other.postingsSize);
|
||||
} else {
|
||||
} else if (wrappedTermState != null) {
|
||||
wrappedTermState.copyFrom(other.wrappedTermState);
|
||||
} else {
|
||||
wrappedTermState = (BlockTermState) other.wrappedTermState.clone();
|
||||
}
|
||||
|
||||
// NOTE: we do not copy the
|
||||
|
|
|
@ -85,7 +85,7 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
|||
}
|
||||
}
|
||||
|
||||
public static void files(SegmentInfo segmentInfo, String codecId, Collection<String> files) {
|
||||
public static void files(SegmentInfo segmentInfo, String codecId, Collection<String> files) throws IOException {
|
||||
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.DOC_EXTENSION));
|
||||
files.add(IndexFileNames.segmentFileName(segmentInfo.name, codecId, SepPostingsWriterImpl.SKIP_EXTENSION));
|
||||
|
||||
|
@ -151,14 +151,8 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
|||
|
||||
@Override
|
||||
public Object clone() {
|
||||
SepTermState other = (SepTermState) super.clone();
|
||||
other.docIndex = (IntIndexInput.Index) docIndex.clone();
|
||||
if (freqIndex != null) {
|
||||
other.freqIndex = (IntIndexInput.Index) freqIndex.clone();
|
||||
}
|
||||
if (posIndex != null) {
|
||||
other.posIndex = (IntIndexInput.Index) posIndex.clone();
|
||||
}
|
||||
SepTermState other = new SepTermState();
|
||||
other.copyFrom(this);
|
||||
return other;
|
||||
}
|
||||
|
||||
|
@ -166,12 +160,28 @@ public class SepPostingsReaderImpl extends PostingsReaderBase {
|
|||
public void copyFrom(TermState _other) {
|
||||
super.copyFrom(_other);
|
||||
SepTermState other = (SepTermState) _other;
|
||||
docIndex.set(other.docIndex);
|
||||
if (freqIndex != null && other.freqIndex != null) {
|
||||
freqIndex.set(other.freqIndex);
|
||||
if (docIndex == null) {
|
||||
docIndex = (IntIndexInput.Index) other.docIndex.clone();
|
||||
} else {
|
||||
docIndex.set(other.docIndex);
|
||||
}
|
||||
if (posIndex != null && other.posIndex != null) {
|
||||
posIndex.set(other.posIndex);
|
||||
if (other.freqIndex != null) {
|
||||
if (freqIndex == null) {
|
||||
freqIndex = (IntIndexInput.Index) other.freqIndex.clone();
|
||||
} else {
|
||||
freqIndex.set(other.freqIndex);
|
||||
}
|
||||
} else {
|
||||
freqIndex = null;
|
||||
}
|
||||
if (other.posIndex != null) {
|
||||
if (posIndex == null) {
|
||||
posIndex = (IntIndexInput.Index) other.posIndex.clone();
|
||||
} else {
|
||||
posIndex.set(other.posIndex);
|
||||
}
|
||||
} else {
|
||||
posIndex = null;
|
||||
}
|
||||
payloadFP = other.payloadFP;
|
||||
skipFP = other.skipFP;
|
||||
|
|
|
@ -806,6 +806,7 @@ public abstract class QueryParserBase {
|
|||
}
|
||||
|
||||
try {
|
||||
source.end();
|
||||
source.close();
|
||||
} catch (IOException ignored) {}
|
||||
|
||||
|
|
|
@ -21,8 +21,6 @@ import org.apache.lucene.util.PriorityQueue;
|
|||
|
||||
final class HitQueue extends PriorityQueue<ScoreDoc> {
|
||||
|
||||
private boolean prePopulate;
|
||||
|
||||
/**
|
||||
* Creates a new instance with <code>size</code> elements. If
|
||||
* <code>prePopulate</code> is set to true, the queue will pre-populate itself
|
||||
|
|
|
@@ -46,8 +46,18 @@ import org.apache.lucene.util.ThreadInterruptedException;
  *
  * <p>Applications usually need only call the inherited
  * {@link #search(Query,int)}
- * or {@link #search(Query,Filter,int)} methods. For performance reasons it is
- * recommended to open only one IndexSearcher and use it for all of your searches.
+ * or {@link #search(Query,Filter,int)} methods. For
+ * performance reasons, if your index is unchanging, you
+ * should share a single IndexSearcher instance across
+ * multiple searches instead of creating a new one
+ * per-search.  If your index has changed and you wish to
+ * see the changes reflected in searching, you should
+ * use {@link IndexReader#reopen} to obtain a new reader and
+ * then create a new IndexSearcher from that.  Also, for
+ * low-latency turnaround it's best to use a near-real-time
+ * reader ({@link IndexReader#open(IndexWriter,boolean)}).
+ * Once you have a new {@link IndexReader}, it's relatively
+ * cheap to create a new IndexSearcher from it.
  *
  * <a name="thread-safety"></a><p><b>NOTE</b>: <code>{@link
  * IndexSearcher}</code> instances are completely

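The reworked javadoc describes the intended usage pattern; in code it amounts to something like this sketch (directory, query, and error handling are placeholders, not part of this commit):

IndexReader reader = IndexReader.open(dir);           // open once
IndexSearcher searcher = new IndexSearcher(reader);   // share across many searches

// ... later, when the index may have changed:
IndexReader newReader = reader.reopen();
if (newReader != reader) {            // reopen() returns the same instance if nothing changed
  searcher.close();
  reader.close();
  reader = newReader;
  searcher = new IndexSearcher(reader);
}
TopDocs hits = searcher.search(query, 10);
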
@ -214,12 +214,12 @@ public class MultiPhraseQuery extends Query {
|
|||
docFreq = reader.docFreq(term.field(), term.bytes());
|
||||
}
|
||||
|
||||
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue());
|
||||
postingsFreqs[pos] = new PhraseQuery.PostingsAndFreq(postingsEnum, docFreq, positions.get(pos).intValue(), terms[0]);
|
||||
}
|
||||
|
||||
// sort by increasing docFreq order
|
||||
if (slop == 0) {
|
||||
ArrayUtil.quickSort(postingsFreqs);
|
||||
ArrayUtil.mergeSort(postingsFreqs);
|
||||
}
|
||||
|
||||
if (slop == 0) {
|
||||
|
|
|
@ -28,13 +28,15 @@ final class PhrasePositions {
|
|||
int position; // position in doc
|
||||
int count; // remaining pos in this doc
|
||||
int offset; // position in phrase
|
||||
final int ord; // unique across all PhrasePositions instances
|
||||
final DocsAndPositionsEnum postings; // stream of docs & positions
|
||||
PhrasePositions next; // used to make lists
|
||||
boolean repeats; // there's other pp for same term (e.g. query="1st word 2nd word"~1)
|
||||
|
||||
PhrasePositions(DocsAndPositionsEnum postings, int o) {
|
||||
PhrasePositions(DocsAndPositionsEnum postings, int o, int ord) {
|
||||
this.postings = postings;
|
||||
offset = o;
|
||||
this.ord = ord;
|
||||
}
|
||||
|
||||
final boolean next() throws IOException { // increments to next doc
|
||||
|
|
|
@ -124,16 +124,48 @@ public class PhraseQuery extends Query {
|
|||
final DocsAndPositionsEnum postings;
|
||||
final int docFreq;
|
||||
final int position;
|
||||
final Term term;
|
||||
|
||||
public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position) {
|
||||
public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term term) {
|
||||
this.postings = postings;
|
||||
this.docFreq = docFreq;
|
||||
this.position = position;
|
||||
this.term = term;
|
||||
}
|
||||
|
||||
public int compareTo(PostingsAndFreq other) {
|
||||
if (docFreq == other.docFreq) {
|
||||
if (position == other.position) {
|
||||
return term.compareTo(other.term);
|
||||
}
|
||||
return position - other.position;
|
||||
}
|
||||
return docFreq - other.docFreq;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + docFreq;
|
||||
result = prime * result + position;
|
||||
result = prime * result + ((term == null) ? 0 : term.hashCode());
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (this == obj) return true;
|
||||
if (obj == null) return false;
|
||||
if (getClass() != obj.getClass()) return false;
|
||||
PostingsAndFreq other = (PostingsAndFreq) obj;
|
||||
if (docFreq != other.docFreq) return false;
|
||||
if (position != other.position) return false;
|
||||
if (term == null) {
|
||||
if (other.term != null) return false;
|
||||
} else if (!term.equals(other.term)) return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
private class PhraseWeight extends Weight {
|
||||
|
@ -197,12 +229,12 @@ public class PhraseQuery extends Query {
|
|||
return null;
|
||||
}
|
||||
}
|
||||
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue());
|
||||
postingsFreqs[i] = new PostingsAndFreq(postingsEnum, reader.docFreq(t.field(), t.bytes()), positions.get(i).intValue(), t);
|
||||
}
|
||||
|
||||
// sort by increasing docFreq order
|
||||
if (slop == 0) {
|
||||
ArrayUtil.quickSort(postingsFreqs);
|
||||
ArrayUtil.mergeSort(postingsFreqs);
|
||||
}
|
||||
|
||||
if (slop == 0) { // optimize exact case
|
||||
|
|
|
@ -30,10 +30,16 @@ final class PhraseQueue extends PriorityQueue<PhrasePositions> {
|
|||
if (pp1.position == pp2.position)
|
||||
// same doc and pp.position, so decide by actual term positions.
|
||||
// rely on: pp.position == tp.position - offset.
|
||||
return pp1.offset < pp2.offset;
|
||||
else
|
||||
if (pp1.offset == pp2.offset) {
|
||||
return pp1.ord < pp2.ord;
|
||||
} else {
|
||||
return pp1.offset < pp2.offset;
|
||||
}
|
||||
else {
|
||||
return pp1.position < pp2.position;
|
||||
else
|
||||
}
|
||||
else {
|
||||
return pp1.doc < pp2.doc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -55,7 +55,7 @@ abstract class PhraseScorer extends Scorer {
|
|||
// this allows to easily identify a matching (exact) phrase
|
||||
// when all PhrasePositions have exactly the same position.
|
||||
for (int i = 0; i < postings.length; i++) {
|
||||
PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position);
|
||||
PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i);
|
||||
if (last != null) { // add next to end of list
|
||||
last.next = pp;
|
||||
} else {
|
||||
|
|
|
@ -134,7 +134,7 @@ public abstract class TopTermsRewrite<Q extends Query> extends TermCollectingRew
|
|||
final Term placeholderTerm = new Term(query.field);
|
||||
final Q q = getTopLevelQuery();
|
||||
final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
|
||||
ArrayUtil.quickSort(scoreTerms, scoreTermSortByTermComp);
|
||||
ArrayUtil.mergeSort(scoreTerms, scoreTermSortByTermComp);
|
||||
for (final ScoreTerm st : scoreTerms) {
|
||||
final Term term = placeholderTerm.createTerm(st.bytes);
|
||||
assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs " + st.termState.docFreq();
|
||||
|
|
|
@ -190,7 +190,7 @@ public class NearSpansOrdered extends Spans {
|
|||
|
||||
/** Advance the subSpans to the same document */
|
||||
private boolean toSameDoc() throws IOException {
|
||||
ArrayUtil.quickSort(subSpansByDoc, spanDocComparator);
|
||||
ArrayUtil.mergeSort(subSpansByDoc, spanDocComparator);
|
||||
int firstIndex = 0;
|
||||
int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc();
|
||||
while (subSpansByDoc[firstIndex].doc() != maxDoc) {
|
||||
|
|
|
@ -62,13 +62,26 @@ public abstract class SorterTemplate {
|
|||
|
||||
/** Sorts via in-place, but unstable, QuickSort algorithm.
|
||||
* For small collections falls back to {@link #insertionSort(int,int)}. */
|
||||
public final void quickSort(int lo, int hi) {
|
||||
public final void quickSort(final int lo, final int hi) {
|
||||
if (hi <= lo) return;
|
||||
// from Integer's Javadocs: ceil(log2(x)) = 32 - numberOfLeadingZeros(x - 1)
|
||||
quickSort(lo, hi, (Integer.SIZE - Integer.numberOfLeadingZeros(hi - lo)) << 1);
|
||||
}
|
||||
|
||||
private void quickSort(int lo, int hi, int maxDepth) {
|
||||
// fall back to insertion when array has short length
|
||||
final int diff = hi - lo;
|
||||
if (diff <= QUICKSORT_THRESHOLD) {
|
||||
insertionSort(lo, hi);
|
||||
return;
|
||||
}
|
||||
|
||||
// fall back to merge sort when recursion depth gets too big
|
||||
if (--maxDepth == 0) {
|
||||
mergeSort(lo, hi);
|
||||
return;
|
||||
}
|
||||
|
||||
final int mid = lo + (diff >>> 1);
|
||||
|
||||
if (compare(lo, mid) > 0) {
|
||||
|
@ -101,8 +114,8 @@ public abstract class SorterTemplate {
|
|||
}
|
||||
}
|
||||
|
||||
quickSort(lo, left);
|
||||
quickSort(left + 1, hi);
|
||||
quickSort(lo, left, maxDepth);
|
||||
quickSort(left + 1, hi, maxDepth);
|
||||
}
|
||||
|
||||
/** Sorts via stable in-place MergeSort algorithm
|
||||
|
|
|
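The reworked quickSort now carries a maxDepth budget of roughly 2*ceil(log2(n)); once the budget is exhausted it falls back to the stable merge sort, which bounds the worst case at O(n log n) (an introsort). The same idea on a plain int array, as an illustrative stand-alone sketch (not Lucene code):

static void introSort(int[] a, int lo, int hi, int maxDepth) {   // sorts a[lo..hi)
  if (hi - lo <= 1) return;
  if (--maxDepth <= 0) {
    java.util.Arrays.sort(a, lo, hi);   // stand-in for the guaranteed O(n log n) fallback
    return;
  }
  final int pivot = a[lo + (hi - lo) / 2];
  int i = lo, j = hi - 1;
  while (i <= j) {                      // standard two-way partition
    while (a[i] < pivot) i++;
    while (a[j] > pivot) j--;
    if (i <= j) { int t = a[i]; a[i++] = a[j]; a[j--] = t; }
  }
  introSort(a, lo, j + 1, maxDepth);
  introSort(a, i, hi, maxDepth);
}

// typical call: introSort(a, 0, a.length, 2 * (32 - Integer.numberOfLeadingZeros(a.length)));
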
@ -261,9 +261,12 @@ public class Builder<T> {
|
|||
add(scratchIntsRef, output);
|
||||
}
|
||||
|
||||
/** It's OK to add the same input twice in a row with
|
||||
* different outputs, as long as outputs impls the merge
|
||||
* method. */
|
||||
public void add(IntsRef input, T output) throws IOException {
|
||||
//System.out.println("\nFST ADD: input=" + input + " output=" + fst.outputs.outputToString(output));
|
||||
assert lastInput.length == 0 || input.compareTo(lastInput) > 0: "inputs are added out of order lastInput=" + lastInput + " vs input=" + input;
|
||||
assert lastInput.length == 0 || input.compareTo(lastInput) >= 0: "inputs are added out of order lastInput=" + lastInput + " vs input=" + input;
|
||||
assert validOutput(output);
|
||||
|
||||
//System.out.println("\nadd: " + input);
|
||||
|
@ -347,8 +350,15 @@ public class Builder<T> {
|
|||
assert validOutput(output);
|
||||
}
|
||||
|
||||
// push remaining output:
|
||||
frontier[prefixLenPlus1-1].setLastOutput(input.ints[input.offset + prefixLenPlus1-1], output);
|
||||
if (lastInput.length == input.length && prefixLenPlus1 == 1+input.length) {
|
||||
// same input more than 1 time in a row, mapping to
|
||||
// multiple outputs
|
||||
lastNode.output = fst.outputs.merge(lastNode.output, output);
|
||||
} else {
|
||||
// this new arc is private to this new input; set its
|
||||
// arc output to the leftover output:
|
||||
frontier[prefixLenPlus1-1].setLastOutput(input.ints[input.offset + prefixLenPlus1-1], output);
|
||||
}
|
||||
|
||||
// save last input
|
||||
lastInput.copy(input);
|
||||
|
|
|
@ -231,10 +231,13 @@ public class FST<T> {
|
|||
}
|
||||
|
||||
void setEmptyOutput(T v) throws IOException {
|
||||
if (emptyOutput != null && !emptyOutput.equals(v)) {
|
||||
throw new IllegalStateException("empty output is already set: " + outputs.outputToString(emptyOutput) + " vs " + outputs.outputToString(v));
|
||||
if (emptyOutput != null) {
|
||||
if (!emptyOutput.equals(v)) {
|
||||
emptyOutput = outputs.merge(emptyOutput, v);
|
||||
}
|
||||
} else {
|
||||
emptyOutput = v;
|
||||
}
|
||||
emptyOutput = v;
|
||||
|
||||
// TODO: this is messy -- replace with sillyBytesWriter; maybe make
|
||||
// bytes private
|
||||
|
@ -446,25 +449,17 @@ public class FST<T> {
|
|||
// reverse bytes in-place; we do this so that the
|
||||
// "BIT_TARGET_NEXT" opto can work, ie, it reads the
|
||||
// node just before the current one
|
||||
final int endAddress = writer.posWrite;
|
||||
final int stopAt = (endAddress - startAddress)/2;
|
||||
int upto = 0;
|
||||
while (upto < stopAt) {
|
||||
final byte b = bytes[startAddress+upto];
|
||||
bytes[startAddress+upto] = bytes[endAddress-upto-1];
|
||||
bytes[endAddress-upto-1] = b;
|
||||
upto++;
|
||||
final int endAddress = lastFrozenNode = writer.posWrite - 1;
|
||||
|
||||
int left = startAddress;
|
||||
int right = endAddress;
|
||||
while (left < right) {
|
||||
final byte b = bytes[left];
|
||||
bytes[left++] = bytes[right];
|
||||
bytes[right--] = b;
|
||||
}
|
||||
|
||||
lastFrozenNode = endAddress - 1;
|
||||
/*
|
||||
System.out.println(" return node addr=" + (endAddress-1));
|
||||
for(int i=endAddress-1;i>=startAddress;i--) {
|
||||
System.out.println(" bytes[" + i + "]=" + bytes[i]);
|
||||
}
|
||||
*/
|
||||
|
||||
return endAddress-1;
|
||||
return endAddress;
|
||||
}
|
||||
|
||||
/** Fills virtual 'start' arc, ie, an empty incoming arc to
|
||||
|
|
|
@ -140,7 +140,7 @@ abstract class FSTEnum<T> {
|
|||
// Arcs are fixed array -- use binary search to find
|
||||
// the target.
|
||||
|
||||
final FST.BytesReader in = fst.getBytesReader(0);
|
||||
final FST<T>.BytesReader in = fst.getBytesReader(0);
|
||||
int low = arc.arcIdx;
|
||||
int high = arc.numArcs-1;
|
||||
int mid = 0;
|
||||
|
@ -278,7 +278,7 @@ abstract class FSTEnum<T> {
|
|||
// Arcs are fixed array -- use binary search to find
|
||||
// the target.
|
||||
|
||||
final FST.BytesReader in = fst.getBytesReader(0);
|
||||
final FST<T>.BytesReader in = fst.getBytesReader(0);
|
||||
int low = arc.arcIdx;
|
||||
int high = arc.numArcs-1;
|
||||
int mid = 0;
|
||||
|
|
|
@ -40,7 +40,7 @@ final class NodeHash<T> {
|
|||
return false;
|
||||
}
|
||||
for(int arcUpto=0;arcUpto<node.numArcs;arcUpto++) {
|
||||
final Builder.Arc arc = node.arcs[arcUpto];
|
||||
final Builder.Arc<T> arc = node.arcs[arcUpto];
|
||||
if (arc.label != scratchArc.label ||
|
||||
!arc.output.equals(scratchArc.output) ||
|
||||
((Builder.CompiledNode) arc.target).address != scratchArc.target ||
|
||||
|
|
|
@ -54,4 +54,8 @@ public abstract class Outputs<T> {
|
|||
public abstract T getNoOutput();
|
||||
|
||||
public abstract String outputToString(T output);
|
||||
|
||||
public T merge(T first, T second) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -43,7 +43,7 @@ public class PairOutputs<A,B> extends Outputs<PairOutputs.Pair<A,B>> {
|
|||
this.output2 = output2;
|
||||
}
|
||||
|
||||
@Override @SuppressWarnings("unchecked")
|
||||
@Override @SuppressWarnings("rawtypes")
|
||||
public boolean equals(Object other) {
|
||||
if (other == this) {
|
||||
return true;
|
||||
|
|
|
@ -22,14 +22,11 @@ import java.io.IOException;
|
|||
import org.apache.lucene.store.DataInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
|
||||
// TODO: make a sharing and non-sharing variant; eg if you
|
||||
// output docFreq per term the FST will be smaller if you
|
||||
// don't share since they are not "well shared"
|
||||
|
||||
/**
|
||||
* Output is a long, for each input term. NOTE: the
|
||||
* resulting FST is not guaranteed to be minimal! See
|
||||
* {@link Builder}.
|
||||
* {@link Builder}. You cannot store 0 output with this
|
||||
* (that's reserved to mean "no output")!
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
||||
|
|
|
@@ -0,0 +1,224 @@
package org.apache.lucene.util.automaton.fst;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;

/**
 * Holds one or two longs for each input term. If it's a
 * single output, Long is returned; else, TwoLongs. Order
 * is preseved in the TwoLongs case, ie .first is the first
 * input/output added to Builder, and .second is the
 * second. You cannot store 0 output with this (that's
 * reserved to mean "no output")!
 *
 * NOTE: the resulting FST is not guaranteed to be minimal!
 * See {@link Builder}.
 *
 * @lucene.experimental
 */

public final class UpToTwoPositiveIntOutputs extends Outputs<Object> {

  public final static class TwoLongs {
    final long first;
    final long second;

    public TwoLongs(long first, long second) {
      this.first = first;
      this.second = second;
      assert first >= 0;
      assert second >= 0;
    }

    @Override
    public String toString() {
      return "TwoLongs:" + first + "," + second;
    }

    @Override
    public boolean equals(Object _other) {
      if (_other instanceof TwoLongs) {
        final TwoLongs other = (TwoLongs) _other;
        return first == other.first && second == other.second;
      } else {
        return false;
      }
    }

    @Override
    public int hashCode() {
      return (int) ((first^(first>>>32)) ^ (second^(second>>32)));
    }
  }

  private final static Long NO_OUTPUT = new Long(0);

  private final boolean doShare;

  private final static UpToTwoPositiveIntOutputs singletonShare = new UpToTwoPositiveIntOutputs(true);
  private final static UpToTwoPositiveIntOutputs singletonNoShare = new UpToTwoPositiveIntOutputs(false);

  private UpToTwoPositiveIntOutputs(boolean doShare) {
    this.doShare = doShare;
  }

  public static UpToTwoPositiveIntOutputs getSingleton(boolean doShare) {
    return doShare ? singletonShare : singletonNoShare;
  }

  public Long get(long v) {
    if (v == 0) {
      return NO_OUTPUT;
    } else {
      return Long.valueOf(v);
    }
  }

  public TwoLongs get(long first, long second) {
    return new TwoLongs(first, second);
  }

  @Override
  public Long common(Object _output1, Object _output2) {
    assert valid(_output1, false);
    assert valid(_output2, false);
    final Long output1 = (Long) _output1;
    final Long output2 = (Long) _output2;
    if (output1 == NO_OUTPUT || output2 == NO_OUTPUT) {
      return NO_OUTPUT;
    } else if (doShare) {
      assert output1 > 0;
      assert output2 > 0;
      return Math.min(output1, output2);
    } else if (output1.equals(output2)) {
      return output1;
    } else {
      return NO_OUTPUT;
    }
  }

  @Override
  public Long subtract(Object _output, Object _inc) {
    assert valid(_output, false);
    assert valid(_inc, false);
    final Long output = (Long) _output;
    final Long inc = (Long) _inc;
    assert output >= inc;

    if (inc == NO_OUTPUT) {
      return output;
    } else if (output.equals(inc)) {
      return NO_OUTPUT;
    } else {
      return output - inc;
    }
  }

  @Override
  public Object add(Object _prefix, Object _output) {
    assert valid(_prefix, false);
    assert valid(_output, true);
    final Long prefix = (Long) _prefix;
    if (_output instanceof Long) {
      final Long output = (Long) _output;
      if (prefix == NO_OUTPUT) {
        return output;
      } else if (output == NO_OUTPUT) {
        return prefix;
      } else {
        return prefix + output;
      }
    } else {
      final TwoLongs output = (TwoLongs) _output;
      final long v = prefix;
      return new TwoLongs(output.first + v, output.second + v);
    }
  }

  @Override
  public void write(Object _output, DataOutput out) throws IOException {
    assert valid(_output, true);
    if (_output instanceof Long) {
      final Long output = (Long) _output;
      out.writeVLong(output<<1);
    } else {
      final TwoLongs output = (TwoLongs) _output;
      out.writeVLong((output.first<<1) | 1);
      out.writeVLong(output.second);
    }
  }

  @Override
  public Object read(DataInput in) throws IOException {
    final long code = in.readVLong();
    if ((code & 1) == 0) {
      // single long
      final long v = code >>> 1;
      if (v == 0) {
        return NO_OUTPUT;
      } else {
        return Long.valueOf(v);
      }
    } else {
      // two longs
      final long first = code >>> 1;
      final long second = in.readVLong();
      return new TwoLongs(first, second);
    }
  }

  private boolean valid(Long o) {
    assert o != null;
    assert o instanceof Long;
    assert o == NO_OUTPUT || o > 0;
    return true;
  }

  // Used only by assert
  private boolean valid(Object _o, boolean allowDouble) {
    if (!allowDouble) {
      assert _o instanceof Long;
      return valid((Long) _o);
    } else if (_o instanceof TwoLongs) {
      return true;
    } else {
      return valid((Long) _o);
    }
  }

  @Override
  public Object getNoOutput() {
    return NO_OUTPUT;
  }

  @Override
  public String outputToString(Object output) {
    return output.toString();
  }

  @Override
  public Object merge(Object first, Object second) {
    assert valid(first, false);
    assert valid(second, false);
    return new TwoLongs((Long) first, (Long) second);
  }
}
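A short usage sketch for the new class may help; it is illustrative only (the demo class and values are not from Lucene's tests), and the Builder/FST wiring that would normally surround it is omitted because those signatures are not part of this diff:

  // Illustrative only: exercising UpToTwoPositiveIntOutputs directly.
  import org.apache.lucene.util.automaton.fst.UpToTwoPositiveIntOutputs;

  public class UpToTwoOutputsDemo {
    public static void main(String[] args) {
      UpToTwoPositiveIntOutputs outputs = UpToTwoPositiveIntOutputs.getSingleton(true); // sharing variant

      Object single = outputs.get(42);                                 // one value for a term: a Long
      Object both   = outputs.merge(outputs.get(42), outputs.get(17)); // same term added twice: a TwoLongs

      System.out.println(outputs.outputToString(single)); // 42
      System.out.println(outputs.outputToString(both));   // TwoLongs:42,17
    }
  }

On disk, write() steals the low bit of the first vlong as a type flag: a single long L is written as vlong(L<<1) (low bit 0), while TwoLongs(f, s) is written as vlong((f<<1)|1) followed by vlong(s) (low bit 1). read() tests (code & 1) to decide whether a second vlong follows; outputs must stay positive because 0 is reserved to mean "no output".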