LUCENE-3892: merge trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/pforcodec_3892@1372366 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless, 2012-08-13 11:16:57 +00:00
commit 789981c9fd
369 changed files with 5511 additions and 3981 deletions

build-clover.xml (new file, 49 lines)

@ -0,0 +1,49 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project name="clover" basedir=".">
<import file="lucene/common-build.xml"/>
<!--
Run after JUnit tests.
This target is in a separate file, as it needs to include common-build.xml,
but must run from top-level!
-->
<target name="generate-clover-reports" depends="clover">
<fail unless="run.clover">Clover not enabled!</fail>
<mkdir dir="${clover.report.dir}"/>
<fileset dir="." id="clover.test.result.files">
<include name="*/build/**/test/TEST-*.xml"/>
<exclude name="lucene/build/backwards/**"/>
</fileset>
<clover-report>
<current outfile="${clover.report.dir}" title="${final.name}" numThreads="0">
<format type="html" filter="assert"/>
<testresults refid="clover.test.result.files"/>
</current>
<current outfile="${clover.report.dir}/clover.xml" title="${final.name}">
<format type="xml" filter="assert"/>
<testresults refid="clover.test.result.files"/>
</current>
</clover-report>
<echo>You can find the merged Lucene/Solr Clover report in '${clover.report.dir}'.</echo>
</target>
</project>

build.xml (134 lines changed)

@ -51,11 +51,28 @@
</sequential>
</target>
<target name="validate" description="Validate dependencies, licenses, etc.">
<sequential><subant target="validate" inheritall="false" failonerror="true">
<fileset dir="lucene" includes="build.xml" />
<fileset dir="solr" includes="build.xml" />
</subant></sequential>
<target name="validate" description="Validate dependencies, licenses, etc." depends="-validate-source-patterns">
<subant target="validate" inheritall="false" failonerror="true">
<fileset dir="lucene" includes="build.xml" />
<fileset dir="solr" includes="build.xml" />
</subant>
</target>
<target name="-validate-source-patterns" unless="disable.source-patterns">
<!-- check that there are no nocommits or @author javadoc tags: -->
<property name="validate.currDir" location="."/>
<pathconvert pathsep="${line.separator}" dirsep="/" property="validate.patternsFound" setonempty="false">
<fileset dir="${validate.currDir}">
<include name="**/*.java"/>
<exclude name="**/backwards/**"/>
<or>
<containsregexp expression="@author\b" casesensitive="yes"/>
<containsregexp expression="\bno(n|)commit\b" casesensitive="no"/>
</or>
</fileset>
<map from="${validate.currDir}${file.separator}" to="* "/>
</pathconvert>
<fail if="validate.patternsFound">The following files contain @author tags or nocommits:${line.separator}${validate.patternsFound}</fail>
</target>
<target name="rat-sources" description="Runs rat across all sources and tests">
@ -184,4 +201,111 @@
</subant>
</sequential>
</target>
<!-- define here, as common-build is not included! -->
<property name="python32.exe" value="python3.2" />
<property name="fakeRelease" value="lucene/build/fakeRelease"/>
<property name="fakeReleaseTmp" value="lucene/build/fakeReleaseTmp"/>
<property name="fakeReleaseVersion" value="5.0"/> <!-- *not* -SNAPSHOT, the real version -->
<target name="nightly-smoke" description="Builds an unsigned release and smoke tests it." depends="clean">
<sequential>
<fail unless="JAVA6_HOME">JAVA6_HOME property is not defined.</fail>
<fail unless="JAVA7_HOME">JAVA7_HOME property is not defined.</fail>
<subant target="prepare-release-no-sign" inheritall="false" failonerror="true">
<fileset dir="lucene" includes="build.xml" />
<fileset dir="solr" includes="build.xml" />
<property name="version" value="${fakeReleaseVersion}" />
</subant>
<delete dir="${fakeRelease}"/>
<delete dir="${fakeReleaseTmp}"/>
<mkdir dir="${fakeRelease}"/>
<copy todir="${fakeRelease}/lucene">
<fileset dir="lucene/dist"/>
</copy>
<copy todir="${fakeRelease}/lucene/changes">
<fileset dir="lucene/build/docs/changes"/>
</copy>
<get src="http://people.apache.org/keys/group/lucene.asc"
dest="${fakeRelease}/lucene/KEYS"/>
<copy todir="${fakeRelease}/solr">
<fileset dir="solr/package"/>
</copy>
<copy file="${fakeRelease}/lucene/KEYS" todir="${fakeRelease}/solr"/>
<makeurl file="${fakeRelease}" validate="false" property="fakeRelease.uri"/>
<exec executable="${python32.exe}" failonerror="true">
<arg value="-u"/>
<arg value="dev-tools/scripts/smokeTestRelease.py"/>
<arg value="${fakeRelease.uri}"/>
<arg value="${fakeReleaseVersion}"/>
<arg value="${fakeReleaseTmp}"/>
<arg value="false"/>
<env key="JAVA6_HOME" value="${JAVA6_HOME}"/>
<env key="JAVA7_HOME" value="${JAVA7_HOME}"/>
</exec>
<delete dir="${fakeRelease}"/>
<delete dir="${fakeReleaseTmp}"/>
</sequential>
</target>
<!-- Calls only generate-clover-reports on Lucene, as Solr's is just a clone with another target; the database itself is fixed -->
<target name="generate-clover-reports">
<subant target="generate-clover-reports" inheritall="false" failonerror="true">
<fileset dir="." includes="build-clover.xml" />
</subant>
</target>
<!-- Jenkins tasks -->
<target name="jenkins-hourly" depends="clean,test,validate,-jenkins-javadocs-lint,-svn-status"/>
<target name="jenkins-clover">
<antcall target="-jenkins-clover">
<param name="run.clover" value="true"/>
<!-- must be 1, as clover does not like parallel test runs: -->
<param name="tests.jvms" value="1"/>
<!-- Also override some other props to be fast, ignoring what's set on command line: -->
<param name="tests.multiplier" value="1"/>
<param name="tests.slow" value="false"/>
<param name="tests.nightly" value="false"/>
<param name="tests.weekly" value="false"/>
<param name="tests.multiplier" value="1"/>
</antcall>
</target>
<target name="-jenkins-clover" depends="clean,test,generate-clover-reports"/>
<!-- we need this extra condition, as we want to match only on "true", not solely if property is set: -->
<property name="disable.javadocs-lint" value="false" />
<condition property="-disable.javadocs-lint">
<equals arg1="${disable.javadocs-lint}" arg2="true"/>
</condition>
<target name="-jenkins-javadocs-lint" unless="-disable.javadocs-lint">
<antcall target="javadocs-lint"/>
</target>
<!-- define here, as common-build is not included! -->
<property name="svn.exe" value="svn" />
<target name="-svn-status">
<exec executable="${svn.exe}" dir="." failonerror="true">
<arg value="status"/>
<redirector outputproperty="svn.status.output">
<outputfilterchain>
<linecontainsregexp>
<regexp pattern="^\?" />
</linecontainsregexp>
<tokenfilter>
<replaceregex pattern="^........" replace="* " />
<replacestring from="${file.separator}" to="/" />
</tokenfilter>
</outputfilterchain>
</redirector>
</exec>
<fail message="Source checkout is dirty after running tests!!! Offending files:${line.separator}${svn.status.output}">
<condition>
<not>
<equals arg1="${svn.status.output}" arg2=""/>
</not>
</condition>
</fail>
</target>
</project>

@ -174,6 +174,6 @@
<classpathentry kind="lib" path="solr/contrib/velocity/lib/commons-beanutils-1.7.0.jar"/>
<classpathentry kind="lib" path="solr/contrib/velocity/lib/commons-collections-3.2.1.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="lib" path="lucene/test-framework/lib/randomizedtesting-runner-1.6.0.jar"/>
<classpathentry kind="lib" path="lucene/test-framework/lib/randomizedtesting-runner-2.0.0.rc5.jar"/>
<classpathentry kind="output" path="bin/other"/>
</classpath>

@ -2,7 +2,7 @@
<library name="JUnit">
<CLASSES>
<root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/junit-4.10.jar!/" />
<root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/randomizedtesting-runner-1.6.0.jar!/" />
<root url="jar://$PROJECT_DIR$/lucene/test-framework/lib/randomizedtesting-runner-2.0.0.rc5.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />

@ -36,27 +36,25 @@ A. How to use nightly Jenkins-built Lucene/Solr Maven artifacts
B. How to generate Lucene/Solr Maven artifacts
Prerequisites: JDK 1.6+ and Ant 1.7.X
Prerequisites: JDK 1.6+ and Ant 1.8.2+
Run 'ant generate-maven-artifacts' to create an internal Maven
repository, including POMs, binary .jars, source .jars, and javadoc
.jars.
You can run the above command in four possible places: the top-level
directory; under lucene/; under solr/; or under modules/. From the
top-level directory, from lucene/, or from modules/, the internal
repository will be located at dist/maven/. From solr/, the internal
repository will be located at package/maven/.
You can run the above command in three possible places: the top-level
directory; under lucene/; or under solr/. From the top-level directory
or from lucene/, the internal repository will be located at dist/maven/.
From solr/, the internal repository will be located at package/maven/.
C. How to deploy Maven artifacts to a repository
Prerequisites: JDK 1.6+ and Ant 1.7.X
Prerequisites: JDK 1.6+ and Ant 1.8.2+
You can deploy targets for all of Lucene/Solr, only Lucene, only Solr,
or only modules/, as in B. above. To deploy to a Maven repository, the
command is the same as in B. above, with the addition of two system
properties:
You can deploy targets for all of Lucene/Solr, only Lucene, or only Solr,
as in B. above. To deploy to a Maven repository, the command is the same
as in B. above, with the addition of two system properties:
ant -Dm2.repository.id=my-repo-id \
-Dm2.repository.url=http://example.org/my/repo \
@ -101,7 +99,7 @@ D. How to use Maven to build Lucene/Solr
the default, you can supply an alternate version on the command line
with the above command, e.g.:
ant -Dversion=5.0-my-special-version get-maven-poms
ant -Dversion=my-special-version get-maven-poms
Note: if you change the version in the POMs, there is one test method
that will fail under maven-surefire-plugin:

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -77,33 +71,5 @@
</excludes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>appassembler-maven-plugin</artifactId>
<configuration>
<extraJvmArguments>-Xmx128M</extraJvmArguments>
<repositoryLayout>flat</repositoryLayout>
<platforms>
<platform>windows</platform>
<platform>unix</platform>
</platforms>
<programs>
<program>
<mainClass>org.apache.lucene.analysis.charfilter.HtmlStripCharFilter</mainClass>
<name>HtmlStripCharFilter</name>
</program>
<program>
<mainClass>org.apache.lucene.analysis.en.PorterStemmer</mainClass>
<name>EnglishPorterStemmer</name>
</program>
<program>
<mainClass>org.tartarus.snowball.TestApp</mainClass>
<name>SnowballTestApp</name>
</program>
</programs>
</configuration>
</plugin>
</plugins>
</build>
</project>

@ -40,15 +40,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

@ -39,15 +39,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

@ -39,15 +39,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -75,6 +69,11 @@
<build>
<sourceDirectory>${module-path}/src/java</sourceDirectory>
<testSourceDirectory>${module-path}/src/test</testSourceDirectory>
<resources>
<resource>
<directory>${module-path}/src/resources</directory>
</resource>
</resources>
<testResources>
<testResource>
<directory>${project.build.testSourceDirectory}</directory>

@ -39,15 +39,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -75,6 +69,11 @@
<build>
<sourceDirectory>${module-path}/src/java</sourceDirectory>
<testSourceDirectory>${module-path}/src/test</testSourceDirectory>
<resources>
<resource>
<directory>${module-path}/src/resources</directory>
</resource>
</resources>
<testResources>
<testResource>
<directory>${project.build.testSourceDirectory}</directory>

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

@ -41,15 +41,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -120,41 +114,5 @@
</includes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>appassembler-maven-plugin</artifactId>
<configuration>
<extraJvmArguments>-Xmx128M</extraJvmArguments>
<repositoryLayout>flat</repositoryLayout>
<platforms>
<platform>windows</platform>
<platform>unix</platform>
</platforms>
<programs>
<program>
<mainClass>org.apache.lucene.benchmark.byTask.Benchmark</mainClass>
<name>Benchmark</name>
</program>
<program>
<mainClass>org.apache.lucene.benchmark.quality.trec.QueryDriver</mainClass>
<name>QueryDriver</name>
</program>
<program>
<mainClass>org.apache.lucene.benchmark.quality.utils.QualityQueriesFinder</mainClass>
<name>QualityQueriesFinder</name>
</program>
<program>
<mainClass>org.apache.lucene.benchmark.utils.ExtractReuters</mainClass>
<name>ExtractReuters</name>
</program>
<program>
<mainClass>org.apache.lucene.benchmark.utils.ExtractWikipedia</mainClass>
<name>ExtractWikipedia</name>
</program>
</programs>
</configuration>
</plugin>
</plugins>
</build>
</project>

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -104,40 +98,6 @@
</systemPropertyVariables>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>appassembler-maven-plugin</artifactId>
<configuration>
<extraJvmArguments>-Xmx128M</extraJvmArguments>
<repositoryLayout>flat</repositoryLayout>
<platforms>
<platform>windows</platform>
<platform>unix</platform>
</platforms>
<programs>
<program>
<mainClass>org.apache.lucene.index.CheckIndex</mainClass>
<name>CheckIndex</name>
</program>
<program>
<mainClass>org.apache.lucene.index.IndexReader</mainClass>
<name>IndexReader</name>
</program>
<program>
<mainClass>org.apache.lucene.store.LockStressTest</mainClass>
<name>LockStressTest</name>
</program>
<program>
<mainClass>org.apache.lucene.store.LockVerifyServer</mainClass>
<name>LockVerifyServer</name>
</program>
<program>
<mainClass>org.apache.lucene.util.English</mainClass>
<name>English</name>
</program>
</programs>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -87,30 +81,5 @@
</excludes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>appassembler-maven-plugin</artifactId>
<configuration>
<extraJvmArguments>-Xmx128M</extraJvmArguments>
<repositoryLayout>flat</repositoryLayout>
<assembleDirectory>${build-directory}</assembleDirectory>
<platforms>
<platform>windows</platform>
<platform>unix</platform>
</platforms>
<programs>
<program>
<mainClass>org.apache.lucene.demo.IndexFiles</mainClass>
<name>IndexFiles</name>
</program>
<program>
<mainClass>org.apache.lucene.demo.SearchFiles</mainClass>
<name>SearchFiles</name>
</program>
</programs>
</configuration>
</plugin>
</plugins>
</build>
</project>

@ -39,15 +39,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

@ -39,15 +39,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

@ -39,15 +39,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -72,49 +66,5 @@
</excludes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>appassembler-maven-plugin</artifactId>
<configuration>
<extraJvmArguments>-Xmx128M</extraJvmArguments>
<repositoryLayout>flat</repositoryLayout>
<platforms>
<platform>windows</platform>
<platform>unix</platform>
</platforms>
<programs>
<program>
<mainClass>org.apache.lucene.index.FieldNormModifier</mainClass>
<name>FieldNormModifier</name>
</program>
<program>
<mainClass>org.apache.lucene.index.IndexSplitter</mainClass>
<name>IndexSplitter</name>
</program>
<program>
<mainClass>org.apache.lucene.index.MultiPassIndexSplitter</mainClass>
<name>MultiPassIndexSplitter</name>
</program>
<program>
<mainClass>org.apache.lucene.misc.GetTermInfo</mainClass>
<name>GetTermInfo</name>
</program>
<program>
<mainClass>org.apache.lucene.misc.HighFreqTerms</mainClass>
<name>HighFreqTerms</name>
</program>
<program>
<mainClass>org.apache.lucene.misc.IndexMergeTool</mainClass>
<name>IndexMergeTool</name>
</program>
<program>
<mainClass>org.apache.lucene.misc.LengthNormModifier</mainClass>
<name>LengthNormModifier</name>
</program>
</programs>
</configuration>
</plugin>
</plugins>
</build>
</project>

@ -35,15 +35,9 @@
<module-directory>lucene</module-directory>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<modules>
<module>core</module>

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

@ -31,15 +31,18 @@
<version>@version@</version>
<packaging>pom</packaging>
<name>Grandparent POM for Apache Lucene Core and Apache Solr</name>
<description>Parent POM for Apache Lucene Core and Apache Solr</description>
<url>http://lucene.apache.org/java</url>
<description>Grandparent POM for Apache Lucene Core and Apache Solr</description>
<url>http://lucene.apache.org</url>
<modules>
<module>lucene</module>
<module>solr</module>
</modules>
<properties>
<top-level>..</top-level>
<base.specification.version>4.0.0</base.specification.version>
<vc-anonymous-base-url>http://svn.apache.org/repos/asf/lucene/dev/trunk</vc-anonymous-base-url>
<vc-dev-base-url>https://svn.apache.org/repos/asf/lucene/dev/trunk</vc-dev-base-url>
<vc-browse-base-url>http://svn.apache.org/viewvc/lucene/dev/trunk</vc-browse-base-url>
<base.specification.version>5.0.0</base.specification.version>
<maven.build.timestamp.format>yyyy-MM-dd HH:mm:ss</maven.build.timestamp.format>
<java.compat.version>1.6</java.compat.version>
<jetty.version>8.1.2.v20120308</jetty.version>
@ -69,11 +72,11 @@
</properties>
<issueManagement>
<system>JIRA</system>
<url>http://issues.apache.org/jira/browse/LUCENE</url>
<url>https://issues.apache.org/jira/browse/LUCENE</url>
</issueManagement>
<ciManagement>
<system>Hudson</system>
<url>http://lucene.zones.apache.org:8080/hudson/job/Lucene-Nightly/</url>
<system>Jenkins</system>
<url>https://builds.apache.org/computer/lucene/</url>
</ciManagement>
<mailingLists>
<mailingList>
@ -109,15 +112,9 @@
</mailingLists>
<inceptionYear>2000</inceptionYear>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk
</url>
<connection>scm:svn:${vc-anonymous-base-url}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}</developerConnection>
<url>${vc-browse-base-url}</url>
</scm>
<licenses>
<license>
@ -388,7 +385,7 @@
<dependency>
<groupId>com.carrotsearch.randomizedtesting</groupId>
<artifactId>randomizedtesting-runner</artifactId>
<version>1.6.0</version>
<version>2.0.0.rc5</version>
</dependency>
</dependencies>
</dependencyManagement>
@ -549,11 +546,6 @@
</archive>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>appassembler-maven-plugin</artifactId>
<version>1.2.1</version>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>

@ -35,18 +35,11 @@
<module-directory>solr/contrib/analysis-extras</module-directory>
<top-level>../../../..</top-level>
<module-path>${top-level}/${module-directory}</module-path>
<surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -101,17 +94,12 @@
<testResource>
<directory>${top-level}/solr/core/src/test-files</directory>
</testResource>
<testResource>
<directory>${top-level}/dev-tools/maven/solr</directory>
<includes>
<include>maven.testlogging.properties</include>
</includes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<systemPropertyVariables>
<java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
</systemPropertyVariables>
</configuration>
</plugin>
</plugins>
</build>
</project>

@ -35,18 +35,11 @@
<module-directory>solr/contrib/clustering</module-directory>
<top-level>../../../..</top-level>
<module-path>${top-level}/${module-directory}</module-path>
<surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -106,17 +99,12 @@
<testResource>
<directory>${top-level}/solr/core/src/test-files</directory>
</testResource>
<testResource>
<directory>${top-level}/dev-tools/maven/solr</directory>
<includes>
<include>maven.testlogging.properties</include>
</includes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<systemPropertyVariables>
<java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
</systemPropertyVariables>
</configuration>
</plugin>
</plugins>
</build>
</project>

@ -35,18 +35,11 @@
<module-directory>solr/contrib/dataimporthandler-extras</module-directory>
<top-level>../../../..</top-level>
<module-path>${top-level}/${module-directory}</module-path>
<surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -104,17 +97,12 @@
<testResource>
<directory>${top-level}/solr/core/src/test-files</directory>
</testResource>
<testResource>
<directory>${top-level}/dev-tools/maven/solr</directory>
<includes>
<include>maven.testlogging.properties</include>
</includes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<systemPropertyVariables>
<java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
</systemPropertyVariables>
</configuration>
</plugin>
</plugins>
</build>
</project>

@ -35,18 +35,11 @@
<module-directory>solr/contrib/dataimporthandler</module-directory>
<top-level>../../../..</top-level>
<module-path>${top-level}/${module-directory}</module-path>
<surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -90,6 +83,12 @@
<testResource>
<directory>${top-level}/solr/core/src/test-files</directory>
</testResource>
<testResource>
<directory>${top-level}/dev-tools/maven/solr</directory>
<includes>
<include>maven.testlogging.properties</include>
</includes>
</testResource>
</testResources>
<plugins>
<plugin>
@ -103,15 +102,6 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<systemPropertyVariables>
<java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
</systemPropertyVariables>
</configuration>
</plugin>
</plugins>
</build>
</project>

@ -38,18 +38,11 @@
<module-directory>solr/contrib/extraction</module-directory>
<top-level>../../../..</top-level>
<module-path>${top-level}/${module-directory}</module-path>
<surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -102,17 +95,12 @@
<testResource>
<directory>${top-level}/solr/core/src/test-files</directory>
</testResource>
<testResource>
<directory>${top-level}/dev-tools/maven/solr</directory>
<includes>
<include>maven.testlogging.properties</include>
</includes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<systemPropertyVariables>
<java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
</systemPropertyVariables>
</configuration>
</plugin>
</plugins>
</build>
</project>

@ -39,18 +39,11 @@
<module-directory>solr/contrib/langid</module-directory>
<top-level>../../../..</top-level>
<module-path>${top-level}/${module-directory}</module-path>
<surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -107,17 +100,12 @@
<testResource>
<directory>${top-level}/solr/core/src/test-files</directory>
</testResource>
<testResource>
<directory>${top-level}/dev-tools/maven/solr</directory>
<includes>
<include>maven.testlogging.properties</include>
</includes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<systemPropertyVariables>
<java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
</systemPropertyVariables>
</configuration>
</plugin>
</plugins>
</build>
</project>

@ -35,18 +35,11 @@
<module-directory>solr/contrib/uima</module-directory>
<top-level>../../../..</top-level>
<module-path>${top-level}/${module-directory}</module-path>
<surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -121,17 +114,12 @@
<testResource>
<directory>${module-path}/src/test-files</directory>
</testResource>
<testResource>
<directory>${top-level}/dev-tools/maven/solr</directory>
<includes>
<include>maven.testlogging.properties</include>
</includes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<systemPropertyVariables>
<java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
</systemPropertyVariables>
</configuration>
</plugin>
</plugins>
</build>
</project>

@ -35,18 +35,11 @@
<module-directory>solr/contrib/velocity</module-directory>
<top-level>../../../..</top-level>
<module-path>${top-level}/${module-directory}</module-path>
<surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -142,17 +135,12 @@
<testResource>
<directory>${top-level}/solr/core/src/test-files</directory>
</testResource>
<testResource>
<directory>${top-level}/dev-tools/maven/solr</directory>
<includes>
<include>maven.testlogging.properties</include>
</includes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<systemPropertyVariables>
<java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
</systemPropertyVariables>
</configuration>
</plugin>
</plugins>
</build>
</project>

@ -35,18 +35,11 @@
<module-directory>solr/core</module-directory>
<top-level>../../..</top-level>
<module-path>${top-level}/${module-directory}</module-path>
<surefire-top-level>${top-level}/../..</surefire-top-level>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>
@ -243,48 +236,14 @@
<testResource>
<directory>${top-level}/solr/solrj/src/test-files</directory>
</testResource>
<testResource>
<directory>${top-level}/dev-tools/maven/solr</directory>
<includes>
<include>maven.testlogging.properties</include>
</includes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<systemPropertyVariables>
<java.util.logging.config.file>${surefire-top-level}/solr/testlogging.properties</java.util.logging.config.file>
</systemPropertyVariables>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>appassembler-maven-plugin</artifactId>
<configuration>
<extraJvmArguments>-Xmx128M</extraJvmArguments>
<repositoryLayout>flat</repositoryLayout>
<platforms>
<platform>windows</platform>
<platform>unix</platform>
</platforms>
<programs>
<program>
<mainClass>org.apache.solr.client.solrj.embedded.JettySolrRunner</mainClass>
<name>JettySolrRunner</name>
</program>
<program>
<mainClass>org.apache.solr.util.BitSetPerf</mainClass>
<name>BitSetPerf</name>
<extraJvmArguments>-Xms128m -Xbatch</extraJvmArguments>
</program>
<program>
<mainClass>org.apache.solr.util.SimplePostTool</mainClass>
<name>SimplePostTool</name>
</program>
<program>
<mainClass>org.apache.solr.util.SuggestMissingFactories</mainClass>
<name>SuggestMissingFactories</name>
</program>
</programs>
</configuration>
</plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>

@ -0,0 +1,2 @@
handlers=java.util.logging.ConsoleHandler
.level=SEVERE

@ -43,26 +43,14 @@
<module-directory>solr</module-directory>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<issueManagement>
<system>JIRA</system>
<url>http://issues.apache.org/jira/browse/SOLR</url>
<url>https://issues.apache.org/jira/browse/SOLR</url>
</issueManagement>
<ciManagement>
<system>Hudson</system>
<url>
http://lucene.zones.apache.org:8080/hudson/job/Solr-Nightly/
</url>
</ciManagement>
<mailingLists>
<mailingList>
<name>Solr User List</name>
@ -111,6 +99,15 @@
<doctitle>${project.name} ${project.version} API (${now.version})</doctitle>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<systemPropertyVariables>
<java.util.logging.config.file>../test-classes/maven.testlogging.properties</java.util.logging.config.file>
</systemPropertyVariables>
</configuration>
</plugin>
</plugins>
</pluginManagement>
</build>

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<!-- These dependencies are compile scope because this is a test framework. -->
@ -60,20 +54,29 @@
<artifactId>solr-core</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
<!-- SOLR-3263: Provided scope is required to avoid jar signing conflicts -->
<scope>provided</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
</dependency>
<!-- If your tests don't use BaseDistributedSearchTestCase or SolrJettyTestBase,
you can exclude the three Jetty dependencies below. -->
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<scope>runtime</scope>
<artifactId>jetty-servlet</artifactId>
</dependency>
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util</artifactId>
</dependency>
<!-- If your tests don't use BaseDistributedSearchTestCase or SolrJettyTestBase,
you can exclude the two Jetty dependencies below. -->
<dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
<scope>runtime</scope>
</dependency>
<dependency>

@ -37,15 +37,9 @@
<module-path>${top-level}/${module-directory}</module-path>
</properties>
<scm>
<connection>
scm:svn:http://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</connection>
<developerConnection>
scm:svn:https://svn.apache.org/repos/asf/lucene/dev/trunk/${module-directory}
</developerConnection>
<url>
http://svn.apache.org/viewvc/lucene/dev/trunk/${module-directory}
</url>
<connection>scm:svn:${vc-anonymous-base-url}/${module-directory}</connection>
<developerConnection>scm:svn:${vc-dev-base-url}/${module-directory}</developerConnection>
<url>${vc-browse-base-url}/${module-directory}</url>
</scm>
<dependencies>
<dependency>

@ -58,7 +58,7 @@ def javaExe(version):
def verifyJavaVersion(version):
s = os.popen('%s; java -version 2>&1' % javaExe(version)).read()
if s.find('java version "%s.' % version) == -1:
if s.find(' version "%s.' % version) == -1:
raise RuntimeError('got wrong version for java %s:\n%s' % (version, s))
# http://s.apache.org/lusolr32rc2
@ -363,6 +363,10 @@ def verifyDigests(artifact, urlString, tmpDir):
raise RuntimeError('SHA1 digest mismatch for %s: expected %s but got %s' % (artifact, sha1Expected, sha1Actual))
def getDirEntries(urlString):
if urlString.startswith('file:/') and not urlString.startswith('file://'):
# stupid bogus ant URI
urlString = "file:///" + urlString[6:]
if urlString.startswith('file://'):
path = urlString[7:]
if path.endswith('/'):
@ -1026,7 +1030,7 @@ def crawl(downloadedFiles, urlString, targetDir, exclusions=set()):
def main():
if len(sys.argv) != 4:
if len(sys.argv) < 4:
print()
print('Usage: python -u %s BaseURL version tmpDir [isSigned]' % sys.argv[0])
print()
@ -1035,8 +1039,11 @@ def main():
baseURL = sys.argv[1]
version = sys.argv[2]
tmpDir = os.path.abspath(sys.argv[3])
isSigned = True
if len(sys.argv) == 5:
isSigned = (sys.argv[4] == "True")
smokeTest(baseURL, version, tmpDir, True)
smokeTest(baseURL, version, tmpDir, isSigned)
def smokeTest(baseURL, version, tmpDir, isSigned):
@ -1090,4 +1097,5 @@ if __name__ == '__main__':
except:
import traceback
traceback.print_exc()
sys.exit(1)
sys.exit(0)

@ -6,6 +6,56 @@ http://s.apache.org/luceneversions
======================= Lucene 5.0.0 =======================
======================= Lucene 4.0.0 =======================
New Features
* LUCENE-1888: Added the option to store payloads in the term
vectors (IndexableFieldType.storeTermVectorPayloads()). Note
that you must store term vector positions to store payloads.
(Robert Muir)
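  For illustration, a minimal sketch of enabling this option with the 4.0
  field API (the "body" field, document, and text are assumed here, not part
  of the change itself):

    FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);   // positions are required for payloads
    ft.setStoreTermVectorPayloads(true);
    doc.add(new Field("body", "some text", ft));  // doc is an assumed Document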
API Changes
* LUCENE-4299: Added Terms.hasPositions() and Terms.hasOffsets().
Previously you had no real way to know that a term vector field
had positions or offsets, since this can be configured on a
per-field-per-document basis. (Robert Muir)
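  A minimal sketch of checking the new flags (assumes an open IndexReader
  named reader; MultiFields provides a merged view over its segments):

    Terms terms = MultiFields.getTerms(reader, "body");
    if (terms != null) {
      boolean hasPositions = terms.hasPositions();  // positions indexed for this field?
      boolean hasOffsets = terms.hasOffsets();      // offsets indexed for this field?
    }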
* Removed DocsAndPositionsEnum.hasPayload() and simplified the
contract of getPayload(). It returns null if there is no payload,
otherwise returns the current payload. You can now call it multiple
times per position if you want. (Robert Muir)
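  Under the new contract, consuming payloads might look like this sketch
  (termsEnum is assumed to be positioned on a term whose field has positions):

    DocsAndPositionsEnum postings = termsEnum.docsAndPositions(null, null);
    while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      for (int i = 0; i < postings.freq(); i++) {
        postings.nextPosition();
        BytesRef payload = postings.getPayload();  // null when this position has none
      }
    }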
* Removed FieldsEnum. Fields API instead implements Iterable<String>
and exposes Iterator, so you can iterate over field names with
for (String field : fields) instead. (Robert Muir)
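  A short sketch of the new iteration style (reader is an assumed open
  IndexReader):

    Fields fields = MultiFields.getFields(reader);
    if (fields != null) {
      for (String field : fields) {
        Terms terms = fields.terms(field);   // per-field Terms, usable as before
      }
    }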
Bug Fixes
* LUCENE-4297: BooleanScorer2 would multiply the coord() factor
twice for conjunctions: for most users this is no problem, but
if you had a customized Similarity that returned something other
than 1 when overlap == maxOverlap (always the case for conjunctions),
then the score would be incorrect. (Pascal Chollet, Robert Muir)
* LUCENE-4298: MultiFields.getTermDocsEnum(IndexReader, Bits, String, BytesRef)
did not work at all, it would infinitely recurse.
(Alberto Paro via Robert Muir)
* LUCENE-4300: BooleanQuery's rewrite was not always safe: if you
had a custom Similarity where coord(1,1) != 1F, then the rewritten
query would be scored differently. (Robert Muir)
* Don't allow negatives in the positions file. If you have an index
from 2.4.0 or earlier with such negative positions, and you already
upgraded to 3.x, then to Lucene 4.0-ALPHA or -BETA, you should run
CheckIndex. If it fails, then you need to upgrade again to 4.0 (Robert Muir)
Build
* LUCENE-3985: Upgrade to randomizedtesting 2.0.0. Added support for
thread leak detection. Added support for suite timeouts. (Dawid Weiss)
======================= Lucene 4.0.0-BETA =======================
@ -47,6 +97,11 @@ New features
int docID), to attempt deletion by docID as long as the provided
reader is an NRT reader, and the segment has not yet been merged
away (Mike McCandless).
* LUCENE-4286: Added option to CJKBigramFilter to always also output
unigrams. This can be used for a unigram+bigram approach, or at
index-time only for better support of short queries.
(Tom Burton-West, Robert Muir)
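  A minimal sketch of enabling this option via the new three-argument
  constructor (the analyzer class below is illustrative and assumes
  Version.LUCENE_40):

    import java.io.Reader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.cjk.CJKBigramFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;
    import org.apache.lucene.util.Version;

    public class UnigramBigramAnalyzer extends Analyzer {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new StandardTokenizer(Version.LUCENE_40, reader);
        // The final 'true' requests unigrams in addition to bigrams.
        TokenStream result = new CJKBigramFilter(source,
            CJKBigramFilter.HAN | CJKBigramFilter.HIRAGANA
                | CJKBigramFilter.KATAKANA | CJKBigramFilter.HANGUL, true);
        return new TokenStreamComponents(source, result);
      }
    }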
API Changes
@ -115,6 +170,10 @@ Optimizations
making them substantially more lightweight. Behavior is unchanged.
(Robert Muir)
* LUCENE-4291: Reduced internal buffer size for Jflex-based tokenizers
such as StandardTokenizer from 32kb to 8kb.
(Raintung Li, Steven Rowe, Robert Muir)
Bug Fixes
* LUCENE-4109: BooleanQueries are not parsed correctly with the
@ -164,6 +223,9 @@ Bug Fixes
* LUCENE-4282: Automaton FuzzyQuery didn't always deliver all results.
(Johannes Christen, Uwe Schindler, Robert Muir)
* LUCENE-4289: Fix minor idf inconsistencies/inefficiencies in highlighter.
(Robert Muir)
Changes in Runtime Behavior
* LUCENE-4109: Enable position increments in the flexible queryparser by default.

View File

@ -9,7 +9,7 @@ enumeration APIs. Here are the major changes:
by the BytesRef class (which provides an offset + length "slice"
into an existing byte[]).
* Fields are separately enumerated (FieldsEnum) from the terms
* Fields are separately enumerated (Fields.iterator()) from the terms
within each field (TermEnum). So instead of this:
TermEnum termsEnum = ...;
@ -20,10 +20,8 @@ enumeration APIs. Here are the major changes:
Do this:
FieldsEnum fieldsEnum = ...;
String field;
while((field = fieldsEnum.next()) != null) {
TermsEnum termsEnum = fieldsEnum.terms();
for(String field : fields) {
TermsEnum termsEnum = fields.terms(field);
BytesRef text;
while((text = termsEnum.next()) != null) {
System.out.println("field=" + field + "; text=" + text.utf8ToString());
@ -316,11 +314,12 @@ an AtomicReader. Note: using "atomicity emulators" can cause serious
slowdowns due to the need to merge terms, postings, DocValues, and
FieldCache, use them with care!
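For reference, a minimal sketch of one such emulator,
SlowCompositeReaderWrapper (the helper class below is illustrative):

    import java.io.IOException;
    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.SlowCompositeReaderWrapper;
    import org.apache.lucene.store.Directory;

    public class AtomicityEmulatorExample {
      // Flattens a composite reader into a single AtomicReader view;
      // handy when porting old code, but potentially very slow.
      public static AtomicReader openAtomic(Directory dir) throws IOException {
        DirectoryReader composite = DirectoryReader.open(dir);
        return SlowCompositeReaderWrapper.wrap(composite);
      }
    }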
## LUCENE-2413: Analyzer package changes
## LUCENE-2413,LUCENE-3396: Analyzer package changes
Lucene's core and contrib analyzers, along with Solr's analyzers,
were consolidated into lucene/analysis. During the refactoring some
package names have changed:
package names have changed, and ReusableAnalyzerBase was renamed to
Analyzer:
- o.a.l.analysis.KeywordAnalyzer -> o.a.l.analysis.core.KeywordAnalyzer
- o.a.l.analysis.KeywordTokenizer -> o.a.l.analysis.core.KeywordTokenizer
@ -345,7 +344,7 @@ package names have changed:
- o.a.l.analysis.NormalizeCharMap -> o.a.l.analysis.charfilter.NormalizeCharMap
- o.a.l.analysis.CharArraySet -> o.a.l.analysis.util.CharArraySet
- o.a.l.analysis.CharArrayMap -> o.a.l.analysis.util.CharArrayMap
- o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
- o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.Analyzer
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
- o.a.l.analysis.WordListLoader -> o.a.l.analysis.util.WordListLoader
- o.a.l.analysis.CharTokenizer -> o.a.l.analysis.util.CharTokenizer
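As a concrete illustration of these renames, a 3.x import block could be
updated as follows (the wrapper class below is illustrative and assumes
Version.LUCENE_40):

    // 3.x:
    //   import org.apache.lucene.analysis.KeywordAnalyzer;
    //   import org.apache.lucene.analysis.CharArraySet;
    //   import org.apache.lucene.analysis.ReusableAnalyzerBase;
    // 4.0:
    import org.apache.lucene.analysis.Analyzer;             // was ReusableAnalyzerBase
    import org.apache.lucene.analysis.core.KeywordAnalyzer;
    import org.apache.lucene.analysis.util.CharArraySet;
    import org.apache.lucene.util.Version;

    public class RenamedImports {
      Analyzer analyzer = new KeywordAnalyzer();
      CharArraySet stopSet = new CharArraySet(Version.LUCENE_40, 16, true);
    }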

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
package org.apache.lucene.analysis.charfilter;
@ -40,8 +40,8 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
* on 7/26/12 6:22 PM from the specification file
* <tt>C:/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
* on 8/6/12 11:57 AM from the specification file
* <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
*/
public final class HTMLStripCharFilter extends BaseCharFilter {
@ -31255,6 +31255,93 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
{ yybegin(STYLE);
}
case 55: break;
case 27:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return BLOCK_LEVEL_START_TAG_REPLACEMENT;
}
case 56: break;
case 30:
{ int length = yylength();
inputSegment.write(zzBuffer, zzStartRead, length);
entitySegment.clear();
char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
entitySegment.append(ch);
outputSegment = entitySegment;
yybegin(CHARACTER_REFERENCE_TAIL);
}
case 57: break;
case 48:
{ inputSegment.clear();
yybegin(YYINITIAL);
// add (previously matched input length) -- current match and substitution handled below
cumulativeDiff += yychar - inputStart;
// position the offset correction at (already output length) -- substitution handled below
int offsetCorrectionPos = outputCharCount;
int returnValue;
if (escapeSTYLE) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
returnValue = outputSegment.nextChar();
} else {
// add (this match length) - (substitution length)
cumulativeDiff += yylength() - 1;
// add (substitution length)
++offsetCorrectionPos;
returnValue = STYLE_REPLACEMENT;
}
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
return returnValue;
}
case 58: break;
case 8:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(START_TAG_TAIL_INCLUDE);
} else {
yybegin(START_TAG_TAIL_SUBSTITUTE);
}
}
case 59: break;
case 2:
{ inputStart = yychar;
inputSegment.clear();
inputSegment.append('<');
yybegin(LEFT_ANGLE_BRACKET);
}
case 60: break;
case 44:
{ restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 61: break;
case 21:
{ previousRestoreState = restoreState;
restoreState = SERVER_SIDE_INCLUDE;
yybegin(SINGLE_QUOTED_STRING);
}
case 62: break;
case 11:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
yybegin(LEFT_ANGLE_BRACKET_SPACE);
}
case 63: break;
case 35:
{ yybegin(SCRIPT);
}
case 64: break;
case 42:
{ restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 65: break;
case 10:
{ inputSegment.append('!'); yybegin(BANG);
}
case 66: break;
case 51:
{ // Handle paired UTF-16 surrogates.
String surrogatePair = yytext();
@ -31288,13 +31375,331 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
inputSegment.append('#');
yybegin(NUMERIC_CHARACTER);
}
case 56: break;
case 21:
case 67: break;
case 4:
{ yypushback(1);
outputSegment = inputSegment;
outputSegment.restart();
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 68: break;
case 43:
{ restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 69: break;
case 52:
{ // Handle paired UTF-16 surrogates.
String surrogatePair = yytext();
char highSurrogate = '\u0000';
try { // High surrogates are in decimal range [55296, 56319]
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(1, 6));
} catch(Exception e) { // should never happen
assert false: "Exception parsing high surrogate '"
+ surrogatePair.substring(1, 6) + "'";
}
if (Character.isHighSurrogate(highSurrogate)) {
outputSegment = entitySegment;
outputSegment.clear();
try {
outputSegment.unsafeWrite
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
} catch(Exception e) { // should never happen
assert false: "Exception parsing low surrogate '"
+ surrogatePair.substring(10, 14) + "'";
}
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 2;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return highSurrogate;
}
yypushback(surrogatePair.length() - 1); // Consume only '#'
inputSegment.append('#');
yybegin(NUMERIC_CHARACTER);
}
case 70: break;
case 28:
{ restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
}
case 71: break;
case 50:
{ // Handle paired UTF-16 surrogates.
outputSegment = entitySegment;
outputSegment.clear();
String surrogatePair = yytext();
char highSurrogate = '\u0000';
try {
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(2, 6), 16);
} catch(Exception e) { // should never happen
assert false: "Exception parsing high surrogate '"
+ surrogatePair.substring(2, 6) + "'";
}
try {
outputSegment.unsafeWrite
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
} catch(Exception e) { // should never happen
assert false: "Exception parsing low surrogate '"
+ surrogatePair.substring(10, 14) + "'";
}
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 2;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return highSurrogate;
}
case 72: break;
case 16:
{ restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
}
case 73: break;
case 22:
{ previousRestoreState = restoreState;
restoreState = SERVER_SIDE_INCLUDE;
yybegin(SINGLE_QUOTED_STRING);
yybegin(DOUBLE_QUOTED_STRING);
}
case 57: break;
case 74: break;
case 26:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
outputSegment = inputSegment;
yybegin(YYINITIAL);
}
case 75: break;
case 20:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
}
case 76: break;
case 47:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(CDATA);
}
case 77: break;
case 33:
{ yybegin(YYINITIAL);
if (escapeBR) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
return outputSegment.nextChar();
} else {
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.reset();
return BR_START_TAG_REPLACEMENT;
}
}
case 78: break;
case 23:
{ yybegin(restoreState); restoreState = previousRestoreState;
}
case 79: break;
case 32:
{ yybegin(COMMENT);
}
case 80: break;
case 24:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 81: break;
case 3:
{ inputStart = yychar;
inputSegment.clear();
inputSegment.append('&');
yybegin(AMPERSAND);
}
case 82: break;
case 46:
{ yybegin(SCRIPT);
if (escapeSCRIPT) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
inputStart += 1 + yylength();
return outputSegment.nextChar();
}
}
case 83: break;
case 14:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
}
case 84: break;
case 6:
{ int matchLength = yylength();
inputSegment.write(zzBuffer, zzStartRead, matchLength);
if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
String decimalCharRef = yytext();
int codePoint = 0;
try {
codePoint = Integer.parseInt(decimalCharRef);
} catch(Exception e) {
assert false: "Exception parsing code point '" + decimalCharRef + "'";
}
if (codePoint <= 0x10FFFF) {
outputSegment = entitySegment;
outputSegment.clear();
if (codePoint >= Character.MIN_SURROGATE
&& codePoint <= Character.MAX_SURROGATE) {
outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
} else {
outputSegment.setLength
(Character.toChars(codePoint, outputSegment.getArray(), 0));
}
yybegin(CHARACTER_REFERENCE_TAIL);
} else {
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
} else {
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
}
case 85: break;
case 34:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
cumulativeDiff += yychar - inputStart + yylength();
// position the correction at (already output length) [ + (substitution length) = 0]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
}
case 86: break;
case 5:
{ inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
}
case 87: break;
case 13:
{ inputSegment.append(zzBuffer[zzStartRead]);
}
case 88: break;
case 18:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(END_TAG_TAIL_INCLUDE);
} else {
yybegin(END_TAG_TAIL_SUBSTITUTE);
}
}
case 89: break;
case 40:
{ yybegin(SCRIPT_COMMENT);
}
case 90: break;
case 37:
{ // add (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
yybegin(YYINITIAL);
}
case 91: break;
case 12:
{ inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
}
case 92: break;
case 9:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(START_TAG_TAIL_INCLUDE);
} else {
yybegin(START_TAG_TAIL_EXCLUDE);
}
}
case 93: break;
case 49:
{ inputSegment.clear();
yybegin(YYINITIAL);
// add (previously matched input length) -- current match and substitution handled below
cumulativeDiff += yychar - inputStart;
// position at (already output length) -- substitution handled below
int offsetCorrectionPos = outputCharCount;
int returnValue;
if (escapeSCRIPT) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
returnValue = outputSegment.nextChar();
} else {
// add (this match length) - (substitution length)
cumulativeDiff += yylength() - 1;
// add (substitution length)
++offsetCorrectionPos;
returnValue = SCRIPT_REPLACEMENT;
}
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
return returnValue;
}
case 94: break;
case 29:
{ restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
}
case 95: break;
case 17:
{ restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
}
case 96: break;
case 45:
{ yybegin(STYLE);
if (escapeSTYLE) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
inputStart += 1 + yylength();
return outputSegment.nextChar();
}
}
case 97: break;
case 7:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 98: break;
case 19:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(END_TAG_TAIL_INCLUDE);
} else {
yybegin(END_TAG_TAIL_EXCLUDE);
}
}
case 99: break;
case 25:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return BLOCK_LEVEL_END_TAG_REPLACEMENT;
}
case 100: break;
case 31:
{ int matchLength = yylength();
inputSegment.write(zzBuffer, zzStartRead, matchLength);
@ -31329,66 +31734,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
return outputSegment.nextChar();
}
}
case 58: break;
case 19:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(END_TAG_TAIL_INCLUDE);
} else {
yybegin(END_TAG_TAIL_EXCLUDE);
}
}
case 59: break;
case 2:
{ inputStart = yychar;
inputSegment.clear();
inputSegment.append('<');
yybegin(LEFT_ANGLE_BRACKET);
}
case 60: break;
case 27:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return BLOCK_LEVEL_START_TAG_REPLACEMENT;
}
case 61: break;
case 44:
{ restoreState = STYLE_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 62: break;
case 35:
{ yybegin(SCRIPT);
}
case 63: break;
case 42:
{ restoreState = COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 64: break;
case 10:
{ inputSegment.append('!'); yybegin(BANG);
}
case 65: break;
case 33:
{ yybegin(YYINITIAL);
if (escapeBR) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
return outputSegment.nextChar();
} else {
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.reset();
return BR_START_TAG_REPLACEMENT;
}
}
case 66: break;
case 101: break;
case 53:
{ // Handle paired UTF-16 surrogates.
String surrogatePair = yytext();
@ -31424,288 +31770,7 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
inputSegment.append('#');
yybegin(NUMERIC_CHARACTER);
}
case 67: break;
case 43:
{ restoreState = SCRIPT_COMMENT; yybegin(SERVER_SIDE_INCLUDE);
}
case 68: break;
case 30:
{ int length = yylength();
inputSegment.write(zzBuffer, zzStartRead, length);
entitySegment.clear();
char ch = entityValues.get(zzBuffer, zzStartRead, length).charValue();
entitySegment.append(ch);
outputSegment = entitySegment;
yybegin(CHARACTER_REFERENCE_TAIL);
}
case 69: break;
case 28:
{ restoreState = STYLE_COMMENT; yybegin(SINGLE_QUOTED_STRING);
}
case 70: break;
case 3:
{ inputStart = yychar;
inputSegment.clear();
inputSegment.append('&');
yybegin(AMPERSAND);
}
case 71: break;
case 16:
{ restoreState = SCRIPT_COMMENT; yybegin(SINGLE_QUOTED_STRING);
}
case 72: break;
case 52:
{ // Handle paired UTF-16 surrogates.
String surrogatePair = yytext();
char highSurrogate = '\u0000';
try { // High surrogates are in decimal range [55296, 56319]
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(1, 6));
} catch(Exception e) { // should never happen
assert false: "Exception parsing high surrogate '"
+ surrogatePair.substring(1, 6) + "'";
}
if (Character.isHighSurrogate(highSurrogate)) {
outputSegment = entitySegment;
outputSegment.clear();
try {
outputSegment.unsafeWrite
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
} catch(Exception e) { // should never happen
assert false: "Exception parsing low surrogate '"
+ surrogatePair.substring(10, 14) + "'";
}
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 2;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return highSurrogate;
}
yypushback(surrogatePair.length() - 1); // Consume only '#'
inputSegment.append('#');
yybegin(NUMERIC_CHARACTER);
}
case 73: break;
case 6:
{ int matchLength = yylength();
inputSegment.write(zzBuffer, zzStartRead, matchLength);
if (matchLength <= 7) { // 0x10FFFF = 1114111: max 7 decimal chars
String decimalCharRef = yytext();
int codePoint = 0;
try {
codePoint = Integer.parseInt(decimalCharRef);
} catch(Exception e) {
assert false: "Exception parsing code point '" + decimalCharRef + "'";
}
if (codePoint <= 0x10FFFF) {
outputSegment = entitySegment;
outputSegment.clear();
if (codePoint >= Character.MIN_SURROGATE
&& codePoint <= Character.MAX_SURROGATE) {
outputSegment.unsafeWrite(REPLACEMENT_CHARACTER);
} else {
outputSegment.setLength
(Character.toChars(codePoint, outputSegment.getArray(), 0));
}
yybegin(CHARACTER_REFERENCE_TAIL);
} else {
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
} else {
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
}
case 74: break;
case 37:
{ // add (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
yybegin(YYINITIAL);
}
case 75: break;
case 8:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(START_TAG_TAIL_INCLUDE);
} else {
yybegin(START_TAG_TAIL_SUBSTITUTE);
}
}
case 76: break;
case 46:
{ yybegin(SCRIPT);
if (escapeSCRIPT) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
inputStart += 1 + yylength();
return outputSegment.nextChar();
}
}
case 77: break;
case 11:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
yybegin(LEFT_ANGLE_BRACKET_SPACE);
}
case 78: break;
case 20:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
}
case 79: break;
case 34:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0]
cumulativeDiff += yychar - inputStart + yylength();
// position the correction at (already output length) [ + (substitution length) = 0]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
}
case 80: break;
case 23:
{ yybegin(restoreState); restoreState = previousRestoreState;
}
case 81: break;
case 32:
{ yybegin(COMMENT);
}
case 82: break;
case 14:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
}
case 83: break;
case 18:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(END_TAG_TAIL_INCLUDE);
} else {
yybegin(END_TAG_TAIL_SUBSTITUTE);
}
}
case 84: break;
case 25:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 1;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 1, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return BLOCK_LEVEL_END_TAG_REPLACEMENT;
}
case 85: break;
case 7:
{ // add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - outputSegment.length();
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + outputSegment.length(), cumulativeDiff);
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 86: break;
case 48:
{ inputSegment.clear();
yybegin(YYINITIAL);
// add (previously matched input length) -- current match and substitution handled below
cumulativeDiff += yychar - inputStart;
// position the offset correction at (already output length) -- substitution handled below
int offsetCorrectionPos = outputCharCount;
int returnValue;
if (escapeSTYLE) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
returnValue = outputSegment.nextChar();
} else {
// add (this match length) - (substitution length)
cumulativeDiff += yylength() - 1;
// add (substitution length)
++offsetCorrectionPos;
returnValue = STYLE_REPLACEMENT;
}
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
return returnValue;
}
case 87: break;
case 5:
{ inputSegment.append('#'); yybegin(NUMERIC_CHARACTER);
}
case 88: break;
case 26:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
outputSegment = inputSegment;
yybegin(YYINITIAL);
}
case 89: break;
case 13:
{ inputSegment.append(zzBuffer[zzStartRead]);
}
case 90: break;
case 50:
{ // Handle paired UTF-16 surrogates.
outputSegment = entitySegment;
outputSegment.clear();
String surrogatePair = yytext();
char highSurrogate = '\u0000';
try {
highSurrogate = (char)Integer.parseInt(surrogatePair.substring(2, 6), 16);
} catch(Exception e) { // should never happen
assert false: "Exception parsing high surrogate '"
+ surrogatePair.substring(2, 6) + "'";
}
try {
outputSegment.unsafeWrite
((char)Integer.parseInt(surrogatePair.substring(10, 14), 16));
} catch(Exception e) { // should never happen
assert false: "Exception parsing low surrogate '"
+ surrogatePair.substring(10, 14) + "'";
}
// add (previously matched input length) + (this match length) - (substitution length)
cumulativeDiff += inputSegment.length() + yylength() - 2;
// position the correction at (already output length) + (substitution length)
addOffCorrectMap(outputCharCount + 2, cumulativeDiff);
inputSegment.clear();
yybegin(YYINITIAL);
return highSurrogate;
}
case 91: break;
case 40:
{ yybegin(SCRIPT_COMMENT);
}
case 92: break;
case 45:
{ yybegin(STYLE);
if (escapeSTYLE) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
inputStart += 1 + yylength();
return outputSegment.nextChar();
}
}
case 93: break;
case 22:
{ previousRestoreState = restoreState;
restoreState = SERVER_SIDE_INCLUDE;
yybegin(DOUBLE_QUOTED_STRING);
}
case 94: break;
case 12:
{ inputSegment.append('/'); yybegin(LEFT_ANGLE_BRACKET_SLASH);
}
case 95: break;
case 102: break;
case 36:
{ yybegin(YYINITIAL);
if (escapeBR) {
@ -31721,83 +31786,18 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
return BR_END_TAG_REPLACEMENT;
}
}
case 96: break;
case 24:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 97: break;
case 47:
{ // add (previously matched input length) + (this match length) [ - (substitution length) = 0 ]
cumulativeDiff += inputSegment.length() + yylength();
// position the correction at (already output length) [ + (substitution length) = 0 ]
addOffCorrectMap(outputCharCount, cumulativeDiff);
inputSegment.clear();
yybegin(CDATA);
}
case 98: break;
case 29:
{ restoreState = STYLE_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
}
case 99: break;
case 17:
{ restoreState = SCRIPT_COMMENT; yybegin(DOUBLE_QUOTED_STRING);
}
case 100: break;
case 9:
{ inputSegment.write(zzBuffer, zzStartRead, yylength());
if (null != escapedTags
&& escapedTags.contains(zzBuffer, zzStartRead, yylength())) {
yybegin(START_TAG_TAIL_INCLUDE);
} else {
yybegin(START_TAG_TAIL_EXCLUDE);
}
}
case 101: break;
case 49:
{ inputSegment.clear();
yybegin(YYINITIAL);
// add (previously matched input length) -- current match and substitution handled below
cumulativeDiff += yychar - inputStart;
// position at (already output length) -- substitution handled below
int offsetCorrectionPos = outputCharCount;
int returnValue;
if (escapeSCRIPT) {
inputSegment.write(zzBuffer, zzStartRead, yylength());
outputSegment = inputSegment;
returnValue = outputSegment.nextChar();
} else {
// add (this match length) - (substitution length)
cumulativeDiff += yylength() - 1;
// add (substitution length)
++offsetCorrectionPos;
returnValue = SCRIPT_REPLACEMENT;
}
addOffCorrectMap(offsetCorrectionPos, cumulativeDiff);
return returnValue;
}
case 102: break;
case 103: break;
case 38:
{ yybegin(restoreState);
}
case 103: break;
case 104: break;
case 41:
{ yybegin(STYLE_COMMENT);
}
case 104: break;
case 105: break;
case 1:
{ return zzBuffer[zzStartRead];
}
case 105: break;
case 4:
{ yypushback(1);
outputSegment = inputSegment;
outputSegment.restart();
yybegin(YYINITIAL);
return outputSegment.nextChar();
}
case 106: break;
default:
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {

View File

@ -141,9 +141,9 @@ InlineElment = ( [aAbBiIqQsSuU] |
[vV][aA][rR] )
%include src/java/org/apache/lucene/analysis/charfilter/HTMLCharacterEntities.jflex
%include HTMLCharacterEntities.jflex
%include src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro
%include HTMLStripCharFilter.SUPPLEMENTARY.jflex-macro
%{
private static final int INITIAL_INPUT_SEGMENT_SIZE = 1024;

View File

@ -24,6 +24,8 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.ArrayUtil;
@ -35,6 +37,12 @@ import org.apache.lucene.util.ArrayUtil;
* {@link #CJKBigramFilter(TokenStream, int)} to explicitly control which
* of the CJK scripts are turned into bigrams.
* <p>
* By default, when a CJK character has no adjacent characters to form
* a bigram, it is output in unigram form. If you want to always output
* both unigrams and bigrams, set the <code>outputUnigrams</code>
* flag in {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)}.
* This can be used for a combined unigram+bigram approach.
* <p>
* In all cases, all non-CJK input is passed through unmodified.
*/
public final class CJKBigramFilter extends TokenFilter {
@ -67,10 +75,16 @@ public final class CJKBigramFilter extends TokenFilter {
private final Object doHiragana;
private final Object doKatakana;
private final Object doHangul;
// true if we should output unigram tokens always
private final boolean outputUnigrams;
private boolean ngramState; // false = output unigram, true = output bigram
private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
private final PositionLengthAttribute posLengthAtt = addAttribute(PositionLengthAttribute.class);
// buffers containing codepoint and offsets in parallel
int buffer[] = new int[8];
@ -88,23 +102,36 @@ public final class CJKBigramFilter extends TokenFilter {
/**
* Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int)
* CJKBigramFilter(HAN | HIRAGANA | KATAKANA | HANGUL)}
* CJKBigramFilter(in, HAN | HIRAGANA | KATAKANA | HANGUL)}
*/
public CJKBigramFilter(TokenStream in) {
this(in, HAN | HIRAGANA | KATAKANA | HANGUL);
}
/**
* Create a new CJKBigramFilter, specifying which writing systems should be bigrammed.
* @param flags OR'ed set from {@link CJKBigramFilter#HAN}, {@link CJKBigramFilter#HIRAGANA},
* {@link CJKBigramFilter#KATAKANA}, {@link CJKBigramFilter#HANGUL}
* Calls {@link CJKBigramFilter#CJKBigramFilter(TokenStream, int, boolean)
* CJKBigramFilter(in, flags, false)}
*/
public CJKBigramFilter(TokenStream in, int flags) {
this(in, flags, false);
}
/**
* Create a new CJKBigramFilter, specifying which writing systems should be bigrammed,
* and whether or not unigrams should also be output.
* @param flags OR'ed set from {@link CJKBigramFilter#HAN}, {@link CJKBigramFilter#HIRAGANA},
* {@link CJKBigramFilter#KATAKANA}, {@link CJKBigramFilter#HANGUL}
* @param outputUnigrams true if unigrams for the selected writing systems should also be output.
* When this is false, unigrams are only output when there are no adjacent
* characters to form a bigram.
*/
public CJKBigramFilter(TokenStream in, int flags, boolean outputUnigrams) {
super(in);
doHan = (flags & HAN) == 0 ? NO : HAN_TYPE;
doHiragana = (flags & HIRAGANA) == 0 ? NO : HIRAGANA_TYPE;
doKatakana = (flags & KATAKANA) == 0 ? NO : KATAKANA_TYPE;
doHangul = (flags & HANGUL) == 0 ? NO : HANGUL_TYPE;
this.outputUnigrams = outputUnigrams;
}
/*
@ -120,7 +147,24 @@ public final class CJKBigramFilter extends TokenFilter {
// case 1: we have multiple remaining codepoints buffered,
// so we can emit a bigram here.
flushBigram();
if (outputUnigrams) {
// when also outputting unigrams, we output the unigram first,
// then rewind back to revisit the bigram.
// so an input of ABC is A + (rewind)AB + B + (rewind)BC + C
// the logic in hasBufferedUnigram ensures we output the C,
// even though it did actually have adjacent CJK characters.
if (ngramState) {
flushBigram();
} else {
flushUnigram();
index--;
}
ngramState = !ngramState;
} else {
flushBigram();
}
return true;
} else if (doNext()) {
@ -260,6 +304,11 @@ public final class CJKBigramFilter extends TokenFilter {
termAtt.setLength(len2);
offsetAtt.setOffset(startOffset[index], endOffset[index+1]);
typeAtt.setType(DOUBLE_TYPE);
// when outputting unigrams, all bigrams are synonyms that span two unigrams
if (outputUnigrams) {
posIncAtt.setPositionIncrement(0);
posLengthAtt.setPositionLength(2);
}
index++;
}
@ -292,7 +341,13 @@ public final class CJKBigramFilter extends TokenFilter {
* inputs.
*/
private boolean hasBufferedUnigram() {
return bufferLen == 1 && index == 0;
if (outputUnigrams) {
// when outputting unigrams always
return bufferLen - index == 1;
} else {
// otherwise it's only when we have a lone CJK character
return bufferLen == 1 && index == 0;
}
}
@Override
@ -303,5 +358,6 @@ public final class CJKBigramFilter extends TokenFilter {
lastEndOffset = 0;
loneState = null;
exhausted = false;
ngramState = false;
}
}

View File

@ -33,12 +33,13 @@ import org.apache.lucene.analysis.util.TokenFilterFactory;
* &lt;filter class="solr.LowerCaseFilterFactory"/&gt;
* &lt;filter class="solr.CJKBigramFilterFactory"
* han="true" hiragana="true"
* katakana="true" hangul="true" /&gt;
* katakana="true" hangul="true" outputUnigrams="false" /&gt;
* &lt;/analyzer&gt;
* &lt;/fieldType&gt;</pre>
*/
public class CJKBigramFilterFactory extends TokenFilterFactory {
int flags;
boolean outputUnigrams;
@Override
public void init(Map<String,String> args) {
@ -56,10 +57,11 @@ public class CJKBigramFilterFactory extends TokenFilterFactory {
if (getBoolean("hangul", true)) {
flags |= CJKBigramFilter.HANGUL;
}
outputUnigrams = getBoolean("outputUnigrams", false);
}
@Override
public TokenStream create(TokenStream input) {
return new CJKBigramFilter(input, flags);
return new CJKBigramFilter(input, flags, outputUnigrams);
}
}

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/15/12 1:57 AM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
package org.apache.lucene.analysis.standard;
@ -33,8 +33,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
* on 7/15/12 1:57 AM from the specification file
* <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
* on 8/6/12 11:57 AM from the specification file
* <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex</tt>
*/
class ClassicTokenizerImpl implements StandardTokenizerInterface {
@ -42,7 +42,7 @@ class ClassicTokenizerImpl implements StandardTokenizerInterface {
public static final int YYEOF = -1;
/** initial size of the lookahead buffer */
private static final int ZZ_BUFFERSIZE = 16384;
private static final int ZZ_BUFFERSIZE = 4096;
/** lexical states */
public static final int YYINITIAL = 0;

View File

@ -36,6 +36,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%function getNextToken
%pack
%char
%buffer 4096
%{

View File

@ -14,7 +14,7 @@
* limitations under the License.
*/
// Generated using ICU4J 49.1.0.0 on Thursday, July 26, 2012 10:22:01 PM UTC
// Generated using ICU4J 49.1.0.0 on Monday, August 6, 2012 3:57:23 PM UTC
// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
package org.apache.lucene.analysis.standard;
@ -43,7 +43,7 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
public static final int YYEOF = -1;
/** initial size of the lookahead buffer */
private static final int ZZ_BUFFERSIZE = 16384;
private static final int ZZ_BUFFERSIZE = 4096;
/** lexical states */
public static final int YYINITIAL = 0;

View File

@ -44,8 +44,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%implements StandardTokenizerInterface
%function getNextToken
%char
%buffer 4096
%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
%include SUPPLEMENTARY.jflex-macro
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
Format = ([\p{WB:Format}] | {FormatSupp})
Numeric = ([\p{WB:Numeric}] | {NumericSupp})

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/26/12 6:22 PM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
package org.apache.lucene.analysis.standard;
@ -46,7 +46,7 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf
public static final int YYEOF = -1;
/** initial size of the lookahead buffer */
private static final int ZZ_BUFFERSIZE = 16384;
private static final int ZZ_BUFFERSIZE = 4096;
/** lexical states */
public static final int YYINITIAL = 0;

View File

@ -47,8 +47,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%implements StandardTokenizerInterface
%function getNextToken
%char
%buffer 4096
%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
%include SUPPLEMENTARY.jflex-macro
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
Format = ([\p{WB:Format}] | {FormatSupp})
Numeric = ([\p{WB:Numeric}] | {NumericSupp})
@ -88,7 +89,7 @@ HiraganaEx = {Hiragana} ({Format} | {Extend})*
// RFC-5321: Simple Mail Transfer Protocol
// RFC-5322: Internet Message Format
%include src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
%include ASCIITLD.jflex-macro
DomainLabel = [A-Za-z0-9] ([-A-Za-z0-9]* [A-Za-z0-9])?
DomainNameStrict = {DomainLabel} ("." {DomainLabel})* {ASCIITLD}

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 7/15/12 1:57 AM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/6/12 11:57 AM */
package org.apache.lucene.analysis.wikipedia;
@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
* on 7/15/12 1:57 AM from the specification file
* <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
* on 8/6/12 11:57 AM from the specification file
* <tt>/home/rmuir/workspace/lucene-trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex</tt>
*/
class WikipediaTokenizerImpl {
@ -34,7 +34,7 @@ class WikipediaTokenizerImpl {
public static final int YYEOF = -1;
/** initial size of the lookahead buffer */
private static final int ZZ_BUFFERSIZE = 16384;
private static final int ZZ_BUFFERSIZE = 4096;
/** lexical states */
public static final int THREE_SINGLE_QUOTES_STATE = 10;

View File

@ -27,6 +27,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%function getNextToken
%pack
%char
%buffer 4096
%{

View File

@ -18,6 +18,7 @@ package org.apache.lucene.analysis.cjk;
*/
import java.io.Reader;
import java.util.Random;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
@ -33,6 +34,15 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase {
}
};
Analyzer unibiAnalyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
return new TokenStreamComponents(t,
new CJKBigramFilter(t, 0xff, true));
}
};
public void testHuge() throws Exception {
assertAnalyzesTo(analyzer, "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
+ "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
@ -62,6 +72,96 @@ public class TestCJKBigramFilter extends BaseTokenStreamTestCase {
}
};
assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
new String[] { "", "", "", "学生", "", "試験", "", "", "", "" });
new String[] { "", "", "", "学生", "", "試験", "", "", "", "" },
new int[] { 0, 1, 2, 3, 5, 6, 8, 9, 10, 11 },
new int[] { 1, 2, 3, 5, 6, 8, 9, 10, 11, 12 },
new String[] { "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<DOUBLE>", "<HIRAGANA>", "<DOUBLE>",
"<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>" },
new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 },
new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 });
}
public void testAllScripts() throws Exception {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
return new TokenStreamComponents(t,
new CJKBigramFilter(t, 0xff, false));
}
};
assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
new String[] { "多く", "くの", "の学", "学生", "生が", "が試", "試験", "験に", "に落", "落ち", "ちた" });
}
public void testUnigramsAndBigramsAllScripts() throws Exception {
assertAnalyzesTo(unibiAnalyzer, "多くの学生が試験に落ちた。",
new String[] {
"", "多く", "", "くの", "", "の学", "", "学生", "",
"生が", "", "が試", "", "試験", "", "験に", "",
"に落", "", "落ち", "", "ちた", ""
},
new int[] { 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6,
6, 7, 7, 8, 8, 9, 9, 10, 10, 11 },
new int[] { 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,
8, 8, 9, 9, 10, 10, 11, 11, 12, 12 },
new String[] { "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>",
"<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>",
"<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>", "<DOUBLE>", "<SINGLE>" },
new int[] { 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1 },
new int[] { 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1,
2, 1, 2, 1, 2, 1, 2, 1, 2, 1 }
);
}
public void testUnigramsAndBigramsHanOnly() throws Exception {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
Tokenizer t = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
return new TokenStreamComponents(t, new CJKBigramFilter(t, CJKBigramFilter.HAN, true));
}
};
assertAnalyzesTo(a, "多くの学生が試験に落ちた。",
new String[] { "", "", "", "", "学生", "", "", "", "試験", "", "", "", "", "" },
new int[] { 0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 10, 11 },
new int[] { 1, 2, 3, 4, 5, 5, 6, 7, 8, 8, 9, 10, 11, 12 },
new String[] { "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>", "<DOUBLE>",
"<SINGLE>", "<HIRAGANA>", "<SINGLE>", "<DOUBLE>", "<SINGLE>",
"<HIRAGANA>", "<SINGLE>", "<HIRAGANA>", "<HIRAGANA>", "<SINGLE>" },
new int[] { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 },
new int[] { 1, 1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1, 1 });
}
public void testUnigramsAndBigramsHuge() throws Exception {
assertAnalyzesTo(unibiAnalyzer, "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
+ "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた"
+ "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた" + "多くの学生が試験に落ちた",
new String[] {
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", "", "た多",
"", "多く", "", "くの", "", "の学", "", "学生", "", "生が", "", "が試", "", "試験", "", "験に", "", "に落", "", "落ち", "", "ちた", ""
}
);
}
/** blast some random strings through the analyzer */
public void testRandomUnibiStrings() throws Exception {
checkRandomData(random(), unibiAnalyzer, 1000*RANDOM_MULTIPLIER);
}
/** blast some random strings through the analyzer */
public void testRandomUnibiHugeStrings() throws Exception {
Random random = random();
checkRandomData(random, unibiAnalyzer, 100*RANDOM_MULTIPLIER, 8192);
}
}

View File

@ -52,4 +52,16 @@ public class TestCJKBigramFilterFactory extends BaseTokenStreamTestCase {
assertTokenStreamContents(stream,
new String[] { "", "", "", "学生", "", "試験", "", "", "", "" });
}
public void testHanOnlyUnigrams() throws Exception {
Reader reader = new StringReader("多くの学生が試験に落ちた。");
CJKBigramFilterFactory factory = new CJKBigramFilterFactory();
Map<String,String> args = new HashMap<String,String>();
args.put("hiragana", "false");
args.put("outputUnigrams", "true");
factory.init(args);
TokenStream stream = factory.create(new StandardTokenizer(TEST_VERSION_CURRENT, reader));
assertTokenStreamContents(stream,
new String[] { "", "", "", "", "学生", "", "", "", "試験", "", "", "", "", "" });
}
}

View File

@ -100,8 +100,7 @@ public class TestAllAnalyzersHaveFactories extends LuceneTestCase {
private static final ResourceLoader loader = new StringMockResourceLoader("");
public void test() throws Exception {
List<Class<?>> analysisClasses = new ArrayList<Class<?>>();
TestRandomChains.getClassesForPackage("org.apache.lucene.analysis", analysisClasses);
List<Class<?>> analysisClasses = TestRandomChains.getClassesForPackage("org.apache.lucene.analysis");
for (final Class<?> c : analysisClasses) {
final int modifiers = c.getModifiers();

View File

@ -25,6 +25,7 @@ import java.io.StringReader;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Modifier;
import java.net.URI;
import java.net.URL;
import java.nio.CharBuffer;
import java.util.ArrayList;
@ -165,8 +166,7 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
@BeforeClass
public static void beforeClass() throws Exception {
List<Class<?>> analysisClasses = new ArrayList<Class<?>>();
getClassesForPackage("org.apache.lucene.analysis", analysisClasses);
List<Class<?>> analysisClasses = getClassesForPackage("org.apache.lucene.analysis");
tokenizers = new ArrayList<Constructor<? extends Tokenizer>>();
tokenfilters = new ArrayList<Constructor<? extends TokenFilter>>();
charfilters = new ArrayList<Constructor<? extends CharFilter>>();
@ -235,19 +235,30 @@ public class TestRandomChains extends BaseTokenStreamTestCase {
private static <T> Constructor<T> castConstructor(Class<T> instanceClazz, Constructor<?> ctor) {
return (Constructor<T>) ctor;
}
static void getClassesForPackage(String pckgname, List<Class<?>> classes) throws Exception {
public static List<Class<?>> getClassesForPackage(String pckgname) throws Exception {
final List<Class<?>> classes = new ArrayList<Class<?>>();
collectClassesForPackage(pckgname, classes);
assertFalse("No classes found in package '"+pckgname+"'; maybe your test classes are packaged as JAR file?", classes.isEmpty());
return classes;
}
private static void collectClassesForPackage(String pckgname, List<Class<?>> classes) throws Exception {
final ClassLoader cld = TestRandomChains.class.getClassLoader();
final String path = pckgname.replace('.', '/');
final Enumeration<URL> resources = cld.getResources(path);
while (resources.hasMoreElements()) {
final File directory = new File(resources.nextElement().toURI());
final URI uri = resources.nextElement().toURI();
if (!"file".equalsIgnoreCase(uri.getScheme()))
continue;
final File directory = new File(uri);
if (directory.exists()) {
String[] files = directory.list();
for (String file : files) {
if (new File(directory, file).isDirectory()) {
// recurse
String subPackage = pckgname + "." + file;
getClassesForPackage(subPackage, classes);
collectClassesForPackage(subPackage, classes);
}
if (file.endsWith(".class")) {
String clazzName = file.substring(0, file.length() - 6);

View File

@ -43,7 +43,6 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@ -156,7 +155,12 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
CountingSearchTestTask.numSearches = 0;
execBenchmark(algLines);
assertTrue(CountingSearchTestTask.numSearches > 0);
// NOTE: cannot assert this, because on a super-slow
// system, it could be after waiting 0.5 seconds that
// the search threads hadn't yet succeeded in starting
// up and then they start up and do no searching:
//assertTrue(CountingSearchTestTask.numSearches > 0);
}
public void testHighlighting() throws Exception {
@ -201,6 +205,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
// 1. alg definition (required in every "logic" test)
String algLines[] = {
"doc.stored=true",//doc storage is required in order to have text to highlight
"doc.term.vector=true",
"doc.term.vector.offsets=true",
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
"docs.file=" + getReuters20LinesFile(),
@ -487,13 +492,13 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
int totalTokenCount2 = 0;
FieldsEnum fields = MultiFields.getFields(reader).iterator();
String fieldName = null;
while((fieldName = fields.next()) != null) {
Fields fields = MultiFields.getFields(reader);
for (String fieldName : fields) {
if (fieldName.equals(DocMaker.ID_FIELD) || fieldName.equals(DocMaker.DATE_MSEC_FIELD) || fieldName.equals(DocMaker.TIME_SEC_FIELD)) {
continue;
}
Terms terms = fields.terms();
Terms terms = fields.terms(fieldName);
if (terms == null) {
continue;
}

View File

@ -139,29 +139,6 @@
<target name="compile-core" depends="compile-lucene-core"/>
<!--
Run after Junit tests.
-->
<target name="generate-clover-reports" depends="clover">
<fail unless="run.clover">Clover not enabled!</fail>
<mkdir dir="${clover.report.dir}"/>
<fileset dir="build" id="clover.test.result.files">
<include name="**/test/TEST-*.xml"/>
<!-- do not include BW tests -->
<exclude name="backwards/**"/>
</fileset>
<clover-report>
<current outfile="${clover.report.dir}" title="${final.name}" numThreads="0">
<format type="html" filter="assert"/>
<testresults refid="clover.test.result.files"/>
</current>
<current outfile="${clover.report.dir}/clover.xml" title="${final.name}">
<format type="xml" filter="assert"/>
<testresults refid="clover.test.result.files"/>
</current>
</clover-report>
</target>
<!-- Validation (license/notice/api checks). -->
<target name="validate" depends="check-licenses,rat-sources,check-forbidden-apis" description="Validate stuff." />
@ -176,6 +153,7 @@
<apiFileSet dir="${custom-tasks.dir}/forbiddenApis">
<include name="jdk.txt" />
<include name="jdk-deprecated.txt" />
<include name="executors.txt" />
</apiFileSet>
<fileset dir="${basedir}/build" includes="**/*.class" />
</forbidden-apis>

View File

@ -88,7 +88,7 @@
<property name="tests.timezone" value="random" />
<property name="tests.directory" value="random" />
<property name="tests.linedocsfile" value="europarl.lines.txt.gz" />
<property name="tests.loggingfile" value="/dev/null"/>
<property name="tests.loggingfile" value="${common.dir}/tools/junit4/logging.properties"/>
<property name="tests.nightly" value="false" />
<property name="tests.weekly" value="false" />
<property name="tests.slow" value="true" />
@ -700,15 +700,22 @@
<condition property="tests.method" value="${testmethod}*">
<isset property="testmethod" />
</condition>
<condition property="tests.showSuccess" value="true">
<or>
<isset property="tests.class" />
<isset property="tests.method" />
</or>
</condition>
<!-- default -->
<property name="tests.showSuccess" value="false"/>
<condition property="tests.showOutput" value="always">
<or>
<isset property="tests.class" />
<isset property="tests.method" />
</or>
</condition>
<property name="tests.showOutput" value="onerror"/>
<!-- Test macro using junit4. -->
<macrodef name="test-macro" description="Executes junit tests.">
@ -854,6 +861,7 @@
<syspropertyset>
<propertyref prefix="tests.maxfailures" />
<propertyref prefix="tests.failfast" />
<propertyref prefix="tests.badapples" />
</syspropertyset>
<!-- Pass randomized settings to the forked JVM. -->
@ -875,8 +883,7 @@
<junit4:report-text
showThrowable="true"
showStackTraces="true"
showOutputStream="true"
showErrorStream="true"
showOutput="${tests.showOutput}"
showStatusOk="${tests.showSuccess}"
showStatusError="${tests.showError}"
@ -896,8 +903,7 @@
file="@{junit.output.dir}/tests-report.txt"
showThrowable="true"
showStackTraces="true"
showOutputStream="true"
showErrorStream="true"
showOutput="always"
showStatusOk="true"
showStatusError="true"
@ -913,8 +919,7 @@
file="@{junit.output.dir}/tests-failures.txt"
showThrowable="true"
showStackTraces="true"
showOutputStream="true"
showErrorStream="true"
showOutput="onerror"
showStatusOk="false"
showStatusError="true"
@ -929,8 +934,13 @@
the slowest tests or for reuse in balancing). -->
<junit4:report-execution-times file="@{junit.output.dir}/tests-timehints.txt" historyLength="5" />
<junit4:report-ant-xml dir="@{junit.output.dir}" />
<junit4:report-json file="@{junit.output.dir}/tests-report-${ant.project.name}/index.html" />
<!-- ANT-compatible XMLs for jenkins records etc. -->
<junit4:report-ant-xml dir="@{junit.output.dir}" outputStreams="no" />
<!--
Enable if you wish to have a nice HTML5 report.
<junit4:report-json file="@{junit.output.dir}/tests-report-${ant.project.name}/index.html" outputStreams="no" />
-->
</listeners>
<!-- Input test classes. -->

View File

@ -480,7 +480,7 @@ public class MyAnalyzer extends Analyzer {
System.out.println(termAtt.toString());
}
stream.end()
stream.end();
} finally {
stream.close();
}
@ -509,7 +509,7 @@ easily by adding a LengthFilter to the chain. Only the
{@literal @Override}
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
final Tokenizer source = new WhitespaceTokenizer(matchVersion, reader);
TokenStream result = new LengthFilter(source, 3, Integer.MAX_VALUE);
TokenStream result = new LengthFilter(true, source, 3, Integer.MAX_VALUE);
return new TokenStreamComponents(source, result);
}
</pre>

View File

@ -27,7 +27,6 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
@ -40,6 +39,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.DoubleBarrelLRUCache;
import org.apache.lucene.util.UnmodifiableIterator;
/** Handles a terms dict, but decouples all details of
* doc/freqs/positions reading to an instance of {@link
@ -184,8 +184,8 @@ public class BlockTermsReader extends FieldsProducer {
}
@Override
public FieldsEnum iterator() {
return new TermFieldsEnum();
public Iterator<String> iterator() {
return new UnmodifiableIterator<String>(fields.keySet().iterator());
}
@Override
@ -199,32 +199,6 @@ public class BlockTermsReader extends FieldsProducer {
return fields.size();
}
// Iterates through all fields
private class TermFieldsEnum extends FieldsEnum {
final Iterator<FieldReader> it;
FieldReader current;
TermFieldsEnum() {
it = fields.values().iterator();
}
@Override
public String next() {
if (it.hasNext()) {
current = it.next();
return current.fieldInfo.name;
} else {
current = null;
return null;
}
}
@Override
public Terms terms() throws IOException {
return current;
}
}
private class FieldReader extends Terms {
final long numTerms;
final FieldInfo fieldInfo;
@ -253,6 +227,21 @@ public class BlockTermsReader extends FieldsProducer {
return new SegmentTermsEnum();
}
@Override
public boolean hasOffsets() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
}
@Override
public boolean hasPositions() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
}
@Override
public boolean hasPayloads() {
return fieldInfo.hasPayloads();
}
@Override
public long size() {
return numTerms;
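The hasOffsets/hasPositions overrides added above lean on IndexOptions being an ordered enum, from least to most inclusive, so a single compareTo answers "was at least this much indexed?". A minimal sketch of that idiom, assuming only the Lucene 4.0 IndexOptions constants (the IndexOptionsCheck class is illustrative, not part of the patch):

import org.apache.lucene.index.FieldInfo.IndexOptions;

class IndexOptionsCheck {
  // IndexOptions is ordered: DOCS_ONLY < DOCS_AND_FREQS
  // < DOCS_AND_FREQS_AND_POSITIONS < DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
  // so compareTo() >= 0 means "at least this much was indexed".
  static boolean atLeast(IndexOptions actual, IndexOptions required) {
    return actual.compareTo(required) >= 0;
  }

  public static void main(String[] args) {
    IndexOptions opts = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
    System.out.println(atLeast(opts, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS));             // true: positions present
    System.out.println(atLeast(opts, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS)); // false: no offsets
  }
}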
View File
@ -31,7 +31,6 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
@ -46,6 +45,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnmodifiableIterator;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.RunAutomaton;
import org.apache.lucene.util.automaton.Transition;
@ -199,8 +199,8 @@ public class BlockTreeTermsReader extends FieldsProducer {
}
@Override
public FieldsEnum iterator() {
return new TermFieldsEnum();
public Iterator<String> iterator() {
return new UnmodifiableIterator<String>(fields.keySet().iterator());
}
@Override
@ -214,32 +214,6 @@ public class BlockTreeTermsReader extends FieldsProducer {
return fields.size();
}
// Iterates through all fields
private class TermFieldsEnum extends FieldsEnum {
final Iterator<FieldReader> it;
FieldReader current;
TermFieldsEnum() {
it = fields.values().iterator();
}
@Override
public String next() {
if (it.hasNext()) {
current = it.next();
return current.fieldInfo.name;
} else {
current = null;
return null;
}
}
@Override
public Terms terms() throws IOException {
return current;
}
}
// for debugging
String brToString(BytesRef b) {
if (b == null) {
@ -456,6 +430,21 @@ public class BlockTreeTermsReader extends FieldsProducer {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
public boolean hasOffsets() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
}
@Override
public boolean hasPositions() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
}
@Override
public boolean hasPayloads() {
return fieldInfo.hasPayloads();
}
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
return new SegmentTermsEnum();
View File
@ -22,7 +22,6 @@ import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentWriteState; // javadocs
import org.apache.lucene.index.Terms;
@ -53,13 +52,10 @@ public abstract class FieldsConsumer implements Closeable {
public abstract void close() throws IOException;
public void merge(MergeState mergeState, Fields fields) throws IOException {
FieldsEnum fieldsEnum = fields.iterator();
assert fieldsEnum != null;
String field;
while((field = fieldsEnum.next()) != null) {
for (String field : fields) {
mergeState.fieldInfo = mergeState.fieldInfos.fieldInfo(field);
assert mergeState.fieldInfo != null : "FieldInfo for field is null: "+ field;
Terms terms = fieldsEnum.terms();
Terms terms = fields.terms(field);
if (terms != null) {
final TermsConsumer termsConsumer = addField(mergeState.fieldInfo);
termsConsumer.merge(mergeState, terms.iterator(null));
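This hunk swaps the removed FieldsEnum cursor for plain Iterable&lt;String&gt; iteration: callers now walk the field names and fetch each Terms by name. A minimal consumer sketch under that API (FieldsWalker and countTerms are illustrative, not part of the patch):

import java.io.IOException;

import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;

class FieldsWalker {
  // Visits every field and its terms; replaces the old FieldsEnum.next()/terms() loop.
  static long countTerms(Fields fields) throws IOException {
    long total = 0;
    for (String field : fields) {          // Fields now implements Iterable<String>
      Terms terms = fields.terms(field);   // may be null if the field has no terms
      if (terms == null) {
        continue;
      }
      TermsEnum termsEnum = terms.iterator(null);
      while (termsEnum.next() != null) {
        total++;
      }
    }
    return total;
  }
}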
View File
@ -124,15 +124,17 @@ public final class MappingMultiDocsAndPositionsEnum extends DocsAndPositionsEnum
@Override
public BytesRef getPayload() throws IOException {
BytesRef payload = current.getPayload();
if (mergeState.currentPayloadProcessor[upto] != null) {
if (mergeState.currentPayloadProcessor[upto] != null && payload != null) {
// to not violate the D&P api, we must give the processor a private copy
// TODO: reuse a BytesRef if there is a PPP
payload = BytesRef.deepCopyOf(payload);
mergeState.currentPayloadProcessor[upto].processPayload(payload);
if (payload.length == 0) {
// don't let PayloadProcessors corrupt the index
return null;
}
}
return payload;
}
@Override
public boolean hasPayload() {
return current.hasPayload();
}
}
View File
@ -112,12 +112,7 @@ public abstract class PostingsConsumer {
totTF += freq;
for(int i=0;i<freq;i++) {
final int position = postingsEnum.nextPosition();
final BytesRef payload;
if (postingsEnum.hasPayload()) {
payload = postingsEnum.getPayload();
} else {
payload = null;
}
final BytesRef payload = postingsEnum.getPayload();
this.addPosition(position, payload, -1, -1);
}
this.finishDoc();
@ -137,12 +132,7 @@ public abstract class PostingsConsumer {
totTF += freq;
for(int i=0;i<freq;i++) {
final int position = postingsEnum.nextPosition();
final BytesRef payload;
if (postingsEnum.hasPayload()) {
payload = postingsEnum.getPayload();
} else {
payload = null;
}
final BytesRef payload = postingsEnum.getPayload();
this.addPosition(position, payload, postingsEnum.startOffset(), postingsEnum.endOffset());
}
this.finishDoc();
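Both hunks above drop the hasPayload() pre-check: getPayload() now returns null when the current position carries no payload. A small sketch of the resulting consumer pattern (PayloadScan is a hypothetical helper, not part of the patch):

import java.io.IOException;

import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.util.BytesRef;

class PayloadScan {
  // Counts the positions of the current document that carry a payload,
  // using the new null-means-absent contract.
  static int countPayloads(DocsAndPositionsEnum postings) throws IOException {
    int withPayload = 0;
    final int freq = postings.freq();
    for (int i = 0; i < freq; i++) {
      postings.nextPosition();
      final BytesRef payload = postings.getPayload(); // null: no payload at this position
      if (payload != null) {
        withPayload++;     // copy via BytesRef.deepCopyOf if it must outlive nextPosition()
      }
    }
    return withPayload;
  }
}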
View File
@ -26,8 +26,9 @@ import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
import org.apache.lucene.index.PayloadProcessorProvider.ReaderPayloadProcessor;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
@ -41,14 +42,14 @@ import org.apache.lucene.util.BytesRef;
* <ol>
* <li>For every document, {@link #startDocument(int)} is called,
* informing the Codec how many fields will be written.
* <li>{@link #startField(FieldInfo, int, boolean, boolean)} is called for
* <li>{@link #startField(FieldInfo, int, boolean, boolean, boolean)} is called for
* each field in the document, informing the codec how many terms
* will be written for that field, and whether or not positions
* or offsets are enabled.
* will be written for that field, and whether or not positions,
* offsets, or payloads are enabled.
* <li>Within each field, {@link #startTerm(BytesRef, int)} is called
* for each term.
* <li>If offsets and/or positions are enabled, then
* {@link #addPosition(int, int, int)} will be called for each term
* {@link #addPosition(int, int, int, BytesRef)} will be called for each term
* occurrence.
* <li>After all documents have been written, {@link #finish(FieldInfos, int)}
* is called for verification/sanity-checks (see the sketch after this list).
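A minimal sketch of that call sequence for one document with a single vector field, using the signatures as updated in this commit; the writer, the FieldInfo, and the literal terms/offsets/payload are assumed for illustration only:

import java.io.IOException;

import org.apache.lucene.codecs.TermVectorsWriter;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.util.BytesRef;

class VectorWriteSketch {
  // One doc, one field, two single-occurrence terms (terms must arrive in sorted
  // order). finish(FieldInfos, numDocs) would follow once all docs are written.
  static void writeOneDoc(TermVectorsWriter writer, FieldInfo fieldInfo) throws IOException {
    writer.startDocument(1);                           // this doc has one vector field
    writer.startField(fieldInfo, 2, true, true, true); // 2 terms; positions, offsets, payloads
    writer.startTerm(new BytesRef("bone"), 1);         // freq=1 -> one addPosition call
    writer.addPosition(0, 0, 4, null);                 // no payload at this position
    writer.startTerm(new BytesRef("boy"), 1);
    writer.addPosition(1, 5, 8, new BytesRef("pay"));  // payload attached to this occurrence
  }
}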
@ -60,7 +61,7 @@ import org.apache.lucene.util.BytesRef;
public abstract class TermVectorsWriter implements Closeable {
/** Called before writing the term vectors of the document.
* {@link #startField(FieldInfo, int, boolean, boolean)} will
* {@link #startField(FieldInfo, int, boolean, boolean, boolean)} will
* be called <code>numVectorFields</code> times. Note that if term
* vectors are enabled, this is called even if the document
* has no vector fields; in this case <code>numVectorFields</code>
@ -69,17 +70,17 @@ public abstract class TermVectorsWriter implements Closeable {
/** Called before writing the terms of the field.
* {@link #startTerm(BytesRef, int)} will be called <code>numTerms</code> times. */
public abstract void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets) throws IOException;
public abstract void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads) throws IOException;
/** Adds a term and its term frequency <code>freq</code>.
* If this field has positions and/or offsets enabled, then
* {@link #addPosition(int, int, int)} will be called
* {@link #addPosition(int, int, int, BytesRef)} will be called
* <code>freq</code> times respectively.
*/
public abstract void startTerm(BytesRef term, int freq) throws IOException;
/** Adds a term position and offsets */
public abstract void addPosition(int position, int startOffset, int endOffset) throws IOException;
public abstract void addPosition(int position, int startOffset, int endOffset, BytesRef payload) throws IOException;
/** Aborts writing entirely, implementation should remove
* any partially-written files, etc. */
@ -99,7 +100,7 @@ public abstract class TermVectorsWriter implements Closeable {
* This is an expert API that allows the codec to consume
* positions and offsets directly from the indexer.
* <p>
* The default implementation calls {@link #addPosition(int, int, int)},
* The default implementation calls {@link #addPosition(int, int, int, BytesRef)},
* but subclasses can override this if they want to efficiently write
* all the positions, then all the offsets, for example.
* <p>
@ -111,15 +112,36 @@ public abstract class TermVectorsWriter implements Closeable {
public void addProx(int numProx, DataInput positions, DataInput offsets) throws IOException {
int position = 0;
int lastOffset = 0;
BytesRef payload = null;
for (int i = 0; i < numProx; i++) {
final int startOffset;
final int endOffset;
final BytesRef thisPayload;
if (positions == null) {
position = -1;
thisPayload = null;
} else {
position += positions.readVInt();
int code = positions.readVInt();
position += code >>> 1;
if ((code & 1) != 0) {
// This position has a payload
final int payloadLength = positions.readVInt();
if (payload == null) {
payload = new BytesRef();
payload.bytes = new byte[payloadLength];
} else if (payload.bytes.length < payloadLength) {
payload.grow(payloadLength);
}
positions.readBytes(payload.bytes, 0, payloadLength);
payload.length = payloadLength;
thisPayload = payload;
} else {
thisPayload = null;
}
}
if (offsets == null) {
@ -129,24 +151,31 @@ public abstract class TermVectorsWriter implements Closeable {
endOffset = startOffset + offsets.readVInt();
lastOffset = endOffset;
}
addPosition(position, startOffset, endOffset);
addPosition(position, startOffset, endOffset, thisPayload);
}
}
/** Merges in the term vectors from the readers in
* <code>mergeState</code>. The default implementation skips
* over deleted documents, and uses {@link #startDocument(int)},
* {@link #startField(FieldInfo, int, boolean, boolean)},
* {@link #startTerm(BytesRef, int)}, {@link #addPosition(int, int, int)},
* {@link #startField(FieldInfo, int, boolean, boolean, boolean)},
* {@link #startTerm(BytesRef, int)}, {@link #addPosition(int, int, int, BytesRef)},
* and {@link #finish(FieldInfos, int)},
* returning the number of documents that were written.
* Implementations can override this method for more sophisticated
* merging (bulk-byte copying, etc). */
public int merge(MergeState mergeState) throws IOException {
int docCount = 0;
for (AtomicReader reader : mergeState.readers) {
for (int i = 0; i < mergeState.readers.size(); i++) {
final AtomicReader reader = mergeState.readers.get(i);
final int maxDoc = reader.maxDoc();
final Bits liveDocs = reader.getLiveDocs();
// set PayloadProcessor
if (mergeState.payloadProcessorProvider != null) {
mergeState.currentReaderPayloadProcessor = mergeState.readerPayloadProcessor[i];
} else {
mergeState.currentReaderPayloadProcessor = null;
}
for (int docID = 0; docID < maxDoc; docID++) {
if (liveDocs != null && !liveDocs.get(docID)) {
// skip deleted docs
@ -155,7 +184,7 @@ public abstract class TermVectorsWriter implements Closeable {
// NOTE: it's very important to first assign to vectors then pass it to
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
Fields vectors = reader.getTermVectors(docID);
addAllDocVectors(vectors, mergeState.fieldInfos);
addAllDocVectors(vectors, mergeState);
docCount++;
mergeState.checkAbort.work(300);
}
@ -169,7 +198,7 @@ public abstract class TermVectorsWriter implements Closeable {
* implementation requires that the vectors implement
* both Fields.size and
* Terms.size. */
protected final void addAllDocVectors(Fields vectors, FieldInfos fieldInfos) throws IOException {
protected final void addAllDocVectors(Fields vectors, MergeState mergeState) throws IOException {
if (vectors == null) {
startDocument(0);
return;
@ -181,54 +210,55 @@ public abstract class TermVectorsWriter implements Closeable {
}
startDocument(numFields);
final FieldsEnum fieldsEnum = vectors.iterator();
String fieldName;
String lastFieldName = null;
TermsEnum termsEnum = null;
DocsAndPositionsEnum docsAndPositionsEnum = null;
final ReaderPayloadProcessor readerPayloadProcessor = mergeState.currentReaderPayloadProcessor;
PayloadProcessor payloadProcessor = null;
while((fieldName = fieldsEnum.next()) != null) {
final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldName);
for(String fieldName : vectors) {
final FieldInfo fieldInfo = mergeState.fieldInfos.fieldInfo(fieldName);
assert lastFieldName == null || fieldName.compareTo(lastFieldName) > 0: "lastFieldName=" + lastFieldName + " fieldName=" + fieldName;
lastFieldName = fieldName;
final Terms terms = fieldsEnum.terms();
final Terms terms = vectors.terms(fieldName);
if (terms == null) {
// the Fields API shouldn't lie...
continue;
}
final boolean hasPositions = terms.hasPositions();
final boolean hasOffsets = terms.hasOffsets();
final boolean hasPayloads = terms.hasPayloads();
assert !hasPayloads || hasPositions;
final int numTerms = (int) terms.size();
if (numTerms == -1) {
throw new IllegalStateException("terms.size() must be implemented (it returned -1)");
}
final TermsEnum termsEnum = terms.iterator(null);
DocsAndPositionsEnum docsAndPositionsEnum = null;
boolean startedField = false;
// NOTE: this is tricky, because TermVectors allow
// indexing offsets but NOT positions. So we must
// lazily init the field by checking whether first
// position we see is -1 or not.
startField(fieldInfo, numTerms, hasPositions, hasOffsets, hasPayloads);
termsEnum = terms.iterator(termsEnum);
int termCount = 0;
while(termsEnum.next() != null) {
termCount++;
final int freq = (int) termsEnum.totalTermFreq();
if (startedField) {
startTerm(termsEnum.term(), freq);
startTerm(termsEnum.term(), freq);
if (hasPayloads && readerPayloadProcessor != null) {
payloadProcessor = readerPayloadProcessor.getProcessor(fieldName, termsEnum.term());
}
// TODO: we need a "query" API where we can ask (via
// flex API) what this term was indexed with...
// Both positions & offsets:
docsAndPositionsEnum = termsEnum.docsAndPositions(null, null);
boolean hasOffsets = false;
boolean hasPositions = false;
if (docsAndPositionsEnum != null) {
if (hasPositions || hasOffsets) {
docsAndPositionsEnum = termsEnum.docsAndPositions(null, docsAndPositionsEnum);
assert docsAndPositionsEnum != null;
final int docID = docsAndPositionsEnum.nextDoc();
assert docID != DocIdSetIterator.NO_MORE_DOCS;
assert docsAndPositionsEnum.freq() == freq;
@ -237,27 +267,21 @@ public abstract class TermVectorsWriter implements Closeable {
final int pos = docsAndPositionsEnum.nextPosition();
final int startOffset = docsAndPositionsEnum.startOffset();
final int endOffset = docsAndPositionsEnum.endOffset();
if (!startedField) {
assert numTerms > 0;
hasPositions = pos != -1;
hasOffsets = startOffset != -1;
startField(fieldInfo, numTerms, hasPositions, hasOffsets);
startTerm(termsEnum.term(), freq);
startedField = true;
}
if (hasOffsets) {
assert startOffset != -1;
assert endOffset != -1;
BytesRef payload = docsAndPositionsEnum.getPayload();
if (payloadProcessor != null && payload != null) {
// to not violate the D&P api, we must give the processor a private copy
payload = BytesRef.deepCopyOf(payload);
payloadProcessor.processPayload(payload);
if (payload.length == 0) {
// don't let PayloadProcessors corrupt the index
payload = null;
}
}
assert !hasPositions || pos >= 0;
addPosition(pos, startOffset, endOffset);
}
} else {
if (!startedField) {
assert numTerms > 0;
startField(fieldInfo, numTerms, hasPositions, hasOffsets);
startTerm(termsEnum.term(), freq);
startedField = true;
addPosition(pos, startOffset, endOffset, payload);
}
}
}
View File
@ -954,11 +954,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
return -1;
}
@Override
public boolean hasPayload() {
return false;
}
@Override
public BytesRef getPayload() {
return null;
@ -1226,10 +1221,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
if (DEBUG) {
System.out.println(" FPR.nextDoc");
}
if (indexHasPayloads) {
payloadByteUpto += payloadLength;
payloadLength = 0;
}
while (true) {
if (DEBUG) {
System.out.println(" docUpto=" + docUpto + " (of df=" + docFreq + ") docBufferUpto=" + docBufferUpto);
@ -1255,7 +1246,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
System.out.println(" return doc=" + doc + " freq=" + freq + " posPendingCount=" + posPendingCount);
}
position = 0;
payloadLength = 0;
lastStartOffset = 0;
return doc;
}
@ -1355,12 +1345,7 @@ public final class BlockPostingsReader extends PostingsReaderBase {
if (DEBUG) {
System.out.println(" return doc=" + accum);
}
if (indexHasPayloads) {
payloadByteUpto += payloadLength;
payloadLength = 0;
}
position = 0;
payloadLength = 0;
lastStartOffset = 0;
return doc = accum;
} else {
@ -1433,7 +1418,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
}
position = 0;
payloadLength = 0;
lastStartOffset = 0;
}
@ -1461,16 +1445,6 @@ public final class BlockPostingsReader extends PostingsReaderBase {
posBufferUpto = BLOCK_SIZE;
}
if (indexHasPayloads) {
if (DEBUG) {
if (payloadLength != 0) {
System.out.println(" skip unread payload length=" + payloadLength);
}
}
payloadByteUpto += payloadLength;
payloadLength = 0;
}
if (posPendingCount > freq) {
skipPositions();
posPendingCount = freq;
@ -1484,6 +1458,10 @@ public final class BlockPostingsReader extends PostingsReaderBase {
if (indexHasPayloads) {
payloadLength = payloadLengthBuffer[posBufferUpto];
payload.bytes = payloadBytes;
payload.offset = payloadByteUpto;
payload.length = payloadLength;
payloadByteUpto += payloadLength;
}
if (indexHasOffsets) {
@ -1510,22 +1488,16 @@ public final class BlockPostingsReader extends PostingsReaderBase {
return endOffset;
}
@Override
public boolean hasPayload() {
return payloadLength != 0;
}
@Override
public BytesRef getPayload() {
if (DEBUG) {
System.out.println(" FPR.getPayload payloadLength=" + payloadLength + " payloadByteUpto=" + payloadByteUpto);
}
payload.bytes = payloadBytes;
payload.offset = payloadByteUpto;
payload.length = payloadLength;
payloadByteUpto += payloadLength;
payloadLength = 0;
return payload;
if (payloadLength == 0) {
return null;
} else {
return payload;
}
}
}
}
View File
@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@ -35,7 +36,6 @@ import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
@ -44,7 +44,6 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FuzzySet;
@ -187,9 +186,8 @@ public class BloomFilteringPostingsFormat extends PostingsFormat {
}
public FieldsEnum iterator() throws IOException {
return new BloomFilteredFieldsEnum(delegateFieldsProducer.iterator(),
bloomsByFieldName);
public Iterator<String> iterator() {
return delegateFieldsProducer.iterator();
}
public void close() throws IOException {
@ -217,44 +215,6 @@ public class BloomFilteringPostingsFormat extends PostingsFormat {
return delegateFieldsProducer.getUniqueTermCount();
}
// Not all fields in a segment may be subject to a bloom filter. This class
// wraps Terms objects appropriately if a filtering request is present
class BloomFilteredFieldsEnum extends FieldsEnum {
private FieldsEnum delegateFieldsEnum;
private HashMap<String,FuzzySet> bloomsByFieldName;
private String currentFieldName;
public BloomFilteredFieldsEnum(FieldsEnum iterator,
HashMap<String,FuzzySet> bloomsByFieldName) {
this.delegateFieldsEnum = iterator;
this.bloomsByFieldName = bloomsByFieldName;
}
public AttributeSource attributes() {
return delegateFieldsEnum.attributes();
}
public String next() throws IOException {
currentFieldName = delegateFieldsEnum.next();
return currentFieldName;
}
public Terms terms() throws IOException {
FuzzySet filter = bloomsByFieldName.get(currentFieldName);
if (filter == null) {
return delegateFieldsEnum.terms();
} else {
Terms result = delegateFieldsEnum.terms();
if (result == null) {
return null;
}
// wrap the terms object with a bloom filter
return new BloomFilteredTerms(result, filter);
}
}
}
class BloomFilteredTerms extends Terms {
private Terms delegateTerms;
private FuzzySet filter;
@ -314,6 +274,21 @@ public class BloomFilteringPostingsFormat extends PostingsFormat {
public int getDocCount() throws IOException {
return delegateTerms.getDocCount();
}
@Override
public boolean hasOffsets() {
return delegateTerms.hasOffsets();
}
@Override
public boolean hasPositions() {
return delegateTerms.hasPositions();
}
@Override
public boolean hasPayloads() {
return delegateTerms.hasPayloads();
}
}
class BloomFilteredTermsEnum extends TermsEnum {
View File
@ -873,12 +873,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
* payload was indexed. */
@Override
public BytesRef getPayload() throws IOException {
throw new IOException("No payloads exist for this field!");
}
@Override
public boolean hasPayload() {
return false;
return null;
}
}
@ -1152,28 +1147,26 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
@Override
public BytesRef getPayload() throws IOException {
if (storePayloads) {
if (payloadLength <= 0) {
return null;
}
assert lazyProxPointer == -1;
assert posPendingCount < freq;
if (!payloadPending) {
throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
}
if (payloadLength > payload.bytes.length) {
payload.grow(payloadLength);
}
if (payloadPending) {
if (payloadLength > payload.bytes.length) {
payload.grow(payloadLength);
}
proxIn.readBytes(payload.bytes, 0, payloadLength);
payload.length = payloadLength;
payloadPending = false;
proxIn.readBytes(payload.bytes, 0, payloadLength);
payload.length = payloadLength;
payloadPending = false;
}
return payload;
} else {
throw new IOException("No payloads exist for this field!");
return null;
}
}
@Override
public boolean hasPayload() {
return payloadPending && payloadLength > 0;
}
}
}
View File
@ -67,33 +67,46 @@ import org.apache.lucene.store.IOContext;
* <li><a name="tvf" id="tvf"></a>
* <p>The Field or .tvf file.</p>
* <p>This file contains, for each field that has a term vector stored, a list of
* the terms, their frequencies and, optionally, position and offset
* the terms, their frequencies and, optionally, position, offset, and payload
* information.</p>
* <p>Field (.tvf) --&gt; Header,&lt;NumTerms, Position/Offset, TermFreqs&gt;
* <p>Field (.tvf) --&gt; Header,&lt;NumTerms, Flags, TermFreqs&gt;
* <sup>NumFields</sup></p>
* <ul>
* <li>Header --&gt; {@link CodecUtil#writeHeader CodecHeader}</li>
* <li>NumTerms --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>Position/Offset --&gt; {@link DataOutput#writeByte Byte}</li>
* <li>TermFreqs --&gt; &lt;TermText, TermFreq, Positions?, Offsets?&gt;
* <li>Flags --&gt; {@link DataOutput#writeByte Byte}</li>
* <li>TermFreqs --&gt; &lt;TermText, TermFreq, Positions?, PayloadData?, Offsets?&gt;
* <sup>NumTerms</sup></li>
* <li>TermText --&gt; &lt;PrefixLength, Suffix&gt;</li>
* <li>PrefixLength --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>Suffix --&gt; {@link DataOutput#writeString String}</li>
* <li>TermFreq --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>Positions --&gt; &lt;{@link DataOutput#writeVInt VInt}&gt;<sup>TermFreq</sup></li>
* <li>Positions --&gt; &lt;PositionDelta PayloadLength?&gt;<sup>TermFreq</sup></li>
* <li>PositionDelta --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>PayloadLength --&gt; {@link DataOutput#writeVInt VInt}</li>
* <li>PayloadData --&gt; {@link DataOutput#writeByte Byte}<sup>NumPayloadBytes</sup></li>
* <li>Offsets --&gt; &lt;{@link DataOutput#writeVInt VInt}, {@link DataOutput#writeVInt VInt}&gt;<sup>TermFreq</sup></li>
* </ul>
* <p>Notes:</p>
* <ul>
* <li>Position/Offset byte stores whether this term vector has position or offset
* <li>Flags byte stores whether this term vector has position, offset, or payload
* information stored.</li>
* <li>Term byte prefixes are shared. The PrefixLength is the number of initial
* bytes from the previous term which must be pre-pended to a term's suffix
* in order to form the term's bytes. Thus, if the previous term's text was "bone"
* and the term is "boy", the PrefixLength is two and the suffix is "y".</li>
* <li>Positions are stored as delta encoded VInts. This means we only store the
* difference of the current position from the last position</li>
* <li>PositionDelta is, if payloads are disabled for the term's field, the
* difference between the position of the current occurrence in the document and
* the previous occurrence (or zero, if this is the first occurrence in this
* document). If payloads are enabled for the term's field, then PositionDelta/2
* is the difference between the current and the previous position. If payloads
* are enabled and PositionDelta is odd, then PayloadLength is stored, indicating
* the length of the payload at the current term position.</li>
* <li>PayloadData is metadata associated with a term position. If
* PayloadLength is stored at the current position, then it indicates the length
* of this payload. If PayloadLength is not stored, then this payload has the same
* length as the payload at the previous position. PayloadData encodes the
* concatenated bytes for all of a term's occurrences (see the sketch after this list).</li>
* <li>Offsets are stored as delta encoded VInts. The first VInt is the
* startOffset, the second is the endOffset.</li>
* </ul>
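A sketch of the PositionDelta/PayloadLength scheme described above, mirroring the writer side: each delta is left-shifted, the low bit flags a length change, and the payload bytes for the whole term are concatenated afterwards. Only DataOutput's VInt/byte APIs are assumed; TvfPositionSketch and its arguments are illustrative:

import java.io.IOException;

import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.BytesRef;

class TvfPositionSketch {
  // Encodes one term's positions when payloads are enabled: PositionDelta is
  // (delta << 1) | lengthChanged; a PayloadLength VInt follows only when the low
  // bit is set; PayloadData is the concatenation of all payload bytes for the term.
  static void writePositions(DataOutput tvf, int[] positions, BytesRef[] payloads) throws IOException {
    int lastPosition = 0;
    int lastPayloadLength = -1;             // forces the first occurrence to write its length
    for (int i = 0; i < positions.length; i++) {
      final int delta = positions[i] - lastPosition;
      lastPosition = positions[i];
      final int payloadLength = payloads[i] == null ? 0 : payloads[i].length;
      if (payloadLength != lastPayloadLength) {
        tvf.writeVInt((delta << 1) | 1);    // odd code: PayloadLength follows
        tvf.writeVInt(payloadLength);
        lastPayloadLength = payloadLength;
      } else {
        tvf.writeVInt(delta << 1);          // even code: previous length still applies
      }
    }
    for (BytesRef p : payloads) {           // PayloadData: concatenated payload bytes
      if (p != null) {
        tvf.writeBytes(p.bytes, p.offset, p.length);
      }
    }
  }
}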
View File
@ -21,7 +21,9 @@ import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.TermVectorsReader;
@ -30,7 +32,6 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.Terms;
@ -55,6 +56,8 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x2;
static final byte STORE_PAYLOAD_WITH_TERMVECTOR = 0x4;
/** Extension of vectors fields file */
static final String VECTORS_FIELDS_EXTENSION = "tvf";
@ -68,8 +71,10 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
static final String CODEC_NAME_DOCS = "Lucene40TermVectorsDocs";
static final String CODEC_NAME_INDEX = "Lucene40TermVectorsIndex";
static final int VERSION_START = 0;
static final int VERSION_CURRENT = VERSION_START;
static final int VERSION_NO_PAYLOADS = 0;
static final int VERSION_PAYLOADS = 1;
static final int VERSION_START = VERSION_NO_PAYLOADS;
static final int VERSION_CURRENT = VERSION_PAYLOADS;
static final long HEADER_LENGTH_FIELDS = CodecUtil.headerLength(CODEC_NAME_FIELDS);
static final long HEADER_LENGTH_DOCS = CodecUtil.headerLength(CODEC_NAME_DOCS);
@ -245,9 +250,8 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
}
@Override
public FieldsEnum iterator() throws IOException {
return new FieldsEnum() {
public Iterator<String> iterator() {
return new Iterator<String>() {
private int fieldUpto;
@Override
@ -255,13 +259,18 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
if (fieldNumbers != null && fieldUpto < fieldNumbers.length) {
return fieldInfos.fieldInfo(fieldNumbers[fieldUpto++]).name;
} else {
return null;
throw new NoSuchElementException();
}
}
@Override
public Terms terms() throws IOException {
return TVFields.this.terms(fieldInfos.fieldInfo(fieldNumbers[fieldUpto-1]).name);
public boolean hasNext() {
return fieldNumbers != null && fieldUpto < fieldNumbers.length;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
@ -296,10 +305,17 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
private class TVTerms extends Terms {
private final int numTerms;
private final long tvfFPStart;
private final boolean storePositions;
private final boolean storeOffsets;
private final boolean storePayloads;
public TVTerms(long tvfFP) throws IOException {
tvf.seek(tvfFP);
numTerms = tvf.readVInt();
final byte bits = tvf.readByte();
storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
storePayloads = (bits & STORE_PAYLOAD_WITH_TERMVECTOR) != 0;
tvfFPStart = tvf.getFilePointer();
}
@ -314,7 +330,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
} else {
termsEnum = new TVTermsEnum();
}
termsEnum.reset(numTerms, tvfFPStart);
termsEnum.reset(numTerms, tvfFPStart, storePositions, storeOffsets, storePayloads);
return termsEnum;
}
@ -345,6 +361,21 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
// this...? I guess codec could buffer and re-sort...
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
public boolean hasOffsets() {
return storeOffsets;
}
@Override
public boolean hasPositions() {
return storePositions;
}
@Override
public boolean hasPayloads() {
return storePayloads;
}
}
private class TVTermsEnum extends TermsEnum {
@ -357,11 +388,17 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
private BytesRef term = new BytesRef();
private boolean storePositions;
private boolean storeOffsets;
private boolean storePayloads;
private long tvfFP;
private int[] positions;
private int[] startOffsets;
private int[] endOffsets;
// one shared byte[] for any term's payloads
private int[] payloadOffsets;
private int lastPayloadLength;
private byte[] payloadData;
// NOTE: tvf is pre-positioned by caller
public TVTermsEnum() {
@ -373,17 +410,20 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
return tvf == origTVF;
}
public void reset(int numTerms, long tvfFPStart) throws IOException {
public void reset(int numTerms, long tvfFPStart, boolean storePositions, boolean storeOffsets, boolean storePayloads) throws IOException {
this.numTerms = numTerms;
this.storePositions = storePositions;
this.storeOffsets = storeOffsets;
this.storePayloads = storePayloads;
nextTerm = 0;
tvf.seek(tvfFPStart);
final byte bits = tvf.readByte();
storePositions = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0;
storeOffsets = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0;
tvfFP = 1+tvfFPStart;
positions = null;
startOffsets = null;
endOffsets = null;
payloadOffsets = null;
payloadData = null;
lastPayloadLength = -1;
}
// NOTE: slow! (linear scan)
@ -430,7 +470,26 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
tvf.readBytes(term.bytes, start, deltaLen);
freq = tvf.readVInt();
if (storePositions) {
if (storePayloads) {
positions = new int[freq];
payloadOffsets = new int[freq];
int totalPayloadLength = 0;
int pos = 0;
for(int posUpto=0;posUpto<freq;posUpto++) {
int code = tvf.readVInt();
pos += code >>> 1;
positions[posUpto] = pos;
if ((code & 1) != 0) {
// length change
lastPayloadLength = tvf.readVInt();
}
payloadOffsets[posUpto] = totalPayloadLength;
totalPayloadLength += lastPayloadLength;
assert totalPayloadLength >= 0;
}
payloadData = new byte[totalPayloadLength];
tvf.readBytes(payloadData, 0, payloadData.length);
} else if (storePositions /* no payloads */) {
// TODO: we could maybe reuse last array, if we can
// somehow be careful about consumer never using two
// D&PEnums at once...
@ -502,14 +561,12 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
} else {
docsAndPositionsEnum = new TVDocsAndPositionsEnum();
}
docsAndPositionsEnum.reset(liveDocs, positions, startOffsets, endOffsets);
docsAndPositionsEnum.reset(liveDocs, positions, startOffsets, endOffsets, payloadOffsets, payloadData);
return docsAndPositionsEnum;
}
@Override
public Comparator<BytesRef> getComparator() {
// TODO: really indexer hardwires
// this...? I guess codec could buffer and re-sort...
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
}
@ -567,6 +624,9 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
private int[] positions;
private int[] startOffsets;
private int[] endOffsets;
private int[] payloadOffsets;
private BytesRef payload = new BytesRef();
private byte[] payloadBytes;
@Override
public int freq() throws IOException {
@ -602,11 +662,13 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
}
}
public void reset(Bits liveDocs, int[] positions, int[] startOffsets, int[] endOffsets) {
public void reset(Bits liveDocs, int[] positions, int[] startOffsets, int[] endOffsets, int[] payloadLengths, byte[] payloadBytes) {
this.liveDocs = liveDocs;
this.positions = positions;
this.startOffsets = startOffsets;
this.endOffsets = endOffsets;
this.payloadOffsets = payloadLengths;
this.payloadBytes = payloadBytes;
this.doc = -1;
didNext = false;
nextPos = 0;
@ -614,12 +676,19 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
@Override
public BytesRef getPayload() {
return null;
}
@Override
public boolean hasPayload() {
return false;
if (payloadOffsets == null) {
return null;
} else {
int off = payloadOffsets[nextPos-1];
int end = nextPos == payloadOffsets.length ? payloadBytes.length : payloadOffsets[nextPos];
if (end - off == 0) {
return null;
}
payload.bytes = payloadBytes;
payload.offset = off;
payload.length = end - off;
return payload;
}
}
@Override
View File
@ -106,12 +106,14 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
private String lastFieldName;
@Override
public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets) throws IOException {
public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads) throws IOException {
assert lastFieldName == null || info.name.compareTo(lastFieldName) > 0: "fieldName=" + info.name + " lastFieldName=" + lastFieldName;
lastFieldName = info.name;
this.positions = positions;
this.offsets = offsets;
this.payloads = payloads;
lastTerm.length = 0;
lastPayloadLength = -1; // force first payload to write its length
fps[fieldCount++] = tvf.getFilePointer();
tvd.writeVInt(info.number);
tvf.writeVInt(numTerms);
@ -120,6 +122,8 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
bits |= Lucene40TermVectorsReader.STORE_POSITIONS_WITH_TERMVECTOR;
if (offsets)
bits |= Lucene40TermVectorsReader.STORE_OFFSET_WITH_TERMVECTOR;
if (payloads)
bits |= Lucene40TermVectorsReader.STORE_PAYLOAD_WITH_TERMVECTOR;
tvf.writeByte(bits);
assert fieldCount <= numVectorFields;
@ -138,10 +142,12 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
// we also don't buffer during bulk merges.
private int offsetStartBuffer[] = new int[10];
private int offsetEndBuffer[] = new int[10];
private int offsetIndex = 0;
private int offsetFreq = 0;
private BytesRef payloadData = new BytesRef(10);
private int bufferedIndex = 0;
private int bufferedFreq = 0;
private boolean positions = false;
private boolean offsets = false;
private boolean payloads = false;
@Override
public void startTerm(BytesRef term, int freq) throws IOException {
@ -158,20 +164,40 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
// we might need to buffer if its a non-bulk merge
offsetStartBuffer = ArrayUtil.grow(offsetStartBuffer, freq);
offsetEndBuffer = ArrayUtil.grow(offsetEndBuffer, freq);
offsetIndex = 0;
offsetFreq = freq;
}
bufferedIndex = 0;
bufferedFreq = freq;
payloadData.length = 0;
}
int lastPosition = 0;
int lastOffset = 0;
int lastPayloadLength = -1; // force first payload to write its length
BytesRef scratch = new BytesRef(); // used only by this optimized flush below
@Override
public void addProx(int numProx, DataInput positions, DataInput offsets) throws IOException {
// TODO: technically we could just copy bytes and not re-encode if we knew the length...
if (positions != null) {
if (payloads) {
// TODO, maybe overkill and just call super.addProx() in this case?
// we do avoid buffering the offsets in RAM though.
for (int i = 0; i < numProx; i++) {
tvf.writeVInt(positions.readVInt());
int code = positions.readVInt();
if ((code & 1) == 1) {
int length = positions.readVInt();
scratch.grow(length);
scratch.length = length;
positions.readBytes(scratch.bytes, scratch.offset, scratch.length);
writePosition(code >>> 1, scratch);
} else {
writePosition(code >>> 1, null);
}
}
tvf.writeBytes(payloadData.bytes, payloadData.offset, payloadData.length);
} else if (positions != null) {
// pure positions, no payloads
for (int i = 0; i < numProx; i++) {
tvf.writeVInt(positions.readVInt() >>> 1);
}
}
@ -184,28 +210,36 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
}
@Override
public void addPosition(int position, int startOffset, int endOffset) throws IOException {
if (positions && offsets) {
public void addPosition(int position, int startOffset, int endOffset, BytesRef payload) throws IOException {
if (positions && (offsets || payloads)) {
// write position delta
tvf.writeVInt(position - lastPosition);
writePosition(position - lastPosition, payload);
lastPosition = position;
// buffer offsets
offsetStartBuffer[offsetIndex] = startOffset;
offsetEndBuffer[offsetIndex] = endOffset;
offsetIndex++;
if (offsets) {
offsetStartBuffer[bufferedIndex] = startOffset;
offsetEndBuffer[bufferedIndex] = endOffset;
}
bufferedIndex++;
// dump buffer if we are done
if (offsetIndex == offsetFreq) {
for (int i = 0; i < offsetIndex; i++) {
tvf.writeVInt(offsetStartBuffer[i] - lastOffset);
tvf.writeVInt(offsetEndBuffer[i] - offsetStartBuffer[i]);
lastOffset = offsetEndBuffer[i];
if (bufferedIndex == bufferedFreq) {
if (payloads) {
tvf.writeBytes(payloadData.bytes, payloadData.offset, payloadData.length);
}
for (int i = 0; i < bufferedIndex; i++) {
if (offsets) {
tvf.writeVInt(offsetStartBuffer[i] - lastOffset);
tvf.writeVInt(offsetEndBuffer[i] - offsetStartBuffer[i]);
lastOffset = offsetEndBuffer[i];
}
}
}
} else if (positions) {
// write position delta
tvf.writeVInt(position - lastPosition);
writePosition(position - lastPosition, payload);
lastPosition = position;
} else if (offsets) {
// write offset deltas
@ -214,6 +248,30 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
lastOffset = endOffset;
}
}
private void writePosition(int delta, BytesRef payload) throws IOException {
if (payloads) {
int payloadLength = payload == null ? 0 : payload.length;
if (payloadLength != lastPayloadLength) {
lastPayloadLength = payloadLength;
tvf.writeVInt((delta<<1)|1);
tvf.writeVInt(payloadLength);
} else {
tvf.writeVInt(delta << 1);
}
if (payloadLength > 0) {
if (payloadLength + payloadData.length < 0) {
// we overflowed the payload buffer, just throw UOE
// having > Integer.MAX_VALUE bytes of payload for a single term in a single doc is nuts.
throw new UnsupportedOperationException("A term cannot have more than Integer.MAX_VALUE bytes of payload data in a single document");
}
payloadData.append(payload);
}
} else {
tvf.writeVInt(delta);
}
}
@Override
public void abort() {
@ -255,7 +313,14 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
int idx = 0;
int numDocs = 0;
for (final AtomicReader reader : mergeState.readers) {
for (int i = 0; i < mergeState.readers.size(); i++) {
final AtomicReader reader = mergeState.readers.get(i);
// set PayloadProcessor
if (mergeState.payloadProcessorProvider != null) {
mergeState.currentReaderPayloadProcessor = mergeState.readerPayloadProcessor[i];
} else {
mergeState.currentReaderPayloadProcessor = null;
}
final SegmentReader matchingSegmentReader = mergeState.matchingSegmentReaders[idx++];
Lucene40TermVectorsReader matchingVectorsReader = null;
if (matchingSegmentReader != null) {
@ -288,8 +353,8 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
final int maxDoc = reader.maxDoc();
final Bits liveDocs = reader.getLiveDocs();
int totalNumDocs = 0;
if (matchingVectorsReader != null) {
// We can bulk-copy because the fieldInfos are "congruent"
if (matchingVectorsReader != null && mergeState.currentReaderPayloadProcessor == null) {
// We can bulk-copy because the fieldInfos are "congruent" and there is no payload processor
for (int docNum = 0; docNum < maxDoc;) {
if (!liveDocs.get(docNum)) {
// skip deleted docs
@ -324,7 +389,7 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
// NOTE: it's very important to first assign to vectors then pass it to
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
Fields vectors = reader.getTermVectors(docNum);
addAllDocVectors(vectors, mergeState.fieldInfos);
addAllDocVectors(vectors, mergeState);
totalNumDocs++;
mergeState.checkAbort.work(300);
}
@ -339,8 +404,8 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
int rawDocLengths2[])
throws IOException {
final int maxDoc = reader.maxDoc();
if (matchingVectorsReader != null) {
// We can bulk-copy because the fieldInfos are "congruent"
if (matchingVectorsReader != null && mergeState.currentReaderPayloadProcessor == null) {
// We can bulk-copy because the fieldInfos are "congruent" and there is no payload processor
int docCount = 0;
while (docCount < maxDoc) {
int len = Math.min(MAX_RAW_MERGE_DOCS, maxDoc - docCount);
@ -354,7 +419,7 @@ public final class Lucene40TermVectorsWriter extends TermVectorsWriter {
// NOTE: it's very important to first assign to vectors then pass it to
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
Fields vectors = reader.getTermVectors(docNum);
addAllDocVectors(vectors, mergeState.fieldInfos);
addAllDocVectors(vectors, mergeState);
mergeState.checkAbort.work(300);
}
}
View File
@ -366,7 +366,7 @@ the {@link org.apache.lucene.codecs.Codec Codec} api. Fast per-document storage
factors need no longer be a single byte, they can be any DocValues
{@link org.apache.lucene.index.DocValues.Type type}. Terms need not be unicode
strings, they can be any byte sequence. Term offsets can optionally be indexed
into the postings lists.</li>
into the postings lists. Payloads can be stored in the term vectors.</li>
</ul>
<a name="Limitations" id="Limitations"></a>
<h2>Limitations</h2>
View File
@ -32,7 +32,6 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
@ -44,6 +43,7 @@ import org.apache.lucene.store.RAMOutputStream;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnmodifiableIterator;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.RunAutomaton;
import org.apache.lucene.util.automaton.Transition;
@ -124,36 +124,14 @@ public class DirectPostingsFormat extends PostingsFormat {
private final Map<String,DirectField> fields = new TreeMap<String,DirectField>();
public DirectFields(SegmentReadState state, Fields fields, int minSkipCount, int lowFreqCutoff) throws IOException {
FieldsEnum fieldsEnum = fields.iterator();
String field;
while ((field = fieldsEnum.next()) != null) {
this.fields.put(field, new DirectField(state, field, fieldsEnum.terms(), minSkipCount, lowFreqCutoff));
for (String field : fields) {
this.fields.put(field, new DirectField(state, field, fields.terms(field), minSkipCount, lowFreqCutoff));
}
}
@Override
public FieldsEnum iterator() {
final Iterator<Map.Entry<String,DirectField>> iter = fields.entrySet().iterator();
return new FieldsEnum() {
Map.Entry<String,DirectField> current;
@Override
public String next() {
if (iter.hasNext()) {
current = iter.next();
return current.getKey();
} else {
return null;
}
}
@Override
public Terms terms() {
return current.getValue();
}
};
public Iterator<String> iterator() {
return new UnmodifiableIterator<String>(fields.keySet().iterator());
}
@Override
@ -348,9 +326,8 @@ public class DirectPostingsFormat extends PostingsFormat {
scratch.add(docsAndPositionsEnum.endOffset());
}
if (hasPayloads) {
final BytesRef payload;
if (docsAndPositionsEnum.hasPayload()) {
payload = docsAndPositionsEnum.getPayload();
final BytesRef payload = docsAndPositionsEnum.getPayload();
if (payload != null) {
scratch.add(payload.length);
ros.writeBytes(payload.bytes, payload.offset, payload.length);
} else {
@ -421,9 +398,8 @@ public class DirectPostingsFormat extends PostingsFormat {
for(int pos=0;pos<freq;pos++) {
positions[upto][posUpto] = docsAndPositionsEnum.nextPosition();
if (hasPayloads) {
if (docsAndPositionsEnum.hasPayload()) {
BytesRef payload = docsAndPositionsEnum.getPayload();
assert payload != null;
BytesRef payload = docsAndPositionsEnum.getPayload();
if (payload != null) {
byte[] payloadBytes = new byte[payload.length];
System.arraycopy(payload.bytes, payload.offset, payloadBytes, 0, payload.length);
payloads[upto][pos] = payloadBytes;
@ -635,6 +611,21 @@ public class DirectPostingsFormat extends PostingsFormat {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
public boolean hasOffsets() {
return hasOffsets;
}
@Override
public boolean hasPositions() {
return hasPos;
}
@Override
public boolean hasPayloads() {
return hasPayloads;
}
private final class DirectTermsEnum extends TermsEnum {
private final BytesRef scratch = new BytesRef();
@ -1791,18 +1782,12 @@ public class DirectPostingsFormat extends PostingsFormat {
return docID;
}
@Override
public boolean hasPayload() {
return payloadLength > 0;
}
@Override
public BytesRef getPayload() {
if (payloadLength > 0) {
payload.bytes = payloadBytes;
payload.offset = lastPayloadOffset;
payload.length = payloadLength;
payloadLength = 0;
return payload;
} else {
return null;
@ -1995,7 +1980,6 @@ public class DirectPostingsFormat extends PostingsFormat {
private int upto;
private int docID = -1;
private int posUpto;
private boolean gotPayload;
private int[] curPositions;
public HighFreqDocsAndPositionsEnum(Bits liveDocs, boolean hasOffsets) {
@ -2065,7 +2049,6 @@ public class DirectPostingsFormat extends PostingsFormat {
@Override
public int nextPosition() {
posUpto += posJump;
gotPayload = false;
return curPositions[posUpto];
}
@ -2199,21 +2182,22 @@ public class DirectPostingsFormat extends PostingsFormat {
}
}
@Override
public boolean hasPayload() {
return !gotPayload && payloads != null && payloads[upto][posUpto/(hasOffsets ? 3 : 1)] != null;
}
private final BytesRef payload = new BytesRef();
@Override
public BytesRef getPayload() {
final byte[] payloadBytes = payloads[upto][posUpto/(hasOffsets ? 3:1)];
payload.bytes = payloadBytes;
payload.length = payloadBytes.length;
payload.offset = 0;
gotPayload = true;
return payload;
if (payloads == null) {
return null;
} else {
final byte[] payloadBytes = payloads[upto][posUpto/(hasOffsets ? 3:1)];
if (payloadBytes == null) {
return null;
}
payload.bytes = payloadBytes;
payload.length = payloadBytes.length;
payload.offset = 0;
return payload;
}
}
}
}
View File
@ -34,7 +34,6 @@ import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
@ -49,6 +48,7 @@ import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.UnmodifiableIterator;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.ByteSequenceOutputs;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
@ -446,7 +446,6 @@ public class MemoryPostingsFormat extends PostingsFormat {
private int numDocs;
private int posPending;
private int payloadLength;
private boolean payloadRetrieved;
final boolean storeOffsets;
int offsetLength;
int startOffset;
@ -484,7 +483,6 @@ public class MemoryPostingsFormat extends PostingsFormat {
payloadLength = 0;
this.numDocs = numDocs;
posPending = 0;
payloadRetrieved = false;
startOffset = storeOffsets ? 0 : -1; // always return -1 if no offsets are stored
offsetLength = 0;
return this;
@ -577,10 +575,6 @@ public class MemoryPostingsFormat extends PostingsFormat {
payload.offset = in.getPosition();
in.skipBytes(payloadLength);
payload.length = payloadLength;
// Necessary, in case caller changed the
// payload.bytes from prior call:
payload.bytes = buffer;
payloadRetrieved = false;
}
//System.out.println(" pos=" + pos + " payload=" + payload + " fp=" + in.getPosition());
@ -599,13 +593,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
@Override
public BytesRef getPayload() {
payloadRetrieved = true;
return payload;
}
@Override
public boolean hasPayload() {
return !payloadRetrieved && payload.length > 0;
return payload.length > 0 ? payload : null;
}
@Override
@ -834,6 +822,21 @@ public class MemoryPostingsFormat extends PostingsFormat {
public Comparator<BytesRef> getComparator() {
return BytesRef.getUTF8SortedAsUnicodeComparator();
}
@Override
public boolean hasOffsets() {
return field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
}
@Override
public boolean hasPositions() {
return field.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
}
@Override
public boolean hasPayloads() {
return field.hasPayloads();
}
}
@Override
@ -859,24 +862,8 @@ public class MemoryPostingsFormat extends PostingsFormat {
return new FieldsProducer() {
@Override
public FieldsEnum iterator() {
final Iterator<TermsReader> iter = fields.values().iterator();
return new FieldsEnum() {
private TermsReader current;
@Override
public String next() {
current = iter.next();
return current.field.name;
}
@Override
public Terms terms() {
return current;
}
};
public Iterator<String> iterator() {
return new UnmodifiableIterator<String>(fields.keySet().iterator());
}
@Override
View File
@ -30,11 +30,11 @@ import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.TermsConsumer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.UnmodifiableIterator;
/**
* Enables per field format support.
@ -197,34 +197,9 @@ public abstract class PerFieldPostingsFormat extends PostingsFormat {
}
}
private final class FieldsIterator extends FieldsEnum {
private final Iterator<String> it;
private String current;
public FieldsIterator() {
it = fields.keySet().iterator();
}
@Override
public String next() {
if (it.hasNext()) {
current = it.next();
} else {
current = null;
}
return current;
}
@Override
public Terms terms() throws IOException {
return fields.get(current).terms(current);
}
}
@Override
public FieldsEnum iterator() throws IOException {
return new FieldsIterator();
public Iterator<String> iterator() {
return new UnmodifiableIterator<String>(fields.keySet().iterator());
}
@Override
View File
@ -532,19 +532,13 @@ public class PulsingPostingsReader extends PostingsReaderBase {
}
}
@Override
public boolean hasPayload() {
return storePayloads && !payloadRetrieved && payloadLength > 0;
}
@Override
public BytesRef getPayload() throws IOException {
//System.out.println("PR getPayload payloadLength=" + payloadLength + " this=" + this);
if (payloadRetrieved) {
throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
}
payloadRetrieved = true;
if (payloadLength > 0) {
return payload;
} else if (storePayloads && payloadLength > 0) {
payloadRetrieved = true;
if (payload == null) {
payload = new BytesRef(payloadLength);
} else {
View File
@ -714,7 +714,11 @@ public class SepPostingsReader extends PostingsReaderBase {
@Override
public BytesRef getPayload() throws IOException {
if (!payloadPending) {
throw new IOException("Either no payload exists at this term position or an attempt was made to load it more than once.");
return null;
}
if (pendingPayloadBytes == 0) {
return payload;
}
assert pendingPayloadBytes >= payloadLength;
@ -731,15 +735,9 @@ public class SepPostingsReader extends PostingsReaderBase {
}
payloadIn.readBytes(payload.bytes, 0, payloadLength);
payloadPending = false;
payload.length = payloadLength;
pendingPayloadBytes = 0;
return payload;
}
@Override
public boolean hasPayload() {
return payloadPending && payloadLength > 0;
}
}
}
View File
@ -20,14 +20,17 @@ package org.apache.lucene.codecs.simpletext;
import java.io.IOException;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
@ -40,6 +43,7 @@ import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.UnmodifiableIterator;
import org.apache.lucene.util.fst.Builder;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
import org.apache.lucene.util.fst.FST;
@ -48,7 +52,7 @@ import org.apache.lucene.util.fst.PositiveIntOutputs;
import org.apache.lucene.util.fst.Util;
class SimpleTextFieldsReader extends FieldsProducer {
private final TreeMap<String,Long> fields;
private final IndexInput in;
private final FieldInfos fieldInfos;
@ -66,35 +70,22 @@ class SimpleTextFieldsReader extends FieldsProducer {
in = state.dir.openInput(SimpleTextPostingsFormat.getPostingsFileName(state.segmentInfo.name, state.segmentSuffix), state.context);
fieldInfos = state.fieldInfos;
fields = readFields((IndexInput)in.clone());
}
private class SimpleTextFieldsEnum extends FieldsEnum {
private final IndexInput in;
private final BytesRef scratch = new BytesRef(10);
private String current;
public SimpleTextFieldsEnum() {
this.in = (IndexInput) SimpleTextFieldsReader.this.in.clone();
}
@Override
public String next() throws IOException {
while(true) {
SimpleTextUtil.readLine(in, scratch);
if (scratch.equals(END)) {
current = null;
return null;
}
if (StringHelper.startsWith(scratch, FIELD)) {
return current = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8");
}
private TreeMap<String,Long> readFields(IndexInput in) throws IOException {
BytesRef scratch = new BytesRef(10);
TreeMap<String,Long> fields = new TreeMap<String,Long>();
while (true) {
SimpleTextUtil.readLine(in, scratch);
if (scratch.equals(END)) {
return fields;
} else if (StringHelper.startsWith(scratch, FIELD)) {
String fieldName = new String(scratch.bytes, scratch.offset + FIELD.length, scratch.length - FIELD.length, "UTF-8");
fields.put(fieldName, in.getFilePointer());
}
}
@Override
public Terms terms() throws IOException {
return SimpleTextFieldsReader.this.terms(current);
}
}
private class SimpleTextTermsEnum extends TermsEnum {
@ -471,18 +462,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
@Override
public BytesRef getPayload() {
// Some tests rely on only being able to retrieve the
// payload once
try {
return payload;
} finally {
payload = null;
}
}
@Override
public boolean hasPayload() {
return payload != null;
return payload;
}
}
@ -498,7 +478,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
private class SimpleTextTerms extends Terms {
private final long termsStart;
private final IndexOptions indexOptions;
private final FieldInfo fieldInfo;
private long sumTotalTermFreq;
private long sumDocFreq;
private int docCount;
@ -509,7 +489,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
public SimpleTextTerms(String field, long termsStart) throws IOException {
this.termsStart = termsStart;
indexOptions = fieldInfos.fieldInfo(field).getIndexOptions();
fieldInfo = fieldInfos.fieldInfo(field);
loadTerms();
}
@ -579,7 +559,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
@Override
public TermsEnum iterator(TermsEnum reuse) throws IOException {
if (fst != null) {
return new SimpleTextTermsEnum(fst, indexOptions);
return new SimpleTextTermsEnum(fst, fieldInfo.getIndexOptions());
} else {
return TermsEnum.EMPTY;
}
@ -597,7 +577,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
@Override
public long getSumTotalTermFreq() {
return indexOptions == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq;
return fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY ? -1 : sumTotalTermFreq;
}
@Override
@ -609,11 +589,26 @@ class SimpleTextFieldsReader extends FieldsProducer {
public int getDocCount() throws IOException {
return docCount;
}
@Override
public boolean hasOffsets() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
}
@Override
public boolean hasPositions() {
return fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
}
@Override
public boolean hasPayloads() {
return fieldInfo.hasPayloads();
}
}
@Override
public FieldsEnum iterator() throws IOException {
return new SimpleTextFieldsEnum();
public Iterator<String> iterator() {
return new UnmodifiableIterator<String>(fields.keySet().iterator());
}
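Both iterator() overrides in this commit return an org.apache.lucene.util.UnmodifiableIterator, whose body is not shown in this diff. A minimal sketch of its assumed shape, inferred from the name and the call sites: a pass-through wrapper that forbids remove(), so consumers cannot mutate the codec's internal field map.
import java.util.Iterator;
/** Assumed shape of org.apache.lucene.util.UnmodifiableIterator (its body is
 *  not part of this diff): delegate hasNext()/next(), forbid remove(). */
public final class UnmodifiableIterator<T> implements Iterator<T> {
  private final Iterator<T> in;
  public UnmodifiableIterator(Iterator<T> in) {
    this.in = in;
  }
  @Override
  public boolean hasNext() {
    return in.hasNext();
  }
  @Override
  public T next() {
    return in.next();
  }
  @Override
  public void remove() {
    // the whole point: keySet().iterator() would otherwise allow removal
    throw new UnsupportedOperationException();
  }
}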
private final Map<String,Terms> termsCache = new HashMap<String,Terms>();
@ -622,15 +617,13 @@ class SimpleTextFieldsReader extends FieldsProducer {
synchronized public Terms terms(String field) throws IOException {
Terms terms = termsCache.get(field);
if (terms == null) {
SimpleTextFieldsEnum fe = (SimpleTextFieldsEnum) iterator();
String fieldUpto;
while((fieldUpto = fe.next()) != null) {
if (fieldUpto.equals(field)) {
terms = new SimpleTextTerms(field, fe.in.getFilePointer());
break;
}
Long fp = fields.get(field);
if (fp == null) {
return null;
} else {
terms = new SimpleTextTerms(field, fp);
termsCache.put(field, terms);
}
termsCache.put(field, terms);
}
return terms;
}


@ -29,7 +29,6 @@ import org.apache.lucene.codecs.TermVectorsReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.Terms;
@ -45,6 +44,7 @@ import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util.UnmodifiableIterator;
import static org.apache.lucene.codecs.simpletext.SimpleTextTermVectorsWriter.*;
@ -126,11 +126,15 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
assert StringHelper.startsWith(scratch, FIELDOFFSETS);
boolean offsets = Boolean.parseBoolean(readString(FIELDOFFSETS.length, scratch));
readLine();
assert StringHelper.startsWith(scratch, FIELDPAYLOADS);
boolean payloads = Boolean.parseBoolean(readString(FIELDPAYLOADS.length, scratch));
readLine();
assert StringHelper.startsWith(scratch, FIELDTERMCOUNT);
int termCount = parseIntAt(FIELDTERMCOUNT.length);
SimpleTVTerms terms = new SimpleTVTerms();
SimpleTVTerms terms = new SimpleTVTerms(offsets, positions, payloads);
fields.put(fieldName, terms);
for (int j = 0; j < termCount; j++) {
@ -152,6 +156,9 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
if (positions || offsets) {
if (positions) {
postings.positions = new int[postings.freq];
if (payloads) {
postings.payloads = new BytesRef[postings.freq];
}
}
if (offsets) {
@ -164,6 +171,17 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
readLine();
assert StringHelper.startsWith(scratch, POSITION);
postings.positions[k] = parseIntAt(POSITION.length);
if (payloads) {
readLine();
assert StringHelper.startsWith(scratch, PAYLOAD);
if (scratch.length - PAYLOAD.length == 0) {
postings.payloads[k] = null;
} else {
byte payloadBytes[] = new byte[scratch.length - PAYLOAD.length];
System.arraycopy(scratch.bytes, scratch.offset+PAYLOAD.length, payloadBytes, 0, payloadBytes.length);
postings.payloads[k] = new BytesRef(payloadBytes);
}
}
}
if (offsets) {
@ -222,26 +240,8 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
}
@Override
public FieldsEnum iterator() throws IOException {
return new FieldsEnum() {
private Iterator<Map.Entry<String,SimpleTVTerms>> iterator = fields.entrySet().iterator();
private Map.Entry<String,SimpleTVTerms> current = null;
@Override
public String next() {
if (!iterator.hasNext()) {
return null;
} else {
current = iterator.next();
return current.getKey();
}
}
@Override
public Terms terms() {
return current.getValue();
}
};
public Iterator<String> iterator() {
return new UnmodifiableIterator<String>(fields.keySet().iterator());
}
@Override
@ -257,8 +257,14 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
private static class SimpleTVTerms extends Terms {
final SortedMap<BytesRef,SimpleTVPostings> terms;
final boolean hasOffsets;
final boolean hasPositions;
final boolean hasPayloads;
SimpleTVTerms() {
SimpleTVTerms(boolean hasOffsets, boolean hasPositions, boolean hasPayloads) {
this.hasOffsets = hasOffsets;
this.hasPositions = hasPositions;
this.hasPayloads = hasPayloads;
terms = new TreeMap<BytesRef,SimpleTVPostings>();
}
@ -292,6 +298,21 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
public int getDocCount() throws IOException {
return 1;
}
@Override
public boolean hasOffsets() {
return hasOffsets;
}
@Override
public boolean hasPositions() {
return hasPositions;
}
@Override
public boolean hasPayloads() {
return hasPayloads;
}
}
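The new hasOffsets()/hasPositions()/hasPayloads() methods let consumers probe a term vector's capabilities up front instead of probing for a null DocsAndPositionsEnum. A hedged sketch, assuming an open AtomicReader and a hypothetical "body" field indexed with term vectors:
// Sketch: check the new Terms capability flags before asking for positions.
static void inspectVector(AtomicReader reader, int docID) throws IOException {
  Terms vector = reader.getTermVector(docID, "body");
  if (vector == null || !vector.hasPositions()) {
    return; // no vector for this doc/field, or positions were not recorded
  }
  boolean carriesPayloads = vector.hasPayloads();
  TermsEnum termsEnum = vector.iterator(null);
  BytesRef term;
  while ((term = termsEnum.next()) != null) {
    // hasPositions() guarantees a non-null positions enum here
    DocsAndPositionsEnum positions = termsEnum.docsAndPositions(null, null);
    // walk positions, consulting getPayload() only if carriesPayloads
  }
}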
private static class SimpleTVPostings {
@ -299,6 +320,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
private int positions[];
private int startOffsets[];
private int endOffsets[];
private BytesRef payloads[];
}
private static class SimpleTVTermsEnum extends TermsEnum {
@ -372,7 +394,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
}
// TODO: reuse
SimpleTVDocsAndPositionsEnum e = new SimpleTVDocsAndPositionsEnum();
e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets);
e.reset(liveDocs, postings.positions, postings.startOffsets, postings.endOffsets, postings.payloads);
return e;
}
@ -433,6 +455,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
private int nextPos;
private Bits liveDocs;
private int[] positions;
private BytesRef[] payloads;
private int[] startOffsets;
private int[] endOffsets;
@ -470,11 +493,12 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
}
}
public void reset(Bits liveDocs, int[] positions, int[] startOffsets, int[] endOffsets) {
public void reset(Bits liveDocs, int[] positions, int[] startOffsets, int[] endOffsets, BytesRef payloads[]) {
this.liveDocs = liveDocs;
this.positions = positions;
this.startOffsets = startOffsets;
this.endOffsets = endOffsets;
this.payloads = payloads;
this.doc = -1;
didNext = false;
nextPos = 0;
@ -482,12 +506,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
@Override
public BytesRef getPayload() {
return null;
}
@Override
public boolean hasPayload() {
return false;
return payloads == null ? null : payloads[nextPos-1];
}
@Override


@ -45,10 +45,12 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
static final BytesRef FIELDNAME = new BytesRef(" name ");
static final BytesRef FIELDPOSITIONS = new BytesRef(" positions ");
static final BytesRef FIELDOFFSETS = new BytesRef(" offsets ");
static final BytesRef FIELDPAYLOADS = new BytesRef(" payloads ");
static final BytesRef FIELDTERMCOUNT = new BytesRef(" numterms ");
static final BytesRef TERMTEXT = new BytesRef(" term ");
static final BytesRef TERMFREQ = new BytesRef(" freq ");
static final BytesRef POSITION = new BytesRef(" position ");
static final BytesRef PAYLOAD = new BytesRef(" payload ");
static final BytesRef STARTOFFSET = new BytesRef(" startoffset ");
static final BytesRef ENDOFFSET = new BytesRef(" endoffset ");
@ -61,6 +63,7 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
private final BytesRef scratch = new BytesRef();
private boolean offsets;
private boolean positions;
private boolean payloads;
public SimpleTextTermVectorsWriter(Directory directory, String segment, IOContext context) throws IOException {
this.directory = directory;
@ -89,7 +92,7 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
}
@Override
public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets) throws IOException {
public void startField(FieldInfo info, int numTerms, boolean positions, boolean offsets, boolean payloads) throws IOException {
write(FIELD);
write(Integer.toString(info.number));
newLine();
@ -106,12 +109,17 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
write(Boolean.toString(offsets));
newLine();
write(FIELDPAYLOADS);
write(Boolean.toString(payloads));
newLine();
write(FIELDTERMCOUNT);
write(Integer.toString(numTerms));
newLine();
this.positions = positions;
this.offsets = offsets;
this.payloads = payloads;
}
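Together with the addPosition() change just below, a field section in the simpletext term-vector format now carries payload information roughly like this (layout illustrative; the payload line is written for every position once the field's flag is true, and an empty value is read back as null):
 payloads true          <- new FIELDPAYLOADS flag in each field header
 ...
 position 5
 payload <raw bytes>    <- new PAYLOAD line following each position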
@Override
@ -126,13 +134,22 @@ public class SimpleTextTermVectorsWriter extends TermVectorsWriter {
}
@Override
public void addPosition(int position, int startOffset, int endOffset) throws IOException {
public void addPosition(int position, int startOffset, int endOffset, BytesRef payload) throws IOException {
assert positions || offsets;
if (positions) {
write(POSITION);
write(Integer.toString(position));
newLine();
if (payloads) {
write(PAYLOAD);
if (payload != null) {
assert payload.length > 0;
write(payload);
}
newLine();
}
}
if (offsets) {


@ -39,6 +39,7 @@ public class FieldType implements IndexableFieldType {
private boolean storeTermVectors;
private boolean storeTermVectorOffsets;
private boolean storeTermVectorPositions;
private boolean storeTermVectorPayloads;
private boolean omitNorms;
private IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
private DocValues.Type docValueType;
@ -53,6 +54,7 @@ public class FieldType implements IndexableFieldType {
this.storeTermVectors = ref.storeTermVectors();
this.storeTermVectorOffsets = ref.storeTermVectorOffsets();
this.storeTermVectorPositions = ref.storeTermVectorPositions();
this.storeTermVectorPayloads = ref.storeTermVectorPayloads();
this.omitNorms = ref.omitNorms();
this.indexOptions = ref.indexOptions();
this.docValueType = ref.docValueType();
@ -132,6 +134,15 @@ public class FieldType implements IndexableFieldType {
this.storeTermVectorPositions = value;
}
public boolean storeTermVectorPayloads() {
return this.storeTermVectorPayloads;
}
public void setStoreTermVectorPayloads(boolean value) {
checkIfFrozen();
this.storeTermVectorPayloads = value;
}
public boolean omitNorms() {
return this.omitNorms;
}
@ -198,24 +209,19 @@ public class FieldType implements IndexableFieldType {
result.append(",");
result.append("indexed");
if (tokenized()) {
if (result.length() > 0)
result.append(",");
result.append("tokenized");
result.append(",tokenized");
}
if (storeTermVectors()) {
if (result.length() > 0)
result.append(",");
result.append("termVector");
result.append(",termVector");
}
if (storeTermVectorOffsets()) {
if (result.length() > 0)
result.append(",");
result.append("termVectorOffsets");
result.append(",termVectorOffsets");
}
if (storeTermVectorPositions()) {
if (result.length() > 0)
result.append(",");
result.append("termVectorPosition");
result.append(",termVectorPosition");
if (storeTermVectorPayloads()) {
result.append(",termVectorPayloads");
}
}
if (omitNorms()) {
result.append(",omitNorms");
@ -232,7 +238,9 @@ public class FieldType implements IndexableFieldType {
}
}
if (docValueType != null) {
result.append(",docValueType=");
if (result.length() > 0)
result.append(",");
result.append("docValueType=");
result.append(docValueType);
}


@ -685,12 +685,7 @@ public class CheckIndex {
DocsAndPositionsEnum postings = null;
String lastField = null;
final FieldsEnum fieldsEnum = fields.iterator();
while(true) {
final String field = fieldsEnum.next();
if (field == null) {
break;
}
for (String field : fields) {
// MultiFieldsEnum relies upon this order...
if (lastField != null && field.compareTo(lastField) <= 0) {
throw new RuntimeException("fields out of order: lastField=" + lastField + " field=" + field);
@ -713,11 +708,16 @@ public class CheckIndex {
// assert fields.terms(field) != null;
computedFieldCount++;
final Terms terms = fieldsEnum.terms();
final Terms terms = fields.terms(field);
if (terms == null) {
continue;
}
final boolean hasPositions = terms.hasPositions();
final boolean hasOffsets = terms.hasOffsets();
// term vectors cannot omit TF
final boolean hasFreqs = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
final TermsEnum termsEnum = terms.iterator(null);
boolean hasOrd = true;
@ -777,17 +777,10 @@ public class CheckIndex {
status.termCount++;
final DocsEnum docs2;
final boolean hasPositions;
// if we are checking vectors, we have freqs implicitly
final boolean hasFreqs = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
// if we are checking vectors, offsets are a free-for-all anyway
final boolean hasOffsets = isVectors || fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
if (postings != null) {
docs2 = postings;
hasPositions = true;
} else {
docs2 = docs;
hasPositions = false;
}
int lastDoc = -1;
@ -824,22 +817,17 @@ public class CheckIndex {
if (hasPositions) {
for(int j=0;j<freq;j++) {
final int pos = postings.nextPosition();
// NOTE: pos=-1 is allowed because of ancient bug
// (LUCENE-1542) whereby IndexWriter could
// write pos=-1 when first token's posInc is 0
// (separately: analyzers should not give
// posInc=0 to first token); also, term
// vectors are allowed to return pos=-1 if
// they indexed offset but not positions:
if (pos < -1) {
if (pos < 0) {
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
}
if (pos < lastPos) {
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
}
lastPos = pos;
if (postings.hasPayload()) {
postings.getPayload();
BytesRef payload = postings.getPayload();
if (payload != null && payload.length < 1) {
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " payload length is out of bounds " + payload.length);
}
if (hasOffsets) {
int startOffset = postings.startOffset();
@ -924,14 +912,8 @@ public class CheckIndex {
int lastOffset = 0;
for(int posUpto=0;posUpto<freq;posUpto++) {
final int pos = postings.nextPosition();
// NOTE: pos=-1 is allowed because of ancient bug
// (LUCENE-1542) whereby IndexWriter could
// write pos=-1 when first token's posInc is 0
// (separately: analyzers should not give
// posInc=0 to first token); also, term
// vectors are allowed to return pos=-1 if
// they indexed offset but not positions:
if (pos < -1) {
if (pos < 0) {
throw new RuntimeException("position " + pos + " is out of bounds");
}
if (pos < lastPosition) {
@ -1000,11 +982,7 @@ public class CheckIndex {
// only happen if it's a ghost field (field with
// no terms, eg there used to be terms but all
// docs got deleted and then merged away):
// make sure TermsEnum is empty:
final Terms fieldTerms2 = fieldsEnum.terms();
if (fieldTerms2 != null && fieldTerms2.iterator(null).next() != null) {
throw new RuntimeException("Fields.terms(field=" + field + ") returned null yet the field appears to have terms");
}
} else {
if (fieldTerms instanceof BlockTreeTermsReader.FieldReader) {
final BlockTreeTermsReader.Stats stats = ((BlockTreeTermsReader.FieldReader) fieldTerms).computeStats();
@ -1415,9 +1393,7 @@ public class CheckIndex {
status.docCount++;
}
FieldsEnum fieldsEnum = tfv.iterator();
String field = null;
while((field = fieldsEnum.next()) != null) {
for(String field : tfv) {
if (doStats) {
status.totVectors++;
}
@ -1432,6 +1408,8 @@ public class CheckIndex {
Terms terms = tfv.terms(field);
termsEnum = terms.iterator(termsEnum);
final boolean postingsHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
final boolean postingsHasPayload = fieldInfo.hasPayloads();
final boolean vectorsHasPayload = terms.hasPayloads();
Terms postingsTerms = postingsFields.terms(field);
if (postingsTerms == null) {
@ -1439,19 +1417,18 @@ public class CheckIndex {
}
postingsTermsEnum = postingsTerms.iterator(postingsTermsEnum);
final boolean hasProx = terms.hasOffsets() || terms.hasPositions();
BytesRef term = null;
while ((term = termsEnum.next()) != null) {
final boolean hasProx;
// Try positions:
postings = termsEnum.docsAndPositions(null, postings);
if (postings == null) {
hasProx = false;
// Try docIDs & freqs:
docs = termsEnum.docs(null, docs);
if (hasProx) {
postings = termsEnum.docsAndPositions(null, postings);
assert postings != null;
docs = null;
} else {
hasProx = true;
docs = termsEnum.docs(null, docs);
assert docs != null;
postings = null;
}
final DocsEnum docs2;
@ -1504,7 +1481,7 @@ public class CheckIndex {
int pos = postings.nextPosition();
if (postingsPostings != null) {
int postingsPos = postingsPostings.nextPosition();
if (pos != -1 && postingsPos != -1 && pos != postingsPos) {
if (terms.hasPositions() && pos != postingsPos) {
throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": pos=" + pos + " differs from postings pos=" + postingsPos);
}
}
@ -1535,6 +1512,34 @@ public class CheckIndex {
throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": endOffset=" + endOffset + " differs from postings endOffset=" + postingsEndOffset);
}
}
BytesRef payload = postings.getPayload();
if (payload != null) {
assert vectorsHasPayload;
}
if (postingsHasPayload && vectorsHasPayload) {
assert postingsPostings != null;
if (payload == null) {
// we have payloads, but not at this position.
// postings has payloads too, it should not have one at this position
if (postingsPostings.getPayload() != null) {
throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has no payload but postings does: " + postingsPostings.getPayload());
}
} else {
// we have payloads, and one at this position
// postings should also have one at this position, with the same bytes.
if (postingsPostings.getPayload() == null) {
throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but postings does not.");
}
BytesRef postingsPayload = postingsPostings.getPayload();
if (!payload.equals(postingsPayload)) {
throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but differs from postings payload=" + postingsPayload);
}
}
}
}
}
}
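These stricter checks (pos < 0 instead of pos < -1, payload length bounds, vector/postings payload agreement) all run during an ordinary CheckIndex pass. A minimal invocation sketch, with the index path as a placeholder:
// Sketch: a plain CheckIndex run exercises the new validation.
Directory dir = FSDirectory.open(new File("/path/to/index"));  // placeholder
CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(System.out);
CheckIndex.Status status = checker.checkIndex();
if (!status.clean) {
  System.out.println("index has problems");
}
dir.close();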


@ -24,7 +24,7 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.MergedIterator;
import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;
class CoalescedDeletes {
@ -48,13 +48,14 @@ class CoalescedDeletes {
public Iterable<Term> termsIterable() {
return new Iterable<Term>() {
@SuppressWarnings("unchecked")
@Override
public Iterator<Term> iterator() {
ArrayList<Iterator<Term>> subs = new ArrayList<Iterator<Term>>(iterables.size());
for (Iterable<Term> iterable : iterables) {
subs.add(iterable.iterator());
Iterator<Term> subs[] = new Iterator[iterables.size()];
for (int i = 0; i < iterables.size(); i++) {
subs[i] = iterables.get(i).iterator();
}
return mergedIterator(subs);
return new MergedIterator<Term>(subs);
}
};
}
@ -86,106 +87,4 @@ class CoalescedDeletes {
}
};
}
/** provides a merged view across multiple iterators */
static Iterator<Term> mergedIterator(final List<Iterator<Term>> iterators) {
return new Iterator<Term>() {
Term current;
TermMergeQueue queue = new TermMergeQueue(iterators.size());
SubIterator[] top = new SubIterator[iterators.size()];
int numTop;
{
int index = 0;
for (Iterator<Term> iterator : iterators) {
if (iterator.hasNext()) {
SubIterator sub = new SubIterator();
sub.current = iterator.next();
sub.iterator = iterator;
sub.index = index++;
queue.add(sub);
}
}
}
public boolean hasNext() {
if (queue.size() > 0) {
return true;
}
for (int i = 0; i < numTop; i++) {
if (top[i].iterator.hasNext()) {
return true;
}
}
return false;
}
public Term next() {
// restore queue
pushTop();
// gather equal top fields
if (queue.size() > 0) {
pullTop();
} else {
current = null;
}
return current;
}
public void remove() {
throw new UnsupportedOperationException();
}
private void pullTop() {
// extract all subs from the queue that have the same top term
assert numTop == 0;
while (true) {
top[numTop++] = queue.pop();
if (queue.size() == 0
|| !(queue.top()).current.equals(top[0].current)) {
break;
}
}
current = top[0].current;
}
private void pushTop() {
// call next() on each top, and put back into queue
for (int i = 0; i < numTop; i++) {
if (top[i].iterator.hasNext()) {
top[i].current = top[i].iterator.next();
queue.add(top[i]);
} else {
// no more terms
top[i].current = null;
}
}
numTop = 0;
}
};
}
private static class SubIterator {
Iterator<Term> iterator;
Term current;
int index;
}
private static class TermMergeQueue extends PriorityQueue<SubIterator> {
TermMergeQueue(int size) {
super(size);
}
@Override
protected boolean lessThan(SubIterator a, SubIterator b) {
final int cmp = a.current.compareTo(b.current);
if (cmp != 0) {
return cmp < 0;
} else {
return a.index < b.index;
}
}
}
}
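The inline merging machinery removed above now lives in org.apache.lucene.util.MergedIterator, which the iterator() method instantiates directly. A hedged usage sketch matching the new call site; sortedA and sortedB stand in for any sorted Term collections:
// Sketch: MergedIterator exposes several sorted Iterator<Term>s as one
// sorted stream, collapsing equal terms across sub-iterators just as the
// removed inline implementation did.
@SuppressWarnings("unchecked")
Iterator<Term>[] subs = new Iterator[] { sortedA.iterator(), sortedB.iterator() };
Iterator<Term> merged = new MergedIterator<Term>(subs);
while (merged.hasNext()) {
  Term term = merged.next();  // each distinct term is seen once, in order
}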


@ -105,7 +105,7 @@ public abstract class DocValues implements Closeable {
* <p>
* {@link Source} instances obtained from this method are closed / released
* from the cache once this {@link DocValues} instance is closed by the
* {@link IndexReader}, {@link Fields} or {@link FieldsEnum} the
* {@link IndexReader}, {@link Fields} or the
* {@link DocValues} was created from.
*/
public Source getSource() throws IOException {


@ -48,11 +48,8 @@ public abstract class DocsAndPositionsEnum extends DocsEnum {
public abstract int endOffset() throws IOException;
/** Returns the payload at this position, or null if no
* payload was indexed. Only call this once per
* position. You should not modify anything (neither
* members of the returned BytesRef nor bytes in the
* byte[]). */
* payload was indexed. You should not modify anything
* (neither members of the returned BytesRef nor bytes
* in the byte[]). */
public abstract BytesRef getPayload() throws IOException;
public abstract boolean hasPayload();
}
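With hasPayload() removed, a null return from getPayload() is now the signal that the current position carries no payload, and the one-call-per-position restriction is gone. A hedged consumption sketch; the two-argument docsAndPositions(liveDocs, reuse) form matches the call sites elsewhere in this commit:
// Sketch: the new payload contract -- null means "no payload here".
static void consumePayloads(TermsEnum termsEnum) throws IOException {
  DocsAndPositionsEnum postings = termsEnum.docsAndPositions(null, null);
  if (postings == null) {
    return; // field was indexed without positions
  }
  while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    final int freq = postings.freq();
    for (int i = 0; i < freq; i++) {
      postings.nextPosition();
      BytesRef payload = postings.getPayload(); // replaces hasPayload() check
      if (payload != null) {
        // bytes live in payload.bytes[payload.offset .. offset + length)
      }
    }
  }
}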


@ -18,15 +18,16 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
import java.util.Iterator;
/** Flex API for access to fields and terms
* @lucene.experimental */
public abstract class Fields {
public abstract class Fields implements Iterable<String> {
/** Returns an iterator that will step through all field
* names. This will not return null. */
public abstract FieldsEnum iterator() throws IOException;
public abstract Iterator<String> iterator();
/** Get the {@link Terms} for this field. This will return
* null if the field does not exist. */
@ -45,12 +46,7 @@ public abstract class Fields {
// TODO: deprecate?
public long getUniqueTermCount() throws IOException {
long numTerms = 0;
FieldsEnum it = iterator();
while(true) {
String field = it.next();
if (field == null) {
break;
}
for (String field : this) {
Terms terms = terms(field);
if (terms != null) {
final long termCount = terms.size();
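Since Fields now implements Iterable<String>, the enhanced for loop above replaces the old FieldsEnum next()/terms() dance; the same pattern appears in the CheckIndex changes in this commit. A minimal sketch over the public API:
// Sketch: walk all fields through the new Iterable<String> contract.
static long countAllTerms(Fields fields) throws IOException {
  long total = 0;
  for (String field : fields) {          // no more FieldsEnum
    Terms terms = fields.terms(field);   // may be null for a termless field
    if (terms != null && terms.size() != -1) {
      total += terms.size();
    }
  }
  return total;
}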


@ -1,79 +0,0 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.util.AttributeSource;
/** Enumerates indexed fields. You must first call {@link
* #next} before calling {@link #terms}.
*
* @lucene.experimental */
public abstract class FieldsEnum {
// TODO: maybe allow retrieving FieldInfo for current
// field, as optional method?
private AttributeSource atts = null;
/**
* Returns the related attributes.
*/
public AttributeSource attributes() {
if (atts == null) {
atts = new AttributeSource();
}
return atts;
}
/** Increments the enumeration to the next field. Returns
* null when there are no more fields.*/
public abstract String next() throws IOException;
// TODO: would be nice to require/fix all impls so they
// never return null here... we have to fix the writers to
// never write 0-terms fields... or maybe allow a non-null
// Terms instance in just this case
/** Get {@link Terms} for the current field. After {@link #next} returns
* null this method should not be called. This method may
* return null in some cases, which means the provided
* field does not have any terms. */
public abstract Terms terms() throws IOException;
// TODO: should we allow pulling Terms as well? not just
// the iterator?
public final static FieldsEnum[] EMPTY_ARRAY = new FieldsEnum[0];
/** Provides zero fields */
public final static FieldsEnum EMPTY = new FieldsEnum() {
@Override
public String next() {
return null;
}
@Override
public Terms terms() {
throw new IllegalStateException("this method should never be called");
}
};
}


@ -24,6 +24,7 @@ import org.apache.lucene.util.automaton.CompiledAutomaton;
import java.io.IOException;
import java.util.Comparator;
import java.util.Iterator;
/** A <code>FilterAtomicReader</code> contains another AtomicReader, which it
* uses as its basic source of data, possibly transforming the data along the
@ -46,7 +47,7 @@ public class FilterAtomicReader extends AtomicReader {
}
@Override
public FieldsEnum iterator() throws IOException {
public Iterator<String> iterator() {
return in.iterator();
}
@ -109,28 +110,20 @@ public class FilterAtomicReader extends AtomicReader {
public TermsEnum intersect(CompiledAutomaton automaton, BytesRef bytes) throws java.io.IOException {
return in.intersect(automaton, bytes);
}
}
/** Base class for filtering {@link TermsEnum} implementations. */
public static class FilterFieldsEnum extends FieldsEnum {
protected final FieldsEnum in;
public FilterFieldsEnum(FieldsEnum in) {
this.in = in;
@Override
public boolean hasOffsets() {
return in.hasOffsets();
}
@Override
public String next() throws IOException {
return in.next();
}
@Override
public Terms terms() throws IOException {
return in.terms();
public boolean hasPositions() {
return in.hasPositions();
}
@Override
public AttributeSource attributes() {
return in.attributes();
public boolean hasPayloads() {
return in.hasPayloads();
}
}
@ -292,11 +285,6 @@ public class FilterAtomicReader extends AtomicReader {
public BytesRef getPayload() throws IOException {
return in.getPayload();
}
@Override
public boolean hasPayload() {
return in.hasPayload();
}
@Override
public AttributeSource attributes() {


@ -173,7 +173,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
postings.lastDocCodes[termID] = docState.docID;
} else {
postings.lastDocCodes[termID] = docState.docID << 1;
postings.docFreqs[termID] = 1;
postings.termFreqs[termID] = 1;
if (hasProx) {
writeProx(termID, fieldState.position);
if (hasOffsets) {
@ -194,10 +194,10 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
FreqProxPostingsArray postings = (FreqProxPostingsArray) termsHashPerField.postingsArray;
assert !hasFreq || postings.docFreqs[termID] > 0;
assert !hasFreq || postings.termFreqs[termID] > 0;
if (!hasFreq) {
assert postings.docFreqs == null;
assert postings.termFreqs == null;
if (docState.docID != postings.lastDocIDs[termID]) {
assert docState.docID > postings.lastDocIDs[termID];
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
@ -212,13 +212,13 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
// Now that we know doc freq for previous doc,
// write it & lastDocCode
if (1 == postings.docFreqs[termID]) {
if (1 == postings.termFreqs[termID]) {
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]|1);
} else {
termsHashPerField.writeVInt(0, postings.lastDocCodes[termID]);
termsHashPerField.writeVInt(0, postings.docFreqs[termID]);
termsHashPerField.writeVInt(0, postings.termFreqs[termID]);
}
postings.docFreqs[termID] = 1;
postings.termFreqs[termID] = 1;
fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency);
postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1;
postings.lastDocIDs[termID] = docState.docID;
@ -233,7 +233,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
}
fieldState.uniqueTermCount++;
} else {
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.docFreqs[termID]);
fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.termFreqs[termID]);
if (hasProx) {
writeProx(termID, fieldState.position-postings.lastPositions[termID]);
}
@ -252,7 +252,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
public FreqProxPostingsArray(int size, boolean writeFreqs, boolean writeProx, boolean writeOffsets) {
super(size);
if (writeFreqs) {
docFreqs = new int[size];
termFreqs = new int[size];
}
lastDocIDs = new int[size];
lastDocCodes = new int[size];
@ -267,7 +267,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
//System.out.println("PA init freqs=" + writeFreqs + " pos=" + writeProx + " offs=" + writeOffsets);
}
int docFreqs[]; // # times this term occurs in the current doc
int termFreqs[]; // # times this term occurs in the current doc
int lastDocIDs[]; // Last docID where this term occurred
int lastDocCodes[]; // Code for prior doc
int lastPositions[]; // Last position where this term occurred
@ -275,7 +275,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
@Override
ParallelPostingsArray newInstance(int size) {
return new FreqProxPostingsArray(size, docFreqs != null, lastPositions != null, lastOffsets != null);
return new FreqProxPostingsArray(size, termFreqs != null, lastPositions != null, lastOffsets != null);
}
@Override
@ -295,9 +295,9 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
assert to.lastOffsets != null;
System.arraycopy(lastOffsets, 0, to.lastOffsets, 0, numToCopy);
}
if (docFreqs != null) {
assert to.docFreqs != null;
System.arraycopy(docFreqs, 0, to.docFreqs, 0, numToCopy);
if (termFreqs != null) {
assert to.termFreqs != null;
System.arraycopy(termFreqs, 0, to.termFreqs, 0, numToCopy);
}
}
@ -310,7 +310,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
if (lastOffsets != null) {
bytes += RamUsageEstimator.NUM_BYTES_INT;
}
if (docFreqs != null) {
if (termFreqs != null) {
bytes += RamUsageEstimator.NUM_BYTES_INT;
}
@ -416,21 +416,21 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
// Now termStates has numToMerge FieldMergeStates
// which all share the same term. Now we must
// interleave the docID streams.
int numDocs = 0;
int docFreq = 0;
long totTF = 0;
int docID = 0;
while(true) {
//System.out.println(" cycle");
final int termDocFreq;
final int termFreq;
if (freq.eof()) {
if (postings.lastDocCodes[termID] != -1) {
// Return last doc
docID = postings.lastDocIDs[termID];
if (readTermFreq) {
termDocFreq = postings.docFreqs[termID];
termFreq = postings.termFreqs[termID];
} else {
termDocFreq = -1;
termFreq = -1;
}
postings.lastDocCodes[termID] = -1;
} else {
@ -441,20 +441,20 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
final int code = freq.readVInt();
if (!readTermFreq) {
docID += code;
termDocFreq = -1;
termFreq = -1;
} else {
docID += code >>> 1;
if ((code & 1) != 0) {
termDocFreq = 1;
termFreq = 1;
} else {
termDocFreq = freq.readVInt();
termFreq = freq.readVInt();
}
}
assert docID != postings.lastDocIDs[termID];
}
numDocs++;
docFreq++;
assert docID < state.segmentInfo.getDocCount(): "doc=" + docID + " maxDoc=" + state.segmentInfo.getDocCount();
// NOTE: we could check here if the docID was
@ -469,7 +469,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
// 2nd sweep does the real flush, but I suspect
// that'd add too much time to flush.
visitedDocs.set(docID);
postingsConsumer.startDoc(docID, writeTermFreq ? termDocFreq : -1);
postingsConsumer.startDoc(docID, writeTermFreq ? termFreq : -1);
if (docID < delDocLimit) {
// Mark it deleted. TODO: we could also skip
// writing its postings; this would be
@ -485,7 +485,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
}
}
totTF += termDocFreq;
totTF += termFreq;
// Carefully copy over the prox + payload info,
// changing the format to match Lucene's segment
@ -495,7 +495,7 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
// we did record positions (& maybe payload) and/or offsets
int position = 0;
int offset = 0;
for(int j=0;j<termDocFreq;j++) {
for(int j=0;j<termFreq;j++) {
final BytesRef thisPayload;
if (readPositions) {
@ -542,9 +542,9 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
}
postingsConsumer.finishDoc();
}
termsConsumer.finishTerm(text, new TermStats(numDocs, writeTermFreq ? totTF : -1));
termsConsumer.finishTerm(text, new TermStats(docFreq, writeTermFreq ? totTF : -1));
sumTotalTermFreq += totTF;
sumDocFreq += numDocs;
sumDocFreq += docFreq;
}
termsConsumer.finish(writeTermFreq ? sumTotalTermFreq : -1, sumDocFreq, visitedDocs.cardinality());
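To make the renamed stats concrete: if a term appears 3 times in doc 1 and once in doc 7, the flush loop above sees termFreq = 3 and then termFreq = 1, increments docFreq to 2, and accumulates totTF = 4; docFreq feeds sumDocFreq while totTF feeds sumTotalTermFreq.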


@ -2312,9 +2312,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
SegmentInfos sis = new SegmentInfos(); // read infos from dir
sis.read(dir);
final Set<String> dsFilesCopied = new HashSet<String>();
final Map<String, String> dsNames = new HashMap<String, String>();
final Set<String> copiedFiles = new HashSet<String>();
for (SegmentInfoPerCommit info : sis) {
assert !infos.contains(info): "dup info dir=" + info.info.dir + " name=" + info.info.name;
@ -2327,7 +2325,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
IOContext context = new IOContext(new MergeInfo(info.info.getDocCount(), info.info.sizeInBytes(), true, -1));
infos.add(copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied, context, copiedFiles));
infos.add(copySegmentAsIs(info, newSegName, context));
}
}
@ -2463,25 +2461,9 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
/** Copies the segment files as-is into the IndexWriter's directory. */
// TODO: this can be substantially simplified now that 3.x support/shared docstores is removed!
private SegmentInfoPerCommit copySegmentAsIs(SegmentInfoPerCommit info, String segName,
Map<String, String> dsNames, Set<String> dsFilesCopied, IOContext context,
Set<String> copiedFiles)
private SegmentInfoPerCommit copySegmentAsIs(SegmentInfoPerCommit info, String segName, IOContext context)
throws IOException {
// Determine if the doc store of this segment needs to be copied. It's
// only relevant for segments that share doc store with others,
// because the DS might have been copied already, in which case we
// just want to update the DS name of this SegmentInfo.
final String dsName = info.info.name;
assert dsName != null;
final String newDsName;
if (dsNames.containsKey(dsName)) {
newDsName = dsNames.get(dsName);
} else {
dsNames.put(dsName, segName);
newDsName = segName;
}
// note: we don't really need this fis (its copied), but we load it up
// so we don't pass a null value to the si writer
FieldInfos fis = getFieldInfos(info.info);
@ -2496,7 +2478,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
//System.out.println("copy seg=" + info.info.name + " version=" + info.info.getVersion());
// Same SI as before but we change directory, name and docStoreSegment:
// Same SI as before but we change directory and name
SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.getDocCount(),
info.info.getUseCompoundFile(),
info.info.getCodec(), info.info.getDiagnostics(), attributes);
@ -2513,16 +2495,10 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
newInfo.setFiles(segFiles);
// We must rewrite the SI file because it references
// segment name (its own name, if its 3.x, and doc
// store segment name):
// We must rewrite the SI file because it references segment name in its list of files, etc
TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory);
try {
newInfo.getCodec().segmentInfoFormat().getSegmentInfoWriter().write(trackingDir, newInfo, fis, context);
} catch (UnsupportedOperationException uoe) {
// OK: 3x codec cannot write a new SI file;
// SegmentInfos will write this on commit
}
newInfo.getCodec().segmentInfoFormat().getSegmentInfoWriter().write(trackingDir, newInfo, fis, context);
final Collection<String> siFiles = trackingDir.getCreatedFiles();
@ -2537,8 +2513,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
}
assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists; siFiles=" + siFiles;
assert !copiedFiles.contains(file): "file \"" + file + "\" is being copied more than once";
copiedFiles.add(file);
info.info.dir.copy(directory, file, newFileName, context);
}
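copySegmentAsIs() backs IndexWriter.addIndexes(Directory...); with the 3.x shared-doc-store bookkeeping gone, it reduces to a per-segment file copy plus a rewritten SI file. A hedged caller-side sketch; the directories and analyzer are placeholders:
// Sketch: the addIndexes(Directory...) path served by the code above.
IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_40, analyzer);
IndexWriter writer = new IndexWriter(destDir, conf);
writer.addIndexes(srcDir1, srcDir2);  // each source segment is copied as-is
writer.commit();
writer.close();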


@ -42,6 +42,9 @@ public interface IndexableFieldType {
/** True if term vector positions should be indexed */
public boolean storeTermVectorPositions();
/** True if term vector payloads should be indexed */
public boolean storeTermVectorPayloads();
/** True if norms should not be indexed */
public boolean omitNorms();


@ -199,6 +199,7 @@ public class MergeState {
// and we could make a codec(wrapper) to do all of this privately so IW is uninvolved
public PayloadProcessorProvider payloadProcessorProvider;
public ReaderPayloadProcessor[] readerPayloadProcessor;
public ReaderPayloadProcessor currentReaderPayloadProcessor;
public PayloadProcessor[] currentPayloadProcessor;
// TODO: get rid of this? it tells you which segments are 'aligned' (e.g. for bulk merging)
