LUCENE-2326: Removed SVN checkouts for backwards tests. The backwards branch is now included in the svn repository using "svn copy" after release.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@924207 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2010-03-17 10:24:07 +00:00
parent 5023a08ace
commit 675597141b
753 changed files with 158526 additions and 100 deletions


@@ -238,9 +238,13 @@ Optimizations
Build
* LUCENE-2124: Moved the JDK-based collation support from contrib/collation
into core, and moved the ICU-based collation support into contrib/icu.
(Robert Muir)
* LUCENE-2326: Removed SVN checkouts for backwards tests. The backwards branch
is now included in the svn repository using "svn copy" after release.
(Uwe Schindler)
Test Cases


@@ -0,0 +1,13 @@
This folder contains the src/ folder of the previous Lucene major version.
The test-backwards ANT task compiles the previous version's core classes and then compiles
its tests against those classes. The compiled test classes are then run against the new
lucene-core.jar file.
After branching a new Lucene major version (branch name "lucene_X_Y") do the following:
* svn rm backwards/src/
* svn cp https://svn.apache.org/repos/asf/lucene/java/branches/lucene_X_Y/src/ backwards/src/
* Check that everything is correct: The backwards folder should contain a src/ folder
that now contains java, test, demo,.... The files should be the ones from the branch.
* Run "ant test-backwards"


@@ -0,0 +1,253 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project name="lucene-demo" default="compile-demo" basedir=".">
<dirname file="${ant.file.common}" property="common.dir"/>
<property name="version" value="@PLACEHOLDER_version@"/>
<property name="javac.source" value="@PLACEHOLDER_javac.source@"/>
<property name="javac.target" value="@PLACEHOLDER_javac.target@"/>
<property name="build.dir" location="build"/>
<property name="core.name" value="lucene-core-${version}"/>
<property name="demo.name" value="lucene-demos-${version}"/>
<property name="demo.war.name" value="luceneweb"/>
<property name="manifest.file" location="${build.dir}/MANIFEST.MF"/>
<!-- Build classpath -->
<path id="classpath">
<pathelement location="${common.dir}/${core.name}.jar"/>
</path>
<path id="demo.classpath">
<path refid="classpath"/>
<pathelement location="${build.dir}/classes/demo"/>
</path>
<available
property="jar.core.present"
type="file"
file="${common.dir}/${core.name}.jar"
/>
<target name="jar.core-check">
<fail unless="jar.core.present">
##################################################################
${common.dir}/${core.name}.jar not found.
##################################################################
</fail>
</target>
<!-- ================================================================== -->
<!-- J A R -->
<!-- ================================================================== -->
<!-- -->
<!-- ================================================================== -->
<target name="jar-demo" depends="compile-demo"
description="Build demo jar file">
<sequential>
<build-manifest/>
<jar
destfile="${demo.name}.jar"
basedir="${build.dir}/classes/demo"
excludes="**/*.java"
manifest="${manifest.file}">
<metainf dir="${common.dir}">
<include name="LICENSE.txt"/>
<include name="NOTICE.txt"/>
</metainf>
</jar>
</sequential>
</target>
<target name="war-demo" depends="jar-demo"
description="Build demo war file">
<sequential>
<build-manifest/>
<war destfile="${demo.war.name}.war"
webxml="src/jsp/WEB-INF/web.xml"
manifest="${manifest.file}">
<fileset dir="src/jsp" excludes="WEB-INF/web.xml"/>
<lib dir="." includes="${demo.name}.jar"/>
<lib dir="." includes="${core.name}.jar"/>
<metainf dir="${common.dir}">
<include name="LICENSE.txt"/>
<include name="NOTICE.txt"/>
</metainf>
</war>
</sequential>
</target>
<!-- ================================================================== -->
<!-- B U I L D D E M O -->
<!-- ================================================================== -->
<!-- -->
<!-- ================================================================== -->
<target name="compile-demo" depends="jar.core-check"
description="Compile demo classes">
<mkdir dir="${build.dir}/classes/demo"/>
<compile
srcdir="src/demo"
destdir="${build.dir}/classes/demo">
<classpath refid="demo.classpath"/>
</compile>
</target>
<target name="clean"
description="Removes contents of build directory">
<delete dir="${build.dir}"/>
<delete dir="${common.dir}/demo-text-dir"/>
<delete dir="${common.dir}/demo-html-dir"/>
</target>
<!-- ================================================================== -->
<!-- R U N T E X T I N D E X I N G D E M O -->
<!-- ================================================================== -->
<!-- -->
<!-- ================================================================== -->
<target name="demo-index-text" depends="jar-demo"
description="Run text indexing demo (index the sources of the demo).">
<echo>----- (1) Prepare dir ----- </echo>
<echo>cd ${common.dir} </echo>
<echo>rmdir demo-text-dir </echo>
<delete dir="${common.dir}/demo-text-dir"/>
<echo>mkdir demo-text-dir </echo>
<mkdir dir="${common.dir}/demo-text-dir"/>
<echo>cd demo-text-dir </echo>
<echo>----- (2) Index the files located under ${common.dir}/src ----- </echo>
<invoke-java class="IndexFiles" params="${common.dir}/src/demo" paramsDisplay="../src/demo" type="text"/>
</target>
<!-- ================================================================== -->
<!-- R U N T E X T S E A R C H D E M O -->
<!-- ================================================================== -->
<!-- -->
<!-- ================================================================== -->
<target name="demo-search-text" depends="jar-demo"
description="Run interactive search demo.">
<echo>----- Interactive search ----- </echo>
<echo>cd demo-text-dir </echo>
<invoke-java class="SearchFiles" params="-index index" paramsDisplay="-index index" type="text"/>
</target>
<!-- ================================================================== -->
<!-- R U N H T M L I N D E X I N G D E M O -->
<!-- ================================================================== -->
<!-- -->
<!-- ================================================================== -->
<target name="demo-index-html" depends="jar-demo"
description="Run html indexing demo (index the javadocs).">
<echo>----- (1) Prepare dir ----- </echo>
<echo>cd ${common.dir} </echo>
<echo>rmdir demo-html-dir </echo>
<delete dir="${common.dir}/demo-html-dir"/>
<echo>mkdir demo-html-dir </echo>
<mkdir dir="${common.dir}/demo-html-dir"/>
<echo>cd demo-html-dir </echo>
<echo>----- (2) Index the files located under ${common.dir}/docs/api ----- </echo>
<invoke-java class="IndexFiles" params="${common.dir}/docs/api" paramsDisplay="../docs/api" type="html"/>
</target>
<!-- ================================================================== -->
<!-- R U N H T M L S E A R C H D E M O -->
<!-- ================================================================== -->
<!-- -->
<!-- ================================================================== -->
<target name="demo-search-html" depends="jar-demo"
description="Run interactive search demo.">
<echo>----- Interactive search ----- </echo>
<echo>cd demo-html-dir </echo>
<invoke-java class="SearchFiles" params="-index index" paramsDisplay="-index index" type="html"/>
</target>
<!--+
| M A C R O S
+-->
<macrodef name="build-manifest" description="Builds a manifest file">
<sequential>
<manifest file="${manifest.file}">
<attribute name="Specification-Title" value="Lucene Search Engine: demos"/>
<!-- spec version must match "digit+{.digit+}*" -->
<attribute name="Specification-Version" value="${version}"/>
<attribute name="Specification-Vendor"
value="The Apache Software Foundation"/>
<attribute name="Implementation-Title" value="org.apache.lucene"/>
<!-- impl version can be any string -->
<attribute name="Implementation-Version"
value="${version}"/>
<attribute name="Implementation-Vendor"
value="The Apache Software Foundation"/>
<attribute name="X-Compile-Source-JDK"
value="${javac.source}"/>
<attribute name="X-Compile-Target-JDK"
value="${javac.target}"/>
</manifest>
</sequential>
</macrodef>
<macrodef name="compile">
<attribute name="srcdir"/>
<attribute name="destdir"/>
<element name="nested" implicit="yes" optional="yes"/>
<sequential>
<mkdir dir="@{destdir}"/>
<javac
srcdir="@{srcdir}"
destdir="@{destdir}"
deprecation="off"
debug="on"
source="${javac.source}"
target="${javac.target}">
<nested/>
</javac>
</sequential>
</macrodef>
<macrodef name="invoke-java">
<attribute name="class"/>
<attribute name="params"/>
<attribute name="paramsDisplay"/>
<attribute name="type"/>
<sequential>
<echo>java -classpath "../${core.name}.jar;../${demo.name}.jar" org.apache.lucene.demo.@{class} @{paramsDisplay} </echo>
<java classname="org.apache.lucene.demo.@{class}"
dir="${common.dir}/demo-@{type}-dir"
fork="true"
failonerror="true"
maxmemory="128m"
>
<arg value="@{params}"/>
<classpath>
<pathelement location="${common.dir}/${core.name}.jar"/>
<pathelement location="${common.dir}/${demo.name}.jar"/>
</classpath>
</java>
</sequential>
</macrodef>
</project>


@@ -0,0 +1,66 @@
package org.apache.lucene.demo;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
//import org.apache.lucene.index.Term;
/** Deletes documents from an index that contain a given term. */
public class DeleteFiles {
private DeleteFiles() {} // no instances; static entry point only
/** Deletes documents from an index that contain a given term. */
public static void main(String[] args) {
String usage = "java org.apache.lucene.demo.DeleteFiles <unique_term>";
if (args.length == 0) {
System.err.println("Usage: " + usage);
System.exit(1);
}
try {
Directory directory = FSDirectory.open(new File("index"));
IndexReader reader = IndexReader.open(directory, false); // we don't want read-only because we are about to delete
Term term = new Term("path", args[0]);
int deleted = reader.deleteDocuments(term);
System.out.println("deleted " + deleted +
" documents containing " + term);
// one can also delete documents by their internal id:
/*
for (int i = 0; i < reader.maxDoc(); i++) {
System.out.println("Deleting document with id " + i);
reader.delete(i);
}*/
reader.close();
directory.close();
} catch (Exception e) {
System.out.println(" caught a " + e.getClass() +
"\n with message: " + e.getMessage());
}
}
}
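
For illustration only (not part of this commit): a minimal sketch of the same deletion done through IndexWriter rather than a writable IndexReader. The "index" directory and the "path" field follow the demo's conventions; the class name is invented for this note.

import java.io.File;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

class DeleteByTermSketch {
  public static void main(String[] args) throws Exception {
    Directory directory = FSDirectory.open(new File("index"));
    // IndexWriter buffers deletes and applies them when the writer is closed.
    IndexWriter writer = new IndexWriter(directory,
        new StandardAnalyzer(Version.LUCENE_CURRENT),
        false, IndexWriter.MaxFieldLength.LIMITED);
    writer.deleteDocuments(new Term("path", args[0])); // delete documents whose "path" matches
    writer.close();
    directory.close();
  }
}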


@@ -0,0 +1,71 @@
package org.apache.lucene.demo;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.FileReader;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
/** A utility for making Lucene Documents from a File. */
public class FileDocument {
/** Makes a document for a File.
<p>
The document has three fields:
<ul>
<li><code>path</code>--containing the pathname of the file, as a stored,
untokenized field;
<li><code>modified</code>--containing the last modified date of the file as
a field created by <a
href="lucene.document.DateTools.html">DateTools</a>; and
<li><code>contents</code>--containing the full contents of the file, as a
Reader field.
</ul>
*/
public static Document Document(File f)
throws java.io.FileNotFoundException {
// make a new, empty document
Document doc = new Document();
// Add the path of the file as a field named "path". Use a field that is
// indexed (i.e. searchable), but don't tokenize the field into words.
doc.add(new Field("path", f.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED));
// Add the last modified date of the file as a field named "modified". Use
// a field that is indexed (i.e. searchable), but don't tokenize the field
// into words.
doc.add(new Field("modified",
DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
Field.Store.YES, Field.Index.NOT_ANALYZED));
// Add the contents of the file to a field named "contents". Specify a Reader,
// so that the text of the file is tokenized and indexed, but not stored.
// Note that FileReader expects the file to be in the system's default encoding.
// If that's not the case, searching for special characters will fail.
doc.add(new Field("contents", new FileReader(f)));
// return the document
return doc;
}
private FileDocument() {}
}
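
A hedged follow-up sketch, not present in the sources above: converting the stored "modified" value back into a Date with the same DateTools class (Resolution.MINUTE is what FileDocument stores). The helper name is made up for this note.

import java.text.ParseException;
import java.util.Date;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;

class ModifiedFieldSketch {
  // Reads the "modified" field written by FileDocument and parses it back to a Date.
  static Date lastModified(Document doc) throws ParseException {
    String stored = doc.get("modified"); // e.g. "201003171024" at MINUTE resolution
    return DateTools.stringToDate(stored);
  }
}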


@@ -0,0 +1,86 @@
package org.apache.lucene.demo;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.*;
import org.apache.lucene.document.*;
import org.apache.lucene.demo.html.HTMLParser;
/** A utility for making Lucene Documents for HTML documents. */
public class HTMLDocument {
static char dirSep = System.getProperty("file.separator").charAt(0);
public static String uid(File f) {
// Append path and date into a string in such a way that lexicographic
// sorting gives the same results as a walk of the file hierarchy. Thus
// null (\u0000) is used both to separate directory components and to
// separate the path from the date.
return f.getPath().replace(dirSep, '\u0000') +
"\u0000" +
DateTools.timeToString(f.lastModified(), DateTools.Resolution.SECOND);
}
public static String uid2url(String uid) {
String url = uid.replace('\u0000', '/'); // replace nulls with slashes
return url.substring(0, url.lastIndexOf('/')); // remove date from end
}
public static Document Document(File f)
throws IOException, InterruptedException {
// make a new, empty document
Document doc = new Document();
// Add the url as a field named "path". Use a field that is
// indexed (i.e. searchable), but don't tokenize the field into words.
doc.add(new Field("path", f.getPath().replace(dirSep, '/'), Field.Store.YES,
Field.Index.NOT_ANALYZED));
// Add the last modified date of the file as a field named "modified".
// Use a field that is indexed (i.e. searchable), but don't tokenize
// the field into words.
doc.add(new Field("modified",
DateTools.timeToString(f.lastModified(), DateTools.Resolution.MINUTE),
Field.Store.YES, Field.Index.NOT_ANALYZED));
// Add the uid as a field, so that the index can be incrementally maintained.
// This field is not stored with the document; it is indexed, but it is not
// tokenized prior to indexing.
doc.add(new Field("uid", uid(f), Field.Store.NO, Field.Index.NOT_ANALYZED));
FileInputStream fis = new FileInputStream(f);
HTMLParser parser = new HTMLParser(fis);
// Add the tag-stripped contents as a Reader-valued Text field so it will
// get tokenized and indexed.
doc.add(new Field("contents", parser.getReader()));
// Add the summary as a field that is stored and returned with
// hit documents for display.
doc.add(new Field("summary", parser.getSummary(), Field.Store.YES, Field.Index.NO));
// Add the title as a field so that it can be searched and is stored.
doc.add(new Field("title", parser.getTitle(), Field.Store.YES, Field.Index.ANALYZED));
// return the document
return doc;
}
private HTMLDocument() {}
}
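
Purely illustrative (assumed, not in this commit): the uid round trip described in the comments above. uid() joins path components and the modification time with '\u0000' so lexicographic order matches a file-system walk, and uid2url() strips the date and restores '/' separators.

import java.io.File;
import org.apache.lucene.demo.HTMLDocument;

class UidSketch {
  public static void main(String[] args) {
    File f = new File("docs/api/index.html");
    String uid = HTMLDocument.uid(f);       // path + '\u0000' + last-modified time
    String url = HTMLDocument.uid2url(uid); // "docs/api/index.html" again
    System.out.println(uid.replace('\u0000', '|') + " -> " + url);
  }
}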


@@ -0,0 +1,100 @@
package org.apache.lucene.demo;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Date;
/** Index all text files under a directory. */
public class IndexFiles {
private IndexFiles() {}
static final File INDEX_DIR = new File("index");
/** Index all text files under a directory. */
public static void main(String[] args) {
String usage = "java org.apache.lucene.demo.IndexFiles <root_directory>";
if (args.length == 0) {
System.err.println("Usage: " + usage);
System.exit(1);
}
if (INDEX_DIR.exists()) {
System.out.println("Cannot save index to '" +INDEX_DIR+ "' directory, please delete it first");
System.exit(1);
}
final File docDir = new File(args[0]);
if (!docDir.exists() || !docDir.canRead()) {
System.out.println("Document directory '" +docDir.getAbsolutePath()+ "' does not exist or is not readable, please check the path");
System.exit(1);
}
Date start = new Date();
try {
IndexWriter writer = new IndexWriter(FSDirectory.open(INDEX_DIR), new StandardAnalyzer(Version.LUCENE_CURRENT), true, IndexWriter.MaxFieldLength.LIMITED);
System.out.println("Indexing to directory '" +INDEX_DIR+ "'...");
indexDocs(writer, docDir);
System.out.println("Optimizing...");
writer.optimize();
writer.close();
Date end = new Date();
System.out.println(end.getTime() - start.getTime() + " total milliseconds");
} catch (IOException e) {
System.out.println(" caught a " + e.getClass() +
"\n with message: " + e.getMessage());
}
}
static void indexDocs(IndexWriter writer, File file)
throws IOException {
// do not try to index files that cannot be read
if (file.canRead()) {
if (file.isDirectory()) {
String[] files = file.list();
// an IO error could occur
if (files != null) {
for (int i = 0; i < files.length; i++) {
indexDocs(writer, new File(file, files[i]));
}
}
} else {
System.out.println("adding " + file);
try {
writer.addDocument(FileDocument.Document(file));
}
// At least on Windows, some temporary files raise this exception with an "access denied" message;
// checking if the file can be read doesn't help.
catch (FileNotFoundException fnfe) {
;
}
}
}
}
}
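
A small usage sketch (an assumption, not part of the commit): after running IndexFiles, the resulting index can be opened read-only to check how many documents were written.

import java.io.File;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

class IndexStatsSketch {
  public static void main(String[] args) throws Exception {
    // Open the "index" directory created by IndexFiles in read-only mode.
    IndexReader reader = IndexReader.open(FSDirectory.open(new File("index")), true);
    System.out.println("documents indexed: " + reader.numDocs());
    reader.close();
  }
}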


@@ -0,0 +1,168 @@
package org.apache.lucene.demo;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import java.io.File;
import java.util.Date;
import java.util.Arrays;
/** Indexer for HTML files. */
public class IndexHTML {
private IndexHTML() {}
private static boolean deleting = false; // true during deletion pass
private static IndexReader reader; // existing index
private static IndexWriter writer; // new index being built
private static TermEnum uidIter; // document id iterator
/** Indexer for HTML files.*/
public static void main(String[] argv) {
try {
File index = new File("index");
boolean create = false;
File root = null;
String usage = "IndexHTML [-create] [-index <index>] <root_directory>";
if (argv.length == 0) {
System.err.println("Usage: " + usage);
return;
}
for (int i = 0; i < argv.length; i++) {
if (argv[i].equals("-index")) { // parse -index option
index = new File(argv[++i]);
} else if (argv[i].equals("-create")) { // parse -create option
create = true;
} else if (i != argv.length-1) {
System.err.println("Usage: " + usage);
return;
} else
root = new File(argv[i]);
}
if(root == null) {
System.err.println("Specify directory to index");
System.err.println("Usage: " + usage);
return;
}
Date start = new Date();
if (!create) { // delete stale docs
deleting = true;
indexDocs(root, index, create);
}
writer = new IndexWriter(FSDirectory.open(index), new StandardAnalyzer(Version.LUCENE_CURRENT), create,
new IndexWriter.MaxFieldLength(1000000));
indexDocs(root, index, create); // add new docs
System.out.println("Optimizing index...");
writer.optimize();
writer.close();
Date end = new Date();
System.out.print(end.getTime() - start.getTime());
System.out.println(" total milliseconds");
} catch (Exception e) {
e.printStackTrace();
}
}
/* Walk directory hierarchy in uid order, while keeping uid iterator from
 * existing index in sync. Mismatches indicate one of: (a) old documents to
 * be deleted; (b) unchanged documents, to be left alone; or (c) new
 * documents, to be indexed.
 */
private static void indexDocs(File file, File index, boolean create)
throws Exception {
if (!create) { // incrementally update
reader = IndexReader.open(FSDirectory.open(index), false); // open existing index
uidIter = reader.terms(new Term("uid", "")); // init uid iterator
indexDocs(file);
if (deleting) { // delete rest of stale docs
while (uidIter.term() != null && uidIter.term().field() == "uid") {
System.out.println("deleting " +
HTMLDocument.uid2url(uidIter.term().text()));
reader.deleteDocuments(uidIter.term());
uidIter.next();
}
deleting = false;
}
uidIter.close(); // close uid iterator
reader.close(); // close existing index
} else // don't have an existing index
indexDocs(file);
}
private static void indexDocs(File file) throws Exception {
if (file.isDirectory()) { // if a directory
String[] files = file.list(); // list its files
Arrays.sort(files); // sort the files
for (int i = 0; i < files.length; i++) // recursively index them
indexDocs(new File(file, files[i]));
} else if (file.getPath().endsWith(".html") || // index .html files
file.getPath().endsWith(".htm") || // index .htm files
file.getPath().endsWith(".txt")) { // index .txt files
if (uidIter != null) {
String uid = HTMLDocument.uid(file); // construct uid for doc
while (uidIter.term() != null && uidIter.term().field() == "uid" &&
uidIter.term().text().compareTo(uid) < 0) {
if (deleting) { // delete stale docs
System.out.println("deleting " +
HTMLDocument.uid2url(uidIter.term().text()));
reader.deleteDocuments(uidIter.term());
}
uidIter.next();
}
if (uidIter.term() != null && uidIter.term().field() == "uid" &&
uidIter.term().text().compareTo(uid) == 0) {
uidIter.next(); // keep matching docs
} else if (!deleting) { // add new docs
Document doc = HTMLDocument.Document(file);
System.out.println("adding " + doc.get("path"));
writer.addDocument(doc);
}
} else { // creating a new index
Document doc = HTMLDocument.Document(file);
System.out.println("adding " + doc.get("path"));
writer.addDocument(doc); // add docs unconditionally
}
}
}
}


@@ -0,0 +1,313 @@
package org.apache.lucene.demo;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/** Simple command-line based search demo. */
public class SearchFiles {
/** Use the norms from one field for all fields. Norms are read into memory,
* using a byte of memory per document per searched field. This can cause
* search of large collections with a large number of fields to run out of
* memory. If all of the fields contain only a single token, the norms
* are all identical and a single norm vector may be shared. */
private static class OneNormsReader extends FilterIndexReader {
private String field;
public OneNormsReader(IndexReader in, String field) {
super(in);
this.field = field;
}
@Override
public byte[] norms(String field) throws IOException {
return in.norms(this.field);
}
}
private SearchFiles() {}
/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
String usage =
"Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field] [-paging hitsPerPage]";
usage += "\n\tSpecify 'false' for hitsPerPage to use streaming instead of paging search.";
if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
System.out.println(usage);
System.exit(0);
}
String index = "index";
String field = "contents";
String queries = null;
int repeat = 0;
boolean raw = false;
String normsField = null;
boolean paging = true;
int hitsPerPage = 10;
for (int i = 0; i < args.length; i++) {
if ("-index".equals(args[i])) {
index = args[i+1];
i++;
} else if ("-field".equals(args[i])) {
field = args[i+1];
i++;
} else if ("-queries".equals(args[i])) {
queries = args[i+1];
i++;
} else if ("-repeat".equals(args[i])) {
repeat = Integer.parseInt(args[i+1]);
i++;
} else if ("-raw".equals(args[i])) {
raw = true;
} else if ("-norms".equals(args[i])) {
normsField = args[i+1];
i++;
} else if ("-paging".equals(args[i])) {
if (args[i+1].equals("false")) {
paging = false;
} else {
hitsPerPage = Integer.parseInt(args[i+1]);
if (hitsPerPage == 0) {
paging = false;
}
}
i++;
}
}
IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)), true); // only searching, so read-only=true
if (normsField != null)
reader = new OneNormsReader(reader, normsField);
Searcher searcher = new IndexSearcher(reader);
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
BufferedReader in = null;
if (queries != null) {
in = new BufferedReader(new FileReader(queries));
} else {
in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
}
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, field, analyzer);
while (true) {
if (queries == null) // prompt the user
System.out.println("Enter query: ");
String line = in.readLine();
if (line == null || line.length() == -1)
break;
line = line.trim();
if (line.length() == 0)
break;
Query query = parser.parse(line);
System.out.println("Searching for: " + query.toString(field));
if (repeat > 0) { // repeat & time as benchmark
Date start = new Date();
for (int i = 0; i < repeat; i++) {
searcher.search(query, null, 100);
}
Date end = new Date();
System.out.println("Time: "+(end.getTime()-start.getTime())+"ms");
}
if (paging) {
doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null);
} else {
doStreamingSearch(searcher, query);
}
}
reader.close();
}
/**
* This method uses a custom Collector implementation which simply prints out
* the docId and score of every matching document.
*
* This simulates the streaming search use case, where all hits are supposed to
* be processed, regardless of their relevance.
*/
public static void doStreamingSearch(final Searcher searcher, Query query) throws IOException {
Collector streamingHitCollector = new Collector() {
private Scorer scorer;
private int docBase;
// simply print docId and score of every matching document
@Override
public void collect(int doc) throws IOException {
System.out.println("doc=" + doc + docBase + " score=" + scorer.score());
}
@Override
public boolean acceptsDocsOutOfOrder() {
return true;
}
@Override
public void setNextReader(IndexReader reader, int docBase)
throws IOException {
this.docBase = docBase;
}
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
};
searcher.search(query, streamingHitCollector);
}
/**
* This demonstrates a typical paging search scenario, where the search engine presents
* pages of size n to the user. The user can then go to the next page if interested in
* the next hits.
*
* When the query is executed for the first time, only enough results are collected
* to fill 5 result pages. If the user wants to page beyond this limit, the query
* is executed another time and all hits are collected.
*
*/
public static void doPagingSearch(BufferedReader in, Searcher searcher, Query query,
int hitsPerPage, boolean raw, boolean interactive) throws IOException {
// Collect enough docs to show 5 pages
TopScoreDocCollector collector = TopScoreDocCollector.create(
5 * hitsPerPage, false);
searcher.search(query, collector);
ScoreDoc[] hits = collector.topDocs().scoreDocs;
int numTotalHits = collector.getTotalHits();
System.out.println(numTotalHits + " total matching documents");
int start = 0;
int end = Math.min(numTotalHits, hitsPerPage);
while (true) {
if (end > hits.length) {
System.out.println("Only results 1 - " + hits.length +" of " + numTotalHits + " total matching documents collected.");
System.out.println("Collect more (y/n) ?");
String line = in.readLine();
if (line.length() == 0 || line.charAt(0) == 'n') {
break;
}
collector = TopScoreDocCollector.create(numTotalHits, false);
searcher.search(query, collector);
hits = collector.topDocs().scoreDocs;
}
end = Math.min(hits.length, start + hitsPerPage);
for (int i = start; i < end; i++) {
if (raw) { // output raw format
System.out.println("doc="+hits[i].doc+" score="+hits[i].score);
continue;
}
Document doc = searcher.doc(hits[i].doc);
String path = doc.get("path");
if (path != null) {
System.out.println((i+1) + ". " + path);
String title = doc.get("title");
if (title != null) {
System.out.println(" Title: " + doc.get("title"));
}
} else {
System.out.println((i+1) + ". " + "No path for this document");
}
}
if (!interactive) {
break;
}
if (numTotalHits >= end) {
boolean quit = false;
while (true) {
System.out.print("Press ");
if (start - hitsPerPage >= 0) {
System.out.print("(p)revious page, ");
}
if (start + hitsPerPage < numTotalHits) {
System.out.print("(n)ext page, ");
}
System.out.println("(q)uit or enter number to jump to a page.");
String line = in.readLine();
if (line.length() == 0 || line.charAt(0)=='q') {
quit = true;
break;
}
if (line.charAt(0) == 'p') {
start = Math.max(0, start - hitsPerPage);
break;
} else if (line.charAt(0) == 'n') {
if (start + hitsPerPage < numTotalHits) {
start+=hitsPerPage;
}
break;
} else {
int page = Integer.parseInt(line);
if ((page - 1) * hitsPerPage < numTotalHits) {
start = (page - 1) * hitsPerPage;
break;
} else {
System.out.println("No such page");
}
}
}
if (quit) break;
end = Math.min(numTotalHits, start + hitsPerPage);
}
}
}
}


@@ -0,0 +1,329 @@
package org.apache.lucene.demo.html;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.HashMap;
import java.util.Map;
public class Entities {
static final Map<String,String> decoder = new HashMap<String,String>(300);
static final String[] encoder = new String[0x100];
static final String decode(String entity) {
if (entity.charAt(entity.length()-1) == ';') // remove trailing semicolon
entity = entity.substring(0, entity.length()-1);
if (entity.charAt(1) == '#') {
int start = 2;
int radix = 10;
if (entity.charAt(2) == 'X' || entity.charAt(2) == 'x') {
start++;
radix = 16;
}
Character c =
new Character((char)Integer.parseInt(entity.substring(start), radix));
return c.toString();
} else {
String s = decoder.get(entity);
if (s != null)
return s;
else return "";
}
}
public static final String encode(String s) {
int length = s.length();
StringBuffer buffer = new StringBuffer(length * 2);
for (int i = 0; i < length; i++) {
char c = s.charAt(i);
int j = (int)c;
if (j < 0x100 && encoder[j] != null) {
buffer.append(encoder[j]); // have a named encoding
buffer.append(';');
} else if (j < 0x80) {
buffer.append(c); // use ASCII value
} else {
buffer.append("&#"); // use numeric encoding
buffer.append((int)c);
buffer.append(';');
}
}
return buffer.toString();
}
static final void add(String entity, int value) {
decoder.put(entity, (new Character((char)value)).toString());
if (value < 0x100)
encoder[value] = entity;
}
static {
add("&nbsp", 160);
add("&iexcl", 161);
add("&cent", 162);
add("&pound", 163);
add("&curren", 164);
add("&yen", 165);
add("&brvbar", 166);
add("&sect", 167);
add("&uml", 168);
add("&copy", 169);
add("&ordf", 170);
add("&laquo", 171);
add("&not", 172);
add("&shy", 173);
add("&reg", 174);
add("&macr", 175);
add("&deg", 176);
add("&plusmn", 177);
add("&sup2", 178);
add("&sup3", 179);
add("&acute", 180);
add("&micro", 181);
add("&para", 182);
add("&middot", 183);
add("&cedil", 184);
add("&sup1", 185);
add("&ordm", 186);
add("&raquo", 187);
add("&frac14", 188);
add("&frac12", 189);
add("&frac34", 190);
add("&iquest", 191);
add("&Agrave", 192);
add("&Aacute", 193);
add("&Acirc", 194);
add("&Atilde", 195);
add("&Auml", 196);
add("&Aring", 197);
add("&AElig", 198);
add("&Ccedil", 199);
add("&Egrave", 200);
add("&Eacute", 201);
add("&Ecirc", 202);
add("&Euml", 203);
add("&Igrave", 204);
add("&Iacute", 205);
add("&Icirc", 206);
add("&Iuml", 207);
add("&ETH", 208);
add("&Ntilde", 209);
add("&Ograve", 210);
add("&Oacute", 211);
add("&Ocirc", 212);
add("&Otilde", 213);
add("&Ouml", 214);
add("&times", 215);
add("&Oslash", 216);
add("&Ugrave", 217);
add("&Uacute", 218);
add("&Ucirc", 219);
add("&Uuml", 220);
add("&Yacute", 221);
add("&THORN", 222);
add("&szlig", 223);
add("&agrave", 224);
add("&aacute", 225);
add("&acirc", 226);
add("&atilde", 227);
add("&auml", 228);
add("&aring", 229);
add("&aelig", 230);
add("&ccedil", 231);
add("&egrave", 232);
add("&eacute", 233);
add("&ecirc", 234);
add("&euml", 235);
add("&igrave", 236);
add("&iacute", 237);
add("&icirc", 238);
add("&iuml", 239);
add("&eth", 240);
add("&ntilde", 241);
add("&ograve", 242);
add("&oacute", 243);
add("&ocirc", 244);
add("&otilde", 245);
add("&ouml", 246);
add("&divide", 247);
add("&oslash", 248);
add("&ugrave", 249);
add("&uacute", 250);
add("&ucirc", 251);
add("&uuml", 252);
add("&yacute", 253);
add("&thorn", 254);
add("&yuml", 255);
add("&fnof", 402);
add("&Alpha", 913);
add("&Beta", 914);
add("&Gamma", 915);
add("&Delta", 916);
add("&Epsilon",917);
add("&Zeta", 918);
add("&Eta", 919);
add("&Theta", 920);
add("&Iota", 921);
add("&Kappa", 922);
add("&Lambda", 923);
add("&Mu", 924);
add("&Nu", 925);
add("&Xi", 926);
add("&Omicron",927);
add("&Pi", 928);
add("&Rho", 929);
add("&Sigma", 931);
add("&Tau", 932);
add("&Upsilon",933);
add("&Phi", 934);
add("&Chi", 935);
add("&Psi", 936);
add("&Omega", 937);
add("&alpha", 945);
add("&beta", 946);
add("&gamma", 947);
add("&delta", 948);
add("&epsilon",949);
add("&zeta", 950);
add("&eta", 951);
add("&theta", 952);
add("&iota", 953);
add("&kappa", 954);
add("&lambda", 955);
add("&mu", 956);
add("&nu", 957);
add("&xi", 958);
add("&omicron",959);
add("&pi", 960);
add("&rho", 961);
add("&sigmaf", 962);
add("&sigma", 963);
add("&tau", 964);
add("&upsilon",965);
add("&phi", 966);
add("&chi", 967);
add("&psi", 968);
add("&omega", 969);
add("&thetasym",977);
add("&upsih", 978);
add("&piv", 982);
add("&bull", 8226);
add("&hellip", 8230);
add("&prime", 8242);
add("&Prime", 8243);
add("&oline", 8254);
add("&frasl", 8260);
add("&weierp", 8472);
add("&image", 8465);
add("&real", 8476);
add("&trade", 8482);
add("&alefsym",8501);
add("&larr", 8592);
add("&uarr", 8593);
add("&rarr", 8594);
add("&darr", 8595);
add("&harr", 8596);
add("&crarr", 8629);
add("&lArr", 8656);
add("&uArr", 8657);
add("&rArr", 8658);
add("&dArr", 8659);
add("&hArr", 8660);
add("&forall", 8704);
add("&part", 8706);
add("&exist", 8707);
add("&empty", 8709);
add("&nabla", 8711);
add("&isin", 8712);
add("&notin", 8713);
add("&ni", 8715);
add("&prod", 8719);
add("&sum", 8721);
add("&minus", 8722);
add("&lowast", 8727);
add("&radic", 8730);
add("&prop", 8733);
add("&infin", 8734);
add("&ang", 8736);
add("&and", 8743);
add("&or", 8744);
add("&cap", 8745);
add("&cup", 8746);
add("&int", 8747);
add("&there4", 8756);
add("&sim", 8764);
add("&cong", 8773);
add("&asymp", 8776);
add("&ne", 8800);
add("&equiv", 8801);
add("&le", 8804);
add("&ge", 8805);
add("&sub", 8834);
add("&sup", 8835);
add("&nsub", 8836);
add("&sube", 8838);
add("&supe", 8839);
add("&oplus", 8853);
add("&otimes", 8855);
add("&perp", 8869);
add("&sdot", 8901);
add("&lceil", 8968);
add("&rceil", 8969);
add("&lfloor", 8970);
add("&rfloor", 8971);
add("&lang", 9001);
add("&rang", 9002);
add("&loz", 9674);
add("&spades", 9824);
add("&clubs", 9827);
add("&hearts", 9829);
add("&diams", 9830);
add("&quot", 34);
add("&amp", 38);
add("&lt", 60);
add("&gt", 62);
add("&OElig", 338);
add("&oelig", 339);
add("&Scaron", 352);
add("&scaron", 353);
add("&Yuml", 376);
add("&circ", 710);
add("&tilde", 732);
add("&ensp", 8194);
add("&emsp", 8195);
add("&thinsp", 8201);
add("&zwnj", 8204);
add("&zwj", 8205);
add("&lrm", 8206);
add("&rlm", 8207);
add("&ndash", 8211);
add("&mdash", 8212);
add("&lsquo", 8216);
add("&rsquo", 8217);
add("&sbquo", 8218);
add("&ldquo", 8220);
add("&rdquo", 8221);
add("&bdquo", 8222);
add("&dagger", 8224);
add("&Dagger", 8225);
add("&permil", 8240);
add("&lsaquo", 8249);
add("&rsaquo", 8250);
add("&euro", 8364);
}
}
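
For illustration only: encode() maps non-ASCII and markup characters to the named or numeric entities registered above, and decode() reverses a single entity. decode() is package-private, so this sketch assumes it sits in the same org.apache.lucene.demo.html package; the class name is invented.

package org.apache.lucene.demo.html;

class EntitiesSketch {
  public static void main(String[] args) {
    System.out.println(Entities.encode("Björk & 25° < 30°")); // Bj&ouml;rk &amp; 25&deg; &lt; 30&deg;
    System.out.println(Entities.decode("&eacute;"));          // prints "é"
  }
}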


@@ -0,0 +1,754 @@
/* Generated By:JavaCC: Do not edit this line. HTMLParser.java */
package org.apache.lucene.demo.html;
import java.io.*;
import java.util.Properties;
public class HTMLParser implements HTMLParserConstants {
public static int SUMMARY_LENGTH = 200;
StringBuffer title = new StringBuffer(SUMMARY_LENGTH);
StringBuffer summary = new StringBuffer(SUMMARY_LENGTH * 2);
Properties metaTags=new Properties();
String currentMetaTag=null;
String currentMetaContent=null;
int length = 0;
boolean titleComplete = false;
boolean inTitle = false;
boolean inMetaTag = false;
boolean inStyle = false;
boolean afterTag = false;
boolean afterSpace = false;
String eol = System.getProperty("line.separator");
Reader pipeIn = null;
Writer pipeOut;
private MyPipedInputStream pipeInStream = null;
private PipedOutputStream pipeOutStream = null;
private class MyPipedInputStream extends PipedInputStream{
public MyPipedInputStream(){
super();
}
public MyPipedInputStream(PipedOutputStream src) throws IOException{
super(src);
}
public boolean full() throws IOException{
return this.available() >= PipedInputStream.PIPE_SIZE;
}
}
/**
* @deprecated Use HTMLParser(FileInputStream) instead
*/
public HTMLParser(File file) throws FileNotFoundException {
this(new FileInputStream(file));
}
public String getTitle() throws IOException, InterruptedException {
if (pipeIn == null)
getReader(); // spawn parsing thread
while (true) {
synchronized(this) {
if (titleComplete || pipeInStream.full())
break;
wait(10);
}
}
return title.toString().trim();
}
public Properties getMetaTags() throws IOException,
InterruptedException {
if (pipeIn == null)
getReader(); // spawn parsing thread
while (true) {
synchronized(this) {
if (titleComplete || pipeInStream.full())
break;
wait(10);
}
}
return metaTags;
}
public String getSummary() throws IOException, InterruptedException {
if (pipeIn == null)
getReader(); // spawn parsing thread
while (true) {
synchronized(this) {
if (summary.length() >= SUMMARY_LENGTH || pipeInStream.full())
break;
wait(10);
}
}
if (summary.length() > SUMMARY_LENGTH)
summary.setLength(SUMMARY_LENGTH);
String sum = summary.toString().trim();
String tit = getTitle();
if (sum.startsWith(tit) || sum.equals(""))
return tit;
else
return sum;
}
public Reader getReader() throws IOException {
if (pipeIn == null) {
pipeInStream = new MyPipedInputStream();
pipeOutStream = new PipedOutputStream(pipeInStream);
pipeIn = new InputStreamReader(pipeInStream, "UTF-16BE");
pipeOut = new OutputStreamWriter(pipeOutStream, "UTF-16BE");
Thread thread = new ParserThread(this);
thread.start(); // start parsing
}
return pipeIn;
}
void addToSummary(String text) {
if (summary.length() < SUMMARY_LENGTH) {
summary.append(text);
if (summary.length() >= SUMMARY_LENGTH) {
synchronized(this) {
notifyAll();
}
}
}
}
void addText(String text) throws IOException {
if (inStyle)
return;
if (inTitle)
title.append(text);
else {
addToSummary(text);
if (!titleComplete && !(title.length() == 0)) { // finished title
synchronized(this) {
titleComplete = true; // tell waiting threads
notifyAll();
}
}
}
length += text.length();
pipeOut.write(text);
afterSpace = false;
}
void addMetaTag() {
metaTags.setProperty(currentMetaTag, currentMetaContent);
currentMetaTag = null;
currentMetaContent = null;
return;
}
void addSpace() throws IOException {
if (!afterSpace) {
if (inTitle)
title.append(" ");
else
addToSummary(" ");
String space = afterTag ? eol : " ";
length += space.length();
pipeOut.write(space);
afterSpace = true;
}
}
final public void HTMLDocument() throws ParseException, IOException {
Token t;
label_1:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ScriptStart:
case TagName:
case DeclName:
case Comment1:
case Comment2:
case Word:
case Entity:
case Space:
case Punct:
;
break;
default:
jj_la1[0] = jj_gen;
break label_1;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TagName:
Tag();
afterTag = true;
break;
case DeclName:
t = Decl();
afterTag = true;
break;
case Comment1:
case Comment2:
CommentTag();
afterTag = true;
break;
case ScriptStart:
ScriptTag();
afterTag = true;
break;
case Word:
t = jj_consume_token(Word);
addText(t.image); afterTag = false;
break;
case Entity:
t = jj_consume_token(Entity);
addText(Entities.decode(t.image)); afterTag = false;
break;
case Punct:
t = jj_consume_token(Punct);
addText(t.image); afterTag = false;
break;
case Space:
jj_consume_token(Space);
addSpace(); afterTag = false;
break;
default:
jj_la1[1] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
}
jj_consume_token(0);
}
final public void Tag() throws ParseException, IOException {
Token t1, t2;
boolean inImg = false;
t1 = jj_consume_token(TagName);
String tagName = t1.image.toLowerCase();
if(Tags.WS_ELEMS.contains(tagName) ) {
addSpace();
}
inTitle = tagName.equalsIgnoreCase("<title"); // keep track if in <TITLE>
inMetaTag = tagName.equalsIgnoreCase("<META"); // keep track if in <META>
inStyle = tagName.equalsIgnoreCase("<STYLE"); // keep track if in <STYLE>
inImg = tagName.equalsIgnoreCase("<img"); // keep track if in <IMG>
label_2:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ArgName:
;
break;
default:
jj_la1[2] = jj_gen;
break label_2;
}
t1 = jj_consume_token(ArgName);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ArgEquals:
jj_consume_token(ArgEquals);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ArgValue:
case ArgQuote1:
case ArgQuote2:
t2 = ArgValue();
if (inImg && t1.image.equalsIgnoreCase("alt") && t2 != null)
addText("[" + t2.image + "]");
if(inMetaTag &&
( t1.image.equalsIgnoreCase("name") ||
t1.image.equalsIgnoreCase("HTTP-EQUIV")
)
&& t2 != null)
{
currentMetaTag=t2.image.toLowerCase();
if(currentMetaTag != null && currentMetaContent != null) {
addMetaTag();
}
}
if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 !=
null)
{
currentMetaContent=t2.image.toLowerCase();
if(currentMetaTag != null && currentMetaContent != null) {
addMetaTag();
}
}
break;
default:
jj_la1[3] = jj_gen;
;
}
break;
default:
jj_la1[4] = jj_gen;
;
}
}
jj_consume_token(TagEnd);
}
final public Token ArgValue() throws ParseException {
Token t = null;
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ArgValue:
t = jj_consume_token(ArgValue);
{if (true) return t;}
break;
default:
jj_la1[5] = jj_gen;
if (jj_2_1(2)) {
jj_consume_token(ArgQuote1);
jj_consume_token(CloseQuote1);
{if (true) return t;}
} else {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ArgQuote1:
jj_consume_token(ArgQuote1);
t = jj_consume_token(Quote1Text);
jj_consume_token(CloseQuote1);
{if (true) return t;}
break;
default:
jj_la1[6] = jj_gen;
if (jj_2_2(2)) {
jj_consume_token(ArgQuote2);
jj_consume_token(CloseQuote2);
{if (true) return t;}
} else {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ArgQuote2:
jj_consume_token(ArgQuote2);
t = jj_consume_token(Quote2Text);
jj_consume_token(CloseQuote2);
{if (true) return t;}
break;
default:
jj_la1[7] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
}
}
}
}
throw new Error("Missing return statement in function");
}
final public Token Decl() throws ParseException {
Token t;
t = jj_consume_token(DeclName);
label_3:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ArgName:
case ArgEquals:
case ArgValue:
case ArgQuote1:
case ArgQuote2:
;
break;
default:
jj_la1[8] = jj_gen;
break label_3;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ArgName:
jj_consume_token(ArgName);
break;
case ArgValue:
case ArgQuote1:
case ArgQuote2:
ArgValue();
break;
case ArgEquals:
jj_consume_token(ArgEquals);
break;
default:
jj_la1[9] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
}
jj_consume_token(TagEnd);
{if (true) return t;}
throw new Error("Missing return statement in function");
}
final public void CommentTag() throws ParseException {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case Comment1:
jj_consume_token(Comment1);
label_4:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CommentText1:
;
break;
default:
jj_la1[10] = jj_gen;
break label_4;
}
jj_consume_token(CommentText1);
}
jj_consume_token(CommentEnd1);
break;
case Comment2:
jj_consume_token(Comment2);
label_5:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CommentText2:
;
break;
default:
jj_la1[11] = jj_gen;
break label_5;
}
jj_consume_token(CommentText2);
}
jj_consume_token(CommentEnd2);
break;
default:
jj_la1[12] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
}
final public void ScriptTag() throws ParseException {
jj_consume_token(ScriptStart);
label_6:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case ScriptText:
;
break;
default:
jj_la1[13] = jj_gen;
break label_6;
}
jj_consume_token(ScriptText);
}
jj_consume_token(ScriptEnd);
}
private boolean jj_2_1(int xla) {
jj_la = xla; jj_lastpos = jj_scanpos = token;
try { return !jj_3_1(); }
catch(LookaheadSuccess ls) { return true; }
finally { jj_save(0, xla); }
}
private boolean jj_2_2(int xla) {
jj_la = xla; jj_lastpos = jj_scanpos = token;
try { return !jj_3_2(); }
catch(LookaheadSuccess ls) { return true; }
finally { jj_save(1, xla); }
}
private boolean jj_3_1() {
if (jj_scan_token(ArgQuote1)) return true;
if (jj_scan_token(CloseQuote1)) return true;
return false;
}
private boolean jj_3_2() {
if (jj_scan_token(ArgQuote2)) return true;
if (jj_scan_token(CloseQuote2)) return true;
return false;
}
/** Generated Token Manager. */
public HTMLParserTokenManager token_source;
SimpleCharStream jj_input_stream;
/** Current token. */
public Token token;
/** Next token. */
public Token jj_nt;
private int jj_ntk;
private Token jj_scanpos, jj_lastpos;
private int jj_la;
private int jj_gen;
final private int[] jj_la1 = new int[14];
static private int[] jj_la1_0;
static {
jj_la1_init_0();
}
private static void jj_la1_init_0() {
jj_la1_0 = new int[] {0x2c7e,0x2c7e,0x10000,0x380000,0x20000,0x80000,0x100000,0x200000,0x3b0000,0x3b0000,0x8000000,0x20000000,0x30,0x4000,};
}
final private JJCalls[] jj_2_rtns = new JJCalls[2];
private boolean jj_rescan = false;
private int jj_gc = 0;
/** Constructor with InputStream. */
public HTMLParser(java.io.InputStream stream) {
this(stream, null);
}
/** Constructor with InputStream and supplied encoding */
public HTMLParser(java.io.InputStream stream, String encoding) {
try { jj_input_stream = new SimpleCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
token_source = new HTMLParserTokenManager(jj_input_stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 14; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
/** Reinitialise. */
public void ReInit(java.io.InputStream stream) {
ReInit(stream, null);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream stream, String encoding) {
try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
token_source.ReInit(jj_input_stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 14; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
/** Constructor. */
public HTMLParser(java.io.Reader stream) {
jj_input_stream = new SimpleCharStream(stream, 1, 1);
token_source = new HTMLParserTokenManager(jj_input_stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 14; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
/** Reinitialise. */
public void ReInit(java.io.Reader stream) {
jj_input_stream.ReInit(stream, 1, 1);
token_source.ReInit(jj_input_stream);
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 14; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
/** Constructor with generated Token Manager. */
public HTMLParser(HTMLParserTokenManager tm) {
token_source = tm;
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 14; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
/** Reinitialise. */
public void ReInit(HTMLParserTokenManager tm) {
token_source = tm;
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 14; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
private Token jj_consume_token(int kind) throws ParseException {
Token oldToken;
if ((oldToken = token).next != null) token = token.next;
else token = token.next = token_source.getNextToken();
jj_ntk = -1;
if (token.kind == kind) {
jj_gen++;
if (++jj_gc > 100) {
jj_gc = 0;
for (int i = 0; i < jj_2_rtns.length; i++) {
JJCalls c = jj_2_rtns[i];
while (c != null) {
if (c.gen < jj_gen) c.first = null;
c = c.next;
}
}
}
return token;
}
token = oldToken;
jj_kind = kind;
throw generateParseException();
}
static private final class LookaheadSuccess extends java.lang.Error { }
final private LookaheadSuccess jj_ls = new LookaheadSuccess();
private boolean jj_scan_token(int kind) {
if (jj_scanpos == jj_lastpos) {
jj_la--;
if (jj_scanpos.next == null) {
jj_lastpos = jj_scanpos = jj_scanpos.next = token_source.getNextToken();
} else {
jj_lastpos = jj_scanpos = jj_scanpos.next;
}
} else {
jj_scanpos = jj_scanpos.next;
}
if (jj_rescan) {
int i = 0; Token tok = token;
while (tok != null && tok != jj_scanpos) { i++; tok = tok.next; }
if (tok != null) jj_add_error_token(kind, i);
}
if (jj_scanpos.kind != kind) return true;
if (jj_la == 0 && jj_scanpos == jj_lastpos) throw jj_ls;
return false;
}
/** Get the next Token. */
final public Token getNextToken() {
if (token.next != null) token = token.next;
else token = token.next = token_source.getNextToken();
jj_ntk = -1;
jj_gen++;
return token;
}
/** Get the specific Token. */
final public Token getToken(int index) {
Token t = token;
for (int i = 0; i < index; i++) {
if (t.next != null) t = t.next;
else t = t.next = token_source.getNextToken();
}
return t;
}
private int jj_ntk() {
if ((jj_nt=token.next) == null)
return (jj_ntk = (token.next=token_source.getNextToken()).kind);
else
return (jj_ntk = jj_nt.kind);
}
private java.util.List<int[]> jj_expentries = new java.util.ArrayList<int[]>();
private int[] jj_expentry;
private int jj_kind = -1;
private int[] jj_lasttokens = new int[100];
private int jj_endpos;
private void jj_add_error_token(int kind, int pos) {
if (pos >= 100) return;
if (pos == jj_endpos + 1) {
jj_lasttokens[jj_endpos++] = kind;
} else if (jj_endpos != 0) {
jj_expentry = new int[jj_endpos];
for (int i = 0; i < jj_endpos; i++) {
jj_expentry[i] = jj_lasttokens[i];
}
jj_entries_loop: for (java.util.Iterator it = jj_expentries.iterator(); it.hasNext();) {
int[] oldentry = (int[])(it.next());
if (oldentry.length == jj_expentry.length) {
for (int i = 0; i < jj_expentry.length; i++) {
if (oldentry[i] != jj_expentry[i]) {
continue jj_entries_loop;
}
}
jj_expentries.add(jj_expentry);
break jj_entries_loop;
}
}
if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind;
}
}
/** Generate ParseException. */
public ParseException generateParseException() {
jj_expentries.clear();
boolean[] la1tokens = new boolean[31];
if (jj_kind >= 0) {
la1tokens[jj_kind] = true;
jj_kind = -1;
}
for (int i = 0; i < 14; i++) {
if (jj_la1[i] == jj_gen) {
for (int j = 0; j < 32; j++) {
if ((jj_la1_0[i] & (1<<j)) != 0) {
la1tokens[j] = true;
}
}
}
}
for (int i = 0; i < 31; i++) {
if (la1tokens[i]) {
jj_expentry = new int[1];
jj_expentry[0] = i;
jj_expentries.add(jj_expentry);
}
}
jj_endpos = 0;
jj_rescan_token();
jj_add_error_token(0, 0);
int[][] exptokseq = new int[jj_expentries.size()][];
for (int i = 0; i < jj_expentries.size(); i++) {
exptokseq[i] = jj_expentries.get(i);
}
return new ParseException(token, exptokseq, tokenImage);
}
/** Enable tracing. */
final public void enable_tracing() {
}
/** Disable tracing. */
final public void disable_tracing() {
}
private void jj_rescan_token() {
jj_rescan = true;
for (int i = 0; i < 2; i++) {
try {
JJCalls p = jj_2_rtns[i];
do {
if (p.gen > jj_gen) {
jj_la = p.arg; jj_lastpos = jj_scanpos = p.first;
switch (i) {
case 0: jj_3_1(); break;
case 1: jj_3_2(); break;
}
}
p = p.next;
} while (p != null);
} catch(LookaheadSuccess ls) { }
}
jj_rescan = false;
}
private void jj_save(int index, int xla) {
JJCalls p = jj_2_rtns[index];
while (p.gen > jj_gen) {
if (p.next == null) { p = p.next = new JJCalls(); break; }
p = p.next;
}
p.gen = jj_gen + xla - jj_la; p.first = token; p.arg = xla;
}
static final class JJCalls {
int gen;
Token first;
int arg;
JJCalls next;
}
// void handleException(Exception e) {
// System.out.println(e.toString()); // print the error message
// System.out.println("Skipping...");
// Token t;
// do {
// t = getNextToken();
// } while (t.kind != TagEnd);
// }
}

View File

@ -0,0 +1,392 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// HTMLParser.jj
options {
STATIC = false;
OPTIMIZE_TOKEN_MANAGER = true;
//DEBUG_LOOKAHEAD = true;
//DEBUG_TOKEN_MANAGER = true;
}
PARSER_BEGIN(HTMLParser)
package org.apache.lucene.demo.html;
import java.io.*;
import java.util.Properties;
public class HTMLParser {
public static int SUMMARY_LENGTH = 200;
StringBuffer title = new StringBuffer(SUMMARY_LENGTH);
StringBuffer summary = new StringBuffer(SUMMARY_LENGTH * 2);
Properties metaTags=new Properties();
String currentMetaTag=null;
String currentMetaContent=null;
int length = 0;
boolean titleComplete = false;
boolean inTitle = false;
boolean inMetaTag = false;
boolean inStyle = false;
boolean afterTag = false;
boolean afterSpace = false;
String eol = System.getProperty("line.separator");
Reader pipeIn = null;
Writer pipeOut;
private MyPipedInputStream pipeInStream = null;
private PipedOutputStream pipeOutStream = null;
private class MyPipedInputStream extends PipedInputStream{
public MyPipedInputStream(){
super();
}
public MyPipedInputStream(PipedOutputStream src) throws IOException{
super(src);
}
public boolean full() throws IOException{
return this.available() >= PipedInputStream.PIPE_SIZE;
}
}
/**
* @deprecated Use HTMLParser(FileInputStream) instead
*/
public HTMLParser(File file) throws FileNotFoundException {
this(new FileInputStream(file));
}
public String getTitle() throws IOException, InterruptedException {
if (pipeIn == null)
getReader(); // spawn parsing thread
while (true) {
synchronized(this) {
if (titleComplete || pipeInStream.full())
break;
wait(10);
}
}
return title.toString().trim();
}
public Properties getMetaTags() throws IOException,
InterruptedException {
if (pipeIn == null)
getReader(); // spawn parsing thread
while (true) {
synchronized(this) {
if (titleComplete || pipeInStream.full())
break;
wait(10);
}
}
return metaTags;
}
public String getSummary() throws IOException, InterruptedException {
if (pipeIn == null)
getReader(); // spawn parsing thread
while (true) {
synchronized(this) {
if (summary.length() >= SUMMARY_LENGTH || pipeInStream.full())
break;
wait(10);
}
}
if (summary.length() > SUMMARY_LENGTH)
summary.setLength(SUMMARY_LENGTH);
String sum = summary.toString().trim();
String tit = getTitle();
if (sum.startsWith(tit) || sum.equals(""))
return tit;
else
return sum;
}
public Reader getReader() throws IOException {
if (pipeIn == null) {
pipeInStream = new MyPipedInputStream();
pipeOutStream = new PipedOutputStream(pipeInStream);
pipeIn = new InputStreamReader(pipeInStream, "UTF-16BE");
pipeOut = new OutputStreamWriter(pipeOutStream, "UTF-16BE");
Thread thread = new ParserThread(this);
thread.start(); // start parsing
}
return pipeIn;
}
void addToSummary(String text) {
if (summary.length() < SUMMARY_LENGTH) {
summary.append(text);
if (summary.length() >= SUMMARY_LENGTH) {
synchronized(this) {
notifyAll();
}
}
}
}
void addText(String text) throws IOException {
if (inStyle)
return;
if (inTitle)
title.append(text);
else {
addToSummary(text);
if (!titleComplete && !(title.length() == 0)) { // finished title
synchronized(this) {
titleComplete = true; // tell waiting threads
notifyAll();
}
}
}
length += text.length();
pipeOut.write(text);
afterSpace = false;
}
void addMetaTag() {
metaTags.setProperty(currentMetaTag, currentMetaContent);
currentMetaTag = null;
currentMetaContent = null;
return;
}
void addSpace() throws IOException {
if (!afterSpace) {
if (inTitle)
title.append(" ");
else
addToSummary(" ");
String space = afterTag ? eol : " ";
length += space.length();
pipeOut.write(space);
afterSpace = true;
}
}
// void handleException(Exception e) {
// System.out.println(e.toString()); // print the error message
// System.out.println("Skipping...");
// Token t;
// do {
// t = getNextToken();
// } while (t.kind != TagEnd);
// }
}
PARSER_END(HTMLParser)
void HTMLDocument() throws IOException :
{
Token t;
}
{
// try {
( Tag() { afterTag = true; }
| t=Decl() { afterTag = true; }
| CommentTag() { afterTag = true; }
| ScriptTag() { afterTag = true; }
| t=<Word> { addText(t.image); afterTag = false; }
| t=<Entity> { addText(Entities.decode(t.image)); afterTag = false; }
| t=<Punct> { addText(t.image); afterTag = false; }
| <Space> { addSpace(); afterTag = false; }
)* <EOF>
// } catch (ParseException e) {
// handleException(e);
// }
}
void Tag() throws IOException :
{
Token t1, t2;
boolean inImg = false;
}
{
t1=<TagName> {
String tagName = t1.image.toLowerCase();
if(Tags.WS_ELEMS.contains(tagName) ) {
addSpace();
}
inTitle = tagName.equalsIgnoreCase("<title"); // keep track if in <TITLE>
inMetaTag = tagName.equalsIgnoreCase("<META"); // keep track if in <META>
inStyle = tagName.equalsIgnoreCase("<STYLE"); // keep track if in <STYLE>
inImg = tagName.equalsIgnoreCase("<img"); // keep track if in <IMG>
}
(t1=<ArgName>
(<ArgEquals>
(t2=ArgValue() // save ALT text in IMG tag
{
if (inImg && t1.image.equalsIgnoreCase("alt") && t2 != null)
addText("[" + t2.image + "]");
if(inMetaTag &&
( t1.image.equalsIgnoreCase("name") ||
t1.image.equalsIgnoreCase("HTTP-EQUIV")
)
&& t2 != null)
{
currentMetaTag=t2.image.toLowerCase();
if(currentMetaTag != null && currentMetaContent != null) {
addMetaTag();
}
}
if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 !=
null)
{
currentMetaContent=t2.image.toLowerCase();
if(currentMetaTag != null && currentMetaContent != null) {
addMetaTag();
}
}
}
)?
)?
)*
<TagEnd>
}
Token ArgValue() :
{
Token t = null;
}
{
t=<ArgValue> { return t; }
| LOOKAHEAD(2)
<ArgQuote1> <CloseQuote1> { return t; }
| <ArgQuote1> t=<Quote1Text> <CloseQuote1> { return t; }
| LOOKAHEAD(2)
<ArgQuote2> <CloseQuote2> { return t; }
| <ArgQuote2> t=<Quote2Text> <CloseQuote2> { return t; }
}
Token Decl() :
{
Token t;
}
{
t=<DeclName> ( <ArgName> | ArgValue() | <ArgEquals> )* <TagEnd>
{ return t; }
}
void CommentTag() :
{}
{
(<Comment1> ( <CommentText1> )* <CommentEnd1>)
|
(<Comment2> ( <CommentText2> )* <CommentEnd2>)
}
void ScriptTag() :
{}
{
<ScriptStart> ( <ScriptText> )* <ScriptEnd>
}
TOKEN :
{
< ScriptStart: "<script" > : WithinScript
| < TagName: "<" ("/")? ["A"-"Z","a"-"z"] (<ArgName>)? > : WithinTag
| < DeclName: "<" "!" ["A"-"Z","a"-"z"] (<ArgName>)? > : WithinTag
| < Comment1: "<!--" > : WithinComment1
| < Comment2: "<!" > : WithinComment2
| < Word: ( <LET> | <LET> (["+","/"])+ | <NUM> ["\""] |
<LET> ["-","'"] <LET> | ("$")? <NUM> [",","."] <NUM> )+ >
| < #LET: ["A"-"Z","a"-"z","0"-"9"] >
| < #NUM: ["0"-"9"] >
| < #HEX: ["0"-"9","A"-"F","a"-"f"] >
| < Entity: ( "&" (["A"-"Z","a"-"z"])+ (";")? | "&" "#" (<NUM>)+ (";")? | "&" "#" ["X","x"] (<HEX>)+ (";")? ) >
| < Space: (<SP>)+ >
| < #SP: [" ","\t","\r","\n"] >
| < Punct: ~[] > // Keep this last. It is a catch-all.
}
<WithinScript> TOKEN:
{
< ScriptText: (~["<",">"])+ | "<" | ">" >
| < ScriptEnd: "</script" (~["<",">"])* ">" > : DEFAULT
}
<WithinTag> TOKEN:
{
< ArgName: (~[" ","\t","\r","\n","=",">","'","\""])
(~[" ","\t","\r","\n","=",">"])* >
| < ArgEquals: "=" > : AfterEquals
| < TagEnd: ">" | "=>" > : DEFAULT
}
<AfterEquals> TOKEN:
{
< ArgValue: (~[" ","\t","\r","\n","=",">","'","\""])
(~[" ","\t","\r","\n",">"])* > : WithinTag
}
<WithinTag, AfterEquals> TOKEN:
{
< ArgQuote1: "'" > : WithinQuote1
| < ArgQuote2: "\"" > : WithinQuote2
}
<WithinTag, AfterEquals> SKIP:
{
< <Space> >
}
<WithinQuote1> TOKEN:
{
< Quote1Text: (~["'"])+ >
| < CloseQuote1: <ArgQuote1> > : WithinTag
}
<WithinQuote2> TOKEN:
{
< Quote2Text: (~["\""])+ >
| < CloseQuote2: <ArgQuote2> > : WithinTag
}
<WithinComment1> TOKEN :
{
< CommentText1: (~["-"])+ | "-" >
| < CommentEnd1: "-->" > : DEFAULT
}
<WithinComment2> TOKEN :
{
< CommentText2: (~[">"])+ >
| < CommentEnd2: ">" > : DEFAULT
}

View File

@ -0,0 +1,124 @@
/* Generated By:JavaCC: Do not edit this line. HTMLParserConstants.java */
package org.apache.lucene.demo.html;
/**
* Token literal values and constants.
* Generated by org.javacc.parser.OtherFilesGen#start()
*/
public interface HTMLParserConstants {
/** End of File. */
int EOF = 0;
/** RegularExpression Id. */
int ScriptStart = 1;
/** RegularExpression Id. */
int TagName = 2;
/** RegularExpression Id. */
int DeclName = 3;
/** RegularExpression Id. */
int Comment1 = 4;
/** RegularExpression Id. */
int Comment2 = 5;
/** RegularExpression Id. */
int Word = 6;
/** RegularExpression Id. */
int LET = 7;
/** RegularExpression Id. */
int NUM = 8;
/** RegularExpression Id. */
int HEX = 9;
/** RegularExpression Id. */
int Entity = 10;
/** RegularExpression Id. */
int Space = 11;
/** RegularExpression Id. */
int SP = 12;
/** RegularExpression Id. */
int Punct = 13;
/** RegularExpression Id. */
int ScriptText = 14;
/** RegularExpression Id. */
int ScriptEnd = 15;
/** RegularExpression Id. */
int ArgName = 16;
/** RegularExpression Id. */
int ArgEquals = 17;
/** RegularExpression Id. */
int TagEnd = 18;
/** RegularExpression Id. */
int ArgValue = 19;
/** RegularExpression Id. */
int ArgQuote1 = 20;
/** RegularExpression Id. */
int ArgQuote2 = 21;
/** RegularExpression Id. */
int Quote1Text = 23;
/** RegularExpression Id. */
int CloseQuote1 = 24;
/** RegularExpression Id. */
int Quote2Text = 25;
/** RegularExpression Id. */
int CloseQuote2 = 26;
/** RegularExpression Id. */
int CommentText1 = 27;
/** RegularExpression Id. */
int CommentEnd1 = 28;
/** RegularExpression Id. */
int CommentText2 = 29;
/** RegularExpression Id. */
int CommentEnd2 = 30;
/** Lexical state. */
int DEFAULT = 0;
/** Lexical state. */
int WithinScript = 1;
/** Lexical state. */
int WithinTag = 2;
/** Lexical state. */
int AfterEquals = 3;
/** Lexical state. */
int WithinQuote1 = 4;
/** Lexical state. */
int WithinQuote2 = 5;
/** Lexical state. */
int WithinComment1 = 6;
/** Lexical state. */
int WithinComment2 = 7;
/** Literal token values. */
String[] tokenImage = {
"<EOF>",
"\"<script\"",
"<TagName>",
"<DeclName>",
"\"<!--\"",
"\"<!\"",
"<Word>",
"<LET>",
"<NUM>",
"<HEX>",
"<Entity>",
"<Space>",
"<SP>",
"<Punct>",
"<ScriptText>",
"<ScriptEnd>",
"<ArgName>",
"\"=\"",
"<TagEnd>",
"<ArgValue>",
"\"\\\'\"",
"\"\\\"\"",
"<token of kind 22>",
"<Quote1Text>",
"<CloseQuote1>",
"<Quote2Text>",
"<CloseQuote2>",
"<CommentText1>",
"\"-->\"",
"<CommentText2>",
"\">\"",
};
}

File diff suppressed because it is too large.

View File

@ -0,0 +1,198 @@
/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 4.1 */
/* JavaCCOptions:KEEP_LINE_COL=null */
package org.apache.lucene.demo.html;
/**
* This exception is thrown when parse errors are encountered.
* You can explicitly create objects of this exception type by
* calling the method generateParseException in the generated
* parser.
*
* You can modify this class to customize your error reporting
* mechanisms so long as you retain the public fields.
*/
public class ParseException extends Exception {
/**
* This constructor is used by the method "generateParseException"
* in the generated parser. Calling this constructor generates
* a new object of this type with the fields "currentToken",
* "expectedTokenSequences", and "tokenImage" set. The boolean
* flag "specialConstructor" is also set to true to indicate that
* this constructor was used to create this object.
* This constructor calls its super class with the empty string
* to force the "toString" method of parent class "Throwable" to
* print the error message in the form:
* ParseException: <result of getMessage>
*/
public ParseException(Token currentTokenVal,
int[][] expectedTokenSequencesVal,
String[] tokenImageVal
)
{
super("");
specialConstructor = true;
currentToken = currentTokenVal;
expectedTokenSequences = expectedTokenSequencesVal;
tokenImage = tokenImageVal;
}
/**
* The following constructors are for use by you for whatever
* purpose you can think of. Constructing the exception in this
* manner makes the exception behave in the normal way - i.e., as
* documented in the class "Throwable". The fields "errorToken",
* "expectedTokenSequences", and "tokenImage" do not contain
* relevant information. The JavaCC generated code does not use
* these constructors.
*/
public ParseException() {
super();
specialConstructor = false;
}
/** Constructor with message. */
public ParseException(String message) {
super(message);
specialConstructor = false;
}
/**
* This variable determines which constructor was used to create
* this object and thereby affects the semantics of the
* "getMessage" method (see below).
*/
protected boolean specialConstructor;
/**
* This is the last token that has been consumed successfully. If
* this object has been created due to a parse error, the token
 * following this token will (therefore) be the first error token.
*/
public Token currentToken;
/**
* Each entry in this array is an array of integers. Each array
* of integers represents a sequence of tokens (by their ordinal
* values) that is expected at this point of the parse.
*/
public int[][] expectedTokenSequences;
/**
* This is a reference to the "tokenImage" array of the generated
* parser within which the parse error occurred. This array is
* defined in the generated ...Constants interface.
*/
public String[] tokenImage;
/**
* This method has the standard behavior when this object has been
* created using the standard constructors. Otherwise, it uses
* "currentToken" and "expectedTokenSequences" to generate a parse
* error message and returns it. If this object has been created
* due to a parse error, and you do not catch it (it gets thrown
* from the parser), then this method is called during the printing
* of the final stack trace, and hence the correct error message
* gets displayed.
*/
public String getMessage() {
if (!specialConstructor) {
return super.getMessage();
}
StringBuffer expected = new StringBuffer();
int maxSize = 0;
for (int i = 0; i < expectedTokenSequences.length; i++) {
if (maxSize < expectedTokenSequences[i].length) {
maxSize = expectedTokenSequences[i].length;
}
for (int j = 0; j < expectedTokenSequences[i].length; j++) {
expected.append(tokenImage[expectedTokenSequences[i][j]]).append(' ');
}
if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) {
expected.append("...");
}
expected.append(eol).append(" ");
}
String retval = "Encountered \"";
Token tok = currentToken.next;
for (int i = 0; i < maxSize; i++) {
if (i != 0) retval += " ";
if (tok.kind == 0) {
retval += tokenImage[0];
break;
}
retval += " " + tokenImage[tok.kind];
retval += " \"";
retval += add_escapes(tok.image);
retval += " \"";
tok = tok.next;
}
retval += "\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn;
retval += "." + eol;
if (expectedTokenSequences.length == 1) {
retval += "Was expecting:" + eol + " ";
} else {
retval += "Was expecting one of:" + eol + " ";
}
retval += expected.toString();
return retval;
}
/**
* The end of line string for this machine.
*/
protected String eol = System.getProperty("line.separator", "\n");
/**
 * Used to convert raw characters to their escaped versions
 * when the raw versions cannot be used as part of an ASCII
* string literal.
*/
protected String add_escapes(String str) {
StringBuffer retval = new StringBuffer();
char ch;
for (int i = 0; i < str.length(); i++) {
switch (str.charAt(i))
{
case 0 :
continue;
case '\b':
retval.append("\\b");
continue;
case '\t':
retval.append("\\t");
continue;
case '\n':
retval.append("\\n");
continue;
case '\f':
retval.append("\\f");
continue;
case '\r':
retval.append("\\r");
continue;
case '\"':
retval.append("\\\"");
continue;
case '\'':
retval.append("\\\'");
continue;
case '\\':
retval.append("\\\\");
continue;
default:
if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) {
String s = "0000" + Integer.toString(ch, 16);
retval.append("\\u" + s.substring(s.length() - 4, s.length()));
} else {
retval.append(ch);
}
continue;
}
}
return retval.toString();
}
}
/* JavaCC - OriginalChecksum=63b2008c66e199b79536447c26bee2ab (do not edit this line) */

View File

@ -0,0 +1,50 @@
package org.apache.lucene.demo.html;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.*;
class ParserThread extends Thread {
HTMLParser parser;
ParserThread(HTMLParser p) {
parser = p;
}
@Override
public void run() { // convert pipeOut to pipeIn
try {
try { // parse document to pipeOut
parser.HTMLDocument();
} catch (ParseException e) {
System.out.println("Parse Aborted: " + e.getMessage());
} catch (TokenMgrError e) {
System.out.println("Parse Aborted: " + e.getMessage());
} finally {
parser.pipeOut.close();
synchronized (parser) {
parser.summary.setLength(HTMLParser.SUMMARY_LENGTH);
parser.titleComplete = true;
parser.notifyAll();
}
}
} catch (IOException e) {
e.printStackTrace();
}
}
}

View File

@ -0,0 +1,472 @@
/* Generated By:JavaCC: Do not edit this line. SimpleCharStream.java Version 4.1 */
/* JavaCCOptions:STATIC=false */
package org.apache.lucene.demo.html;
/**
* An implementation of interface CharStream, where the stream is assumed to
* contain only ASCII characters (without unicode processing).
*/
public class SimpleCharStream
{
/** Whether parser is static. */
public static final boolean staticFlag = false;
int bufsize;
int available;
int tokenBegin;
/** Position in buffer. */
public int bufpos = -1;
protected int bufline[];
protected int bufcolumn[];
protected int column = 0;
protected int line = 1;
protected boolean prevCharIsCR = false;
protected boolean prevCharIsLF = false;
protected java.io.Reader inputStream;
protected char[] buffer;
protected int maxNextCharInd = 0;
protected int inBuf = 0;
protected int tabSize = 8;
protected void setTabSize(int i) { tabSize = i; }
protected int getTabSize(int i) { return tabSize; }
protected void ExpandBuff(boolean wrapAround)
{
char[] newbuffer = new char[bufsize + 2048];
int newbufline[] = new int[bufsize + 2048];
int newbufcolumn[] = new int[bufsize + 2048];
try
{
if (wrapAround)
{
System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
System.arraycopy(buffer, 0, newbuffer,
bufsize - tokenBegin, bufpos);
buffer = newbuffer;
System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
System.arraycopy(bufline, 0, newbufline, bufsize - tokenBegin, bufpos);
bufline = newbufline;
System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
System.arraycopy(bufcolumn, 0, newbufcolumn, bufsize - tokenBegin, bufpos);
bufcolumn = newbufcolumn;
maxNextCharInd = (bufpos += (bufsize - tokenBegin));
}
else
{
System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
buffer = newbuffer;
System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
bufline = newbufline;
System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
bufcolumn = newbufcolumn;
maxNextCharInd = (bufpos -= tokenBegin);
}
}
catch (Throwable t)
{
throw new Error(t.getMessage());
}
bufsize += 2048;
available = bufsize;
tokenBegin = 0;
}
protected void FillBuff() throws java.io.IOException
{
if (maxNextCharInd == available)
{
if (available == bufsize)
{
if (tokenBegin > 2048)
{
bufpos = maxNextCharInd = 0;
available = tokenBegin;
}
else if (tokenBegin < 0)
bufpos = maxNextCharInd = 0;
else
ExpandBuff(false);
}
else if (available > tokenBegin)
available = bufsize;
else if ((tokenBegin - available) < 2048)
ExpandBuff(true);
else
available = tokenBegin;
}
int i;
try {
if ((i = inputStream.read(buffer, maxNextCharInd,
available - maxNextCharInd)) == -1)
{
inputStream.close();
throw new java.io.IOException();
}
else
maxNextCharInd += i;
return;
}
catch(java.io.IOException e) {
--bufpos;
backup(0);
if (tokenBegin == -1)
tokenBegin = bufpos;
throw e;
}
}
/** Start. */
public char BeginToken() throws java.io.IOException
{
tokenBegin = -1;
char c = readChar();
tokenBegin = bufpos;
return c;
}
protected void UpdateLineColumn(char c)
{
column++;
if (prevCharIsLF)
{
prevCharIsLF = false;
line += (column = 1);
}
else if (prevCharIsCR)
{
prevCharIsCR = false;
if (c == '\n')
{
prevCharIsLF = true;
}
else
line += (column = 1);
}
switch (c)
{
case '\r' :
prevCharIsCR = true;
break;
case '\n' :
prevCharIsLF = true;
break;
case '\t' :
column--;
column += (tabSize - (column % tabSize));
break;
default :
break;
}
bufline[bufpos] = line;
bufcolumn[bufpos] = column;
}
/** Read a character. */
public char readChar() throws java.io.IOException
{
if (inBuf > 0)
{
--inBuf;
if (++bufpos == bufsize)
bufpos = 0;
return buffer[bufpos];
}
if (++bufpos >= maxNextCharInd)
FillBuff();
char c = buffer[bufpos];
UpdateLineColumn(c);
return c;
}
/**
* @deprecated
* @see #getEndColumn
*/
public int getColumn() {
return bufcolumn[bufpos];
}
/**
* @deprecated
* @see #getEndLine
*/
public int getLine() {
return bufline[bufpos];
}
/** Get token end column number. */
public int getEndColumn() {
return bufcolumn[bufpos];
}
/** Get token end line number. */
public int getEndLine() {
return bufline[bufpos];
}
/** Get token beginning column number. */
public int getBeginColumn() {
return bufcolumn[tokenBegin];
}
/** Get token beginning line number. */
public int getBeginLine() {
return bufline[tokenBegin];
}
/** Backup a number of characters. */
public void backup(int amount) {
inBuf += amount;
if ((bufpos -= amount) < 0)
bufpos += bufsize;
}
/** Constructor. */
public SimpleCharStream(java.io.Reader dstream, int startline,
int startcolumn, int buffersize)
{
inputStream = dstream;
line = startline;
column = startcolumn - 1;
available = bufsize = buffersize;
buffer = new char[buffersize];
bufline = new int[buffersize];
bufcolumn = new int[buffersize];
}
/** Constructor. */
public SimpleCharStream(java.io.Reader dstream, int startline,
int startcolumn)
{
this(dstream, startline, startcolumn, 4096);
}
/** Constructor. */
public SimpleCharStream(java.io.Reader dstream)
{
this(dstream, 1, 1, 4096);
}
/** Reinitialise. */
public void ReInit(java.io.Reader dstream, int startline,
int startcolumn, int buffersize)
{
inputStream = dstream;
line = startline;
column = startcolumn - 1;
if (buffer == null || buffersize != buffer.length)
{
available = bufsize = buffersize;
buffer = new char[buffersize];
bufline = new int[buffersize];
bufcolumn = new int[buffersize];
}
prevCharIsLF = prevCharIsCR = false;
tokenBegin = inBuf = maxNextCharInd = 0;
bufpos = -1;
}
/** Reinitialise. */
public void ReInit(java.io.Reader dstream, int startline,
int startcolumn)
{
ReInit(dstream, startline, startcolumn, 4096);
}
/** Reinitialise. */
public void ReInit(java.io.Reader dstream)
{
ReInit(dstream, 1, 1, 4096);
}
/** Constructor. */
public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
{
this(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
}
/** Constructor. */
public SimpleCharStream(java.io.InputStream dstream, int startline,
int startcolumn, int buffersize)
{
this(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
}
/** Constructor. */
public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
int startcolumn) throws java.io.UnsupportedEncodingException
{
this(dstream, encoding, startline, startcolumn, 4096);
}
/** Constructor. */
public SimpleCharStream(java.io.InputStream dstream, int startline,
int startcolumn)
{
this(dstream, startline, startcolumn, 4096);
}
/** Constructor. */
public SimpleCharStream(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
{
this(dstream, encoding, 1, 1, 4096);
}
/** Constructor. */
public SimpleCharStream(java.io.InputStream dstream)
{
this(dstream, 1, 1, 4096);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream dstream, String encoding, int startline,
int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
{
ReInit(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream dstream, int startline,
int startcolumn, int buffersize)
{
ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
{
ReInit(dstream, encoding, 1, 1, 4096);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream dstream)
{
ReInit(dstream, 1, 1, 4096);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream dstream, String encoding, int startline,
int startcolumn) throws java.io.UnsupportedEncodingException
{
ReInit(dstream, encoding, startline, startcolumn, 4096);
}
/** Reinitialise. */
public void ReInit(java.io.InputStream dstream, int startline,
int startcolumn)
{
ReInit(dstream, startline, startcolumn, 4096);
}
/** Get token literal value. */
public String GetImage()
{
if (bufpos >= tokenBegin)
return new String(buffer, tokenBegin, bufpos - tokenBegin + 1);
else
return new String(buffer, tokenBegin, bufsize - tokenBegin) +
new String(buffer, 0, bufpos + 1);
}
/** Get the suffix. */
public char[] GetSuffix(int len)
{
char[] ret = new char[len];
if ((bufpos + 1) >= len)
System.arraycopy(buffer, bufpos - len + 1, ret, 0, len);
else
{
System.arraycopy(buffer, bufsize - (len - bufpos - 1), ret, 0,
len - bufpos - 1);
System.arraycopy(buffer, 0, ret, len - bufpos - 1, bufpos + 1);
}
return ret;
}
/** Reset buffer when finished. */
public void Done()
{
buffer = null;
bufline = null;
bufcolumn = null;
}
/**
* Method to adjust line and column numbers for the start of a token.
*/
public void adjustBeginLineColumn(int newLine, int newCol)
{
int start = tokenBegin;
int len;
if (bufpos >= tokenBegin)
{
len = bufpos - tokenBegin + inBuf + 1;
}
else
{
len = bufsize - tokenBegin + bufpos + 1 + inBuf;
}
int i = 0, j = 0, k = 0;
int nextColDiff = 0, columnDiff = 0;
while (i < len &&
bufline[j = start % bufsize] == bufline[k = ++start % bufsize])
{
bufline[j] = newLine;
nextColDiff = columnDiff + bufcolumn[k] - bufcolumn[j];
bufcolumn[j] = newCol + columnDiff;
columnDiff = nextColDiff;
i++;
}
if (i < len)
{
bufline[j] = newLine++;
bufcolumn[j] = newCol + columnDiff;
while (i++ < len)
{
if (bufline[j = start % bufsize] != bufline[++start % bufsize])
bufline[j] = newLine++;
else
bufline[j] = newLine;
}
}
line = bufline[j];
column = bufcolumn[j];
}
}
/* JavaCC - OriginalChecksum=7393ed4ac2709e2de22d164f9db78b65 (do not edit this line) */

View File

@ -0,0 +1,64 @@
package org.apache.lucene.demo.html;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
public final class Tags {
/**
 * Contains all tags for which whitespace has to be inserted for proper tokenization.
*/
public static final Set<String> WS_ELEMS = Collections.synchronizedSet(new HashSet<String>());
static{
WS_ELEMS.add("<hr");
WS_ELEMS.add("<hr/"); // note that "<hr />" does not need to be listed explicitly
WS_ELEMS.add("<br");
WS_ELEMS.add("<br/");
WS_ELEMS.add("<p");
WS_ELEMS.add("</p");
WS_ELEMS.add("<div");
WS_ELEMS.add("</div");
WS_ELEMS.add("<td");
WS_ELEMS.add("</td");
WS_ELEMS.add("<li");
WS_ELEMS.add("</li");
WS_ELEMS.add("<q");
WS_ELEMS.add("</q");
WS_ELEMS.add("<blockquote");
WS_ELEMS.add("</blockquote");
WS_ELEMS.add("<dt");
WS_ELEMS.add("</dt");
WS_ELEMS.add("<h1");
WS_ELEMS.add("</h1");
WS_ELEMS.add("<h2");
WS_ELEMS.add("</h2");
WS_ELEMS.add("<h3");
WS_ELEMS.add("</h3");
WS_ELEMS.add("<h4");
WS_ELEMS.add("</h4");
WS_ELEMS.add("<h5");
WS_ELEMS.add("</h5");
WS_ELEMS.add("<h6");
WS_ELEMS.add("</h6");
}
}
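
The WS_ELEMS set above drives whitespace insertion in the grammar's Tag() production, which looks up the lowercased tag name. A minimal sketch (not part of this commit) of that membership test:

import org.apache.lucene.demo.html.Tags;

public class TagsCheck {
  public static void main(String[] args) {
    // "<br" is what t1.image.toLowerCase() yields for "<BR>"; the TagName token
    // never includes the closing '>'.
    String tagName = "<br";
    // prints true: the parser then calls addSpace(), so "a<BR>b" tokenizes as two words
    System.out.println(Tags.WS_ELEMS.contains(tagName));
  }
}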

View File

@ -0,0 +1,51 @@
package org.apache.lucene.demo.html;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.*;
class Test {
public static void main(String[] argv) throws IOException, InterruptedException {
if ("-dir".equals(argv[0])) {
String[] files = new File(argv[1]).list();
java.util.Arrays.sort(files);
for (int i = 0; i < files.length; i++) {
System.err.println(files[i]);
File file = new File(argv[1], files[i]);
parse(file);
}
} else
parse(new File(argv[0]));
}
public static void parse(File file) throws IOException, InterruptedException {
FileInputStream fis = null;
try {
fis = new FileInputStream(file);
HTMLParser parser = new HTMLParser(fis);
System.out.println("Title: " + Entities.encode(parser.getTitle()));
System.out.println("Summary: " + Entities.encode(parser.getSummary()));
System.out.println("Content:");
LineNumberReader reader = new LineNumberReader(parser.getReader());
for (String l = reader.readLine(); l != null; l = reader.readLine())
System.out.println(l);
} finally {
if (fis != null) fis.close();
}
}
}

View File

@ -0,0 +1,124 @@
/* Generated By:JavaCC: Do not edit this line. Token.java Version 4.1 */
/* JavaCCOptions:TOKEN_EXTENDS=,KEEP_LINE_COL=null */
package org.apache.lucene.demo.html;
/**
* Describes the input token stream.
*/
public class Token {
/**
* An integer that describes the kind of this token. This numbering
* system is determined by JavaCCParser, and a table of these numbers is
* stored in the file ...Constants.java.
*/
public int kind;
/** The line number of the first character of this Token. */
public int beginLine;
/** The column number of the first character of this Token. */
public int beginColumn;
/** The line number of the last character of this Token. */
public int endLine;
/** The column number of the last character of this Token. */
public int endColumn;
/**
* The string image of the token.
*/
public String image;
/**
* A reference to the next regular (non-special) token from the input
* stream. If this is the last token from the input stream, or if the
* token manager has not read tokens beyond this one, this field is
* set to null. This is true only if this token is also a regular
* token. Otherwise, see below for a description of the contents of
* this field.
*/
public Token next;
/**
* This field is used to access special tokens that occur prior to this
* token, but after the immediately preceding regular (non-special) token.
* If there are no such special tokens, this field is set to null.
* When there are more than one such special token, this field refers
* to the last of these special tokens, which in turn refers to the next
* previous special token through its specialToken field, and so on
* until the first special token (whose specialToken field is null).
* The next fields of special tokens refer to other special tokens that
* immediately follow it (without an intervening regular token). If there
* is no such token, this field is null.
*/
public Token specialToken;
/**
* An optional attribute value of the Token.
* Tokens which are not used as syntactic sugar will often contain
* meaningful values that will be used later on by the compiler or
* interpreter. This attribute value is often different from the image.
* Any subclass of Token that actually wants to return a non-null value can
* override this method as appropriate.
*/
public Object getValue() {
return null;
}
/**
* No-argument constructor
*/
public Token() {}
/**
* Constructs a new token for the specified Image.
*/
public Token(int kind)
{
this(kind, null);
}
/**
* Constructs a new token for the specified Image and Kind.
*/
public Token(int kind, String image)
{
this.kind = kind;
this.image = image;
}
/**
* Returns the image.
*/
public String toString()
{
return image;
}
/**
* Returns a new Token object, by default. However, if you want, you
* can create and return subclass objects based on the value of ofKind.
* Simply add the cases to the switch for all those special cases.
* For example, if you have a subclass of Token called IDToken that
* you want to create if ofKind is ID, simply add something like :
*
* case MyParserConstants.ID : return new IDToken(ofKind, image);
*
 * to the following switch statement. Then you can cast the matchedToken
 * variable to the appropriate type and use it in your lexical actions.
*/
public static Token newToken(int ofKind, String image)
{
switch(ofKind)
{
default : return new Token(ofKind, image);
}
}
public static Token newToken(int ofKind)
{
return newToken(ofKind, null);
}
}
/* JavaCC - OriginalChecksum=7bf8bdbb1c45bccd8162cdd48316d5e0 (do not edit this line) */

View File

@ -0,0 +1,141 @@
/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 4.1 */
/* JavaCCOptions: */
package org.apache.lucene.demo.html;
/** Token Manager Error. */
@SuppressWarnings("serial")
public class TokenMgrError extends Error
{
/*
* Ordinals for various reasons why an Error of this type can be thrown.
*/
/**
* Lexical error occurred.
*/
static final int LEXICAL_ERROR = 0;
/**
* An attempt was made to create a second instance of a static token manager.
*/
static final int STATIC_LEXER_ERROR = 1;
/**
* Tried to change to an invalid lexical state.
*/
static final int INVALID_LEXICAL_STATE = 2;
/**
* Detected (and bailed out of) an infinite loop in the token manager.
*/
static final int LOOP_DETECTED = 3;
/**
* Indicates the reason why the exception is thrown. It will have
* one of the above 4 values.
*/
int errorCode;
/**
* Replaces unprintable characters by their escaped (or unicode escaped)
* equivalents in the given string
*/
protected static final String addEscapes(String str) {
StringBuffer retval = new StringBuffer();
char ch;
for (int i = 0; i < str.length(); i++) {
switch (str.charAt(i))
{
case 0 :
continue;
case '\b':
retval.append("\\b");
continue;
case '\t':
retval.append("\\t");
continue;
case '\n':
retval.append("\\n");
continue;
case '\f':
retval.append("\\f");
continue;
case '\r':
retval.append("\\r");
continue;
case '\"':
retval.append("\\\"");
continue;
case '\'':
retval.append("\\\'");
continue;
case '\\':
retval.append("\\\\");
continue;
default:
if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) {
String s = "0000" + Integer.toString(ch, 16);
retval.append("\\u" + s.substring(s.length() - 4, s.length()));
} else {
retval.append(ch);
}
continue;
}
}
return retval.toString();
}
/**
* Returns a detailed message for the Error when it is thrown by the
* token manager to indicate a lexical error.
* Parameters :
* EOFSeen : indicates if EOF caused the lexical error
* curLexState : lexical state in which this error occurred
* errorLine : line number when the error occurred
* errorColumn : column number when the error occurred
* errorAfter : prefix that was seen before this error occurred
* curchar : the offending character
* Note: You can customize the lexical error message by modifying this method.
*/
protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) {
return("Lexical error at line " +
errorLine + ", column " +
errorColumn + ". Encountered: " +
(EOFSeen ? "<EOF> " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " (" + (int)curChar + "), ") +
"after : \"" + addEscapes(errorAfter) + "\"");
}
/**
* You can also modify the body of this method to customize your error messages.
* For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not
 * of end-users' concern, so you can return something like:
*
* "Internal Error : Please file a bug report .... "
*
* from this method for such cases in the release version of your parser.
*/
public String getMessage() {
return super.getMessage();
}
/*
* Constructors of various flavors follow.
*/
/** No arg constructor. */
public TokenMgrError() {
}
/** Constructor with message and reason. */
public TokenMgrError(String message, int reason) {
super(message);
errorCode = reason;
}
/** Full Constructor. */
public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) {
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
/* JavaCC - OriginalChecksum=5ffb7e46d5ae93d8d59e6f4ae7eb36d1 (do not edit this line) */

View File

@ -0,0 +1,29 @@
package org.apache.lucene;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/** Lucene's package information, including version. **/
public final class LucenePackage {
private LucenePackage() {} // can't construct
/** Return Lucene's package, including version information. */
public static Package get() {
return LucenePackage.class.getPackage();
}
}
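
The Package returned by get() exposes whatever version metadata the jar manifest recorded. A small sketch (not part of this commit), using only the standard java.lang.Package API:

public class ShowLuceneVersion {
  public static void main(String[] args) {
    // getImplementationVersion() reads the Implementation-Version manifest entry;
    // it returns null when the classes are not loaded from a packaged jar.
    System.out.println(org.apache.lucene.LucenePackage.get().getImplementationVersion());
  }
}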

File diff suppressed because it is too large.

View File

@ -0,0 +1,144 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import java.io.IOException;
import java.io.Closeable;
import java.lang.reflect.Method;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.document.Fieldable;
/** An Analyzer builds TokenStreams, which analyze text. It thus represents a
* policy for extracting index terms from text.
* <p>
* Typical implementations first build a Tokenizer, which breaks the stream of
* characters from the Reader into raw Tokens. One or more TokenFilters may
* then be applied to the output of the Tokenizer.
*/
public abstract class Analyzer implements Closeable {
/** Creates a TokenStream which tokenizes all the text in the provided
* Reader. Must be able to handle null field name for
* backward compatibility.
*/
public abstract TokenStream tokenStream(String fieldName, Reader reader);
/** Creates a TokenStream that is allowed to be re-used
* from the previous time that the same thread called
* this method. Callers that do not need to use more
* than one TokenStream at the same time from this
* analyzer should use this method for better
* performance.
*/
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
return tokenStream(fieldName, reader);
}
private CloseableThreadLocal<Object> tokenStreams = new CloseableThreadLocal<Object>();
/** Used by Analyzers that implement reusableTokenStream
* to retrieve previously saved TokenStreams for re-use
* by the same thread. */
protected Object getPreviousTokenStream() {
try {
return tokenStreams.get();
} catch (NullPointerException npe) {
if (tokenStreams == null) {
throw new AlreadyClosedException("this Analyzer is closed");
} else {
throw npe;
}
}
}
/** Used by Analyzers that implement reusableTokenStream
* to save a TokenStream for later re-use by the same
* thread. */
protected void setPreviousTokenStream(Object obj) {
try {
tokenStreams.set(obj);
} catch (NullPointerException npe) {
if (tokenStreams == null) {
throw new AlreadyClosedException("this Analyzer is closed");
} else {
throw npe;
}
}
}
/** @deprecated */
protected boolean overridesTokenStreamMethod = false;
/** @deprecated This is only present to preserve
* back-compat of classes that subclass a core analyzer
* and override tokenStream but not reusableTokenStream */
protected void setOverridesTokenStreamMethod(Class<? extends Analyzer> baseClass) {
try {
Method m = this.getClass().getMethod("tokenStream", String.class, Reader.class);
overridesTokenStreamMethod = m.getDeclaringClass() != baseClass;
} catch (NoSuchMethodException nsme) {
// cannot happen, as baseClass is subclass of Analyzer through generics
overridesTokenStreamMethod = false;
}
}
/**
* Invoked before indexing a Fieldable instance if
* terms have already been added to that field. This allows custom
* analyzers to place an automatic position increment gap between
* Fieldable instances using the same field name. The default value
* position increment gap is 0. With a 0 position increment gap and
* the typical default token position increment of 1, all terms in a field,
* including across Fieldable instances, are in successive positions, allowing
* exact PhraseQuery matches, for instance, across Fieldable instance boundaries.
*
* @param fieldName Fieldable name being indexed.
* @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
*/
public int getPositionIncrementGap(String fieldName) {
return 0;
}
/**
* Just like {@link #getPositionIncrementGap}, except for
* Token offsets instead. By default this returns 1 for
 * tokenized fields, as if the fields were joined
 * with an extra space character, and 0 for un-tokenized
* fields. This method is only called if the field
* produced at least one token for indexing.
*
* @param field the field just indexed
* @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
*/
public int getOffsetGap(Fieldable field) {
if (field.isTokenized())
return 1;
else
return 0;
}
/** Frees persistent resources used by this Analyzer */
public void close() {
tokenStreams.close();
tokenStreams = null;
}
}
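
The class javadoc above describes the typical composition: a Tokenizer producing raw tokens, wrapped by one or more TokenFilters. As an illustration only (not part of this commit), a minimal Analyzer following that pattern could look like the sketch below; it assumes the core WhitespaceTokenizer and LowerCaseFilter classes from this same package.

import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;

public class LowercaseWhitespaceAnalyzer extends Analyzer {
  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    // break the character stream into whitespace-delimited tokens, then lowercase them
    return new LowerCaseFilter(new WhitespaceTokenizer(reader));
  }
}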

View File

@ -0,0 +1,93 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis;
import java.util.ArrayList;
import java.util.List;
/**
* Base utility class for implementing a {@link CharFilter}.
* You subclass this, and then record mappings by calling
* {@link #addOffCorrectMap}, and then invoke the correct
* method to correct an offset.
*
* <p><b>NOTE</b>: This class is not particularly efficient.
 * For example, a new OffCorrectMap instance is created for every
 * call to {@link #addOffCorrectMap} and appended
 * to a private list.
*/
public abstract class BaseCharFilter extends CharFilter {
private List<OffCorrectMap> pcmList;
public BaseCharFilter(CharStream in) {
super(in);
}
/** Retrieve the corrected offset. Note that this method
 * is slow if you correct positions far before the most
* recently added position, as it's a simple linear
* search backwards through all offset corrections added
* by {@link #addOffCorrectMap}. */
@Override
protected int correct(int currentOff) {
if (pcmList == null || pcmList.isEmpty()) {
return currentOff;
}
for (int i = pcmList.size() - 1; i >= 0; i--) {
if (currentOff >= pcmList.get(i).off) {
return currentOff + pcmList.get(i).cumulativeDiff;
}
}
return currentOff;
}
protected int getLastCumulativeDiff() {
return pcmList == null || pcmList.isEmpty() ?
0 : pcmList.get(pcmList.size() - 1).cumulativeDiff;
}
protected void addOffCorrectMap(int off, int cumulativeDiff) {
if (pcmList == null) {
pcmList = new ArrayList<OffCorrectMap>();
}
pcmList.add(new OffCorrectMap(off, cumulativeDiff));
}
static class OffCorrectMap {
int off;
int cumulativeDiff;
OffCorrectMap(int off, int cumulativeDiff) {
this.off = off;
this.cumulativeDiff = cumulativeDiff;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append('(');
sb.append(off);
sb.append(',');
sb.append(cumulativeDiff);
sb.append(')');
return sb.toString();
}
}
}
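
As a sketch only (not part of this commit): a subclass records cumulative offset shifts with addOffCorrectMap, and correct() maps an offset in the filtered text back toward the original input. The subclass below assumes the CharStream/CharFilter API from this package handles the actual character reading; treat that as an assumption, not a definitive usage.

import org.apache.lucene.analysis.BaseCharFilter;
import org.apache.lucene.analysis.CharStream;

// Hypothetical filter that (conceptually) dropped 3 characters at offset 10 of the
// original text and records the shift so later offsets can be corrected.
public class DroppedSpanCharFilter extends BaseCharFilter {
  public DroppedSpanCharFilter(CharStream in) {
    super(in);
    // offsets >= 10 in the filtered text are 3 characters later in the original
    addOffCorrectMap(10, 3);
  }
}

Given the correct() implementation above, correct(12) would return 15, so a token starting at filtered offset 12 is reported at original offset 15.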

View File

@ -0,0 +1,86 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.apache.lucene.util.AttributeSource;
/**
* This class can be used if the token attributes of a TokenStream
* are intended to be consumed more than once. It caches
* all token attribute states locally in a List.
*
* <P>CachingTokenFilter implements the optional method
* {@link TokenStream#reset()}, which repositions the
* stream to the first Token.
*/
public final class CachingTokenFilter extends TokenFilter {
private List<AttributeSource.State> cache = null;
private Iterator<AttributeSource.State> iterator = null;
private AttributeSource.State finalState;
public CachingTokenFilter(TokenStream input) {
super(input);
}
@Override
public final boolean incrementToken() throws IOException {
if (cache == null) {
// fill cache lazily
cache = new LinkedList<AttributeSource.State>();
fillCache();
iterator = cache.iterator();
}
if (!iterator.hasNext()) {
// the cache is exhausted, return false
return false;
}
// Since the TokenFilter can be reset, the tokens need to be preserved as immutable.
restoreState(iterator.next());
return true;
}
@Override
public final void end() throws IOException {
if (finalState != null) {
restoreState(finalState);
}
}
@Override
public void reset() throws IOException {
if(cache != null) {
iterator = cache.iterator();
}
}
private void fillCache() throws IOException {
while(input.incrementToken()) {
cache.add(captureState());
}
// capture final state
input.end();
finalState = captureState();
}
}
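
A minimal usage sketch (not part of this commit) of consuming the same token stream twice; it assumes WhitespaceTokenizer and TermAttribute from the core analysis packages.

import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public class CachingTokenFilterDemo {
  public static void main(String[] args) throws IOException {
    CachingTokenFilter cached =
        new CachingTokenFilter(new WhitespaceTokenizer(new StringReader("foo bar")));
    TermAttribute term = cached.addAttribute(TermAttribute.class);
    while (cached.incrementToken()) { }   // first pass fills the cache
    cached.reset();                       // reposition to the first cached state
    while (cached.incrementToken()) {
      System.out.println(term.term());    // second pass replays "foo", then "bar"
    }
  }
}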

View File

@ -0,0 +1,390 @@
package org.apache.lucene.analysis;
import java.util.AbstractSet;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.Set;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A simple class that stores Strings as char[]'s in a
* hash table. Note that this is not a general purpose
* class. For example, it cannot remove items from the
* set, nor does it resize its hash table to be smaller,
* etc. It is designed to be quick to test if a char[]
* is in the set without the necessity of converting it
* to a String first.
* <P>
* <em>Please note:</em> This class implements {@link java.util.Set Set} but
 * does not behave as it should in all cases. The generic type is
 * {@code Set<Object>}, because you can add any object that has a
 * string representation. The add methods will use
 * {@link Object#toString} and store the result using a {@code char[]}
 * buffer. The {@code contains()} methods behave the same way.
 * {@link #iterator()} returns an {@code Iterator<String>}.
 * For type safety, {@link #stringIterator()} is also provided.
*/
public class CharArraySet extends AbstractSet<Object> {
private final static int INIT_SIZE = 8;
private char[][] entries;
private int count;
private final boolean ignoreCase;
public static final CharArraySet EMPTY_SET = CharArraySet.unmodifiableSet(new CharArraySet(0, false));
/** Create set with enough capacity to hold startSize
* terms */
public CharArraySet(int startSize, boolean ignoreCase) {
this.ignoreCase = ignoreCase;
int size = INIT_SIZE;
while(startSize + (startSize>>2) > size)
size <<= 1;
entries = new char[size][];
}
/** Create set from a Collection of char[] or String */
public CharArraySet(Collection<? extends Object> c, boolean ignoreCase) {
this(c.size(), ignoreCase);
addAll(c);
}
/** Create set from entries */
private CharArraySet(char[][] entries, boolean ignoreCase, int count){
this.entries = entries;
this.ignoreCase = ignoreCase;
this.count = count;
}
/** true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
* are in the set */
public boolean contains(char[] text, int off, int len) {
return entries[getSlot(text, off, len)] != null;
}
/** true if the <code>CharSequence</code> is in the set */
public boolean contains(CharSequence cs) {
return entries[getSlot(cs)] != null;
}
private int getSlot(char[] text, int off, int len) {
int code = getHashCode(text, off, len);
int pos = code & (entries.length-1);
char[] text2 = entries[pos];
if (text2 != null && !equals(text, off, len, text2)) {
final int inc = ((code>>8)+code)|1;
do {
code += inc;
pos = code & (entries.length-1);
text2 = entries[pos];
} while (text2 != null && !equals(text, off, len, text2));
}
return pos;
}
  /** Returns the hash slot for the given CharSequence */
private int getSlot(CharSequence text) {
int code = getHashCode(text);
int pos = code & (entries.length-1);
char[] text2 = entries[pos];
if (text2 != null && !equals(text, text2)) {
final int inc = ((code>>8)+code)|1;
do {
code += inc;
pos = code & (entries.length-1);
text2 = entries[pos];
} while (text2 != null && !equals(text, text2));
}
return pos;
}
/** Add this CharSequence into the set */
public boolean add(CharSequence text) {
return add(text.toString()); // could be more efficient
}
/** Add this String into the set */
public boolean add(String text) {
return add(text.toCharArray());
}
/** Add this char[] directly to the set.
* If ignoreCase is true for this Set, the text array will be directly modified.
* The user should never modify this text array after calling this method.
*/
public boolean add(char[] text) {
if (ignoreCase)
for(int i=0;i<text.length;i++)
text[i] = Character.toLowerCase(text[i]);
int slot = getSlot(text, 0, text.length);
if (entries[slot] != null) return false;
entries[slot] = text;
count++;
if (count + (count>>2) > entries.length) {
rehash();
}
return true;
}
private boolean equals(char[] text1, int off, int len, char[] text2) {
if (len != text2.length)
return false;
if (ignoreCase) {
for(int i=0;i<len;i++) {
if (Character.toLowerCase(text1[off+i]) != text2[i])
return false;
}
} else {
for(int i=0;i<len;i++) {
if (text1[off+i] != text2[i])
return false;
}
}
return true;
}
private boolean equals(CharSequence text1, char[] text2) {
int len = text1.length();
if (len != text2.length)
return false;
if (ignoreCase) {
for(int i=0;i<len;i++) {
if (Character.toLowerCase(text1.charAt(i)) != text2[i])
return false;
}
} else {
for(int i=0;i<len;i++) {
if (text1.charAt(i) != text2[i])
return false;
}
}
return true;
}
private void rehash() {
final int newSize = 2*entries.length;
char[][] oldEntries = entries;
entries = new char[newSize][];
for(int i=0;i<oldEntries.length;i++) {
char[] text = oldEntries[i];
if (text != null) {
// todo: could be faster... no need to compare strings on collision
entries[getSlot(text,0,text.length)] = text;
}
}
}
private int getHashCode(char[] text, int offset, int len) {
int code = 0;
final int stop = offset + len;
if (ignoreCase) {
for (int i=offset; i<stop; i++) {
code = code*31 + Character.toLowerCase(text[i]);
}
} else {
for (int i=offset; i<stop; i++) {
code = code*31 + text[i];
}
}
return code;
}
private int getHashCode(CharSequence text) {
int code = 0;
int len = text.length();
if (ignoreCase) {
for (int i=0; i<len; i++) {
code = code*31 + Character.toLowerCase(text.charAt(i));
}
} else {
for (int i=0; i<len; i++) {
code = code*31 + text.charAt(i);
}
}
return code;
}
@Override
public int size() {
return count;
}
@Override
public boolean isEmpty() {
return count==0;
}
@Override
public boolean contains(Object o) {
if (o instanceof char[]) {
final char[] text = (char[])o;
return contains(text, 0, text.length);
}
return contains(o.toString());
}
@Override
public boolean add(Object o) {
if (o instanceof char[]) {
return add((char[])o);
}
return add(o.toString());
}
/**
 * Returns an unmodifiable {@link CharArraySet}. This allows providing
 * unmodifiable views of internal sets for "read-only" use.
*
* @param set
* a set for which the unmodifiable set is returned.
 * @return a new unmodifiable {@link CharArraySet}.
* @throws NullPointerException
* if the given set is <code>null</code>.
*/
public static CharArraySet unmodifiableSet(CharArraySet set) {
if (set == null)
throw new NullPointerException("Given set is null");
if (set == EMPTY_SET)
return EMPTY_SET;
if (set instanceof UnmodifiableCharArraySet)
return set;
/*
 * Instead of delegating calls to the given set, copy the low-level values to
 * the unmodifiable subclass.
*/
return new UnmodifiableCharArraySet(set.entries, set.ignoreCase, set.count);
}
/**
* Returns a copy of the given set as a {@link CharArraySet}. If the given set
* is a {@link CharArraySet} the ignoreCase property will be preserved.
*
* @param set
* a set to copy
* @return a copy of the given set as a {@link CharArraySet}. If the given set
* is a {@link CharArraySet} the ignoreCase property will be
* preserved.
*/
public static CharArraySet copy(Set<?> set) {
if (set == null)
throw new NullPointerException("Given set is null");
if(set == EMPTY_SET)
return EMPTY_SET;
final boolean ignoreCase = set instanceof CharArraySet ? ((CharArraySet) set).ignoreCase
: false;
return new CharArraySet(set, ignoreCase);
}
/** The Iterator<String> for this set. Strings are constructed on the fly, so
* use <code>nextCharArray</code> for more efficient access. */
public class CharArraySetIterator implements Iterator<String> {
int pos=-1;
char[] next;
CharArraySetIterator() {
goNext();
}
private void goNext() {
next = null;
pos++;
while (pos < entries.length && (next=entries[pos]) == null) pos++;
}
public boolean hasNext() {
return next != null;
}
/** do not modify the returned char[] */
public char[] nextCharArray() {
char[] ret = next;
goNext();
return ret;
}
/** Returns the next String, as a Set<String> would...
* use nextCharArray() for better efficiency. */
public String next() {
return new String(nextCharArray());
}
public void remove() {
throw new UnsupportedOperationException();
}
}
  /** returns an iterator of newly allocated Strings */
public Iterator<String> stringIterator() {
return new CharArraySetIterator();
}
  /** returns an iterator of newly allocated Strings; this method violates the Set interface */
@Override
@SuppressWarnings("unchecked")
public Iterator<Object> iterator() {
return (Iterator) stringIterator();
}
/**
* Efficient unmodifiable {@link CharArraySet}. This implementation does not
 * delegate calls to a given {@link CharArraySet} like
 * {@link Collections#unmodifiableSet(java.util.Set)} does. Instead it passes
* the internal representation of a {@link CharArraySet} to a super
* constructor and overrides all mutators.
*/
private static final class UnmodifiableCharArraySet extends CharArraySet {
private UnmodifiableCharArraySet(char[][] entries, boolean ignoreCase,
int count) {
super(entries, ignoreCase, count);
}
@Override
public boolean add(Object o){
throw new UnsupportedOperationException();
}
@Override
public boolean addAll(Collection<? extends Object> coll) {
throw new UnsupportedOperationException();
}
@Override
public boolean add(char[] text) {
throw new UnsupportedOperationException();
}
@Override
public boolean add(CharSequence text) {
throw new UnsupportedOperationException();
}
@Override
public boolean add(String text) {
throw new UnsupportedOperationException();
}
}
}
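// Illustrative usage sketch, not part of the original Lucene file: it shows how a
// CharArraySet can be queried against a char[] region without allocating Strings.
// The class name and sample values are hypothetical.
class CharArraySetUsageExample {
  public static void main(String[] args) {
    CharArraySet stopWords = new CharArraySet(16, /* ignoreCase */ true);
    stopWords.add("The");                 // stored lowercased because ignoreCase is true
    stopWords.add("and".toCharArray());

    char[] buffer = "And then".toCharArray();
    // test the first three chars ("And") directly against the set, no String needed
    System.out.println(stopWords.contains(buffer, 0, 3));  // true
    System.out.println(stopWords.contains("then"));        // false
  }
}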

View File

@ -0,0 +1,82 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis;
import java.io.IOException;
/**
 * Subclasses of CharFilter can be chained to filter a CharStream.
* They can be used as {@link java.io.Reader} with additional offset
* correction. {@link Tokenizer}s will automatically use {@link #correctOffset}
* if a CharFilter/CharStream subclass is used.
*
* @version $Id$
*
*/
public abstract class CharFilter extends CharStream {
protected CharStream input;
protected CharFilter(CharStream in) {
input = in;
}
/**
 * Subclasses may want to override this method to correct the current offset.
*
* @param currentOff current offset
* @return corrected offset
*/
protected int correct(int currentOff) {
return currentOff;
}
/**
* Chains the corrected offset through the input
* CharFilter.
*/
@Override
public final int correctOffset(int currentOff) {
return input.correctOffset(correct(currentOff));
}
@Override
public void close() throws IOException {
input.close();
}
@Override
public int read(char[] cbuf, int off, int len) throws IOException {
return input.read(cbuf, off, len);
}
@Override
public boolean markSupported(){
return input.markSupported();
}
@Override
public void mark( int readAheadLimit ) throws IOException {
input.mark(readAheadLimit);
}
@Override
public void reset() throws IOException {
input.reset();
}
}
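// Illustrative sketch, not part of the original Lucene file: a minimal CharFilter
// subclass that upper-cases every character. It inserts and removes nothing, so the
// inherited identity correct() keeps offsets accurate. The class name is hypothetical.
class UpperCaseCharFilterExample extends CharFilter {
  public UpperCaseCharFilterExample(CharStream in) {
    super(in);
  }
  @Override
  public int read(char[] cbuf, int off, int len) throws IOException {
    int n = input.read(cbuf, off, len);
    for (int i = off; i < off + n; i++) {   // n == -1 at EOF, so the loop is skipped
      cbuf[i] = Character.toUpperCase(cbuf[i]);
    }
    return n;
  }
}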

View File

@ -0,0 +1,71 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis;
import java.io.IOException;
import java.io.Reader;
/**
* CharReader is a Reader wrapper. It reads chars from
 * a Reader and outputs a {@link CharStream}, defining an
 * identity {@link #correctOffset} method that
* simply returns the provided offset.
*/
public final class CharReader extends CharStream {
protected Reader input;
public static CharStream get(Reader input) {
return input instanceof CharStream ?
(CharStream)input : new CharReader(input);
}
private CharReader(Reader in) {
input = in;
}
@Override
public int correctOffset(int currentOff) {
return currentOff;
}
@Override
public void close() throws IOException {
input.close();
}
@Override
public int read(char[] cbuf, int off, int len) throws IOException {
return input.read(cbuf, off, len);
}
@Override
public boolean markSupported(){
return input.markSupported();
}
@Override
public void mark( int readAheadLimit ) throws IOException {
input.mark(readAheadLimit);
}
@Override
public void reset() throws IOException {
input.reset();
}
}
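// Illustrative sketch, not part of the original Lucene file: CharReader.get wraps a
// plain Reader once and returns an existing CharStream unchanged. The class name is
// hypothetical.
class CharReaderUsageExample {
  public static void main(String[] args) {
    CharStream cs = CharReader.get(new java.io.StringReader("some text"));
    // passing a CharStream back in returns the same instance, so nothing is double-wrapped
    System.out.println(CharReader.get(cs) == cs);  // true
  }
}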

View File

@ -0,0 +1,41 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis;
import java.io.Reader;
/**
* CharStream adds {@link #correctOffset}
* functionality over {@link Reader}. All Tokenizers accept a
* CharStream instead of {@link Reader} as input, which enables
* arbitrary character based filtering before tokenization.
 * The {@link #correctOffset} method fixes offsets to account for
* removal or insertion of characters, so that the offsets
* reported in the tokens match the character offsets of the
* original Reader.
*/
public abstract class CharStream extends Reader {
/**
* Called by CharFilter(s) and Tokenizer to correct token offset.
*
* @param currentOff offset as seen in the output
* @return corrected offset based on the input
*/
public abstract int correctOffset(int currentOff);
}

View File

@ -0,0 +1,126 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.AttributeSource;
/** An abstract base class for simple, character-oriented tokenizers.*/
public abstract class CharTokenizer extends Tokenizer {
public CharTokenizer(Reader input) {
super(input);
offsetAtt = addAttribute(OffsetAttribute.class);
termAtt = addAttribute(TermAttribute.class);
}
public CharTokenizer(AttributeSource source, Reader input) {
super(source, input);
offsetAtt = addAttribute(OffsetAttribute.class);
termAtt = addAttribute(TermAttribute.class);
}
public CharTokenizer(AttributeFactory factory, Reader input) {
super(factory, input);
offsetAtt = addAttribute(OffsetAttribute.class);
termAtt = addAttribute(TermAttribute.class);
}
private int offset = 0, bufferIndex = 0, dataLen = 0;
private static final int MAX_WORD_LEN = 255;
private static final int IO_BUFFER_SIZE = 4096;
private final char[] ioBuffer = new char[IO_BUFFER_SIZE];
private TermAttribute termAtt;
private OffsetAttribute offsetAtt;
/** Returns true iff a character should be included in a token. This
* tokenizer generates as tokens adjacent sequences of characters which
* satisfy this predicate. Characters for which this is false are used to
* define token boundaries and are not included in tokens. */
protected abstract boolean isTokenChar(char c);
/** Called on each token character to normalize it before it is added to the
* token. The default implementation does nothing. Subclasses may use this
* to, e.g., lowercase tokens. */
protected char normalize(char c) {
return c;
}
@Override
public final boolean incrementToken() throws IOException {
clearAttributes();
int length = 0;
int start = bufferIndex;
char[] buffer = termAtt.termBuffer();
while (true) {
if (bufferIndex >= dataLen) {
offset += dataLen;
dataLen = input.read(ioBuffer);
if (dataLen == -1) {
dataLen = 0; // so next offset += dataLen won't decrement offset
if (length > 0)
break;
else
return false;
}
bufferIndex = 0;
}
final char c = ioBuffer[bufferIndex++];
if (isTokenChar(c)) { // if it's a token char
if (length == 0) // start of token
start = offset + bufferIndex - 1;
else if (length == buffer.length)
buffer = termAtt.resizeTermBuffer(1+length);
buffer[length++] = normalize(c); // buffer it, normalized
if (length == MAX_WORD_LEN) // buffer overflow!
break;
} else if (length > 0) // at non-Letter w/ chars
break; // return 'em
}
termAtt.setTermLength(length);
offsetAtt.setOffset(correctOffset(start), correctOffset(start+length));
return true;
}
@Override
public final void end() {
// set final offset
int finalOffset = correctOffset(offset);
offsetAtt.setOffset(finalOffset, finalOffset);
}
@Override
public void reset(Reader input) throws IOException {
super.reset(input);
bufferIndex = 0;
offset = 0;
dataLen = 0;
}
}
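// Illustrative sketch, not part of the original Lucene file: a minimal CharTokenizer
// subclass that keeps only digits, showing how isTokenChar defines token boundaries
// (normalize could additionally be overridden to rewrite each kept character).
// The class name is hypothetical.
class DigitTokenizerExample extends CharTokenizer {
  public DigitTokenizerExample(Reader input) {
    super(input);
  }
  @Override
  protected boolean isTokenChar(char c) {
    return Character.isDigit(c);   // digits form tokens, everything else splits them
  }
}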

View File

@ -0,0 +1,260 @@
package org.apache.lucene.analysis;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A filter that replaces accented characters in the ISO Latin 1 character set
 * (ISO-8859-1) with their unaccented equivalents. The case will not be altered.
* <p>
* For instance, '&agrave;' will be replaced by 'a'.
* <p>
*
* @deprecated If you build a new index, use {@link ASCIIFoldingFilter}
* which covers a superset of Latin 1.
* This class is included for use with existing
* indexes and will be removed in a future release (possibly Lucene 4.0).
*/
public final class ISOLatin1AccentFilter extends TokenFilter {
public ISOLatin1AccentFilter(TokenStream input) {
super(input);
termAtt = addAttribute(TermAttribute.class);
}
private char[] output = new char[256];
private int outputPos;
private TermAttribute termAtt;
@Override
public final boolean incrementToken() throws java.io.IOException {
if (input.incrementToken()) {
final char[] buffer = termAtt.termBuffer();
final int length = termAtt.termLength();
// If no characters actually require rewriting then we
// just return token as-is:
for(int i=0;i<length;i++) {
final char c = buffer[i];
if (c >= '\u00c0' && c <= '\uFB06') {
removeAccents(buffer, length);
termAtt.setTermBuffer(output, 0, outputPos);
break;
}
}
return true;
} else
return false;
}
/**
 * Replaces accented characters in the given char[] with their unaccented equivalents.
*/
public final void removeAccents(char[] input, int length) {
// Worst-case length required:
final int maxSizeNeeded = 2*length;
int size = output.length;
while (size < maxSizeNeeded)
size *= 2;
if (size != output.length)
output = new char[size];
outputPos = 0;
int pos = 0;
for (int i=0; i<length; i++, pos++) {
final char c = input[pos];
// Quick test: if it's not in range then just keep
// current character
if (c < '\u00c0' || c > '\uFB06')
output[outputPos++] = c;
else {
switch (c) {
case '\u00C0' : // À
case '\u00C1' : // Á
case '\u00C2' : // Â
case '\u00C3' : // Ã
case '\u00C4' : // Ä
case '\u00C5' : // Å
output[outputPos++] = 'A';
break;
case '\u00C6' : // Æ
output[outputPos++] = 'A';
output[outputPos++] = 'E';
break;
case '\u00C7' : // Ç
output[outputPos++] = 'C';
break;
case '\u00C8' : // È
case '\u00C9' : // É
case '\u00CA' : // Ê
case '\u00CB' : // Ë
output[outputPos++] = 'E';
break;
case '\u00CC' : // Ì
case '\u00CD' : // Í
case '\u00CE' : // Î
case '\u00CF' : // Ï
output[outputPos++] = 'I';
break;
case '\u0132' : // IJ
output[outputPos++] = 'I';
output[outputPos++] = 'J';
break;
case '\u00D0' : // Ð
output[outputPos++] = 'D';
break;
case '\u00D1' : // Ñ
output[outputPos++] = 'N';
break;
case '\u00D2' : // Ò
case '\u00D3' : // Ó
case '\u00D4' : // Ô
case '\u00D5' : // Õ
case '\u00D6' : // Ö
case '\u00D8' : // Ø
output[outputPos++] = 'O';
break;
case '\u0152' : // Œ
output[outputPos++] = 'O';
output[outputPos++] = 'E';
break;
case '\u00DE' : // Þ
output[outputPos++] = 'T';
output[outputPos++] = 'H';
break;
case '\u00D9' : // Ù
case '\u00DA' : // Ú
case '\u00DB' : // Û
case '\u00DC' : // Ü
output[outputPos++] = 'U';
break;
case '\u00DD' : // Ý
case '\u0178' : // Ÿ
output[outputPos++] = 'Y';
break;
case '\u00E0' : // à
case '\u00E1' : // á
case '\u00E2' : // â
case '\u00E3' : // ã
case '\u00E4' : // ä
case '\u00E5' : // å
output[outputPos++] = 'a';
break;
case '\u00E6' : // æ
output[outputPos++] = 'a';
output[outputPos++] = 'e';
break;
case '\u00E7' : // ç
output[outputPos++] = 'c';
break;
case '\u00E8' : // è
case '\u00E9' : // é
case '\u00EA' : // ê
case '\u00EB' : // ë
output[outputPos++] = 'e';
break;
case '\u00EC' : // ì
case '\u00ED' : // í
case '\u00EE' : // î
case '\u00EF' : // ï
output[outputPos++] = 'i';
break;
case '\u0133' : // ij
output[outputPos++] = 'i';
output[outputPos++] = 'j';
break;
case '\u00F0' : // ð
output[outputPos++] = 'd';
break;
case '\u00F1' : // ñ
output[outputPos++] = 'n';
break;
case '\u00F2' : // ò
case '\u00F3' : // ó
case '\u00F4' : // ô
case '\u00F5' : // õ
case '\u00F6' : // ö
case '\u00F8' : // ø
output[outputPos++] = 'o';
break;
case '\u0153' : // œ
output[outputPos++] = 'o';
output[outputPos++] = 'e';
break;
case '\u00DF' : // ß
output[outputPos++] = 's';
output[outputPos++] = 's';
break;
case '\u00FE' : // þ
output[outputPos++] = 't';
output[outputPos++] = 'h';
break;
case '\u00F9' : // ù
case '\u00FA' : // ú
case '\u00FB' : // û
case '\u00FC' : // ü
output[outputPos++] = 'u';
break;
case '\u00FD' : // ý
case '\u00FF' : // ÿ
output[outputPos++] = 'y';
break;
case '\uFB00': //
output[outputPos++] = 'f';
output[outputPos++] = 'f';
break;
case '\uFB01': //
output[outputPos++] = 'f';
output[outputPos++] = 'i';
break;
case '\uFB02': //
output[outputPos++] = 'f';
output[outputPos++] = 'l';
break;
// following 2 are commented as they can break the maxSizeNeeded (and doing *3 could be expensive)
// case '\uFB03': //
// output[outputPos++] = 'f';
// output[outputPos++] = 'f';
// output[outputPos++] = 'i';
// break;
// case '\uFB04': //
// output[outputPos++] = 'f';
// output[outputPos++] = 'f';
// output[outputPos++] = 'l';
// break;
case '\uFB05': //
output[outputPos++] = 'f';
output[outputPos++] = 't';
break;
case '\uFB06': //
output[outputPos++] = 's';
output[outputPos++] = 't';
break;
default :
output[outputPos++] = c;
break;
}
}
}
}
}
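// Illustrative sketch, not part of the original Lucene file: removing Latin-1 accents
// from a letter-tokenized stream. It assumes the package's LetterTokenizer and the
// TermAttribute API; the sample text and class name are hypothetical.
class ISOLatin1AccentFilterExample {
  public static void main(String[] args) throws java.io.IOException {
    TokenStream ts = new ISOLatin1AccentFilter(
        new LetterTokenizer(new java.io.StringReader("caf\u00E9 d\u00E9j\u00E0 vu")));
    TermAttribute term = ts.getAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term());   // cafe, deja, vu
    }
    ts.close();
  }
}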

View File

@ -0,0 +1,53 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
/**
* "Tokenizes" the entire stream as a single token. This is useful
* for data like zip codes, ids, and some product names.
*/
public class KeywordAnalyzer extends Analyzer {
public KeywordAnalyzer() {
setOverridesTokenStreamMethod(KeywordAnalyzer.class);
}
@Override
public TokenStream tokenStream(String fieldName,
final Reader reader) {
return new KeywordTokenizer(reader);
}
@Override
public TokenStream reusableTokenStream(String fieldName,
final Reader reader) throws IOException {
if (overridesTokenStreamMethod) {
// LUCENE-1678: force fallback to tokenStream() if we
// have been subclassed and that subclass overrides
// tokenStream but not reusableTokenStream
return tokenStream(fieldName, reader);
}
Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
if (tokenizer == null) {
tokenizer = new KeywordTokenizer(reader);
setPreviousTokenStream(tokenizer);
} else
tokenizer.reset(reader);
return tokenizer;
}
}
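// Illustrative sketch, not part of the original Lucene file: KeywordAnalyzer emits the
// whole input as one token, which suits identifier-like fields. The field name, value,
// and class name are hypothetical; it assumes the TermAttribute API.
class KeywordAnalyzerExample {
  public static void main(String[] args) throws java.io.IOException {
    Analyzer analyzer = new KeywordAnalyzer();
    TokenStream ts = analyzer.tokenStream("orderId",
        new java.io.StringReader("AB-1234/56"));
    org.apache.lucene.analysis.tokenattributes.TermAttribute term =
        ts.getAttribute(org.apache.lucene.analysis.tokenattributes.TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term());   // prints the single token "AB-1234/56"
    }
    ts.close();
  }
}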

View File

@ -0,0 +1,98 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.AttributeSource;
/**
* Emits the entire input as a single token.
*/
public final class KeywordTokenizer extends Tokenizer {
private static final int DEFAULT_BUFFER_SIZE = 256;
private boolean done;
private int finalOffset;
private TermAttribute termAtt;
private OffsetAttribute offsetAtt;
public KeywordTokenizer(Reader input) {
this(input, DEFAULT_BUFFER_SIZE);
}
public KeywordTokenizer(Reader input, int bufferSize) {
super(input);
init(bufferSize);
}
public KeywordTokenizer(AttributeSource source, Reader input, int bufferSize) {
super(source, input);
init(bufferSize);
}
public KeywordTokenizer(AttributeFactory factory, Reader input, int bufferSize) {
super(factory, input);
init(bufferSize);
}
private void init(int bufferSize) {
this.done = false;
termAtt = addAttribute(TermAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
termAtt.resizeTermBuffer(bufferSize);
}
@Override
public final boolean incrementToken() throws IOException {
if (!done) {
clearAttributes();
done = true;
int upto = 0;
char[] buffer = termAtt.termBuffer();
while (true) {
final int length = input.read(buffer, upto, buffer.length-upto);
if (length == -1) break;
upto += length;
if (upto == buffer.length)
buffer = termAtt.resizeTermBuffer(1+buffer.length);
}
termAtt.setTermLength(upto);
finalOffset = correctOffset(upto);
offsetAtt.setOffset(correctOffset(0), finalOffset);
return true;
}
return false;
}
@Override
public final void end() {
// set final offset
offsetAtt.setOffset(finalOffset, finalOffset);
}
@Override
public void reset(Reader input) throws IOException {
super.reset(input);
this.done = false;
}
}

View File

@ -0,0 +1,62 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
* Removes words that are too long or too short from the stream.
*/
public final class LengthFilter extends TokenFilter {
final int min;
final int max;
private TermAttribute termAtt;
/**
* Build a filter that removes words that are too long or too
* short from the text.
*/
public LengthFilter(TokenStream in, int min, int max)
{
super(in);
this.min = min;
this.max = max;
termAtt = addAttribute(TermAttribute.class);
}
/**
 * Returns the next input token whose term() has an acceptable length
*/
@Override
public final boolean incrementToken() throws IOException {
    // return the first token whose length is within [min, max]
while (input.incrementToken()) {
int len = termAtt.termLength();
if (len >= min && len <= max) {
return true;
}
// note: else we ignore it but should we index each part of it?
}
// reached EOS -- return false
return false;
}
}
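// Illustrative sketch, not part of the original Lucene file: dropping tokens shorter
// than 3 or longer than 10 characters from a letter-tokenized stream. It assumes the
// TermAttribute API; the sample text and class name are hypothetical.
class LengthFilterExample {
  public static void main(String[] args) throws IOException {
    TokenStream ts = new LengthFilter(
        new LetterTokenizer(
            new java.io.StringReader("a an antidisestablishmentarianism word")), 3, 10);
    TermAttribute term = ts.getAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term());   // only "word" passes the length check
    }
    ts.close();
  }
}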

View File

@ -0,0 +1,53 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import org.apache.lucene.util.AttributeSource;
/** A LetterTokenizer is a tokenizer that divides text at non-letters. That is
 to say, it defines tokens as maximal strings of adjacent letters, as defined
 by the java.lang.Character.isLetter() predicate.
Note: this does a decent job for most European languages, but does a terrible
job for some Asian languages, where words are not separated by spaces. */
public class LetterTokenizer extends CharTokenizer {
/** Construct a new LetterTokenizer. */
public LetterTokenizer(Reader in) {
super(in);
}
/** Construct a new LetterTokenizer using a given {@link AttributeSource}. */
public LetterTokenizer(AttributeSource source, Reader in) {
super(source, in);
}
/** Construct a new LetterTokenizer using a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}. */
public LetterTokenizer(AttributeFactory factory, Reader in) {
super(factory, in);
}
/** Collects only characters which satisfy
* {@link Character#isLetter(char)}.*/
@Override
protected boolean isTokenChar(char c) {
return Character.isLetter(c);
}
}

View File

@ -0,0 +1,48 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
* Normalizes token text to lower case.
*/
public final class LowerCaseFilter extends TokenFilter {
public LowerCaseFilter(TokenStream in) {
super(in);
termAtt = addAttribute(TermAttribute.class);
}
private TermAttribute termAtt;
@Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
final char[] buffer = termAtt.termBuffer();
final int length = termAtt.termLength();
for(int i=0;i<length;i++)
buffer[i] = Character.toLowerCase(buffer[i]);
return true;
} else
return false;
}
}

View File

@ -0,0 +1,56 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import org.apache.lucene.util.AttributeSource;
/**
* LowerCaseTokenizer performs the function of LetterTokenizer
 * and LowerCaseFilter together. It divides text at non-letters and converts
 * the letters to lower case. While it is functionally equivalent to the combination
* of LetterTokenizer and LowerCaseFilter, there is a performance advantage
* to doing the two tasks at once, hence this (redundant) implementation.
* <P>
* Note: this does a decent job for most European languages, but does a terrible
* job for some Asian languages, where words are not separated by spaces.
*/
public final class LowerCaseTokenizer extends LetterTokenizer {
/** Construct a new LowerCaseTokenizer. */
public LowerCaseTokenizer(Reader in) {
super(in);
}
/** Construct a new LowerCaseTokenizer using a given {@link AttributeSource}. */
public LowerCaseTokenizer(AttributeSource source, Reader in) {
super(source, in);
}
/** Construct a new LowerCaseTokenizer using a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}. */
public LowerCaseTokenizer(AttributeFactory factory, Reader in) {
super(factory, in);
}
/** Converts char to lower case
* {@link Character#toLowerCase(char)}.*/
@Override
protected char normalize(char c) {
return Character.toLowerCase(c);
}
}
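// Illustrative sketch, not part of the original Lucene file: LowerCaseTokenizer emits
// the same tokens as LetterTokenizer followed by LowerCaseFilter, in one pass. It
// assumes the TermAttribute API; the sample text and class name are hypothetical.
class LowerCaseTokenizerExample {
  public static void main(String[] args) throws java.io.IOException {
    TokenStream ts = new LowerCaseTokenizer(new java.io.StringReader("Foo BAR"));
    org.apache.lucene.analysis.tokenattributes.TermAttribute term =
        ts.getAttribute(org.apache.lucene.analysis.tokenattributes.TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term());   // foo, bar
    }
    ts.close();
  }
}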

View File

@ -0,0 +1,137 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis;
import java.io.IOException;
import java.io.Reader;
import java.util.LinkedList;
/**
* Simplistic {@link CharFilter} that applies the mappings
* contained in a {@link NormalizeCharMap} to the character
 * stream, correcting the offsets to account for the
 * resulting changes.
*/
public class MappingCharFilter extends BaseCharFilter {
private final NormalizeCharMap normMap;
private LinkedList<Character> buffer;
private String replacement;
private int charPointer;
private int nextCharCounter;
  /** Constructor that takes a {@link CharStream}. */
public MappingCharFilter(NormalizeCharMap normMap, CharStream in) {
super(in);
this.normMap = normMap;
}
/** Easy-use constructor that takes a {@link Reader}. */
public MappingCharFilter(NormalizeCharMap normMap, Reader in) {
super(CharReader.get(in));
this.normMap = normMap;
}
@Override
public int read() throws IOException {
while(true) {
if (replacement != null && charPointer < replacement.length()) {
return replacement.charAt(charPointer++);
}
int firstChar = nextChar();
if (firstChar == -1) return -1;
NormalizeCharMap nm = normMap.submap != null ?
normMap.submap.get(Character.valueOf((char) firstChar)) : null;
if (nm == null) return firstChar;
NormalizeCharMap result = match(nm);
if (result == null) return firstChar;
replacement = result.normStr;
charPointer = 0;
if (result.diff != 0) {
int prevCumulativeDiff = getLastCumulativeDiff();
if (result.diff < 0) {
for(int i = 0; i < -result.diff ; i++)
addOffCorrectMap(nextCharCounter + i - prevCumulativeDiff, prevCumulativeDiff - 1 - i);
} else {
addOffCorrectMap(nextCharCounter - result.diff - prevCumulativeDiff, prevCumulativeDiff + result.diff);
}
}
}
}
private int nextChar() throws IOException {
nextCharCounter++;
if (buffer != null && !buffer.isEmpty()) {
return buffer.removeFirst().charValue();
}
return input.read();
}
private void pushChar(int c) {
nextCharCounter--;
if(buffer == null)
buffer = new LinkedList<Character>();
buffer.addFirst(Character.valueOf((char) c));
}
private void pushLastChar(int c) {
if (buffer == null) {
buffer = new LinkedList<Character>();
}
buffer.addLast(Character.valueOf((char) c));
}
private NormalizeCharMap match(NormalizeCharMap map) throws IOException {
NormalizeCharMap result = null;
if (map.submap != null) {
int chr = nextChar();
if (chr != -1) {
NormalizeCharMap subMap = map.submap.get(Character.valueOf((char) chr));
if (subMap != null) {
result = match(subMap);
}
if (result == null) {
pushChar(chr);
}
}
}
if (result == null && map.normStr != null) {
result = map;
}
return result;
}
@Override
public int read(char[] cbuf, int off, int len) throws IOException {
char[] tmp = new char[len];
int l = input.read(tmp, 0, len);
if (l != -1) {
for(int i = 0; i < l; i++)
pushLastChar(tmp[i]);
}
l = 0;
for(int i = off; i < off + len; i++) {
int c = read();
if (c == -1) break;
cbuf[i] = (char) c;
l++;
}
return l == 0 ? -1 : l;
}
}

View File

@ -0,0 +1,61 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis;
import java.util.HashMap;
import java.util.Map;
/**
* Holds a map of String input to String output, to be used
* with {@link MappingCharFilter}.
*/
public class NormalizeCharMap {
Map<Character, NormalizeCharMap> submap;
String normStr;
int diff;
  /** Records a replacement to be applied to the input
* stream. Whenever <code>singleMatch</code> occurs in
* the input, it will be replaced with
* <code>replacement</code>.
*
* @param singleMatch input String to be replaced
* @param replacement output String
*/
public void add(String singleMatch, String replacement) {
NormalizeCharMap currMap = this;
for(int i = 0; i < singleMatch.length(); i++) {
char c = singleMatch.charAt(i);
if (currMap.submap == null) {
currMap.submap = new HashMap<Character, NormalizeCharMap>(1);
}
NormalizeCharMap map = currMap.submap.get(Character.valueOf(c));
if (map == null) {
map = new NormalizeCharMap();
currMap.submap.put(Character.valueOf(c), map);
}
currMap = map;
}
if (currMap.normStr != null) {
throw new RuntimeException("MappingCharFilter: there is already a mapping for " + singleMatch);
}
currMap.normStr = replacement;
currMap.diff = singleMatch.length() - replacement.length();
}
}
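// Illustrative sketch, not part of the original Lucene file: mapping "\u00DF" to "ss"
// before tokenization. MappingCharFilter applies the NormalizeCharMap and keeps offsets
// consistent with the original text. The sample value and class name are hypothetical.
class NormalizeCharMapExample {
  public static void main(String[] args) throws java.io.IOException {
    NormalizeCharMap map = new NormalizeCharMap();
    map.add("\u00DF", "ss");                                  // one input char becomes two
    java.io.Reader filtered =
        new MappingCharFilter(map, new java.io.StringReader("stra\u00DFe"));
    char[] buf = new char[16];
    int n = filtered.read(buf, 0, buf.length);
    System.out.println(new String(buf, 0, n));                // "strasse"
  }
}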

View File

@ -0,0 +1,252 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.document.NumericField; // for javadocs
import org.apache.lucene.search.NumericRangeQuery; // for javadocs
import org.apache.lucene.search.NumericRangeFilter; // for javadocs
import org.apache.lucene.search.SortField; // for javadocs
import org.apache.lucene.search.FieldCache; // javadocs
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
/**
* <b>Expert:</b> This class provides a {@link TokenStream}
* for indexing numeric values that can be used by {@link
* NumericRangeQuery} or {@link NumericRangeFilter}.
*
* <p>Note that for simple usage, {@link NumericField} is
* recommended. {@link NumericField} disables norms and
* term freqs, as they are not usually needed during
* searching. If you need to change these settings, you
* should use this class.
*
* <p>See {@link NumericField} for capabilities of fields
* indexed numerically.</p>
*
* <p>Here's an example usage, for an <code>int</code> field:
*
* <pre>
* Field field = new Field(name, new NumericTokenStream(precisionStep).setIntValue(value));
* field.setOmitNorms(true);
* field.setOmitTermFreqAndPositions(true);
* document.add(field);
* </pre>
*
* <p>For optimal performance, re-use the TokenStream and Field instance
* for more than one document:
*
* <pre>
* NumericTokenStream stream = new NumericTokenStream(precisionStep);
* Field field = new Field(name, stream);
* field.setOmitNorms(true);
* field.setOmitTermFreqAndPositions(true);
* Document document = new Document();
* document.add(field);
*
* for(all documents) {
* stream.setIntValue(value)
* writer.addDocument(document);
* }
* </pre>
*
* <p>This stream is not intended to be used in analyzers;
* it's more for iterating the different precisions during
* indexing a specific numeric value.</p>
* <p><b>NOTE</b>: as token streams are only consumed once
* the document is added to the index, if you index more
* than one numeric field, use a separate <code>NumericTokenStream</code>
* instance for each.</p>
*
* <p>See {@link NumericRangeQuery} for more details on the
* <a
* href="../search/NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
* parameter as well as how numeric fields work under the hood.</p>
*
* <p><font color="red"><b>NOTE:</b> This API is experimental and
* might change in incompatible ways in the next release.</font>
*
* @since 2.9
*/
public final class NumericTokenStream extends TokenStream {
/** The full precision token gets this token type assigned. */
public static final String TOKEN_TYPE_FULL_PREC = "fullPrecNumeric";
/** The lower precision tokens gets this token type assigned. */
public static final String TOKEN_TYPE_LOWER_PREC = "lowerPrecNumeric";
/**
* Creates a token stream for numeric values using the default <code>precisionStep</code>
 * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized;
 * before using it, set a value using one of the set<em>???</em>Value() methods.
*/
public NumericTokenStream() {
this(NumericUtils.PRECISION_STEP_DEFAULT);
}
/**
* Creates a token stream for numeric values with the specified
 * <code>precisionStep</code>. The stream is not yet initialized;
 * before using it, set a value using one of the set<em>???</em>Value() methods.
*/
public NumericTokenStream(final int precisionStep) {
super();
this.precisionStep = precisionStep;
if (precisionStep < 1)
throw new IllegalArgumentException("precisionStep must be >=1");
}
/**
* Expert: Creates a token stream for numeric values with the specified
* <code>precisionStep</code> using the given {@link AttributeSource}.
 * The stream is not yet initialized;
 * before using it, set a value using one of the set<em>???</em>Value() methods.
*/
public NumericTokenStream(AttributeSource source, final int precisionStep) {
super(source);
this.precisionStep = precisionStep;
if (precisionStep < 1)
throw new IllegalArgumentException("precisionStep must be >=1");
}
/**
* Expert: Creates a token stream for numeric values with the specified
* <code>precisionStep</code> using the given
* {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
 * The stream is not yet initialized;
 * before using it, set a value using one of the set<em>???</em>Value() methods.
*/
public NumericTokenStream(AttributeFactory factory, final int precisionStep) {
super(factory);
this.precisionStep = precisionStep;
if (precisionStep < 1)
throw new IllegalArgumentException("precisionStep must be >=1");
}
/**
* Initializes the token stream with the supplied <code>long</code> value.
 * @param value the value for which this TokenStream should enumerate tokens.
 * @return this instance, so it can be used like this:
* <code>new Field(name, new NumericTokenStream(precisionStep).setLongValue(value))</code>
*/
public NumericTokenStream setLongValue(final long value) {
this.value = value;
valSize = 64;
shift = 0;
return this;
}
/**
* Initializes the token stream with the supplied <code>int</code> value.
 * @param value the value for which this TokenStream should enumerate tokens.
 * @return this instance, so it can be used like this:
* <code>new Field(name, new NumericTokenStream(precisionStep).setIntValue(value))</code>
*/
public NumericTokenStream setIntValue(final int value) {
this.value = (long) value;
valSize = 32;
shift = 0;
return this;
}
/**
* Initializes the token stream with the supplied <code>double</code> value.
 * @param value the value for which this TokenStream should enumerate tokens.
 * @return this instance, so it can be used like this:
* <code>new Field(name, new NumericTokenStream(precisionStep).setDoubleValue(value))</code>
*/
public NumericTokenStream setDoubleValue(final double value) {
this.value = NumericUtils.doubleToSortableLong(value);
valSize = 64;
shift = 0;
return this;
}
/**
* Initializes the token stream with the supplied <code>float</code> value.
 * @param value the value for which this TokenStream should enumerate tokens.
 * @return this instance, so it can be used like this:
* <code>new Field(name, new NumericTokenStream(precisionStep).setFloatValue(value))</code>
*/
public NumericTokenStream setFloatValue(final float value) {
this.value = (long) NumericUtils.floatToSortableInt(value);
valSize = 32;
shift = 0;
return this;
}
@Override
public void reset() {
if (valSize == 0)
throw new IllegalStateException("call set???Value() before usage");
shift = 0;
}
@Override
public boolean incrementToken() {
if (valSize == 0)
throw new IllegalStateException("call set???Value() before usage");
if (shift >= valSize)
return false;
clearAttributes();
final char[] buffer;
switch (valSize) {
case 64:
buffer = termAtt.resizeTermBuffer(NumericUtils.BUF_SIZE_LONG);
termAtt.setTermLength(NumericUtils.longToPrefixCoded(value, shift, buffer));
break;
case 32:
buffer = termAtt.resizeTermBuffer(NumericUtils.BUF_SIZE_INT);
termAtt.setTermLength(NumericUtils.intToPrefixCoded((int) value, shift, buffer));
break;
default:
// should not happen
throw new IllegalArgumentException("valSize must be 32 or 64");
}
typeAtt.setType((shift == 0) ? TOKEN_TYPE_FULL_PREC : TOKEN_TYPE_LOWER_PREC);
posIncrAtt.setPositionIncrement((shift == 0) ? 1 : 0);
shift += precisionStep;
return true;
}
@Override
public String toString() {
final StringBuilder sb = new StringBuilder("(numeric,valSize=").append(valSize);
sb.append(",precisionStep=").append(precisionStep).append(')');
return sb.toString();
}
// members
private final TermAttribute termAtt = addAttribute(TermAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
private int shift = 0, valSize = 0; // valSize==0 means not initialized
private final int precisionStep;
private long value = 0L;
}

View File

@ -0,0 +1,127 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import java.io.IOException;
import java.util.Map;
import java.util.HashMap;
/**
* This analyzer is used to facilitate scenarios where different
* fields require different analysis techniques. Use {@link #addAnalyzer}
* to add a non-default analyzer on a field name basis.
*
* <p>Example usage:
*
* <pre>
* PerFieldAnalyzerWrapper aWrapper =
* new PerFieldAnalyzerWrapper(new StandardAnalyzer());
* aWrapper.addAnalyzer("firstname", new KeywordAnalyzer());
* aWrapper.addAnalyzer("lastname", new KeywordAnalyzer());
* </pre>
*
* <p>In this example, StandardAnalyzer will be used for all fields except "firstname"
* and "lastname", for which KeywordAnalyzer will be used.
*
* <p>A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing
* and query parsing.
*/
public class PerFieldAnalyzerWrapper extends Analyzer {
private Analyzer defaultAnalyzer;
private Map<String,Analyzer> analyzerMap = new HashMap<String,Analyzer>();
/**
* Constructs with default analyzer.
*
* @param defaultAnalyzer Any fields not specifically
* defined to use a different analyzer will use the one provided here.
*/
public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer) {
this(defaultAnalyzer, null);
}
/**
* Constructs with default analyzer and a map of analyzers to use for
* specific fields.
*
* @param defaultAnalyzer Any fields not specifically
* defined to use a different analyzer will use the one provided here.
* @param fieldAnalyzers a Map (String field name to the Analyzer) to be
* used for those fields
*/
public PerFieldAnalyzerWrapper(Analyzer defaultAnalyzer,
Map<String,Analyzer> fieldAnalyzers) {
this.defaultAnalyzer = defaultAnalyzer;
if (fieldAnalyzers != null) {
analyzerMap.putAll(fieldAnalyzers);
}
setOverridesTokenStreamMethod(PerFieldAnalyzerWrapper.class);
}
/**
* Defines an analyzer to use for the specified field.
*
* @param fieldName field name requiring a non-default analyzer
* @param analyzer non-default analyzer to use for field
*/
public void addAnalyzer(String fieldName, Analyzer analyzer) {
analyzerMap.put(fieldName, analyzer);
}
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
Analyzer analyzer = analyzerMap.get(fieldName);
if (analyzer == null) {
analyzer = defaultAnalyzer;
}
return analyzer.tokenStream(fieldName, reader);
}
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
if (overridesTokenStreamMethod) {
// LUCENE-1678: force fallback to tokenStream() if we
// have been subclassed and that subclass overrides
// tokenStream but not reusableTokenStream
return tokenStream(fieldName, reader);
}
Analyzer analyzer = analyzerMap.get(fieldName);
if (analyzer == null)
analyzer = defaultAnalyzer;
return analyzer.reusableTokenStream(fieldName, reader);
}
/** Return the positionIncrementGap from the analyzer assigned to fieldName */
@Override
public int getPositionIncrementGap(String fieldName) {
Analyzer analyzer = analyzerMap.get(fieldName);
if (analyzer == null)
analyzer = defaultAnalyzer;
return analyzer.getPositionIncrementGap(fieldName);
}
@Override
public String toString() {
return "PerFieldAnalyzerWrapper(" + analyzerMap + ", default=" + defaultAnalyzer + ")";
}
}

View File

@ -0,0 +1,61 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/** Transforms the token stream as per the Porter stemming algorithm.
Note: the input to the stemming filter must already be in lower case,
so you will need to use LowerCaseFilter or LowerCaseTokenizer farther
down the Tokenizer chain in order for this to work properly!
<P>
To use this filter with other analyzers, you'll want to write an
Analyzer class that sets up the TokenStream chain as you want it.
To use this with LowerCaseTokenizer, for example, you'd write an
analyzer like this:
<P>
<PRE>
class MyAnalyzer extends Analyzer {
public final TokenStream tokenStream(String fieldName, Reader reader) {
return new PorterStemFilter(new LowerCaseTokenizer(reader));
}
}
</PRE>
*/
public final class PorterStemFilter extends TokenFilter {
private PorterStemmer stemmer;
private TermAttribute termAtt;
public PorterStemFilter(TokenStream in) {
super(in);
stemmer = new PorterStemmer();
termAtt = addAttribute(TermAttribute.class);
}
@Override
public final boolean incrementToken() throws IOException {
if (!input.incrementToken())
return false;
if (stemmer.stem(termAtt.termBuffer(), 0, termAtt.termLength()))
termAtt.setTermBuffer(stemmer.getResultBuffer(), 0, stemmer.getResultLength());
return true;
}
}
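// Illustrative sketch, not part of the original Lucene file: stemming a lowercased
// token stream with the Porter algorithm. It assumes the TermAttribute API; the sample
// text and class name are hypothetical.
class PorterStemFilterExample {
  public static void main(String[] args) throws IOException {
    TokenStream ts = new PorterStemFilter(
        new LowerCaseTokenizer(new java.io.StringReader("Running quickly")));
    TermAttribute term = ts.getAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term());   // run, quickli
    }
    ts.close();
  }
}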

View File

@ -0,0 +1,546 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
Porter stemmer in Java. The original paper is in
Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
no. 3, pp 130-137,
See also http://www.tartarus.org/~martin/PorterStemmer/index.html
Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below.
 The words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1]
 is then outside the bounds of b.
Similarly,
Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below.
'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and
b[j] is then outside the bounds of b.
Release 3.
[ This version is derived from Release 3, modified by Brian Goetz to
optimize for fewer object creations. ]
*/
import java.io.*;
/**
*
* Stemmer, implementing the Porter Stemming Algorithm
*
* The Stemmer class transforms a word into its root form. The input
 * word can be provided a character at a time (by calling add()), or at once
* by calling one of the various stem(something) methods.
*/
class PorterStemmer
{
private char[] b;
private int i, /* offset into b */
j, k, k0;
private boolean dirty = false;
private static final int INC = 50; /* unit of size whereby b is increased */
private static final int EXTRA = 1;
public PorterStemmer() {
b = new char[INC];
i = 0;
}
/**
* reset() resets the stemmer so it can stem another word. If you invoke
* the stemmer by calling add(char) and then stem(), you must call reset()
* before starting another word.
*/
public void reset() { i = 0; dirty = false; }
/**
* Add a character to the word being stemmed. When you are finished
* adding characters, you can call stem(void) to process the word.
*/
public void add(char ch) {
if (b.length <= i + EXTRA) {
char[] new_b = new char[b.length+INC];
System.arraycopy(b, 0, new_b, 0, b.length);
b = new_b;
}
b[i++] = ch;
}
/**
* After a word has been stemmed, it can be retrieved by toString(),
* or a reference to the internal buffer can be retrieved by getResultBuffer
 * and getResultLength (which is generally more efficient).
*/
@Override
public String toString() { return new String(b,0,i); }
/**
* Returns the length of the word resulting from the stemming process.
*/
public int getResultLength() { return i; }
/**
* Returns a reference to a character buffer containing the results of
* the stemming process. You also need to consult getResultLength()
* to determine the length of the result.
*/
public char[] getResultBuffer() { return b; }
/* cons(i) is true <=> b[i] is a consonant. */
private final boolean cons(int i) {
switch (b[i]) {
case 'a': case 'e': case 'i': case 'o': case 'u':
return false;
case 'y':
return (i==k0) ? true : !cons(i-1);
default:
return true;
}
}
/* m() measures the number of consonant sequences between k0 and j. if c is
a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
presence,
<c><v> gives 0
<c>vc<v> gives 1
<c>vcvc<v> gives 2
<c>vcvcvc<v> gives 3
....
*/
private final int m() {
int n = 0;
int i = k0;
while(true) {
if (i > j)
return n;
if (! cons(i))
break;
i++;
}
i++;
while(true) {
while(true) {
if (i > j)
return n;
if (cons(i))
break;
i++;
}
i++;
n++;
while(true) {
if (i > j)
return n;
if (! cons(i))
break;
i++;
}
i++;
}
}
/* vowelinstem() is true <=> k0,...j contains a vowel */
private final boolean vowelinstem() {
int i;
for (i = k0; i <= j; i++)
if (! cons(i))
return true;
return false;
}
/* doublec(j) is true <=> j,(j-1) contain a double consonant. */
private final boolean doublec(int j) {
if (j < k0+1)
return false;
if (b[j] != b[j-1])
return false;
return cons(j);
}
/* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant
and also if the second c is not w,x or y. this is used when trying to
restore an e at the end of a short word. e.g.
cav(e), lov(e), hop(e), crim(e), but
snow, box, tray.
*/
private final boolean cvc(int i) {
if (i < k0+2 || !cons(i) || cons(i-1) || !cons(i-2))
return false;
else {
int ch = b[i];
if (ch == 'w' || ch == 'x' || ch == 'y') return false;
}
return true;
}
private final boolean ends(String s) {
int l = s.length();
int o = k-l+1;
if (o < k0)
return false;
for (int i = 0; i < l; i++)
if (b[o+i] != s.charAt(i))
return false;
j = k-l;
return true;
}
/* setto(s) sets (j+1),...k to the characters in the string s, readjusting
k. */
void setto(String s) {
int l = s.length();
int o = j+1;
for (int i = 0; i < l; i++)
b[o+i] = s.charAt(i);
k = j+l;
dirty = true;
}
/* r(s) is used further down. */
void r(String s) { if (m() > 0) setto(s); }
/* step1() gets rid of plurals and -ed or -ing. e.g.
caresses -> caress
ponies -> poni
ties -> ti
caress -> caress
cats -> cat
feed -> feed
agreed -> agree
disabled -> disable
matting -> mat
mating -> mate
meeting -> meet
milling -> mill
messing -> mess
meetings -> meet
*/
private final void step1() {
if (b[k] == 's') {
if (ends("sses")) k -= 2;
else if (ends("ies")) setto("i");
else if (b[k-1] != 's') k--;
}
if (ends("eed")) {
if (m() > 0)
k--;
}
else if ((ends("ed") || ends("ing")) && vowelinstem()) {
k = j;
if (ends("at")) setto("ate");
else if (ends("bl")) setto("ble");
else if (ends("iz")) setto("ize");
else if (doublec(k)) {
int ch = b[k--];
if (ch == 'l' || ch == 's' || ch == 'z')
k++;
}
else if (m() == 1 && cvc(k))
setto("e");
}
}
/* step2() turns terminal y to i when there is another vowel in the stem. */
private final void step2() {
if (ends("y") && vowelinstem()) {
b[k] = 'i';
dirty = true;
}
}
/* step3() maps double suffices to single ones. so -ization ( = -ize plus
-ation) maps to -ize etc. note that the string before the suffix must give
m() > 0. */
private final void step3() {
if (k == k0) return; /* For Bug 1 */
switch (b[k-1]) {
case 'a':
if (ends("ational")) { r("ate"); break; }
if (ends("tional")) { r("tion"); break; }
break;
case 'c':
if (ends("enci")) { r("ence"); break; }
if (ends("anci")) { r("ance"); break; }
break;
case 'e':
if (ends("izer")) { r("ize"); break; }
break;
case 'l':
if (ends("bli")) { r("ble"); break; }
if (ends("alli")) { r("al"); break; }
if (ends("entli")) { r("ent"); break; }
if (ends("eli")) { r("e"); break; }
if (ends("ousli")) { r("ous"); break; }
break;
case 'o':
if (ends("ization")) { r("ize"); break; }
if (ends("ation")) { r("ate"); break; }
if (ends("ator")) { r("ate"); break; }
break;
case 's':
if (ends("alism")) { r("al"); break; }
if (ends("iveness")) { r("ive"); break; }
if (ends("fulness")) { r("ful"); break; }
if (ends("ousness")) { r("ous"); break; }
break;
case 't':
if (ends("aliti")) { r("al"); break; }
if (ends("iviti")) { r("ive"); break; }
if (ends("biliti")) { r("ble"); break; }
break;
case 'g':
if (ends("logi")) { r("log"); break; }
}
}
/* step4() deals with -ic-, -full, -ness etc. similar strategy to step3. */
private final void step4() {
switch (b[k]) {
case 'e':
if (ends("icate")) { r("ic"); break; }
if (ends("ative")) { r(""); break; }
if (ends("alize")) { r("al"); break; }
break;
case 'i':
if (ends("iciti")) { r("ic"); break; }
break;
case 'l':
if (ends("ical")) { r("ic"); break; }
if (ends("ful")) { r(""); break; }
break;
case 's':
if (ends("ness")) { r(""); break; }
break;
}
}
/* step5() takes off -ant, -ence etc., in context <c>vcvc<v>. */
private final void step5() {
if (k == k0) return; /* for Bug 1 */
switch (b[k-1]) {
case 'a':
if (ends("al")) break;
return;
case 'c':
if (ends("ance")) break;
if (ends("ence")) break;
return;
case 'e':
if (ends("er")) break; return;
case 'i':
if (ends("ic")) break; return;
case 'l':
if (ends("able")) break;
if (ends("ible")) break; return;
case 'n':
if (ends("ant")) break;
if (ends("ement")) break;
if (ends("ment")) break;
/* element etc. not stripped before the m */
if (ends("ent")) break;
return;
case 'o':
if (ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't')) break;
/* j >= 0 fixes Bug 2 */
if (ends("ou")) break;
return;
/* takes care of -ous */
case 's':
if (ends("ism")) break;
return;
case 't':
if (ends("ate")) break;
if (ends("iti")) break;
return;
case 'u':
if (ends("ous")) break;
return;
case 'v':
if (ends("ive")) break;
return;
case 'z':
if (ends("ize")) break;
return;
default:
return;
}
if (m() > 1)
k = j;
}
/* step6() removes a final -e if m() > 1. */
private final void step6() {
j = k;
if (b[k] == 'e') {
int a = m();
if (a > 1 || a == 1 && !cvc(k-1))
k--;
}
if (b[k] == 'l' && doublec(k) && m() > 1)
k--;
}
/**
* Stem a word provided as a String. Returns the result as a String.
*/
public String stem(String s) {
if (stem(s.toCharArray(), s.length()))
return toString();
else
return s;
}
/** Stem a word contained in a char[]. Returns true if the stemming process
* resulted in a word different from the input. You can retrieve the
* result with getResultLength()/getResultBuffer() or toString().
*/
public boolean stem(char[] word) {
return stem(word, word.length);
}
/** Stem a word contained in a portion of a char[] array. Returns
* true if the stemming process resulted in a word different from
* the input. You can retrieve the result with
* getResultLength()/getResultBuffer() or toString().
*/
public boolean stem(char[] wordBuffer, int offset, int wordLen) {
reset();
if (b.length < wordLen) {
char[] new_b = new char[wordLen + EXTRA];
b = new_b;
}
System.arraycopy(wordBuffer, offset, b, 0, wordLen);
i = wordLen;
return stem(0);
}
/** Stem a word contained in a leading portion of a char[] array.
* Returns true if the stemming process resulted in a word different
* from the input. You can retrieve the result with
* getResultLength()/getResultBuffer() or toString().
*/
public boolean stem(char[] word, int wordLen) {
return stem(word, 0, wordLen);
}
/** Stem the word placed into the Stemmer buffer through calls to add().
* Returns true if the stemming process resulted in a word different
* from the input. You can retrieve the result with
* getResultLength()/getResultBuffer() or toString().
*/
public boolean stem() {
return stem(0);
}
public boolean stem(int i0) {
k = i - 1;
k0 = i0;
if (k > k0+1) {
step1(); step2(); step3(); step4(); step5(); step6();
}
// Also, a word is considered dirty if we lopped off letters
// Thanks to Ifigenia Vairelles for pointing this out.
if (i != k+1)
dirty = true;
i = k+1;
return dirty;
}
/** Test program for demonstrating the Stemmer. It reads a file and
* stems each word, writing the result to standard out.
* Usage: Stemmer file-name
*/
public static void main(String[] args) {
PorterStemmer s = new PorterStemmer();
for (int i = 0; i < args.length; i++) {
try {
InputStream in = new FileInputStream(args[i]);
byte[] buffer = new byte[1024];
int bufferLen, offset, ch;
bufferLen = in.read(buffer);
offset = 0;
s.reset();
while(true) {
if (offset < bufferLen)
ch = buffer[offset++];
else {
bufferLen = in.read(buffer);
offset = 0;
if (bufferLen < 0)
ch = -1;
else
ch = buffer[offset++];
}
if (Character.isLetter((char) ch)) {
s.add(Character.toLowerCase((char) ch));
}
else {
s.stem();
System.out.print(s.toString());
s.reset();
if (ch < 0)
break;
else {
System.out.print((char) ch);
}
}
}
in.close();
}
catch (IOException e) {
System.out.println("error reading " + args[i]);
}
}
}
}
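/*
 * Hedged usage sketch (not part of the committed backwards sources): shows the
 * two ways the class javadoc describes for driving the stemmer, whole-word via
 * stem(String) and char-at-a-time via add()/stem(). The class name and sample
 * words are illustrative only.
 */
class PorterStemmerUsageSketch {
  public static void main(String[] args) {
    PorterStemmer stemmer = new PorterStemmer();
    // whole word at once
    System.out.println(stemmer.stem("meetings"));   // prints "meet" (see the step1 examples)
    // or one character at a time, then stem(), then read the result
    stemmer.reset();
    for (char c : "caresses".toCharArray()) {
      stemmer.add(c);
    }
    stemmer.stem();
    System.out.println(stemmer.toString());         // prints "caress"
  }
}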

View File

@ -0,0 +1,42 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import java.io.IOException;
/** An {@link Analyzer} that filters {@link LetterTokenizer}
* with {@link LowerCaseFilter} */
public final class SimpleAnalyzer extends Analyzer {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
return new LowerCaseTokenizer(reader);
}
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
if (tokenizer == null) {
tokenizer = new LowerCaseTokenizer(reader);
setPreviousTokenStream(tokenizer);
} else
tokenizer.reset(reader);
return tokenizer;
}
}
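/*
 * Hedged usage sketch (not part of the committed backwards sources): consumes
 * the reusable stream with the attribute-based API. The field name "f" and the
 * sample text are illustrative only; fully qualified names are used where this
 * file has no matching import.
 */
class SimpleAnalyzerUsageSketch {
  public static void main(String[] args) throws IOException {
    SimpleAnalyzer analyzer = new SimpleAnalyzer();
    TokenStream ts = analyzer.reusableTokenStream("f", new java.io.StringReader("The Quick BROWN Fox"));
    org.apache.lucene.analysis.tokenattributes.TermAttribute term =
        ts.addAttribute(org.apache.lucene.analysis.tokenattributes.TermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term.term());   // the, quick, brown, fox
    }
    ts.end();
    ts.close();
  }
}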

View File

@ -0,0 +1,119 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.Arrays;
import java.util.Set;
import java.util.List;
import org.apache.lucene.util.Version;
/** Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
*
* <a name="version"/>
* <p>You must specify the required {@link Version}
* compatibility when creating StopAnalyzer:
* <ul>
* <li> As of 2.9, position increments are preserved
* </ul>
*/
public final class StopAnalyzer extends Analyzer {
private final Set<?> stopWords;
private final boolean enablePositionIncrements;
/** An unmodifiable set containing some common English words that are not usually useful
for searching.*/
public static final Set<?> ENGLISH_STOP_WORDS_SET;
static {
final List<String> stopWords = Arrays.asList(
"a", "an", "and", "are", "as", "at", "be", "but", "by",
"for", "if", "in", "into", "is", "it",
"no", "not", "of", "on", "or", "such",
"that", "the", "their", "then", "there", "these",
"they", "this", "to", "was", "will", "with"
);
final CharArraySet stopSet = new CharArraySet(stopWords.size(), false);
stopSet.addAll(stopWords);
ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);
}
/** Builds an analyzer which removes words in
* {@link #ENGLISH_STOP_WORDS_SET}.
* @param matchVersion See <a href="#version">above</a>
*/
public StopAnalyzer(Version matchVersion) {
stopWords = ENGLISH_STOP_WORDS_SET;
enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
}
/** Builds an analyzer with the stop words from the given set.
* @param matchVersion See <a href="#version">above</a>
* @param stopWords Set of stop words */
public StopAnalyzer(Version matchVersion, Set<?> stopWords) {
this.stopWords = stopWords;
enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
}
/** Builds an analyzer with the stop words from the given file.
* @see WordlistLoader#getWordSet(File)
* @param matchVersion See <a href="#version">above</a>
* @param stopwordsFile File to load stop words from */
public StopAnalyzer(Version matchVersion, File stopwordsFile) throws IOException {
stopWords = WordlistLoader.getWordSet(stopwordsFile);
this.enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
}
/** Builds an analyzer with the stop words from the given reader.
* @see WordlistLoader#getWordSet(Reader)
* @param matchVersion See <a href="#version">above</a>
* @param stopwords Reader to load stop words from */
public StopAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
stopWords = WordlistLoader.getWordSet(stopwords);
this.enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
}
/** Filters LowerCaseTokenizer with StopFilter. */
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
return new StopFilter(enablePositionIncrements, new LowerCaseTokenizer(reader), stopWords);
}
/** Holds the saved tokenizer and filtered stream for reuse in reusableTokenStream. */
private class SavedStreams {
Tokenizer source;
TokenStream result;
}
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
if (streams == null) {
streams = new SavedStreams();
streams.source = new LowerCaseTokenizer(reader);
streams.result = new StopFilter(enablePositionIncrements, streams.source, stopWords);
setPreviousTokenStream(streams);
} else
streams.source.reset(reader);
return streams.result;
}
}
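/*
 * Hedged usage sketch (not part of the committed backwards sources): builds the
 * analyzer with an explicit Version, as the class javadoc requires. LUCENE_29 is
 * used only because it is the constant this package already references; the
 * field name and sample text are illustrative.
 */
class StopAnalyzerUsageSketch {
  public static void main(String[] args) throws IOException {
    StopAnalyzer analyzer = new StopAnalyzer(Version.LUCENE_29);
    TokenStream ts = analyzer.tokenStream("f", new java.io.StringReader("no fox is the same"));
    org.apache.lucene.analysis.tokenattributes.TermAttribute term =
        ts.addAttribute(org.apache.lucene.analysis.tokenattributes.TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term());   // fox, same ("no", "is", "the" are stop words)
    }
    ts.close();
  }
}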

View File

@ -0,0 +1,191 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
import java.util.List;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.queryParser.QueryParser; // for javadoc
import org.apache.lucene.util.Version;
/**
* Removes stop words from a token stream.
*/
public final class StopFilter extends TokenFilter {
private final CharArraySet stopWords;
private boolean enablePositionIncrements = false;
private TermAttribute termAtt;
private PositionIncrementAttribute posIncrAtt;
/**
* Construct a token stream filtering the given input.
* If <code>stopWords</code> is an instance of {@link CharArraySet} (true if
* <code>makeStopSet()</code> was used to construct the set) it will be directly used
* and <code>ignoreCase</code> will be ignored since <code>CharArraySet</code>
* directly controls case sensitivity.
* <p/>
* If <code>stopWords</code> is not an instance of {@link CharArraySet},
* a new CharArraySet will be constructed and <code>ignoreCase</code> will be
* used to specify the case sensitivity of that set.
*
* @param enablePositionIncrements true if token positions should record the removed stop words
* @param input Input TokenStream
* @param stopWords A Set of Strings or char[] or any other toString()-able set representing the stopwords
* @param ignoreCase if true, all words are lower cased first
*/
public StopFilter(boolean enablePositionIncrements, TokenStream input, Set<?> stopWords, boolean ignoreCase)
{
super(input);
if (stopWords instanceof CharArraySet) {
this.stopWords = (CharArraySet)stopWords;
} else {
this.stopWords = new CharArraySet(stopWords.size(), ignoreCase);
this.stopWords.addAll(stopWords);
}
this.enablePositionIncrements = enablePositionIncrements;
termAtt = addAttribute(TermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
}
/**
* Constructs a filter which removes words from the input
* TokenStream that are named in the Set.
*
* @param enablePositionIncrements true if token positions should record the removed stop words
* @param in Input stream
* @param stopWords A Set of Strings or char[] or any other toString()-able set representing the stopwords
* @see #makeStopSet(java.lang.String[])
*/
public StopFilter(boolean enablePositionIncrements, TokenStream in, Set<?> stopWords) {
this(enablePositionIncrements, in, stopWords, false);
}
/**
* Builds a Set from an array of stop words,
* appropriate for passing into the StopFilter constructor.
* This permits this stopWords construction to be cached once when
* an Analyzer is constructed.
*
* @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
*/
public static final Set<Object> makeStopSet(String... stopWords) {
return makeStopSet(stopWords, false);
}
/**
* Builds a Set from an array of stop words,
* appropriate for passing into the StopFilter constructor.
* This permits this stopWords construction to be cached once when
* an Analyzer is constructed.
* @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
* @return A Set ({@link CharArraySet}) containing the words
* @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
*/
public static final Set<Object> makeStopSet(List<?> stopWords) {
return makeStopSet(stopWords, false);
}
/**
*
* @param stopWords An array of stopwords
* @param ignoreCase If true, all words are lower cased first.
* @return a Set containing the words
*/
public static final Set<Object> makeStopSet(String[] stopWords, boolean ignoreCase) {
CharArraySet stopSet = new CharArraySet(stopWords.length, ignoreCase);
stopSet.addAll(Arrays.asList(stopWords));
return stopSet;
}
/**
*
* @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
* @param ignoreCase if true, all words are lower cased first
* @return A Set ({@link CharArraySet}) containing the words
*/
public static final Set<Object> makeStopSet(List<?> stopWords, boolean ignoreCase){
CharArraySet stopSet = new CharArraySet(stopWords.size(), ignoreCase);
stopSet.addAll(stopWords);
return stopSet;
}
/**
* Advances to the next input token whose term() is not a stop word; returns false at end of stream.
*/
@Override
public final boolean incrementToken() throws IOException {
// return the first non-stop word found
int skippedPositions = 0;
while (input.incrementToken()) {
if (!stopWords.contains(termAtt.termBuffer(), 0, termAtt.termLength())) {
if (enablePositionIncrements) {
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
}
return true;
}
skippedPositions += posIncrAtt.getPositionIncrement();
}
// reached EOS -- return false
return false;
}
/**
* Returns version-dependent default for
* enablePositionIncrements. Analyzers that embed
* StopFilter use this method when creating the
* StopFilter. Prior to 2.9, this returns false. On 2.9
* or later, it returns true.
*/
public static boolean getEnablePositionIncrementsVersionDefault(Version matchVersion) {
return matchVersion.onOrAfter(Version.LUCENE_29);
}
/**
* @see #setEnablePositionIncrements(boolean).
*/
public boolean getEnablePositionIncrements() {
return enablePositionIncrements;
}
/**
* If <code>true</code>, this StopFilter will preserve
* positions of the incoming tokens (ie, accumulate and
* set position increments of the removed stop tokens).
* Generally, <code>true</code> is best as it does not
* lose information (positions of the original tokens)
* during indexing.
*
* <p> When set, when a token is stopped
* (omitted), the position increment of the following
* token is incremented.
*
* <p> <b>NOTE</b>: be sure to also
* set {@link QueryParser#setEnablePositionIncrements} if
* you use QueryParser to create queries.
*/
public void setEnablePositionIncrements(boolean enable) {
this.enablePositionIncrements = enable;
}
}
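/*
 * Hedged usage sketch (not part of the committed backwards sources): wraps a
 * WhitespaceTokenizer in a StopFilter built with makeStopSet() and prints the
 * surviving terms with their position increments, showing how
 * enablePositionIncrements accumulates the gaps left by removed stop words.
 * The stop list and sample text are illustrative only.
 */
class StopFilterUsageSketch {
  public static void main(String[] args) throws IOException {
    Set<Object> stops = StopFilter.makeStopSet("the", "of");
    TokenStream ts = new StopFilter(true,
        new WhitespaceTokenizer(new java.io.StringReader("the tip of the iceberg")), stops);
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    PositionIncrementAttribute posIncr = ts.addAttribute(PositionIncrementAttribute.class);
    while (ts.incrementToken()) {
      // expected output: "tip +2" and "iceberg +3"
      System.out.println(term.term() + " +" + posIncr.getPositionIncrement());
    }
    ts.close();
  }
}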

View File

@ -0,0 +1,245 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.lang.ref.WeakReference;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
/**
* This TokenFilter provides the ability to set aside attribute states
* that have already been analyzed. This is useful in situations where multiple fields share
* many common analysis steps and then go their separate ways.
* <p/>
* It is also useful for doing things like entity extraction or proper noun analysis as
* part of the analysis workflow and saving off those tokens for use in another field.
*
* <pre>
TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader1));
TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream();
TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream();
TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader2));
source2.addSinkTokenStream(sink1);
source2.addSinkTokenStream(sink2);
TokenStream final1 = new LowerCaseFilter(source1);
TokenStream final2 = source2;
TokenStream final3 = new EntityDetect(sink1);
TokenStream final4 = new URLDetect(sink2);
d.add(new Field("f1", final1));
d.add(new Field("f2", final2));
d.add(new Field("f3", final3));
d.add(new Field("f4", final4));
* </pre>
* In this example, <code>sink1</code> and <code>sink2</code> will both get tokens from both
* <code>reader1</code> and <code>reader2</code> after whitespace tokenizer
* and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired.
* It is important that tees are consumed before sinks (in the above example, the tee field names must be
* less than the sink field names). If you are not sure which stream is consumed first, you can simply
* add another sink and then pass all tokens to the sinks at once using {@link #consumeAllTokens}.
* This TokenFilter is exhausted after that. To do this, change
* the example above to:
* <pre>
...
TokenStream final1 = new LowerCaseFilter(source1.newSinkTokenStream());
TokenStream final2 = source2.newSinkTokenStream();
sink1.consumeAllTokens();
sink2.consumeAllTokens();
...
* </pre>
* In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are ready.
* <p>Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene.
*/
public final class TeeSinkTokenFilter extends TokenFilter {
private final List<WeakReference<SinkTokenStream>> sinks = new LinkedList<WeakReference<SinkTokenStream>>();
/**
* Instantiates a new TeeSinkTokenFilter.
*/
public TeeSinkTokenFilter(TokenStream input) {
super(input);
}
/**
* Returns a new {@link SinkTokenStream} that receives all tokens consumed by this stream.
*/
public SinkTokenStream newSinkTokenStream() {
return newSinkTokenStream(ACCEPT_ALL_FILTER);
}
/**
* Returns a new {@link SinkTokenStream} that receives all tokens consumed by this stream
* that pass the supplied filter.
* @see SinkFilter
*/
public SinkTokenStream newSinkTokenStream(SinkFilter filter) {
SinkTokenStream sink = new SinkTokenStream(this.cloneAttributes(), filter);
this.sinks.add(new WeakReference<SinkTokenStream>(sink));
return sink;
}
/**
* Adds a {@link SinkTokenStream} created by another <code>TeeSinkTokenFilter</code>
* to this one. The supplied stream will also receive all consumed tokens.
* This method can be used to pass tokens from two different tees to one sink.
*/
public void addSinkTokenStream(final SinkTokenStream sink) {
// check that sink has correct factory
if (!this.getAttributeFactory().equals(sink.getAttributeFactory())) {
throw new IllegalArgumentException("The supplied sink is not compatible to this tee");
}
// add eventually missing attribute impls to the existing sink
for (Iterator<AttributeImpl> it = this.cloneAttributes().getAttributeImplsIterator(); it.hasNext(); ) {
sink.addAttributeImpl(it.next());
}
this.sinks.add(new WeakReference<SinkTokenStream>(sink));
}
/**
* <code>TeeSinkTokenFilter</code> passes all tokens to the added sinks
* when it is itself consumed. To be sure that all tokens from the input
* stream are passed to the sinks, you can call this method.
* This instance is exhausted afterwards, but all sinks are immediately available.
*/
public void consumeAllTokens() throws IOException {
while (incrementToken());
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
// capture state lazily - maybe no SinkFilter accepts this state
AttributeSource.State state = null;
for (WeakReference<SinkTokenStream> ref : sinks) {
final SinkTokenStream sink = ref.get();
if (sink != null) {
if (sink.accept(this)) {
if (state == null) {
state = this.captureState();
}
sink.addState(state);
}
}
}
return true;
}
return false;
}
@Override
public final void end() throws IOException {
super.end();
AttributeSource.State finalState = captureState();
for (WeakReference<SinkTokenStream> ref : sinks) {
final SinkTokenStream sink = ref.get();
if (sink != null) {
sink.setFinalState(finalState);
}
}
}
/**
* A filter that decides which {@link AttributeSource} states to store in the sink.
*/
public static abstract class SinkFilter {
/**
* Returns true, iff the current state of the passed-in {@link AttributeSource} shall be stored
* in the sink.
*/
public abstract boolean accept(AttributeSource source);
/**
* Called by {@link SinkTokenStream#reset()}. This method does nothing by default
* and can optionally be overridden.
*/
public void reset() throws IOException {
// nothing to do; can be overridden
}
}
public static final class SinkTokenStream extends TokenStream {
private final List<AttributeSource.State> cachedStates = new LinkedList<AttributeSource.State>();
private AttributeSource.State finalState;
private Iterator<AttributeSource.State> it = null;
private SinkFilter filter;
private SinkTokenStream(AttributeSource source, SinkFilter filter) {
super(source);
this.filter = filter;
}
private boolean accept(AttributeSource source) {
return filter.accept(source);
}
private void addState(AttributeSource.State state) {
if (it != null) {
throw new IllegalStateException("The tee must be consumed before sinks are consumed.");
}
cachedStates.add(state);
}
private void setFinalState(AttributeSource.State finalState) {
this.finalState = finalState;
}
@Override
public final boolean incrementToken() throws IOException {
// lazy init the iterator
if (it == null) {
it = cachedStates.iterator();
}
if (!it.hasNext()) {
return false;
}
AttributeSource.State state = it.next();
restoreState(state);
return true;
}
@Override
public final void end() throws IOException {
if (finalState != null) {
restoreState(finalState);
}
}
@Override
public final void reset() {
it = cachedStates.iterator();
}
}
private static final SinkFilter ACCEPT_ALL_FILTER = new SinkFilter() {
@Override
public boolean accept(AttributeSource source) {
return true;
}
};
}
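/*
 * Hedged runnable sketch (not part of the committed backwards sources): a small
 * concrete counterpart to the class javadoc example, using only classes from
 * this package. The tee is drained with consumeAllTokens() so the sink can be
 * read immediately; the sample text is illustrative only.
 */
class TeeSinkUsageSketch {
  public static void main(String[] args) throws IOException {
    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(
        new WhitespaceTokenizer(new java.io.StringReader("tee and sink")));
    TeeSinkTokenFilter.SinkTokenStream sink = tee.newSinkTokenStream();
    tee.consumeAllTokens();   // the tee must be consumed before its sinks
    org.apache.lucene.analysis.tokenattributes.TermAttribute term =
        sink.addAttribute(org.apache.lucene.analysis.tokenattributes.TermAttribute.class);
    while (sink.incrementToken()) {
      System.out.println(term.term());   // tee, and, sink
    }
  }
}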

View File

@ -0,0 +1,811 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.Payload;
import org.apache.lucene.index.TermPositions; // for javadoc
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.AttributeImpl;
/**
A Token is an occurrence of a term from the text of a field. It consists of
a term's text, the start and end offset of the term in the text of the field,
and a type string.
<p>
The start and end offsets permit applications to re-associate a token with
its source text, e.g., to display highlighted query terms in a document
browser, or to show matching text fragments in a <abbr title="KeyWord In Context">KWIC</abbr>
display, etc.
<p>
The type is a string, assigned by a lexical analyzer
(a.k.a. tokenizer), naming the lexical or syntactic class that the token
belongs to. For example an end of sentence marker token might be implemented
with type "eos". The default token type is "word".
<p>
A Token can optionally have metadata (a.k.a. Payload) in the form of a variable
length byte array. Use {@link TermPositions#getPayloadLength()} and
{@link TermPositions#getPayload(byte[], int)} to retrieve the payloads from the index.
<br><br>
<p><b>NOTE:</b> As of 2.9, Token implements all {@link Attribute} interfaces
that are part of core Lucene and can be found in the {@code tokenattributes} subpackage.
Even though it is not necessary to use Token anymore, with the new TokenStream API it can
be used as a convenience class that implements all {@link Attribute}s, which is especially useful
to easily switch from the old to the new TokenStream API.
<br><br>
<p>Tokenizers and TokenFilters should try to re-use a Token
instance when possible for best performance, by
implementing the {@link TokenStream#incrementToken()} API.
Failing that, to create a new Token you should first use
one of the constructors that starts with null text. To load
the token from a char[] use {@link #setTermBuffer(char[], int, int)}.
To load from a String use {@link #setTermBuffer(String)} or {@link #setTermBuffer(String, int, int)}.
Alternatively you can get the Token's termBuffer by calling either {@link #termBuffer()},
if you know that your text is shorter than the capacity of the termBuffer
or {@link #resizeTermBuffer(int)}, if there is any possibility
that you may need to grow the buffer. Fill in the characters of your term into this
buffer, with {@link String#getChars(int, int, char[], int)} if loading from a string,
or with {@link System#arraycopy(Object, int, Object, int, int)}, and finally call {@link #setTermLength(int)} to
set the length of the term text. See <a target="_top"
href="https://issues.apache.org/jira/browse/LUCENE-969">LUCENE-969</a>
for details.</p>
<p>Typical Token reuse patterns:
<ul>
<li> Copying text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
<pre>
return reusableToken.reinit(string, startOffset, endOffset[, type]);
</pre>
</li>
<li> Copying some text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
<pre>
return reusableToken.reinit(string, 0, string.length(), startOffset, endOffset[, type]);
</pre>
</li>
<li> Copying text from char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
<pre>
return reusableToken.reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
</pre>
</li>
<li> Copying some text from a char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
<pre>
return reusableToken.reinit(buffer, start, end - start, startOffset, endOffset[, type]);
</pre>
</li>
<li> Copying from one Token to another (type is reset to {@link #DEFAULT_TYPE} if not specified):<br/>
<pre>
return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
</pre>
</li>
</ul>
A few things to note:
<ul>
<li>clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but should affect no one.</li>
<li>Because <code>TokenStreams</code> can be chained, one cannot assume that the <code>Token's</code> current type is correct.</li>
<li>The startOffset and endOffset represent the start and offset in the source text, so be careful in adjusting them.</li>
<li>When caching a reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again.</li>
</ul>
</p>
@see org.apache.lucene.index.Payload
*/
public class Token extends AttributeImpl
implements Cloneable, TermAttribute, TypeAttribute, PositionIncrementAttribute,
FlagsAttribute, OffsetAttribute, PayloadAttribute {
public static final String DEFAULT_TYPE = "word";
private static int MIN_BUFFER_SIZE = 10;
private char[] termBuffer;
private int termLength;
private int startOffset,endOffset;
private String type = DEFAULT_TYPE;
private int flags;
private Payload payload;
private int positionIncrement = 1;
/** Constructs a Token with null text. */
public Token() {
}
/** Constructs a Token with null text and start & end
* offsets.
* @param start start offset in the source text
* @param end end offset in the source text */
public Token(int start, int end) {
startOffset = start;
endOffset = end;
}
/** Constructs a Token with null text and start & end
* offsets plus the Token type.
* @param start start offset in the source text
* @param end end offset in the source text
* @param typ the lexical type of this Token */
public Token(int start, int end, String typ) {
startOffset = start;
endOffset = end;
type = typ;
}
/**
* Constructs a Token with null text and start & end
* offsets plus flags. NOTE: flags is EXPERIMENTAL.
* @param start start offset in the source text
* @param end end offset in the source text
* @param flags The bits to set for this token
*/
public Token(int start, int end, int flags) {
startOffset = start;
endOffset = end;
this.flags = flags;
}
/** Constructs a Token with the given term text, and start
* & end offsets. The type defaults to "word."
* <b>NOTE:</b> for better indexing speed you should
* instead use the char[] termBuffer methods to set the
* term text.
* @param text term text
* @param start start offset
* @param end end offset
*/
public Token(String text, int start, int end) {
setTermBuffer(text);
startOffset = start;
endOffset = end;
}
/** Constructs a Token with the given text, start and end
* offsets, & type. <b>NOTE:</b> for better indexing
* speed you should instead use the char[] termBuffer
* methods to set the term text.
* @param text term text
* @param start start offset
* @param end end offset
* @param typ token type
*/
public Token(String text, int start, int end, String typ) {
setTermBuffer(text);
startOffset = start;
endOffset = end;
type = typ;
}
/**
* Constructs a Token with the given text, start and end
* offsets, & type. <b>NOTE:</b> for better indexing
* speed you should instead use the char[] termBuffer
* methods to set the term text.
* @param text
* @param start
* @param end
* @param flags token type bits
*/
public Token(String text, int start, int end, int flags) {
setTermBuffer(text);
startOffset = start;
endOffset = end;
this.flags = flags;
}
/**
* Constructs a Token with the given term buffer (offset
* & length), start and end
* offsets
* @param startTermBuffer
* @param termBufferOffset
* @param termBufferLength
* @param start
* @param end
*/
public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end) {
setTermBuffer(startTermBuffer, termBufferOffset, termBufferLength);
startOffset = start;
endOffset = end;
}
/** Set the position increment. This determines the position of this token
* relative to the previous Token in a {@link TokenStream}, used in phrase
* searching.
*
* <p>The default value is one.
*
* <p>Some common uses for this are:<ul>
*
* <li>Set it to zero to put multiple terms in the same position. This is
* useful if, e.g., a word has multiple stems. Searches for phrases
* including either stem will match. In this case, all but the first stem's
* increment should be set to zero: the increment of the first instance
* should be one. Repeating a token with an increment of zero can also be
* used to boost the scores of matches on that token.
*
* <li>Set it to values greater than one to inhibit exact phrase matches.
* If, for example, one does not want phrases to match across removed stop
* words, then one could build a stop word filter that removes stop words and
* also sets the increment to the number of stop words removed before each
* non-stop word. Then exact phrase queries will only match when the terms
* occur with no intervening stop words.
*
* </ul>
* @param positionIncrement the distance from the prior term
* @see org.apache.lucene.index.TermPositions
*/
public void setPositionIncrement(int positionIncrement) {
if (positionIncrement < 0)
throw new IllegalArgumentException
("Increment must be zero or greater: " + positionIncrement);
this.positionIncrement = positionIncrement;
}
/** Returns the position increment of this Token.
* @see #setPositionIncrement
*/
public int getPositionIncrement() {
return positionIncrement;
}
/** Returns the Token's term text.
*
* This method has a performance penalty
* because the text is stored internally in a char[]. If
* possible, use {@link #termBuffer()} and {@link
* #termLength()} directly instead. If you really need a
* String, use this method, which is nothing more than
* a convenience call to <b>new String(token.termBuffer(), 0, token.termLength())</b>
*/
public final String term() {
initTermBuffer();
return new String(termBuffer, 0, termLength);
}
/** Copies the contents of buffer, starting at offset for
* length characters, into the termBuffer array.
* @param buffer the buffer to copy
* @param offset the index in the buffer of the first character to copy
* @param length the number of characters to copy
*/
public final void setTermBuffer(char[] buffer, int offset, int length) {
growTermBuffer(length);
System.arraycopy(buffer, offset, termBuffer, 0, length);
termLength = length;
}
/** Copies the contents of buffer into the termBuffer array.
* @param buffer the buffer to copy
*/
public final void setTermBuffer(String buffer) {
final int length = buffer.length();
growTermBuffer(length);
buffer.getChars(0, length, termBuffer, 0);
termLength = length;
}
/** Copies the contents of buffer, starting at offset and continuing
* for length characters, into the termBuffer array.
* @param buffer the buffer to copy
* @param offset the index in the buffer of the first character to copy
* @param length the number of characters to copy
*/
public final void setTermBuffer(String buffer, int offset, int length) {
assert offset <= buffer.length();
assert offset + length <= buffer.length();
growTermBuffer(length);
buffer.getChars(offset, offset + length, termBuffer, 0);
termLength = length;
}
/** Returns the internal termBuffer character array which
* you can then directly alter. If the array is too
* small for your token, use {@link
* #resizeTermBuffer(int)} to increase it. After
* altering the buffer be sure to call {@link
* #setTermLength} to record the number of valid
* characters that were placed into the termBuffer. */
public final char[] termBuffer() {
initTermBuffer();
return termBuffer;
}
/** Grows the termBuffer to at least size newSize, preserving the
* existing content. Note: If the next operation is to change
* the contents of the term buffer use
* {@link #setTermBuffer(char[], int, int)},
* {@link #setTermBuffer(String)}, or
* {@link #setTermBuffer(String, int, int)}
* to optimally combine the resize with the setting of the termBuffer.
* @param newSize minimum size of the new termBuffer
* @return newly created termBuffer with length >= newSize
*/
public char[] resizeTermBuffer(int newSize) {
if (termBuffer == null) {
// The buffer is always at least MIN_BUFFER_SIZE
termBuffer = new char[ArrayUtil.getNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)];
} else {
if(termBuffer.length < newSize){
// Not big enough; create a new array with slight
// over allocation and preserve content
final char[] newCharBuffer = new char[ArrayUtil.getNextSize(newSize)];
System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length);
termBuffer = newCharBuffer;
}
}
return termBuffer;
}
/** Allocates a buffer char[] of at least newSize, without preserving the existing content.
* It is always used in places that then set the content.
* @param newSize minimum size of the buffer
*/
private void growTermBuffer(int newSize) {
if (termBuffer == null) {
// The buffer is always at least MIN_BUFFER_SIZE
termBuffer = new char[ArrayUtil.getNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)];
} else {
if(termBuffer.length < newSize){
// Not big enough; create a new array with slight
// over allocation:
termBuffer = new char[ArrayUtil.getNextSize(newSize)];
}
}
}
private void initTermBuffer() {
if (termBuffer == null) {
termBuffer = new char[ArrayUtil.getNextSize(MIN_BUFFER_SIZE)];
termLength = 0;
}
}
/** Return number of valid characters (length of the term)
* in the termBuffer array. */
public final int termLength() {
initTermBuffer();
return termLength;
}
/** Set number of valid characters (length of the term) in
* the termBuffer array. Use this to truncate the termBuffer
* or to synchronize with external manipulation of the termBuffer.
* Note: to grow the size of the array,
* use {@link #resizeTermBuffer(int)} first.
* @param length the truncated length
*/
public final void setTermLength(int length) {
initTermBuffer();
if (length > termBuffer.length)
throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")");
termLength = length;
}
/** Returns this Token's starting offset, the position of the first character
corresponding to this token in the source text.
Note that the difference between endOffset() and startOffset() may not be
equal to {@link #termLength}, as the term text may have been altered by a
stemmer or some other filter. */
public final int startOffset() {
return startOffset;
}
/** Set the starting offset.
@see #startOffset() */
public void setStartOffset(int offset) {
this.startOffset = offset;
}
/** Returns this Token's ending offset, one greater than the position of the
last character corresponding to this token in the source text. The length
of the token in the source text is (endOffset - startOffset). */
public final int endOffset() {
return endOffset;
}
/** Set the ending offset.
@see #endOffset() */
public void setEndOffset(int offset) {
this.endOffset = offset;
}
/** Set the starting and ending offset.
@see #startOffset() and #endOffset()*/
public void setOffset(int startOffset, int endOffset) {
this.startOffset = startOffset;
this.endOffset = endOffset;
}
/** Returns this Token's lexical type. Defaults to "word". */
public final String type() {
return type;
}
/** Set the lexical type.
@see #type() */
public final void setType(String type) {
this.type = type;
}
/**
* EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long.
* <p/>
*
* Get the bitset for any bits that have been set. This is completely distinct from {@link #type()}, although they do share similar purposes.
* The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
*
*
* @return The bits
*/
public int getFlags() {
return flags;
}
/**
* @see #getFlags()
*/
public void setFlags(int flags) {
this.flags = flags;
}
/**
* Returns this Token's payload.
*/
public Payload getPayload() {
return this.payload;
}
/**
* Sets this Token's payload.
*/
public void setPayload(Payload payload) {
this.payload = payload;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append('(');
initTermBuffer();
if (termBuffer == null)
sb.append("null");
else
sb.append(termBuffer, 0, termLength);
sb.append(',').append(startOffset).append(',').append(endOffset);
if (!type.equals("word"))
sb.append(",type=").append(type);
if (positionIncrement != 1)
sb.append(",posIncr=").append(positionIncrement);
sb.append(')');
return sb.toString();
}
/** Resets the term text, payload, flags, and positionIncrement,
* startOffset, endOffset and token type to default.
*/
@Override
public void clear() {
payload = null;
// Leave termBuffer to allow re-use
termLength = 0;
positionIncrement = 1;
flags = 0;
startOffset = endOffset = 0;
type = DEFAULT_TYPE;
}
@Override
public Object clone() {
Token t = (Token)super.clone();
// Do a deep clone
if (termBuffer != null) {
t.termBuffer = (char[]) termBuffer.clone();
}
if (payload != null) {
t.payload = (Payload) payload.clone();
}
return t;
}
/** Makes a clone, but replaces the term buffer &
* start/end offset in the process. This is more
* efficient than doing a full clone (and then calling
* setTermBuffer) because it saves a wasted copy of the old
* termBuffer. */
public Token clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
final Token t = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset);
t.positionIncrement = positionIncrement;
t.flags = flags;
t.type = type;
if (payload != null)
t.payload = (Payload) payload.clone();
return t;
}
@Override
public boolean equals(Object obj) {
if (obj == this)
return true;
if (obj instanceof Token) {
Token other = (Token) obj;
initTermBuffer();
other.initTermBuffer();
if (termLength == other.termLength &&
startOffset == other.startOffset &&
endOffset == other.endOffset &&
flags == other.flags &&
positionIncrement == other.positionIncrement &&
subEqual(type, other.type) &&
subEqual(payload, other.payload)) {
for(int i=0;i<termLength;i++)
if (termBuffer[i] != other.termBuffer[i])
return false;
return true;
} else
return false;
} else
return false;
}
private boolean subEqual(Object o1, Object o2) {
if (o1 == null)
return o2 == null;
else
return o1.equals(o2);
}
@Override
public int hashCode() {
initTermBuffer();
int code = termLength;
code = code * 31 + startOffset;
code = code * 31 + endOffset;
code = code * 31 + flags;
code = code * 31 + positionIncrement;
code = code * 31 + type.hashCode();
code = (payload == null ? code : code * 31 + payload.hashCode());
code = code * 31 + ArrayUtil.hashCode(termBuffer, 0, termLength);
return code;
}
// like clear() but doesn't clear termBuffer/text
private void clearNoTermBuffer() {
payload = null;
positionIncrement = 1;
flags = 0;
startOffset = endOffset = 0;
type = DEFAULT_TYPE;
}
/** Shorthand for calling {@link #clear},
* {@link #setTermBuffer(char[], int, int)},
* {@link #setStartOffset},
* {@link #setEndOffset},
* {@link #setType}
* @return this Token instance */
public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
clearNoTermBuffer();
payload = null;
positionIncrement = 1;
setTermBuffer(newTermBuffer, newTermOffset, newTermLength);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = newType;
return this;
}
/** Shorthand for calling {@link #clear},
* {@link #setTermBuffer(char[], int, int)},
* {@link #setStartOffset},
* {@link #setEndOffset}
* {@link #setType} on Token.DEFAULT_TYPE
* @return this Token instance */
public Token reinit(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
clearNoTermBuffer();
setTermBuffer(newTermBuffer, newTermOffset, newTermLength);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = DEFAULT_TYPE;
return this;
}
/** Shorthand for calling {@link #clear},
* {@link #setTermBuffer(String)},
* {@link #setStartOffset},
* {@link #setEndOffset}
* {@link #setType}
* @return this Token instance */
public Token reinit(String newTerm, int newStartOffset, int newEndOffset, String newType) {
clearNoTermBuffer();
setTermBuffer(newTerm);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = newType;
return this;
}
/** Shorthand for calling {@link #clear},
* {@link #setTermBuffer(String, int, int)},
* {@link #setStartOffset},
* {@link #setEndOffset}
* {@link #setType}
* @return this Token instance */
public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset, String newType) {
clearNoTermBuffer();
setTermBuffer(newTerm, newTermOffset, newTermLength);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = newType;
return this;
}
/** Shorthand for calling {@link #clear},
* {@link #setTermBuffer(String)},
* {@link #setStartOffset},
* {@link #setEndOffset}
* {@link #setType} on Token.DEFAULT_TYPE
* @return this Token instance */
public Token reinit(String newTerm, int newStartOffset, int newEndOffset) {
clearNoTermBuffer();
setTermBuffer(newTerm);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = DEFAULT_TYPE;
return this;
}
/** Shorthand for calling {@link #clear},
* {@link #setTermBuffer(String, int, int)},
* {@link #setStartOffset},
* {@link #setEndOffset}
* {@link #setType} on Token.DEFAULT_TYPE
* @return this Token instance */
public Token reinit(String newTerm, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) {
clearNoTermBuffer();
setTermBuffer(newTerm, newTermOffset, newTermLength);
startOffset = newStartOffset;
endOffset = newEndOffset;
type = DEFAULT_TYPE;
return this;
}
/**
* Copy the prototype token's fields into this one. Note: Payloads are shared.
* @param prototype
*/
public void reinit(Token prototype) {
prototype.initTermBuffer();
setTermBuffer(prototype.termBuffer, 0, prototype.termLength);
positionIncrement = prototype.positionIncrement;
flags = prototype.flags;
startOffset = prototype.startOffset;
endOffset = prototype.endOffset;
type = prototype.type;
payload = prototype.payload;
}
/**
* Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
* @param prototype
* @param newTerm
*/
public void reinit(Token prototype, String newTerm) {
setTermBuffer(newTerm);
positionIncrement = prototype.positionIncrement;
flags = prototype.flags;
startOffset = prototype.startOffset;
endOffset = prototype.endOffset;
type = prototype.type;
payload = prototype.payload;
}
/**
* Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared.
* @param prototype
* @param newTermBuffer
* @param offset
* @param length
*/
public void reinit(Token prototype, char[] newTermBuffer, int offset, int length) {
setTermBuffer(newTermBuffer, offset, length);
positionIncrement = prototype.positionIncrement;
flags = prototype.flags;
startOffset = prototype.startOffset;
endOffset = prototype.endOffset;
type = prototype.type;
payload = prototype.payload;
}
@Override
public void copyTo(AttributeImpl target) {
if (target instanceof Token) {
final Token to = (Token) target;
to.reinit(this);
// reinit shares the payload, so clone it:
if (payload !=null) {
to.payload = (Payload) payload.clone();
}
} else {
initTermBuffer();
((TermAttribute) target).setTermBuffer(termBuffer, 0, termLength);
((OffsetAttribute) target).setOffset(startOffset, endOffset);
((PositionIncrementAttribute) target).setPositionIncrement(positionIncrement);
((PayloadAttribute) target).setPayload((payload == null) ? null : (Payload) payload.clone());
((FlagsAttribute) target).setFlags(flags);
((TypeAttribute) target).setType(type);
}
}
/** Convenience factory that returns <code>Token</code> as implementation for the basic
* attributes and returns the default impl (with &quot;Impl&quot; appended) for all other
* attributes.
* @since 3.0
*/
public static final AttributeSource.AttributeFactory TOKEN_ATTRIBUTE_FACTORY =
new TokenAttributeFactory(AttributeSource.AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);
/** <b>Expert:</b> Creates a TokenAttributeFactory returning {@link Token} as instance for the basic attributes
* and for all other attributes calls the given delegate factory.
* @since 3.0
*/
public static final class TokenAttributeFactory extends AttributeSource.AttributeFactory {
private final AttributeSource.AttributeFactory delegate;
/** <b>Expert</b>: Creates an AttributeFactory returning {@link Token} as instance for the basic attributes
* and for all other attributes calls the given delegate factory. */
public TokenAttributeFactory(AttributeSource.AttributeFactory delegate) {
this.delegate = delegate;
}
@Override
public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
return attClass.isAssignableFrom(Token.class)
? new Token() : delegate.createAttributeInstance(attClass);
}
@Override
public boolean equals(Object other) {
if (this == other) return true;
if (other instanceof TokenAttributeFactory) {
final TokenAttributeFactory af = (TokenAttributeFactory) other;
return this.delegate.equals(af.delegate);
}
return false;
}
@Override
public int hashCode() {
return delegate.hashCode() ^ 0x0a45aa31;
}
}
}
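/*
 * Hedged reuse sketch (not part of the committed backwards sources): shows the
 * reinit() reuse pattern described in the class javadoc, re-initializing one
 * Token instance instead of allocating a new one per term. Offsets and text are
 * illustrative only.
 */
class TokenReuseSketch {
  public static void main(String[] args) {
    Token reusableToken = new Token();
    // copy text from a String; the type falls back to DEFAULT_TYPE
    reusableToken.reinit("stemmed", 0, 7);
    System.out.println(reusableToken);        // (stemmed,0,7)
    // reuse the same instance (and its termBuffer) for the next term
    char[] buffer = "another".toCharArray();
    reusableToken.reinit(buffer, 0, buffer.length, 8, 15);
    System.out.println(reusableToken.term()); // another
  }
}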

View File

@ -0,0 +1,56 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
/** A TokenFilter is a TokenStream whose input is another TokenStream.
<p>
This is an abstract class; subclasses must override {@link #incrementToken()}.
@see TokenStream
*/
public abstract class TokenFilter extends TokenStream {
/** The source of tokens for this filter. */
protected final TokenStream input;
/** Construct a token stream filtering the given input. */
protected TokenFilter(TokenStream input) {
super(input);
this.input = input;
}
/** Performs end-of-stream operations, if any, and then calls <code>end()</code> on the
* input TokenStream.<p/>
* <b>NOTE:</b> Be sure to call <code>super.end()</code> first when overriding this method.*/
@Override
public void end() throws IOException {
input.end();
}
/** Close the input TokenStream. */
@Override
public void close() throws IOException {
input.close();
}
/** Reset the filter as well as the input TokenStream. */
@Override
public void reset() throws IOException {
input.reset();
}
}
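/*
 * Hedged sketch (not part of the committed backwards sources): a minimal
 * TokenFilter subclass illustrating the incrementToken() contract the class
 * javadoc requires. The filter, its name, and the length threshold are
 * illustrative only.
 */
final class MinLengthFilterSketch extends TokenFilter {
  private final org.apache.lucene.analysis.tokenattributes.TermAttribute termAtt;
  private final int minLength;

  MinLengthFilterSketch(TokenStream input, int minLength) {
    super(input);
    this.minLength = minLength;
    this.termAtt = addAttribute(org.apache.lucene.analysis.tokenattributes.TermAttribute.class);
  }

  @Override
  public boolean incrementToken() throws IOException {
    // pull tokens from the wrapped stream until one is long enough
    while (input.incrementToken()) {
      if (termAtt.termLength() >= minLength) {
        return true;
      }
    }
    return false;   // end of stream
  }
}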

View File

@ -0,0 +1,161 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Closeable;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeSource;
/**
* A <code>TokenStream</code> enumerates the sequence of tokens, either from
* {@link Field}s of a {@link Document} or from query text.
* <p>
* This is an abstract class; concrete subclasses are:
* <ul>
* <li>{@link Tokenizer}, a <code>TokenStream</code> whose input is a Reader; and
* <li>{@link TokenFilter}, a <code>TokenStream</code> whose input is another
* <code>TokenStream</code>.
* </ul>
* A new <code>TokenStream</code> API has been introduced with Lucene 2.9. This API
* has moved from being {@link Token}-based to {@link Attribute}-based. While
* {@link Token} still exists in 2.9 as a convenience class, the preferred way
* to store the information of a {@link Token} is to use {@link AttributeImpl}s.
* <p>
* <code>TokenStream</code> now extends {@link AttributeSource}, which provides
* access to all of the token {@link Attribute}s for the <code>TokenStream</code>.
* Note that only one instance per {@link AttributeImpl} is created and reused
* for every token. This approach reduces object creation and allows local
* caching of references to the {@link AttributeImpl}s. See
* {@link #incrementToken()} for further details.
* <p>
* <b>The workflow of the new <code>TokenStream</code> API is as follows:</b>
* <ol>
* <li>Instantiation of <code>TokenStream</code>/{@link TokenFilter}s which add/get
* attributes to/from the {@link AttributeSource}.
* <li>The consumer calls {@link TokenStream#reset()}.
* <li>The consumer retrieves attributes from the stream and stores local
* references to all attributes it wants to access.
* <li>The consumer calls {@link #incrementToken()} until it returns false
* consuming the attributes after each call.
* <li>The consumer calls {@link #end()} so that any end-of-stream operations
* can be performed.
* <li>The consumer calls {@link #close()} to release any resource when finished
* using the <code>TokenStream</code>.
* </ol>
* To make sure that filters and consumers know which attributes are available,
* the attributes must be added during instantiation. Filters and consumers are
* not required to check for availability of attributes in
* {@link #incrementToken()}.
* <p>
* You can find some example code for the new API in the analysis package level
* Javadoc.
* <p>
* Sometimes it is desirable to capture a current state of a <code>TokenStream</code>,
* e.g., for buffering purposes (see {@link CachingTokenFilter},
* {@link TeeSinkTokenFilter}). For this usecase
* {@link AttributeSource#captureState} and {@link AttributeSource#restoreState}
* can be used.
*/
public abstract class TokenStream extends AttributeSource implements Closeable {
/**
* A TokenStream using the default attribute factory.
*/
protected TokenStream() {
super();
}
/**
* A TokenStream that uses the same attributes as the supplied one.
*/
protected TokenStream(AttributeSource input) {
super(input);
}
/**
* A TokenStream using the supplied AttributeFactory for creating new {@link Attribute} instances.
*/
protected TokenStream(AttributeFactory factory) {
super(factory);
}
/**
* Consumers (i.e., {@link IndexWriter}) use this method to advance the stream to
* the next token. Implementing classes must implement this method and update
* the appropriate {@link AttributeImpl}s with the attributes of the next
* token.
* <P>
* The producer must make no assumptions about the attributes after the method
* has returned: the caller may arbitrarily change them. If the producer
* needs to preserve the state for subsequent calls, it can use
* {@link #captureState} to create a copy of the current attribute state.
* <p>
* This method is called for every token of a document, so an efficient
* implementation is crucial for good performance. To avoid calls to
* {@link #addAttribute(Class)} and {@link #getAttribute(Class)},
* references to all {@link AttributeImpl}s that this stream uses should be
* retrieved during instantiation.
* <p>
* To ensure that filters and consumers know which attributes are available,
* the attributes must be added during instantiation. Filters and consumers
* are not required to check for availability of attributes in
* {@link #incrementToken()}.
*
* @return false for end of stream; true otherwise
*/
public abstract boolean incrementToken() throws IOException;
/**
* This method is called by the consumer after the last token has been
* consumed, after {@link #incrementToken()} returned <code>false</code>
* (using the new <code>TokenStream</code> API). Streams implementing the old API
* should upgrade to use this feature.
* <p/>
* This method can be used to perform any end-of-stream operations, such as
* setting the final offset of a stream. The final offset of a stream might
* differ from the offset of the last token, e.g. in case one or more whitespace
* characters followed the last token but a {@link WhitespaceTokenizer} was used.
*
* @throws IOException
*/
public void end() throws IOException {
// do nothing by default
}
/**
* Resets this stream to the beginning. This is an optional operation, so
* subclasses may or may not implement this method. {@link #reset()} is not needed for
* the standard indexing process. However, if the tokens of a
* <code>TokenStream</code> are intended to be consumed more than once, it is
* necessary to implement {@link #reset()}. Note that if your TokenStream
* caches tokens and feeds them back again after a reset, it is imperative
* that you clone the tokens when you store them away (on the first pass) as
* well as when you return them (on future passes after {@link #reset()}).
*/
public void reset() throws IOException {}
/** Releases resources associated with this stream. */
public void close() throws IOException {}
}
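A consumer-side sketch of the workflow described in the javadoc above (illustrative only; "analyzer" and "text" are assumed to exist):
TokenStream stream = analyzer.tokenStream("body", new StringReader(text));
TermAttribute termAtt = stream.addAttribute(TermAttribute.class); // keep a local reference
stream.reset();
while (stream.incrementToken()) {  // consume until it returns false
  System.out.println(termAtt.term());
}
stream.end();    // end-of-stream operations, e.g. setting the final offset
stream.close();  // release resources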

View File

@ -0,0 +1,92 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.AttributeSource;
import java.io.Reader;
import java.io.IOException;
/** A Tokenizer is a TokenStream whose input is a Reader.
<p>
This is an abstract class; subclasses must override {@link #incrementToken()}
<p>
NOTE: Subclasses overriding {@link #incrementToken()} must
call {@link AttributeSource#clearAttributes()} before
setting attributes.
Subclasses overriding {@link #incrementToken()} must call
{@link Token#clear()} before setting Token attributes.
*/
public abstract class Tokenizer extends TokenStream {
/** The text source for this Tokenizer. */
protected Reader input;
/** Construct a tokenizer with null input. */
protected Tokenizer() {}
/** Construct a token stream processing the given input. */
protected Tokenizer(Reader input) {
this.input = CharReader.get(input);
}
/** Construct a tokenizer with null input using the given AttributeFactory. */
protected Tokenizer(AttributeFactory factory) {
super(factory);
}
/** Construct a token stream processing the given input using the given AttributeFactory. */
protected Tokenizer(AttributeFactory factory, Reader input) {
super(factory);
this.input = CharReader.get(input);
}
/** Construct a token stream processing the given input using the given AttributeSource. */
protected Tokenizer(AttributeSource source) {
super(source);
}
/** Construct a token stream processing the given input using the given AttributeSource. */
protected Tokenizer(AttributeSource source, Reader input) {
super(source);
this.input = CharReader.get(input);
}
/** By default, closes the input Reader. */
@Override
public void close() throws IOException {
input.close();
}
/** Return the corrected offset. If {@link #input} is a {@link CharStream} subclass
* this method calls {@link CharStream#correctOffset}, else returns <code>currentOff</code>.
* @param currentOff offset as seen in the output
* @return corrected offset based on the input
* @see CharStream#correctOffset
*/
protected final int correctOffset(int currentOff) {
return (input instanceof CharStream) ? ((CharStream) input).correctOffset(currentOff) : currentOff;
}
/** Expert: Reset the tokenizer to a new reader. Typically, an
* analyzer (in its reusableTokenStream method) will use
* this to re-use a previously created tokenizer. */
public void reset(Reader input) throws IOException {
this.input = input;
}
}

View File

@ -0,0 +1,41 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import java.io.IOException;
/** An Analyzer that uses {@link WhitespaceTokenizer}. */
public final class WhitespaceAnalyzer extends Analyzer {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
return new WhitespaceTokenizer(reader);
}
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
Tokenizer tokenizer = (Tokenizer) getPreviousTokenStream();
if (tokenizer == null) {
tokenizer = new WhitespaceTokenizer(reader);
setPreviousTokenStream(tokenizer);
} else
tokenizer.reset(reader);
return tokenizer;
}
}

View File

@ -0,0 +1,49 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import org.apache.lucene.util.AttributeSource;
/** A WhitespaceTokenizer is a tokenizer that divides text at whitespace.
* Adjacent sequences of non-Whitespace characters form tokens. */
public class WhitespaceTokenizer extends CharTokenizer {
/** Construct a new WhitespaceTokenizer. */
public WhitespaceTokenizer(Reader in) {
super(in);
}
/** Construct a new WhitespaceTokenizer using a given {@link AttributeSource}. */
public WhitespaceTokenizer(AttributeSource source, Reader in) {
super(source, in);
}
/** Construct a new WhitespaceTokenizer using a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}. */
public WhitespaceTokenizer(AttributeFactory factory, Reader in) {
super(factory, in);
}
/** Collects only characters which do not satisfy
* {@link Character#isWhitespace(char)}.*/
@Override
protected boolean isTokenChar(char c) {
return !Character.isWhitespace(c);
}
}

View File

@ -0,0 +1,177 @@
package org.apache.lucene.analysis;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.HashSet;
/**
* Loader for text files that represent a list of stopwords.
*/
public class WordlistLoader {
/**
* Loads a text file and adds every line as an entry to a HashSet (omitting
* leading and trailing whitespace). Every line of the file should contain only
* one word. The words need to be in lowercase if you make use of an
* Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
*
* @param wordfile File containing the wordlist
* @return A HashSet with the file's words
*/
public static HashSet<String> getWordSet(File wordfile) throws IOException {
HashSet<String> result = new HashSet<String>();
FileReader reader = null;
try {
reader = new FileReader(wordfile);
result = getWordSet(reader);
}
finally {
if (reader != null)
reader.close();
}
return result;
}
/**
* Loads a text file and adds every non-comment line as an entry to a HashSet (omitting
* leading and trailing whitespace). Every line of the file should contain only
* one word. The words need to be in lowercase if you make use of an
* Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
*
* @param wordfile File containing the wordlist
* @param comment The comment string to ignore
* @return A HashSet with the file's words
*/
public static HashSet<String> getWordSet(File wordfile, String comment) throws IOException {
HashSet<String> result = new HashSet<String>();
FileReader reader = null;
try {
reader = new FileReader(wordfile);
result = getWordSet(reader, comment);
}
finally {
if (reader != null)
reader.close();
}
return result;
}
/**
* Reads lines from a Reader and adds every line as an entry to a HashSet (omitting
* leading and trailing whitespace). Every line of the Reader should contain only
* one word. The words need to be in lowercase if you make use of an
* Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
*
* @param reader Reader containing the wordlist
* @return A HashSet with the reader's words
*/
public static HashSet<String> getWordSet(Reader reader) throws IOException {
HashSet<String> result = new HashSet<String>();
BufferedReader br = null;
try {
if (reader instanceof BufferedReader) {
br = (BufferedReader) reader;
} else {
br = new BufferedReader(reader);
}
String word = null;
while ((word = br.readLine()) != null) {
result.add(word.trim());
}
}
finally {
if (br != null)
br.close();
}
return result;
}
/**
* Reads lines from a Reader and adds every non-comment line as an entry to a HashSet (omitting
* leading and trailing whitespace). Every line of the Reader should contain only
* one word. The words need to be in lowercase if you make use of an
* Analyzer which uses LowerCaseFilter (like StandardAnalyzer).
*
* @param reader Reader containing the wordlist
* @param comment The string representing a comment.
* @return A HashSet with the reader's words
*/
public static HashSet<String> getWordSet(Reader reader, String comment) throws IOException {
HashSet<String> result = new HashSet<String>();
BufferedReader br = null;
try {
if (reader instanceof BufferedReader) {
br = (BufferedReader) reader;
} else {
br = new BufferedReader(reader);
}
String word = null;
while ((word = br.readLine()) != null) {
if (word.startsWith(comment) == false){
result.add(word.trim());
}
}
}
finally {
if (br != null)
br.close();
}
return result;
}
/**
* Reads a stem dictionary. Each line contains:
* <pre>word<b>\t</b>stem</pre>
* (i.e. two tab-separated words)
*
* @return stem dictionary that overrules the stemming algorithm
* @throws IOException
*/
public static HashMap<String, String> getStemDict(File wordstemfile) throws IOException {
if (wordstemfile == null)
throw new NullPointerException("wordstemfile may not be null");
HashMap<String, String> result = new HashMap<String, String>();
BufferedReader br = null;
FileReader fr = null;
try {
fr = new FileReader(wordstemfile);
br = new BufferedReader(fr);
String line;
while ((line = br.readLine()) != null) {
String[] wordstem = line.split("\t", 2);
result.put(wordstem[0], wordstem[1]);
}
} finally {
if (fr != null)
fr.close();
if (br != null)
br.close();
}
return result;
}
}
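A usage sketch (illustrative only; the file name and the Version constant are assumptions): load a stopword file with '#' comment lines and build an analyzer from the resulting set.
HashSet<String> stopWords = WordlistLoader.getWordSet(new File("stopwords.txt"), "#");
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30, stopWords);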

View File

@ -0,0 +1,635 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
<p>API and code to convert text into indexable/searchable tokens. Covers {@link org.apache.lucene.analysis.Analyzer} and related classes.</p>
<h2>Parsing? Tokenization? Analysis!</h2>
<p>
Lucene, an indexing and search library, accepts only plain text input.
<p>
<h2>Parsing</h2>
<p>
Applications that build their search capabilities upon Lucene may support documents in various formats &ndash; HTML, XML, PDF, Word &ndash; just to name a few.
Lucene does not care about the <i>Parsing</i> of these and other document formats, and it is the responsibility of the
application using Lucene to use an appropriate <i>Parser</i> to convert the original format into plain text before passing that plain text to Lucene.
<p>
<h2>Tokenization</h2>
<p>
Plain text passed to Lucene for indexing goes through a process generally called tokenization. Tokenization is the process
of breaking input text into small indexing elements &ndash; tokens.
The way input text is broken into tokens heavily influences how people will then be able to search for that text.
For instance, sentence beginnings and endings can be identified to provide for more accurate phrase
and proximity searches (though sentence identification is not provided by Lucene).
<p>
In some cases simply breaking the input text into tokens is not enough &ndash; a deeper <i>Analysis</i> may be needed.
There are many post tokenization steps that can be done, including (but not limited to):
<ul>
<li><a href="http://en.wikipedia.org/wiki/Stemming">Stemming</a> &ndash;
Replacing of words by their stems.
For instance with English stemming "bikes" is replaced by "bike";
now query "bike" can find both documents containing "bike" and those containing "bikes".
</li>
<li><a href="http://en.wikipedia.org/wiki/Stop_words">Stop Words Filtering</a> &ndash;
Common words like "the", "and" and "a" rarely add any value to a search.
Removing them shrinks the index size and increases performance.
It may also reduce some "noise" and actually improve search quality.
</li>
<li><a href="http://en.wikipedia.org/wiki/Text_normalization">Text Normalization</a> &ndash;
Stripping accents and other character markings can make for better searching.
</li>
<li><a href="http://en.wikipedia.org/wiki/Synonym">Synonym Expansion</a> &ndash;
Adding in synonyms at the same token position as the current word can mean better
matching when users search with words in the synonym set.
</li>
</ul>
<p>
<h2>Core Analysis</h2>
<p>
The analysis package provides the mechanism to convert Strings and Readers into tokens that can be indexed by Lucene. There
are three main classes in the package from which all analysis processes are derived. These are:
<ul>
<li>{@link org.apache.lucene.analysis.Analyzer} &ndash; An Analyzer is responsible for building a {@link org.apache.lucene.analysis.TokenStream} which can be consumed
by the indexing and searching processes. See below for more information on implementing your own Analyzer.</li>
<li>{@link org.apache.lucene.analysis.Tokenizer} &ndash; A Tokenizer is a {@link org.apache.lucene.analysis.TokenStream} and is responsible for breaking
up incoming text into tokens. In most cases, an Analyzer will use a Tokenizer as the first step in
the analysis process.</li>
<li>{@link org.apache.lucene.analysis.TokenFilter} &ndash; A TokenFilter is also a {@link org.apache.lucene.analysis.TokenStream} and is responsible
for modifying tokens that have been created by the Tokenizer. Common modifications performed by a
TokenFilter are: deletion, stemming, synonym injection, and down-casing. Not all Analyzers require TokenFilters.</li>
</ul>
<b>Lucene 2.9 introduces a new TokenStream API. Please see the section "New TokenStream API" below for more details.</b>
</p>
<h2>Hints, Tips and Traps</h2>
<p>
The synergy between {@link org.apache.lucene.analysis.Analyzer} and {@link org.apache.lucene.analysis.Tokenizer}
is sometimes confusing. To ease this confusion, here are some clarifications:
<ul>
<li>The {@link org.apache.lucene.analysis.Analyzer} is responsible for the entire task of
<u>creating</u> tokens out of the input text, while the {@link org.apache.lucene.analysis.Tokenizer}
is only responsible for <u>breaking</u> the input text into tokens. Very likely, tokens created
by the {@link org.apache.lucene.analysis.Tokenizer} would be modified or even omitted
by the {@link org.apache.lucene.analysis.Analyzer} (via one or more
{@link org.apache.lucene.analysis.TokenFilter}s) before being returned.
</li>
<li>{@link org.apache.lucene.analysis.Tokenizer} is a {@link org.apache.lucene.analysis.TokenStream},
but {@link org.apache.lucene.analysis.Analyzer} is not.
</li>
<li>{@link org.apache.lucene.analysis.Analyzer} is "field aware", but
{@link org.apache.lucene.analysis.Tokenizer} is not.
</li>
</ul>
</p>
<p>
Lucene Java provides a number of analysis capabilities, the most commonly used one being the {@link
org.apache.lucene.analysis.standard.StandardAnalyzer}. Many applications will have a long and industrious life with nothing more
than the StandardAnalyzer. However, there are a few other classes/packages that are worth mentioning:
<ol>
<li>{@link org.apache.lucene.analysis.PerFieldAnalyzerWrapper} &ndash; Most Analyzers perform the same operation on all
{@link org.apache.lucene.document.Field}s. The PerFieldAnalyzerWrapper can be used to associate a different Analyzer with different
{@link org.apache.lucene.document.Field}s.</li>
<li>The contrib/analyzers library located at the root of the Lucene distribution has a number of different Analyzer implementations to solve a variety
of different problems related to searching. Many of the Analyzers are designed to analyze non-English languages.</li>
<li>The contrib/snowball library
located at the root of the Lucene distribution has Analyzer and TokenFilter
implementations for a variety of Snowball stemmers.
See <a href="http://snowball.tartarus.org">http://snowball.tartarus.org</a>
for more information on Snowball stemmers.</li>
<li>There are a variety of Tokenizer and TokenFilter implementations in this package. Take a look around, chances are someone has implemented what you need.</li>
</ol>
</p>
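<p>
To illustrate the PerFieldAnalyzerWrapper item above, here is a minimal sketch (the field name "id", the use of KeywordAnalyzer, and the Version constant are just assumptions for the example):
<PRE>
PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_30));
wrapper.addAnalyzer("id", new KeywordAnalyzer()); // "id" is indexed as a single token
// all other fields fall back to the StandardAnalyzer given above
</PRE>
</p>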
<p>
Analysis is one of the main causes of performance degradation during indexing. Simply put, the more you analyze the slower the indexing (in most cases).
Perhaps your application would be just fine using the simple {@link org.apache.lucene.analysis.WhitespaceTokenizer} combined with a
{@link org.apache.lucene.analysis.StopFilter}. The contrib/benchmark library can be useful for testing out the speed of the analysis process.
</p>
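<p>
As an illustration of that last point, a minimal analyzer along those lines might look like this (a sketch only; it uses the StopFilter constructor that takes the enable-position-increments flag, as StandardAnalyzer does):
<PRE>
public class WhitespaceStopAnalyzer extends Analyzer {
  private final Set stopWords = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream stream = new WhitespaceTokenizer(reader);
    return new StopFilter(true, stream, stopWords); // true: preserve position increments
  }
}
</PRE>
</p>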
<h2>Invoking the Analyzer</h2>
<p>
Applications usually do not invoke analysis &ndash; Lucene does it for them:
<ul>
<li>At indexing, as a consequence of
{@link org.apache.lucene.index.IndexWriter#addDocument(org.apache.lucene.document.Document) addDocument(doc)},
the Analyzer in effect for indexing is invoked for each indexed field of the added document.
</li>
<li>At search, as a consequence of
{@link org.apache.lucene.queryParser.QueryParser#parse(java.lang.String) QueryParser.parse(queryText)},
the QueryParser may invoke the Analyzer in effect.
Note that for some queries analysis does not take place, e.g. wildcard queries.
</li>
</ul>
However, an application might invoke analysis of any text for testing or for any other purpose, something like:
<PRE>
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30); // or any other analyzer
TokenStream ts = analyzer.tokenStream("myfield", new StringReader("some text goes here"));
TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
ts.reset();
while (ts.incrementToken()) {
  System.out.println("token: " + termAtt.term());
}
ts.end();
ts.close();
</PRE>
</p>
<h2>Indexing Analysis vs. Search Analysis</h2>
<p>
Selecting the "correct" analyzer is crucial
for search quality, and can also affect indexing and search performance.
The "correct" analyzer differs between applications.
Lucene java's wiki page
<a href="http://wiki.apache.org/lucene-java/AnalysisParalysis">AnalysisParalysis</a>
provides some data on "analyzing your analyzer".
Here are some rules of thumb:
<ol>
<li>Test test test... (did we say test?)</li>
<li>Beware of over analysis &ndash; might hurt indexing performance.</li>
<li>Start with the same analyzer for indexing and search; otherwise searches will not find what they are supposed to...</li>
<li>In some cases a different analyzer is required for indexing and search, for instance:
<ul>
<li>Certain searches require more stop words to be filtered. (I.e. more than those that were filtered at indexing.)</li>
<li>Query expansion by synonyms, acronyms, auto spell correction, etc.</li>
</ul>
This might sometimes require a modified analyzer &ndash; see the next section on how to do that.
</li>
</ol>
</p>
<h2>Implementing your own Analyzer</h2>
<p>Creating your own Analyzer is straightforward. It usually involves either wrapping an existing Tokenizer and set of TokenFilters to create a new Analyzer
or creating both the Analyzer and a Tokenizer or TokenFilter. Before pursuing this approach, you may find it worthwhile
to explore the contrib/analyzers library and/or ask on the java-user@lucene.apache.org mailing list first to see if what you need already exists.
If you are still committed to creating your own Analyzer or TokenStream derivation (Tokenizer or TokenFilter) have a look at
the source code of any one of the many samples located in this package.
</p>
<p>
The following sections discuss some aspects of implementing your own analyzer.
</p>
<h3>Field Section Boundaries</h3>
<p>
When {@link org.apache.lucene.document.Document#add(org.apache.lucene.document.Fieldable) document.add(field)}
is called multiple times for the same field name, we could say that each such call creates a new
section for that field in that document.
In fact, a separate call to
{@link org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String, java.io.Reader) tokenStream(field,reader)}
would take place for each of these so called "sections".
However, the default Analyzer behavior is to treat all these sections as one large section.
This allows phrase search and proximity search to seamlessly cross
boundaries between these "sections".
In other words, if a certain field "f" is added like this:
<PRE>
document.add(new Field("f","first ends",...);
document.add(new Field("f","starts two",...);
indexWriter.addDocument(document);
</PRE>
Then, a phrase search for "ends starts" would find that document.
Where desired, this behavior can be modified by introducing a "position gap" between consecutive field "sections",
simply by overriding
{@link org.apache.lucene.analysis.Analyzer#getPositionIncrementGap(java.lang.String) Analyzer.getPositionIncrementGap(fieldName)}:
<PRE>
Analyzer myAnalyzer = new StandardAnalyzer() {
public int getPositionIncrementGap(String fieldName) {
return 10;
}
};
</PRE>
</p>
<h3>Token Position Increments</h3>
<p>
By default, all tokens created by Analyzers and Tokenizers have a
{@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute#getPositionIncrement() position increment} of one.
This means that the position stored for that token in the index would be one more than
that of the previous token.
Recall that phrase and proximity searches rely on position info.
</p>
<p>
If the selected analyzer filters the stop words "is" and "the", then for a document
containing the string "blue is the sky", only the tokens "blue", "sky" are indexed,
with position("sky") = 1 + position("blue"). Now, a phrase query "blue is the sky"
would find that document, because the same analyzer filters the same stop words from
that query. But also the phrase query "blue sky" would find that document.
</p>
<p>
If this behavior does not fit the application needs,
a modified analyzer can be used that further increments the positions of
tokens following a removed stop word, using
{@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute#setPositionIncrement(int)}.
This can be done with something like:
<PRE>
public TokenStream tokenStream(final String fieldName, Reader reader) {
final TokenStream ts = someAnalyzer.tokenStream(fieldName, reader);
TokenStream res = new TokenStream() {
TermAttribute termAtt = addAttribute(TermAttribute.class);
PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
public boolean incrementToken() throws IOException {
int extraIncrement = 0;
while (true) {
boolean hasNext = ts.incrementToken();
if (hasNext) {
if (stopWords.contains(termAtt.term())) {
extraIncrement++; // filter this word
continue;
}
if (extraIncrement>0) {
posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement()+extraIncrement);
}
}
return hasNext;
}
}
};
return res;
}
</PRE>
Now, with this modified analyzer, the phrase query "blue sky" would find that document.
But note that this is not yet a perfect solution, because any phrase query "blue w1 w2 sky"
where both w1 and w2 are stop words would match that document.
</p>
<p>
A few more use cases for modifying position increments are:
<ol>
<li>Inhibiting phrase and proximity matches in sentence boundaries &ndash; for this, a tokenizer that
identifies a new sentence can add 1 to the position increment of the first token of the new sentence.</li>
<li>Injecting synonyms &ndash; here, synonyms of a token should be added after that token,
and their position increment should be set to 0.
As a result, all synonyms of a token would be considered to appear in exactly the
same position as that token, and that is how phrase and proximity searches would see them.</li>
</ol>
</p>
<h2>New TokenStream API</h2>
<p>
With Lucene 2.9 we introduce a new TokenStream API. The old API used to produce Tokens. A Token
has getter and setter methods for different properties like positionIncrement and termText.
While this approach was sufficient for the default indexing format, it is not versatile enough for
Flexible Indexing, a term which summarizes the effort of making the Lucene indexer pluggable and extensible for custom
index formats.
</p>
<p>
A fully customizable indexer means that users will be able to store custom data structures on disk. Therefore an API
is necessary that can transport custom types of data from the documents to the indexer.
</p>
<h3>Attribute and AttributeSource</h3>
Lucene 2.9 therefore introduces a new pair of classes called {@link org.apache.lucene.util.Attribute} and
{@link org.apache.lucene.util.AttributeSource}. An Attribute serves as a
particular piece of information about a text token. For example, {@link org.apache.lucene.analysis.tokenattributes.TermAttribute}
contains the term text of a token, and {@link org.apache.lucene.analysis.tokenattributes.OffsetAttribute} contains the start and end character offsets of a token.
An AttributeSource is a collection of Attributes with a restriction: there may be only one instance of each attribute type. TokenStream now extends AttributeSource, which
means that one can add Attributes to a TokenStream. Since TokenFilter extends TokenStream, all filters are also
AttributeSources.
<p>
Lucene now provides six Attributes out of the box, which replace the variables the Token class has:
<ul>
<li>{@link org.apache.lucene.analysis.tokenattributes.TermAttribute}<p>The term text of a token.</p></li>
<li>{@link org.apache.lucene.analysis.tokenattributes.OffsetAttribute}<p>The start and end offset of token in characters.</p></li>
<li>{@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute}<p>See above for detailed information about position increment.</p></li>
<li>{@link org.apache.lucene.analysis.tokenattributes.PayloadAttribute}<p>The payload that a Token can optionally have.</p></li>
<li>{@link org.apache.lucene.analysis.tokenattributes.TypeAttribute}<p>The type of the token. Default is 'word'.</p></li>
<li>{@link org.apache.lucene.analysis.tokenattributes.FlagsAttribute}<p>Optional flags a token can have.</p></li>
</ul>
</p>
<h3>Using the new TokenStream API</h3>
There are a few important things to know in order to use the new API efficiently which are summarized here. You may want
to walk through the example below first and come back to this section afterwards.
<ol><li>
Please keep in mind that an AttributeSource can only have one instance of a particular Attribute. Furthermore, if
a chain of a TokenStream and multiple TokenFilters is used, then all TokenFilters in that chain share the Attributes
with the TokenStream.
</li>
<br>
<li>
Attribute instances are reused for all tokens of a document. Thus, a TokenStream/-Filter needs to update
the appropriate Attribute(s) in incrementToken(). The consumer, commonly the Lucene indexer, consumes the data in the
Attributes and then calls incrementToken() again until it returns false, which indicates that the end of the stream
was reached. This means that in each call of incrementToken() a TokenStream/-Filter can safely overwrite the data in
the Attribute instances.
</li>
<br>
<li>
For performance reasons a TokenStream/-Filter should add/get Attributes during instantiation; i.e., create an attribute in the
constructor and store references to it in an instance variable. Using an instance variable instead of calling addAttribute()/getAttribute()
in incrementToken() will avoid attribute lookups for every token in the document.
</li>
<br>
<li>
All methods in AttributeSource are idempotent, which means calling them multiple times always yields the same
result. This is especially important to know for addAttribute(). The method takes the <b>type</b> (<code>Class</code>)
of an Attribute as an argument and returns an <b>instance</b>. If an Attribute of the same type was previously added, then
the already existing instance is returned, otherwise a new instance is created and returned. Therefore TokenStreams/-Filters
can safely call addAttribute() with the same Attribute type multiple times. Even consumers of TokenStreams should
normally call addAttribute() instead of getAttribute(), because it would not fail if the TokenStream does not have this
Attribute (getAttribute() would throw an IllegalArgumentException, if the Attribute is missing). More advanced code
could simply check with hasAttribute(), if a TokenStream has it, and may conditionally leave out processing for
extra performance.
</li></ol>
<h3>Example</h3>
In this example we will create a WhitespaceTokenizer and use a LengthFilter to suppress all words that
have two or fewer characters. The LengthFilter is part of the Lucene core and its implementation will be explained
here to illustrate the usage of the new TokenStream API.<br>
Then we will develop a custom Attribute, a PartOfSpeechAttribute, and add another filter to the chain which
utilizes the new custom attribute, and call it PartOfSpeechTaggingFilter.
<h4>Whitespace tokenization</h4>
<pre>
public class MyAnalyzer extends Analyzer {
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream stream = new WhitespaceTokenizer(reader);
return stream;
}
public static void main(String[] args) throws IOException {
// text to tokenize
final String text = "This is a demo of the new TokenStream API";
MyAnalyzer analyzer = new MyAnalyzer();
TokenStream stream = analyzer.tokenStream("field", new StringReader(text));
// get the TermAttribute from the TokenStream
TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
stream.reset();
// print all tokens until stream is exhausted
while (stream.incrementToken()) {
System.out.println(termAtt.term());
}
stream.end();
stream.close();
}
}
</pre>
In this simple example, plain whitespace tokenization is performed. In main() a loop consumes the stream and
prints the term text of the tokens by accessing the TermAttribute that the WhitespaceTokenizer provides.
Here is the output:
<pre>
This
is
a
demo
of
the
new
TokenStream
API
</pre>
<h4>Adding a LengthFilter</h4>
We want to suppress all tokens that have 2 or fewer characters. We can do that easily by adding a LengthFilter
to the chain. Only the tokenStream() method in our analyzer needs to be changed:
<pre>
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream stream = new WhitespaceTokenizer(reader);
stream = new LengthFilter(stream, 3, Integer.MAX_VALUE);
return stream;
}
</pre>
Note how now only words with 3 or more characters are contained in the output:
<pre>
This
demo
the
new
TokenStream
API
</pre>
Now let's take a look at how the LengthFilter is implemented (it is part of Lucene's core):
<pre>
public final class LengthFilter extends TokenFilter {
final int min;
final int max;
private TermAttribute termAtt;
/**
* Build a filter that removes words that are too long or too
* short from the text.
*/
public LengthFilter(TokenStream in, int min, int max)
{
super(in);
this.min = min;
this.max = max;
termAtt = addAttribute(TermAttribute.class);
}
/**
* Returns the next input token whose term() has the right length
*/
public final boolean incrementToken() throws IOException
{
assert termAtt != null;
// return the first token whose length is within range
while (input.incrementToken()) {
int len = termAtt.termLength();
if (len >= min && len <= max) {
return true;
}
// note: else we ignore it but should we index each part of it?
}
// reached EOS -- return false
return false;
}
}
</pre>
The TermAttribute is added in the constructor and stored in the instance variable <code>termAtt</code>.
Remember that there can only be a single instance of TermAttribute in the chain, so in our example the
<code>addAttribute()</code> call in LengthFilter returns the TermAttribute that the WhitespaceTokenizer already added. The tokens
are retrieved from the input stream in the <code>incrementToken()</code> method. By looking at the term text
in the TermAttribute the length of the term can be determined and too short or too long tokens are skipped.
Note how <code>incrementToken()</code> can efficiently access the instance variable; no attribute lookup
is necessary. The same is true for the consumer, which can simply use local references to the Attributes.
<h4>Adding a custom Attribute</h4>
Now we're going to implement our own custom Attribute for part-of-speech tagging and, accordingly, call it
<code>PartOfSpeechAttribute</code>. First we need to define the interface of the new Attribute:
<pre>
public interface PartOfSpeechAttribute extends Attribute {
public static enum PartOfSpeech {
Noun, Verb, Adjective, Adverb, Pronoun, Preposition, Conjunction, Article, Unknown
}
public void setPartOfSpeech(PartOfSpeech pos);
public PartOfSpeech getPartOfSpeech();
}
</pre>
Now we also need to write the implementing class. The name of that class is important here: By default, Lucene
checks if there is a class with the name of the Attribute with the postfix 'Impl'. In this example, we would
consequently call the implementing class <code>PartOfSpeechAttributeImpl</code>. <br/>
This should be the usual behavior. However, there is also an expert-API that allows changing these naming conventions:
{@link org.apache.lucene.util.AttributeSource.AttributeFactory}. The factory accepts an Attribute interface as argument
and returns an actual instance. You can implement your own factory if you need to change the default behavior. <br/><br/>
Now here is the actual class that implements our new Attribute. Notice that the class has to extend
{@link org.apache.lucene.util.AttributeImpl}:
<pre>
public final class PartOfSpeechAttributeImpl extends AttributeImpl
implements PartOfSpeechAttribute{
private PartOfSpeech pos = PartOfSpeech.Unknown;
public void setPartOfSpeech(PartOfSpeech pos) {
this.pos = pos;
}
public PartOfSpeech getPartOfSpeech() {
return pos;
}
public void clear() {
pos = PartOfSpeech.Unknown;
}
public void copyTo(AttributeImpl target) {
((PartOfSpeechAttributeImpl) target).pos = pos;
}
public boolean equals(Object other) {
if (other == this) {
return true;
}
if (other instanceof PartOfSpeechAttributeImpl) {
return pos == ((PartOfSpeechAttributeImpl) other).pos;
}
return false;
}
public int hashCode() {
return pos.ordinal();
}
}
</pre>
This is a simple Attribute implementation that has only a single variable that stores the part-of-speech of a token. It extends the
new <code>AttributeImpl</code> class and therefore implements its abstract methods <code>clear(), copyTo(), equals(), hashCode()</code>.
Now we need a TokenFilter that can set this new PartOfSpeechAttribute for each token. In this example we show a very naive filter
that tags every word with a leading upper-case letter as a 'Noun' and all other words as 'Unknown'.
<pre>
public static class PartOfSpeechTaggingFilter extends TokenFilter {
PartOfSpeechAttribute posAtt;
TermAttribute termAtt;
protected PartOfSpeechTaggingFilter(TokenStream input) {
super(input);
posAtt = addAttribute(PartOfSpeechAttribute.class);
termAtt = addAttribute(TermAttribute.class);
}
public boolean incrementToken() throws IOException {
if (!input.incrementToken()) {return false;}
posAtt.setPartOfSpeech(determinePOS(termAtt.termBuffer(), 0, termAtt.termLength()));
return true;
}
// determine the part of speech for the given term
protected PartOfSpeech determinePOS(char[] term, int offset, int length) {
// naive implementation that tags every uppercased word as noun
if (length > 0 && Character.isUpperCase(term[0])) {
return PartOfSpeech.Noun;
}
return PartOfSpeech.Unknown;
}
}
</pre>
Just like the LengthFilter, this new filter accesses the attributes it needs in the constructor and
stores references in instance variables. Notice how you only need to pass in the interface of the new
Attribute; instantiating the correct class is taken care of automatically.
Now we need to add the filter to the chain:
<pre>
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream stream = new WhitespaceTokenizer(reader);
stream = new LengthFilter(stream, 3, Integer.MAX_VALUE);
stream = new PartOfSpeechTaggingFilter(stream);
return stream;
}
</pre>
Now let's look at the output:
<pre>
This
demo
the
new
TokenStream
API
</pre>
Apparently it hasn't changed, which shows that adding a custom attribute to a TokenStream/Filter chain does not
affect any existing consumers, simply because they don't know the new Attribute. Now let's change the consumer
to make use of the new PartOfSpeechAttribute and print it out:
<pre>
public static void main(String[] args) throws IOException {
// text to tokenize
final String text = "This is a demo of the new TokenStream API";
MyAnalyzer analyzer = new MyAnalyzer();
TokenStream stream = analyzer.tokenStream("field", new StringReader(text));
// get the TermAttribute from the TokenStream
TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
// get the PartOfSpeechAttribute from the TokenStream
PartOfSpeechAttribute posAtt = stream.addAttribute(PartOfSpeechAttribute.class);
stream.reset();
// print all tokens until stream is exhausted
while (stream.incrementToken()) {
System.out.println(termAtt.term() + ": " + posAtt.getPartOfSpeech());
}
stream.end();
stream.close();
}
</pre>
The change that was made is to get the PartOfSpeechAttribute from the TokenStream and print out its contents in
the while loop that consumes the stream. Here is the new output:
<pre>
This: Noun
demo: Unknown
the: Unknown
new: Unknown
TokenStream: Noun
API: Noun
</pre>
Each word is now followed by its assigned PartOfSpeech tag. Of course this is naive
part-of-speech tagging. The word 'This' should not even be tagged as a noun; it is only capitalized because it
is the first word of a sentence. Actually this is a good opportunity for an exercise. To practice the usage of the new
API the reader could now write an Attribute and TokenFilter that can specify for each word if it was the first token
of a sentence or not. Then the PartOfSpeechTaggingFilter can make use of this knowledge and only tag capitalized words
as nouns if they are not the first word of a sentence (we know, this is still not correct behavior, but hey, it's a good exercise).
As a small hint, this is how the new Attribute class could begin:
<pre>
public class FirstTokenOfSentenceAttributeImpl extends Attribute
implements FirstTokenOfSentenceAttribute {
private boolean firstToken;
public void setFirstToken(boolean firstToken) {
this.firstToken = firstToken;
}
public boolean getFirstToken() {
return firstToken;
}
public void clear() {
firstToken = false;
}
...
</pre>
</body>
</html>

View File

@ -0,0 +1,25 @@
/*
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
WARNING: if you change StandardTokenizerImpl.jflex and need to regenerate
the tokenizer, only use Java 1.4 !!!
This grammar currently uses constructs (e.g. :digit:, :letter:) whose
meaning can vary according to the JRE used to run jflex. See
https://issues.apache.org/jira/browse/LUCENE-1126 for details.
For backwards compatibility it is currently necessary to support
only Java 1.4; this will change in Lucene 3.1.

View File

@ -0,0 +1,161 @@
package org.apache.lucene.analysis.standard;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.*;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
/**
* Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
* LowerCaseFilter} and {@link StopFilter}, using a list of
* English stop words.
*
* <a name="version"/>
* <p>You must specify the required {@link Version}
* compatibility when creating StandardAnalyzer:
* <ul>
* <li> As of 2.9, StopFilter preserves position
* increments
* <li> As of 2.4, Tokens incorrectly identified as acronyms
* are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
* </ul>
*/
public class StandardAnalyzer extends Analyzer {
private Set<?> stopSet;
/**
* Specifies whether deprecated acronyms should be replaced with HOST type.
* See <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>.
*/
private final boolean replaceInvalidAcronym,enableStopPositionIncrements;
/** An unmodifiable set containing some common English words that are usually not
useful for searching. */
public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
private final Version matchVersion;
/** Builds an analyzer with the default stop words ({@link
* #STOP_WORDS_SET}).
* @param matchVersion Lucene version to match See {@link
* <a href="#version">above</a>}
*/
public StandardAnalyzer(Version matchVersion) {
this(matchVersion, STOP_WORDS_SET);
}
/** Builds an analyzer with the given stop words.
* @param matchVersion Lucene version to match See {@link
* <a href="#version">above</a>}
* @param stopWords stop words */
public StandardAnalyzer(Version matchVersion, Set<?> stopWords) {
stopSet = stopWords;
setOverridesTokenStreamMethod(StandardAnalyzer.class);
enableStopPositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
this.matchVersion = matchVersion;
}
/** Builds an analyzer with the stop words from the given file.
* @see WordlistLoader#getWordSet(File)
* @param matchVersion Lucene version to match See {@link
* <a href="#version">above</a>}
* @param stopwords File to read stop words from */
public StandardAnalyzer(Version matchVersion, File stopwords) throws IOException {
this(matchVersion, WordlistLoader.getWordSet(stopwords));
}
/** Builds an analyzer with the stop words from the given reader.
* @see WordlistLoader#getWordSet(Reader)
* @param matchVersion Lucene version to match See {@link
* <a href="#version">above</a>}
* @param stopwords Reader to read stop words from */
public StandardAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
this(matchVersion, WordlistLoader.getWordSet(stopwords));
}
/** Constructs a {@link StandardTokenizer} filtered by a {@link
StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
StandardTokenizer tokenStream = new StandardTokenizer(matchVersion, reader);
tokenStream.setMaxTokenLength(maxTokenLength);
TokenStream result = new StandardFilter(tokenStream);
result = new LowerCaseFilter(result);
result = new StopFilter(enableStopPositionIncrements, result, stopSet);
return result;
}
private static final class SavedStreams {
StandardTokenizer tokenStream;
TokenStream filteredTokenStream;
}
/** Default maximum allowed token length */
public static final int DEFAULT_MAX_TOKEN_LENGTH = 255;
private int maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH;
/**
* Set maximum allowed token length. If a token is seen
* that exceeds this length then it is discarded. This
* setting only takes effect the next time tokenStream or
* reusableTokenStream is called.
*/
public void setMaxTokenLength(int length) {
maxTokenLength = length;
}
/**
* @see #setMaxTokenLength
*/
public int getMaxTokenLength() {
return maxTokenLength;
}
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
if (overridesTokenStreamMethod) {
// LUCENE-1678: force fallback to tokenStream() if we
// have been subclassed and that subclass overrides
// tokenStream but not reusableTokenStream
return tokenStream(fieldName, reader);
}
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
if (streams == null) {
streams = new SavedStreams();
setPreviousTokenStream(streams);
streams.tokenStream = new StandardTokenizer(matchVersion, reader);
streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements,
streams.filteredTokenStream, stopSet);
} else {
streams.tokenStream.reset(reader);
}
streams.tokenStream.setMaxTokenLength(maxTokenLength);
streams.tokenStream.setReplaceInvalidAcronym(replaceInvalidAcronym);
return streams.filteredTokenStream;
}
}
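A small usage sketch (illustrative only; the Version constant is an assumption): adjust the token length limit before handing the analyzer to an IndexWriter. As documented above, the new limit takes effect on the next call to tokenStream or reusableTokenStream.
StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
analyzer.setMaxTokenLength(64); // tokens longer than 64 characters are discarded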

View File

@ -0,0 +1,76 @@
package org.apache.lucene.analysis.standard;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
/** Normalizes tokens extracted with {@link StandardTokenizer}. */
public final class StandardFilter extends TokenFilter {
/** Construct filtering <i>in</i>. */
public StandardFilter(TokenStream in) {
super(in);
termAtt = addAttribute(TermAttribute.class);
typeAtt = addAttribute(TypeAttribute.class);
}
private static final String APOSTROPHE_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.APOSTROPHE];
private static final String ACRONYM_TYPE = StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM];
// this filter uses the type attribute
private TypeAttribute typeAtt;
private TermAttribute termAtt;
/** Advances to the next token in the stream; returns false at EOS.
* <p>Removes <tt>'s</tt> from the end of words.
* <p>Removes dots from acronyms.
*/
@Override
public final boolean incrementToken() throws java.io.IOException {
if (!input.incrementToken()) {
return false;
}
char[] buffer = termAtt.termBuffer();
final int bufferLength = termAtt.termLength();
final String type = typeAtt.type();
if (type == APOSTROPHE_TYPE && // remove 's
bufferLength >= 2 &&
buffer[bufferLength-2] == '\'' &&
(buffer[bufferLength-1] == 's' || buffer[bufferLength-1] == 'S')) {
// Strip last 2 characters off
termAtt.setTermLength(bufferLength - 2);
} else if (type == ACRONYM_TYPE) { // remove dots
int upto = 0;
for(int i=0;i<bufferLength;i++) {
char c = buffer[i];
if (c != '.')
buffer[upto++] = c;
}
termAtt.setTermLength(upto);
}
return true;
}
}
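
A small usage sketch (illustration only, not part of the committed file) of what the filter does to possessives and acronyms when chained behind StandardTokenizer; Version.LUCENE_30 and the sample text are assumptions made for the example.

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

public class StandardFilterSketch {
  public static void main(String[] args) throws Exception {
    TokenStream stream = new StandardTokenizer(Version.LUCENE_30,
        new StringReader("O'Reilly's I.B.M. laptop"));
    stream = new StandardFilter(stream); // strips the possessive 's and the acronym dots
    TermAttribute term = stream.addAttribute(TermAttribute.class);
    while (stream.incrementToken()) {
      System.out.println(term.term());
    }
    // Expected terms, roughly: O'Reilly, IBM, laptop
    stream.end();
    stream.close();
  }
}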

View File

@ -0,0 +1,244 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.standard;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
/** A grammar-based tokenizer constructed with JFlex
*
* <p> This should be a good tokenizer for most European-language documents:
*
* <ul>
* <li>Splits words at punctuation characters, removing punctuation. However, a
* dot that's not followed by whitespace is considered part of a token.
* <li>Splits words at hyphens, unless there's a number in the token, in which case
* the whole token is interpreted as a product number and is not split.
* <li>Recognizes email addresses and internet hostnames as one token.
* </ul>
*
* <p>Many applications have specific tokenizer needs. If this tokenizer does
* not suit your application, please consider copying this source code
* directory to your project and maintaining your own grammar-based tokenizer.
*
* <a name="version"/>
* <p>You must specify the required {@link Version}
* compatibility when creating StandardAnalyzer:
* <ul>
* <li> As of 2.4, Tokens incorrectly identified as acronyms
 * are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
* </ul>
*/
public final class StandardTokenizer extends Tokenizer {
/** A private instance of the JFlex-constructed scanner */
private final StandardTokenizerImpl scanner;
public static final int ALPHANUM = 0;
public static final int APOSTROPHE = 1;
public static final int ACRONYM = 2;
public static final int COMPANY = 3;
public static final int EMAIL = 4;
public static final int HOST = 5;
public static final int NUM = 6;
public static final int CJ = 7;
/**
* @deprecated this solves a bug where HOSTs that end with '.' are identified
* as ACRONYMs.
*/
public static final int ACRONYM_DEP = 8;
/** String token types that correspond to token type int constants */
public static final String [] TOKEN_TYPES = new String [] {
"<ALPHANUM>",
"<APOSTROPHE>",
"<ACRONYM>",
"<COMPANY>",
"<EMAIL>",
"<HOST>",
"<NUM>",
"<CJ>",
"<ACRONYM_DEP>"
};
private boolean replaceInvalidAcronym;
private int maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
/** Set the max allowed token length. Any token longer
* than this is skipped. */
public void setMaxTokenLength(int length) {
this.maxTokenLength = length;
}
/** @see #setMaxTokenLength */
public int getMaxTokenLength() {
return maxTokenLength;
}
/**
* Creates a new instance of the {@link org.apache.lucene.analysis.standard.StandardTokenizer}. Attaches
* the <code>input</code> to the newly created JFlex scanner.
*
* @param input The input reader
*
* See http://issues.apache.org/jira/browse/LUCENE-1068
*/
public StandardTokenizer(Version matchVersion, Reader input) {
super();
this.scanner = new StandardTokenizerImpl(input);
init(input, matchVersion);
}
/**
* Creates a new StandardTokenizer with a given {@link AttributeSource}.
*/
public StandardTokenizer(Version matchVersion, AttributeSource source, Reader input) {
super(source);
this.scanner = new StandardTokenizerImpl(input);
init(input, matchVersion);
}
/**
* Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
*/
public StandardTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
super(factory);
this.scanner = new StandardTokenizerImpl(input);
init(input, matchVersion);
}
private void init(Reader input, Version matchVersion) {
if (matchVersion.onOrAfter(Version.LUCENE_24)) {
replaceInvalidAcronym = true;
} else {
replaceInvalidAcronym = false;
}
this.input = input;
termAtt = addAttribute(TermAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
typeAtt = addAttribute(TypeAttribute.class);
}
// this tokenizer generates three attributes:
// offset, positionIncrement and type
private TermAttribute termAtt;
private OffsetAttribute offsetAtt;
private PositionIncrementAttribute posIncrAtt;
private TypeAttribute typeAtt;
/*
* (non-Javadoc)
*
* @see org.apache.lucene.analysis.TokenStream#next()
*/
@Override
public final boolean incrementToken() throws IOException {
clearAttributes();
int posIncr = 1;
while(true) {
int tokenType = scanner.getNextToken();
if (tokenType == StandardTokenizerImpl.YYEOF) {
return false;
}
if (scanner.yylength() <= maxTokenLength) {
posIncrAtt.setPositionIncrement(posIncr);
scanner.getText(termAtt);
final int start = scanner.yychar();
offsetAtt.setOffset(correctOffset(start), correctOffset(start+termAtt.termLength()));
// This 'if' should be removed in the next release. For now, it converts
// invalid acronyms to HOST. When removed, only the 'else' part should
// remain.
if (tokenType == StandardTokenizerImpl.ACRONYM_DEP) {
if (replaceInvalidAcronym) {
typeAtt.setType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.HOST]);
termAtt.setTermLength(termAtt.termLength() - 1); // remove extra '.'
} else {
typeAtt.setType(StandardTokenizerImpl.TOKEN_TYPES[StandardTokenizerImpl.ACRONYM]);
}
} else {
typeAtt.setType(StandardTokenizerImpl.TOKEN_TYPES[tokenType]);
}
return true;
} else
// When we skip a too-long term, we still increment the
// position increment
posIncr++;
}
}
@Override
public final void end() {
// set final offset
int finalOffset = correctOffset(scanner.yychar() + scanner.yylength());
offsetAtt.setOffset(finalOffset, finalOffset);
}
/*
* (non-Javadoc)
*
* @see org.apache.lucene.analysis.TokenStream#reset()
*/
@Override
public void reset() throws IOException {
super.reset();
scanner.yyreset(input);
}
@Override
public void reset(Reader reader) throws IOException {
super.reset(reader);
reset();
}
/**
 * Prior to https://issues.apache.org/jira/browse/LUCENE-1068, StandardTokenizer mischaracterized tokens like www.abc.com
 * as acronyms when they should have been labeled as hosts instead.
* @return true if StandardTokenizer now returns these tokens as Hosts, otherwise false
*
* @deprecated Remove in 3.X and make true the only valid value
*/
public boolean isReplaceInvalidAcronym() {
return replaceInvalidAcronym;
}
/**
*
* @param replaceInvalidAcronym Set to true to replace mischaracterized acronyms as HOST.
* @deprecated Remove in 3.X and make true the only valid value
*
* See https://issues.apache.org/jira/browse/LUCENE-1068
*/
public void setReplaceInvalidAcronym(boolean replaceInvalidAcronym) {
this.replaceInvalidAcronym = replaceInvalidAcronym;
}
}
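
A usage sketch (not part of the commit) showing the token types the grammar assigns and the effect of setMaxTokenLength; the sample text and Version.LUCENE_30 are assumptions for the example.

import java.io.StringReader;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;

public class StandardTokenizerSketch {
  public static void main(String[] args) throws Exception {
    StandardTokenizer tokenizer = new StandardTokenizer(Version.LUCENE_30,
        new StringReader("Send mail to wiki@apache.org or visit lucene.apache.org"));
    tokenizer.setMaxTokenLength(64); // longer tokens are skipped, bumping the next position increment
    TermAttribute term = tokenizer.addAttribute(TermAttribute.class);
    TypeAttribute type = tokenizer.addAttribute(TypeAttribute.class);
    while (tokenizer.incrementToken()) {
      System.out.println(type.type() + "\t" + term.term());
    }
    // Roughly: <ALPHANUM> for the plain words, <EMAIL> for wiki@apache.org,
    // <HOST> for lucene.apache.org
    tokenizer.end();
    tokenizer.close();
  }
}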

View File

@ -0,0 +1,723 @@
/* The following code was generated by JFlex 1.4.1 on 9/4/08 6:49 PM */
package org.apache.lucene.analysis.standard;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
WARNING: if you change StandardTokenizerImpl.jflex and need to regenerate
the tokenizer, only use Java 1.4 !!!
This grammar currently uses constructs (eg :digit:, :letter:) whose
meaning can vary according to the JRE used to run jflex. See
https://issues.apache.org/jira/browse/LUCENE-1126 for details.
For current backwards compatibility it is needed to support
only Java 1.4 - this will change in Lucene 3.1.
*/
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.4.1
* on 9/4/08 6:49 PM from the specification file
* <tt>/tango/mike/src/lucene.standarddigit/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex</tt>
*/
class StandardTokenizerImpl {
/** This character denotes the end of file */
public static final int YYEOF = -1;
/** initial size of the lookahead buffer */
private static final int ZZ_BUFFERSIZE = 16384;
/** lexical states */
public static final int YYINITIAL = 0;
/**
* Translates characters to character classes
*/
private static final String ZZ_CMAP_PACKED =
"\11\0\1\0\1\15\1\0\1\0\1\14\22\0\1\0\5\0\1\5"+
"\1\3\4\0\1\11\1\7\1\4\1\11\12\2\6\0\1\6\32\12"+
"\4\0\1\10\1\0\32\12\57\0\1\12\12\0\1\12\4\0\1\12"+
"\5\0\27\12\1\0\37\12\1\0\u0128\12\2\0\22\12\34\0\136\12"+
"\2\0\11\12\2\0\7\12\16\0\2\12\16\0\5\12\11\0\1\12"+
"\213\0\1\12\13\0\1\12\1\0\3\12\1\0\1\12\1\0\24\12"+
"\1\0\54\12\1\0\10\12\2\0\32\12\14\0\202\12\12\0\71\12"+
"\2\0\2\12\2\0\2\12\3\0\46\12\2\0\2\12\67\0\46\12"+
"\2\0\1\12\7\0\47\12\110\0\33\12\5\0\3\12\56\0\32\12"+
"\5\0\13\12\25\0\12\2\7\0\143\12\1\0\1\12\17\0\2\12"+
"\11\0\12\2\3\12\23\0\1\12\1\0\33\12\123\0\46\12\u015f\0"+
"\65\12\3\0\1\12\22\0\1\12\7\0\12\12\4\0\12\2\25\0"+
"\10\12\2\0\2\12\2\0\26\12\1\0\7\12\1\0\1\12\3\0"+
"\4\12\42\0\2\12\1\0\3\12\4\0\12\2\2\12\23\0\6\12"+
"\4\0\2\12\2\0\26\12\1\0\7\12\1\0\2\12\1\0\2\12"+
"\1\0\2\12\37\0\4\12\1\0\1\12\7\0\12\2\2\0\3\12"+
"\20\0\7\12\1\0\1\12\1\0\3\12\1\0\26\12\1\0\7\12"+
"\1\0\2\12\1\0\5\12\3\0\1\12\22\0\1\12\17\0\1\12"+
"\5\0\12\2\25\0\10\12\2\0\2\12\2\0\26\12\1\0\7\12"+
"\1\0\2\12\2\0\4\12\3\0\1\12\36\0\2\12\1\0\3\12"+
"\4\0\12\2\25\0\6\12\3\0\3\12\1\0\4\12\3\0\2\12"+
"\1\0\1\12\1\0\2\12\3\0\2\12\3\0\3\12\3\0\10\12"+
"\1\0\3\12\55\0\11\2\25\0\10\12\1\0\3\12\1\0\27\12"+
"\1\0\12\12\1\0\5\12\46\0\2\12\4\0\12\2\25\0\10\12"+
"\1\0\3\12\1\0\27\12\1\0\12\12\1\0\5\12\44\0\1\12"+
"\1\0\2\12\4\0\12\2\25\0\10\12\1\0\3\12\1\0\27\12"+
"\1\0\20\12\46\0\2\12\4\0\12\2\25\0\22\12\3\0\30\12"+
"\1\0\11\12\1\0\1\12\2\0\7\12\71\0\1\1\60\12\1\1"+
"\2\12\14\1\7\12\11\1\12\2\47\0\2\12\1\0\1\12\2\0"+
"\2\12\1\0\1\12\2\0\1\12\6\0\4\12\1\0\7\12\1\0"+
"\3\12\1\0\1\12\1\0\1\12\2\0\2\12\1\0\4\12\1\0"+
"\2\12\11\0\1\12\2\0\5\12\1\0\1\12\11\0\12\2\2\0"+
"\2\12\42\0\1\12\37\0\12\2\26\0\10\12\1\0\42\12\35\0"+
"\4\12\164\0\42\12\1\0\5\12\1\0\2\12\25\0\12\2\6\0"+
"\6\12\112\0\46\12\12\0\47\12\11\0\132\12\5\0\104\12\5\0"+
"\122\12\6\0\7\12\1\0\77\12\1\0\1\12\1\0\4\12\2\0"+
"\7\12\1\0\1\12\1\0\4\12\2\0\47\12\1\0\1\12\1\0"+
"\4\12\2\0\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0"+
"\1\12\1\0\4\12\2\0\7\12\1\0\7\12\1\0\27\12\1\0"+
"\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0\47\12\1\0"+
"\23\12\16\0\11\2\56\0\125\12\14\0\u026c\12\2\0\10\12\12\0"+
"\32\12\5\0\113\12\225\0\64\12\54\0\12\2\46\0\12\2\6\0"+
"\130\12\10\0\51\12\u0557\0\234\12\4\0\132\12\6\0\26\12\2\0"+
"\6\12\2\0\46\12\2\0\6\12\2\0\10\12\1\0\1\12\1\0"+
"\1\12\1\0\1\12\1\0\37\12\2\0\65\12\1\0\7\12\1\0"+
"\1\12\3\0\3\12\1\0\7\12\3\0\4\12\2\0\6\12\4\0"+
"\15\12\5\0\3\12\1\0\7\12\202\0\1\12\202\0\1\12\4\0"+
"\1\12\2\0\12\12\1\0\1\12\3\0\5\12\6\0\1\12\1\0"+
"\1\12\1\0\1\12\1\0\4\12\1\0\3\12\1\0\7\12\u0ecb\0"+
"\2\12\52\0\5\12\12\0\1\13\124\13\10\13\2\13\2\13\132\13"+
"\1\13\3\13\6\13\50\13\3\13\1\0\136\12\21\0\30\12\70\0"+
"\20\13\u0100\0\200\13\200\0\u19b6\13\12\13\100\0\u51a6\13\132\13\u048d\12"+
"\u0773\0\u2ba4\12\u215c\0\u012e\13\322\13\7\12\14\0\5\12\5\0\1\12"+
"\1\0\12\12\1\0\15\12\1\0\5\12\1\0\1\12\1\0\2\12"+
"\1\0\2\12\1\0\154\12\41\0\u016b\12\22\0\100\12\2\0\66\12"+
"\50\0\14\12\164\0\3\12\1\0\1\12\1\0\207\12\23\0\12\2"+
"\7\0\32\12\6\0\32\12\12\0\1\13\72\13\37\12\3\0\6\12"+
"\2\0\6\12\2\0\6\12\2\0\3\12\43\0";
/**
* Translates characters to character classes
*/
private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED);
/**
* Translates DFA states to action switch labels.
*/
private static final int [] ZZ_ACTION = zzUnpackAction();
private static final String ZZ_ACTION_PACKED_0 =
"\1\0\1\1\3\2\1\3\1\1\13\0\1\2\3\4"+
"\2\0\1\5\1\0\1\5\3\4\6\5\1\6\1\4"+
"\2\7\1\10\1\0\1\10\3\0\2\10\1\11\1\12"+
"\1\4";
private static int [] zzUnpackAction() {
int [] result = new int[51];
int offset = 0;
offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result);
return result;
}
private static int zzUnpackAction(String packed, int offset, int [] result) {
int i = 0; /* index in packed string */
int j = offset; /* index in unpacked array */
int l = packed.length();
while (i < l) {
int count = packed.charAt(i++);
int value = packed.charAt(i++);
do result[j++] = value; while (--count > 0);
}
return j;
}
/**
* Translates a state to a row index in the transition table
*/
private static final int [] ZZ_ROWMAP = zzUnpackRowMap();
private static final String ZZ_ROWMAP_PACKED_0 =
"\0\0\0\16\0\34\0\52\0\70\0\16\0\106\0\124"+
"\0\142\0\160\0\176\0\214\0\232\0\250\0\266\0\304"+
"\0\322\0\340\0\356\0\374\0\u010a\0\u0118\0\u0126\0\u0134"+
"\0\u0142\0\u0150\0\u015e\0\u016c\0\u017a\0\u0188\0\u0196\0\u01a4"+
"\0\u01b2\0\u01c0\0\u01ce\0\u01dc\0\u01ea\0\u01f8\0\322\0\u0206"+
"\0\u0214\0\u0222\0\u0230\0\u023e\0\u024c\0\u025a\0\124\0\214"+
"\0\u0268\0\u0276\0\u0284";
private static int [] zzUnpackRowMap() {
int [] result = new int[51];
int offset = 0;
offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result);
return result;
}
private static int zzUnpackRowMap(String packed, int offset, int [] result) {
int i = 0; /* index in packed string */
int j = offset; /* index in unpacked array */
int l = packed.length();
while (i < l) {
int high = packed.charAt(i++) << 16;
result[j++] = high | packed.charAt(i++);
}
return j;
}
/**
* The transition table of the DFA
*/
private static final int [] ZZ_TRANS = zzUnpackTrans();
private static final String ZZ_TRANS_PACKED_0 =
"\1\2\1\3\1\4\7\2\1\5\1\6\1\7\1\2"+
"\17\0\2\3\1\0\1\10\1\0\1\11\2\12\1\13"+
"\1\3\4\0\1\3\1\4\1\0\1\14\1\0\1\11"+
"\2\15\1\16\1\4\4\0\1\3\1\4\1\17\1\20"+
"\1\21\1\22\2\12\1\13\1\23\20\0\1\2\1\0"+
"\1\24\1\25\7\0\1\26\4\0\2\27\7\0\1\27"+
"\4\0\1\30\1\31\7\0\1\32\5\0\1\33\7\0"+
"\1\13\4\0\1\34\1\35\7\0\1\36\4\0\1\37"+
"\1\40\7\0\1\41\4\0\1\42\1\43\7\0\1\44"+
"\15\0\1\45\4\0\1\24\1\25\7\0\1\46\15\0"+
"\1\47\4\0\2\27\7\0\1\50\4\0\1\3\1\4"+
"\1\17\1\10\1\21\1\22\2\12\1\13\1\23\4\0"+
"\2\24\1\0\1\51\1\0\1\11\2\52\1\0\1\24"+
"\4\0\1\24\1\25\1\0\1\53\1\0\1\11\2\54"+
"\1\55\1\25\4\0\1\24\1\25\1\0\1\51\1\0"+
"\1\11\2\52\1\0\1\26\4\0\2\27\1\0\1\56"+
"\2\0\1\56\2\0\1\27\4\0\2\30\1\0\1\52"+
"\1\0\1\11\2\52\1\0\1\30\4\0\1\30\1\31"+
"\1\0\1\54\1\0\1\11\2\54\1\55\1\31\4\0"+
"\1\30\1\31\1\0\1\52\1\0\1\11\2\52\1\0"+
"\1\32\5\0\1\33\1\0\1\55\2\0\3\55\1\33"+
"\4\0\2\34\1\0\1\57\1\0\1\11\2\12\1\13"+
"\1\34\4\0\1\34\1\35\1\0\1\60\1\0\1\11"+
"\2\15\1\16\1\35\4\0\1\34\1\35\1\0\1\57"+
"\1\0\1\11\2\12\1\13\1\36\4\0\2\37\1\0"+
"\1\12\1\0\1\11\2\12\1\13\1\37\4\0\1\37"+
"\1\40\1\0\1\15\1\0\1\11\2\15\1\16\1\40"+
"\4\0\1\37\1\40\1\0\1\12\1\0\1\11\2\12"+
"\1\13\1\41\4\0\2\42\1\0\1\13\2\0\3\13"+
"\1\42\4\0\1\42\1\43\1\0\1\16\2\0\3\16"+
"\1\43\4\0\1\42\1\43\1\0\1\13\2\0\3\13"+
"\1\44\6\0\1\17\6\0\1\45\4\0\1\24\1\25"+
"\1\0\1\61\1\0\1\11\2\52\1\0\1\26\4\0"+
"\2\27\1\0\1\56\2\0\1\56\2\0\1\50\4\0"+
"\2\24\7\0\1\24\4\0\2\30\7\0\1\30\4\0"+
"\2\34\7\0\1\34\4\0\2\37\7\0\1\37\4\0"+
"\2\42\7\0\1\42\4\0\2\62\7\0\1\62\4\0"+
"\2\24\7\0\1\63\4\0\2\62\1\0\1\56\2\0"+
"\1\56\2\0\1\62\4\0\2\24\1\0\1\61\1\0"+
"\1\11\2\52\1\0\1\24\3\0";
private static int [] zzUnpackTrans() {
int [] result = new int[658];
int offset = 0;
offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result);
return result;
}
private static int zzUnpackTrans(String packed, int offset, int [] result) {
int i = 0; /* index in packed string */
int j = offset; /* index in unpacked array */
int l = packed.length();
while (i < l) {
int count = packed.charAt(i++);
int value = packed.charAt(i++);
value--;
do result[j++] = value; while (--count > 0);
}
return j;
}
/* error codes */
private static final int ZZ_UNKNOWN_ERROR = 0;
private static final int ZZ_NO_MATCH = 1;
private static final int ZZ_PUSHBACK_2BIG = 2;
/* error messages for the codes above */
private static final String ZZ_ERROR_MSG[] = {
"Unkown internal scanner error",
"Error: could not match input",
"Error: pushback value was too large"
};
/**
* ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
*/
private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute();
private static final String ZZ_ATTRIBUTE_PACKED_0 =
"\1\0\1\11\3\1\1\11\1\1\13\0\4\1\2\0"+
"\1\1\1\0\17\1\1\0\1\1\3\0\5\1";
private static int [] zzUnpackAttribute() {
int [] result = new int[51];
int offset = 0;
offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result);
return result;
}
private static int zzUnpackAttribute(String packed, int offset, int [] result) {
int i = 0; /* index in packed string */
int j = offset; /* index in unpacked array */
int l = packed.length();
while (i < l) {
int count = packed.charAt(i++);
int value = packed.charAt(i++);
do result[j++] = value; while (--count > 0);
}
return j;
}
/** the input device */
private java.io.Reader zzReader;
/** the current state of the DFA */
private int zzState;
/** the current lexical state */
private int zzLexicalState = YYINITIAL;
/** this buffer contains the current text to be matched and is
the source of the yytext() string */
private char zzBuffer[] = new char[ZZ_BUFFERSIZE];
/** the textposition at the last accepting state */
private int zzMarkedPos;
/** the textposition at the last state to be included in yytext */
private int zzPushbackPos;
/** the current text position in the buffer */
private int zzCurrentPos;
/** startRead marks the beginning of the yytext() string in the buffer */
private int zzStartRead;
/** endRead marks the last character in the buffer, that has been read
from input */
private int zzEndRead;
/** number of newlines encountered up to the start of the matched text */
private int yyline;
/** the number of characters up to the start of the matched text */
private int yychar;
/**
* the number of characters from the last newline up to the start of the
* matched text
*/
private int yycolumn;
/**
* zzAtBOL == true <=> the scanner is currently at the beginning of a line
*/
private boolean zzAtBOL = true;
/** zzAtEOF == true <=> the scanner is at the EOF */
private boolean zzAtEOF;
/* user code: */
public static final int ALPHANUM = StandardTokenizer.ALPHANUM;
public static final int APOSTROPHE = StandardTokenizer.APOSTROPHE;
public static final int ACRONYM = StandardTokenizer.ACRONYM;
public static final int COMPANY = StandardTokenizer.COMPANY;
public static final int EMAIL = StandardTokenizer.EMAIL;
public static final int HOST = StandardTokenizer.HOST;
public static final int NUM = StandardTokenizer.NUM;
public static final int CJ = StandardTokenizer.CJ;
/**
* @deprecated this solves a bug where HOSTs that end with '.' are identified
* as ACRONYMs.
*/
public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
public static final String [] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;
public final int yychar()
{
return yychar;
}
/**
* Fills Lucene token with the current token text.
*/
final void getText(Token t) {
t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
}
/**
* Fills TermAttribute with the current token text.
*/
final void getText(TermAttribute t) {
t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
}
/**
* Creates a new scanner
* There is also a java.io.InputStream version of this constructor.
*
* @param in the java.io.Reader to read input from.
*/
StandardTokenizerImpl(java.io.Reader in) {
this.zzReader = in;
}
/**
* Creates a new scanner.
* There is also java.io.Reader version of this constructor.
*
* @param in the java.io.Inputstream to read input from.
*/
StandardTokenizerImpl(java.io.InputStream in) {
this(new java.io.InputStreamReader(in));
}
/**
* Unpacks the compressed character translation table.
*
* @param packed the packed character translation table
* @return the unpacked character translation table
*/
private static char [] zzUnpackCMap(String packed) {
char [] map = new char[0x10000];
int i = 0; /* index in packed string */
int j = 0; /* index in unpacked array */
while (i < 1154) {
int count = packed.charAt(i++);
char value = packed.charAt(i++);
do map[j++] = value; while (--count > 0);
}
return map;
}
/**
* Refills the input buffer.
*
* @return <code>false</code>, iff there was new input.
*
* @exception java.io.IOException if any I/O-Error occurs
*/
private boolean zzRefill() throws java.io.IOException {
/* first: make room (if you can) */
if (zzStartRead > 0) {
System.arraycopy(zzBuffer, zzStartRead,
zzBuffer, 0,
zzEndRead-zzStartRead);
/* translate stored positions */
zzEndRead-= zzStartRead;
zzCurrentPos-= zzStartRead;
zzMarkedPos-= zzStartRead;
zzPushbackPos-= zzStartRead;
zzStartRead = 0;
}
/* is the buffer big enough? */
if (zzCurrentPos >= zzBuffer.length) {
/* if not: blow it up */
char newBuffer[] = new char[zzCurrentPos*2];
System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length);
zzBuffer = newBuffer;
}
/* finally: fill the buffer with new input */
int numRead = zzReader.read(zzBuffer, zzEndRead,
zzBuffer.length-zzEndRead);
if (numRead < 0) {
return true;
}
else {
zzEndRead+= numRead;
return false;
}
}
/**
* Closes the input stream.
*/
public final void yyclose() throws java.io.IOException {
zzAtEOF = true; /* indicate end of file */
zzEndRead = zzStartRead; /* invalidate buffer */
if (zzReader != null)
zzReader.close();
}
/**
* Resets the scanner to read from a new input stream.
* Does not close the old reader.
*
* All internal variables are reset, the old input stream
* <b>cannot</b> be reused (internal buffer is discarded and lost).
* Lexical state is set to <tt>ZZ_INITIAL</tt>.
*
* @param reader the new input stream
*/
public final void yyreset(java.io.Reader reader) {
zzReader = reader;
zzAtBOL = true;
zzAtEOF = false;
zzEndRead = zzStartRead = 0;
zzCurrentPos = zzMarkedPos = zzPushbackPos = 0;
yyline = yychar = yycolumn = 0;
zzLexicalState = YYINITIAL;
}
/**
* Returns the current lexical state.
*/
public final int yystate() {
return zzLexicalState;
}
/**
* Enters a new lexical state
*
* @param newState the new lexical state
*/
public final void yybegin(int newState) {
zzLexicalState = newState;
}
/**
* Returns the text matched by the current regular expression.
*/
public final String yytext() {
return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead );
}
/**
* Returns the character at position <tt>pos</tt> from the
* matched text.
*
* It is equivalent to yytext().charAt(pos), but faster
*
* @param pos the position of the character to fetch.
* A value from 0 to yylength()-1.
*
* @return the character at position pos
*/
public final char yycharat(int pos) {
return zzBuffer[zzStartRead+pos];
}
/**
* Returns the length of the matched text region.
*/
public final int yylength() {
return zzMarkedPos-zzStartRead;
}
/**
* Reports an error that occurred while scanning.
*
* In a well-formed scanner (no or only correct usage of
* yypushback(int) and a match-all fallback rule) this method
* will only be called with things that "Can't Possibly Happen".
* If this method is called, something is seriously wrong
* (e.g. a JFlex bug producing a faulty scanner etc.).
*
* Usual syntax/scanner level error handling should be done
* in error fallback rules.
*
* @param errorCode the code of the error message to display
*/
private void zzScanError(int errorCode) {
String message;
try {
message = ZZ_ERROR_MSG[errorCode];
}
catch (ArrayIndexOutOfBoundsException e) {
message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR];
}
throw new Error(message);
}
/**
* Pushes the specified amount of characters back into the input stream.
*
* They will be read again by the next call of the scanning method
*
* @param number the number of characters to be read again.
* This number must not be greater than yylength()!
*/
public void yypushback(int number) {
if ( number > yylength() )
zzScanError(ZZ_PUSHBACK_2BIG);
zzMarkedPos -= number;
}
/**
* Resumes scanning until the next regular expression is matched,
* the end of input is encountered or an I/O-Error occurs.
*
* @return the next token
* @exception java.io.IOException if any I/O-Error occurs
*/
public int getNextToken() throws java.io.IOException {
int zzInput;
int zzAction;
// cached fields:
int zzCurrentPosL;
int zzMarkedPosL;
int zzEndReadL = zzEndRead;
char [] zzBufferL = zzBuffer;
char [] zzCMapL = ZZ_CMAP;
int [] zzTransL = ZZ_TRANS;
int [] zzRowMapL = ZZ_ROWMAP;
int [] zzAttrL = ZZ_ATTRIBUTE;
while (true) {
zzMarkedPosL = zzMarkedPos;
yychar+= zzMarkedPosL-zzStartRead;
zzAction = -1;
zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL;
zzState = zzLexicalState;
zzForAction: {
while (true) {
if (zzCurrentPosL < zzEndReadL)
zzInput = zzBufferL[zzCurrentPosL++];
else if (zzAtEOF) {
zzInput = YYEOF;
break zzForAction;
}
else {
// store back cached positions
zzCurrentPos = zzCurrentPosL;
zzMarkedPos = zzMarkedPosL;
boolean eof = zzRefill();
// get translated positions and possibly new buffer
zzCurrentPosL = zzCurrentPos;
zzMarkedPosL = zzMarkedPos;
zzBufferL = zzBuffer;
zzEndReadL = zzEndRead;
if (eof) {
zzInput = YYEOF;
break zzForAction;
}
else {
zzInput = zzBufferL[zzCurrentPosL++];
}
}
int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ];
if (zzNext == -1) break zzForAction;
zzState = zzNext;
int zzAttributes = zzAttrL[zzState];
if ( (zzAttributes & 1) == 1 ) {
zzAction = zzState;
zzMarkedPosL = zzCurrentPosL;
if ( (zzAttributes & 8) == 8 ) break zzForAction;
}
}
}
// store back cached position
zzMarkedPos = zzMarkedPosL;
switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
case 4:
{ return HOST;
}
case 11: break;
case 9:
{ return ACRONYM;
}
case 12: break;
case 8:
{ return ACRONYM_DEP;
}
case 13: break;
case 1:
{ /* ignore */
}
case 14: break;
case 5:
{ return NUM;
}
case 15: break;
case 3:
{ return CJ;
}
case 16: break;
case 2:
{ return ALPHANUM;
}
case 17: break;
case 7:
{ return COMPANY;
}
case 18: break;
case 6:
{ return APOSTROPHE;
}
case 19: break;
case 10:
{ return EMAIL;
}
case 20: break;
default:
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
zzAtEOF = true;
return YYEOF;
}
else {
zzScanError(ZZ_NO_MATCH);
}
}
}
}
}

View File

@ -0,0 +1,145 @@
package org.apache.lucene.analysis.standard;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
WARNING: if you change StandardTokenizerImpl.jflex and need to regenerate
the tokenizer, only use Java 1.4 !!!
This grammar currently uses constructs (eg :digit:, :letter:) whose
meaning can vary according to the JRE used to run jflex. See
https://issues.apache.org/jira/browse/LUCENE-1126 for details.
For current backwards compatibility it is needed to support
only Java 1.4 - this will change in Lucene 3.1.
*/
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
%%
%class StandardTokenizerImpl
%unicode
%integer
%function getNextToken
%pack
%char
%{
public static final int ALPHANUM = StandardTokenizer.ALPHANUM;
public static final int APOSTROPHE = StandardTokenizer.APOSTROPHE;
public static final int ACRONYM = StandardTokenizer.ACRONYM;
public static final int COMPANY = StandardTokenizer.COMPANY;
public static final int EMAIL = StandardTokenizer.EMAIL;
public static final int HOST = StandardTokenizer.HOST;
public static final int NUM = StandardTokenizer.NUM;
public static final int CJ = StandardTokenizer.CJ;
/**
* @deprecated this solves a bug where HOSTs that end with '.' are identified
* as ACRONYMs.
*/
public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
public static final String [] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;
public final int yychar()
{
return yychar;
}
/**
* Fills Lucene token with the current token text.
*/
final void getText(Token t) {
t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
}
/**
* Fills TermAttribute with the current token text.
*/
final void getText(TermAttribute t) {
t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
}
%}
THAI = [\u0E00-\u0E59]
// basic word: a sequence of digits & letters (includes Thai to enable ThaiAnalyzer to function)
ALPHANUM = ({LETTER}|{THAI}|[:digit:])+
// internal apostrophes: O'Reilly, you're, O'Reilly's
// use a post-filter to remove possessives
APOSTROPHE = {ALPHA} ("'" {ALPHA})+
// acronyms: U.S.A., I.B.M., etc.
// use a post-filter to remove dots
ACRONYM = {LETTER} "." ({LETTER} ".")+
ACRONYM_DEP = {ALPHANUM} "." ({ALPHANUM} ".")+
// company names like AT&T and Excite@Home.
COMPANY = {ALPHA} ("&"|"@") {ALPHA}
// email addresses
EMAIL = {ALPHANUM} (("."|"-"|"_") {ALPHANUM})* "@" {ALPHANUM} (("."|"-") {ALPHANUM})+
// hostname
HOST = {ALPHANUM} ((".") {ALPHANUM})+
// floating point, serial, model numbers, ip addresses, etc.
// every other segment must have at least one digit
NUM = ({ALPHANUM} {P} {HAS_DIGIT}
| {HAS_DIGIT} {P} {ALPHANUM}
| {ALPHANUM} ({P} {HAS_DIGIT} {P} {ALPHANUM})+
| {HAS_DIGIT} ({P} {ALPHANUM} {P} {HAS_DIGIT})+
| {ALPHANUM} {P} {HAS_DIGIT} ({P} {ALPHANUM} {P} {HAS_DIGIT})+
| {HAS_DIGIT} {P} {ALPHANUM} ({P} {HAS_DIGIT} {P} {ALPHANUM})+)
// punctuation
P = ("_"|"-"|"/"|"."|",")
// at least one digit
HAS_DIGIT = ({LETTER}|[:digit:])* [:digit:] ({LETTER}|[:digit:])*
ALPHA = ({LETTER})+
// From the JFlex manual: "the expression that matches everything of <a> not matched by <b> is !(!<a>|<b>)"
LETTER = !(![:letter:]|{CJ})
// Chinese and Japanese (but NOT Korean, which is included in [:letter:])
CJ = [\u3100-\u312f\u3040-\u309F\u30A0-\u30FF\u31F0-\u31FF\u3300-\u337f\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff\uff65-\uff9f]
WHITESPACE = \r\n | [ \r\n\t\f]
%%
{ALPHANUM} { return ALPHANUM; }
{APOSTROPHE} { return APOSTROPHE; }
{ACRONYM} { return ACRONYM; }
{COMPANY} { return COMPANY; }
{EMAIL} { return EMAIL; }
{HOST} { return HOST; }
{NUM} { return NUM; }
{CJ} { return CJ; }
{ACRONYM_DEP} { return ACRONYM_DEP; }
/** Ignore the rest */
. | {WHITESPACE} { /* ignore */ }

View File

@ -0,0 +1,25 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
A fast grammar-based tokenizer constructed with JFlex.
</body>
</html>

View File

@ -0,0 +1,44 @@
package org.apache.lucene.analysis.tokenattributes;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Attribute;
/**
* This attribute can be used to pass different flags down the {@link Tokenizer} chain,
* eg from one TokenFilter to another one.
*/
public interface FlagsAttribute extends Attribute {
/**
* EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long.
* <p/>
*
* Get the bitset for any bits that have been set. This is completely distinct from {@link TypeAttribute#type()}, although they do share similar purposes.
* The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
*
*
* @return The bits
*/
public int getFlags();
/**
* @see #getFlags()
*/
public void setFlags(int flags);
}
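
A sketch of the intended usage pattern: an upstream filter sets a bit that a later filter in the same chain can test. The MarkNumbersFilter class and its NUMBER_FLAG constant are hypothetical, invented for this example.

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

public final class MarkNumbersFilter extends TokenFilter {
  public static final int NUMBER_FLAG = 1; // hypothetical bit, chosen for this sketch

  private final TypeAttribute typeAtt;
  private final FlagsAttribute flagsAtt;

  public MarkNumbersFilter(TokenStream input) {
    super(input);
    typeAtt = addAttribute(TypeAttribute.class);
    flagsAtt = addAttribute(FlagsAttribute.class);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    if ("<NUM>".equals(typeAtt.type())) {
      // Downstream filters can check (flagsAtt.getFlags() & NUMBER_FLAG) != 0.
      flagsAtt.setFlags(flagsAtt.getFlags() | NUMBER_FLAG);
    }
    return true;
  }
}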

View File

@ -0,0 +1,80 @@
package org.apache.lucene.analysis.tokenattributes;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Serializable;
import org.apache.lucene.util.AttributeImpl;
/**
* This attribute can be used to pass different flags down the tokenizer chain,
* eg from one TokenFilter to another one.
*/
public class FlagsAttributeImpl extends AttributeImpl implements FlagsAttribute, Cloneable, Serializable {
private int flags = 0;
/**
* EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long.
* <p/>
*
* Get the bitset for any bits that have been set. This is completely distinct from {@link TypeAttribute#type()}, although they do share similar purposes.
* The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s.
*
*
* @return The bits
*/
public int getFlags() {
return flags;
}
/**
* @see #getFlags()
*/
public void setFlags(int flags) {
this.flags = flags;
}
@Override
public void clear() {
flags = 0;
}
@Override
public boolean equals(Object other) {
if (this == other) {
return true;
}
if (other instanceof FlagsAttributeImpl) {
return ((FlagsAttributeImpl) other).flags == flags;
}
return false;
}
@Override
public int hashCode() {
return flags;
}
@Override
public void copyTo(AttributeImpl target) {
FlagsAttribute t = (FlagsAttribute) target;
t.setFlags(flags);
}
}

View File

@ -0,0 +1,44 @@
package org.apache.lucene.analysis.tokenattributes;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.Attribute;
/**
* The start and end character offset of a Token.
*/
public interface OffsetAttribute extends Attribute {
/** Returns this Token's starting offset, the position of the first character
corresponding to this token in the source text.
Note that the difference between endOffset() and startOffset() may not be
equal to termText.length(), as the term text may have been altered by a
stemmer or some other filter. */
public int startOffset();
/** Set the starting and ending offset.
@see #startOffset() and #endOffset()*/
public void setOffset(int startOffset, int endOffset);
/** Returns this Token's ending offset, one greater than the position of the
last character corresponding to this token in the source text. The length
of the token in the source text is (endOffset - startOffset). */
public int endOffset();
}
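
A sketch of the typical use of offsets, e.g. for highlighting: mapping each term back to the slice of the original text it came from. Version.LUCENE_30 and the sample text are assumptions for the example.

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

public class OffsetSketch {
  public static void main(String[] args) throws Exception {
    String text = "Please e-mail carol@example.com today.";
    TokenStream ts = new StandardTokenizer(Version.LUCENE_30, new StringReader(text));
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
    while (ts.incrementToken()) {
      // endOffset() - startOffset() is the token's length in the source text,
      // which can differ from termLength() once filters rewrite the term.
      String slice = text.substring(offset.startOffset(), offset.endOffset());
      System.out.println(term.term() + " <- \"" + slice + "\"");
    }
    ts.end();
    ts.close();
  }
}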

View File

@ -0,0 +1,90 @@
package org.apache.lucene.analysis.tokenattributes;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Serializable;
import org.apache.lucene.util.AttributeImpl;
/**
* The start and end character offset of a Token.
*/
public class OffsetAttributeImpl extends AttributeImpl implements OffsetAttribute, Cloneable, Serializable {
private int startOffset;
private int endOffset;
/** Returns this Token's starting offset, the position of the first character
corresponding to this token in the source text.
Note that the difference between endOffset() and startOffset() may not be
equal to termText.length(), as the term text may have been altered by a
stemmer or some other filter. */
public int startOffset() {
return startOffset;
}
/** Set the starting and ending offset.
@see #startOffset() and #endOffset()*/
public void setOffset(int startOffset, int endOffset) {
this.startOffset = startOffset;
this.endOffset = endOffset;
}
/** Returns this Token's ending offset, one greater than the position of the
last character corresponding to this token in the source text. The length
of the token in the source text is (endOffset - startOffset). */
public int endOffset() {
return endOffset;
}
@Override
public void clear() {
startOffset = 0;
endOffset = 0;
}
@Override
public boolean equals(Object other) {
if (other == this) {
return true;
}
if (other instanceof OffsetAttributeImpl) {
OffsetAttributeImpl o = (OffsetAttributeImpl) other;
return o.startOffset == startOffset && o.endOffset == endOffset;
}
return false;
}
@Override
public int hashCode() {
int code = startOffset;
code = code * 31 + endOffset;
return code;
}
@Override
public void copyTo(AttributeImpl target) {
OffsetAttribute t = (OffsetAttribute) target;
t.setOffset(startOffset, endOffset);
}
}

View File

@ -0,0 +1,36 @@
package org.apache.lucene.analysis.tokenattributes;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.Payload;
import org.apache.lucene.util.Attribute;
/**
* The payload of a Token. See also {@link Payload}.
*/
public interface PayloadAttribute extends Attribute {
/**
* Returns this Token's payload.
*/
public Payload getPayload();
/**
* Sets this Token's payload.
*/
public void setPayload(Payload payload);
}
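
A sketch of a filter that attaches a payload to every token; the ConstantPayloadFilter class is hypothetical, and the Payload(byte[]) constructor of org.apache.lucene.index.Payload is assumed.

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.index.Payload;

public final class ConstantPayloadFilter extends TokenFilter {
  private final PayloadAttribute payloadAtt;
  private final byte marker;

  public ConstantPayloadFilter(TokenStream input, byte marker) {
    super(input);
    payloadAtt = addAttribute(PayloadAttribute.class);
    this.marker = marker;
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    // The payload is stored with each posting at indexing time and can be
    // read back per position at search time.
    payloadAtt.setPayload(new Payload(new byte[] { marker }));
    return true;
  }
}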

View File

@ -0,0 +1,101 @@
package org.apache.lucene.analysis.tokenattributes;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Serializable;
import org.apache.lucene.index.Payload;
import org.apache.lucene.util.AttributeImpl;
/**
* The payload of a Token. See also {@link Payload}.
*/
public class PayloadAttributeImpl extends AttributeImpl implements PayloadAttribute, Cloneable, Serializable {
private Payload payload;
/**
* Initialize this attribute with no payload.
*/
public PayloadAttributeImpl() {}
/**
* Initialize this attribute with the given payload.
*/
public PayloadAttributeImpl(Payload payload) {
this.payload = payload;
}
/**
* Returns this Token's payload.
*/
public Payload getPayload() {
return this.payload;
}
/**
* Sets this Token's payload.
*/
public void setPayload(Payload payload) {
this.payload = payload;
}
@Override
public void clear() {
payload = null;
}
@Override
public Object clone() {
PayloadAttributeImpl clone = (PayloadAttributeImpl) super.clone();
if (payload != null) {
clone.payload = (Payload) payload.clone();
}
return clone;
}
@Override
public boolean equals(Object other) {
if (other == this) {
return true;
}
if (other instanceof PayloadAttribute) {
PayloadAttributeImpl o = (PayloadAttributeImpl) other;
if (o.payload == null || payload == null) {
return o.payload == null && payload == null;
}
return o.payload.equals(payload);
}
return false;
}
@Override
public int hashCode() {
return (payload == null) ? 0 : payload.hashCode();
}
@Override
public void copyTo(AttributeImpl target) {
PayloadAttribute t = (PayloadAttribute) target;
t.setPayload((payload == null) ? null : (Payload) payload.clone());
}
}

View File

@ -0,0 +1,59 @@
package org.apache.lucene.analysis.tokenattributes;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.Attribute;
/** The positionIncrement determines the position of this token
* relative to the previous Token in a TokenStream, used in phrase
* searching.
*
* <p>The default value is one.
*
* <p>Some common uses for this are:<ul>
*
* <li>Set it to zero to put multiple terms in the same position. This is
* useful if, e.g., a word has multiple stems. Searches for phrases
* including either stem will match. In this case, all but the first stem's
* increment should be set to zero: the increment of the first instance
* should be one. Repeating a token with an increment of zero can also be
* used to boost the scores of matches on that token.
*
* <li>Set it to values greater than one to inhibit exact phrase matches.
* If, for example, one does not want phrases to match across removed stop
* words, then one could build a stop word filter that removes stop words and
* also sets the increment to the number of stop words removed before each
* non-stop word. Then exact phrase queries will only match when the terms
* occur with no intervening stop words.
*
* </ul>
*
* @see org.apache.lucene.index.TermPositions
*/
public interface PositionIncrementAttribute extends Attribute {
/** Set the position increment. The default value is one.
*
* @param positionIncrement the distance from the prior term
*/
public void setPositionIncrement(int positionIncrement);
/** Returns the position increment of this Token.
* @see #setPositionIncrement
*/
public int getPositionIncrement();
}
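
A sketch of the zero-increment idiom described above: a hypothetical filter that injects the synonym "quick" at the same position as "fast", using captureState()/restoreState() so the injected token keeps the original offsets and type.

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.AttributeSource;

public final class QuickSynonymFilter extends TokenFilter {
  private final TermAttribute termAtt;
  private final PositionIncrementAttribute posIncrAtt;
  private AttributeSource.State pending; // captured attributes of the token to duplicate

  public QuickSynonymFilter(TokenStream input) {
    super(input);
    termAtt = addAttribute(TermAttribute.class);
    posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (pending != null) {
      restoreState(pending); // replay offsets/type of "fast"
      pending = null;
      termAtt.setTermBuffer("quick");
      posIncrAtt.setPositionIncrement(0); // same position, so phrase queries match either word
      return true;
    }
    if (!input.incrementToken()) {
      return false;
    }
    if ("fast".equals(termAtt.term())) {
      pending = captureState(); // emit the synonym on the next call
    }
    return true;
  }
}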

View File

@ -0,0 +1,99 @@
package org.apache.lucene.analysis.tokenattributes;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Serializable;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.AttributeImpl;
/** The positionIncrement determines the position of this token
* relative to the previous Token in a {@link TokenStream}, used in phrase
* searching.
*
* <p>The default value is one.
*
* <p>Some common uses for this are:<ul>
*
* <li>Set it to zero to put multiple terms in the same position. This is
* useful if, e.g., a word has multiple stems. Searches for phrases
* including either stem will match. In this case, all but the first stem's
* increment should be set to zero: the increment of the first instance
* should be one. Repeating a token with an increment of zero can also be
* used to boost the scores of matches on that token.
*
* <li>Set it to values greater than one to inhibit exact phrase matches.
* If, for example, one does not want phrases to match across removed stop
* words, then one could build a stop word filter that removes stop words and
* also sets the increment to the number of stop words removed before each
* non-stop word. Then exact phrase queries will only match when the terms
* occur with no intervening stop words.
*
* </ul>
*/
public class PositionIncrementAttributeImpl extends AttributeImpl implements PositionIncrementAttribute, Cloneable, Serializable {
private int positionIncrement = 1;
/** Set the position increment. The default value is one.
*
* @param positionIncrement the distance from the prior term
*/
public void setPositionIncrement(int positionIncrement) {
if (positionIncrement < 0)
throw new IllegalArgumentException
("Increment must be zero or greater: " + positionIncrement);
this.positionIncrement = positionIncrement;
}
/** Returns the position increment of this Token.
* @see #setPositionIncrement
*/
public int getPositionIncrement() {
return positionIncrement;
}
@Override
public void clear() {
this.positionIncrement = 1;
}
@Override
public boolean equals(Object other) {
if (other == this) {
return true;
}
if (other instanceof PositionIncrementAttributeImpl) {
return positionIncrement == ((PositionIncrementAttributeImpl) other).positionIncrement;
}
return false;
}
@Override
public int hashCode() {
return positionIncrement;
}
@Override
public void copyTo(AttributeImpl target) {
PositionIncrementAttribute t = (PositionIncrementAttribute) target;
t.setPositionIncrement(positionIncrement);
}
}

View File

@ -0,0 +1,91 @@
package org.apache.lucene.analysis.tokenattributes;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.Attribute;
/**
* The term text of a Token.
*/
public interface TermAttribute extends Attribute {
/** Returns the Token's term text.
*
* This method has a performance penalty
* because the text is stored internally in a char[]. If
* possible, use {@link #termBuffer()} and {@link
* #termLength()} directly instead. If you really need a
* String, use this method, which is nothing more than
* a convenience call to <b>new String(token.termBuffer(), 0, token.termLength())</b>
*/
public String term();
/** Copies the contents of buffer, starting at offset for
* length characters, into the termBuffer array.
* @param buffer the buffer to copy
* @param offset the index in the buffer of the first character to copy
* @param length the number of characters to copy
*/
public void setTermBuffer(char[] buffer, int offset, int length);
/** Copies the contents of buffer into the termBuffer array.
* @param buffer the buffer to copy
*/
public void setTermBuffer(String buffer);
/** Copies the contents of buffer, starting at offset and continuing
* for length characters, into the termBuffer array.
* @param buffer the buffer to copy
* @param offset the index in the buffer of the first character to copy
* @param length the number of characters to copy
*/
public void setTermBuffer(String buffer, int offset, int length);
/** Returns the internal termBuffer character array which
* you can then directly alter. If the array is too
* small for your token, use {@link
* #resizeTermBuffer(int)} to increase it. After
* altering the buffer be sure to call {@link
* #setTermLength} to record the number of valid
* characters that were placed into the termBuffer. */
public char[] termBuffer();
/** Grows the termBuffer to at least size newSize, preserving the
* existing content. Note: If the next operation is to change
* the contents of the term buffer use
* {@link #setTermBuffer(char[], int, int)},
* {@link #setTermBuffer(String)}, or
* {@link #setTermBuffer(String, int, int)}
* to optimally combine the resize with the setting of the termBuffer.
* @param newSize minimum size of the new termBuffer
* @return newly created termBuffer with length >= newSize
*/
public char[] resizeTermBuffer(int newSize);
/** Return number of valid characters (length of the term)
* in the termBuffer array. */
public int termLength();
/** Set number of valid characters (length of the term) in
* the termBuffer array. Use this to truncate the termBuffer
* or to synchronize with external manipulation of the termBuffer.
* Note: to grow the size of the array,
* use {@link #resizeTermBuffer(int)} first.
* @param length the truncated length
*/
public void setTermLength(int length);
}
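
A sketch of the in-place editing pattern this interface is designed for (the filter name is hypothetical): work directly on termBuffer() and only call setTermLength() if the length changes.

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public final class AsciiUpperCaseFilter extends TokenFilter {
  private final TermAttribute termAtt;

  public AsciiUpperCaseFilter(TokenStream input) {
    super(input);
    termAtt = addAttribute(TermAttribute.class);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    // Edit the shared buffer in place; no new String or char[] is allocated.
    final char[] buffer = termAtt.termBuffer();
    final int length = termAtt.termLength();
    for (int i = 0; i < length; i++) {
      final char c = buffer[i];
      if (c >= 'a' && c <= 'z') {
        buffer[i] = (char) (c - ('a' - 'A'));
      }
    }
    return true;
  }
}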

View File

@ -0,0 +1,226 @@
package org.apache.lucene.analysis.tokenattributes;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Serializable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.AttributeImpl;
/**
* The term text of a Token.
*/
public class TermAttributeImpl extends AttributeImpl implements TermAttribute, Cloneable, Serializable {
private static int MIN_BUFFER_SIZE = 10;
private char[] termBuffer;
private int termLength;
/** Returns the Token's term text.
*
* This method has a performance penalty
* because the text is stored internally in a char[]. If
* possible, use {@link #termBuffer()} and {@link
* #termLength()} directly instead. If you really need a
* String, use this method, which is nothing more than
* a convenience call to <b>new String(token.termBuffer(), 0, token.termLength())</b>
*/
public String term() {
initTermBuffer();
return new String(termBuffer, 0, termLength);
}
/** Copies the contents of buffer, starting at offset for
* length characters, into the termBuffer array.
* @param buffer the buffer to copy
* @param offset the index in the buffer of the first character to copy
* @param length the number of characters to copy
*/
public void setTermBuffer(char[] buffer, int offset, int length) {
growTermBuffer(length);
System.arraycopy(buffer, offset, termBuffer, 0, length);
termLength = length;
}
/** Copies the contents of buffer into the termBuffer array.
* @param buffer the buffer to copy
*/
public void setTermBuffer(String buffer) {
int length = buffer.length();
growTermBuffer(length);
buffer.getChars(0, length, termBuffer, 0);
termLength = length;
}
/** Copies the contents of buffer, starting at offset and continuing
* for length characters, into the termBuffer array.
* @param buffer the buffer to copy
* @param offset the index in the buffer of the first character to copy
* @param length the number of characters to copy
*/
public void setTermBuffer(String buffer, int offset, int length) {
assert offset <= buffer.length();
assert offset + length <= buffer.length();
growTermBuffer(length);
buffer.getChars(offset, offset + length, termBuffer, 0);
termLength = length;
}
/** Returns the internal termBuffer character array which
* you can then directly alter. If the array is too
* small for your token, use {@link
* #resizeTermBuffer(int)} to increase it. After
* altering the buffer be sure to call {@link
* #setTermLength} to record the number of valid
* characters that were placed into the termBuffer. */
public char[] termBuffer() {
initTermBuffer();
return termBuffer;
}
/** Grows the termBuffer to at least size newSize, preserving the
* existing content. Note: If the next operation is to change
* the contents of the term buffer use
* {@link #setTermBuffer(char[], int, int)},
* {@link #setTermBuffer(String)}, or
* {@link #setTermBuffer(String, int, int)}
* to optimally combine the resize with the setting of the termBuffer.
* @param newSize minimum size of the new termBuffer
* @return the termBuffer with length &gt;= newSize (a new array is only allocated when the current one is too small)
*/
public char[] resizeTermBuffer(int newSize) {
if (termBuffer == null) {
// The buffer is always at least MIN_BUFFER_SIZE
termBuffer = new char[ArrayUtil.getNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)];
} else {
if(termBuffer.length < newSize){
// Not big enough; create a new array with slight
// over allocation and preserve content
final char[] newCharBuffer = new char[ArrayUtil.getNextSize(newSize)];
System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length);
termBuffer = newCharBuffer;
}
}
return termBuffer;
}
/** Allocates a buffer char[] of at least newSize, without preserving the existing content.
* It is always used in places that set the content immediately afterwards.
* @param newSize minimum size of the buffer
*/
private void growTermBuffer(int newSize) {
if (termBuffer == null) {
// The buffer is always at least MIN_BUFFER_SIZE
termBuffer = new char[ArrayUtil.getNextSize(newSize < MIN_BUFFER_SIZE ? MIN_BUFFER_SIZE : newSize)];
} else {
if(termBuffer.length < newSize){
// Not big enough; create a new array with slight
// over allocation:
termBuffer = new char[ArrayUtil.getNextSize(newSize)];
}
}
}
private void initTermBuffer() {
if (termBuffer == null) {
termBuffer = new char[ArrayUtil.getNextSize(MIN_BUFFER_SIZE)];
termLength = 0;
}
}
/** Return number of valid characters (length of the term)
* in the termBuffer array. */
public int termLength() {
return termLength;
}
/** Set number of valid characters (length of the term) in
* the termBuffer array. Use this to truncate the termBuffer
* or to synchronize with external manipulation of the termBuffer.
* Note: to grow the size of the array,
* use {@link #resizeTermBuffer(int)} first.
* @param length the truncated length
*/
public void setTermLength(int length) {
initTermBuffer();
if (length > termBuffer.length)
throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")");
termLength = length;
}
@Override
public int hashCode() {
initTermBuffer();
int code = termLength;
code = code * 31 + ArrayUtil.hashCode(termBuffer, 0, termLength);
return code;
}
@Override
public void clear() {
termLength = 0;
}
@Override
public Object clone() {
TermAttributeImpl t = (TermAttributeImpl)super.clone();
// Do a deep clone
if (termBuffer != null) {
t.termBuffer = (char[]) termBuffer.clone();
}
return t;
}
@Override
public boolean equals(Object other) {
if (other == this) {
return true;
}
if (other instanceof TermAttribute) {
initTermBuffer();
TermAttributeImpl o = ((TermAttributeImpl) other);
o.initTermBuffer();
if (termLength != o.termLength)
return false;
for(int i=0;i<termLength;i++) {
if (termBuffer[i] != o.termBuffer[i]) {
return false;
}
}
return true;
}
return false;
}
@Override
public String toString() {
initTermBuffer();
return "term=" + new String(termBuffer, 0, termLength);
}
@Override
public void copyTo(AttributeImpl target) {
initTermBuffer();
TermAttribute t = (TermAttribute) target;
t.setTermBuffer(termBuffer, 0, termLength);
}
}
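
A hedged sketch (not part of this commit) of the in-place update pattern described in the termBuffer()/setTermLength() javadocs above: fetch the buffer, modify it, then record the number of valid characters. The filter name "UpperCaseFilter" is hypothetical.

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

public final class UpperCaseFilter extends TokenFilter {
  private final TermAttribute termAtt = addAttribute(TermAttribute.class);

  public UpperCaseFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    final char[] buffer = termAtt.termBuffer();  // direct access, no String copy
    final int length = termAtt.termLength();
    for (int i = 0; i < length; i++) {
      buffer[i] = Character.toUpperCase(buffer[i]);
    }
    termAtt.setTermLength(length);  // length unchanged here; required if the term were truncated
    return true;
  }
}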

View File

@ -0,0 +1,32 @@
package org.apache.lucene.analysis.tokenattributes;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.Attribute;
/**
* A Token's lexical type. The default value is "word".
*/
public interface TypeAttribute extends Attribute {
/** Returns this Token's lexical type. Defaults to "word". */
public String type();
/** Set the lexical type.
@see #type() */
public void setType(String type);
}

View File

@ -0,0 +1,78 @@
package org.apache.lucene.analysis.tokenattributes;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Serializable;
import org.apache.lucene.util.AttributeImpl;
/**
* A Token's lexical type. The default value is "word".
*/
public class TypeAttributeImpl extends AttributeImpl implements TypeAttribute, Cloneable, Serializable {
private String type;
public static final String DEFAULT_TYPE = "word";
public TypeAttributeImpl() {
this(DEFAULT_TYPE);
}
public TypeAttributeImpl(String type) {
this.type = type;
}
/** Returns this Token's lexical type. Defaults to "word". */
public String type() {
return type;
}
/** Set the lexical type.
@see #type() */
public void setType(String type) {
this.type = type;
}
@Override
public void clear() {
type = DEFAULT_TYPE;
}
@Override
public boolean equals(Object other) {
if (other == this) {
return true;
}
if (other instanceof TypeAttributeImpl) {
return type.equals(((TypeAttributeImpl) other).type);
}
return false;
}
@Override
public int hashCode() {
return type.hashCode();
}
@Override
public void copyTo(AttributeImpl target) {
TypeAttribute t = (TypeAttribute) target;
t.setType(type);
}
}
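
As a rough illustration (not part of this commit), a filter keyed on the token type could look like the following; "TypeKeepFilter" is a hypothetical name, and the type strings to match depend on the tokenizer in use (StandardTokenizer emits values such as "<ALPHANUM>" and "<NUM>").

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;

public final class TypeKeepFilter extends TokenFilter {
  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
  private final String keepType;

  public TypeKeepFilter(TokenStream input, String keepType) {
    super(input);
    this.keepType = keepType;
  }

  @Override
  public boolean incrementToken() throws IOException {
    while (input.incrementToken()) {
      if (keepType.equals(typeAtt.type())) {
        return true;   // pass tokens of the requested type through
      }
      // otherwise skip this token and examine the next one
    }
    return false;      // input exhausted
  }
}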

View File

@ -0,0 +1,294 @@
package org.apache.lucene.document;
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.PhraseQuery; // for javadocs
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.StringHelper; // for javadocs
/**
* Base class for {@link Fieldable} implementations, holding the name, flags,
* boost and value shared by the concrete field classes.
**/
public abstract class AbstractField implements Fieldable {
protected String name = "body";
protected boolean storeTermVector = false;
protected boolean storeOffsetWithTermVector = false;
protected boolean storePositionWithTermVector = false;
protected boolean omitNorms = false;
protected boolean isStored = false;
protected boolean isIndexed = true;
protected boolean isTokenized = true;
protected boolean isBinary = false;
protected boolean lazy = false;
protected boolean omitTermFreqAndPositions = false;
protected float boost = 1.0f;
// the data object for all different kind of field values
protected Object fieldsData = null;
// pre-analyzed tokenStream for indexed fields
protected TokenStream tokenStream;
// length/offset for all primitive types
protected int binaryLength;
protected int binaryOffset;
protected AbstractField()
{
}
protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) {
if (name == null)
throw new NullPointerException("name cannot be null");
this.name = StringHelper.intern(name); // field names are interned
this.isStored = store.isStored();
this.isIndexed = index.isIndexed();
this.isTokenized = index.isAnalyzed();
this.omitNorms = index.omitNorms();
this.isBinary = false;
setStoreTermVector(termVector);
}
/** Sets the boost factor for hits on this field. This value will be
* multiplied into the score of all hits on this field of this
* document.
*
* <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
* containing this field. If a document has multiple fields with the same
* name, all such values are multiplied together. This product is then
* used to compute the norm factor for the field. By
* default, in the {@link
* org.apache.lucene.search.Similarity#computeNorm(String,
* FieldInvertState)} method, the boost value is multiplied
* by the {@link
* org.apache.lucene.search.Similarity#lengthNorm(String,
* int)} and then
* rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
* index. One should attempt to ensure that this product does not overflow
* the range of that encoding.
*
* @see org.apache.lucene.document.Document#setBoost(float)
* @see org.apache.lucene.search.Similarity#computeNorm(String, org.apache.lucene.index.FieldInvertState)
* @see org.apache.lucene.search.Similarity#encodeNorm(float)
*/
public void setBoost(float boost) {
this.boost = boost;
}
/** Returns the boost factor for hits for this field.
*
* <p>The default value is 1.0.
*
* <p>Note: this value is not stored directly with the document in the index.
* Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
* {@link org.apache.lucene.search.Searcher#doc(int)} may thus not have the same value present as when
* this field was indexed.
*
* @see #setBoost(float)
*/
public float getBoost() {
return boost;
}
/** Returns the name of the field as an interned string.
* For example "date", "title", "body", ...
*/
public String name() { return name; }
protected void setStoreTermVector(Field.TermVector termVector) {
this.storeTermVector = termVector.isStored();
this.storePositionWithTermVector = termVector.withPositions();
this.storeOffsetWithTermVector = termVector.withOffsets();
}
/** True iff the value of the field is to be stored in the index for return
with search hits. It is an error for this to be true if a field is
Reader-valued. */
public final boolean isStored() { return isStored; }
/** True iff the value of the field is to be indexed, so that it may be
searched on. */
public final boolean isIndexed() { return isIndexed; }
/** True iff the value of the field should be tokenized as text prior to
indexing. Un-tokenized fields are indexed as a single word and may not be
Reader-valued. */
public final boolean isTokenized() { return isTokenized; }
/** True iff the term or terms used to index this field are stored as a term
* vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
* These methods do not provide access to the original content of the field,
* only to terms used to index it. If the original content must be
* preserved, use the <code>stored</code> attribute instead.
*
* @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String)
*/
public final boolean isTermVectorStored() { return storeTermVector; }
/**
* True iff terms are stored as term vector together with their offsets
* (start and end position in source text).
*/
public boolean isStoreOffsetWithTermVector(){
return storeOffsetWithTermVector;
}
/**
* True iff terms are stored as term vector together with their token positions.
*/
public boolean isStorePositionWithTermVector(){
return storePositionWithTermVector;
}
/** True iff the value of the field is stored as binary */
public final boolean isBinary() {
return isBinary;
}
/**
* Return the raw byte[] for the binary field. Note that
* you must also call {@link #getBinaryLength} and {@link
* #getBinaryOffset} to know which range of bytes in this
* returned array belong to the field.
* @return reference to the Field value as byte[].
*/
public byte[] getBinaryValue() {
return getBinaryValue(null);
}
public byte[] getBinaryValue(byte[] result){
if (isBinary || fieldsData instanceof byte[])
return (byte[]) fieldsData;
else
return null;
}
/**
* Returns the length of the byte[] segment that is used as the value. If the Field is not binary,
* the returned value is undefined.
* @return length of byte[] segment that represents this Field value
*/
public int getBinaryLength() {
if (isBinary) {
return binaryLength;
} else if (fieldsData instanceof byte[])
return ((byte[]) fieldsData).length;
else
return 0;
}
/**
* Returns the offset into the byte[] segment that is used as the value. If the Field is not binary,
* the returned value is undefined.
* @return index of the first character in byte[] segment that represents this Field value
*/
public int getBinaryOffset() {
return binaryOffset;
}
/** True if norms are omitted for this indexed field */
public boolean getOmitNorms() { return omitNorms; }
/** @see #setOmitTermFreqAndPositions */
public boolean getOmitTermFreqAndPositions() { return omitTermFreqAndPositions; }
/** Expert:
*
* If set, omit normalization factors associated with this indexed field.
* This effectively disables indexing boosts and length normalization for this field.
*/
public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; }
/** Expert:
*
* If set, omit term freq, positions and payloads from
* postings for this field.
*
* <p><b>NOTE</b>: While this option reduces storage space
* required in the index, it also means any query
* requiring positional information, such as {@link
* PhraseQuery} or {@link SpanQuery} subclasses will
* silently fail to find results.
*/
public void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions) { this.omitTermFreqAndPositions=omitTermFreqAndPositions; }
public boolean isLazy() {
return lazy;
}
/** Prints a Field for human consumption. */
@Override
public final String toString() {
StringBuilder result = new StringBuilder();
if (isStored) {
result.append("stored");
}
if (isIndexed) {
if (result.length() > 0)
result.append(",");
result.append("indexed");
}
if (isTokenized) {
if (result.length() > 0)
result.append(",");
result.append("tokenized");
}
if (storeTermVector) {
if (result.length() > 0)
result.append(",");
result.append("termVector");
}
if (storeOffsetWithTermVector) {
if (result.length() > 0)
result.append(",");
result.append("termVectorOffsets");
}
if (storePositionWithTermVector) {
if (result.length() > 0)
result.append(",");
result.append("termVectorPosition");
}
if (isBinary) {
if (result.length() > 0)
result.append(",");
result.append("binary");
}
if (omitNorms) {
result.append(",omitNorms");
}
if (omitTermFreqAndPositions) {
result.append(",omitTermFreqAndPositions");
}
if (lazy){
result.append(",lazy");
}
result.append('<');
result.append(name);
result.append(':');
if (fieldsData != null && lazy == false) {
result.append(fieldsData);
}
result.append('>');
return result.toString();
}
}
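
A small sketch (not part of this commit) of the boost arithmetic described in setBoost() above, using the Document and Field classes added elsewhere in this commit; the field name and text are placeholders.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

public class BoostExample {
  public static void main(String[] args) {
    Field title = new Field("title", "Lucene in Action",
                            Field.Store.YES, Field.Index.ANALYZED);
    title.setBoost(2.0f);   // field-level boost

    Document doc = new Document();
    doc.add(title);
    doc.setBoost(1.5f);     // document-level boost, multiplied into every field's boost

    // Product folded into the "title" norm at indexing time: 2.0f * 1.5f = 3.0f
    System.out.println(title.getBoost() * doc.getBoost());
  }
}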

View File

@ -0,0 +1,124 @@
package org.apache.lucene.document;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.zip.Deflater;
import java.util.zip.Inflater;
import java.util.zip.DataFormatException;
import java.io.ByteArrayOutputStream;
import org.apache.lucene.util.UnicodeUtil;
/** Simple utility class providing static methods to
* compress and decompress binary data for stored fields.
* This class uses java.util.zip.Deflater and Inflater
* classes to compress and decompress.
*/
public class CompressionTools {
// Export only static methods
private CompressionTools() {}
/** Compresses the specified byte range using the
* specified compressionLevel (constants are defined in
* java.util.zip.Deflater). */
public static byte[] compress(byte[] value, int offset, int length, int compressionLevel) {
/* Create an expandable byte array to hold the compressed data.
* You cannot use an array that's the same size as the original because
* there is no guarantee that the compressed data will be smaller than
* the uncompressed data. */
ByteArrayOutputStream bos = new ByteArrayOutputStream(length);
Deflater compressor = new Deflater();
try {
compressor.setLevel(compressionLevel);
compressor.setInput(value, offset, length);
compressor.finish();
// Compress the data
final byte[] buf = new byte[1024];
while (!compressor.finished()) {
int count = compressor.deflate(buf);
bos.write(buf, 0, count);
}
} finally {
compressor.end();
}
return bos.toByteArray();
}
/** Compresses the specified byte range, with default BEST_COMPRESSION level */
public static byte[] compress(byte[] value, int offset, int length) {
return compress(value, offset, length, Deflater.BEST_COMPRESSION);
}
/** Compresses all bytes in the array, with default BEST_COMPRESSION level */
public static byte[] compress(byte[] value) {
return compress(value, 0, value.length, Deflater.BEST_COMPRESSION);
}
/** Compresses the String value, with default BEST_COMPRESSION level */
public static byte[] compressString(String value) {
return compressString(value, Deflater.BEST_COMPRESSION);
}
/** Compresses the String value using the specified
* compressionLevel (constants are defined in
* java.util.zip.Deflater). */
public static byte[] compressString(String value, int compressionLevel) {
UnicodeUtil.UTF8Result result = new UnicodeUtil.UTF8Result();
UnicodeUtil.UTF16toUTF8(value, 0, value.length(), result);
return compress(result.result, 0, result.length, compressionLevel);
}
/** Decompress the byte array previously returned by
* compress */
public static byte[] decompress(byte[] value) throws DataFormatException {
// Create an expandable byte array to hold the decompressed data
ByteArrayOutputStream bos = new ByteArrayOutputStream(value.length);
Inflater decompressor = new Inflater();
try {
decompressor.setInput(value);
// Decompress the data
final byte[] buf = new byte[1024];
while (!decompressor.finished()) {
int count = decompressor.inflate(buf);
bos.write(buf, 0, count);
}
} finally {
decompressor.end();
}
return bos.toByteArray();
}
/** Decompress the byte array previously returned by
* compressString back into a String */
public static String decompressString(byte[] value) throws DataFormatException {
UnicodeUtil.UTF16Result result = new UnicodeUtil.UTF16Result();
final byte[] bytes = decompress(value);
UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.length, result);
return new String(result.result, 0, result.length);
}
}
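
A minimal round-trip sketch (not part of this commit); in practice the compressed bytes would be stored in a binary stored field and decompressed after the document is retrieved.

import java.util.zip.DataFormatException;
import org.apache.lucene.document.CompressionTools;

public class CompressionRoundTrip {
  public static void main(String[] args) throws DataFormatException {
    String original = "some long stored text, repeated text, repeated text";
    byte[] compressed = CompressionTools.compressString(original);
    String restored = CompressionTools.decompressString(compressed);
    System.out.println(original.equals(restored) + " (" + compressed.length + " bytes)");
  }
}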

View File

@ -0,0 +1,122 @@
package org.apache.lucene.document;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.NumericRangeQuery; // for javadocs
import org.apache.lucene.util.NumericUtils; // for javadocs
import java.util.Date; // for javadoc
import java.util.Calendar; // for javadoc
// do not remove in 3.0, needed for reading old indexes!
/**
* Provides support for converting dates to strings and vice-versa.
* The strings are structured so that lexicographic sorting orders by date,
* which makes them suitable for use as field values and search terms.
*
* <P>Note that this class saves dates with millisecond granularity,
* which is bad for {@link TermRangeQuery} and {@link PrefixQuery}, as those
* queries are expanded to a BooleanQuery with a potentially large number
* of terms when searching. Thus you might want to use
* {@link DateTools} instead.
*
* <P>
* Note: dates before 1970 cannot be used, and therefore cannot be
* indexed when using this class. See {@link DateTools} for an
* alternative without such a limitation.
*
* <P>
* Another approach is {@link NumericUtils}, which provides
* a sortable binary representation (prefix encoded) of numeric values, which
* date/time are.
* For indexing a {@link Date} or {@link Calendar}, just get the unix timestamp as
* <code>long</code> using {@link Date#getTime} or {@link Calendar#getTimeInMillis} and
* index this as a numeric value with {@link NumericField}
* and use {@link NumericRangeQuery} to query it.
*
* @deprecated If you build a new index, use {@link DateTools} or
* {@link NumericField} instead.
* This class is included for use with existing
* indices and will be removed in a future release (possibly Lucene 4.0).
*/
public class DateField {
private DateField() {}
// make date strings long enough to last a millennium
private static int DATE_LEN = Long.toString(1000L*365*24*60*60*1000,
Character.MAX_RADIX).length();
public static String MIN_DATE_STRING() {
return timeToString(0);
}
public static String MAX_DATE_STRING() {
char[] buffer = new char[DATE_LEN];
char c = Character.forDigit(Character.MAX_RADIX-1, Character.MAX_RADIX);
for (int i = 0 ; i < DATE_LEN; i++)
buffer[i] = c;
return new String(buffer);
}
/**
* Converts a Date to a string suitable for indexing.
* @throws RuntimeException if the date specified in the
* method argument is before 1970
*/
public static String dateToString(Date date) {
return timeToString(date.getTime());
}
/**
* Converts a millisecond time to a string suitable for indexing.
* @throws RuntimeException if the time specified in the
* method argument is negative, that is, before 1970
*/
public static String timeToString(long time) {
if (time < 0)
throw new RuntimeException("time '" + time + "' is too early, must be >= 0");
String s = Long.toString(time, Character.MAX_RADIX);
if (s.length() > DATE_LEN)
throw new RuntimeException("time '" + time + "' is too late, length of string " +
"representation must be <= " + DATE_LEN);
// Pad with leading zeros
if (s.length() < DATE_LEN) {
StringBuilder sb = new StringBuilder(s);
while (sb.length() < DATE_LEN)
sb.insert(0, 0);
s = sb.toString();
}
return s;
}
/** Converts a string-encoded date into a millisecond time. */
public static long stringToTime(String s) {
return Long.parseLong(s, Character.MAX_RADIX);
}
/** Converts a string-encoded date into a Date object. */
public static Date stringToDate(String s) {
return new Date(stringToTime(s));
}
}
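
A small sketch (not part of this commit) of the deprecated encoding: times become fixed-width base-36 strings (Character.MAX_RADIX is 36), so lexicographic order of the encoded values matches chronological order.

import org.apache.lucene.document.DateField;

public class DateFieldExample {
  public static void main(String[] args) {
    long now = System.currentTimeMillis();
    String encoded = DateField.timeToString(now);   // e.g. a 9-character base-36 string
    // Earlier times sort before later ones:
    System.out.println(DateField.MIN_DATE_STRING().compareTo(encoded) < 0);
    // The encoding round-trips:
    System.out.println(DateField.stringToTime(encoded) == now);
  }
}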

View File

@ -0,0 +1,256 @@
package org.apache.lucene.document;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.TimeZone;
import java.util.Locale;
import org.apache.lucene.search.NumericRangeQuery; // for javadocs
import org.apache.lucene.util.NumericUtils; // for javadocs
/**
* Provides support for converting dates to strings and vice-versa.
* The strings are structured so that lexicographic sorting orders
* them by date, which makes them suitable for use as field values
* and search terms.
*
* <P>This class also helps you to limit the resolution of your dates. Do not
* save dates with a finer resolution than you really need, as then
* RangeQuery and PrefixQuery will require more memory and become slower.
*
* <P>Compared to {@link DateField} the strings generated by the methods
* in this class take slightly more space, unless your selected resolution
* is set to <code>Resolution.DAY</code> or lower.
*
* <P>
* Another approach is {@link NumericUtils}, which provides
* a sortable binary representation (prefix encoded) of numeric values, which
* date/time are.
* For indexing a {@link Date} or {@link Calendar}, just get the unix timestamp as
* <code>long</code> using {@link Date#getTime} or {@link Calendar#getTimeInMillis} and
* index this as a numeric value with {@link NumericField}
* and use {@link NumericRangeQuery} to query it.
*/
public class DateTools {
private final static TimeZone GMT = TimeZone.getTimeZone("GMT");
private static final SimpleDateFormat YEAR_FORMAT = new SimpleDateFormat("yyyy", Locale.US);
private static final SimpleDateFormat MONTH_FORMAT = new SimpleDateFormat("yyyyMM", Locale.US);
private static final SimpleDateFormat DAY_FORMAT = new SimpleDateFormat("yyyyMMdd", Locale.US);
private static final SimpleDateFormat HOUR_FORMAT = new SimpleDateFormat("yyyyMMddHH", Locale.US);
private static final SimpleDateFormat MINUTE_FORMAT = new SimpleDateFormat("yyyyMMddHHmm", Locale.US);
private static final SimpleDateFormat SECOND_FORMAT = new SimpleDateFormat("yyyyMMddHHmmss", Locale.US);
private static final SimpleDateFormat MILLISECOND_FORMAT = new SimpleDateFormat("yyyyMMddHHmmssSSS", Locale.US);
static {
// times need to be normalized so the value doesn't depend on the
// location the index is created/used:
YEAR_FORMAT.setTimeZone(GMT);
MONTH_FORMAT.setTimeZone(GMT);
DAY_FORMAT.setTimeZone(GMT);
HOUR_FORMAT.setTimeZone(GMT);
MINUTE_FORMAT.setTimeZone(GMT);
SECOND_FORMAT.setTimeZone(GMT);
MILLISECOND_FORMAT.setTimeZone(GMT);
}
private static final Calendar calInstance = Calendar.getInstance(GMT);
// cannot create, the class has static methods only
private DateTools() {}
/**
* Converts a Date to a string suitable for indexing.
*
* @param date the date to be converted
* @param resolution the desired resolution, see
* {@link #round(Date, DateTools.Resolution)}
* @return a string in format <code>yyyyMMddHHmmssSSS</code> or shorter,
* depending on <code>resolution</code>; using GMT as timezone
*/
public static synchronized String dateToString(Date date, Resolution resolution) {
return timeToString(date.getTime(), resolution);
}
/**
* Converts a millisecond time to a string suitable for indexing.
*
* @param time the date expressed as milliseconds since January 1, 1970, 00:00:00 GMT
* @param resolution the desired resolution, see
* {@link #round(long, DateTools.Resolution)}
* @return a string in format <code>yyyyMMddHHmmssSSS</code> or shorter,
* depending on <code>resolution</code>; using GMT as timezone
*/
public static synchronized String timeToString(long time, Resolution resolution) {
calInstance.setTimeInMillis(round(time, resolution));
Date date = calInstance.getTime();
if (resolution == Resolution.YEAR) {
return YEAR_FORMAT.format(date);
} else if (resolution == Resolution.MONTH) {
return MONTH_FORMAT.format(date);
} else if (resolution == Resolution.DAY) {
return DAY_FORMAT.format(date);
} else if (resolution == Resolution.HOUR) {
return HOUR_FORMAT.format(date);
} else if (resolution == Resolution.MINUTE) {
return MINUTE_FORMAT.format(date);
} else if (resolution == Resolution.SECOND) {
return SECOND_FORMAT.format(date);
} else if (resolution == Resolution.MILLISECOND) {
return MILLISECOND_FORMAT.format(date);
}
throw new IllegalArgumentException("unknown resolution " + resolution);
}
/**
* Converts a string produced by <code>timeToString</code> or
* <code>dateToString</code> back to a time, represented as the
* number of milliseconds since January 1, 1970, 00:00:00 GMT.
*
* @param dateString the date string to be converted
* @return the number of milliseconds since January 1, 1970, 00:00:00 GMT
* @throws ParseException if <code>dateString</code> is not in the
* expected format
*/
public static synchronized long stringToTime(String dateString) throws ParseException {
return stringToDate(dateString).getTime();
}
/**
* Converts a string produced by <code>timeToString</code> or
* <code>dateToString</code> back to a time, represented as a
* Date object.
*
* @param dateString the date string to be converted
* @return the parsed time as a Date object
* @throws ParseException if <code>dateString</code> is not in the
* expected format
*/
public static synchronized Date stringToDate(String dateString) throws ParseException {
if (dateString.length() == 4) {
return YEAR_FORMAT.parse(dateString);
} else if (dateString.length() == 6) {
return MONTH_FORMAT.parse(dateString);
} else if (dateString.length() == 8) {
return DAY_FORMAT.parse(dateString);
} else if (dateString.length() == 10) {
return HOUR_FORMAT.parse(dateString);
} else if (dateString.length() == 12) {
return MINUTE_FORMAT.parse(dateString);
} else if (dateString.length() == 14) {
return SECOND_FORMAT.parse(dateString);
} else if (dateString.length() == 17) {
return MILLISECOND_FORMAT.parse(dateString);
}
throw new ParseException("Input is not valid date string: " + dateString, 0);
}
/**
* Limit a date's resolution. For example, the date <code>2004-09-21 13:50:11</code>
* will be changed to <code>2004-09-01 00:00:00</code> when using
* <code>Resolution.MONTH</code>.
*
* @param resolution The desired resolution of the date to be returned
* @return the date with all values more precise than <code>resolution</code>
* set to 0 or 1
*/
public static synchronized Date round(Date date, Resolution resolution) {
return new Date(round(date.getTime(), resolution));
}
/**
* Limit a date's resolution. For example, the date <code>1095767411000</code>
* (which represents 2004-09-21 13:50:11) will be changed to
* <code>1093989600000</code> (2004-09-01 00:00:00) when using
* <code>Resolution.MONTH</code>.
*
* @param resolution The desired resolution of the date to be returned
* @return the date with all values more precise than <code>resolution</code>
* set to 0 or 1, expressed as milliseconds since January 1, 1970, 00:00:00 GMT
*/
public static synchronized long round(long time, Resolution resolution) {
calInstance.setTimeInMillis(time);
if (resolution == Resolution.YEAR) {
calInstance.set(Calendar.MONTH, 0);
calInstance.set(Calendar.DAY_OF_MONTH, 1);
calInstance.set(Calendar.HOUR_OF_DAY, 0);
calInstance.set(Calendar.MINUTE, 0);
calInstance.set(Calendar.SECOND, 0);
calInstance.set(Calendar.MILLISECOND, 0);
} else if (resolution == Resolution.MONTH) {
calInstance.set(Calendar.DAY_OF_MONTH, 1);
calInstance.set(Calendar.HOUR_OF_DAY, 0);
calInstance.set(Calendar.MINUTE, 0);
calInstance.set(Calendar.SECOND, 0);
calInstance.set(Calendar.MILLISECOND, 0);
} else if (resolution == Resolution.DAY) {
calInstance.set(Calendar.HOUR_OF_DAY, 0);
calInstance.set(Calendar.MINUTE, 0);
calInstance.set(Calendar.SECOND, 0);
calInstance.set(Calendar.MILLISECOND, 0);
} else if (resolution == Resolution.HOUR) {
calInstance.set(Calendar.MINUTE, 0);
calInstance.set(Calendar.SECOND, 0);
calInstance.set(Calendar.MILLISECOND, 0);
} else if (resolution == Resolution.MINUTE) {
calInstance.set(Calendar.SECOND, 0);
calInstance.set(Calendar.MILLISECOND, 0);
} else if (resolution == Resolution.SECOND) {
calInstance.set(Calendar.MILLISECOND, 0);
} else if (resolution == Resolution.MILLISECOND) {
// don't cut off anything
} else {
throw new IllegalArgumentException("unknown resolution " + resolution);
}
return calInstance.getTimeInMillis();
}
/** Specifies the time granularity. */
public static class Resolution {
public static final Resolution YEAR = new Resolution("year");
public static final Resolution MONTH = new Resolution("month");
public static final Resolution DAY = new Resolution("day");
public static final Resolution HOUR = new Resolution("hour");
public static final Resolution MINUTE = new Resolution("minute");
public static final Resolution SECOND = new Resolution("second");
public static final Resolution MILLISECOND = new Resolution("millisecond");
private String resolution;
private Resolution() {
}
private Resolution(String resolution) {
this.resolution = resolution;
}
@Override
public String toString() {
return resolution;
}
}
}
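
A small sketch (not part of this commit) showing DateTools with a limited resolution, as the class javadoc above recommends; the choice of Resolution.DAY is just an example.

import java.util.Date;
import org.apache.lucene.document.DateTools;

public class DateToolsExample {
  public static void main(String[] args) throws Exception {
    Date now = new Date();
    // Day resolution yields a GMT-based "yyyyMMdd" string, e.g. "20100317".
    String indexed = DateTools.dateToString(now, DateTools.Resolution.DAY);
    // Parsing it back gives midnight GMT of that day.
    Date roundTripped = DateTools.stringToDate(indexed);
    System.out.println(indexed + " -> " + roundTripped);
  }
}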

View File

@ -0,0 +1,305 @@
package org.apache.lucene.document;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.*; // for javadoc
import org.apache.lucene.search.ScoreDoc; // for javadoc
import org.apache.lucene.search.Searcher; // for javadoc
import org.apache.lucene.index.IndexReader; // for javadoc
/** Documents are the unit of indexing and search.
*
* A Document is a set of fields. Each field has a name and a textual value.
* A field may be {@link Fieldable#isStored() stored} with the document, in which
* case it is returned with search hits on the document. Thus each document
* should typically contain one or more stored fields which uniquely identify
* it.
*
* <p>Note that fields which are <i>not</i> {@link Fieldable#isStored() stored} are
* <i>not</i> available in documents retrieved from the index, e.g. with {@link
* ScoreDoc#doc}, {@link Searcher#doc(int)} or {@link
* IndexReader#document(int)}.
*/
public final class Document implements java.io.Serializable {
List<Fieldable> fields = new ArrayList<Fieldable>();
private float boost = 1.0f;
/** Constructs a new document with no fields. */
public Document() {}
/** Sets a boost factor for hits on any field of this document. This value
* will be multiplied into the score of all hits on this document.
*
* <p>The default value is 1.0.
*
* <p>Values are multiplied into the value of {@link Fieldable#getBoost()} of
* each field in this document. Thus, this method in effect sets a default
* boost for the fields of this document.
*
* @see Fieldable#setBoost(float)
*/
public void setBoost(float boost) {
this.boost = boost;
}
/** Returns, at indexing time, the boost factor as set by {@link #setBoost(float)}.
*
* <p>Note that once a document is indexed this value is no longer available
* from the index. At search time, for retrieved documents, this method always
* returns 1. This however does not mean that the boost value set at indexing
* time was ignored - it was just combined with other indexing time factors and
* stored elsewhere, for better indexing and search performance. (For more
* information see the "norm(t,d)" part of the scoring formula in
* {@link org.apache.lucene.search.Similarity Similarity}.)
*
* @see #setBoost(float)
*/
public float getBoost() {
return boost;
}
/**
* <p>Adds a field to a document. Several fields may be added with
* the same name. In this case, if the fields are indexed, their text is
* treated as though appended for the purposes of search.</p>
* <p> Note that add, like the removeField(s) methods, only makes sense
* prior to adding a document to an index. These methods cannot
* be used to change the content of an existing index! In order to achieve this,
* a document has to be deleted from an index and a new changed version of that
* document has to be added.</p>
*/
public final void add(Fieldable field) {
fields.add(field);
}
/**
* <p>Removes field with the specified name from the document.
* If multiple fields exist with this name, this method removes the first field that has been added.
* If there is no field with the specified name, the document remains unchanged.</p>
* <p> Note that the removeField(s) methods, like the add method, only make sense
* prior to adding a document to an index. These methods cannot
* be used to change the content of an existing index! In order to achieve this,
* a document has to be deleted from an index and a new changed version of that
* document has to be added.</p>
*/
public final void removeField(String name) {
Iterator<Fieldable> it = fields.iterator();
while (it.hasNext()) {
Fieldable field = it.next();
if (field.name().equals(name)) {
it.remove();
return;
}
}
}
/**
* <p>Removes all fields with the given name from the document.
* If there is no field with the specified name, the document remains unchanged.</p>
* <p> Note that the removeField(s) methods, like the add method, only make sense
* prior to adding a document to an index. These methods cannot
* be used to change the content of an existing index! In order to achieve this,
* a document has to be deleted from an index and a new changed version of that
* document has to be added.</p>
*/
public final void removeFields(String name) {
Iterator<Fieldable> it = fields.iterator();
while (it.hasNext()) {
Fieldable field = it.next();
if (field.name().equals(name)) {
it.remove();
}
}
}
/** Returns a field with the given name if any exist in this document, or
* null. If multiple fields exist with this name, this method returns the
* first value added.
* Do not use this method with lazy loaded fields.
*/
public final Field getField(String name) {
return (Field) getFieldable(name);
}
/** Returns a field with the given name if any exist in this document, or
* null. If multiple fields exist with this name, this method returns the
* first value added.
*/
public Fieldable getFieldable(String name) {
for (Fieldable field : fields) {
if (field.name().equals(name))
return field;
}
return null;
}
/** Returns the string value of the field with the given name if any exist in
* this document, or null. If multiple fields exist with this name, this
* method returns the first value added. If only binary fields with this name
* exist, returns null.
*/
public final String get(String name) {
for (Fieldable field : fields) {
if (field.name().equals(name) && (!field.isBinary()))
return field.stringValue();
}
return null;
}
/** Returns a List of all the fields in a document.
* <p>Note that fields which are <i>not</i> {@link Fieldable#isStored() stored} are
* <i>not</i> available in documents retrieved from the
* index, e.g. {@link Searcher#doc(int)} or {@link
* IndexReader#document(int)}.
*/
public final List<Fieldable> getFields() {
return fields;
}
private final static Field[] NO_FIELDS = new Field[0];
/**
* Returns an array of {@link Field}s with the given name.
* Do not use with lazy loaded fields.
* This method returns an empty array when there are no
* matching fields. It never returns null.
*
* @param name the name of the field
* @return a <code>Field[]</code> array
*/
public final Field[] getFields(String name) {
List<Field> result = new ArrayList<Field>();
for (Fieldable field : fields) {
if (field.name().equals(name)) {
result.add((Field) field);
}
}
if (result.size() == 0)
return NO_FIELDS;
return result.toArray(new Field[result.size()]);
}
private final static Fieldable[] NO_FIELDABLES = new Fieldable[0];
/**
* Returns an array of {@link Fieldable}s with the given name.
* This method returns an empty array when there are no
* matching fields. It never returns null.
*
* @param name the name of the field
* @return a <code>Fieldable[]</code> array
*/
public Fieldable[] getFieldables(String name) {
List<Fieldable> result = new ArrayList<Fieldable>();
for (Fieldable field : fields) {
if (field.name().equals(name)) {
result.add(field);
}
}
if (result.size() == 0)
return NO_FIELDABLES;
return result.toArray(new Fieldable[result.size()]);
}
private final static String[] NO_STRINGS = new String[0];
/**
* Returns an array of values of the field specified as the method parameter.
* This method returns an empty array when there are no
* matching fields. It never returns null.
* @param name the name of the field
* @return a <code>String[]</code> of field values
*/
public final String[] getValues(String name) {
List<String> result = new ArrayList<String>();
for (Fieldable field : fields) {
if (field.name().equals(name) && (!field.isBinary()))
result.add(field.stringValue());
}
if (result.size() == 0)
return NO_STRINGS;
return result.toArray(new String[result.size()]);
}
private final static byte[][] NO_BYTES = new byte[0][];
/**
* Returns an array of byte arrays for all of the fields that have the name specified
* as the method parameter. This method returns an empty
* array when there are no matching fields. It never
* returns null.
*
* @param name the name of the field
* @return a <code>byte[][]</code> of binary field values
*/
public final byte[][] getBinaryValues(String name) {
List<byte[]> result = new ArrayList<byte[]>();
for (Fieldable field : fields) {
if (field.name().equals(name) && (field.isBinary()))
result.add(field.getBinaryValue());
}
if (result.size() == 0)
return NO_BYTES;
return result.toArray(new byte[result.size()][]);
}
/**
* Returns an array of bytes for the first (or only) field that has the name
* specified as the method parameter. This method will return <code>null</code>
* if no binary fields with the specified name are available.
* There may be non-binary fields with the same name.
*
* @param name the name of the field.
* @return a <code>byte[]</code> containing the binary field value or <code>null</code>
*/
public final byte[] getBinaryValue(String name) {
for (Fieldable field : fields) {
if (field.name().equals(name) && (field.isBinary()))
return field.getBinaryValue();
}
return null;
}
/** Prints the fields of a document for human consumption. */
@Override
public final String toString() {
StringBuilder buffer = new StringBuilder();
buffer.append("Document<");
for (int i = 0; i < fields.size(); i++) {
Fieldable field = fields.get(i);
buffer.append(field.toString());
if (i != fields.size()-1)
buffer.append(" ");
}
buffer.append(">");
return buffer.toString();
}
}
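
A small usage sketch (not part of this commit) of the Document API above, together with the Field class added next; field names and values are placeholders.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;

public class DocumentExample {
  public static void main(String[] args) {
    Document doc = new Document();
    doc.add(new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED));
    doc.add(new Field("title", "Hello Lucene", Field.Store.YES, Field.Index.ANALYZED));
    doc.add(new Field("title", "Second title value", Field.Store.YES, Field.Index.ANALYZED));

    System.out.println(doc.get("title"));        // first value added: "Hello Lucene"
    for (String v : doc.getValues("title")) {    // all string values for the field
      System.out.println(v);
    }
    doc.removeFields("title");                   // drops every "title" field
    System.out.println(doc.getFields().size());  // 1 (only "id" remains)
  }
}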

View File

@ -0,0 +1,566 @@
package org.apache.lucene.document;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.IndexWriter; // for javadoc
import org.apache.lucene.util.StringHelper;
import java.io.Reader;
import java.io.Serializable;
/**
A field is a section of a Document. Each field has two parts, a name and a
value. Values may be free text, provided as a String or as a Reader, or they
may be atomic keywords, which are not further processed. Such keywords may
be used to represent dates, URLs, etc. Fields are optionally stored in the
index, so that they may be returned with hits on the document.
*/
public final class Field extends AbstractField implements Fieldable, Serializable {
/** Specifies whether and how a field should be stored. */
public static enum Store {
/** Store the original field value in the index. This is useful for short texts
* like a document's title which should be displayed with the results. The
* value is stored in its original form, i.e. no analyzer is used before it is
* stored.
*/
YES {
@Override
public boolean isStored() { return true; }
},
/** Do not store the field value in the index. */
NO {
@Override
public boolean isStored() { return false; }
};
public abstract boolean isStored();
}
/** Specifies whether and how a field should be indexed. */
public static enum Index {
/** Do not index the field value. This field can thus not be searched,
* but one can still access its contents provided it is
* {@link Field.Store stored}. */
NO {
@Override
public boolean isIndexed() { return false; }
@Override
public boolean isAnalyzed() { return false; }
@Override
public boolean omitNorms() { return true; }
},
/** Index the tokens produced by running the field's
* value through an Analyzer. This is useful for
* common text. */
ANALYZED {
@Override
public boolean isIndexed() { return true; }
@Override
public boolean isAnalyzed() { return true; }
@Override
public boolean omitNorms() { return false; }
},
/** Index the field's value without using an Analyzer, so it can be searched.
* As no analyzer is used the value will be stored as a single term. This is
* useful for unique Ids like product numbers.
*/
NOT_ANALYZED {
@Override
public boolean isIndexed() { return true; }
@Override
public boolean isAnalyzed() { return false; }
@Override
public boolean omitNorms() { return false; }
},
/** Expert: Index the field's value without an Analyzer,
* and also disable the storing of norms. Note that you
* can also separately enable/disable norms by calling
* {@link Field#setOmitNorms}. No norms means that
* index-time field and document boosting and field
* length normalization are disabled. The benefit is
* less memory usage as norms take up one byte of RAM
* per indexed field for every document in the index,
* during searching. Note that once you index a given
* field <i>with</i> norms enabled, disabling norms will
* have no effect. In other words, for this to have the
* above described effect on a field, all instances of
* that field must be indexed with NOT_ANALYZED_NO_NORMS
* from the beginning. */
NOT_ANALYZED_NO_NORMS {
@Override
public boolean isIndexed() { return true; }
@Override
public boolean isAnalyzed() { return false; }
@Override
public boolean omitNorms() { return true; }
},
/** Expert: Index the tokens produced by running the
* field's value through an Analyzer, and also
* separately disable the storing of norms. See
* {@link #NOT_ANALYZED_NO_NORMS} for what norms are
* and why you may want to disable them. */
ANALYZED_NO_NORMS {
@Override
public boolean isIndexed() { return true; }
@Override
public boolean isAnalyzed() { return true; }
@Override
public boolean omitNorms() { return true; }
};
/** Get the best representation of the index given the flags. */
public static Index toIndex(boolean indexed, boolean analyzed) {
return toIndex(indexed, analyzed, false);
}
/** Expert: Get the best representation of the index given the flags. */
public static Index toIndex(boolean indexed, boolean analyzed, boolean omitNorms) {
// If it is not indexed nothing else matters
if (!indexed) {
return Index.NO;
}
// typical, non-expert
if (!omitNorms) {
if (analyzed) {
return Index.ANALYZED;
}
return Index.NOT_ANALYZED;
}
// Expert: Norms omitted
if (analyzed) {
return Index.ANALYZED_NO_NORMS;
}
return Index.NOT_ANALYZED_NO_NORMS;
}
public abstract boolean isIndexed();
public abstract boolean isAnalyzed();
public abstract boolean omitNorms();
}
/** Specifies whether and how a field should have term vectors. */
public static enum TermVector {
/** Do not store term vectors.
*/
NO {
@Override
public boolean isStored() { return false; }
@Override
public boolean withPositions() { return false; }
@Override
public boolean withOffsets() { return false; }
},
/** Store the term vectors of each document. A term vector is a list
* of the document's terms and their number of occurrences in that document. */
YES {
@Override
public boolean isStored() { return true; }
@Override
public boolean withPositions() { return false; }
@Override
public boolean withOffsets() { return false; }
},
/**
* Store the term vector + token position information
*
* @see #YES
*/
WITH_POSITIONS {
@Override
public boolean isStored() { return true; }
@Override
public boolean withPositions() { return true; }
@Override
public boolean withOffsets() { return false; }
},
/**
* Store the term vector + Token offset information
*
* @see #YES
*/
WITH_OFFSETS {
@Override
public boolean isStored() { return true; }
@Override
public boolean withPositions() { return false; }
@Override
public boolean withOffsets() { return true; }
},
/**
* Store the term vector + Token position and offset information
*
* @see #YES
* @see #WITH_POSITIONS
* @see #WITH_OFFSETS
*/
WITH_POSITIONS_OFFSETS {
@Override
public boolean isStored() { return true; }
@Override
public boolean withPositions() { return true; }
@Override
public boolean withOffsets() { return true; }
};
/** Get the best representation of a TermVector given the flags. */
public static TermVector toTermVector(boolean stored, boolean withOffsets, boolean withPositions) {
// If it is not stored, nothing else matters.
if (!stored) {
return TermVector.NO;
}
if (withOffsets) {
if (withPositions) {
return Field.TermVector.WITH_POSITIONS_OFFSETS;
}
return Field.TermVector.WITH_OFFSETS;
}
if (withPositions) {
return Field.TermVector.WITH_POSITIONS;
}
return Field.TermVector.YES;
}
public abstract boolean isStored();
public abstract boolean withPositions();
public abstract boolean withOffsets();
}
/** The value of the field as a String, or null. If null, the Reader value or
* binary value is used. Exactly one of stringValue(),
* readerValue(), and getBinaryValue() must be set. */
public String stringValue() { return fieldsData instanceof String ? (String)fieldsData : null; }
/** The value of the field as a Reader, or null. If null, the String value or
* binary value is used. Exactly one of stringValue(),
* readerValue(), and getBinaryValue() must be set. */
public Reader readerValue() { return fieldsData instanceof Reader ? (Reader)fieldsData : null; }
/** The TokenStream for this field to be used when indexing, or null. If null, the Reader value
* or String value is analyzed to produce the indexed tokens. */
public TokenStream tokenStreamValue() { return tokenStream; }
/** <p>Expert: change the value of this field. This can
* be used during indexing to re-use a single Field
* instance to improve indexing speed by avoiding GC cost
* of new'ing and reclaiming Field instances. Typically
* a single {@link Document} instance is re-used as
* well. This helps most on small documents.</p>
*
* <p>Each Field instance should only be used once
* within a single {@link Document} instance. See <a
* href="http://wiki.apache.org/lucene-java/ImproveIndexingSpeed">ImproveIndexingSpeed</a>
* for details.</p> */
public void setValue(String value) {
if (isBinary) {
throw new IllegalArgumentException("cannot set a String value on a binary field");
}
fieldsData = value;
}
/** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
public void setValue(Reader value) {
if (isBinary) {
throw new IllegalArgumentException("cannot set a Reader value on a binary field");
}
if (isStored) {
throw new IllegalArgumentException("cannot set a Reader value on a stored field");
}
fieldsData = value;
}
/** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
public void setValue(byte[] value) {
if (!isBinary) {
throw new IllegalArgumentException("cannot set a byte[] value on a non-binary field");
}
fieldsData = value;
binaryLength = value.length;
binaryOffset = 0;
}
/** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
public void setValue(byte[] value, int offset, int length) {
if (!isBinary) {
throw new IllegalArgumentException("cannot set a byte[] value on a non-binary field");
}
fieldsData = value;
binaryLength = length;
binaryOffset = offset;
}
/** Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true.
* May be combined with stored values from stringValue() or getBinaryValue() */
public void setTokenStream(TokenStream tokenStream) {
this.isIndexed = true;
this.isTokenized = true;
this.tokenStream = tokenStream;
}
/**
* Create a field by specifying its name, value and how it will
* be saved in the index. Term vectors will not be stored in the index.
*
* @param name The name of the field
* @param value The string to process
* @param store Whether <code>value</code> should be stored in the index
* @param index Whether the field should be indexed, and if so, if it should
* be tokenized before indexing
* @throws NullPointerException if name or value is <code>null</code>
* @throws IllegalArgumentException if the field is neither stored nor indexed
*/
public Field(String name, String value, Store store, Index index) {
this(name, value, store, index, TermVector.NO);
}
/**
* Create a field by specifying its name, value and how it will
* be saved in the index.
*
* @param name The name of the field
* @param value The string to process
* @param store Whether <code>value</code> should be stored in the index
* @param index Whether the field should be indexed, and if so, if it should
* be tokenized before indexing
* @param termVector Whether term vector should be stored
* @throws NullPointerException if name or value is <code>null</code>
* @throws IllegalArgumentException in any of the following situations:
* <ul>
* <li>the field is neither stored nor indexed</li>
* <li>the field is not indexed but termVector is <code>TermVector.YES</code></li>
* </ul>
*/
public Field(String name, String value, Store store, Index index, TermVector termVector) {
this(name, true, value, store, index, termVector);
}
/**
* Create a field by specifying its name, value and how it will
* be saved in the index.
*
* @param name The name of the field
* @param internName Whether to .intern() name or not
* @param value The string to process
* @param store Whether <code>value</code> should be stored in the index
* @param index Whether the field should be indexed, and if so, if it should
* be tokenized before indexing
* @param termVector Whether term vector should be stored
* @throws NullPointerException if name or value is <code>null</code>
* @throws IllegalArgumentException in any of the following situations:
* <ul>
* <li>the field is neither stored nor indexed</li>
* <li>the field is not indexed but termVector is <code>TermVector.YES</code></li>
* </ul>
*/
public Field(String name, boolean internName, String value, Store store, Index index, TermVector termVector) {
if (name == null)
throw new NullPointerException("name cannot be null");
if (value == null)
throw new NullPointerException("value cannot be null");
if (name.length() == 0 && value.length() == 0)
throw new IllegalArgumentException("name and value cannot both be empty");
if (index == Index.NO && store == Store.NO)
throw new IllegalArgumentException("it doesn't make sense to have a field that "
+ "is neither indexed nor stored");
if (index == Index.NO && termVector != TermVector.NO)
throw new IllegalArgumentException("cannot store term vector information "
+ "for a field that is not indexed");
if (internName) // field names are optionally interned
name = StringHelper.intern(name);
this.name = name;
this.fieldsData = value;
this.isStored = store.isStored();
this.isIndexed = index.isIndexed();
this.isTokenized = index.isAnalyzed();
this.omitNorms = index.omitNorms();
if (index == Index.NO) {
this.omitTermFreqAndPositions = false;
}
this.isBinary = false;
setStoreTermVector(termVector);
}
/**
* Create a tokenized and indexed field that is not stored. Term vectors will
* not be stored. The Reader is read only when the Document is added to the index,
* i.e. you may not close the Reader until {@link IndexWriter#addDocument(Document)}
* has been called.
*
* @param name The name of the field
* @param reader The reader with the content
* @throws NullPointerException if name or reader is <code>null</code>
*/
public Field(String name, Reader reader) {
this(name, reader, TermVector.NO);
}
/**
* Create a tokenized and indexed field that is not stored, optionally with
* storing term vectors. The Reader is read only when the Document is added to the index,
* i.e. you may not close the Reader until {@link IndexWriter#addDocument(Document)}
* has been called.
*
* @param name The name of the field
* @param reader The reader with the content
* @param termVector Whether term vector should be stored
* @throws NullPointerException if name or reader is <code>null</code>
*/
public Field(String name, Reader reader, TermVector termVector) {
if (name == null)
throw new NullPointerException("name cannot be null");
if (reader == null)
throw new NullPointerException("reader cannot be null");
this.name = StringHelper.intern(name); // field names are interned
this.fieldsData = reader;
this.isStored = false;
this.isIndexed = true;
this.isTokenized = true;
this.isBinary = false;
setStoreTermVector(termVector);
}
/**
* Create a tokenized and indexed field that is not stored. Term vectors will
* not be stored. This is useful for pre-analyzed fields.
* The TokenStream is read only when the Document is added to the index,
* i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Document)}
* has been called.
*
* @param name The name of the field
* @param tokenStream The TokenStream with the content
* @throws NullPointerException if name or tokenStream is <code>null</code>
*/
public Field(String name, TokenStream tokenStream) {
this(name, tokenStream, TermVector.NO);
}
/**
* Create a tokenized and indexed field that is not stored, optionally with
* storing term vectors. This is useful for pre-analyzed fields.
* The TokenStream is read only when the Document is added to the index,
* i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Document)}
* has been called.
*
* @param name The name of the field
* @param tokenStream The TokenStream with the content
* @param termVector Whether term vector should be stored
* @throws NullPointerException if name or tokenStream is <code>null</code>
*/
public Field(String name, TokenStream tokenStream, TermVector termVector) {
if (name == null)
throw new NullPointerException("name cannot be null");
if (tokenStream == null)
throw new NullPointerException("tokenStream cannot be null");
this.name = StringHelper.intern(name); // field names are interned
this.fieldsData = null;
this.tokenStream = tokenStream;
this.isStored = false;
this.isIndexed = true;
this.isTokenized = true;
this.isBinary = false;
setStoreTermVector(termVector);
}
/**
* Create a stored field with binary value. Optionally the value may be compressed.
*
* @param name The name of the field
* @param value The binary value
* @param store How <code>value</code> should be stored (compressed or not)
* @throws IllegalArgumentException if store is <code>Store.NO</code>
*/
public Field(String name, byte[] value, Store store) {
this(name, value, 0, value.length, store);
}
/**
* Create a stored field with binary value. Optionally the value may be compressed.
*
* @param name The name of the field
* @param value The binary value
* @param offset Starting offset in value where this Field's bytes are
* @param length Number of bytes to use for this Field, starting at offset
* @param store How <code>value</code> should be stored (compressed or not)
* @throws IllegalArgumentException if store is <code>Store.NO</code>
*/
public Field(String name, byte[] value, int offset, int length, Store store) {
if (name == null)
throw new IllegalArgumentException("name cannot be null");
if (value == null)
throw new IllegalArgumentException("value cannot be null");
this.name = StringHelper.intern(name); // field names are interned
fieldsData = value;
if (store == Store.NO)
throw new IllegalArgumentException("binary values can't be unstored");
isStored = store.isStored();
isIndexed = false;
isTokenized = false;
omitTermFreqAndPositions = false;
omitNorms = true;
isBinary = true;
binaryLength = length;
binaryOffset = offset;
setStoreTermVector(TermVector.NO);
}
}
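/* A minimal usage sketch of the constructors above, assuming an open IndexWriter
 * "writer", a Reader "bodyReader" and a byte[] "thumbnailBytes" created elsewhere:
 *
 *   Document doc = new Document();
 *   doc.add(new Field("title", "Lucene in Action", Field.Store.YES, Field.Index.ANALYZED));
 *   doc.add(new Field("body", bodyReader));                           // tokenized, never stored
 *   doc.add(new Field("thumbnail", thumbnailBytes, Field.Store.YES)); // stored binary value
 *   writer.addDocument(doc);
 */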

View File

@ -0,0 +1,34 @@
package org.apache.lucene.document;
import java.io.Serializable;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Similar to a {@link java.io.FileFilter}, the FieldSelector allows one to make decisions about
* what Fields get loaded on a {@link Document} by {@link org.apache.lucene.index.IndexReader#document(int,org.apache.lucene.document.FieldSelector)}
*
**/
public interface FieldSelector extends Serializable {
/**
*
* @param fieldName the field to accept or reject
* @return an instance of {@link FieldSelectorResult} indicating whether, and how,
* the {@link Field} named <code>fieldName</code> should be loaded.
*/
FieldSelectorResult accept(String fieldName);
}
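/* A usage sketch, assuming an open IndexReader "reader" and a hit's document id
 * "docId" (names are illustrative): only the fields the selector accepts are materialized.
 *
 *   FieldSelector selector = new MapFieldSelector("title", "path");  // LOAD these, NO_LOAD the rest
 *   Document hit = reader.document(docId, selector);
 *   String title = hit.get("title");
 */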

View File

@ -0,0 +1,67 @@
package org.apache.lucene.document;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Provides information about what should be done with this Field
*
**/
public enum FieldSelectorResult {
/**
* Load this {@link Field} every time the {@link Document} is loaded, reading in the data as it is encountered.
* {@link Document#getField(String)} and {@link Document#getFieldable(String)} should not return null.
*<p/>
* {@link Document#add(Fieldable)} should be called by the Reader.
*/
LOAD,
/**
* Lazily load this {@link Field}. This means the {@link Field} is valid, but it may not actually contain its data until
* invoked. {@link Document#getField(String)} SHOULD NOT BE USED. {@link Document#getFieldable(String)} is safe to use and should
* return a valid instance of a {@link Fieldable}.
*<p/>
* {@link Document#add(Fieldable)} should be called by the Reader.
*/
LAZY_LOAD,
/**
* Do not load the {@link Field}. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should return null.
* {@link Document#add(Fieldable)} is not called.
* <p/>
* {@link Document#add(Fieldable)} should not be called by the Reader.
*/
NO_LOAD,
/**
* Load this field as in the {@link #LOAD} case, but immediately return from {@link Field} loading for the {@link Document}. Thus, the
* Document may not have its complete set of Fields. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should
* both be valid for this {@link Field}
* <p/>
* {@link Document#add(Fieldable)} should be called by the Reader.
*/
LOAD_AND_BREAK,
/** Expert: Load the size of this {@link Field} rather than its value.
* Size is measured as the number of bytes required to store the field: the byte length for a binary or compressed value, and 2*(number of chars) for a String value.
* The size is stored as a binary value, represented as an int in a byte[], with the higher order byte first in [0]
*/
SIZE,
/** Expert: Like {@link #SIZE} but immediately break from the field loading loop, i.e., stop loading further fields, after the size is loaded */
SIZE_AND_BREAK
}
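/* A sketch of decoding the SIZE result described above, assuming a Document "hit"
 * was loaded with a selector that returned SIZE for the "body" field: the size is
 * stored as a 4-byte binary value with the high order byte first.
 *
 *   byte[] sizeBytes = hit.getFieldable("body").getBinaryValue();
 *   int sizeInBytes = ((sizeBytes[0] & 0xff) << 24) | ((sizeBytes[1] & 0xff) << 16)
 *                   | ((sizeBytes[2] & 0xff) << 8)  |  (sizeBytes[3] & 0xff);
 */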

View File

@ -0,0 +1,212 @@
package org.apache.lucene.document;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.FieldInvertState; // for javadocs
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.spans.SpanQuery;
import java.io.Reader;
import java.io.Serializable;
/**
* Synonymous with {@link Field}.
*
* <p><b>WARNING</b>: This interface may change within minor versions, despite Lucene's backward compatibility requirements.
* This means new methods may be added from version to version. This change only affects the Fieldable API; other backwards
* compatibility promises remain intact. For example, Lucene can still
* read and write indices created within the same major version.
* </p>
*
**/
public interface Fieldable extends Serializable {
/** Sets the boost factor for hits on this field. This value will be
* multiplied into the score of all hits on this field of this
* document.
*
* <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
* containing this field. If a document has multiple fields with the same
* name, all such values are multiplied together. This product is then
* used to compute the norm factor for the field. By
* default, in the {@link
* org.apache.lucene.search.Similarity#computeNorm(String,
* FieldInvertState)} method, the boost value is multiplied
* by the {@link
* org.apache.lucene.search.Similarity#lengthNorm(String,
* int)} and then rounded by {@link org.apache.lucene.search.Similarity#encodeNorm(float)} before it is stored in the
* index. One should attempt to ensure that this product does not overflow
* the range of that encoding.
*
* @see org.apache.lucene.document.Document#setBoost(float)
* @see org.apache.lucene.search.Similarity#computeNorm(String, FieldInvertState)
* @see org.apache.lucene.search.Similarity#encodeNorm(float)
*/
void setBoost(float boost);
/** Returns the boost factor for hits for this field.
*
* <p>The default value is 1.0.
*
* <p>Note: this value is not stored directly with the document in the index.
* Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
* {@link org.apache.lucene.search.Searcher#doc(int)} may thus not have the same value present as when
* this field was indexed.
*
* @see #setBoost(float)
*/
float getBoost();
/** Returns the name of the field as an interned string.
* For example "date", "title", "body", ...
*/
String name();
/** The value of the field as a String, or null.
* <p>
* For indexing, if isStored()==true, the stringValue() will be used as the stored field value
* unless isBinary()==true, in which case getBinaryValue() will be used.
*
* If isIndexed()==true and isTokenized()==false, this String value will be indexed as a single token.
* If isIndexed()==true and isTokenized()==true, then tokenStreamValue() will be used to generate indexed tokens if not null,
* else readerValue() will be used to generate indexed tokens if not null, else stringValue() will be used to generate tokens.
*/
public String stringValue();
/** The value of the field as a Reader, which can be used at index time to generate indexed tokens.
* @see #stringValue()
*/
public Reader readerValue();
/** The TokenStream for this field to be used when indexing, or null.
* @see #stringValue()
*/
public TokenStream tokenStreamValue();
/** True if the value of the field is to be stored in the index for return
with search hits. */
boolean isStored();
/** True if the value of the field is to be indexed, so that it may be
searched on. */
boolean isIndexed();
/** True if the value of the field should be tokenized as text prior to
indexing. Un-tokenized fields are indexed as a single word and may not be
Reader-valued. */
boolean isTokenized();
/** True if the term or terms used to index this field are stored as a term
* vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
* These methods do not provide access to the original content of the field,
* only to terms used to index it. If the original content must be
* preserved, use the <code>stored</code> attribute instead.
*
* @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String)
*/
boolean isTermVectorStored();
/**
* True if terms are stored as term vector together with their offsets
* (start and end position in source text).
*/
boolean isStoreOffsetWithTermVector();
/**
* True if terms are stored as term vector together with their token positions.
*/
boolean isStorePositionWithTermVector();
/** True if the value of the field is stored as binary */
boolean isBinary();
/** True if norms are omitted for this indexed field */
boolean getOmitNorms();
/** Expert:
*
* If set, omit normalization factors associated with this indexed field.
* This effectively disables indexing boosts and length normalization for this field.
*/
void setOmitNorms(boolean omitNorms);
/**
* Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving
* its values via {@link #stringValue()} or {@link #getBinaryValue()} is only valid as long as the {@link org.apache.lucene.index.IndexReader} that
* retrieved the {@link Document} is still open.
*
* @return true if this field can be loaded lazily
*/
boolean isLazy();
/**
* Returns the offset into the byte[] segment that is used as the value; if the Field is not binary,
* the returned value is undefined.
* @return index of the first character in byte[] segment that represents this Field value
*/
abstract int getBinaryOffset();
/**
* Returns the length of the byte[] segment that is used as the value; if the Field is not binary,
* the returned value is undefined.
* @return length of byte[] segment that represents this Field value
*/
abstract int getBinaryLength();
/**
* Return the raw byte[] for the binary field. Note that
* you must also call {@link #getBinaryLength} and {@link
* #getBinaryOffset} to know which range of bytes in this
* returned array belong to the field.
* @return reference to the Field value as byte[].
*/
abstract byte[] getBinaryValue();
/**
* Return the raw byte[] for the binary field. Note that
* you must also call {@link #getBinaryLength} and {@link
* #getBinaryOffset} to know which range of bytes in this
* returned array belong to the field.<p>
* About reuse: if you pass in the result byte[] and it is
* used, likely the underlying implementation will hold
* onto this byte[] and return it in future calls to
* {@link #getBinaryValue()}.
* So if you subsequently re-use the same byte[] elsewhere
* it will alter this Fieldable's value.
* @param result User defined buffer that will be used if
* possible. If this is null or not large enough, a new
* buffer is allocated
* @return reference to the Field value as byte[].
*/
abstract byte[] getBinaryValue(byte[] result);
/** @see #setOmitTermFreqAndPositions */
boolean getOmitTermFreqAndPositions();
/** Expert:
*
* If set, omit term freq, positions and payloads from
* postings for this field.
*
* <p><b>NOTE</b>: While this option reduces storage space
* required in the index, it also means any query
* requiring positional information, such as {@link
* PhraseQuery} or {@link SpanQuery} subclasses will
* silently fail to find results.
*/
void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions);
}
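/* A minimal boost sketch, assuming a Document "doc" with a Fieldable "titleField"
 * (illustrative names): the field boost and the document boost are multiplied
 * together and folded into the field's norm at index time.
 *
 *   titleField.setBoost(2.0f);   // field-level boost
 *   doc.setBoost(1.5f);          // applied to every field of the document
 *   // effective contribution to titleField's norm: 2.0f * 1.5f = 3.0f
 */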

View File

@ -0,0 +1,29 @@
package org.apache.lucene.document;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Load the First field and break.
* <p/>
* See {@link FieldSelectorResult#LOAD_AND_BREAK}
*/
public class LoadFirstFieldSelector implements FieldSelector {
public FieldSelectorResult accept(String fieldName) {
return FieldSelectorResult.LOAD_AND_BREAK;
}
}

View File

@ -0,0 +1,67 @@
package org.apache.lucene.document;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* A {@link FieldSelector} based on a Map of field names to {@link FieldSelectorResult}s
*
*/
public class MapFieldSelector implements FieldSelector {
Map<String,FieldSelectorResult> fieldSelections;
/** Create a MapFieldSelector
* @param fieldSelections maps from field names (String) to {@link FieldSelectorResult}s
*/
public MapFieldSelector(Map<String,FieldSelectorResult> fieldSelections) {
this.fieldSelections = fieldSelections;
}
/** Create a MapFieldSelector
* @param fields fields to LOAD. List of Strings. All other fields are NO_LOAD.
*/
public MapFieldSelector(List<String> fields) {
fieldSelections = new HashMap<String,FieldSelectorResult>(fields.size()*5/3);
for (final String field : fields)
fieldSelections.put(field, FieldSelectorResult.LOAD);
}
/** Create a MapFieldSelector
* @param fields fields to LOAD. All other fields are NO_LOAD.
*/
public MapFieldSelector(String... fields) {
this(Arrays.asList(fields));
}
/** Load field according to its associated value in fieldSelections
* @param field a field name
* @return the fieldSelections value that field maps to or NO_LOAD if none.
*/
public FieldSelectorResult accept(String field) {
FieldSelectorResult selection = fieldSelections.get(field);
return selection!=null ? selection : FieldSelectorResult.NO_LOAD;
}
}
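/* A sketch of the Map-based constructor, assuming an open IndexReader "reader" and a
 * document id "docId": each field gets its own FieldSelectorResult, everything else is NO_LOAD.
 *
 *   Map<String,FieldSelectorResult> selections = new HashMap<String,FieldSelectorResult>();
 *   selections.put("title", FieldSelectorResult.LOAD);
 *   selections.put("contents", FieldSelectorResult.LAZY_LOAD);
 *   Document hit = reader.document(docId, new MapFieldSelector(selections));
 */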

View File

@ -0,0 +1,139 @@
package org.apache.lucene.document;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.document.NumericField; // for javadocs
import org.apache.lucene.search.NumericRangeQuery; // for javadocs
import org.apache.lucene.util.NumericUtils; // for javadocs
// do not remove this class in 3.0, it may be needed to decode old indexes!
/**
* Provides support for converting longs to Strings, and back again. The strings
* are structured so that lexicographic sorting order is preserved.
*
* <p>
* That is, if l1 is less than l2 for any two longs l1 and l2, then
* NumberTools.longToString(l1) is lexicographically less than
* NumberTools.longToString(l2). (Similarly for "greater than" and "equals".)
*
* <p>
* This class handles <b>all</b> long values (unlike
* {@link org.apache.lucene.document.DateField}).
*
* @deprecated For new indexes use {@link NumericUtils} instead, which
* provides a sortable binary representation (prefix encoded) of numeric
* values.
* To index and efficiently query numeric values use {@link NumericField}
* and {@link NumericRangeQuery}.
* This class is included for use with existing
* indices and will be removed in a future release (possibly Lucene 4.0).
*/
public class NumberTools {
private static final int RADIX = 36;
private static final char NEGATIVE_PREFIX = '-';
// NB: NEGATIVE_PREFIX must be < POSITIVE_PREFIX
private static final char POSITIVE_PREFIX = '0';
// NB: MIN_STRING_VALUE must sort before all other longToString() results
/**
* Equivalent to longToString(Long.MIN_VALUE)
*/
public static final String MIN_STRING_VALUE = NEGATIVE_PREFIX
+ "0000000000000";
/**
* Equivalent to longToString(Long.MAX_VALUE)
*/
public static final String MAX_STRING_VALUE = POSITIVE_PREFIX
+ "1y2p0ij32e8e7";
/**
* The length of (all) strings returned by {@link #longToString}
*/
public static final int STR_SIZE = MIN_STRING_VALUE.length();
/**
* Converts a long to a String suitable for indexing.
*/
public static String longToString(long l) {
if (l == Long.MIN_VALUE) {
// special case, because long is not symmetric around zero
return MIN_STRING_VALUE;
}
StringBuilder buf = new StringBuilder(STR_SIZE);
if (l < 0) {
buf.append(NEGATIVE_PREFIX);
l = Long.MAX_VALUE + l + 1;
} else {
buf.append(POSITIVE_PREFIX);
}
String num = Long.toString(l, RADIX);
int padLen = STR_SIZE - num.length() - buf.length();
while (padLen-- > 0) {
buf.append('0');
}
buf.append(num);
return buf.toString();
}
/**
* Converts a String that was returned by {@link #longToString} back to a
* long.
*
* @throws IllegalArgumentException
* if the input is null
* @throws NumberFormatException
* if the input does not parse (it was not a String returned by
* longToString()).
*/
public static long stringToLong(String str) {
if (str == null) {
throw new NullPointerException("string cannot be null");
}
if (str.length() != STR_SIZE) {
throw new NumberFormatException("string is the wrong size");
}
if (str.equals(MIN_STRING_VALUE)) {
return Long.MIN_VALUE;
}
char prefix = str.charAt(0);
long l = Long.parseLong(str.substring(1), RADIX);
if (prefix == POSITIVE_PREFIX) {
// nop
} else if (prefix == NEGATIVE_PREFIX) {
l = l - Long.MAX_VALUE - 1;
} else {
throw new NumberFormatException(
"string does not begin with the correct prefix");
}
return l;
}
}
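/* A round-trip sketch showing the ordering guarantee described above
 * (the values are chosen for illustration only):
 *
 *   String a = NumberTools.longToString(17L);
 *   String b = NumberTools.longToString(42L);
 *   // a.compareTo(b) < 0, mirroring 17 < 42
 *   long back = NumberTools.stringToLong(a);   // 17L again
 */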

View File

@ -0,0 +1,277 @@
package org.apache.lucene.document;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.search.NumericRangeQuery; // javadocs
import org.apache.lucene.search.NumericRangeFilter; // javadocs
import org.apache.lucene.search.SortField; // javadocs
import org.apache.lucene.search.FieldCache; // javadocs
/**
* <p>This class provides a {@link Field} that enables indexing
* of numeric values for efficient range filtering and
* sorting. Here's an example usage, adding an int value:
* <pre>
* document.add(new NumericField(name).setIntValue(value));
* </pre>
*
* For optimal performance, re-use the
* <code>NumericField</code> and {@link Document} instance for more than
* one document:
*
* <pre>
* NumericField field = new NumericField(name);
* Document document = new Document();
* document.add(field);
*
* for(all documents) {
* ...
* field.setIntValue(value)
* writer.addDocument(document);
* ...
* }
* </pre>
*
* <p>The java native types <code>int</code>, <code>long</code>,
* <code>float</code> and <code>double</code> are
* directly supported. However, any value that can be
* converted into these native types can also be indexed.
* For example, date/time values represented by a
* {@link java.util.Date} can be translated into a long
* value using the {@link java.util.Date#getTime} method. If you
* don't need millisecond precision, you can quantize the
* value, either by dividing the result of
* {@link java.util.Date#getTime} or using the separate getters
* (for year, month, etc.) to construct an <code>int</code> or
* <code>long</code> value.</p>
*
* <p>To perform range querying or filtering against a
* <code>NumericField</code>, use {@link NumericRangeQuery} or {@link
* NumericRangeFilter}. To sort according to a
* <code>NumericField</code>, use the normal numeric sort types, eg
* {@link SortField#INT}. <code>NumericField</code> values
* can also be loaded directly from {@link FieldCache}.</p>
*
* <p>By default, a <code>NumericField</code>'s value is not stored but
* is indexed for range filtering and sorting. You can use
* the {@link #NumericField(String,Field.Store,boolean)}
* constructor if you need to change these defaults.</p>
*
* <p>You may add the same field name as a <code>NumericField</code> to
* the same document more than once. Range querying and
* filtering will be the logical OR of all values; so a range query
* will hit all documents that have at least one value in
* the range. However sort behavior is not defined. If you need to sort,
* you should separately index a single-valued <code>NumericField</code>.</p>
*
* <p>A <code>NumericField</code> will consume somewhat more disk space
* in the index than an ordinary single-valued field.
* However, for a typical index that includes substantial
* textual content per document, this increase will likely
* be in the noise. </p>
*
* <p>Within Lucene, each numeric value is indexed as a
* <em>trie</em> structure, where each term is logically
* assigned to larger and larger pre-defined brackets (which
* are simply lower-precision representations of the value).
* The step size between each successive bracket is called the
* <code>precisionStep</code>, measured in bits. Smaller
* <code>precisionStep</code> values result in larger number
* of brackets, which consumes more disk space in the index
* but may result in faster range search performance. The
* default value, 4, was selected for a reasonable tradeoff
* of disk space consumption versus performance. You can
* use the expert constructor {@link
* #NumericField(String,int,Field.Store,boolean)} if you'd
* like to change the value. Note that you must also
* specify a congruent value when creating {@link
* NumericRangeQuery} or {@link NumericRangeFilter}.
* For low cardinality fields larger precision steps are good.
* If the cardinality is &lt; 100, it is fair
* to use {@link Integer#MAX_VALUE}, which produces one
* term per value.
*
* <p>For more information on the internals of numeric trie
* indexing, including the <a
* href="../search/NumericRangeQuery.html#precisionStepDesc"><code>precisionStep</code></a>
* configuration, see {@link NumericRangeQuery}. The format of
* indexed values is described in {@link NumericUtils}.
*
* <p>If you only need to sort by numeric value, and never
* run range querying/filtering, you can index using a
* <code>precisionStep</code> of {@link Integer#MAX_VALUE}.
* This will minimize disk space consumed. </p>
*
* <p>More advanced users can instead use {@link
* NumericTokenStream} directly, when indexing numbers. This
* class is a wrapper around this token stream type for
* easier, more intuitive usage.</p>
*
* <p><b>NOTE:</b> This class is only used during
* indexing. When retrieving the stored field value from a
* {@link Document} instance after search, you will get a
* conventional {@link Fieldable} instance where the numeric
* values are returned as {@link String}s (according to
* <code>toString(value)</code> of the used data type).
*
* <p><font color="red"><b>NOTE:</b> This API is
* experimental and might change in incompatible ways in the
* next release.</font>
*
* @since 2.9
*/
public final class NumericField extends AbstractField {
private final NumericTokenStream tokenStream;
/**
* Creates a field for numeric values using the default <code>precisionStep</code>
* {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
* a numeric value, before indexing a document containing this field,
* set a value using the various set<em>???</em>Value() methods.
* This constructor creates an indexed, but not stored field.
* @param name the field name
*/
public NumericField(String name) {
this(name, NumericUtils.PRECISION_STEP_DEFAULT, Field.Store.NO, true);
}
/**
* Creates a field for numeric values using the default <code>precisionStep</code>
* {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
* a numeric value; before indexing a document containing this field,
* set a value using the various set<em>???</em>Value() methods.
* @param name the field name
* @param store if the field should be stored in plain text form
* (according to <code>toString(value)</code> of the used data type)
* @param index if the field should be indexed using {@link NumericTokenStream}
*/
public NumericField(String name, Field.Store store, boolean index) {
this(name, NumericUtils.PRECISION_STEP_DEFAULT, store, index);
}
/**
* Creates a field for numeric values with the specified
* <code>precisionStep</code>. The instance is not yet initialized with
* a numeric value; before indexing a document containing this field,
* set a value using the various set<em>???</em>Value() methods.
* This constructor creates an indexed, but not stored field.
* @param name the field name
* @param precisionStep the used <a href="../search/NumericRangeQuery.html#precisionStepDesc">precision step</a>
*/
public NumericField(String name, int precisionStep) {
this(name, precisionStep, Field.Store.NO, true);
}
/**
* Creates a field for numeric values with the specified
* <code>precisionStep</code>. The instance is not yet initialized with
* a numeric value; before indexing a document containing this field,
* set a value using the various set<em>???</em>Value() methods.
* @param name the field name
* @param precisionStep the used <a href="../search/NumericRangeQuery.html#precisionStepDesc">precision step</a>
* @param store if the field should be stored in plain text form
* (according to <code>toString(value)</code> of the used data type)
* @param index if the field should be indexed using {@link NumericTokenStream}
*/
public NumericField(String name, int precisionStep, Field.Store store, boolean index) {
super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO);
setOmitTermFreqAndPositions(true);
tokenStream = new NumericTokenStream(precisionStep);
}
/** Returns a {@link NumericTokenStream} for indexing the numeric value. */
public TokenStream tokenStreamValue() {
return isIndexed() ? tokenStream : null;
}
/** Always returns <code>null</code> for numeric fields */
@Override
public byte[] getBinaryValue(byte[] result){
return null;
}
/** Always returns <code>null</code> for numeric fields */
public Reader readerValue() {
return null;
}
/** Returns the numeric value as a string (how it is stored, when {@link Field.Store#YES} is chosen). */
public String stringValue() {
return (fieldsData == null) ? null : fieldsData.toString();
}
/** Returns the current numeric value as a subclass of {@link Number}, <code>null</code> if not yet initialized. */
public Number getNumericValue() {
return (Number) fieldsData;
}
/**
* Initializes the field with the supplied <code>long</code> value.
* @param value the numeric value
* @return this instance, so you can use it as follows:
* <code>document.add(new NumericField(name, precisionStep).setLongValue(value))</code>
*/
public NumericField setLongValue(final long value) {
tokenStream.setLongValue(value);
fieldsData = Long.valueOf(value);
return this;
}
/**
* Initializes the field with the supplied <code>int</code> value.
* @param value the numeric value
* @return this instance, so you can use it as follows:
* <code>document.add(new NumericField(name, precisionStep).setIntValue(value))</code>
*/
public NumericField setIntValue(final int value) {
tokenStream.setIntValue(value);
fieldsData = Integer.valueOf(value);
return this;
}
/**
* Initializes the field with the supplied <code>double</code> value.
* @param value the numeric value
* @return this instance, so you can use it as follows:
* <code>document.add(new NumericField(name, precisionStep).setDoubleValue(value))</code>
*/
public NumericField setDoubleValue(final double value) {
tokenStream.setDoubleValue(value);
fieldsData = Double.valueOf(value);
return this;
}
/**
* Initializes the field with the supplied <code>float</code> value.
* @param value the numeric value
* @return this instance, so you can use it as follows:
* <code>document.add(new NumericField(name, precisionStep).setFloatValue(value))</code>
*/
public NumericField setFloatValue(final float value) {
tokenStream.setFloatValue(value);
fieldsData = Float.valueOf(value);
return this;
}
}
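/* A sketch of pairing the expert constructor with a congruent query-time
 * precisionStep, assuming an open IndexWriter "writer" (field name and values
 * are illustrative):
 *
 *   int precisionStep = 8;   // must match between indexing and querying
 *   Document doc = new Document();
 *   doc.add(new NumericField("price", precisionStep, Field.Store.NO, true).setLongValue(42L));
 *   writer.addDocument(doc);
 *
 *   Query q = NumericRangeQuery.newLongRange("price", precisionStep, 10L, 100L, true, true);
 */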

View File

@ -0,0 +1,58 @@
package org.apache.lucene.document;
import java.util.Set;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Declare what fields to load normally and what fields to load lazily
*
**/
public class SetBasedFieldSelector implements FieldSelector {
private Set<String> fieldsToLoad;
private Set<String> lazyFieldsToLoad;
/**
* Pass in the Set of {@link Field} names to load and the Set of {@link Field} names to load lazily. If both are null, the
* Document will not have any {@link Field} on it.
* @param fieldsToLoad A Set of {@link String} field names to load. May be empty, but not null
* @param lazyFieldsToLoad A Set of {@link String} field names to load lazily. May be empty, but not null
*/
public SetBasedFieldSelector(Set<String> fieldsToLoad, Set<String> lazyFieldsToLoad) {
this.fieldsToLoad = fieldsToLoad;
this.lazyFieldsToLoad = lazyFieldsToLoad;
}
/**
* Indicate whether to load the field with the given name or not. If the {@link Field#name()} is not in either of the
* initializing Sets, then {@link org.apache.lucene.document.FieldSelectorResult#NO_LOAD} is returned. If a Field name
* is in both <code>fieldsToLoad</code> and <code>lazyFieldsToLoad</code>, lazy has precedence.
*
* @param fieldName The {@link Field} name to check
* @return The {@link FieldSelectorResult}
*/
public FieldSelectorResult accept(String fieldName) {
FieldSelectorResult result = FieldSelectorResult.NO_LOAD;
if (fieldsToLoad.contains(fieldName) == true){
result = FieldSelectorResult.LOAD;
}
if (lazyFieldsToLoad.contains(fieldName) == true){
result = FieldSelectorResult.LAZY_LOAD;
}
return result;
}
}
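/* A sketch of the precedence rule in accept(): a field named in both sets comes
 * back as LAZY_LOAD (field names are illustrative).
 *
 *   Set<String> load = new HashSet<String>(Arrays.asList("title", "contents"));
 *   Set<String> lazy = Collections.singleton("contents");
 *   FieldSelector selector = new SetBasedFieldSelector(load, lazy);
 *   // selector.accept("title")    -> FieldSelectorResult.LOAD
 *   // selector.accept("contents") -> FieldSelectorResult.LAZY_LOAD  (lazy wins)
 *   // selector.accept("path")     -> FieldSelectorResult.NO_LOAD
 */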

View File

@ -0,0 +1,56 @@
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
<p>The logical representation of a {@link org.apache.lucene.document.Document} for indexing and searching.</p>
<p>The document package provides the user level logical representation of content to be indexed and searched. The
package also provides utilities for working with {@link org.apache.lucene.document.Document}s and {@link org.apache.lucene.document.Fieldable}s.</p>
<h2>Document and Fieldable</h2>
<p>A {@link org.apache.lucene.document.Document} is a collection of {@link org.apache.lucene.document.Fieldable}s. A
{@link org.apache.lucene.document.Fieldable} is a logical representation of a user's content that needs to be indexed or stored.
{@link org.apache.lucene.document.Fieldable}s have a number of properties that tell Lucene how to treat the content (like indexed, tokenized,
stored, etc.) See the {@link org.apache.lucene.document.Field} implementation of {@link org.apache.lucene.document.Fieldable}
for specifics on these properties.
</p>
<p>Note: it is common to refer to {@link org.apache.lucene.document.Document}s having {@link org.apache.lucene.document.Field}s, even though technically they have
{@link org.apache.lucene.document.Fieldable}s.</p>
<h2>Working with Documents</h2>
<p>First and foremost, a {@link org.apache.lucene.document.Document} is something created by the user application. It is your job
to create Documents based on the content of the files you are working with in your application (Word, txt, PDF, Excel or any other format.)
How this is done is completely up to you. That being said, there are many tools available in other projects that can make
the process of taking a file and converting it into a Lucene {@link org.apache.lucene.document.Document} easier. To see an example of this,
take a look at the Lucene <a href="../../../../../../gettingstarted.html" target="top">demo</a> and the associated source code
for extracting content from HTML.
</p>
<p>The {@link org.apache.lucene.document.DateTools} is a utility class to make dates and times searchable
(remember, Lucene only searches text). {@link org.apache.lucene.document.NumericField} is a special helper class
to simplify indexing of numeric values (and also dates) for fast range queries with {@link org.apache.lucene.search.NumericRangeQuery}
(using a special sortable string representation of numeric values).</p>
<p>The {@link org.apache.lucene.document.FieldSelector} class provides a mechanism to tell Lucene how to load Documents from
storage. If no FieldSelector is used, all Fieldables on a Document will be loaded. As an example of the FieldSelector usage, consider
the common use case of
displaying search results on a web page and then having users click through to see the full document. In this scenario, it is often
the case that there are many small fields and one or two large fields (containing the contents of the original file). Before the FieldSelector,
the full Document had to be loaded, including the large fields, in order to display the results. Now, using the FieldSelector, one
can {@link org.apache.lucene.document.FieldSelectorResult#LAZY_LOAD} the large fields, thus only loading the large fields
when a user clicks on the actual link to view the original content.</p>
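<p>For example, a sketch of that click-through scenario (the field names and the open
<code>reader</code> are illustrative): the large "contents" field is only read when its value
is actually requested, and only while the IndexReader is still open.</p>
<pre>
  FieldSelector selector = new SetBasedFieldSelector(
      Collections.singleton("title"),       // load normally
      Collections.singleton("contents"));   // load lazily
  Document hit = reader.document(docId, selector);
  String title = hit.get("title");                               // already loaded
  String contents = hit.getFieldable("contents").stringValue();  // read on demand
</pre>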
</body>
</html>

View File

@ -0,0 +1,86 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import org.apache.lucene.util.BitVector;
import java.io.IOException;
class AllTermDocs implements TermDocs {
protected BitVector deletedDocs;
protected int maxDoc;
protected int doc = -1;
protected AllTermDocs(SegmentReader parent) {
synchronized (parent) {
this.deletedDocs = parent.deletedDocs;
}
this.maxDoc = parent.maxDoc();
}
public void seek(Term term) throws IOException {
if (term==null) {
doc = -1;
} else {
throw new UnsupportedOperationException();
}
}
public void seek(TermEnum termEnum) throws IOException {
throw new UnsupportedOperationException();
}
public int doc() {
return doc;
}
public int freq() {
return 1;
}
public boolean next() throws IOException {
return skipTo(doc+1);
}
public int read(int[] docs, int[] freqs) throws IOException {
final int length = docs.length;
int i = 0;
while (i < length && doc < maxDoc) {
if (deletedDocs == null || !deletedDocs.get(doc)) {
docs[i] = doc;
freqs[i] = 1;
++i;
}
doc++;
}
return i;
}
public boolean skipTo(int target) throws IOException {
doc = target;
while (doc < maxDoc) {
if (deletedDocs == null || !deletedDocs.get(doc)) {
return true;
}
doc++;
}
return false;
}
public void close() throws IOException {
}
}

View File

@ -0,0 +1,153 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.HashMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;
import org.apache.lucene.search.Query;
/** Holds buffered deletes, by docID, term or query. We
* hold two instances of this class: one for the deletes
* prior to the last flush, the other for deletes after
* the last flush. This is so if we need to abort
* (discard all buffered docs) we can also discard the
* buffered deletes yet keep the deletes done during
* previously flushed segments. */
class BufferedDeletes {
int numTerms;
HashMap<Term,Num> terms = new HashMap<Term,Num>();
HashMap<Query,Integer> queries = new HashMap<Query,Integer>();
List<Integer> docIDs = new ArrayList<Integer>();
long bytesUsed;
// Number of documents a delete term applies to.
final static class Num {
private int num;
Num(int num) {
this.num = num;
}
int getNum() {
return num;
}
void setNum(int num) {
// Only record the new number if it's greater than the
// current one. This is important because if multiple
// threads are replacing the same doc at nearly the
// same time, it's possible that one thread that got a
// higher docID is scheduled before the other
// threads.
if (num > this.num)
this.num = num;
}
}
int size() {
// We use numTerms not terms.size() intentionally, so
// that deletes by the same term multiple times "count",
// ie if you ask to flush every 1000 deletes then even
// dup'd terms are counted towards that 1000
return numTerms + queries.size() + docIDs.size();
}
void update(BufferedDeletes in) {
numTerms += in.numTerms;
bytesUsed += in.bytesUsed;
terms.putAll(in.terms);
queries.putAll(in.queries);
docIDs.addAll(in.docIDs);
in.clear();
}
void clear() {
terms.clear();
queries.clear();
docIDs.clear();
numTerms = 0;
bytesUsed = 0;
}
void addBytesUsed(long b) {
bytesUsed += b;
}
boolean any() {
return terms.size() > 0 || docIDs.size() > 0 || queries.size() > 0;
}
// Remaps all buffered deletes based on a completed
// merge
synchronized void remap(MergeDocIDRemapper mapper,
SegmentInfos infos,
int[][] docMaps,
int[] delCounts,
MergePolicy.OneMerge merge,
int mergeDocCount) {
final HashMap<Term,Num> newDeleteTerms;
// Remap delete-by-term
if (terms.size() > 0) {
newDeleteTerms = new HashMap<Term, Num>();
for(Entry<Term,Num> entry : terms.entrySet()) {
Num num = entry.getValue();
newDeleteTerms.put(entry.getKey(),
new Num(mapper.remap(num.getNum())));
}
} else
newDeleteTerms = null;
// Remap delete-by-docID
final List<Integer> newDeleteDocIDs;
if (docIDs.size() > 0) {
newDeleteDocIDs = new ArrayList<Integer>(docIDs.size());
for (Integer num : docIDs) {
newDeleteDocIDs.add(Integer.valueOf(mapper.remap(num.intValue())));
}
} else
newDeleteDocIDs = null;
// Remap delete-by-query
final HashMap<Query,Integer> newDeleteQueries;
if (queries.size() > 0) {
newDeleteQueries = new HashMap<Query, Integer>(queries.size());
for(Entry<Query,Integer> entry: queries.entrySet()) {
Integer num = entry.getValue();
newDeleteQueries.put(entry.getKey(),
Integer.valueOf(mapper.remap(num.intValue())));
}
} else
newDeleteQueries = null;
if (newDeleteTerms != null)
terms = newDeleteTerms;
if (newDeleteDocIDs != null)
docIDs = newDeleteDocIDs;
if (newDeleteQueries != null)
queries = newDeleteQueries;
}
}

View File

@ -0,0 +1,147 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Class that Posting and PostingVector use to write byte
* streams into shared fixed-size byte[] arrays. The idea
* is to allocate slices of increasing lengths For
* example, the first slice is 5 bytes, the next slice is
* 14, etc. We start by writing our bytes into the first
* 5 bytes. When we hit the end of the slice, we allocate
* the next slice and then write the address of the new
* slice into the last 4 bytes of the previous slice (the
* "forwarding address").
*
* Each slice is filled with 0's initially, and we mark
* the end with a non-zero byte. This way the methods
* that are writing into the slice don't need to record
* its length and instead allocate a new slice once they
* hit a non-zero byte. */
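/* A worked example of that growth, using the slice sizes from levelSizeArray
 * below: a stream starts in a 5 byte slice; when a write hits the non-zero end
 * marker, allocSlice copies the last 3 bytes forward into a fresh 14 byte slice,
 * writes the new slice's address over the last 4 bytes of the old one, and
 * writing continues in the new slice. Later overflows chain on through 20, 30,
 * 40, ... up to 200 byte slices. */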
import java.util.Arrays;
final class ByteBlockPool {
abstract static class Allocator {
abstract void recycleByteBlocks(byte[][] blocks, int start, int end);
abstract byte[] getByteBlock(boolean trackAllocations);
}
public byte[][] buffers = new byte[10][];
int bufferUpto = -1; // Which buffer we are upto
public int byteUpto = DocumentsWriter.BYTE_BLOCK_SIZE; // Where we are in head buffer
public byte[] buffer; // Current head buffer
public int byteOffset = -DocumentsWriter.BYTE_BLOCK_SIZE; // Current head offset
private final boolean trackAllocations;
private final Allocator allocator;
public ByteBlockPool(Allocator allocator, boolean trackAllocations) {
this.allocator = allocator;
this.trackAllocations = trackAllocations;
}
public void reset() {
if (bufferUpto != -1) {
// We allocated at least one buffer
for(int i=0;i<bufferUpto;i++)
// Fully zero fill buffers that we fully used
Arrays.fill(buffers[i], (byte) 0);
// Partial zero fill the final buffer
Arrays.fill(buffers[bufferUpto], 0, byteUpto, (byte) 0);
if (bufferUpto > 0)
// Recycle all but the first buffer
allocator.recycleByteBlocks(buffers, 1, 1+bufferUpto);
// Re-use the first buffer
bufferUpto = 0;
byteUpto = 0;
byteOffset = 0;
buffer = buffers[0];
}
}
public void nextBuffer() {
if (1+bufferUpto == buffers.length) {
byte[][] newBuffers = new byte[(int) (buffers.length*1.5)][];
System.arraycopy(buffers, 0, newBuffers, 0, buffers.length);
buffers = newBuffers;
}
buffer = buffers[1+bufferUpto] = allocator.getByteBlock(trackAllocations);
bufferUpto++;
byteUpto = 0;
byteOffset += DocumentsWriter.BYTE_BLOCK_SIZE;
}
public int newSlice(final int size) {
if (byteUpto > DocumentsWriter.BYTE_BLOCK_SIZE-size)
nextBuffer();
final int upto = byteUpto;
byteUpto += size;
buffer[byteUpto-1] = 16;
return upto;
}
// Size of each slice. These arrays should be at most 16
// elements (index is encoded with 4 bits). First array
// is just a compact way to encode X+1 with a max. Second
// array is the length of each slice, ie first slice is 5
// bytes, next slice is 14 bytes, etc.
final static int[] nextLevelArray = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9};
final static int[] levelSizeArray = {5, 14, 20, 30, 40, 40, 80, 80, 120, 200};
final static int FIRST_LEVEL_SIZE = levelSizeArray[0];
public int allocSlice(final byte[] slice, final int upto) {
final int level = slice[upto] & 15;
final int newLevel = nextLevelArray[level];
final int newSize = levelSizeArray[newLevel];
// Maybe allocate another block
if (byteUpto > DocumentsWriter.BYTE_BLOCK_SIZE-newSize)
nextBuffer();
final int newUpto = byteUpto;
final int offset = newUpto + byteOffset;
byteUpto += newSize;
// Copy forward the past 3 bytes (which we are about
// to overwrite with the forwarding address):
buffer[newUpto] = slice[upto-3];
buffer[newUpto+1] = slice[upto-2];
buffer[newUpto+2] = slice[upto-1];
// Write forwarding address at end of last slice:
slice[upto-3] = (byte) (offset >>> 24);
slice[upto-2] = (byte) (offset >>> 16);
slice[upto-1] = (byte) (offset >>> 8);
slice[upto] = (byte) offset;
// Write new level:
buffer[byteUpto-1] = (byte) (16|newLevel);
return newUpto+3;
}
}

View File

@ -0,0 +1,149 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import java.io.IOException;
/* IndexInput that knows how to read the byte slices written
* by Posting and PostingVector. We read the bytes in
* each slice until we hit the end of that slice at which
* point we read the forwarding address of the next slice
* and then jump to it.*/
final class ByteSliceReader extends IndexInput {
ByteBlockPool pool;
int bufferUpto;
byte[] buffer;
public int upto;
int limit;
int level;
public int bufferOffset;
public int endIndex;
public void init(ByteBlockPool pool, int startIndex, int endIndex) {
assert endIndex-startIndex >= 0;
assert startIndex >= 0;
assert endIndex >= 0;
this.pool = pool;
this.endIndex = endIndex;
level = 0;
bufferUpto = startIndex / DocumentsWriter.BYTE_BLOCK_SIZE;
bufferOffset = bufferUpto * DocumentsWriter.BYTE_BLOCK_SIZE;
buffer = pool.buffers[bufferUpto];
upto = startIndex & DocumentsWriter.BYTE_BLOCK_MASK;
final int firstSize = ByteBlockPool.levelSizeArray[0];
if (startIndex+firstSize >= endIndex) {
// There is only this one slice to read
limit = endIndex & DocumentsWriter.BYTE_BLOCK_MASK;
} else
limit = upto+firstSize-4;
}
public boolean eof() {
assert upto + bufferOffset <= endIndex;
return upto + bufferOffset == endIndex;
}
@Override
public byte readByte() {
assert !eof();
assert upto <= limit;
if (upto == limit)
nextSlice();
return buffer[upto++];
}
public long writeTo(IndexOutput out) throws IOException {
long size = 0;
while(true) {
if (limit + bufferOffset == endIndex) {
assert endIndex - bufferOffset >= upto;
out.writeBytes(buffer, upto, limit-upto);
size += limit-upto;
break;
} else {
out.writeBytes(buffer, upto, limit-upto);
size += limit-upto;
nextSlice();
}
}
return size;
}
public void nextSlice() {
// Skip to our next slice
final int nextIndex = ((buffer[limit]&0xff)<<24) + ((buffer[1+limit]&0xff)<<16) + ((buffer[2+limit]&0xff)<<8) + (buffer[3+limit]&0xff);
level = ByteBlockPool.nextLevelArray[level];
final int newSize = ByteBlockPool.levelSizeArray[level];
bufferUpto = nextIndex / DocumentsWriter.BYTE_BLOCK_SIZE;
bufferOffset = bufferUpto * DocumentsWriter.BYTE_BLOCK_SIZE;
buffer = pool.buffers[bufferUpto];
upto = nextIndex & DocumentsWriter.BYTE_BLOCK_MASK;
if (nextIndex + newSize >= endIndex) {
// We are advancing to the final slice
assert endIndex - nextIndex > 0;
limit = endIndex - bufferOffset;
} else {
// This is not the final slice (subtract 4 for the
// forwarding address at the end of this new slice)
limit = upto+newSize-4;
}
}
@Override
public void readBytes(byte[] b, int offset, int len) {
while(len > 0) {
final int numLeft = limit-upto;
if (numLeft < len) {
// Read entire slice
System.arraycopy(buffer, upto, b, offset, numLeft);
offset += numLeft;
len -= numLeft;
nextSlice();
} else {
// This slice is the last one
System.arraycopy(buffer, upto, b, offset, len);
upto += len;
break;
}
}
}
@Override
public long getFilePointer() {throw new RuntimeException("not implemented");}
@Override
public long length() {throw new RuntimeException("not implemented");}
@Override
public void seek(long pos) {throw new RuntimeException("not implemented");}
@Override
public void close() {throw new RuntimeException("not implemented");}
}

View File

@ -0,0 +1,89 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Class to write byte streams into slices of shared
* byte[]. This is used by DocumentsWriter to hold the
* posting list for many terms in RAM.
*/
final class ByteSliceWriter {
private byte[] slice;
private int upto;
private final ByteBlockPool pool;
int offset0;
public ByteSliceWriter(ByteBlockPool pool) {
this.pool = pool;
}
/**
* Set up the writer to write at address.
*/
public void init(int address) {
slice = pool.buffers[address >> DocumentsWriter.BYTE_BLOCK_SHIFT];
assert slice != null;
upto = address & DocumentsWriter.BYTE_BLOCK_MASK;
offset0 = address;
assert upto < slice.length;
}
/** Write byte into byte slice stream */
public void writeByte(byte b) {
assert slice != null;
if (slice[upto] != 0) {
upto = pool.allocSlice(slice, upto);
slice = pool.buffer;
offset0 = pool.byteOffset;
assert slice != null;
}
slice[upto++] = b;
assert upto != slice.length;
}
public void writeBytes(final byte[] b, int offset, final int len) {
final int offsetEnd = offset + len;
while(offset < offsetEnd) {
if (slice[upto] != 0) {
// End marker
upto = pool.allocSlice(slice, upto);
slice = pool.buffer;
offset0 = pool.byteOffset;
}
slice[upto++] = b[offset++];
assert upto != slice.length;
}
}
public int getAddress() {
return upto + (offset0 & DocumentsWriter.BYTE_BLOCK_NOT_MASK);
}
public void writeVInt(int i) {
while ((i & ~0x7F) != 0) {
writeByte((byte)((i & 0x7f) | 0x80));
i >>>= 7;
}
writeByte((byte) i);
}
}
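writeVInt() above uses Lucene's standard VInt layout: seven payload bits per byte, least-significant group first, with the high bit set on every byte except the last. The following standalone sketch shows the same encoding detached from the slice machinery; the class and method names are illustrative only.

import java.util.ArrayList;
import java.util.List;

// Illustration of the VInt encoding used by ByteSliceWriter.writeVInt:
// 7 bits of payload per byte, high bit set while more bytes follow.
public class VIntDemo {
  static List<Byte> encodeVInt(int i) {
    List<Byte> out = new ArrayList<Byte>();
    while ((i & ~0x7F) != 0) {
      out.add((byte) ((i & 0x7f) | 0x80));
      i >>>= 7;
    }
    out.add((byte) i);
    return out;
  }

  public static void main(String[] args) {
    // 300 = 0b1_0010_1100 -> bytes 0xAC, 0x02
    System.out.println(encodeVInt(300)); // prints [-84, 2]
  }
}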


@ -0,0 +1,56 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
final class CharBlockPool {
public char[][] buffers = new char[10][];
int numBuffer;
int bufferUpto = -1; // Which buffer we are upto
public int charUpto = DocumentsWriter.CHAR_BLOCK_SIZE; // Where we are in head buffer
public char[] buffer; // Current head buffer
public int charOffset = -DocumentsWriter.CHAR_BLOCK_SIZE; // Current head offset
final private DocumentsWriter docWriter;
public CharBlockPool(DocumentsWriter docWriter) {
this.docWriter = docWriter;
}
public void reset() {
docWriter.recycleCharBlocks(buffers, 1+bufferUpto);
bufferUpto = -1;
charUpto = DocumentsWriter.CHAR_BLOCK_SIZE;
charOffset = -DocumentsWriter.CHAR_BLOCK_SIZE;
}
public void nextBuffer() {
if (1+bufferUpto == buffers.length) {
char[][] newBuffers = new char[(int) (buffers.length*1.5)][];
System.arraycopy(buffers, 0, newBuffers, 0, buffers.length);
buffers = newBuffers;
}
buffer = buffers[1+bufferUpto] = docWriter.getCharBlock();
bufferUpto++;
charUpto = 0;
charOffset += DocumentsWriter.CHAR_BLOCK_SIZE;
}
}


@ -0,0 +1,911 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.document.AbstractField; // for javadocs
import org.apache.lucene.document.Document;
import java.text.NumberFormat;
import java.io.PrintStream;
import java.io.IOException;
import java.io.File;
import java.util.Collection;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
/**
* Basic tool and API to check the health of an index and
* write a new segments file that removes reference to
* problematic segments.
*
* <p>As this tool checks every byte in the index, on a large
* index it can take quite a long time to run.
*
* <p><b>WARNING</b>: this tool and API are new and
* experimental and may change suddenly in the
* next release. Please make a complete backup of your
* index before using this to fix your index!
*/
public class CheckIndex {
private PrintStream infoStream;
private Directory dir;
/**
* Returned from {@link #checkIndex()} detailing the health and status of the index.
*
* <p><b>WARNING</b>: this API is new and experimental and is
* subject to change suddenly in the next release.
**/
public static class Status {
/** True if no problems were found with the index. */
public boolean clean;
/** True if we were unable to locate and load the segments_N file. */
public boolean missingSegments;
/** True if we were unable to open the segments_N file. */
public boolean cantOpenSegments;
/** True if we were unable to read the version number from segments_N file. */
public boolean missingSegmentVersion;
/** Name of latest segments_N file in the index. */
public String segmentsFileName;
/** Number of segments in the index. */
public int numSegments;
/** String description of the version of the index. */
public String segmentFormat;
/** Empty unless you passed specific segments list to check as optional 3rd argument.
* @see CheckIndex#checkIndex(List) */
public List<String> segmentsChecked = new ArrayList<String>();
/** True if the index was created with a newer version of Lucene than the CheckIndex tool. */
public boolean toolOutOfDate;
/** List of {@link SegmentInfoStatus} instances, detailing status of each segment. */
public List<SegmentInfoStatus> segmentInfos = new ArrayList<SegmentInfoStatus>();
/** Directory index is in. */
public Directory dir;
/**
* SegmentInfos instance containing only segments that
* had no problems (this is used with the {@link CheckIndex#fixIndex}
* method to repair the index).
*/
SegmentInfos newSegments;
/** How many documents will be lost to bad segments. */
public int totLoseDocCount;
/** How many bad segments were found. */
public int numBadSegments;
/** True if we checked only specific segments ({@link
* #checkIndex(List)} was called with a non-null
* argument). */
public boolean partial;
/** Holds the userData of the last commit in the index */
public Map<String, String> userData;
/** Holds the status of each segment in the index.
* See {@link #segmentInfos}.
*
* <p><b>WARNING</b>: this API is new and experimental and is
* subject to change suddenly in the next release.
*/
public static class SegmentInfoStatus {
/** Name of the segment. */
public String name;
/** Document count (does not take deletions into account). */
public int docCount;
/** True if segment is compound file format. */
public boolean compound;
/** Number of files referenced by this segment. */
public int numFiles;
/** Net size (MB) of the files referenced by this
* segment. */
public double sizeMB;
/** Doc store offset, if this segment shares the doc
* store files (stored fields and term vectors) with
* other segments. This is -1 if it does not share. */
public int docStoreOffset = -1;
/** String of the shared doc store segment, or null if
* this segment does not share the doc store files. */
public String docStoreSegment;
/** True if the shared doc store files are compound file
* format. */
public boolean docStoreCompoundFile;
/** True if this segment has pending deletions. */
public boolean hasDeletions;
/** Name of the current deletions file name. */
public String deletionsFileName;
/** Number of deleted documents. */
public int numDeleted;
/** True if we were able to open a SegmentReader on this
* segment. */
public boolean openReaderPassed;
/** Number of fields in this segment. */
int numFields;
/** True if at least one of the fields in this segment
* does not omitTermFreqAndPositions.
* @see AbstractField#setOmitTermFreqAndPositions */
public boolean hasProx;
/** Map that includes certain
* debugging details that IndexWriter records into
* each segment it creates */
public Map<String,String> diagnostics;
/** Status for testing of field norms (null if field norms could not be tested). */
public FieldNormStatus fieldNormStatus;
/** Status for testing of indexed terms (null if indexed terms could not be tested). */
public TermIndexStatus termIndexStatus;
/** Status for testing of stored fields (null if stored fields could not be tested). */
public StoredFieldStatus storedFieldStatus;
/** Status for testing of term vectors (null if term vectors could not be tested). */
public TermVectorStatus termVectorStatus;
}
/**
* Status from testing field norms.
*/
public static final class FieldNormStatus {
/** Number of fields successfully tested */
public long totFields = 0L;
/** Exception thrown during term index test (null on success) */
public Throwable error = null;
}
/**
* Status from testing term index.
*/
public static final class TermIndexStatus {
/** Total term count */
public long termCount = 0L;
/** Total frequency across all terms. */
public long totFreq = 0L;
/** Total number of positions. */
public long totPos = 0L;
/** Exception thrown during term index test (null on success) */
public Throwable error = null;
}
/**
* Status from testing stored fields.
*/
public static final class StoredFieldStatus {
/** Number of documents tested. */
public int docCount = 0;
/** Total number of stored fields tested. */
public long totFields = 0;
/** Exception thrown during stored fields test (null on success) */
public Throwable error = null;
}
/**
* Status from testing term vectors.
*/
public static final class TermVectorStatus {
/** Number of documents tested. */
public int docCount = 0;
/** Total number of term vectors tested. */
public long totVectors = 0;
/** Exception thrown during term vector test (null on success) */
public Throwable error = null;
}
}
/** Create a new CheckIndex on the directory. */
public CheckIndex(Directory dir) {
this.dir = dir;
infoStream = null;
}
/** Set infoStream where messages should go. If null, no
* messages are printed */
public void setInfoStream(PrintStream out) {
infoStream = out;
}
private void msg(String msg) {
if (infoStream != null)
infoStream.println(msg);
}
private static class MySegmentTermDocs extends SegmentTermDocs {
int delCount;
MySegmentTermDocs(SegmentReader p) {
super(p);
}
@Override
public void seek(Term term) throws IOException {
super.seek(term);
delCount = 0;
}
@Override
protected void skippingDoc() throws IOException {
delCount++;
}
}
/** Returns a {@link Status} instance detailing
* the state of the index.
*
* <p>As this method checks every byte in the index, on a large
* index it can take quite a long time to run.
*
* <p><b>WARNING</b>: make sure
* you only call this when the index is not opened by any
* writer. */
public Status checkIndex() throws IOException {
return checkIndex(null);
}
/** Returns a {@link Status} instance detailing
* the state of the index.
*
* @param onlySegments list of specific segment names to check
*
* <p>As this method checks every byte in the specified
* segments, on a large index it can take quite a long
* time to run.
*
* <p><b>WARNING</b>: make sure
* you only call this when the index is not opened by any
* writer. */
public Status checkIndex(List<String> onlySegments) throws IOException {
NumberFormat nf = NumberFormat.getInstance();
SegmentInfos sis = new SegmentInfos();
Status result = new Status();
result.dir = dir;
try {
sis.read(dir);
} catch (Throwable t) {
msg("ERROR: could not read any segments file in directory");
result.missingSegments = true;
if (infoStream != null)
t.printStackTrace(infoStream);
return result;
}
final int numSegments = sis.size();
final String segmentsFileName = sis.getCurrentSegmentFileName();
IndexInput input = null;
try {
input = dir.openInput(segmentsFileName);
} catch (Throwable t) {
msg("ERROR: could not open segments file in directory");
if (infoStream != null)
t.printStackTrace(infoStream);
result.cantOpenSegments = true;
return result;
}
int format = 0;
try {
format = input.readInt();
} catch (Throwable t) {
msg("ERROR: could not read segment file version in directory");
if (infoStream != null)
t.printStackTrace(infoStream);
result.missingSegmentVersion = true;
return result;
} finally {
if (input != null)
input.close();
}
String sFormat = "";
boolean skip = false;
if (format == SegmentInfos.FORMAT)
sFormat = "FORMAT [Lucene Pre-2.1]";
if (format == SegmentInfos.FORMAT_LOCKLESS)
sFormat = "FORMAT_LOCKLESS [Lucene 2.1]";
else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE)
sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]";
else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE)
sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]";
else {
if (format == SegmentInfos.FORMAT_CHECKSUM)
sFormat = "FORMAT_CHECKSUM [Lucene 2.4]";
else if (format == SegmentInfos.FORMAT_DEL_COUNT)
sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]";
else if (format == SegmentInfos.FORMAT_HAS_PROX)
sFormat = "FORMAT_HAS_PROX [Lucene 2.4]";
else if (format == SegmentInfos.FORMAT_USER_DATA)
sFormat = "FORMAT_USER_DATA [Lucene 2.9]";
else if (format == SegmentInfos.FORMAT_DIAGNOSTICS)
sFormat = "FORMAT_DIAGNOSTICS [Lucene 2.9]";
else if (format < SegmentInfos.CURRENT_FORMAT) {
sFormat = "int=" + format + " [newer version of Lucene than this tool]";
skip = true;
} else {
sFormat = format + " [Lucene 1.3 or prior]";
}
}
result.segmentsFileName = segmentsFileName;
result.numSegments = numSegments;
result.segmentFormat = sFormat;
result.userData = sis.getUserData();
String userDataString;
if (sis.getUserData().size() > 0) {
userDataString = " userData=" + sis.getUserData();
} else {
userDataString = "";
}
msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat + userDataString);
if (onlySegments != null) {
result.partial = true;
if (infoStream != null)
infoStream.print("\nChecking only these segments:");
for (String s : onlySegments) {
if (infoStream != null)
infoStream.print(" " + s);
}
result.segmentsChecked.addAll(onlySegments);
msg(":");
}
if (skip) {
msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting");
result.toolOutOfDate = true;
return result;
}
result.newSegments = (SegmentInfos) sis.clone();
result.newSegments.clear();
for(int i=0;i<numSegments;i++) {
final SegmentInfo info = sis.info(i);
if (onlySegments != null && !onlySegments.contains(info.name))
continue;
Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus();
result.segmentInfos.add(segInfoStat);
msg(" " + (1+i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount);
segInfoStat.name = info.name;
segInfoStat.docCount = info.docCount;
int toLoseDocCount = info.docCount;
SegmentReader reader = null;
try {
msg(" compound=" + info.getUseCompoundFile());
segInfoStat.compound = info.getUseCompoundFile();
msg(" hasProx=" + info.getHasProx());
segInfoStat.hasProx = info.getHasProx();
msg(" numFiles=" + info.files().size());
segInfoStat.numFiles = info.files().size();
msg(" size (MB)=" + nf.format(info.sizeInBytes()/(1024.*1024.)));
segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.);
Map<String,String> diagnostics = info.getDiagnostics();
segInfoStat.diagnostics = diagnostics;
if (diagnostics.size() > 0) {
msg(" diagnostics = " + diagnostics);
}
final int docStoreOffset = info.getDocStoreOffset();
if (docStoreOffset != -1) {
msg(" docStoreOffset=" + docStoreOffset);
segInfoStat.docStoreOffset = docStoreOffset;
msg(" docStoreSegment=" + info.getDocStoreSegment());
segInfoStat.docStoreSegment = info.getDocStoreSegment();
msg(" docStoreIsCompoundFile=" + info.getDocStoreIsCompoundFile());
segInfoStat.docStoreCompoundFile = info.getDocStoreIsCompoundFile();
}
final String delFileName = info.getDelFileName();
if (delFileName == null){
msg(" no deletions");
segInfoStat.hasDeletions = false;
}
else{
msg(" has deletions [delFileName=" + delFileName + "]");
segInfoStat.hasDeletions = true;
segInfoStat.deletionsFileName = delFileName;
}
if (infoStream != null)
infoStream.print(" test: open reader.........");
reader = SegmentReader.get(true, info, IndexReader.DEFAULT_TERMS_INDEX_DIVISOR);
segInfoStat.openReaderPassed = true;
final int numDocs = reader.numDocs();
toLoseDocCount = numDocs;
if (reader.hasDeletions()) {
if (reader.deletedDocs.count() != info.getDelCount()) {
throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs deletedDocs.count()=" + reader.deletedDocs.count());
}
if (reader.deletedDocs.count() > reader.maxDoc()) {
throw new RuntimeException("too many deleted docs: maxDoc()=" + reader.maxDoc() + " vs deletedDocs.count()=" + reader.deletedDocs.count());
}
if (info.docCount - numDocs != info.getDelCount()){
throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs));
}
segInfoStat.numDeleted = info.docCount - numDocs;
msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]");
} else {
if (info.getDelCount() != 0) {
throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs));
}
msg("OK");
}
if (reader.maxDoc() != info.docCount)
throw new RuntimeException("SegmentReader.maxDoc() " + reader.maxDoc() + " != SegmentInfos.docCount " + info.docCount);
// Test getFieldNames()
if (infoStream != null) {
infoStream.print(" test: fields..............");
}
Collection<String> fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL);
msg("OK [" + fieldNames.size() + " fields]");
segInfoStat.numFields = fieldNames.size();
// Test Field Norms
segInfoStat.fieldNormStatus = testFieldNorms(fieldNames, reader);
// Test the Term Index
segInfoStat.termIndexStatus = testTermIndex(info, reader);
// Test Stored Fields
segInfoStat.storedFieldStatus = testStoredFields(info, reader, nf);
// Test Term Vectors
segInfoStat.termVectorStatus = testTermVectors(info, reader, nf);
// Rethrow the first exception we encountered
// This will cause stats for failed segments to be incremented properly
if (segInfoStat.fieldNormStatus.error != null) {
throw new RuntimeException("Field Norm test failed");
} else if (segInfoStat.termIndexStatus.error != null) {
throw new RuntimeException("Term Index test failed");
} else if (segInfoStat.storedFieldStatus.error != null) {
throw new RuntimeException("Stored Field test failed");
} else if (segInfoStat.termVectorStatus.error != null) {
throw new RuntimeException("Term Vector test failed");
}
msg("");
} catch (Throwable t) {
msg("FAILED");
String comment;
comment = "fixIndex() would remove reference to this segment";
msg(" WARNING: " + comment + "; full exception:");
if (infoStream != null)
t.printStackTrace(infoStream);
msg("");
result.totLoseDocCount += toLoseDocCount;
result.numBadSegments++;
continue;
} finally {
if (reader != null)
reader.close();
}
// Keeper
result.newSegments.add((SegmentInfo) info.clone());
}
if (0 == result.numBadSegments) {
result.clean = true;
msg("No problems were detected with this index.\n");
} else
msg("WARNING: " + result.numBadSegments + " broken segments (containing " + result.totLoseDocCount + " documents) detected");
return result;
}
/**
* Test field norms.
*/
private Status.FieldNormStatus testFieldNorms(Collection<String> fieldNames, SegmentReader reader) {
final Status.FieldNormStatus status = new Status.FieldNormStatus();
try {
// Test Field Norms
if (infoStream != null) {
infoStream.print(" test: field norms.........");
}
final byte[] b = new byte[reader.maxDoc()];
for (final String fieldName : fieldNames) {
reader.norms(fieldName, b, 0);
++status.totFields;
}
msg("OK [" + status.totFields + " fields]");
} catch (Throwable e) {
msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
status.error = e;
if (infoStream != null) {
e.printStackTrace(infoStream);
}
}
return status;
}
/**
* Test the term index.
*/
private Status.TermIndexStatus testTermIndex(SegmentInfo info, SegmentReader reader) {
final Status.TermIndexStatus status = new Status.TermIndexStatus();
try {
if (infoStream != null) {
infoStream.print(" test: terms, freq, prox...");
}
final TermEnum termEnum = reader.terms();
final TermPositions termPositions = reader.termPositions();
// Used only to count up # deleted docs for this term
final MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader);
final int maxDoc = reader.maxDoc();
while (termEnum.next()) {
status.termCount++;
final Term term = termEnum.term();
final int docFreq = termEnum.docFreq();
termPositions.seek(term);
int lastDoc = -1;
int freq0 = 0;
status.totFreq += docFreq;
while (termPositions.next()) {
freq0++;
final int doc = termPositions.doc();
final int freq = termPositions.freq();
if (doc <= lastDoc)
throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
if (doc >= maxDoc)
throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
lastDoc = doc;
if (freq <= 0)
throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds");
int lastPos = -1;
status.totPos += freq;
for(int j=0;j<freq;j++) {
final int pos = termPositions.nextPosition();
if (pos < -1)
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " is out of bounds");
if (pos < lastPos)
throw new RuntimeException("term " + term + ": doc " + doc + ": pos " + pos + " < lastPos " + lastPos);
lastPos = pos;
}
}
// Now count how many deleted docs occurred in
// this term:
final int delCount;
if (reader.hasDeletions()) {
myTermDocs.seek(term);
while(myTermDocs.next()) { }
delCount = myTermDocs.delCount;
} else {
delCount = 0;
}
if (freq0 + delCount != docFreq) {
throw new RuntimeException("term " + term + " docFreq=" +
docFreq + " != num docs seen " + freq0 + " + num docs deleted " + delCount);
}
}
msg("OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]");
} catch (Throwable e) {
msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
status.error = e;
if (infoStream != null) {
e.printStackTrace(infoStream);
}
}
return status;
}
/**
* Test stored fields for a segment.
*/
private Status.StoredFieldStatus testStoredFields(SegmentInfo info, SegmentReader reader, NumberFormat format) {
final Status.StoredFieldStatus status = new Status.StoredFieldStatus();
try {
if (infoStream != null) {
infoStream.print(" test: stored fields.......");
}
// Scan stored fields for all documents
for (int j = 0; j < info.docCount; ++j) {
if (!reader.isDeleted(j)) {
status.docCount++;
Document doc = reader.document(j);
status.totFields += doc.getFields().size();
}
}
// Validate docCount
if (status.docCount != reader.numDocs()) {
throw new RuntimeException("docCount=" + status.docCount + " but saw " + status.docCount + " undeleted docs");
}
msg("OK [" + status.totFields + " total field count; avg " +
format.format((((float) status.totFields)/status.docCount)) + " fields per doc]");
} catch (Throwable e) {
msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
status.error = e;
if (infoStream != null) {
e.printStackTrace(infoStream);
}
}
return status;
}
/**
* Test term vectors for a segment.
*/
private Status.TermVectorStatus testTermVectors(SegmentInfo info, SegmentReader reader, NumberFormat format) {
final Status.TermVectorStatus status = new Status.TermVectorStatus();
try {
if (infoStream != null) {
infoStream.print(" test: term vectors........");
}
for (int j = 0; j < info.docCount; ++j) {
if (!reader.isDeleted(j)) {
status.docCount++;
TermFreqVector[] tfv = reader.getTermFreqVectors(j);
if (tfv != null) {
status.totVectors += tfv.length;
}
}
}
msg("OK [" + status.totVectors + " total vector count; avg " +
format.format((((float) status.totVectors) / status.docCount)) + " term/freq vector fields per doc]");
} catch (Throwable e) {
msg("ERROR [" + String.valueOf(e.getMessage()) + "]");
status.error = e;
if (infoStream != null) {
e.printStackTrace(infoStream);
}
}
return status;
}
/** Repairs the index using previously returned result
* from {@link #checkIndex}. Note that this does not
* remove any of the unreferenced files after it's done;
* you must separately open an {@link IndexWriter}, which
* deletes unreferenced files when it's created.
*
* <p><b>WARNING</b>: this writes a
* new segments file into the index, effectively removing
* all documents in broken segments from the index.
* BE CAREFUL.
*
* <p><b>WARNING</b>: Make sure you only call this when the
* index is not opened by any writer. */
public void fixIndex(Status result) throws IOException {
if (result.partial)
throw new IllegalArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)");
result.newSegments.commit(result.dir);
}
private static boolean assertsOn;
private static boolean testAsserts() {
assertsOn = true;
return true;
}
private static boolean assertsOn() {
assert testAsserts();
return assertsOn;
}
/** Command-line interface to check and fix an index.
<p>
Run it like this:
<pre>
java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
</pre>
<ul>
<li><code>-fix</code>: actually write a new segments_N file, removing any problematic segments
<li><code>-segment X</code>: only check the specified
segment(s). This can be specified multiple times,
to check more than one segment, eg <code>-segment _2
-segment _a</code>. You can't use this with the -fix
option.
</ul>
<p><b>WARNING</b>: <code>-fix</code> should only be used on an emergency basis as it will cause
documents (perhaps many) to be permanently removed from the index. Always make
a backup copy of your index before running this! Do not run this tool on an index
that is actively being written to. You have been warned!
<p> Run without -fix, this tool will open the index, report version information
and report any exceptions it hits and what action it would take if -fix were
specified. With -fix, this tool will remove any segments that have issues and
write a new segments_N file. This means all documents contained in the affected
segments will be removed.
<p>
This tool exits with exit code 1 if the index cannot be opened or has any
corruption, else 0.
*/
public static void main(String[] args) throws IOException, InterruptedException {
boolean doFix = false;
List<String> onlySegments = new ArrayList<String>();
String indexPath = null;
int i = 0;
while(i < args.length) {
if (args[i].equals("-fix")) {
doFix = true;
i++;
} else if (args[i].equals("-segment")) {
if (i == args.length-1) {
System.out.println("ERROR: missing name for -segment option");
System.exit(1);
}
onlySegments.add(args[i+1]);
i += 2;
} else {
if (indexPath != null) {
System.out.println("ERROR: unexpected extra argument '" + args[i] + "'");
System.exit(1);
}
indexPath = args[i];
i++;
}
}
if (indexPath == null) {
System.out.println("\nERROR: index path not specified");
System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" +
"\n" +
" -fix: actually write a new segments_N file, removing any problematic segments\n" +
" -segment X: only check the specified segments. This can be specified multiple\n" +
" times, to check more than one segment, eg '-segment _2 -segment _a'.\n" +
" You can't use this with the -fix option\n" +
"\n" +
"**WARNING**: -fix should only be used on an emergency basis as it will cause\n" +
"documents (perhaps many) to be permanently removed from the index. Always make\n" +
"a backup copy of your index before running this! Do not run this tool on an index\n" +
"that is actively being written to. You have been warned!\n" +
"\n" +
"Run without -fix, this tool will open the index, report version information\n" +
"and report any exceptions it hits and what action it would take if -fix were\n" +
"specified. With -fix, this tool will remove any segments that have issues and\n" +
"write a new segments_N file. This means all documents contained in the affected\n" +
"segments will be removed.\n" +
"\n" +
"This tool exits with exit code 1 if the index cannot be opened or has any\n" +
"corruption, else 0.\n");
System.exit(1);
}
if (!assertsOn())
System.out.println("\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");
if (onlySegments.size() == 0)
onlySegments = null;
else if (doFix) {
System.out.println("ERROR: cannot specify both -fix and -segment");
System.exit(1);
}
System.out.println("\nOpening index @ " + indexPath + "\n");
Directory dir = null;
try {
dir = FSDirectory.open(new File(indexPath));
} catch (Throwable t) {
System.out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting");
t.printStackTrace(System.out);
System.exit(1);
}
CheckIndex checker = new CheckIndex(dir);
checker.setInfoStream(System.out);
Status result = checker.checkIndex(onlySegments);
if (result.missingSegments) {
System.exit(1);
}
if (!result.clean) {
if (!doFix) {
System.out.println("WARNING: would write new segments file, and " + result.totLoseDocCount + " documents would be lost, if -fix were specified\n");
} else {
System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n");
System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!");
for(int s=0;s<5;s++) {
Thread.sleep(1000);
System.out.println(" " + (5-s) + "...");
}
System.out.println("Writing...");
checker.fixIndex(result);
System.out.println("OK");
System.out.println("Wrote new segments file \"" + result.newSegments.getCurrentSegmentFileName() + "\"");
}
}
System.out.println("");
final int exitCode;
if (result != null && result.clean == true)
exitCode = 0;
else
exitCode = 1;
System.exit(exitCode);
}
}
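As a rough usage sketch (not part of this commit), the checks above can also be driven programmatically through the public API shown in this file. The class name CheckIndexDemo and the index path are placeholders.

import java.io.File;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

// Hypothetical driver for CheckIndex; "/path/to/index" is a placeholder.
public class CheckIndexDemo {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File("/path/to/index"));
    CheckIndex checker = new CheckIndex(dir);
    checker.setInfoStream(System.out);              // print per-segment details
    CheckIndex.Status status = checker.checkIndex();
    if (!status.clean) {
      // fixIndex() writes a new segments_N that drops broken segments; back up first!
      checker.fixIndex(status);
    }
    dir.close();
  }
}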


@ -0,0 +1,281 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
import java.util.HashMap;
import java.io.IOException;
/**
* Class for accessing a compound stream.
* This class implements a directory, but is limited to only read operations.
* Directory methods that would normally modify data throw an exception.
*/
class CompoundFileReader extends Directory {
private int readBufferSize;
private static final class FileEntry {
long offset;
long length;
}
// Base info
private Directory directory;
private String fileName;
private IndexInput stream;
private HashMap<String,FileEntry> entries = new HashMap<String,FileEntry>();
public CompoundFileReader(Directory dir, String name) throws IOException {
this(dir, name, BufferedIndexInput.BUFFER_SIZE);
}
public CompoundFileReader(Directory dir, String name, int readBufferSize)
throws IOException
{
directory = dir;
fileName = name;
this.readBufferSize = readBufferSize;
boolean success = false;
try {
stream = dir.openInput(name, readBufferSize);
// read the directory and init files
int count = stream.readVInt();
FileEntry entry = null;
for (int i=0; i<count; i++) {
long offset = stream.readLong();
String id = stream.readString();
if (entry != null) {
// set length of the previous entry
entry.length = offset - entry.offset;
}
entry = new FileEntry();
entry.offset = offset;
entries.put(id, entry);
}
// set the length of the final entry
if (entry != null) {
entry.length = stream.length() - entry.offset;
}
success = true;
} finally {
if (! success && (stream != null)) {
try {
stream.close();
} catch (IOException e) { }
}
}
}
public Directory getDirectory() {
return directory;
}
public String getName() {
return fileName;
}
@Override
public synchronized void close() throws IOException {
if (stream == null)
throw new IOException("Already closed");
entries.clear();
stream.close();
stream = null;
}
@Override
public synchronized IndexInput openInput(String id)
throws IOException
{
// Default to readBufferSize passed in when we were opened
return openInput(id, readBufferSize);
}
@Override
public synchronized IndexInput openInput(String id, int readBufferSize)
throws IOException
{
if (stream == null)
throw new IOException("Stream closed");
FileEntry entry = entries.get(id);
if (entry == null)
throw new IOException("No sub-file with id " + id + " found");
return new CSIndexInput(stream, entry.offset, entry.length, readBufferSize);
}
/** Returns an array of strings, one for each file in the directory. */
@Override
public String[] listAll() {
String res[] = new String[entries.size()];
return entries.keySet().toArray(res);
}
/** Returns true iff a file with the given name exists. */
@Override
public boolean fileExists(String name) {
return entries.containsKey(name);
}
/** Returns the time the compound file was last modified. */
@Override
public long fileModified(String name) throws IOException {
return directory.fileModified(fileName);
}
/** Set the modified time of the compound file to now. */
@Override
public void touchFile(String name) throws IOException {
directory.touchFile(fileName);
}
/** Not implemented
* @throws UnsupportedOperationException */
@Override
public void deleteFile(String name)
{
throw new UnsupportedOperationException();
}
/** Not implemented
* @throws UnsupportedOperationException */
public void renameFile(String from, String to)
{
throw new UnsupportedOperationException();
}
/** Returns the length of a file in the directory.
* @throws IOException if the file does not exist */
@Override
public long fileLength(String name)
throws IOException
{
FileEntry e = entries.get(name);
if (e == null)
throw new IOException("File " + name + " does not exist");
return e.length;
}
/** Not implemented
* @throws UnsupportedOperationException */
@Override
public IndexOutput createOutput(String name)
{
throw new UnsupportedOperationException();
}
/** Not implemented
* @throws UnsupportedOperationException */
@Override
public Lock makeLock(String name)
{
throw new UnsupportedOperationException();
}
/** Implementation of an IndexInput that reads from a portion of the
* compound file. The visibility is left as "package" *only* because
* this helps with testing since JUnit test cases in a different class
* can then access package fields of this class.
*/
static final class CSIndexInput extends BufferedIndexInput {
IndexInput base;
long fileOffset;
long length;
CSIndexInput(final IndexInput base, final long fileOffset, final long length)
{
this(base, fileOffset, length, BufferedIndexInput.BUFFER_SIZE);
}
CSIndexInput(final IndexInput base, final long fileOffset, final long length, int readBufferSize)
{
super(readBufferSize);
this.base = (IndexInput)base.clone();
this.fileOffset = fileOffset;
this.length = length;
}
@Override
public Object clone() {
CSIndexInput clone = (CSIndexInput)super.clone();
clone.base = (IndexInput)base.clone();
clone.fileOffset = fileOffset;
clone.length = length;
return clone;
}
/** Expert: implements buffer refill. Reads bytes from the current
* position in the input.
* @param b the array to read bytes into
* @param offset the offset in the array to start storing bytes
* @param len the number of bytes to read
*/
@Override
protected void readInternal(byte[] b, int offset, int len)
throws IOException
{
long start = getFilePointer();
if(start + len > length)
throw new IOException("read past EOF");
base.seek(fileOffset + start);
base.readBytes(b, offset, len, false);
}
/** Expert: implements seek. Sets current position in this file, where
* the next {@link #readInternal(byte[],int,int)} will occur.
* @see #readInternal(byte[],int,int)
*/
@Override
protected void seekInternal(long pos) {}
/** Closes the stream to further operations. */
@Override
public void close() throws IOException {
base.close();
}
@Override
public long length() {
return length;
}
}
}
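For illustration, a package-private caller could enumerate and read the sub-files of a compound file as sketched below. The class name, index path, and file names ("_0.cfs", "_0.fnm") are made up; inside Lucene this class is normally used by SegmentReader rather than directly.

package org.apache.lucene.index;

import java.io.File;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IndexInput;

// Hypothetical sketch: list and read the sub-files of a compound file.
// CompoundFileReader is package-private, so this lives in org.apache.lucene.index.
class CompoundFileReaderDemo {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File("/path/to/index")); // placeholder path
    CompoundFileReader cfs = new CompoundFileReader(dir, "_0.cfs"); // placeholder name
    for (String sub : cfs.listAll()) {
      System.out.println(sub + " (" + cfs.fileLength(sub) + " bytes)");
    }
    IndexInput in = cfs.openInput("_0.fnm"); // read one sub-file; name is illustrative
    byte[] first = new byte[(int) Math.min(16, in.length())];
    in.readBytes(first, 0, first.length);
    in.close();
    cfs.close();
    dir.close();
  }
}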


@ -0,0 +1,247 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.IndexInput;
import java.util.LinkedList;
import java.util.HashSet;
import java.io.IOException;
/**
* Combines multiple files into a single compound file.
* The file format:<br>
* <ul>
* <li>VInt fileCount</li>
* <li>{Directory}
* fileCount entries with the following structure:</li>
* <ul>
* <li>long dataOffset</li>
* <li>String fileName</li>
* </ul>
* <li>{File Data}
* fileCount entries with the raw data of the corresponding file</li>
* </ul>
*
* The fileCount integer indicates how many files are contained in this compound
* file. The {directory} that follows has that many entries. Each directory entry
* contains a long pointer to the start of this file's data section, and a String
* with that file's name.
*/
final class CompoundFileWriter {
private static final class FileEntry {
/** source file */
String file;
/** temporary holder for the start of directory entry for this file */
long directoryOffset;
/** temporary holder for the start of this file's data section */
long dataOffset;
}
private Directory directory;
private String fileName;
private HashSet<String> ids;
private LinkedList<FileEntry> entries;
private boolean merged = false;
private SegmentMerger.CheckAbort checkAbort;
/** Create the compound stream in the specified file. The file name is the
* entire name (no extensions are added).
* @throws NullPointerException if <code>dir</code> or <code>name</code> is null
*/
public CompoundFileWriter(Directory dir, String name) {
this(dir, name, null);
}
CompoundFileWriter(Directory dir, String name, SegmentMerger.CheckAbort checkAbort) {
if (dir == null)
throw new NullPointerException("directory cannot be null");
if (name == null)
throw new NullPointerException("name cannot be null");
this.checkAbort = checkAbort;
directory = dir;
fileName = name;
ids = new HashSet<String>();
entries = new LinkedList<FileEntry>();
}
/** Returns the directory of the compound file. */
public Directory getDirectory() {
return directory;
}
/** Returns the name of the compound file. */
public String getName() {
return fileName;
}
/** Add a source stream. <code>file</code> is the string by which the
* sub-stream will be known in the compound stream.
*
* @throws IllegalStateException if this writer is closed
* @throws NullPointerException if <code>file</code> is null
* @throws IllegalArgumentException if a file with the same name
* has been added already
*/
public void addFile(String file) {
if (merged)
throw new IllegalStateException(
"Can't add extensions after merge has been called");
if (file == null)
throw new NullPointerException(
"file cannot be null");
if (! ids.add(file))
throw new IllegalArgumentException(
"File " + file + " already added");
FileEntry entry = new FileEntry();
entry.file = file;
entries.add(entry);
}
/** Merge files with the extensions added up to now.
* All files with these extensions are combined sequentially into the
* compound stream. After successful merge, the source files
* are deleted.
* @throws IllegalStateException if close() had been called before or
* if no file has been added to this object
*/
public void close() throws IOException {
if (merged)
throw new IllegalStateException(
"Merge already performed");
if (entries.isEmpty())
throw new IllegalStateException(
"No entries to merge have been defined");
merged = true;
// open the compound stream
IndexOutput os = null;
try {
os = directory.createOutput(fileName);
// Write the number of entries
os.writeVInt(entries.size());
// Write the directory with all offsets at 0.
// Remember the positions of directory entries so that we can
// adjust the offsets later
long totalSize = 0;
for (FileEntry fe : entries) {
fe.directoryOffset = os.getFilePointer();
os.writeLong(0); // for now
os.writeString(fe.file);
totalSize += directory.fileLength(fe.file);
}
// Pre-allocate size of file as optimization --
// this can potentially help IO performance as
// we write the file and also later during
// searching. It also uncovers a disk-full
// situation earlier and hopefully without
// actually filling disk to 100%:
final long finalLength = totalSize+os.getFilePointer();
os.setLength(finalLength);
// Open the files and copy their data into the stream.
// Remember the locations of each file's data section.
byte buffer[] = new byte[16384];
for (FileEntry fe : entries) {
fe.dataOffset = os.getFilePointer();
copyFile(fe, os, buffer);
}
// Write the data offsets into the directory of the compound stream
for (FileEntry fe : entries) {
os.seek(fe.directoryOffset);
os.writeLong(fe.dataOffset);
}
assert finalLength == os.length();
// Close the output stream. Set the os to null before trying to
// close so that if an exception occurs during the close, the
// finally clause below will not attempt to close the stream
// the second time.
IndexOutput tmp = os;
os = null;
tmp.close();
} finally {
if (os != null) try { os.close(); } catch (IOException e) { }
}
}
/** Copy the contents of the file with specified extension into the
* provided output stream. Use the provided buffer for moving data
* to reduce memory allocation.
*/
private void copyFile(FileEntry source, IndexOutput os, byte buffer[])
throws IOException
{
IndexInput is = null;
try {
long startPtr = os.getFilePointer();
is = directory.openInput(source.file);
long length = is.length();
long remainder = length;
int chunk = buffer.length;
while(remainder > 0) {
int len = (int) Math.min(chunk, remainder);
is.readBytes(buffer, 0, len, false);
os.writeBytes(buffer, len);
remainder -= len;
if (checkAbort != null)
// Roughly every 2 MB we will check if
// it's time to abort
checkAbort.work(80);
}
// Verify that remainder is 0
if (remainder != 0)
throw new IOException(
"Non-zero remainder length after copying: " + remainder
+ " (id: " + source.file + ", length: " + length
+ ", buffer size: " + chunk + ")");
// Verify that the output length diff is equal to original file
long endPtr = os.getFilePointer();
long diff = endPtr - startPtr;
if (diff != length)
throw new IOException(
"Difference in the output file offsets " + diff
+ " does not match the original file length " + length);
} finally {
if (is != null) is.close();
}
}
}
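A minimal sketch of how a caller drives this writer, matching the file-format description above. The directory path, compound-file name, and sub-file names are illustrative; inside Lucene this is done by SegmentMerger.

package org.apache.lucene.index;

import java.io.File;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

// Hypothetical sketch: pack a few per-segment files into one compound file.
// CompoundFileWriter is package-private, so this sits in org.apache.lucene.index.
class CompoundFileWriterDemo {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File("/path/to/index")); // placeholder path
    CompoundFileWriter cfw = new CompoundFileWriter(dir, "_0.cfs"); // placeholder name
    cfw.addFile("_0.fnm"); // the added files must already exist in dir
    cfw.addFile("_0.frq");
    cfw.addFile("_0.prx");
    cfw.close(); // writes the directory table, copies the data, verifies lengths
    dir.close();
  }
}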


@ -0,0 +1,409 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.ThreadInterruptedException;
import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
/** A {@link MergeScheduler} that runs each merge using a
* separate thread, up until a maximum number of threads
* ({@link #setMaxThreadCount}); once that many merges are
* running, the thread(s) that are updating the index will
* pause until one or more merges completes. This is a
* simple way to use concurrency in the indexing process
* without having to create and manage application level
* threads. */
public class ConcurrentMergeScheduler extends MergeScheduler {
private int mergeThreadPriority = -1;
protected List<MergeThread> mergeThreads = new ArrayList<MergeThread>();
// Max number of threads allowed to be merging at once
private int maxThreadCount = 1;
protected Directory dir;
private boolean closed;
protected IndexWriter writer;
protected int mergeThreadCount;
public ConcurrentMergeScheduler() {
if (allInstances != null) {
// Only for testing
addMyself();
}
}
/** Sets the max # simultaneous threads that may be
* running. If a merge is necessary yet we already have
* this many threads running, the incoming thread (that
* is calling add/updateDocument) will block until
* a merge thread has completed. */
public void setMaxThreadCount(int count) {
if (count < 1)
throw new IllegalArgumentException("count should be at least 1");
maxThreadCount = count;
}
/** Get the max # simultaneous threads that may be
* running. @see #setMaxThreadCount. */
public int getMaxThreadCount() {
return maxThreadCount;
}
/** Return the priority that merge threads run at. By
* default the priority is 1 plus the priority of (ie,
* slightly higher priority than) the first thread that
* calls merge. */
public synchronized int getMergeThreadPriority() {
initMergeThreadPriority();
return mergeThreadPriority;
}
/** Set the priority that merge threads run at. */
public synchronized void setMergeThreadPriority(int pri) {
if (pri > Thread.MAX_PRIORITY || pri < Thread.MIN_PRIORITY)
throw new IllegalArgumentException("priority must be in range " + Thread.MIN_PRIORITY + " .. " + Thread.MAX_PRIORITY + " inclusive");
mergeThreadPriority = pri;
final int numThreads = mergeThreadCount();
for(int i=0;i<numThreads;i++) {
MergeThread merge = mergeThreads.get(i);
merge.setThreadPriority(pri);
}
}
private boolean verbose() {
return writer != null && writer.verbose();
}
private void message(String message) {
if (verbose())
writer.message("CMS: " + message);
}
private synchronized void initMergeThreadPriority() {
if (mergeThreadPriority == -1) {
// Default to slightly higher priority than our
// calling thread
mergeThreadPriority = 1+Thread.currentThread().getPriority();
if (mergeThreadPriority > Thread.MAX_PRIORITY)
mergeThreadPriority = Thread.MAX_PRIORITY;
}
}
@Override
public void close() {
closed = true;
}
public synchronized void sync() {
while(mergeThreadCount() > 0) {
if (verbose())
message("now wait for threads; currently " + mergeThreads.size() + " still running");
final int count = mergeThreads.size();
if (verbose()) {
for(int i=0;i<count;i++)
message(" " + i + ": " + mergeThreads.get(i));
}
try {
wait();
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);
}
}
}
private synchronized int mergeThreadCount() {
int count = 0;
final int numThreads = mergeThreads.size();
for(int i=0;i<numThreads;i++)
if (mergeThreads.get(i).isAlive())
count++;
return count;
}
@Override
public void merge(IndexWriter writer)
throws CorruptIndexException, IOException {
assert !Thread.holdsLock(writer);
this.writer = writer;
initMergeThreadPriority();
dir = writer.getDirectory();
// First, quickly run through the newly proposed merges
// and add any orthogonal merges (ie a merge not
// involving segments already pending to be merged) to
// the queue. If we are way behind on merging, many of
// these newly proposed merges will likely already be
// registered.
if (verbose()) {
message("now merge");
message(" index: " + writer.segString());
}
// Iterate, pulling from the IndexWriter's queue of
// pending merges, until it's empty:
while(true) {
// TODO: we could be careful about which merges to do in
// the BG (eg maybe the "biggest" ones) vs FG, which
// merges to do first (the easiest ones?), etc.
MergePolicy.OneMerge merge = writer.getNextMerge();
if (merge == null) {
if (verbose())
message(" no more merges pending; now return");
return;
}
// We do this w/ the primary thread to keep
// deterministic assignment of segment names
writer.mergeInit(merge);
boolean success = false;
try {
synchronized(this) {
final MergeThread merger;
while (mergeThreadCount() >= maxThreadCount) {
if (verbose())
message(" too many merge threads running; stalling...");
try {
wait();
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);
}
}
if (verbose())
message(" consider merge " + merge.segString(dir));
assert mergeThreadCount() < maxThreadCount;
// OK to spawn a new merge thread to handle this
// merge:
merger = getMergeThread(writer, merge);
mergeThreads.add(merger);
if (verbose())
message(" launch new thread [" + merger.getName() + "]");
merger.start();
success = true;
}
} finally {
if (!success) {
writer.mergeFinish(merge);
}
}
}
}
/** Does the actual merge, by calling {@link IndexWriter#merge} */
protected void doMerge(MergePolicy.OneMerge merge)
throws IOException {
writer.merge(merge);
}
/** Create and return a new MergeThread */
protected synchronized MergeThread getMergeThread(IndexWriter writer, MergePolicy.OneMerge merge) throws IOException {
final MergeThread thread = new MergeThread(writer, merge);
thread.setThreadPriority(mergeThreadPriority);
thread.setDaemon(true);
thread.setName("Lucene Merge Thread #" + mergeThreadCount++);
return thread;
}
protected class MergeThread extends Thread {
IndexWriter writer;
MergePolicy.OneMerge startMerge;
MergePolicy.OneMerge runningMerge;
public MergeThread(IndexWriter writer, MergePolicy.OneMerge startMerge) throws IOException {
this.writer = writer;
this.startMerge = startMerge;
}
public synchronized void setRunningMerge(MergePolicy.OneMerge merge) {
runningMerge = merge;
}
public synchronized MergePolicy.OneMerge getRunningMerge() {
return runningMerge;
}
public void setThreadPriority(int pri) {
try {
setPriority(pri);
} catch (NullPointerException npe) {
// Strangely, Sun's JDK 1.5 on Linux sometimes
// throws NPE out of here...
} catch (SecurityException se) {
// Ignore this because we will still run fine with
// normal thread priority
}
}
@Override
public void run() {
// First time through the while loop we do the merge
// that we were started with:
MergePolicy.OneMerge merge = this.startMerge;
try {
if (verbose())
message(" merge thread: start");
while(true) {
setRunningMerge(merge);
doMerge(merge);
// Subsequent times through the loop we do any new
// merge that writer says is necessary:
merge = writer.getNextMerge();
if (merge != null) {
writer.mergeInit(merge);
if (verbose())
message(" merge thread: do another merge " + merge.segString(dir));
} else
break;
}
if (verbose())
message(" merge thread: done");
} catch (Throwable exc) {
// Ignore the exception if it was due to abort:
if (!(exc instanceof MergePolicy.MergeAbortedException)) {
if (!suppressExceptions) {
// suppressExceptions is normally only set during
// testing.
anyExceptions = true;
handleMergeException(exc);
}
}
} finally {
synchronized(ConcurrentMergeScheduler.this) {
ConcurrentMergeScheduler.this.notifyAll();
boolean removed = mergeThreads.remove(this);
assert removed;
}
}
}
@Override
public String toString() {
MergePolicy.OneMerge merge = getRunningMerge();
if (merge == null)
merge = startMerge;
return "merge thread: " + merge.segString(dir);
}
}
/** Called when an exception is hit in a background merge
* thread */
protected void handleMergeException(Throwable exc) {
try {
// When an exception is hit during merge, IndexWriter
// removes any partial files and then allows another
// merge to run. If whatever caused the error is not
// transient then the exception will keep happening,
// so, we sleep here to avoid saturating CPU in such
// cases:
Thread.sleep(250);
} catch (InterruptedException ie) {
throw new ThreadInterruptedException(ie);
}
throw new MergePolicy.MergeException(exc, dir);
}
static boolean anyExceptions = false;
/** Used for testing */
public static boolean anyUnhandledExceptions() {
if (allInstances == null) {
throw new RuntimeException("setTestMode() was not called; often this is because your test case's setUp method fails to call super.setUp in LuceneTestCase");
}
synchronized(allInstances) {
final int count = allInstances.size();
// Make sure all outstanding threads are done so we see
// any exceptions they may produce:
for(int i=0;i<count;i++)
allInstances.get(i).sync();
boolean v = anyExceptions;
anyExceptions = false;
return v;
}
}
public static void clearUnhandledExceptions() {
synchronized(allInstances) {
anyExceptions = false;
}
}
/** Used for testing */
private void addMyself() {
synchronized(allInstances) {
final int size = allInstances.size();
int upto = 0;
for(int i=0;i<size;i++) {
final ConcurrentMergeScheduler other = allInstances.get(i);
if (!(other.closed && 0 == other.mergeThreadCount()))
// Keep this one for now: it still has threads or
// may spawn new threads
allInstances.set(upto++, other);
}
allInstances.subList(upto, allInstances.size()).clear();
allInstances.add(this);
}
}
private boolean suppressExceptions;
/** Used for testing */
void setSuppressExceptions() {
suppressExceptions = true;
}
/** Used for testing */
void clearSuppressExceptions() {
suppressExceptions = false;
}
/** Used for testing */
private static List<ConcurrentMergeScheduler> allInstances;
public static void setTestMode() {
allInstances = new ArrayList<ConcurrentMergeScheduler>();
}
}
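A brief configuration sketch using only the setters shown in this file. The class name MergeSchedulerDemo is a placeholder, and attaching the scheduler to a writer via setMergeScheduler is an assumption about the IndexWriter API of this era, so it is left commented out.

import org.apache.lucene.index.ConcurrentMergeScheduler;

// Hypothetical configuration sketch for ConcurrentMergeScheduler.
public class MergeSchedulerDemo {
  public static void main(String[] args) {
    ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
    cms.setMaxThreadCount(3);                              // allow up to 3 concurrent merge threads
    cms.setMergeThreadPriority(Thread.NORM_PRIORITY + 2);  // run merges slightly above indexing threads
    System.out.println("max merge threads: " + cms.getMaxThreadCount());
    // writer.setMergeScheduler(cms); // assumed writer hook, not shown in this commit
  }
}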

Some files were not shown because too many files have changed in this diff.