Initial revision

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@149570 13f79535-47bb-0310-9956-ffa450edef68
2001-09-18 16:29:48 +00:00 · 2001-09-18 16:29:48 +00:00 · bd3948c539
parent 749b4aaf7e
commit bd3948c539
134 changed files with 16175 additions and 0 deletions
--- a/src/demo/org/apache/lucene/DeleteFiles.java
+++ b/src/demo/org/apache/lucene/DeleteFiles.java
@ -0,0 +1,87 @@
+package org.apache.lucene;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+
+import com.lucene.store.Directory;
+import com.lucene.store.FSDirectory;
+import com.lucene.index.IndexReader;
+import com.lucene.index.Term;
+
+/**
+ * Demo program that marks every document of the index kept in the
+ * "demo index" directory as deleted.
+ */
+class DeleteFiles {
+  /**
+   * Opens the index, deletes each document number from 0 up to
+   * <code>maxDoc()</code>, and closes the index again.  Failures are
+   * reported on standard output instead of being propagated.
+   *
+   * @param args ignored
+   */
+  public static void main(String[] args) {
+    Directory directory = null;
+    IndexReader reader = null;
+    try {
+      directory = FSDirectory.getDirectory("demo index", false);
+      reader = IndexReader.open(directory);
+
+      // Alternative kept from the original: delete by term instead of by
+      // document number.
+//       Term term = new Term("path", "pizza");
+//       int deleted = reader.delete(term);
+
+//       System.out.println("deleted " + deleted +
+//                          " documents containing " + term);
+
+      for (int i = 0; i < reader.maxDoc(); i++)
+        reader.delete(i);
+
+    } catch (Exception e) {
+      report(e);
+    } finally {
+      // Release the reader and the directory even when opening or deleting
+      // failed; previously they were left open on any exception.
+      // NOTE(review): if closing the reader is what commits deletions, a
+      // failure part-way through the loop now commits the deletions made so
+      // far -- confirm that this is acceptable.
+      try {
+        if (reader != null)
+          reader.close();
+      } catch (Exception e) {
+        report(e);
+      }
+      try {
+        if (directory != null)
+          directory.close();
+      } catch (Exception e) {
+        report(e);
+      }
+    }
+  }
+
+  /** Prints the class and the message of a failure on standard output. */
+  private static void report(Exception e) {
+    System.out.println(" caught a " + e.getClass() +
+                       "\n with message: " + e.getMessage());
+  }
+}
--- a/src/demo/org/apache/lucene/FileDocument.java
+++ b/src/demo/org/apache/lucene/FileDocument.java
@ -0,0 +1,111 @@
+package org.apache.lucene;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.File;
+import java.io.Reader;
+import java.io.FileInputStream;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+
+import com.lucene.document.Document;
+import com.lucene.document.Field;
+import com.lucene.document.DateField;
+
+/** A utility for making Lucene Documents from a File. */
+
+public class FileDocument {
+  /** Builds the Lucene document that describes a single file.
+    <p>
+    The resulting document carries three fields:
+    <ul>
+    <li><code>path</code>--the pathname of the file, as a stored,
+    tokenized field;
+    <li><code>modified</code>--the last modified date of the file, as a
+    keyword field encoded by <a
+    href="lucene.document.DateField.html">DateField</a>; and
+    <li><code>contents</code>--the full contents of the file, as a
+    Reader field.
+    </ul>
+    The reader handed to the <code>contents</code> field is not closed by
+    this method.
+
+    @param f the file to describe
+    @return the newly built document
+    @throws java.io.FileNotFoundException if the file cannot be opened for
+    reading
+    */
+  public static Document Document(File f)
+       throws java.io.FileNotFoundException {
+
+    Document result = new Document();
+
+    // "path": a Text field, so that the index stores the path and the path
+    // is searchable.
+    String path = f.getPath();
+    result.add(Field.Text("path", path));
+
+    // "modified": a Keyword field, so that the date is searchable but no
+    // attempt is made to tokenize it into words.
+    String modified = DateField.timeToString(f.lastModified());
+    result.add(Field.Keyword("modified", modified));
+
+    // "contents": a Reader-valued Text field, so that the text of the file
+    // is tokenized.  The original author noted that FileReader did not work
+    // here, without recording why.
+    Reader contents =
+      new BufferedReader(new InputStreamReader(new FileInputStream(f)));
+    result.add(Field.Text("contents", contents));
+
+    return result;
+  }
+
+  /** Not instantiable: this class only offers a static factory. */
+  private FileDocument() {}
+}
+    
--- a/src/demo/org/apache/lucene/HTMLDocument.java
+++ b/src/demo/org/apache/lucene/HTMLDocument.java
@ -0,0 +1,121 @@
+package org.apache.lucene;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.*;
+import com.lucene.document.*;
+import demo.HTMLParser.HTMLParser;
+
+/** A utility for making Lucene Documents for HTML documents. */
+
+public class HTMLDocument {
+  static char dirSep = System.getProperty("file.separator").charAt(0);
+
+  public static String uid(File f) {
+    // Append path and date into a string in such a way that lexicographic
+    // sorting gives the same results as a walk of the file hierarchy.  Thus
+    // null (\u0000) is used both to separate directory components and to
+    // separate the path from the date.
+    return f.getPath().replace(dirSep, '\u0000') +
+      "\u0000" +
+      DateField.timeToString(f.lastModified());
+  }
+
+  public static String uid2url(String uid) {
+    String url = uid.replace('\u0000', '/');	  // replace nulls with slashes
+    return url.substring(0, url.lastIndexOf('/')); // remove date from end
+  }
+
+  public static Document Document(File f)
+       throws IOException, InterruptedException  {
+    // make a new, empty document
+    Document doc = new Document();
+
+    // Add the url as a field named "url".  Use an UnIndexed field, so
+    // that the url is just stored with the document, but is not searchable.
+    doc.add(Field.UnIndexed("url", f.getPath().replace(dirSep, '/')));
+
+    // Add the last modified date of the file a field named "modified".  Use a
+    // Keyword field, so that it's searchable, but so that no attempt is made
+    // to tokenize the field into words.
+    doc.add(Field.Keyword("modified",
+			  DateField.timeToString(f.lastModified())));
+
+    // Add the uid as a field, so that index can be incrementally maintained.
+    // This field is not stored with document, it is indexed, but it is not
+    // tokenized prior to indexing.
+    doc.add(new Field("uid", uid(f), false, true, false));
+
+    HTMLParser parser = new HTMLParser(f);
+
+    // Add the tag-stripped contents as a Reader-valued Text field so it will
+    // get tokenized and indexed.
+    doc.add(Field.Text("contents", parser.getReader()));
+
+    // Add the summary as an UnIndexed field, so that it is stored and returned
+    // with hit documents for display.
+    doc.add(Field.UnIndexed("summary", parser.getSummary()));
+
+    // Add the title as a separate Text field, so that it can be searched
+    // separately.
+    doc.add(Field.Text("title", parser.getTitle()));
+
+    // return the document
+    return doc;
+  }
+
+  private HTMLDocument() {}
+}
+    
--- a/src/demo/org/apache/lucene/HTMLParser/.cvsignore
+++ b/src/demo/org/apache/lucene/HTMLParser/.cvsignore
@ -0,0 +1,7 @@
+HTMLParser.java
+HTMLParserTokenManager.java
+TokenMgrError.java
+ParseException.java
+Token.java
+ASCII_CharStream.java
+HTMLParserConstants.java
--- a/src/demo/org/apache/lucene/HTMLParser/Entities.java
+++ b/src/demo/org/apache/lucene/HTMLParser/Entities.java
@ -0,0 +1,365 @@
+package demo.HTMLParser;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.util.*;
+
+public class Entities {
+  static final Hashtable decoder = new Hashtable(300);
+  static final String[]  encoder = new String[0x100];
+
+  static final String decode(String entity) {
+    if (entity.charAt(entity.length()-1) == ';')  // remove trailing semicolon
+      entity = entity.substring(0, entity.length()-1);
+    if (entity.charAt(1) == '#') {
+      int start = 2;
+      int radix = 10;
+      if (entity.charAt(2) == 'X' || entity.charAt(2) == 'x') {
+	start++;
+	radix = 16;
+      }
+      Character c =
+	new Character((char)Integer.parseInt(entity.substring(start), radix));
+      return c.toString();
+    } else {
+      String s = (String)decoder.get(entity);
+      if (s != null)
+	return s;
+      else return "";
+    }
+  }
+
+  static final public String encode(String s) {
+    int length = s.length();
+    StringBuffer buffer = new StringBuffer(length * 2);
+    for (int i = 0; i < length; i++) {
+      char c = s.charAt(i);
+      int j = (int)c;
+      if (j < 0x100 && encoder[j] != null) {
+	buffer.append(encoder[j]);		  // have a named encoding
+	buffer.append(';');
+      } else if (j < 0x80) {
+	buffer.append(c);			  // use ASCII value
+      } else {
+	buffer.append("&#");			  // use numeric encoding
+	buffer.append((int)c);
+	buffer.append(';');
+      }
+    }
+    return buffer.toString();
+  }
+
+  static final void add(String entity, int value) {
+    decoder.put(entity, (new Character((char)value)).toString());
+    if (value < 0x100)
+      encoder[value] = entity;
+  }
+
+  static {
+    add("&nbsp",   160);
+    add("&iexcl",  161);
+    add("&cent",   162);
+    add("&pound",  163);
+    add("&curren", 164);
+    add("&yen",    165);
+    add("&brvbar", 166);
+    add("&sect",   167);
+    add("&uml",    168);
+    add("&copy",   169);
+    add("&ordf",   170);
+    add("&laquo",  171);
+    add("&not",    172);
+    add("&shy",    173);
+    add("&reg",    174);
+    add("&macr",   175);
+    add("&deg",    176);
+    add("&plusmn", 177);
+    add("&sup2",   178);
+    add("&sup3",   179);
+    add("&acute",  180);
+    add("&micro",  181);
+    add("&para",   182);
+    add("&middot", 183);
+    add("&cedil",  184);
+    add("&sup1",   185);
+    add("&ordm",   186);
+    add("&raquo",  187);
+    add("&frac14", 188);
+    add("&frac12", 189);
+    add("&frac34", 190);
+    add("&iquest", 191);
+    add("&Agrave", 192);
+    add("&Aacute", 193);
+    add("&Acirc",  194);
+    add("&Atilde", 195);
+    add("&Auml",   196);
+    add("&Aring",  197);
+    add("&AElig",  198);
+    add("&Ccedil", 199);
+    add("&Egrave", 200);
+    add("&Eacute", 201);
+    add("&Ecirc",  202);
+    add("&Euml",   203);
+    add("&Igrave", 204);
+    add("&Iacute", 205);
+    add("&Icirc",  206);
+    add("&Iuml",   207);
+    add("&ETH",    208);
+    add("&Ntilde", 209);
+    add("&Ograve", 210);
+    add("&Oacute", 211);
+    add("&Ocirc",  212);
+    add("&Otilde", 213);
+    add("&Ouml",   214);
+    add("&times",  215);
+    add("&Oslash", 216);
+    add("&Ugrave", 217);
+    add("&Uacute", 218);
+    add("&Ucirc",  219);
+    add("&Uuml",   220);
+    add("&Yacute", 221);
+    add("&THORN",  222);
+    add("&szlig",  223);
+    add("&agrave", 224);
+    add("&aacute", 225);
+    add("&acirc",  226);
+    add("&atilde", 227);
+    add("&auml",   228);
+    add("&aring",  229);
+    add("&aelig",  230);
+    add("&ccedil", 231);
+    add("&egrave", 232);
+    add("&eacute", 233);
+    add("&ecirc",  234);
+    add("&euml",   235);
+    add("&igrave", 236);
+    add("&iacute", 237);
+    add("&icirc",  238);
+    add("&iuml",   239);
+    add("&eth",    240);
+    add("&ntilde", 241);
+    add("&ograve", 242);
+    add("&oacute", 243);
+    add("&ocirc",  244);
+    add("&otilde", 245);
+    add("&ouml",   246);
+    add("&divide", 247);
+    add("&oslash", 248);
+    add("&ugrave", 249);
+    add("&uacute", 250);
+    add("&ucirc",  251);
+    add("&uuml",   252);
+    add("&yacute", 253);
+    add("&thorn",  254);
+    add("&yuml",   255);
+    add("&fnof",   402);
+    add("&Alpha",  913);
+    add("&Beta",   914);
+    add("&Gamma",  915);
+    add("&Delta",  916);
+    add("&Epsilon",917);
+    add("&Zeta",   918);
+    add("&Eta",    919);
+    add("&Theta",  920);
+    add("&Iota",   921);
+    add("&Kappa",  922);
+    add("&Lambda", 923);
+    add("&Mu",     924);
+    add("&Nu",     925);
+    add("&Xi",     926);
+    add("&Omicron",927);
+    add("&Pi",     928);
+    add("&Rho",    929);
+    add("&Sigma",  931);
+    add("&Tau",    932);
+    add("&Upsilon",933);
+    add("&Phi",    934);
+    add("&Chi",    935);
+    add("&Psi",    936);
+    add("&Omega",  937);
+    add("&alpha",  945);
+    add("&beta",   946);
+    add("&gamma",  947);
+    add("&delta",  948);
+    add("&epsilon",949);
+    add("&zeta",   950);
+    add("&eta",    951);
+    add("&theta",  952);
+    add("&iota",   953);
+    add("&kappa",  954);
+    add("&lambda", 955);
+    add("&mu",     956);
+    add("&nu",     957);
+    add("&xi",     958);
+    add("&omicron",959);
+    add("&pi",     960);
+    add("&rho",    961);
+    add("&sigmaf", 962);
+    add("&sigma",  963);
+    add("&tau",    964);
+    add("&upsilon",965);
+    add("&phi",    966);
+    add("&chi",    967);
+    add("&psi",    968);
+    add("&omega",  969);
+    add("&thetasym",977);
+    add("&upsih",  978);
+    add("&piv",    982);
+    add("&bull",   8226);
+    add("&hellip", 8230);
+    add("&prime",  8242);
+    add("&Prime",  8243);
+    add("&oline",  8254);
+    add("&frasl",  8260);
+    add("&weierp", 8472);
+    add("&image",  8465);
+    add("&real",   8476);
+    add("&trade",  8482);
+    add("&alefsym",8501);
+    add("&larr",   8592);
+    add("&uarr",   8593);
+    add("&rarr",   8594);
+    add("&darr",   8595);
+    add("&harr",   8596);
+    add("&crarr",  8629);
+    add("&lArr",   8656);
+    add("&uArr",   8657);
+    add("&rArr",   8658);
+    add("&dArr",   8659);
+    add("&hArr",   8660);
+    add("&forall", 8704);
+    add("&part",   8706);
+    add("&exist",  8707);
+    add("&empty",  8709);
+    add("&nabla",  8711);
+    add("&isin",   8712);
+    add("&notin",  8713);
+    add("&ni",     8715);
+    add("&prod",   8719);
+    add("&sum",    8721);
+    add("&minus",  8722);
+    add("&lowast", 8727);
+    add("&radic",  8730);
+    add("&prop",   8733);
+    add("&infin",  8734);
+    add("&ang",    8736);
+    add("&and",    8743);
+    add("&or",     8744);
+    add("&cap",    8745);
+    add("&cup",    8746);
+    add("&int",    8747);
+    add("&there4", 8756);
+    add("&sim",    8764);
+    add("&cong",   8773);
+    add("&asymp",  8776);
+    add("&ne",     8800);
+    add("&equiv",  8801);
+    add("&le",     8804);
+    add("&ge",     8805);
+    add("&sub",    8834);
+    add("&sup",    8835);
+    add("&nsub",   8836);
+    add("&sube",   8838);
+    add("&supe",   8839);
+    add("&oplus",  8853);
+    add("&otimes", 8855);
+    add("&perp",   8869);
+    add("&sdot",   8901);
+    add("&lceil",  8968);
+    add("&rceil",  8969);
+    add("&lfloor", 8970);
+    add("&rfloor", 8971);
+    add("&lang",   9001);
+    add("&rang",   9002);
+    add("&loz",    9674);
+    add("&spades", 9824);
+    add("&clubs",  9827);
+    add("&hearts", 9829);
+    add("&diams",  9830);
+    add("&quot",   34);
+    add("&amp",    38);
+    add("&lt",     60);
+    add("&gt",     62);
+    add("&OElig",  338);
+    add("&oelig",  339);
+    add("&Scaron", 352);
+    add("&scaron", 353);
+    add("&Yuml",   376);
+    add("&circ",   710);
+    add("&tilde",  732);
+    add("&ensp",   8194);
+    add("&emsp",   8195);
+    add("&thinsp", 8201);
+    add("&zwnj",   8204);
+    add("&zwj",    8205);
+    add("&lrm",    8206);
+    add("&rlm",    8207);
+    add("&ndash",  8211);
+    add("&mdash",  8212);
+    add("&lsquo",  8216);
+    add("&rsquo",  8217);
+    add("&sbquo",  8218);
+    add("&ldquo",  8220);
+    add("&rdquo",  8221);
+    add("&bdquo",  8222);
+    add("&dagger", 8224);
+    add("&Dagger", 8225);
+    add("&permil", 8240);
+    add("&lsaquo", 8249);
+    add("&rsaquo", 8250);
+    add("&euro",   8364);
+
+  }
+}
--- a/src/demo/org/apache/lucene/HTMLParser/HTMLParser.jj
+++ b/src/demo/org/apache/lucene/HTMLParser/HTMLParser.jj
@ -0,0 +1,347 @@
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+// HTMLParser.jj
+
+// Generator options for this grammar.
+options {
+  // The parser class below keeps its per-document state in instance fields,
+  // so the parser is generated with non-static members.
+  STATIC = false;
+  // NOTE(review): presumably asks for a faster generated token manager --
+  // confirm against the JavaCC version in use.
+  OPTIMIZE_TOKEN_MANAGER = true;
+  // Uncomment either line below to trace the generated parser.
+  //DEBUG_LOOKAHEAD = true;
+  //DEBUG_TOKEN_MANAGER = true;
+}
+
+PARSER_BEGIN(HTMLParser)
+
+package org.apache.lucene.HTMLParser;
+
+import java.io.*;
+
+/**
+ * Tag-stripping HTML parser.  The text of the document is written to a pipe
+ * that callers read through {@link #getReader}, while the title and the
+ * beginning of the text are collected for {@link #getTitle} and
+ * {@link #getSummary}.  Parsing runs in a thread started by
+ * {@link #getReader}.
+ */
+public class HTMLParser {
+  /** Number of characters of text collected for the summary. */
+  public static int SUMMARY_LENGTH = 200;
+
+  StringBuffer title = new StringBuffer(SUMMARY_LENGTH);
+  StringBuffer summary = new StringBuffer(SUMMARY_LENGTH * 2);
+  int length = 0;                    // characters written to the pipe so far
+  boolean titleComplete = false;     // set once text follows a non-empty title
+  boolean inTitle = false;           // the last tag seen was <title
+  boolean inScript = false;          // between <script and </script
+  boolean afterTag = false;          // the last thing parsed was markup
+  boolean afterSpace = false;        // the last thing emitted was whitespace
+  String eol = System.getProperty("line.separator");
+  PipedReader pipeIn = null;         // null until parsing has been started
+  PipedWriter pipeOut;
+
+  /**
+   * Creates a parser that reads the given file.
+   *
+   * @param file the HTML file to parse
+   * @throws FileNotFoundException if the file cannot be opened
+   */
+  public HTMLParser(File file) throws FileNotFoundException {
+    this(new FileInputStream(file));
+  }
+
+  /**
+   * Returns the title of the document, starting the parse if necessary.
+   * Blocks, polling every 10 milliseconds, until the title is complete or
+   * more than <code>SUMMARY_LENGTH</code> characters of text have been
+   * seen.
+   * NOTE(review): for a short document without a title neither condition
+   * is reached inside this class; presumably the parsing thread sets
+   * <code>titleComplete</code> when it finishes -- confirm in ParserThread.
+   *
+   * @return the trimmed title; empty if none was found
+   */
+  public String getTitle() throws IOException, InterruptedException {
+    if (pipeIn == null)
+      getReader();                                // spawn parsing thread
+    while (true) {
+      synchronized(this) {
+        if (titleComplete || (length > SUMMARY_LENGTH))
+          break;
+        wait(10);
+      }
+    }
+    return title.toString().trim();
+  }
+
+  /**
+   * Returns the beginning of the document text, starting the parse if
+   * necessary.  Blocks, polling every 10 milliseconds, until the summary
+   * holds at least <code>SUMMARY_LENGTH</code> characters.
+   * NOTE(review): shorter documents never reach that length through
+   * {@link #addToSummary}; presumably the parsing thread pads the summary
+   * when it finishes -- confirm in ParserThread.
+   *
+   * @return the trimmed summary, without the title when it starts with it
+   */
+  public String getSummary() throws IOException, InterruptedException {
+    if (pipeIn == null)
+      getReader();                                // spawn parsing thread
+    while (true) {
+      synchronized(this) {
+        if (summary.length() >= SUMMARY_LENGTH)
+          break;
+        wait(10);
+      }
+    }
+    if (summary.length() > SUMMARY_LENGTH)
+      summary.setLength(SUMMARY_LENGTH);
+
+    String sum = summary.toString().trim();
+    String tit = getTitle();
+    if (sum.startsWith(tit))
+      return sum.substring(tit.length());
+    else
+      return sum;
+  }
+
+  /**
+   * Returns a reader over the tag-stripped text.  The first call creates
+   * the pipe and starts the parsing thread; later calls return the same
+   * reader.
+   */
+  public Reader getReader() throws IOException {
+    if (pipeIn == null) {
+      pipeIn = new PipedReader();
+      pipeOut = new PipedWriter(pipeIn);
+
+      Thread thread = new ParserThread(this);
+      thread.start();                             // start parsing
+    }
+
+    return pipeIn;
+  }
+
+  /**
+   * Appends text to the summary while it is shorter than
+   * <code>SUMMARY_LENGTH</code>, and wakes waiting threads once that
+   * length is reached.
+   */
+  void addToSummary(String text) {
+    if (summary.length() < SUMMARY_LENGTH) {
+      summary.append(text);
+      if (summary.length() >= SUMMARY_LENGTH) {
+        synchronized(this) {
+          notifyAll();
+        }
+      }
+    }
+  }
+
+  /**
+   * Handles a piece of document text.  Text inside a script is dropped.
+   * Text inside the title goes to the title, other text to the summary;
+   * all of it is written to the pipe.
+   */
+  void addText(String text) throws IOException {
+    if (inScript)
+      return;
+    if (inTitle)
+      title.append(text);
+    else {
+      addToSummary(text);
+      // The title is finished once text arrives outside of it.  The test
+      // must look at the buffer's length: comparing a StringBuffer with ""
+      // through equals() is never true, which used to mark the title
+      // complete before any title had been read.
+      if (!titleComplete && title.length() > 0) { // finished title
+        synchronized(this) {
+          titleComplete = true;                   // tell waiting threads
+          notifyAll();
+        }
+      }
+    }
+
+    length += text.length();
+    pipeOut.write(text);
+
+    afterSpace = false;
+  }
+
+  /**
+   * Handles a run of whitespace, collapsing consecutive runs into one.
+   * The title and summary receive a single space; the pipe receives a line
+   * separator when the whitespace follows markup and a space otherwise.
+   */
+  void addSpace() throws IOException {
+    if (inScript)
+      return;
+    if (!afterSpace) {
+      if (inTitle)
+        title.append(" ");
+      else
+        addToSummary(" ");
+
+      String space = afterTag ? eol : " ";
+      length += space.length();
+      pipeOut.write(space);
+      afterSpace = true;
+    }
+  }
+
+//    void handleException(Exception e) {
+//      System.out.println(e.toString());  // print the error message
+//      System.out.println("Skipping...");
+//      Token t;
+//      do {
+//        t = getNextToken();
+//      } while (t.kind != TagEnd);
+//    }
+}
+
+PARSER_END(HTMLParser)
+
+
+// Top-level production: consumes the whole input up to end of file.
+// Tags, declarations and comments produce no text and only record that
+// markup was seen.  Words, entities (after decoding) and punctuation are
+// passed to addText(); whitespace runs are passed to addSpace().
+void HTMLDocument() throws IOException :
+{
+  Token t;
+}
+{
+//  try {
+    ( Tag()         { afterTag = true; }
+    | t=Decl()      { afterTag = true; }
+    | CommentTag()  { afterTag = true; }
+    | t=<Word>      { addText(t.image); afterTag = false; }
+    | t=<Entity>    { addText(Entities.decode(t.image)); afterTag = false; }
+    | t=<Punct>     { addText(t.image); afterTag = false; }
+    | <Space>       { addSpace(); afterTag = false; }
+    )* <EOF>
+//  } catch (ParseException e) {
+//    handleException(e);
+//  }
+}
+
+// Matches one opening or closing tag with its attributes.  Updates the
+// inTitle and inScript flags from the tag name, and emits the ALT text of
+// an IMG tag, in square brackets, as document text.
+void Tag() throws IOException :
+{
+  Token t1, t2;
+  boolean inImg = false;
+}
+{
+  t1=<TagName> {
+    inTitle = t1.image.equalsIgnoreCase("<title"); // keep track if in <TITLE>
+    inImg = t1.image.equalsIgnoreCase("<img");	  // keep track if in <IMG>
+    // While inside a script only the closing </script tag ends it.
+    if (inScript) {				  // keep track if in <SCRIPT>
+      inScript = !t1.image.equalsIgnoreCase("</script");
+    } else {
+      inScript = t1.image.equalsIgnoreCase("<script");
+    }
+  }
+  // Zero or more attributes; from here on t1 holds the attribute name.
+  (t1=<ArgName>
+   (<ArgEquals>
+    (t2=ArgValue()				  // save ALT text in IMG tag
+     {
+       // t2 is null when the value is an empty pair of quotes.
+       if (inImg && t1.image.equalsIgnoreCase("alt") && t2 != null)
+         addText("[" + t2.image + "]");
+     }
+    )?
+   )?
+  )*
+  <TagEnd>
+}
+
+// Matches an attribute value and returns the token holding its text: an
+// unquoted value, or the text between single or double quotes.  Returns
+// null for an empty pair of quotes.  The two-token lookahead tells an
+// empty quoted value from a non-empty one, as both start with the same
+// quote token.
+Token ArgValue() :
+{
+  Token t = null;
+}
+{
+  t=<ArgValue>                              { return t; }
+| LOOKAHEAD(2)
+  <ArgQuote1> <CloseQuote1>                 { return t; }
+| <ArgQuote1> t=<Quote1Text> <CloseQuote1>  { return t; }
+| LOOKAHEAD(2)
+  <ArgQuote2> <CloseQuote2>                 { return t; }
+| <ArgQuote2> t=<Quote2Text> <CloseQuote2>  { return t; }
+}
+
+
+// Matches a declaration, i.e. markup starting with "<!" and a letter, up to
+// the closing ">".  Its arguments are skipped; the token holding the
+// declaration name is returned.
+Token Decl() :
+{
+  Token t;
+}
+{
+  t=<DeclName> ( <ArgName> | ArgValue() | <ArgEquals> )* <TagEnd>
+  { return t; }
+}
+
+
+// Matches a comment and discards its text: either "<!--" up to "-->", or
+// "<!" up to the next ">".
+void CommentTag() :
+{}
+{
+  (<Comment1> ( <CommentText1> )* <CommentEnd1>)
+ |
+  (<Comment2> ( <CommentText2> )* <CommentEnd2>)
+}
+  
+
+// Tokens of the default lexical state, i.e. of text outside markup.
+TOKEN :
+{
+  // Start of a tag or declaration: the attributes are lexed in WithinTag.
+  < TagName:  "<" ("/")? ["A"-"Z","a"-"z"] (<ArgName>)? > : WithinTag
+| < DeclName: "<"  "!"   ["A"-"Z","a"-"z"] (<ArgName>)? > : WithinTag
+
+| < Comment1:  "<!--" > : WithinComment1
+| < Comment2:  "<!" >   : WithinComment2
+
+| < Word:     ( <LET> | <LET> (["+","/"])+ | <NUM> ["\""] |
+                <LET> ["-","'"] <LET> | ("$")? <NUM> [",","."] <NUM> )+ >
+| < #LET:     ["A"-"Z","a"-"z","0"-"9"] >
+| < #NUM:     ["0"-"9"] >
+
+  // A character reference, with an optional trailing semicolon:
+  //  - a name that starts with a letter and may continue with letters or
+  //    digits, so that names such as sup2, frac14 and there4 are matched
+  //    whole instead of being cut at the first digit;
+  //  - a decimal number after "&#";
+  //  - a hexadecimal number after "&#x" or "&#X", which Entities.decode
+  //    already understands.
+| < Entity:   ( "&" ["A"-"Z","a"-"z"] (<LET>)* (";")?
+              | "&" "#" (<NUM>)+ (";")?
+              | "&" "#" ["X","x"] (["0"-"9","A"-"F","a"-"f"])+ (";")? ) >
+
+| < Space:    (<SP>)+ >
+| < #SP:      [" ","\t","\r","\n"] >
+
+| < Punct:    ~[] > // Keep this last.  It is a catch-all.
+}
+
+
+// Tokens inside a tag, after its name.  An attribute name starts with any
+// character other than whitespace, "=", ">" or a quote, and continues up to
+// whitespace, "=" or ">".  "=" switches to AfterEquals to read the value;
+// ">" (or "=>") ends the tag and returns to the default state.
+<WithinTag> TOKEN:
+{
+  < ArgName:   (~[" ","\t","\r","\n","=",">","'","\""])
+               (~[" ","\t","\r","\n","=",">"])* >
+| < ArgEquals: "=" >  : AfterEquals
+| < TagEnd:    ">" | "=>" >  : DEFAULT
+}
+
+// An unquoted attribute value: it starts like an attribute name but may
+// contain "=" after its first character, and ends at whitespace or ">".
+// Lexing then continues in WithinTag.
+<AfterEquals> TOKEN:
+{
+  < ArgValue:  (~[" ","\t","\r","\n","=",">","'","\""])
+	       (~[" ","\t","\r","\n",">"])* > : WithinTag
+}
+
+// An opening quote, recognized both within a tag and right after "=",
+// switches to the state that reads the quoted text.
+<WithinTag, AfterEquals> TOKEN:
+{
+  < ArgQuote1: "'"  > : WithinQuote1
+| < ArgQuote2: "\"" > : WithinQuote2
+}
+
+// Whitespace between the parts of a tag is skipped.
+<WithinTag, AfterEquals> SKIP:
+{
+  < <Space> >
+}
+
+// Inside single quotes: everything up to the next single quote is the
+// value; the closing quote returns to WithinTag.
+<WithinQuote1> TOKEN:
+{
+  < Quote1Text:  (~["'"])+ >
+| < CloseQuote1: <ArgQuote1> > : WithinTag
+}
+
+// Inside double quotes: everything up to the next double quote is the
+// value; the closing quote returns to WithinTag.
+<WithinQuote2> TOKEN:
+{
+  < Quote2Text:  (~["\""])+ >
+| < CloseQuote2: <ArgQuote2> > : WithinTag
+}
+
+
+// Inside a "<!--" comment: text is a run of characters other than "-", or
+// a single "-"; "-->" ends the comment and returns to the default state.
+<WithinComment1> TOKEN :
+{
+  < CommentText1:  (~["-"])+ | "-" >
+| < CommentEnd1:   "-->" > : DEFAULT
+}
+
+// Inside a "<!" comment: text runs up to the next ">", which ends the
+// comment and returns to the default state.
+<WithinComment2> TOKEN :
+{
+  < CommentText2:  (~[">"])+ >
+| < CommentEnd2:   ">" > : DEFAULT
+}
--- a/src/demo/org/apache/lucene/HTMLParser/Makefile
+++ b/src/demo/org/apache/lucene/HTMLParser/Makefile
@ -0,0 +1,3 @@
+# sub-directory makefile for lucene
+ROOT = ../..
+include ../../com/lucene/rules.mk
--- a/src/demo/org/apache/lucene/HTMLParser/ParserThread.java
+++ b/src/demo/org/apache/lucene/HTMLParser/ParserThread.java
@ -0,0 +1,86 @@
+package org.apache.lucene.HTMLParser;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.*;
+
+/**
+ * Thread that runs the parser's <code>HTMLDocument()</code> production and,
+ * when parsing ends for any reason, closes <code>parser.pipeOut</code>, marks
+ * the title as complete and wakes every thread waiting on the parser.
+ */
+class ParserThread extends Thread {
+  HTMLParser parser;
+
+  ParserThread(HTMLParser p) {
+    parser = p;
+  }
+
+  /**
+   * Parses the document.  Parse failures are reported on standard output and
+   * I/O failures have their stack trace printed; neither is propagated.
+   */
+  public void run() {
+    try {
+      try {                                       // parse document to pipeOut
+        parser.HTMLDocument();
+      } catch (ParseException e) {
+        System.out.println("Parse Aborted: " + e.getMessage());
+      } catch (TokenMgrError e) {
+        System.out.println("Parse Aborted: " + e.getMessage());
+      } finally {
+        try {
+          parser.pipeOut.close();
+        } finally {
+          // Done in its own finally so that a failing close() cannot skip
+          // the notification and leave waiting threads blocked forever.
+          synchronized (parser) {
+            parser.summary.setLength(parser.SUMMARY_LENGTH);
+            parser.titleComplete = true;
+            parser.notifyAll();
+          }
+        }
+      }
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+  }
+}
--- a/src/demo/org/apache/lucene/HTMLParser/Test.java
+++ b/src/demo/org/apache/lucene/HTMLParser/Test.java
@ -0,0 +1,81 @@
+package org.apache.lucene.HTMLParser;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.*;
+
+class Test {
+  public static void main(String[] argv) throws Exception {
+    if ("-dir".equals(argv[0])) {
+      String[] files = new File(argv[1]).list();
+      java.util.Arrays.sort(files);
+      for (int i = 0; i < files.length; i++) {
+	System.err.println(files[i]);
+	File file = new File(argv[1], files[i]);
+	parse(file);
+      }
+    } else
+      parse(new File(argv[0]));
+  }
+
+  public static void parse(File file) throws Exception {
+    HTMLParser parser = new HTMLParser(file);
+    System.out.println("Title: " + Entities.encode(parser.getTitle()));
+    System.out.println("Summary: " + Entities.encode(parser.getSummary()));
+    LineNumberReader reader = new LineNumberReader(parser.getReader());
+    for (String l = reader.readLine(); l != null; l = reader.readLine())
+      System.out.println(l);
+  }
+}
--- a/src/demo/org/apache/lucene/IndexFiles.java
+++ b/src/demo/org/apache/lucene/IndexFiles.java
@ -0,0 +1,98 @@
+package org.apache.lucene;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import com.lucene.analysis.StopAnalyzer;
+import com.lucene.index.IndexWriter;
+
+import java.io.File;
+import java.util.Date;
+
+class IndexFiles {
+  public static void main(String[] args) {
+    try {
+      Date start = new Date();
+
+      IndexWriter writer = new IndexWriter("index", new StopAnalyzer(), true);
+      writer.mergeFactor = 20;
+
+      indexDocs(writer, new File(args[0]));
+
+      writer.optimize();
+      writer.close();
+
+      Date end = new Date();
+
+      System.out.print(end.getTime() - start.getTime());
+      System.out.println(" total milliseconds");
+
+    } catch (Exception e) {
+      System.out.println(" caught a " + e.getClass() +
+			 "\n with message: " + e.getMessage());
+    }
+  }
+
+  public static void indexDocs(IndexWriter writer, File file)
+       throws Exception {
+    if (file.isDirectory()) {
+      String[] files = file.list();
+      for (int i = 0; i < files.length; i++)
+	indexDocs(writer, new File(file, files[i]));
+    } else {
+      System.out.println("adding " + file);
+      writer.addDocument(FileDocument.Document(file));
+    }
+  }
+}
--- a/src/demo/org/apache/lucene/IndexHTML.java
+++ b/src/demo/org/apache/lucene/IndexHTML.java
@ -0,0 +1,195 @@
+package org.apache.lucene;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import com.lucene.analysis.StopAnalyzer;
+import com.lucene.index.*;
+import com.lucene.document.Document;
+import com.lucene.util.Arrays;
+import demo.HTMLParser.HTMLParser;
+
+import java.io.File;
+import java.util.Date;
+
+class IndexHTML {
+  private static boolean deleting = false;	  // true during deletion pass
+  private static IndexReader reader;		  // existing index
+  private static IndexWriter writer;		  // new index being built
+  private static TermEnum uidIter;		  // document id iterator
+
+  public static void main(String[] argv) {
+    try {
+      String index = "index";
+      boolean create = false;
+      File root = null;
+      
+      String usage = "IndexHTML [-create] [-index <index>] <root_directory>";
+
+      if (argv.length == 0) {
+	System.err.println("Usage: " + usage);
+	return;
+      }
+
+      for (int i = 0; i < argv.length; i++) {
+	if (argv[i].equals("-index")) {		  // parse -index option
+	  index = argv[++i];
+	} else if (argv[i].equals("-create")) {	  // parse -create option
+	  create = true;
+	} else if (i != argv.length-1) {
+	  System.err.println("Usage: " + usage);
+	  return;
+	} else
+	  root = new File(argv[i]);
+      }
+
+      Date start = new Date();
+
+      if (!create) {				  // delete stale docs
+	deleting = true;
+	indexDocs(root, index, create);
+      }
+
+      writer = new IndexWriter(index, new StopAnalyzer(), create);
+      writer.mergeFactor = 20;
+      writer.maxFieldLength = 1000000;
+
+      indexDocs(root, index, create);		  // add new docs
+
+      System.out.println("Optimizing index...");
+      writer.optimize();
+      writer.close();
+
+      Date end = new Date();
+
+      System.out.print(end.getTime() - start.getTime());
+      System.out.println(" total milliseconds");
+
+    } catch (Exception e) {
+      System.out.println(" caught a " + e.getClass() +
+			 "\n with message: " + e.getMessage());
+    }
+  }
+
+  /* Walk directory hierarchy in uid order, while keeping uid iterator from
+  /* existing index in sync.  Mismatches indicate one of: (a) old documents to
+  /* be deleted; (b) unchanged documents, to be left alone; or (c) new
+  /* documents, to be indexed.
+   */
+
+  private static void indexDocs(File file, String index, boolean create)
+       throws Exception {
+    if (!create) {				  // incrementally update
+      
+      reader = IndexReader.open(index);		  // open existing index
+      uidIter = reader.terms(new Term("uid", "")); // init uid iterator
+    
+      indexDocs(file);
+
+      if (deleting) {				  // delete rest of stale docs
+	while (uidIter.term() != null && uidIter.term().field() == "uid") {
+	  System.out.println("deleting " +
+			     HTMLDocument.uid2url(uidIter.term().text()));
+	  reader.delete(uidIter.term());
+	  uidIter.next();
+	}
+	deleting = false;
+      }
+
+      uidIter.close();				  // close uid iterator
+      reader.close();				  // close existing index
+
+    } else					  // don't have exisiting
+      indexDocs(file);
+  }
+
+  private static void indexDocs(File file) throws Exception {
+    if (file.isDirectory()) {			  // if a directory
+      String[] files = file.list();		  // list its files
+      Arrays.sort(files);			  // sort the files
+      for (int i = 0; i < files.length; i++)	  // recursively index them
+	indexDocs(new File(file, files[i]));
+
+    } else if (file.getPath().endsWith(".html") || // index .html files
+	       file.getPath().endsWith(".htm") || // index .htm files
+	       file.getPath().endsWith(".txt")) { // index .txt files
+      
+      if (uidIter != null) {
+	String uid = HTMLDocument.uid(file);	  // construct uid for doc
+
+	while (uidIter.term() != null && uidIter.term().field() == "uid" &&
+	       uidIter.term().text().compareTo(uid) < 0) {
+	  if (deleting) {			  // delete stale docs
+	    System.out.println("deleting " +
+			       HTMLDocument.uid2url(uidIter.term().text()));
+	    reader.delete(uidIter.term());
+	  }
+	  uidIter.next();
+	}
+	if (uidIter.term() != null && uidIter.term().field() == "uid" &&
+	    uidIter.term().text().compareTo(uid) == 0) {
+	  uidIter.next();			  // keep matching docs
+	} else if (!deleting) {			  // add new docs
+	  Document doc = HTMLDocument.Document(file);
+	  System.out.println("adding " + doc.get("url"));
+	writer.addDocument(doc);
+	}
+      } else {					  // creating a new index
+	Document doc = HTMLDocument.Document(file);
+	System.out.println("adding " + doc.get("url"));
+	writer.addDocument(doc);		  // add docs unconditionally
+      }
+    }
+  }
+}
--- a/src/demo/org/apache/lucene/Makefile
+++ b/src/demo/org/apache/lucene/Makefile
@ -0,0 +1,3 @@
+# sub-directory makefile for lucene
+ROOT = ..
+include ../com/lucene/rules.mk
--- a/src/demo/org/apache/lucene/Search.html
+++ b/src/demo/org/apache/lucene/Search.html
@ -0,0 +1,17 @@
+<HTML>
+<HEAD>
+<TITLE>Lucene Search Demo</TITLE>
+</HEAD>
+<BODY>
+
+<CENTER>
+<H1>
+Lucene Search Demo</H1>
+
+<form name=search action=http://localhost:8080/Search.jhtml method=get>
+<input name=query size=44>&nbsp;<input type=submit value=Search></form>
+
+</CENTER>
+
+</BODY>
+</HTML>
--- a/src/demo/org/apache/lucene/Search.jhtml
+++ b/src/demo/org/apache/lucene/Search.jhtml
@ -0,0 +1,166 @@
+<HTML><!-- -*-java-*- -->
+<!-- Lucene Search Demo via CompiledPageServlet -->
+<!-- Copyright (c) 1998,2000 Douglass R. Cutting. -->
+
+<java type=import>
+  javax.servlet.*
+  javax.servlet.http.*
+  java.io.*
+  com.lucene.analysis.*
+  com.lucene.document.*
+  com.lucene.index.*
+  com.lucene.search.*
+  com.lucene.queryParser.*
+  demo.HTMLParser.Entities
+</java>
+
+<java>
+  // get index from request
+  String indexName = request.getParameter("index");
+  if (indexName == null)                          // default to "index"
+    indexName = "index";
+  Searcher searcher =                             // make searcher
+    new IndexSearcher(getReader(indexName));
+
+  // get query from request
+  String queryString = request.getParameter("query");
+  if (queryString == null)
+    throw new ServletException("no query specified");
+
+  int start = 0;                                  // first hit to display
+  String startString = request.getParameter("start");
+  if (startString != null)
+    start = Integer.parseInt(startString);
+
+  int hitsPerPage = 10;                           // number of hits to display
+  String hitsString = request.getParameter("hitsPerPage");
+  if (hitsString != null)
+    hitsPerPage = Integer.parseInt(hitsString);
+
+  boolean showSummaries = true;                   // show summaries?
+  if ("false".equals(request.getParameter("showSummaries")))
+    showSummaries = false;
+
+  // The query text comes straight from the request, so the error page below
+  // entity-encodes it (and the parser's message) before echoing them as HTML.
+  Query query = null;
+  try {                                           // parse query
+    query = QueryParser.parse(queryString, "contents", analyzer);
+  } catch (ParseException e) {                    // error parsing query
+    </java>
+    <HEAD><TITLE>Error Parsing Query</TITLE></HEAD><BODY>
+    <p>While parsing `Entities.encode(queryString)`: `Entities.encode(String.valueOf(e.getMessage()))`
+    <java>
+    return;
+  }
+
+  String servletPath = request.getRequestURI();   // getServletPath should work
+  int j = servletPath.indexOf('?');               // here but doesn't, so we
+  if (j != -1)                                    // remove query by hand...
+    servletPath = servletPath.substring(0, j);
+
+</java>
+
+<head><title>Lucene Search Results</title></head><body>
+
+<center>
+ <form name=search action=`servletPath` method=get>
+ <input name=query size=44 value='`queryString`'>
+ <input type=hidden name=index value="`indexName`">
+ <input type=hidden name=hitsPerPage value=`hitsPerPage`>
+ <input type=hidden name=showSummaries value=`showSummaries`>
+ <input type=submit value=Search>
+ </form>
+</center>
+<java>
+  Hits hits = searcher.search(query);		  // perform query
+  int end = Math.min(hits.length(), start + hitsPerPage);
+</java>
+
+<p>Hits <b><java type=print>start+1</java>-<java type=print>end</java></b>
+(out of <java type=print>hits.length()</java> total matching documents):
+
+<ul>
+<java>
+  // Display hits start..end-1: score as a percentage, a link to the document
+  // labelled with its title, and optionally its summary.
+  for (int i = start; i < end; i++) {             // display the hits
+    Document doc = hits.doc(i);
+    String title = doc.get("title");
+    // null check assumes doc.get may return null for a document without a
+    // title field -- TODO confirm; it is harmless otherwise
+    if (title == null || title.equals(""))        // use url for docs w/o title
+      title = doc.get("url");
+    </java>
+    <p><b><java type=print>(int)(hits.score(i) * 100.0f)</java>%
+    <a href="`doc.get("url")`">
+    <java type=print>Entities.encode(title)</java>
+    </a></b>
+    <java>
+    if (showSummaries) {                          // maybe show summary
+    </java>
+    <ul><i>Summary</i>:
+      <java type=print>Entities.encode(doc.get("summary"))</java>
+    </ul>
+    <java>
+    }
+  }
+</java>
+</ul>
+
+<java>
+  if (end < hits.length()) {			  // insert next page button
+</java>
+    <center>
+    <form name=search action=`servletPath` method=get>
+    <input type=hidden name=query value='`queryString`'>
+    <input type=hidden name=start value=`end`>
+    <input type=hidden name=index value="`indexName`">
+    <input type=hidden name=hitsPerPage value=`hitsPerPage`>
+    <input type=hidden name=showSummaries value=`showSummaries`>
+    <input type=submit value=Next>
+    </form>
+    </center>
+<java>
+    }
+</java>
+
+</body>
+
+<java type=class>
+
+  Analyzer analyzer = new StopAnalyzer();         // used to tokenize queries
+
+  /** Keep a cache of open IndexReader's, so that an index does not have to
+      be opened for each query.  The cache re-opens an index when it has
+      changed so that additions and deletions are visible ASAP. */
+
+  static Hashtable indexCache = new Hashtable();  // name->CachedIndex
+
+  class CachedIndex {                             // an entry in the cache
+    IndexReader reader;                           // an open reader
+    long modified;                                // reader's modified date
+
+    CachedIndex(String name) throws IOException {
+      modified = IndexReader.lastModified(name);  // get modified date
+      reader = IndexReader.open(name);            // open reader
+    }
+  }
+
+  /** Returns an open reader for the named index.  The cached reader is reused
+      while the index's last-modified date is unchanged; otherwise a new reader
+      is opened and replaces the cache entry.  The replaced reader is not
+      closed here.  Any IOException is rethrown as a ServletException. */
+
+  IndexReader getReader(String name) throws ServletException {
+    CachedIndex index =                           // look in cache
+      (CachedIndex)indexCache.get(name);
+
+    try {
+      if (index != null &&                        // check up-to-date
+          (index.modified == IndexReader.lastModified(name)))
+        return index.reader;                      // cache hit
+      else {
+        index = new CachedIndex(name);            // cache miss
+      }
+    } catch (IOException e) {
+      throw new ServletException("Could not open index " + name + ": " +
+                                 e.getClass().getName() + "--" +
+                                 e.getMessage());
+    }
+
+    indexCache.put(name, index);                  // add to cache
+    return index.reader;
+  }
+</java>
--- a/src/demo/org/apache/lucene/SearchFiles.java
+++ b/src/demo/org/apache/lucene/SearchFiles.java
@ -0,0 +1,110 @@
+package org.apache.lucene;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.io.BufferedReader;
+import java.io.InputStreamReader;
+
+import com.lucene.analysis.Analyzer;
+import com.lucene.analysis.StopAnalyzer;
+import com.lucene.document.Document;
+import com.lucene.search.Searcher;
+import com.lucene.search.IndexSearcher;
+import com.lucene.search.Query;
+import com.lucene.search.Hits;
+import com.lucene.queryParser.QueryParser;
+
+/**
+ * Interactive command-line search demo.  Reads queries from standard input,
+ * runs them against the index named "index" and prints the "path" field of
+ * each hit, ten hits per page.  The loop ends at end of input.
+ */
+class SearchFiles {
+  public static void main(String[] args) {
+    try {
+      Searcher searcher = new IndexSearcher("index");
+      Analyzer analyzer = new StopAnalyzer();
+
+      BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
+      while (true) {
+        System.out.print("Query: ");
+        String line = in.readLine();
+
+        if (line == null)                         // readLine() is null at EOF
+          break;
+
+        Query query = QueryParser.parse(line, "contents", analyzer);
+        System.out.println("Searching for: " + query.toString("contents"));
+
+        Hits hits = searcher.search(query);
+        System.out.println(hits.length() + " total matching documents");
+
+        final int HITS_PER_PAGE = 10;
+        for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
+          int end = Math.min(hits.length(), start + HITS_PER_PAGE);
+          for (int i = start; i < end; i++)
+            System.out.println(i + ". " + hits.doc(i).get("path"));
+          if (hits.length() > end) {
+            System.out.print("more (y/n) ? ");
+            line = in.readLine();
+            // stop paging on EOF, an empty answer, or an answer starting 'n'
+            if (line == null || line.length() == 0 || line.charAt(0) == 'n')
+              break;
+          }
+        }
+      }
+      searcher.close();
+
+    } catch (Exception e) {
+      System.out.println(" caught a " + e.getClass() +
+                         "\n with message: " + e.getMessage());
+    }
+  }
+}
--- a/src/java/org/apache/lucene/Makefile
+++ b/src/java/org/apache/lucene/Makefile
@ -0,0 +1,9 @@
+# top-level makefile for lucene
+
+all: jar doc
+
+# root is two levels up
+ROOT = ../..
+
+include rules.mk
+
--- a/src/java/org/apache/lucene/analysis/Analyzer.java
+++ b/src/java/org/apache/lucene/analysis/Analyzer.java
@ -0,0 +1,91 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.Reader;
+
+/**
+ * An Analyzer builds TokenStreams, which analyze text.  It thus represents a
+ * policy for extracting index terms from text.
+ * <p>
+ * Typical implementations first build a Tokenizer, which breaks the stream of
+ * characters from the Reader into raw Tokens.  One or more TokenFilters may
+ * then be applied to the output of the Tokenizer.
+ * <p>
+ * WARNING: the two tokenStream methods of this class delegate to each other.
+ * A subclass must override at least one of them; otherwise calling either
+ * method recurses without bound.
+ */
+public abstract class Analyzer {
+  /**
+   * Creates a TokenStream which tokenizes all the text in the provided
+   * Reader.  The default implementation ignores the field name and forwards
+   * to {@link #tokenStream(Reader)} for compatibility with older versions.
+   * Override to let the Analyzer choose a strategy based on document and/or
+   * field.  Implementations must be able to handle a null field name for
+   * backward compatibility.
+   */
+  public TokenStream tokenStream(String fieldName, Reader reader) {
+    // implemented for backward compatibility
+    final TokenStream stream = tokenStream(reader);
+    return stream;
+  }
+
+  /**
+   * Creates a TokenStream which tokenizes all the text in the provided
+   * Reader.  Provided for backward compatibility only; forwards to the
+   * two-argument variant with a null field name.
+   *
+   * @deprecated use {@link #tokenStream(String, Reader)} instead.
+   * @see #tokenStream(String, Reader)
+   */
+  public TokenStream tokenStream(Reader reader) {
+    final TokenStream stream = tokenStream(null, reader);
+    return stream;
+  }
+}
+
--- a/src/java/org/apache/lucene/analysis/LetterTokenizer.java
+++ b/src/java/org/apache/lucene/analysis/LetterTokenizer.java
@ -0,0 +1,114 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.Reader;
+
+/**
+ * A LetterTokenizer is a tokenizer that divides text at non-letters.  That is
+ * to say, it defines tokens as maximal strings of adjacent letters, as defined
+ * by the java.lang.Character.isLetter() predicate.  A run of letters that
+ * reaches MAX_WORD_LEN characters is cut there and continues in the next
+ * token.
+ * <p>
+ * Note: this does a decent job for most European languages, but does a
+ * terrible job for some Asian languages, where words are not separated by
+ * spaces.
+ */
+public final class LetterTokenizer extends Tokenizer {
+  private final static int MAX_WORD_LEN = 255;
+  private final static int IO_BUFFER_SIZE = 1024;
+
+  /** Count of characters consumed so far (also bumped once per read attempt at end of input). */
+  private int offset = 0;
+  /** Next unread position in ioBuffer. */
+  private int bufferIndex = 0;
+  /** Number of valid characters in ioBuffer, or -1 once the reader is exhausted. */
+  private int dataLen = 0;
+
+  private final char[] buffer = new char[MAX_WORD_LEN];
+  private final char[] ioBuffer = new char[IO_BUFFER_SIZE];
+
+  public LetterTokenizer(Reader in) {
+    input = in;
+  }
+
+  /**
+   * Returns the next run of letters as a Token built from (text, start,
+   * start + length), or null when the input is exhausted and no letters are
+   * pending.
+   */
+  public final Token next() throws java.io.IOException {
+    int tokenLen = 0;
+    int tokenStart = offset;
+
+    for (;;) {
+      offset++;
+
+      // refill the I/O buffer once it has been fully consumed
+      if (bufferIndex >= dataLen) {
+        dataLen = input.read(ioBuffer);
+        bufferIndex = 0;
+      }
+
+      if (dataLen == -1) {                        // end of input
+        if (tokenLen == 0)
+          return null;
+        break;                                    // flush the pending letters
+      }
+
+      final char ch = ioBuffer[bufferIndex++];
+
+      if (!Character.isLetter(ch)) {
+        if (tokenLen > 0)                         // at non-letter w/ chars
+          break;                                  // return 'em
+        continue;                                 // still skipping separators
+      }
+
+      if (tokenLen == 0)                          // start of token
+        tokenStart = offset - 1;
+
+      buffer[tokenLen++] = ch;                    // buffer it
+
+      if (tokenLen == MAX_WORD_LEN)               // buffer is full
+        break;
+    }
+
+    final String text = new String(buffer, 0, tokenLen);
+    return new Token(text, tokenStart, tokenStart + tokenLen);
+  }
+}
--- a/src/java/org/apache/lucene/analysis/LowerCaseFilter.java
+++ b/src/java/org/apache/lucene/analysis/LowerCaseFilter.java
@ -0,0 +1,74 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/**
+ * Normalizes token text to lower case.
+ * <p>
+ * The conversion uses a fixed locale rather than the JVM default, so the
+ * terms produced do not depend on the machine the code runs on.
+ */
+
+public final class LowerCaseFilter extends TokenFilter {
+  public LowerCaseFilter(TokenStream in) {
+    input = in;
+  }
+
+  /**
+   * Returns the next input Token with its termText lower-cased in place, or
+   * null when the underlying stream is exhausted.
+   */
+  public final Token next() throws java.io.IOException {
+    Token t = input.next();
+
+    if (t == null)
+      return null;
+
+    // String.toLowerCase() without a locale follows the default locale (for
+    // example "I" becomes a dotless i under a Turkish default), which would
+    // make indexing and searching disagree across machines.
+    t.termText = t.termText.toLowerCase(java.util.Locale.ENGLISH);
+
+    return t;
+  }
+}
--- a/src/java/org/apache/lucene/analysis/LowerCaseTokenizer.java
+++ b/src/java/org/apache/lucene/analysis/LowerCaseTokenizer.java
@ -0,0 +1,116 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.Reader;
+
+/**
+ * LowerCaseTokenizer performs the function of LetterTokenizer and
+ * LowerCaseFilter together.  It divides text at non-letters and converts the
+ * letters to lower case with Character.toLowerCase().  The two tasks are
+ * combined in one pass for performance, hence this (redundant)
+ * implementation.  A run of letters that reaches MAX_WORD_LEN characters is
+ * cut there and continues in the next token.
+ * <p>
+ * Note: this does a decent job for most European languages, but does a
+ * terrible job for some Asian languages, where words are not separated by
+ * spaces.
+ */
+public final class LowerCaseTokenizer extends Tokenizer {
+  private final static int MAX_WORD_LEN = 255;
+  private final static int IO_BUFFER_SIZE = 1024;
+
+  /** Count of characters consumed so far (also bumped once per read attempt at end of input). */
+  private int offset = 0;
+  /** Next unread position in ioBuffer. */
+  private int bufferIndex = 0;
+  /** Number of valid characters in ioBuffer, or -1 once the reader is exhausted. */
+  private int dataLen = 0;
+
+  private final char[] buffer = new char[MAX_WORD_LEN];
+  private final char[] ioBuffer = new char[IO_BUFFER_SIZE];
+
+  public LowerCaseTokenizer(Reader in) {
+    input = in;
+  }
+
+  /**
+   * Returns the next run of letters, lower-cased, as a Token built from
+   * (text, start, start + length), or null when the input is exhausted and no
+   * letters are pending.
+   */
+  public final Token next() throws java.io.IOException {
+    int tokenLen = 0;
+    int tokenStart = offset;
+
+    for (;;) {
+      offset++;
+
+      // refill the I/O buffer once it has been fully consumed
+      if (bufferIndex >= dataLen) {
+        dataLen = input.read(ioBuffer);
+        bufferIndex = 0;
+      }
+
+      if (dataLen == -1) {                        // end of input
+        if (tokenLen == 0)
+          return null;
+        break;                                    // flush the pending letters
+      }
+
+      final char ch = ioBuffer[bufferIndex++];
+
+      if (!Character.isLetter(ch)) {
+        if (tokenLen > 0)                         // at non-letter w/ chars
+          break;                                  // return 'em
+        continue;                                 // still skipping separators
+      }
+
+      if (tokenLen == 0)                          // start of token
+        tokenStart = offset - 1;
+
+      buffer[tokenLen++] = Character.toLowerCase(ch);   // buffer it
+
+      if (tokenLen == MAX_WORD_LEN)               // buffer is full
+        break;
+    }
+
+    final String text = new String(buffer, 0, tokenLen);
+    return new Token(text, tokenStart, tokenStart + tokenLen);
+  }
+}
--- a/src/java/org/apache/lucene/analysis/Makefile
+++ b/src/java/org/apache/lucene/analysis/Makefile
@ -0,0 +1,2 @@
+# sub-directory makefile for lucene
+include ../rules.mk
--- a/src/java/org/apache/lucene/analysis/PorterStemFilter.java
+++ b/src/java/org/apache/lucene/analysis/PorterStemFilter.java
@ -0,0 +1,98 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Hashtable;
+
+/**
+ * Transforms the token stream as per the Porter stemming algorithm.
+ * <p>
+ * Note: the input to the stemming filter must already be in lower case, so a
+ * LowerCaseFilter or LowerCaseTokenizer has to sit before this filter in the
+ * chain for it to work properly.
+ * <p>
+ * To use this filter with other analyzers, write an Analyzer class that sets
+ * up the TokenStream chain as you want it.  With LowerCaseTokenizer, for
+ * example:
+ * <pre>
+ *   class MyAnalyzer extends Analyzer {
+ *     public final TokenStream tokenStream(String fieldName, Reader reader) {
+ *       return new PorterStemFilter(new LowerCaseTokenizer(reader));
+ *     }
+ *   }
+ * </pre>
+ */
+
+public final class PorterStemFilter extends TokenFilter {
+  private PorterStemmer stemmer;
+
+  public PorterStemFilter(TokenStream in) {
+    input = in;
+    stemmer = new PorterStemmer();
+  }
+
+  /**
+   * Returns the next input Token with its termText replaced by the stem, or
+   * null when the underlying stream is exhausted.
+   */
+  public final Token next() throws IOException {
+    final Token current = input.next();
+    if (current != null) {
+      final String stemmed = stemmer.stem(current.termText);
+      // Object reference comparison is intended here, not equals(): the
+      // term text is only reassigned when a different String came back.
+      if (stemmed != current.termText)
+        current.termText = stemmed;
+    }
+    return current;
+  }
+}
--- a/src/java/org/apache/lucene/analysis/PorterStemmer.java
+++ b/src/java/org/apache/lucene/analysis/PorterStemmer.java
@ -0,0 +1,584 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/*
+
+   Porter stemmer in Java. The original paper is in
+
+       Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
+       no. 3, pp 130-137,
+
+   See also http://www.muscat.com/~martin/stem.html
+
+   Bug 1 (reported by Gonzalo Parra 16/10/99) fixed as marked below.
+   The words 'aed', 'eed', 'oed' leave k at 'a' for step 3, and b[k-1]
+   is then outside the bounds of b.
+
+   Similarly,
+
+   Bug 2 (reported by Steve Dyrdahl 22/2/00) fixed as marked below.
+   'ion' by itself leaves j = -1 in the test for 'ion' in step 5, and
+   b[j] is then outside the bounds of b.
+
+   Release 3.
+
+   [ This version is derived from Release 3, modified by Brian Goetz to 
+     optimize for fewer object creations.  ]
+
+*/
+
+
+import java.io.*;
+
+/**
+ *
+ * Stemmer, implementing the Porter Stemming Algorithm
+ *
+ * The Stemmer class transforms a word into its root form.  The input
+ * word can be provided a character at time (by calling add()), or at once
+ * by calling one of the various stem(something) methods.  
+ */
+
+class PorterStemmer
+{   
+  private char[] b;
+  private int i,    /* offset into b */
+    j, k, k0;
+  private boolean dirty = false;
+  private static final int INC = 50; /* unit of size whereby b is increased */
+  private static final int EXTRA = 1;
+
+  /** Creates a stemmer holding no characters, with a buffer of INC chars. */
+  public PorterStemmer() {
+    i = 0;
+    b = new char[INC];
+  }
+
+  /** 
+   * reset() resets the stemmer so it can stem another word.  If you invoke
+   * the stemmer by calling add(char) and then stem(), you must call reset()
+   * before starting another word.
+   */
+  public void reset() { i = 0; dirty = false; }
+
+  /**
+   * Adds a character to the word being stemmed.  When you are finished
+   * adding characters, call stem() to process the word.  The internal buffer
+   * grows by INC characters whenever fewer than EXTRA spare slots would
+   * remain.
+   *
+   * @param ch the character to append
+   */
+  public void add(char ch) {
+    if (b.length <= i + EXTRA) {
+      // grow the buffer, keeping everything stored so far
+      char[] new_b = new char[b.length+INC];
+      System.arraycopy(b, 0, new_b, 0, b.length);
+      b = new_b;
+    }
+    b[i++] = ch;
+  }
+
+  /**
+   * After a word has been stemmed, it can be retrieved by toString(), 
+   * or a reference to the internal buffer can be retrieved by getResultBuffer
+   * and getResultLength (which is generally more efficient.)
+   */
+  public String toString() { return new String(b,0,i); }
+
+  /**
+   * Returns the length of the word resulting from the stemming process.
+   */
+  public int getResultLength() { return i; }
+
+  /**
+   * Returns a reference to a character buffer containing the results of
+   * the stemming process.  You also need to consult getResultLength()
+   * to determine the length of the result.
+   */
+  public char[] getResultBuffer() { return b; }
+
+  /* cons(i) is true <=> b[i] is a consonant.  a, e, i, o and u are never
+     consonants; y is one at position k0 or when the letter before it is not
+     a consonant; every other character is a consonant. */
+
+  private final boolean cons(int i) {
+    final char ch = b[i];
+    if (ch == 'a' || ch == 'e' || ch == 'i' || ch == 'o' || ch == 'u')
+      return false;
+    if (ch == 'y')
+      return i == k0 || !cons(i - 1);
+    return true;
+  }
+
+  /* m() measures the number of consonant sequences between k0 and j. if c is
+     a consonant sequence and v a vowel sequence, and <..> indicates arbitrary
+     presence,
+
+          <c><v>       gives 0
+          <c>vc<v>     gives 1
+          <c>vcvc<v>   gives 2
+          <c>vcvcvc<v> gives 3
+          ....
+
+     The scan classifies b[k0..j] through cons().  Note that the local i
+     declared below shadows the field i.
+  */
+
+  private final int m() {
+    int n = 0;
+    int i = k0;
+    /* skip the optional leading consonant sequence <c> */
+    while(true) {
+      if (i > j) 
+        return n;
+      if (! cons(i)) 
+        break; 
+      i++;
+    }
+    i++;
+    /* each pass of this loop consumes one vowel sequence followed by one
+       consonant sequence; n is bumped as soon as the consonant sequence
+       starts */
+    while(true) {
+      /* skip the rest of the vowel sequence */
+      while(true) {
+        if (i > j) 
+          return n;
+        if (cons(i)) 
+          break;
+        i++;
+      }
+      i++;
+      n++;
+      /* skip the rest of the consonant sequence */
+      while(true) {
+        if (i > j) 
+          return n;
+        if (! cons(i)) 
+          break;
+        i++;
+      }
+      i++;
+    }
+  }
+
+  /* vowelinstem() is true <=> k0,...j contains a vowel */
+
+  private final boolean vowelinstem() {
+    int i; 
+    for (i = k0; i <= j; i++) 
+      if (! cons(i)) 
+        return true;
+    return false;
+  }
+
+  /* doublec(j) is true <=> positions j-1 and j both lie at or after k0, hold
+     the same character, and that character is a consonant. */
+
+  private final boolean doublec(int j) {
+    return j >= k0+1 && b[j] == b[j-1] && cons(j);
+  }
+
+  /* cvc(i) is true <=> i-2,i-1,i has the form consonant - vowel - consonant
+     and also if the second c is not w,x or y. this is used when trying to
+     restore an e at the end of a short word. e.g.
+
+          cav(e), lov(e), hop(e), crim(e), but
+          snow, box, tray.
+
+  */
+
+  private final boolean cvc(int i) {
+    if (i < k0+2 || !cons(i) || cons(i-1) || !cons(i-2)) 
+      return false;
+    else {
+      int ch = b[i];
+      if (ch == 'w' || ch == 'x' || ch == 'y') return false;
+    }
+    return true;
+  }
+
+  /* ends(s) is true <=> k0,...k ends with the string s.  On a match j is set
+     to the index just before the matched suffix; on a mismatch j is left
+     unchanged. */
+
+  private final boolean ends(String s) {
+    final int len = s.length();
+    final int from = k - len + 1;
+    if (from < k0)
+      return false;
+    int n = 0;
+    while (n < len) {
+      if (b[from + n] != s.charAt(n))
+        return false;
+      n++;
+    }
+    j = k - len;
+    return true;
+  }
+
+  /* setto(s) sets (j+1),...k to the characters in the string s, readjusting
+     k. */
+
+  void setto(String s) {
+    int l = s.length();
+    int o = j+1;
+    for (int i = 0; i < l; i++) 
+      b[o+i] = s.charAt(i);
+    k = j+l;
+    dirty = true;
+  }
+
+  /* r(s) calls setto(s) only when m() > 0; otherwise it does nothing.  It is
+     used by step3() and step4(). */
+
+  void r(String s) {
+    if (m() <= 0)
+      return;
+    setto(s);
+  }
+
+  /* step1() gets rid of plurals and -ed or -ing. e.g.
+
+           caresses  ->  caress
+           ponies    ->  poni
+           ties      ->  ti
+           caress    ->  caress
+           cats      ->  cat
+
+           feed      ->  feed
+           agreed    ->  agree
+           disabled  ->  disable
+
+           matting   ->  mat
+           mating    ->  mate
+           meeting   ->  meet
+           milling   ->  mill
+           messing   ->  mess
+
+           meetings  ->  meet
+
+     The order of the ends() tests matters: each successful ends() call moves
+     j, and the calls that follow (m(), vowelinstem(), setto()) rely on it.
+
+  */
+  
+  private final void step1() {
+    /* plural endings: -sses loses two characters, -ies becomes -i, and a
+       final s is dropped unless the character before it is also s */
+    if (b[k] == 's') {
+      if (ends("sses")) k -= 2; 
+      else if (ends("ies")) setto("i"); 
+      else if (b[k-1] != 's') k--;
+    }
+    /* -eed loses its last character only when the part before it has
+       m() > 0 */
+    if (ends("eed")) { 
+      if (m() > 0) 
+        k--; 
+    } 
+    else if ((ends("ed") || ends("ing")) && vowelinstem()) {  
+      /* cut the suffix off: ends() left j just before it */
+      k = j;
+      if (ends("at")) setto("ate"); 
+      else if (ends("bl")) setto("ble"); 
+      else if (ends("iz")) setto("ize"); 
+      else if (doublec(k)) {
+        /* drop one letter of a final double consonant, but put it back
+           for l, s and z */
+        int ch = b[k--];
+        if (ch == 'l' || ch == 's' || ch == 'z') 
+          k++;
+      }
+      /* short stem of the form consonant-vowel-consonant: append an e */
+      else if (m() == 1 && cvc(k)) 
+        setto("e");
+    }
+  }
+
+  /* step2() turns a terminal y into i when the part of the word before that
+     y contains a vowel, and marks the word dirty when it does so. */
+  
+  private final void step2() {
+    if (!ends("y"))
+      return;
+    if (!vowelinstem())
+      return;
+    b[k] = 'i';
+    dirty = true;
+  }
+
+  /* step3() maps double suffices to single ones. so -ization ( = -ize plus
+     -ation) maps to -ize etc. note that the string before the suffix must give
+     m() > 0.
+
+     The switch dispatches on the penultimate character b[k-1] so that only
+     the suffixes that can possibly match are tested.  Within a case the first
+     suffix that matches wins, and r() performs the replacement only when
+     m() > 0. */
+
+  private final void step3() { 
+    /* a one-character word has no b[k-1] to switch on */
+    if (k == k0) return; /* For Bug 1 */ 
+    switch (b[k-1]) {
+    case 'a': 
+      if (ends("ational")) { r("ate"); break; }
+      if (ends("tional")) { r("tion"); break; }
+      break;
+    case 'c': 
+      if (ends("enci")) { r("ence"); break; }
+      if (ends("anci")) { r("ance"); break; }
+      break;
+    case 'e': 
+      if (ends("izer")) { r("ize"); break; }
+      break;
+    case 'l': 
+      if (ends("bli")) { r("ble"); break; }
+      if (ends("alli")) { r("al"); break; }
+      if (ends("entli")) { r("ent"); break; }
+      if (ends("eli")) { r("e"); break; }
+      if (ends("ousli")) { r("ous"); break; }
+      break;
+    case 'o': 
+      if (ends("ization")) { r("ize"); break; }
+      if (ends("ation")) { r("ate"); break; }
+      if (ends("ator")) { r("ate"); break; }
+      break;
+    case 's': 
+      if (ends("alism")) { r("al"); break; }
+      if (ends("iveness")) { r("ive"); break; }
+      if (ends("fulness")) { r("ful"); break; }
+      if (ends("ousness")) { r("ous"); break; }
+      break;
+    case 't': 
+      if (ends("aliti")) { r("al"); break; }
+      if (ends("iviti")) { r("ive"); break; }
+      if (ends("biliti")) { r("ble"); break; }
+      break;
+    case 'g': 
+      if (ends("logi")) { r("log"); break; }
+    } 
+  }
+
+  /* step4() deals with -ic-, -ful, -ness etc. similar strategy to step3.
+
+     Unlike step3, the switch here dispatches on the last character b[k].
+     Suffixes passed to r("") are replaced by the empty string, i.e. removed,
+     subject to whatever condition r() applies (r() is defined elsewhere --
+     confirm there). */
+
+  private final void step4() { 
+    switch (b[k]) {
+    case 'e': 
+      if (ends("icate")) { r("ic"); break; }
+      if (ends("ative")) { r(""); break; }
+      if (ends("alize")) { r("al"); break; }
+      break;
+    case 'i': 
+      if (ends("iciti")) { r("ic"); break; }
+      break;
+    case 'l': 
+      if (ends("ical")) { r("ic"); break; }
+      if (ends("ful")) { r(""); break; }
+      break;
+    case 's': 
+      if (ends("ness")) { r(""); break; }
+      break;
+    }
+  }
+  
+  /* step5() takes off -ant, -ence etc., in context <c>vcvc<v>.
+
+     The switch dispatches on the penultimate character b[k-1].  Inside it,
+     "break" means a candidate suffix was recognised and control continues to
+     the m() > 1 test after the switch; "return" means no suffix applies and
+     the word is left as it is. */
+
+  private final void step5() {
+    /* A one-character word has no b[k-1] to inspect. */
+    if (k == k0) return; /* for Bug 1 */ 
+    switch (b[k-1]) {
+    case 'a': 
+      if (ends("al")) break; 
+      return;
+    case 'c': 
+      if (ends("ance")) break;
+      if (ends("ence")) break; 
+      return;
+    case 'e': 
+      if (ends("er")) break; return;
+    case 'i': 
+      if (ends("ic")) break; return;
+    case 'l': 
+      if (ends("able")) break;
+      if (ends("ible")) break; return;
+    case 'n': 
+      /* Tried longest first: "ement", then "ment", then "ent". */
+      if (ends("ant")) break;
+      if (ends("ement")) break;
+      if (ends("ment")) break;
+      /* element etc. not stripped before the m */
+      if (ends("ent")) break; 
+      return;
+    case 'o': 
+      /* "ion" only counts when the character at j is 's' or 't'. */
+      if (ends("ion") && j >= 0 && (b[j] == 's' || b[j] == 't')) break;
+      /* j >= 0 fixes Bug 2 */
+      if (ends("ou")) break;  /* takes care of -ous */
+      return;
+    case 's': 
+      if (ends("ism")) break; 
+      return;
+    case 't': 
+      if (ends("ate")) break;
+      if (ends("iti")) break; 
+      return;
+    case 'u': 
+      if (ends("ous")) break; 
+      return;
+    case 'v': 
+      if (ends("ive")) break; 
+      return;
+    case 'z': 
+      if (ends("ize")) break; 
+      return;
+    default: 
+      return;
+    }
+    /* Cut the word back to j when m() > 1.  NOTE(review): j is presumably the
+       index ends() recorded for the last character before the suffix --
+       confirm in ends(). */
+    if (m() > 1) 
+      k = j;
+  }
+
+  /* step6() sets j to k and then tidies the end of the word.  A final 'e' is
+     dropped when m() > 1, or when m() == 1 and cvc(k-1) is false.  After
+     that, a final 'l' is dropped when doublec(k) holds and m() > 1. */
+
+  private final void step6() {
+    j = k;
+    if (b[k] == 'e') {
+      final int measure = m();
+      boolean dropE = measure > 1;
+      if (!dropE && measure == 1)
+        dropE = !cvc(k-1);
+      if (dropE)
+        k--;
+    }
+    final boolean endsInL = b[k] == 'l';
+    if (endsInL && doublec(k) && m() > 1)
+      k--;
+  }
+
+
+  /** 
+   * Stem a word provided as a String.  Returns the result as a String.
+   */
+  public String stem(String s) {
+    if (stem(s.toCharArray(), s.length()))
+      return toString();
+    else 
+      return s;
+  }
+
+  /** Stems the word that occupies the whole of the given char[].  Returns
+   * true if the stemming process resulted in a word different from the
+   * input.  The result is available through
+   * getResultLength()/getResultBuffer() or toString().
+   */
+  public boolean stem(char[] word) {
+    final int wordLen = word.length;
+    return stem(word, wordLen);
+  }
+
+  /** Stem a word contained in a portion of a char[] array.  The stemmer is
+   * reset, the wordLen characters starting at wordBuffer[offset] are copied
+   * into the internal buffer (which is replaced by a larger one when it is
+   * too small), and the copy is stemmed.  Returns true if the stemming
+   * process resulted in a word different from the input.  You can retrieve
+   * the result with getResultLength()/getResultBuffer() or toString().
+   *
+   * @param wordBuffer array holding the word
+   * @param offset     index in wordBuffer of the word's first character
+   * @param wordLen    number of characters in the word
+   * @return true if the stem differs from the input word
+   * @throws IndexOutOfBoundsException if wordLen is positive and the range
+   *         offset .. offset+wordLen-1 does not lie inside wordBuffer
+   */
+  public boolean stem(char[] wordBuffer, int offset, int wordLen) {
+    reset();
+    if (b.length < wordLen) {
+      // The old contents are not carried over; the copy below refills it.
+      b = new char[wordLen + EXTRA];
+    }
+    // Bulk copy instead of a hand-written loop (whose index used to shadow
+    // the stemmer's field j).  The guard keeps a non-positive wordLen a
+    // no-op, as the loop was, rather than letting arraycopy reject it.
+    if (wordLen > 0)
+      System.arraycopy(wordBuffer, offset, b, 0, wordLen);
+    i = wordLen;
+    return stem(0);
+  }
+
+  /** Stems the word held in the first wordLen characters of the given
+   * char[].  Returns true if the stemming process resulted in a word
+   * different from the input.  The result is available through
+   * getResultLength()/getResultBuffer() or toString().
+   */
+  public boolean stem(char[] word, int wordLen) {
+    final int offset = 0;
+    return stem(word, offset, wordLen);
+  }
+
+  /** Stems the word that was placed into the Stemmer buffer through calls
+   * to add(), starting at buffer index 0.  Returns true if the stemming
+   * process resulted in a word different from the input.  The result is
+   * available through getResultLength()/getResultBuffer() or toString().
+   */
+  public boolean stem() {
+    final int start = 0;
+    return stem(start);
+  }
+
+  /** Stems the word currently held in the buffer, taking i0 as the index of
+   * its first character and i as its length marker.  step1() to step6() run
+   * only when the last index i-1 is greater than i0+1; shorter words are
+   * left alone.  Afterwards i is set to the new end of the word.
+   * Returns the dirty flag, which is also raised here whenever the word
+   * came out a different length than it went in.
+   */
+  public boolean stem(int i0) {
+    k0 = i0;
+    k = i - 1;
+    final boolean longEnough = k > k0+1;
+    if (longEnough) {
+      step1();
+      step2();
+      step3();
+      step4();
+      step5();
+      step6();
+    }
+    // A word also counts as dirty when letters were lopped off, i.e. when
+    // its length changed.  Thanks to Ifigenia Vairelles for pointing this out.
+    final int stemmedLength = k + 1;
+    if (stemmedLength != i)
+      dirty = true;
+    i = stemmedLength;
+    return dirty;
+  }
+
+  /** Test program for demonstrating the Stemmer.  Every command-line
+   * argument names a file.  Each file is read byte by byte; every run of
+   * letters is lower-cased and stemmed, and the stems are written to
+   * standard out with all other characters copied through unchanged.
+   * Each byte is treated as one character with code 0-255, so multi-byte
+   * encodings are not decoded.  A file that cannot be read produces an
+   * "error reading" line and processing moves on to the next argument.
+   * Usage: Stemmer file-name ...
+   */
+  public static void main(String[] args) {
+    PorterStemmer s = new PorterStemmer();
+
+    for (int i = 0; i < args.length; i++) {
+      try {
+        InputStream in = new FileInputStream(args[i]);
+        try {
+          byte[] buffer = new byte[1024];
+          int bufferLen, offset, ch;
+
+          bufferLen = in.read(buffer);
+          offset = 0;
+          s.reset();
+
+          while (true) {
+            if (offset < bufferLen) {
+              // Mask to 0..255.  Without the mask a byte >= 0x80 is
+              // sign-extended to a negative int and is then mistaken for
+              // the end-of-file marker (-1) tested below.
+              ch = buffer[offset++] & 0xff;
+            }
+            else {
+              bufferLen = in.read(buffer);
+              offset = 0;
+              if (bufferLen < 0)
+                ch = -1;                      // end of file
+              else
+                ch = buffer[offset++] & 0xff;
+            }
+
+            if (Character.isLetter((char) ch)) {
+              s.add(Character.toLowerCase((char) ch));
+            }
+            else {
+              // A non-letter (or end of file) terminates the current word.
+              s.stem();
+              System.out.print(s.toString());
+              s.reset();
+              if (ch < 0)
+                break;
+              else {
+                System.out.print((char) ch);
+              }
+            }
+          }
+        }
+        finally {
+          // Release the file even when a read fails part-way through.
+          in.close();
+        }
+      }
+      catch (IOException e) {
+        System.out.println("error reading " + args[i]);
+      }
+    }
+  }
+}
+
--- a/src/java/org/apache/lucene/analysis/SimpleAnalyzer.java
+++ b/src/java/org/apache/lucene/analysis/SimpleAnalyzer.java
@ -0,0 +1,65 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.Reader;
+
+/** An Analyzer whose token stream is a LowerCaseTokenizer reading directly
+  from the supplied Reader.  (Presumably this is equivalent to filtering a
+  LetterTokenizer with a LowerCaseFilter; LowerCaseTokenizer is defined in
+  another file -- confirm there.) */
+
+public final class SimpleAnalyzer extends Analyzer {
+  /** Returns a new LowerCaseTokenizer over reader.  fieldName is not used. */
+  public final TokenStream tokenStream(String fieldName, Reader reader) {
+    final TokenStream lowerCased = new LowerCaseTokenizer(reader);
+    return lowerCased;
+  }
+}
--- a/src/java/org/apache/lucene/analysis/StopAnalyzer.java
+++ b/src/java/org/apache/lucene/analysis/StopAnalyzer.java
@ -0,0 +1,90 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.Reader;
+import java.util.Hashtable;
+
+/** An Analyzer that tokenizes with LowerCaseTokenizer and then removes stop
+  words with StopFilter.  The stop table is built once, when the analyzer is
+  constructed, and shared by every token stream it hands out. */
+
+public final class StopAnalyzer extends Analyzer {
+  /** Stop-word table passed to each StopFilter this analyzer creates. */
+  private Hashtable stopTable;
+
+  /** An array containing some common English words that are not usually useful
+    for searching. */
+  public static final String[] ENGLISH_STOP_WORDS = {
+    "a", "and", "are", "as", "at", "be", "but", "by",
+    "for", "if", "in", "into", "is", "it",
+    "no", "not", "of", "on", "or", "s", "such",
+    "t", "that", "the", "their", "then", "there", "these",
+    "they", "this", "to", "was", "will", "with"
+  };
+
+  /** Builds an analyzer which removes the words in ENGLISH_STOP_WORDS. */
+  public StopAnalyzer() {
+    this(ENGLISH_STOP_WORDS);
+  }
+
+  /** Builds an analyzer which removes the words in the provided array. */
+  public StopAnalyzer(String[] stopWords) {
+    stopTable = StopFilter.makeStopTable(stopWords);
+  }
+
+  /** Returns a StopFilter, using this analyzer's stop table, wrapped around
+    a new LowerCaseTokenizer over reader.  fieldName is not used. */
+  public final TokenStream tokenStream(String fieldName, Reader reader) {
+    final TokenStream lowerCased = new LowerCaseTokenizer(reader);
+    return new StopFilter(lowerCased, stopTable);
+  }
+}
+
--- a/src/java/org/apache/lucene/analysis/StopFilter.java
+++ b/src/java/org/apache/lucene/analysis/StopFilter.java
@ -0,0 +1,99 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Hashtable;
+
+/** A TokenFilter that drops every token whose term text is a key of its
+  stop-word table and passes all other tokens through unchanged. */
+
+public final class StopFilter extends TokenFilter {
+
+  /** Stop words, stored as keys; a token is dropped when its text is a key. */
+  private Hashtable table;
+
+  /** Constructs a filter which removes words from the input
+    TokenStream that are named in the array of words.  The array is
+    converted with makeStopTable on every call. */
+  public StopFilter(TokenStream in, String[] stopWords) {
+    this(in, makeStopTable(stopWords));
+  }
+
+  /** Constructs a filter which removes words from the input
+    TokenStream that are keys of the Hashtable.  The table is used as
+    given, not copied. */
+  public StopFilter(TokenStream in, Hashtable stopTable) {
+    this.input = in;
+    this.table = stopTable;
+  }
+
+  /** Builds a Hashtable from an array of stop words, appropriate for passing
+    into the StopFilter constructor.  Each word is stored as both key and
+    value.  Building the table separately lets an Analyzer construct it once
+    and reuse it for every filter. */
+  public final static Hashtable makeStopTable(String[] stopWords) {
+    final int count = stopWords.length;
+    Hashtable stopTable = new Hashtable(count);
+    for (int n = 0; n < count; n++) {
+      final String word = stopWords[n];
+      stopTable.put(word, word);
+    }
+    return stopTable;
+  }
+
+  /** Returns the next input Token whose termText() is not a stop word, or
+    null once the input is exhausted. */
+  public final Token next() throws IOException {
+    Token candidate;
+    while ((candidate = input.next()) != null) {
+      // first token that is not in the stop table wins
+      if (table.get(candidate.termText()) == null)
+        return candidate;
+    }
+    // reached EOS -- return null
+    return null;
+  }
+}
--- a/src/java/org/apache/lucene/analysis/Token.java
+++ b/src/java/org/apache/lucene/analysis/Token.java
@ -0,0 +1,111 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/** A Token is an occurrence of a term from the text of a field.  It consists
+  of the term's text, the start and end offset of the term in the text of the
+  field, and a type string.
+
+  The start and end offsets permit applications to re-associate a token with
+  its source text, e.g., to display highlighted query terms in a document
+  browser, or to show matching text fragments in a KWIC (KeyWord In Context)
+  display, etc.
+
+  The type is an interned string, assigned by a lexical analyzer
+  (a.k.a. tokenizer), naming the lexical or syntactic class that the token
+  belongs to.  For example an end of sentence marker token might be implemented
+  with type "eos".  The default token type is "word".  */
+
+public final class Token {
+  /** Type given to tokens built without an explicit type. */
+  private static final String DEFAULT_TYPE = "word";
+
+  String termText;				  // the text of the term
+  int startOffset;				  // start in source text
+  int endOffset;				  // end in source text
+  String type = DEFAULT_TYPE;			  // lexical type
+
+  /** Constructs a Token with the given term text, and start & end offsets.
+      The type defaults to "word." */
+  public Token(String text, int start, int end) {
+    this(text, start, end, DEFAULT_TYPE);
+  }
+
+  /** Constructs a Token with the given text, start and end offsets, & type. */
+  public Token(String text, int start, int end, String typ) {
+    this.termText = text;
+    this.startOffset = start;
+    this.endOffset = end;
+    this.type = typ;
+  }
+
+  /** Returns the Token's term text. */
+  public final String termText() {
+    return termText;
+  }
+
+  /** Returns this Token's starting offset, the position of the first character
+    corresponding to this token in the source text.
+
+    Note that the difference between endOffset() and startOffset() may not be
+    equal to termText.length(), as the term text may have been altered by a
+    stemmer or some other filter. */
+  public final int startOffset() {
+    return startOffset;
+  }
+
+  /** Returns this Token's ending offset, one greater than the position of the
+    last character corresponding to this token in the source text. */
+  public final int endOffset() {
+    return endOffset;
+  }
+
+  /** Returns this Token's lexical type.  Defaults to "word". */
+  public final String type() {
+    return type;
+  }
+
+}
--- a/src/java/org/apache/lucene/analysis/TokenFilter.java
+++ b/src/java/org/apache/lucene/analysis/TokenFilter.java
@ -0,0 +1,74 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+
+/** A TokenFilter is a TokenStream that takes its tokens from another
+  TokenStream, held in the input field.
+  <p>
+  This is an abstract class.
+  */
+
+public abstract class TokenFilter extends TokenStream {
+  /** The source of tokens for this filter. */
+  protected TokenStream input;
+
+  /** Closes this filter by closing the input TokenStream. */
+  public void close() throws IOException {
+    this.input.close();
+  }
+
+}
+
--- a/src/java/org/apache/lucene/analysis/TokenStream.java
+++ b/src/java/org/apache/lucene/analysis/TokenStream.java
@ -0,0 +1,77 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+
+/** A TokenStream enumerates the sequence of tokens, either from
+  fields of a document or from query text.
+  <p>
+  This is an abstract class.  Concrete subclasses are:
+  <ul>
+  <li>{@link Tokenizer}, a TokenStream
+  whose input is a Reader; and
+  <li>{@link TokenFilter}, a TokenStream
+  whose input is another TokenStream.
+  </ul>
+  */
+
+abstract public class TokenStream {
+  /** Returns the next token in the stream, or null at EOS. */
+  abstract public Token next() throws IOException;
+
+  /** Releases resources associated with this stream. */
+  public void close() throws IOException {}
+}
--- a/src/java/org/apache/lucene/analysis/Tokenizer.java
+++ b/src/java/org/apache/lucene/analysis/Tokenizer.java
@ -0,0 +1,74 @@
+package org.apache.lucene.analysis;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.Reader;
+import java.io.IOException;
+
+/** A Tokenizer is a TokenStream that takes its text from a Reader, held in
+  the input field.
+  <p>
+  This is an abstract class.
+ */
+
+public abstract class Tokenizer extends TokenStream {
+  /** The text source for this Tokenizer. */
+  protected Reader input;
+
+  /** Closes the input Reader; this is the default behaviour. */
+  public void close() throws IOException {
+    this.input.close();
+  }
+}
+
--- a/src/java/org/apache/lucene/analysis/package.html
+++ b/src/java/org/apache/lucene/analysis/package.html
@ -0,0 +1,10 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<html>
+<head>
+   <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+   <meta name="Author" content="Doug Cutting">
+</head>
+<body>
+API and code to convert text into indexable tokens.
+</body>
+</html>
--- a/src/java/org/apache/lucene/analysis/standard/.cvsignore
+++ b/src/java/org/apache/lucene/analysis/standard/.cvsignore
@ -0,0 +1,6 @@
+Token.java
+StandardTokenizer.java
+StandardTokenizerTokenManager.java
+TokenMgrError.java
+CharStream.java
+StandardTokenizerConstants.java
--- a/src/java/org/apache/lucene/analysis/standard/FastCharStream.java
+++ b/src/java/org/apache/lucene/analysis/standard/FastCharStream.java
@ -0,0 +1,159 @@
+// FastCharStream.java
+package org.apache.lucene.analysis.standard;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.*;
+
+/** An efficient implementation of JavaCC's CharStream interface.
+ * <p>No line counting is done: the line accessors always return 1, while the
+ * column accessors return the character position in the input, as required
+ * by Lucene's {@link org.apache.lucene.analysis.Token} API. */
+public final class FastCharStream implements CharStream {
+  char[] buffer = null;
+
+  int bufferLength = 0;       // end of valid chars
+  int bufferPosition = 0;     // next char to read
+
+  int tokenStart = 0;         // offset in buffer
+  int bufferStart = 0;        // position in file of buffer
+
+  Reader input;               // source of chars
+
+  /** Constructs from a Reader. */
+  public FastCharStream(Reader r) {
+    this.input = r;
+  }
+
+  /** Returns the next character, reading more input first when the buffer
+   * has been used up.
+   * @throws IOException if the Reader fails or has no more characters
+   */
+  public final char readChar() throws IOException {
+    if (bufferLength <= bufferPosition) {
+      refill();
+    }
+    return buffer[bufferPosition++];
+  }
+
+  /** Makes room in the buffer while keeping the characters of the current
+   * token, then reads more input behind them.
+   * @throws IOException with message "read past eof" when the Reader is
+   *         exhausted, or whatever the Reader itself throws
+   */
+  private final void refill() throws IOException {
+    // Number of buffered chars that belong to the current token.
+    final int kept = bufferLength - tokenStart;
+
+    if (tokenStart != 0) {
+      // The token does not start at the front: slide it there.
+      System.arraycopy(buffer, tokenStart, buffer, 0, kept);
+    } else if (buffer == null) {
+      // First call: allocate the buffer.
+      buffer = new char[2048];
+    } else if (bufferLength == buffer.length) {
+      // The token already fills the whole buffer: double its size.
+      char[] grown = new char[buffer.length * 2];
+      System.arraycopy(buffer, 0, grown, 0, bufferLength);
+      buffer = grown;
+    }
+
+    // The buffer now begins at the token start.
+    bufferStart += tokenStart;
+    tokenStart = 0;
+    bufferPosition = kept;
+    bufferLength = kept;
+
+    // Fill the free space that follows the kept characters.
+    int count = input.read(buffer, kept, buffer.length - kept);
+    if (count == -1) {
+      throw new IOException("read past eof");
+    }
+    bufferLength += count;
+  }
+
+  /** Marks the current read position as the start of a new token and returns
+   * the character found there. */
+  public final char BeginToken() throws IOException {
+    tokenStart = bufferPosition;
+    return readChar();
+  }
+
+  /** Moves the read position back by <code>amount</code> characters. */
+  public final void backup(int amount) {
+    bufferPosition = bufferPosition - amount;
+  }
+
+  /** Returns the characters from the token start up to the read position. */
+  public final String GetImage() {
+    int length = bufferPosition - tokenStart;
+    return new String(buffer, tokenStart, length);
+  }
+
+  /** Returns a copy of the last <code>len</code> characters before the read
+   * position. */
+  public final char[] GetSuffix(int len) {
+    char[] suffix = new char[len];
+    int from = bufferPosition - len;
+    System.arraycopy(buffer, from, suffix, 0, len);
+    return suffix;
+  }
+
+  /** Closes the Reader.  A failure to close is reported on System.err and
+   * otherwise ignored. */
+  public final void Done() {
+    try {
+      input.close();
+    } catch (IOException e) {
+      System.err.println("Caught: " + e + "; ignoring.");
+    }
+  }
+
+  /** Returns the character position of the read position in the input. */
+  public final int getColumn() {
+    return bufferStart + bufferPosition;
+  }
+
+  /** Always 1: lines are not counted. */
+  public final int getLine() {
+    return 1;
+  }
+
+  /** Returns the character position of the read position in the input. */
+  public final int getEndColumn() {
+    return bufferStart + bufferPosition;
+  }
+
+  /** Always 1: lines are not counted. */
+  public final int getEndLine() {
+    return 1;
+  }
+
+  /** Returns the character position of the current token's start in the
+   * input. */
+  public final int getBeginColumn() {
+    return bufferStart + tokenStart;
+  }
+
+  /** Always 1: lines are not counted. */
+  public final int getBeginLine() {
+    return 1;
+  }
+}
--- a/src/java/org/apache/lucene/analysis/standard/Makefile
+++ b/src/java/org/apache/lucene/analysis/standard/Makefile
@ -0,0 +1,7 @@
+# NOTE(review): ROOT is not referenced in this file; presumably rules.mk
+# reads it -- confirm.
+ROOT = ../../../..
+
+# Pull in the build rules shared with the other source directories.
+include ../../rules.mk
+
+# Don't delete ParseException.java -- we've changed it by hand.
+# patsubst removes the ParseException.java word from DIRT, which is
+# presumably the list of generated files that rules.mk deletes -- confirm.
+DIRT := $(patsubst ParseException.java,,${DIRT})
+
--- a/src/java/org/apache/lucene/analysis/standard/ParseException.java
+++ b/src/java/org/apache/lucene/analysis/standard/ParseException.java
@ -0,0 +1,191 @@
+/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 0.7pre6 */
+package org.apache.lucene.analysis.standard;
+
+/**
+ * This exception is thrown when parse errors are encountered.
+ * You can explicitly create objects of this exception type by
+ * calling the method generateParseException in the generated
+ * parser.
+ *
+ * You can modify this class to customize your error reporting
+ * mechanisms so long as you retain the public fields.
+ *
+ * Note that this class extends java.io.IOException, so code that
+ * catches IOException also catches parse errors.
+ */
+public class ParseException extends java.io.IOException {
+
+  /**
+   * This constructor is used by the method "generateParseException"
+   * in the generated parser.  Calling this constructor generates
+   * a new object of this type with the fields "currentToken",
+   * "expectedTokenSequences", and "tokenImage" set.  The boolean
+   * flag "specialConstructor" is also set to true to indicate that
+   * this constructor was used to create this object.
+   * This constructor calls its super class with the empty string
+   * to force the "toString" method of parent class "Throwable" to
+   * print the error message in the form:
+   *     ParseException: <result of getMessage>
+   */
+  public ParseException(Token currentTokenVal,
+                        int[][] expectedTokenSequencesVal,
+                        String[] tokenImageVal
+                       )
+  {
+    super("");
+    specialConstructor = true;
+    currentToken = currentTokenVal;
+    expectedTokenSequences = expectedTokenSequencesVal;
+    tokenImage = tokenImageVal;
+  }
+
+  /**
+   * The following constructors are for use by you for whatever
+   * purpose you can think of.  Constructing the exception in this
+   * manner makes the exception behave in the normal way - i.e., as
+   * documented in the class "Throwable".  The fields "currentToken",
+   * "expectedTokenSequences", and "tokenImage" do not contain
+   * relevant information.  The JavaCC generated code does not use
+   * these constructors.
+   */
+
+  public ParseException() {
+    super();
+    specialConstructor = false;
+  }
+
+  public ParseException(String message) {
+    super(message);
+    specialConstructor = false;
+  }
+
+  /**
+   * This variable determines which constructor was used to create
+   * this object and thereby affects the semantics of the
+   * "getMessage" method (see below).
+   */
+  protected boolean specialConstructor;
+
+  /**
+   * This is the last token that has been consumed successfully.  If
+   * this object has been created due to a parse error, the token
+   * following this token will (therefore) be the first error token.
+   */
+  public Token currentToken;
+
+  /**
+   * Each entry in this array is an array of integers.  Each array
+   * of integers represents a sequence of tokens (by their ordinal
+   * values) that is expected at this point of the parse.
+   */
+  public int[][] expectedTokenSequences;
+
+  /**
+   * This is a reference to the "tokenImage" array of the generated
+   * parser within which the parse error occurred.  This array is
+   * defined in the generated ...Constants interface.
+   */
+  public String[] tokenImage;
+
+  /**
+   * This method has the standard behavior when this object has been
+   * created using the standard constructors.  Otherwise, it uses
+   * "currentToken" and "expectedTokenSequences" to generate a parse
+   * error message and returns it.  If this object has been created
+   * due to a parse error, and you do not catch it (it gets thrown
+   * from the parser), then this method is called during the printing
+   * of the final stack trace, and hence the correct error message
+   * gets displayed.
+   */
+  public String getMessage() {
+    if (!specialConstructor) {
+      return super.getMessage();
+    }
+
+    // One line per expected token sequence; the text is accumulated in
+    // a StringBuffer rather than by repeated String concatenation.
+    StringBuffer expected = new StringBuffer();
+    int maxSize = 0;
+    for (int i = 0; i < expectedTokenSequences.length; i++) {
+      int[] sequence = expectedTokenSequences[i];
+      if (maxSize < sequence.length) {
+        maxSize = sequence.length;
+      }
+      for (int j = 0; j < sequence.length; j++) {
+        expected.append(tokenImage[sequence[j]]).append(" ");
+      }
+      // A sequence that does not end in token kind 0 is marked as continuing.
+      if (sequence[sequence.length - 1] != 0) {
+        expected.append("...");
+      }
+      expected.append(eol).append("    ");
+    }
+
+    // Show as many of the offending tokens as the longest expected
+    // sequence has entries, stopping early at a token of kind 0.
+    StringBuffer retval = new StringBuffer("Encountered \"");
+    Token tok = currentToken.next;
+    for (int i = 0; i < maxSize; i++) {
+      if (i != 0) {
+        retval.append(" ");
+      }
+      if (tok.kind == 0) {
+        retval.append(tokenImage[0]);
+        break;
+      }
+      retval.append(add_escapes(tok.image));
+      tok = tok.next;
+    }
+    retval.append("\" at line ").append(currentToken.next.beginLine);
+    retval.append(", column ").append(currentToken.next.beginColumn);
+    retval.append(".").append(eol);
+    if (expectedTokenSequences.length == 1) {
+      retval.append("Was expecting:");
+    } else {
+      retval.append("Was expecting one of:");
+    }
+    retval.append(eol).append("    ");
+    retval.append(expected.toString());
+    return retval.toString();
+  }
+
+  /**
+   * The end of line string for this machine.
+   */
+  protected String eol = System.getProperty("line.separator", "\n");
+ 
+  /**
+   * Used to convert raw characters to their escaped version
+   * when these raw version cannot be used as part of an ASCII
+   * string literal.  Characters with value 0 are dropped; characters
+   * outside the range 0x20-0x7e are written as four-digit unicode
+   * escapes.
+   */
+  protected String add_escapes(String str) {
+      StringBuffer retval = new StringBuffer();
+      for (int i = 0; i < str.length(); i++) {
+        char ch = str.charAt(i);
+        switch (ch)
+        {
+           case 0 :
+              continue;
+           case '\b':
+              retval.append("\\b");
+              continue;
+           case '\t':
+              retval.append("\\t");
+              continue;
+           case '\n':
+              retval.append("\\n");
+              continue;
+           case '\f':
+              retval.append("\\f");
+              continue;
+           case '\r':
+              retval.append("\\r");
+              continue;
+           case '\"':
+              retval.append("\\\"");
+              continue;
+           case '\'':
+              retval.append("\\\'");
+              continue;
+           case '\\':
+              retval.append("\\\\");
+              continue;
+           default:
+              if (ch < 0x20 || ch > 0x7e) {
+                 // Left-pad the hex value to four digits.
+                 String s = "0000" + Integer.toString(ch, 16);
+                 retval.append("\\u" + s.substring(s.length() - 4, s.length()));
+              } else {
+                 retval.append(ch);
+              }
+              continue;
+        }
+      }
+      return retval.toString();
+   }
+
+}
--- a/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
+++ b/src/java/org/apache/lucene/analysis/standard/StandardAnalyzer.java
@ -0,0 +1,95 @@
+package org.apache.lucene.analysis.standard;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.analysis.*;
+import java.io.Reader;
+import java.util.Hashtable;
+
+/** An {@link Analyzer} that runs {@link StandardTokenizer} output through
+ * {@link StandardFilter}, {@link LowerCaseFilter} and {@link StopFilter},
+ * in that order. */
+public final class StandardAnalyzer extends Analyzer {
+  /** Stop-word table handed to every {@link StopFilter} this analyzer
+   * creates. */
+  private Hashtable stopTable;
+
+  /** An array containing some common English words that are not usually useful
+    for searching. */
+  public static final String[] STOP_WORDS = {
+    "a", "and", "are", "as", "at",
+    "be", "but", "by",
+    "for",
+    "if", "in", "into", "is", "it",
+    "no", "not",
+    "of", "on", "or",
+    "s", "such",
+    "t", "that", "the", "their", "then", "there", "these", "they", "this", "to",
+    "was", "will", "with"
+  };
+
+  /** Builds an analyzer that uses {@link #STOP_WORDS}. */
+  public StandardAnalyzer() {
+    this(STOP_WORDS);
+  }
+
+  /** Builds an analyzer with the given stop words. */
+  public StandardAnalyzer(String[] stopWords) {
+    this.stopTable = StopFilter.makeStopTable(stopWords);
+  }
+
+  /** Constructs a {@link StandardTokenizer} filtered by a {@link
+   * StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}.
+   * The <code>fieldName</code> argument is not used. */
+  public final TokenStream tokenStream(String fieldName, Reader reader) {
+    TokenStream tokens = new StandardTokenizer(reader);
+    TokenStream normalized = new StandardFilter(tokens);
+    TokenStream lowerCased = new LowerCaseFilter(normalized);
+    return new StopFilter(lowerCased, stopTable);
+  }
+}
--- a/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
+++ b/src/java/org/apache/lucene/analysis/standard/StandardFilter.java
@ -0,0 +1,106 @@
+package org.apache.lucene.analysis.standard;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.analysis.*;
+
+/** Normalizes tokens extracted with {@link StandardTokenizer}. */
+
+public final class StandardFilter extends TokenFilter
+  implements StandardTokenizerConstants  {
+
+
+  /** Construct filtering <i>in</i>. */
+  public StandardFilter(TokenStream in) {
+    input = in;
+  }
+
+  /** Type string carried by tokens matched as APOSTROPHE. */
+  private static final String APOSTROPHE_TYPE = tokenImage[APOSTROPHE];
+  /** Type string carried by tokens matched as ACRONYM. */
+  private static final String ACRONYM_TYPE = tokenImage[ACRONYM];
+
+  /** Returns the next token in the stream, or null at EOS.
+   * <p>Removes <tt>'s</tt> from the end of words.
+   * <p>Removes dots from acronyms.
+   * <p>A rewritten token keeps the start offset, end offset and type of the
+   * token it replaces; all other tokens are returned unchanged.
+   */
+  public final org.apache.lucene.analysis.Token next() throws java.io.IOException {
+    org.apache.lucene.analysis.Token t = input.next();
+
+    if (t == null)
+      return null;
+
+    String text = t.termText();
+    String type = t.type();
+
+    // Types are compared with equals() rather than ==, so a token whose type
+    // string is equal to, but not the same object as, the tokenImage entry
+    // is still normalized.  Calling equals() on the constant also tolerates
+    // a null type.
+    if (APOSTROPHE_TYPE.equals(type) &&           // remove 's
+        (text.endsWith("'s") || text.endsWith("'S"))) {
+      return new org.apache.lucene.analysis.Token
+        (text.substring(0, text.length() - 2),
+         t.startOffset(), t.endOffset(), type);
+
+    } else if (ACRONYM_TYPE.equals(type)) {       // remove dots
+      StringBuffer trimmed = new StringBuffer();
+      for (int i = 0; i < text.length(); i++) {
+        char c = text.charAt(i);
+        if (c != '.')
+          trimmed.append(c);
+      }
+      return new org.apache.lucene.analysis.Token
+        (trimmed.toString(), t.startOffset(), t.endOffset(), type);
+
+    } else {
+      return t;
+    }
+  }
+}
--- a/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj
+++ b/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj
@ -0,0 +1,197 @@
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+// JavaCC generator options.  The commented-out entries are not set here.
+options {
+  // Generate instance (non-static) members.
+  STATIC = false;
+//IGNORE_CASE = true;
+//BUILD_PARSER = false;
+//UNICODE_INPUT = true;
+  // Read input through a user-supplied CharStream implementation; the
+  // StandardTokenizer constructor below passes a FastCharStream.
+  USER_CHAR_STREAM = true;
+  OPTIMIZE_TOKEN_MANAGER = true;
+//DEBUG_TOKEN_MANAGER = true;
+}
+PARSER_BEGIN(StandardTokenizer)
+
+package org.apache.lucene.analysis.standard;
+
+import java.io.*;
+
+/** A grammar-based tokenizer constructed with JavaCC.
+ *
+ * <p> This should be a good tokenizer for most European-language documents.
+ *
+ * <p>Many applications have specific tokenizer needs.  If this tokenizer does
+ * not suit your application, please consider copying this source code
+ * directory to your project and maintaining your own grammar-based tokenizer.
+ */
+public class StandardTokenizer extends org.apache.lucene.analysis.Tokenizer {
+
+  /** Constructs a tokenizer for this Reader. */
+  public StandardTokenizer(Reader reader) {
+    // Delegates to a constructor taking a CharStream, which is not declared
+    // in this file -- presumably the one JavaCC generates.
+    this(new FastCharStream(reader));
+    // Record the Reader in the inherited "input" field.
+    this.input = reader;
+  }
+}
+
+PARSER_END(StandardTokenizer)
+
+TOKEN : {					  // token patterns
+
+  // basic word: a sequence of digits & letters
+  <ALPHANUM: (<LETTER>|<DIGIT>)+ >
+
+  // internal apostrophes: O'Reilly, you're, O'Reilly's
+  // use a post-filter to remove possessives
+| <APOSTROPHE: <ALPHA> ("'" <ALPHA>)+ >
+
+  // acronyms: U.S.A., I.B.M., etc.
+  // use a post-filter to remove dots
+| <ACRONYM: <ALPHA> "." (<ALPHA> ".")+ >
+
+  // company names like AT&T and Excite@Home.
+| <COMPANY: <ALPHA> ("&"|"@") <ALPHA> >
+
+  // email addresses
+| <EMAIL: <ALPHANUM> "@" <ALPHANUM> ("." <ALPHANUM>)+ >
+
+  // hostname
+| <HOST: <ALPHANUM> ("." <ALPHANUM>)+ >
+
+  // floating point, serial, model numbers, ip addresses, etc.
+  // every other segment must have at least one digit
+| <NUM: (<ALPHANUM> <P> <HAS_DIGIT>
+       | <HAS_DIGIT> <P> <ALPHANUM>
+       | <ALPHANUM> (<P> <HAS_DIGIT> <P> <ALPHANUM>)+
+       | <HAS_DIGIT> (<P> <ALPHANUM> <P> <HAS_DIGIT>)+
+       | <ALPHANUM> <P> <HAS_DIGIT> (<P> <ALPHANUM> <P> <HAS_DIGIT>)+
+       | <HAS_DIGIT> <P> <ALPHANUM> (<P> <HAS_DIGIT> <P> <ALPHANUM>)+
+        )
+  >
+
+  // The names below are prefixed with '#': they are helper patterns used
+  // inside the definitions above, not token kinds of their own.
+
+  // separator between the segments of a NUM
+| <#P: ("_"|"-"|"/"|"."|",") >
+| <#HAS_DIGIT:					  // at least one digit
+    (<LETTER>|<DIGIT>)*
+    <DIGIT>
+    (<LETTER>|<DIGIT>)*
+  >
+
+  // one or more letters, no digits
+| < #ALPHA: (<LETTER>)+>
+| < #LETTER:					  // unicode letters
+      [
+       "\u0041"-"\u005a",
+       "\u0061"-"\u007a",
+       "\u00c0"-"\u00d6",
+       "\u00d8"-"\u00f6",
+       "\u00f8"-"\u00ff",
+       "\u0100"-"\u1fff",
+       "\u3040"-"\u318f",
+       "\u3300"-"\u337f",
+       "\u3400"-"\u3d2d",
+       "\u4e00"-"\u9fff",
+       "\uf900"-"\ufaff"
+      ]
+  >
+| < #DIGIT:					  // unicode digits
+      [
+       "\u0030"-"\u0039",
+       "\u0660"-"\u0669",
+       "\u06f0"-"\u06f9",
+       "\u0966"-"\u096f",
+       "\u09e6"-"\u09ef",
+       "\u0a66"-"\u0a6f",
+       "\u0ae6"-"\u0aef",
+       "\u0b66"-"\u0b6f",
+       "\u0be7"-"\u0bef",
+       "\u0c66"-"\u0c6f",
+       "\u0ce6"-"\u0cef",
+       "\u0d66"-"\u0d6f",
+       "\u0e50"-"\u0e59",
+       "\u0ed0"-"\u0ed9",
+       "\u1040"-"\u1049"
+      ]
+  >
+}
+
+// ~[] is the complement of the empty character set, i.e. any single
+// character; input that matches no TOKEN pattern is consumed one character
+// at a time and discarded.
+SKIP : {					  // skip unrecognized chars
+ <NOISE: ~[] >
+}
+
+/** Returns the next token in the stream, or null at EOS.
+ * <p>The returned token's type is set to an element of {@link
+ * StandardTokenizerConstants#tokenImage}.
+ */
+org.apache.lucene.analysis.Token next() throws IOException :
+{
+  // Unqualified "Token" here is JavaCC's token class, not Lucene's.
+  Token token = null;
+}
+{
+  ( token = <ALPHANUM> |
+    token = <APOSTROPHE> |
+    token = <ACRONYM> |
+    token = <COMPANY> |
+    token = <EMAIL> |
+    token = <HOST> |
+    token = <NUM> |
+    token = <EOF>
+   )
+    {
+      if (token.kind == EOF) {
+	return null;
+      } else {
+	// Wrap the match in a Lucene token: its text, the begin/end column
+	// values as start/end offsets, and the tokenImage entry for its
+	// kind as the type.
+	return
+	  new org.apache.lucene.analysis.Token(token.image,
+					token.beginColumn,token.endColumn,
+					tokenImage[token.kind]);
+      }
+    }
+}
--- a/src/java/org/apache/lucene/analysis/standard/package.html
+++ b/src/java/org/apache/lucene/analysis/standard/package.html
@ -0,0 +1,15 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<html>
+<head>
+   <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+   <meta name="Author" content="Doug Cutting">
+</head>
+<body>
+A grammar-based tokenizer constructed with JavaCC.
+<p>Note that JavaCC defines lots of public classes, methods and fields
+that do not need to be public.&nbsp; These clutter the documentation.&nbsp;
+Sorry.
+<p>Note that because JavaCC defines a class named <tt>Token</tt>, <tt>org.apache.lucene.analysis.Token</tt>
+must always be fully qualified in source code in this package.
+</body>
+</html>
--- a/src/java/org/apache/lucene/document/DateField.java
+++ b/src/java/org/apache/lucene/document/DateField.java
@ -0,0 +1,109 @@
+package org.apache.lucene.document;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.util.Date;
+
+/**
+ * Provides support for converting dates to strings and vice-versa.  The
+ * strings are structured so that lexicographic sorting orders by date.  This
+ * makes them suitable for use as field values and search terms.
+ *
+ * <p>A time is encoded as its non-negative millisecond value written in radix
+ * {@link Character#MAX_RADIX} and left-padded with zeros to a fixed width, so
+ * every encoded string has the same length.
+ */
+public class DateField {
+  private DateField() {}
+
+  // make date strings long enough to last a millennium
+  private static final int DATE_LEN =
+    Long.toString(1000L*365*24*60*60*1000, Character.MAX_RADIX).length();
+
+  /** Returns the encoding of time zero, the smallest string this class
+   * produces. */
+  public static String MIN_DATE_STRING() {
+    return timeToString(0);
+  }
+
+  /** Returns a string of the fixed date length made up entirely of the
+   * largest digit of the radix, so that no encoded time sorts after it. */
+  public static String MAX_DATE_STRING() {
+    char[] buffer = new char[DATE_LEN];
+    char c = Character.forDigit(Character.MAX_RADIX-1, Character.MAX_RADIX);
+    for (int i = 0 ; i < DATE_LEN; i++)
+      buffer[i] = c;
+    return new String(buffer);
+  }
+
+  /** Converts a Date to a string suitable for indexing.
+   * @throws RuntimeException if the date cannot be encoded; see
+   * {@link #timeToString(long)} */
+  public static String dateToString(Date date) {
+    return timeToString(date.getTime());
+  }
+
+  /** Converts a millisecond time to a string suitable for indexing.
+   * @param time milliseconds, must not be negative
+   * @return the zero-padded, fixed-length encoding of <code>time</code>
+   * @throws RuntimeException if <code>time</code> is negative ("time too
+   * early") or needs more digits than the fixed length ("time too late") */
+  public static String timeToString(long time) {
+    if (time < 0)
+      throw new RuntimeException("time too early");
+
+    String s = Long.toString(time, Character.MAX_RADIX);
+
+    if (s.length() > DATE_LEN)
+      throw new RuntimeException("time too late");
+
+    // pad with leading zeros, building the result in a single buffer
+    StringBuffer padded = new StringBuffer(DATE_LEN);
+    for (int i = s.length(); i < DATE_LEN; i++)
+      padded.append('0');
+    padded.append(s);
+
+    return padded.toString();
+  }
+
+  /** Converts a string-encoded date into a millisecond time.
+   * @throws NumberFormatException if <code>s</code> is not a number in radix
+   * {@link Character#MAX_RADIX} */
+  public static long stringToTime(String s) {
+    return Long.parseLong(s, Character.MAX_RADIX);
+  }
+
+  /** Converts a string-encoded date into a Date object. */
+  public static Date stringToDate(String s) {
+    return new Date(stringToTime(s));
+  }
+}
--- a/src/java/org/apache/lucene/document/Document.java
+++ b/src/java/org/apache/lucene/document/Document.java
@ -0,0 +1,145 @@
+package org.apache.lucene.document;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.util.Enumeration;
+
+/**
+ * Documents are the unit of indexing and search.
+ *
+ * <p>A Document is a set of fields.  Each field has a name and a textual
+ * value.  A field may be stored with the document, in which case it is
+ * returned with search hits on the document.  Thus each document should
+ * typically contain stored fields which uniquely identify it.
+ */
+public final class Document {
+  /** Head of the linked list of fields; the most recently added field comes
+   * first.  Null while the document has no fields. */
+  DocumentFieldList fieldList = null;
+
+  /** Constructs a new document with no fields. */
+  public Document() {}
+
+  /** Adds a field to a document.  Several fields may be added with
+   * the same name.  In this case, if the fields are indexed, their text is
+   * treated as though appended for the purposes of search. */
+  public final void add(Field field) {
+    DocumentFieldList head = new DocumentFieldList(field, fieldList);
+    fieldList = head;
+  }
+
+  /** Returns a field with the given name if any exist in this document, or
+   * null.  If several fields share this name, the most recently added one is
+   * returned. */
+  public final Field getField(String name) {
+    DocumentFieldList node = fieldList;
+    while (node != null) {
+      if (node.field.name().equals(name))
+        return node.field;
+      node = node.next;
+    }
+    return null;
+  }
+
+  /** Returns the string value of the field with the given name if any exist
+   * in this document, or null.  If several fields share this name, the value
+   * of the most recently added one is returned. */
+  public final String get(String name) {
+    Field field = getField(name);
+    return field == null ? null : field.stringValue();
+  }
+
+  /** Returns an Enumeration of all the fields in a document. */
+  public final Enumeration fields() {
+    return new DocumentFieldEnumeration(this);
+  }
+
+  /** Prints the fields of a document for human consumption. */
+  public final String toString() {
+    StringBuffer buffer = new StringBuffer();
+    buffer.append("Document<");
+    DocumentFieldList node = fieldList;
+    while (node != null) {
+      buffer.append(node.field.toString());
+      node = node.next;
+      if (node != null)                     // separator between fields only
+        buffer.append(" ");
+    }
+    buffer.append(">");
+    return buffer.toString();
+  }
+
+}
+
+/** One cell of the singly-linked list that holds a document's fields. */
+final class DocumentFieldList {
+  Field field;                                    // the field in this cell
+  DocumentFieldList next;                         // rest of the list, or null
+
+  DocumentFieldList(Field f, DocumentFieldList n) {
+    this.field = f;
+    this.next = n;
+  }
+}
+
+/** Enumerates the fields of a document by walking its field list from the
+ * head. */
+final class DocumentFieldEnumeration implements Enumeration {
+  DocumentFieldList fields;                       // next cell, null when done
+
+  DocumentFieldEnumeration(Document d) {
+    fields = d.fieldList;
+  }
+
+  /** Returns true while at least one more field remains. */
+  public final boolean hasMoreElements() {
+    return fields != null;
+  }
+
+  /** Returns the next field and advances to the following one.
+   * @throws java.util.NoSuchElementException if no fields remain, as the
+   * Enumeration contract requires */
+  public final Object nextElement() {
+    if (fields == null)
+      throw new java.util.NoSuchElementException();
+    Field result = fields.field;
+    fields = fields.next;
+    return result;
+  }
+}
--- a/src/java/org/apache/lucene/document/Field.java
+++ b/src/java/org/apache/lucene/document/Field.java
@ -0,0 +1,169 @@
+package org.apache.lucene.document;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.Reader;
+
+/**
+  A field is a section of a Document.  Each field has two parts, a name and a
+  value.  Values may be free text, provided as a String or as a Reader, or they
+  may be atomic keywords, which are not further processed.  Such keywords may
+  be used to represent dates, urls, etc.  Fields are optionally stored in the
+  index, so that they may be returned with hits on the document.
+  */
+
+public final class Field {
+  private String name = "body";
+  private String stringValue = null;
+  private Reader readerValue = null;
+  private boolean isStored = false;
+  private boolean isIndexed = true;
+  private boolean isTokenized = true;
+
+  /** Constructs a String-valued Field that is not tokenized, but is indexed
+    and stored.  Useful for non-text fields, e.g. date or url.  */
+  public static final Field Keyword(String name, String value) {
+    return new Field(name, value, true, true, false);
+  }
+
+  /** Constructs a String-valued Field that is not tokenized or indexed,
+    but is stored in the index, for return with hits. */
+  public static final Field UnIndexed(String name, String value) {
+    return new Field(name, value, true, false, false);
+  }
+
+  /** Constructs a String-valued Field that is tokenized and indexed,
+    and is stored in the index, for return with hits.  Useful for short text
+    fields, like "title" or "subject". */
+  public static final Field Text(String name, String value) {
+    return new Field(name, value, true, true, true);
+  }
+
+  /** Constructs a String-valued Field that is tokenized and indexed,
+    but that is not stored in the index. */
+  public static final Field UnStored(String name, String value) {
+    return new Field(name, value, false, true, true);
+  }
+
+  /** Constructs a Reader-valued Field that is tokenized and indexed, but is
+    not stored in the index verbatim.  Useful for longer text fields, like
+    "body". */
+  public static final Field Text(String name, Reader value) {
+    return new Field(name, value);
+  }
+
+  /** The name of the field (e.g., "date", "subject", "title", "body", etc.)
+    as an interned string. */
+  public String name() 		{ return name; }
+
+  /** The value of the field as a String, or null.  If null, the Reader value
+    is used.  Exactly one of stringValue() and readerValue() must be set. */
+  public String stringValue()		{ return stringValue; }
+  /** The value of the field as a Reader, or null.  If null, the String value
+    is used.  Exactly one of stringValue() and readerValue() must be set. */
+  public Reader readerValue()	{ return readerValue; }
+
+  /** Constructs a String-valued Field with explicit flags.
+    @param name the field name; it is interned
+    @param string the field value
+    @param store true if the value is to be stored in the index
+    @param index true if the value is to be indexed
+    @param token true if the value is to be tokenized before indexing
+    @throws IllegalArgumentException if name or string is null */
+  public Field(String name, String string,
+	       boolean store, boolean index, boolean token) {
+    if (name == null)
+      throw new IllegalArgumentException("name cannot be null");
+    if (string == null)
+      throw new IllegalArgumentException("value cannot be null");
+
+    this.name = name.intern();			  // field names are interned
+    this.stringValue = string;
+    this.isStored = store;
+    this.isIndexed = index;
+    this.isTokenized = token;
+  }
+
+  /** Constructs a Reader-valued Field.  The flags keep their defaults: not
+    stored, indexed and tokenized.
+    @throws IllegalArgumentException if name or reader is null */
+  Field(String name, Reader reader) {
+    if (name == null)
+      throw new IllegalArgumentException("name cannot be null");
+    if (reader == null)
+      throw new IllegalArgumentException("value cannot be null");
+
+    this.name = name.intern();			  // field names are interned
+    this.readerValue = reader;
+  }
+
+  /** True iff the value of the field is to be stored in the index for return
+    with search hits.  It is an error for this to be true if a field is
+    Reader-valued. */
+  public final boolean	isStored() 	{ return isStored; }
+
+  /** True iff the value of the field is to be indexed, so that it may be
+    searched on. */
+  public final boolean 	isIndexed() 	{ return isIndexed; }
+
+  /** True iff the value of the field should be tokenized as text prior to
+    indexing.  Un-tokenized fields are indexed as a single word and may not be
+    Reader-valued. */
+  public final boolean 	isTokenized() 	{ return isTokenized; }
+
+  /** Prints a Field for human consumption.  Each flag combination produced by
+    the static factories has its own readable form; any other combination
+    falls back to the default Object representation. */
+  public final String toString() {
+    if (isStored && isIndexed && !isTokenized)
+      return "Keyword<" + name + ":" + stringValue + ">";
+    else if (isStored && !isIndexed && !isTokenized)
+      return "Unindexed<" + name + ":" + stringValue + ">";
+    else if (isStored && isIndexed && isTokenized && stringValue!=null)
+      return "Text<" + name + ":" + stringValue + ">";
+    else if (!isStored && isIndexed && isTokenized && readerValue!=null)
+      return "Text<" + name + ":" + readerValue + ">";
+    else if (!isStored && isIndexed && isTokenized && stringValue!=null)
+      // fields built by UnStored(); previously fell through to the default
+      return "UnStored<" + name + ":" + stringValue + ">";
+    else
+      return super.toString();
+  }
+
+}
--- a/src/java/org/apache/lucene/document/Makefile
+++ b/src/java/org/apache/lucene/document/Makefile
@ -0,0 +1,2 @@
+# sub-directory makefile for lucene
+include ../rules.mk
--- a/src/java/org/apache/lucene/document/package.html
+++ b/src/java/org/apache/lucene/document/package.html
@ -0,0 +1,10 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<html>
+<head>
+   <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+   <meta name="Author" content="Doug Cutting">
+</head>
+<body>
+The Document abstraction.
+</body>
+</html>
--- a/src/java/org/apache/lucene/index/DocumentWriter.java
+++ b/src/java/org/apache/lucene/index/DocumentWriter.java
@ -0,0 +1,336 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+import java.util.Hashtable;
+import java.util.Enumeration;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.OutputStream;
+import org.apache.lucene.search.Similarity;
+
+/**
+ * Writes one Document into a Directory under a given segment name: the field
+ * names, the field values, the inverted postings (term dictionary, frequency
+ * and position files) and one norm file per indexed field.
+ */
+final class DocumentWriter {
+  private Analyzer analyzer;
+  private Directory directory;
+  private FieldInfos fieldInfos;
+  private int maxFieldLength;
+
+  // Keys are Terms, values are Postings.
+  // Used to buffer a document before it is written to the index.
+  private final Hashtable postingTable = new Hashtable();
+  private int[] fieldLengths;
+
+  private final Term termBuffer = new Term("", ""); // avoid consing
+
+  /**
+   * @param d   directory the segment files are created in
+   * @param a   analyzer used to tokenize tokenized fields
+   * @param mfl field length limit applied while tokenizing a field
+   */
+  DocumentWriter(Directory d, Analyzer a, int mfl) {
+    directory = d;
+    analyzer = a;
+    maxFieldLength = mfl;
+  }
+
+  /**
+   * Writes <code>doc</code> to the files of the named segment.
+   *
+   * @param segment prefix of every file created for this document
+   * @param doc     the document to write
+   * @throws IOException if writing to the directory fails
+   */
+  final void addDocument(String segment, Document doc)
+       throws IOException {
+    // write field names
+    fieldInfos = new FieldInfos();
+    fieldInfos.add(doc);
+    fieldInfos.write(directory, segment + ".fnm");
+
+    // write field values
+    FieldsWriter fieldsWriter =
+      new FieldsWriter(directory, segment, fieldInfos);
+    try {
+      fieldsWriter.addDocument(doc);
+    } finally {
+      fieldsWriter.close();
+    }
+
+    // invert doc into postingTable
+    postingTable.clear();                         // clear postingTable
+    fieldLengths = new int[fieldInfos.size()];    // init fieldLengths
+    invertDocument(doc);
+
+    // sort postingTable into an array
+    Posting[] postings = sortPostingTable();
+
+    // write postings
+    writePostings(postings, segment);
+
+    // write norms of indexed fields
+    writeNorms(doc, segment);
+  }
+
+  // Tokenizes the fields of a document into Postings.
+  private final void invertDocument(Document doc)
+       throws IOException {
+    Enumeration fields  = doc.fields();
+    while (fields.hasMoreElements()) {
+      Field field = (Field)fields.nextElement();
+      String fieldName = field.name();
+      int fieldNumber = fieldInfos.fieldNumber(fieldName);
+
+      // Resume where an earlier field of the same name stopped.
+      int position = fieldLengths[fieldNumber];   // position in field
+
+      if (field.isIndexed()) {
+        if (!field.isTokenized()) {               // un-tokenized field
+          addPosition(fieldName, field.stringValue(), position++);
+        } else {
+          Reader reader;                          // find or make Reader
+          if (field.readerValue() != null)
+            reader = field.readerValue();
+          else if (field.stringValue() != null)
+            reader = new StringReader(field.stringValue());
+          else
+            throw new IllegalArgumentException
+              ("field must have either String or Reader value");
+
+          // Tokenize field and add to postingTable
+          TokenStream stream = analyzer.tokenStream(fieldName, reader);
+          try {
+            for (Token t = stream.next(); t != null; t = stream.next()) {
+              addPosition(fieldName, t.termText(), position++);
+              // NOTE(review): this test runs after the token is added, so
+              // one token beyond maxFieldLength is indexed -- confirm
+              // whether the limit is meant to be inclusive.
+              if (position > maxFieldLength) break;
+            }
+          } finally {
+            stream.close();
+          }
+        }
+
+        fieldLengths[fieldNumber] = position;     // save field length
+      }
+    }
+  }
+
+  // Records one occurrence of a term at the given position, creating the
+  // Posting on first sight and growing its positions array when it is full.
+  private final void addPosition(String field, String text, int position) {
+    termBuffer.set(field, text);
+    Posting ti = (Posting)postingTable.get(termBuffer);
+    if (ti != null) {                             // word seen before
+      int freq = ti.freq;
+      if (ti.positions.length == freq) {          // positions array is full
+        int[] newPositions = new int[freq * 2];   // double size
+        System.arraycopy(ti.positions, 0, newPositions, 0, freq);
+        ti.positions = newPositions;
+      }
+      ti.positions[freq] = position;              // add new position
+      ti.freq = freq + 1;                         // update frequency
+    }
+    else {                                        // word not seen before
+      Term term = new Term(field, text, false);
+      postingTable.put(term, new Posting(term, position));
+    }
+  }
+
+  // Returns the buffered Postings as an array ordered by Term.
+  private final Posting[] sortPostingTable() {
+    // copy postingTable into an array
+    Posting[] array = new Posting[postingTable.size()];
+    Enumeration postings = postingTable.elements();
+    for (int i = 0; postings.hasMoreElements(); i++)
+      array[i] = (Posting)postings.nextElement();
+
+    // sort the array
+    quickSort(array, 0, array.length - 1);
+
+    return array;
+  }
+
+  // Sorts postings[lo..hi] (inclusive) in place by Term, using a
+  // median-of-three pivot.
+  static private final void quickSort(Posting[] postings, int lo, int hi) {
+    if(lo >= hi)
+      return;
+
+    int mid = lo + (hi - lo) / 2;                 // cannot overflow
+
+    // Order the lo, mid and hi elements so the median ends up at mid.
+    if(postings[lo].term.compareTo(postings[mid].term) > 0) {
+      Posting tmp = postings[lo];
+      postings[lo] = postings[mid];
+      postings[mid] = tmp;
+    }
+
+    if(postings[mid].term.compareTo(postings[hi].term) > 0) {
+      Posting tmp = postings[mid];
+      postings[mid] = postings[hi];
+      postings[hi] = tmp;
+
+      if(postings[lo].term.compareTo(postings[mid].term) > 0) {
+        Posting tmp2 = postings[lo];
+        postings[lo] = postings[mid];
+        postings[mid] = tmp2;
+      }
+    }
+
+    int left = lo + 1;
+    int right = hi - 1;
+
+    if (left >= right)                            // at most three elements
+      return;
+
+    Term partition = postings[mid].term;
+
+    for( ;; ) {
+      while(postings[right].term.compareTo(partition) > 0)
+        --right;
+
+      while(left < right && postings[left].term.compareTo(partition) <= 0)
+        ++left;
+
+      if(left < right) {
+        Posting tmp = postings[left];
+        postings[left] = postings[right];
+        postings[right] = tmp;
+        --right;
+      } else {
+        break;
+      }
+    }
+
+    quickSort(postings, lo, left);
+    quickSort(postings, left + 1, hi);
+  }
+
+  // Writes the sorted postings to the segment's .frq and .prx files and adds
+  // one dictionary entry per term through a TermInfosWriter.
+  private final void writePostings(Posting[] postings, String segment)
+       throws IOException {
+    OutputStream freq = null, prox = null;
+    TermInfosWriter tis = null;
+
+    try {
+      freq = directory.createFile(segment + ".frq");
+      prox = directory.createFile(segment + ".prx");
+      tis = new TermInfosWriter(directory, segment, fieldInfos);
+      TermInfo ti = new TermInfo();
+
+      for (int i = 0; i < postings.length; i++) {
+        Posting posting = postings[i];
+
+        // add an entry to the dictionary with pointers to prox and freq files
+        ti.set(1, freq.getFilePointer(), prox.getFilePointer());
+        tis.add(posting.term, ti);
+
+        // add an entry to the freq file
+        int f = posting.freq;
+        if (f == 1)                               // optimize freq=1
+          freq.writeVInt(1);                      // set low bit of doc num.
+        else {
+          freq.writeVInt(0);                      // the document number
+          freq.writeVInt(f);                      // frequency in doc
+        }
+
+        int lastPosition = 0;                     // write positions
+        int[] positions = posting.positions;
+        for (int j = 0; j < f; j++) {             // use delta-encoding
+          int position = positions[j];
+          prox.writeVInt(position - lastPosition);
+          lastPosition = position;
+        }
+      }
+    }
+    finally {
+      // Nested so that a failing close() cannot leave the later outputs open.
+      try {
+        if (freq != null) freq.close();
+      } finally {
+        try {
+          if (prox != null) prox.close();
+        } finally {
+          if (tis != null) tis.close();
+        }
+      }
+    }
+  }
+
+  // Writes, for each indexed field of the document, a one-byte norm computed
+  // from that field's length into the file segment + ".f" + fieldNumber.
+  private final void writeNorms(Document doc, String segment)
+       throws IOException {
+    Enumeration fields  = doc.fields();
+    while (fields.hasMoreElements()) {
+      Field field = (Field)fields.nextElement();
+      if (field.isIndexed()) {
+        int fieldNumber = fieldInfos.fieldNumber(field.name());
+        OutputStream norm = directory.createFile(segment + ".f" + fieldNumber);
+        try {
+          norm.writeByte(Similarity.norm(fieldLengths[fieldNumber]));
+        } finally {
+          norm.close();
+        }
+      }
+    }
+  }
+}
+
+/** Info about a Term in a doc: the term, how often it occurs and where. */
+final class Posting {
+  Term term;                                      // the Term
+  int freq;                                       // its frequency in doc
+  int[] positions;                                // positions it occurs at
+
+  /** Creates a posting for a term first seen at the given position. */
+  Posting(Term t, int position) {
+    this.term = t;
+    this.freq = 1;
+    this.positions = new int[] { position };
+  }
+}
--- a/src/java/org/apache/lucene/index/FieldInfo.java
+++ b/src/java/org/apache/lucene/index/FieldInfo.java
@ -0,0 +1,67 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/** Record describing one field: its name, whether it is indexed, and the
+ * number assigned to it. */
+final class FieldInfo {
+  String name;                                    // field name
+  boolean isIndexed;                              // true if the field is indexed
+  int number;                                     // number assigned to the field
+
+  FieldInfo(String na, boolean tk, int nu) {
+    this.name = na;
+    this.isIndexed = tk;
+    this.number = nu;
+  }
+}
--- a/src/java/org/apache/lucene/index/FieldInfos.java
+++ b/src/java/org/apache/lucene/index/FieldInfos.java
@ -0,0 +1,167 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.util.Hashtable;
+import java.util.Vector;
+import java.util.Enumeration;
+import java.io.IOException;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.OutputStream;
+import org.apache.lucene.store.InputStream;
+
+final class FieldInfos {
+  private Vector byNumber = new Vector();
+  private Hashtable byName = new Hashtable();
+
+  FieldInfos() {
+    add("", false);
+  }
+
+  FieldInfos(Directory d, String name) throws IOException {
+    InputStream input = d.openFile(name);
+    try {
+      read(input);
+    } finally {
+      input.close();
+    }
+  }
+
+  /** Adds field info for a Document. */
+  final void add(Document doc) {
+    Enumeration fields  = doc.fields();
+    while (fields.hasMoreElements()) {
+      Field field = (Field)fields.nextElement();
+      add(field.name(), field.isIndexed());
+    }
+  }
+
+  /** Merges in information from another FieldInfos. */
+  final void add(FieldInfos other) {
+    for (int i = 0; i < other.size(); i++) {
+      FieldInfo fi = other.fieldInfo(i);
+      add(fi.name, fi.isIndexed);
+    }
+  }
+
+  private final void add(String name, boolean isIndexed) {
+    FieldInfo fi = fieldInfo(name);
+    if (fi == null)
+      addInternal(name, isIndexed);
+    else if (fi.isIndexed != isIndexed)
+      throw new IllegalStateException("field " + name +
+				      (fi.isIndexed ? " must" : " cannot") +
+				      " be an indexed field.");
+  }
+
+  private final void addInternal(String name, boolean isIndexed) {
+    FieldInfo fi = new FieldInfo(name, isIndexed, byNumber.size());
+    byNumber.addElement(fi);
+    byName.put(name, fi);
+  }
+
+  final int fieldNumber(String fieldName) {
+    FieldInfo fi = fieldInfo(fieldName);
+    if (fi != null)
+      return fi.number;
+    else
+      return -1;
+  }
+
+  final FieldInfo fieldInfo(String fieldName) {
+    return (FieldInfo)byName.get(fieldName);
+  }
+
+  final String fieldName(int fieldNumber) {
+    return fieldInfo(fieldNumber).name;
+  }
+
+  final FieldInfo fieldInfo(int fieldNumber) {
+    return (FieldInfo)byNumber.elementAt(fieldNumber);
+  }
+
+  final int size() {
+    return byNumber.size();
+  }
+
+  final void write(Directory d, String name) throws IOException {
+    OutputStream output = d.createFile(name);
+    try {
+      write(output);
+    } finally {
+      output.close();
+    }
+  }
+
+  final void write(OutputStream output) throws IOException {
+    output.writeVInt(size());
+    for (int i = 0; i < size(); i++) {
+      FieldInfo fi = fieldInfo(i);
+      output.writeString(fi.name);
+      output.writeByte((byte)(fi.isIndexed ? 1 : 0));
+    }
+  }
+
+  private final void read(InputStream input) throws IOException {
+    int size = input.readVInt();
+    for (int i = 0; i < size; i++)
+      addInternal(input.readString().intern(),
+		  input.readByte() != 0);
+  }
+}
--- a/src/java/org/apache/lucene/index/FieldsReader.java
+++ b/src/java/org/apache/lucene/index/FieldsReader.java
@ -0,0 +1,113 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.util.Enumeration;
+import java.util.Hashtable;
+import java.io.IOException;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.InputStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+/** Reads the stored fields of documents from a segment's <code>.fdt</code>
+  and <code>.fdx</code> files.  The <code>.fdx</code> file holds one
+  eight-byte long per document giving the position of that document's
+  fields in the <code>.fdt</code> file. */
+final class FieldsReader {
+  private FieldInfos fieldInfos;
+  private InputStream fieldsStream;
+  private InputStream indexStream;
+  private int size;
+
+  /** Opens the <code>.fdt</code> and <code>.fdx</code> files of
+    <code>segment</code> in <code>d</code>.
+    @param fn maps the field numbers read from the files to field infos
+    @throws IOException if either file cannot be opened */
+  FieldsReader(Directory d, String segment, FieldInfos fn)
+       throws IOException {
+    fieldInfos = fn;
+
+    fieldsStream = d.openFile(segment + ".fdt");
+    boolean opened = false;
+    try {
+      indexStream = d.openFile(segment + ".fdx");
+      opened = true;
+    } finally {
+      if (!opened)                                // don't leak the first stream
+        fieldsStream.close();
+    }
+
+    // Divide as a long and only then narrow: casting first would truncate
+    // the length of index files larger than Integer.MAX_VALUE bytes.
+    size = (int)(indexStream.length() / 8);
+  }
+
+  /** Closes both underlying streams.  The index stream is closed even if
+    closing the fields stream throws. */
+  final void close() throws IOException {
+    try {
+      fieldsStream.close();
+    } finally {
+      indexStream.close();
+    }
+  }
+
+  /** Returns the number of eight-byte entries in the index file, i.e. the
+    number of documents that can be read. */
+  final int size() {
+    return size;
+  }
+
+  /** Returns a Document holding the stored fields of document <code>n</code>.
+    Every field is created as stored; its indexed flag comes from the
+    FieldInfos and its tokenized flag from the per-field byte in the file. */
+  final Document doc(int n) throws IOException {
+    indexStream.seek(n * 8L);                     // entry n of the index file
+    long position = indexStream.readLong();
+    fieldsStream.seek(position);
+
+    Document doc = new Document();
+    int numFields = fieldsStream.readVInt();
+    for (int i = 0; i < numFields; i++) {
+      int fieldNumber = fieldsStream.readVInt();
+      FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
+
+      byte bits = fieldsStream.readByte();        // bit 0 set => tokenized
+
+      doc.add(new Field(fi.name,                  // name
+                        fieldsStream.readString(), // read value
+                        true,                     // stored
+                        fi.isIndexed,             // indexed
+                        (bits & 1) != 0));        // tokenized
+    }
+
+    return doc;
+  }
+}
--- a/src/java/org/apache/lucene/index/FieldsWriter.java
+++ b/src/java/org/apache/lucene/index/FieldsWriter.java
@ -0,0 +1,110 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.util.Enumeration;
+import java.util.Hashtable;
+import java.io.IOException;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.OutputStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+/** Writes the stored fields of documents to a segment's <code>.fdt</code>
+  and <code>.fdx</code> files.  For every document one long is appended to
+  the <code>.fdx</code> file, recording where that document's fields start
+  in the <code>.fdt</code> file. */
+final class FieldsWriter {
+  private FieldInfos fieldInfos;
+  private OutputStream fieldsStream;
+  private OutputStream indexStream;
+
+  /** Creates the <code>.fdt</code> and <code>.fdx</code> files of
+    <code>segment</code> in <code>d</code>.
+    @param fn supplies the number written for each field name
+    @throws IOException if either file cannot be created */
+  FieldsWriter(Directory d, String segment, FieldInfos fn)
+       throws IOException {
+    fieldInfos = fn;
+    fieldsStream = d.createFile(segment + ".fdt");
+    boolean created = false;
+    try {
+      indexStream = d.createFile(segment + ".fdx");
+      created = true;
+    } finally {
+      if (!created)                               // don't leak the first stream
+        fieldsStream.close();
+    }
+  }
+
+  /** Closes both underlying streams.  The index stream is closed even if
+    closing the fields stream throws. */
+  final void close() throws IOException {
+    try {
+      fieldsStream.close();
+    } finally {
+      indexStream.close();
+    }
+  }
+
+  /** Appends the stored fields of <code>doc</code>.  Fields that are not
+    stored are skipped.  Layout: the count of stored fields as a VInt, then
+    per field its number (VInt), a flag byte and its string value. */
+  final void addDocument(Document doc) throws IOException {
+    // Record where this document's data begins.
+    indexStream.writeLong(fieldsStream.getFilePointer());
+
+    // First pass: the count must be written before the fields themselves.
+    int storedCount = 0;
+    Enumeration fields = doc.fields();
+    while (fields.hasMoreElements()) {
+      Field field = (Field)fields.nextElement();
+      if (field.isStored())
+        storedCount++;
+    }
+    fieldsStream.writeVInt(storedCount);
+
+    // Second pass: write each stored field.
+    fields = doc.fields();
+    while (fields.hasMoreElements()) {
+      Field field = (Field)fields.nextElement();
+      if (field.isStored()) {
+        fieldsStream.writeVInt(fieldInfos.fieldNumber(field.name()));
+
+        byte bits = 0;
+        if (field.isTokenized())
+          bits |= 1;                              // bit 0 set => tokenized
+        fieldsStream.writeByte(bits);
+
+        fieldsStream.writeString(field.stringValue());
+      }
+    }
+  }
+}
--- a/src/java/org/apache/lucene/index/IndexReader.java
+++ b/src/java/org/apache/lucene/index/IndexReader.java
@ -0,0 +1,215 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.io.File;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.document.Document;
+
+/** IndexReader is an abstract class, providing an interface for accessing an
+  index.  Search of an index is done entirely through this abstract interface,
+  so that any subclass which implements it is searchable.
+
+  <p> Concrete subclasses of IndexReader are usually constructed with a call to
+  the static method {@link #open}.
+
+  <p> For efficiency, in this API documents are often referred to via
+  <i>document numbers</i>, non-negative integers which each name a unique
+  document in the index.  These document numbers are ephemeral--they may change
+  as documents are added to and deleted from an index.  Clients should thus not
+  rely on a given document having the same number between sessions. */
+
+public abstract class IndexReader {
+  protected IndexReader() {}
+
+  /** Returns an IndexReader reading the index in an FSDirectory in the named
+  path. */
+  public static IndexReader open(String path) throws IOException {
+    Directory dir = FSDirectory.getDirectory(path, false);
+    return open(dir);
+  }
+
+  /** Returns an IndexReader reading the index in an FSDirectory in the named
+  path. */
+  public static IndexReader open(File path) throws IOException {
+    Directory dir = FSDirectory.getDirectory(path, false);
+    return open(dir);
+  }
+
+  /** Returns an IndexReader reading the index in the given Directory. */
+  public static IndexReader open(Directory directory) throws IOException {
+    synchronized (directory) {
+      SegmentInfos infos = new SegmentInfos();
+      infos.read(directory);
+
+      int count = infos.size();
+      if (count == 1) {                           // index is optimized
+        return new SegmentReader(infos.info(0), true);
+      }
+
+      // One reader per segment; only the reader of the last segment is
+      // constructed with the flag set to true.
+      SegmentReader[] readers = new SegmentReader[count];
+      int last = count - 1;
+      for (int i = 0; i < count; i++) {
+        readers[i] = new SegmentReader(infos.info(i), i == last);
+      }
+      return new SegmentsReader(readers);
+    }
+  }
+
+  /** Returns the time the index in the named directory was last modified. */
+  public static long lastModified(String directory) throws IOException {
+    File dir = new File(directory);
+    return lastModified(dir);
+  }
+
+  /** Returns the time the index in the named directory was last modified. */
+  public static long lastModified(File directory) throws IOException {
+    return FSDirectory.fileModified(directory, "segments");
+  }
+
+  /** Returns the time the index in this directory was last modified. */
+  public static long lastModified(Directory directory) throws IOException {
+    return directory.fileModified("segments");
+  }
+
+  /** Returns the number of documents in this index. */
+  public abstract int numDocs();
+
+  /** Returns one greater than the largest possible document number.
+    This may be used to, e.g., determine how big to allocate an array which
+    will have an element for every document number in an index.
+   */
+  public abstract int maxDoc();
+
+  /** Returns the stored fields of the <code>n</code><sup>th</sup>
+      <code>Document</code> in this index. */
+  public abstract Document document(int n) throws IOException;
+
+  /** Returns true if document <i>n</i> has been deleted. */
+  public abstract boolean isDeleted(int n);
+
+  /** Returns the byte-encoded normalization factor for the named field of
+    every document.  This is used by the search code to score documents.
+    @see org.apache.lucene.search.Similarity#norm
+    */
+  public abstract byte[] norms(String field) throws IOException;
+
+  /** Returns an enumeration of all the terms in the index.
+    The enumeration is ordered by Term.compareTo().  Each term
+    is greater than all that precede it in the enumeration.
+   */
+  public abstract TermEnum terms() throws IOException;
+
+  /** Returns an enumeration of all terms after a given term.
+    The enumeration is ordered by Term.compareTo().  Each term
+    is greater than all that precede it in the enumeration.
+   */
+  public abstract TermEnum terms(Term t) throws IOException;
+
+  /** Returns the number of documents containing the term <code>t</code>. */
+  public abstract int docFreq(Term t) throws IOException;
+
+  /** Returns an enumeration of all the documents which contain
+    <code>Term</code>.  For each document, the document number and the
+    frequency of the term in that document are provided, for use in search
+    scoring.  Thus, this method implements the mapping:
+    <p><ul>
+    Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq&gt;<sup>*</sup>
+    </ul>
+    <p>The enumeration is ordered by document number.  Each document number
+    is greater than all that precede it in the enumeration. */
+  public abstract TermDocs termDocs(Term t) throws IOException;
+
+  /** Returns an enumeration of all the documents which contain
+    <code>Term</code>.  For each document, in addition to the document number
+    and frequency of the term in that document, a list of all of the ordinal
+    positions of the term in the document is available.  Thus, this method
+    implements the mapping:
+
+    <p><ul>
+    Term &nbsp;&nbsp; =&gt; &nbsp;&nbsp; &lt;docNum, freq,
+          &lt;pos<sub>1</sub>, pos<sub>2</sub>, ...
+          pos<sub>freq-1</sub>&gt;
+        &gt;<sup>*</sup>
+    </ul>
+    <p> This positional information facilitates phrase and proximity searching.
+    <p>The enumeration is ordered by document number.  Each document number is
+    greater than all that precede it in the enumeration. */
+  public abstract TermPositions termPositions(Term t) throws IOException;
+
+  /** Deletes the document numbered <code>docNum</code>.  Once a document is
+    deleted it will not appear in TermDocs or TermPositions enumerations.
+    Attempts to read its field with the {@link #document}
+    method will result in an error.  The presence of this document may still be
+    reflected in the {@link #docFreq} statistic, though
+    this will be corrected eventually as the index is further modified.  */
+  public abstract void delete(int docNum) throws IOException;
+
+  /** Deletes all documents containing <code>term</code>.
+    This is useful if one uses a document field to hold a unique ID string for
+    the document.  Then to delete such a document, one merely constructs a
+    term with the appropriate field and the unique ID string as its text and
+    passes it to this method.  Returns the number of documents deleted. */
+  public final int delete(Term term) throws IOException {
+    TermDocs matches = termDocs(term);
+    int deleted = 0;
+    if (matches != null) {
+      try {
+        while (matches.next()) {
+          delete(matches.doc());
+          deleted++;
+        }
+      } finally {
+        matches.close();                          // always release the enumeration
+      }
+    }
+    return deleted;
+  }
+
+  /** Closes files associated with this index.
+    Also saves any new deletions to disk.
+    No other methods should be called after this has been called. */
+  public abstract void close() throws IOException;
+}
--- a/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/src/java/org/apache/lucene/index/IndexWriter.java
@ -0,0 +1,385 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.io.File;
+import java.io.PrintStream;
+import java.util.Vector;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.InputStream;
+import org.apache.lucene.store.OutputStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.analysis.Analyzer;
+
+/**
+  An IndexWriter creates and maintains an index.
+
+  The third argument to the <a href="#IndexWriter"><b>constructor</b></a>
+  determines whether a new index is created, or whether an existing index is
+  opened for the addition of new documents.
+
+  In either case, documents are added with the <a
+  href="#addDocument"><b>addDocument</b></a> method.  When finished adding
+  documents, <a href="#close"><b>close</b></a> should be called.
+
+  If an index will not have more documents added for a while and optimal search
+  performance is desired, then the <a href="#optimize"><b>optimize</b></a>
+  method should be called before the index is closed.
+  */
+
+public final class IndexWriter {
+  private Directory directory;			  // where this index resides
+  private Analyzer analyzer;			  // how to analyze text
+
+  private SegmentInfos segmentInfos = new SegmentInfos(); // the segments
+  private final Directory ramDirectory = new RAMDirectory(); // for temp segs
+
+  /** Constructs an IndexWriter for the index in <code>path</code>.  Text will
+    be analyzed with <code>a</code>.  If <code>create</code> is true, then a
+    new, empty index will be created in <code>path</code>, replacing the index
+    already there, if any.  The path is resolved to an FSDirectory and the
+    work is delegated to the Directory-based constructor. */
+  public IndexWriter(String path, Analyzer a, boolean create)
+       throws IOException {
+    this(FSDirectory.getDirectory(path, create), a, create);
+  }
+
+  /** Constructs an IndexWriter for the index in <code>path</code>.  Text will
+    be analyzed with <code>a</code>.  If <code>create</code> is true, then a
+    new, empty index will be created in <code>path</code>, replacing the index
+    already there, if any.  The path is resolved to an FSDirectory and the
+    work is delegated to the Directory-based constructor. */
+  public IndexWriter(File path, Analyzer a, boolean create)
+       throws IOException {
+    this(FSDirectory.getDirectory(path, create), a, create);
+  }
+
+  /** Constructs an IndexWriter for the index in <code>d</code>.  Text will be
+    analyzed with <code>a</code>.  If <code>create</code> is true, then a new,
+    empty index will be created in <code>d</code>, replacing the index already
+    there, if any.  Otherwise the existing segment information is read from
+    <code>d</code>.  Either step runs while holding the lock on
+    <code>d</code>. */
+  public IndexWriter(Directory d, Analyzer a, boolean create)
+       throws IOException {
+    this.directory = d;
+    this.analyzer = a;
+
+    synchronized (directory) {
+      if (!create) {
+        segmentInfos.read(directory);             // open the existing index
+      } else {
+        segmentInfos.write(directory);            // write this writer's new segment infos
+      }
+    }
+  }
+
+  /** Flushes all changes to an index, closes all associated files, and closes
+    the directory that the index is stored in.  The RAM directory used for
+    temporary segments is closed as well.  If flushing throws, neither
+    directory is closed by this call. */
+  public final synchronized void close() throws IOException {
+    // NOTE(review): presumably moves segments still held in ramDirectory into
+    // directory -- confirm against flushRamSegments().
+    flushRamSegments();
+    ramDirectory.close();
+    directory.close();
+  }
+
+  /** Returns the number of documents currently in this index, computed as the
+    sum of the document counts of all segments. */
+  public final synchronized int docCount() {
+    int total = 0;
+    int segments = segmentInfos.size();
+    for (int i = 0; i < segments; i++)
+      total += segmentInfos.info(i).docCount;
+    return total;
+  }
+
+  /** The maximum number of terms that will be indexed for a single field in a
+    document.  This limits the amount of memory required for indexing, so that
+    collections with very large files will not crash the indexing process by
+    running out of memory.
+
+    <p>By default, no more than 10,000 terms will be indexed for a field. */
+  public int maxFieldLength = 10000;
+
+  /** Adds a document to this index.  The document is first written as a
+    single-document segment in the RAM directory; that segment is then
+    registered and a merge is considered, both while holding this writer's
+    lock. */
+  public final void addDocument(Document doc) throws IOException {
+    DocumentWriter writer =
+      new DocumentWriter(ramDirectory, analyzer, maxFieldLength);
+    String name = newSegmentName();
+    writer.addDocument(name, doc);
+
+    synchronized (this) {
+      SegmentInfo added = new SegmentInfo(name, 1, ramDirectory);
+      segmentInfos.addElement(added);
+      maybeMergeSegments();
+    }
+  }
+
+  private final synchronized String newSegmentName() {
+    return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX);
+  }
+
+  /** Determines how often segment indexes are merged by addDocument().  With
+   * smaller values, less RAM is used while indexing, and searches on
+   * unoptimized indexes are faster, but indexing speed is slower.  With larger
+   * values more RAM is used while indexing and searches on unoptimized indexes
+   * are slower, but indexing is faster.  Thus larger values (&gt; 10) are best
+   * for batched index creation, and smaller values (&lt; 10) for indexes that
+   * are interactively maintained.
+   *
+   * <p>This value is also the largest number of segments combined by a single
+   * merge step of optimize().
+   *
+   * <p>This must never be less than 2.  The default value is 10.*/
+  public int mergeFactor = 10;
+
+  /** Determines the largest number of documents ever merged by addDocument().
+   * Small values (e.g., less than 10,000) are best for interactive indexing,
+   * as this limits the length of pauses while indexing to a few seconds.
+   * Larger values are best for batched indexing and speedier searches.
+   *
+   * <p>The limit is consulted only by the incremental merging performed
+   * after a document is added; optimize() does not check it.
+   *
+   * <p>The default value is {@link Integer#MAX_VALUE}. */
+  public int maxMergeDocs = Integer.MAX_VALUE;
+
+  /** If non-null, information about merges will be printed to this, as will
+   * a message for each file whose deletion failed and will be retried. */
+  public PrintStream infoStream = null;
+
+  /**
+   * Merges all segments together into a single segment, optimizing an index
+   * for search.  RAM-resident segments are flushed first; merging then
+   * repeats, at most mergeFactor segments at a time, until one segment
+   * without deletions (or no segment at all) remains.
+   *
+   * @throws IOException if flushing or merging fails
+   */
+  public final synchronized void optimize() throws IOException {
+    flushRamSegments();
+    for (;;) {
+      final int segmentCount = segmentInfos.size();
+      final boolean loneSegmentHasDeletions =
+        segmentCount == 1 && SegmentReader.hasDeletions(segmentInfos.info(0));
+      if (segmentCount <= 1 && !loneSegmentHasDeletions) {
+        break;					  // nothing left to merge
+      }
+      mergeSegments(Math.max(0, segmentCount - mergeFactor));
+    }
+  }
+
+  /**
+   * Merges all segments from an array of indexes into this index.
+   *
+   * <p>This may be used to parallelize batch indexing.  A large document
+   * collection can be broken into sub-collections.  Each sub-collection can be
+   * indexed in parallel, on a different thread, process or machine.  The
+   * complete index can then be created by merging sub-collection indexes
+   * with this method.
+   *
+   * <p>After this completes, the index is optimized.
+   *
+   * @param dirs the directories holding the indexes to add
+   * @throws IOException if reading a segment list or merging fails
+   */
+  public final synchronized void addIndexes(Directory[] dirs)
+      throws IOException {
+    optimize();					  // leaves zero or one segment
+    int mergeStart = segmentInfos.size();
+    int pending = 0;				  // infos added since last merge
+    for (int d = 0; d < dirs.length; d++) {
+      SegmentInfos foreign = new SegmentInfos();
+      foreign.read(dirs[d]);			  // load that index's segment list
+      final int foreignCount = foreign.size();
+      for (int s = 0; s < foreignCount; s++) {
+        segmentInfos.addElement(foreign.info(s));
+        pending++;
+
+        // merge each time mergeFactor infos have accumulated
+        if (pending == mergeFactor) {
+          mergeSegments(mergeStart, false);
+          mergeStart++;
+          pending = 0;
+        }
+      }
+    }
+    optimize();					  // final cleanup
+  }
+
+  /** Merges all RAM-resident segments.
+   *
+   * <p>The segments held in ramDirectory at the top of the segmentInfos stack
+   * are merged into one segment.  The segment directly beneath them is merged
+   * in as well when the combined document count does not exceed mergeFactor.
+   * Nothing happens when there is no segment to merge.
+   *
+   * @throws IOException if the merge fails
+   */
+  private final void flushRamSegments() throws IOException {
+    int minSegment = segmentInfos.size()-1;
+    int docCount = 0;
+    // scan downward over the run of RAM segments at the top of the stack
+    while (minSegment >= 0 &&
+	   (segmentInfos.info(minSegment)).dir == ramDirectory) {
+      docCount += segmentInfos.info(minSegment).docCount;
+      minSegment--;
+    }
+    // minSegment now indexes the segment just below the RAM run, or is -1.
+    // Exclude that segment from the merge if it does not exist, if including
+    // it would exceed mergeFactor documents, or if the top segment is not a
+    // RAM segment.  The first test must stay first: it guards the info()
+    // call in the second against an index of -1.
+    if (minSegment < 0 ||			  // add one FS segment?
+	(docCount + segmentInfos.info(minSegment).docCount) > mergeFactor ||
+	!(segmentInfos.info(segmentInfos.size()-1).dir == ramDirectory))
+      minSegment++;
+    if (minSegment >= segmentInfos.size())
+      return;					  // none to merge
+    mergeSegments(minSegment);
+  }
+
+  /** Incremental segment merger.  */
+  private final void maybeMergeSegments() throws IOException {
+    long targetMergeDocs = mergeFactor;
+    while (targetMergeDocs <= maxMergeDocs) {
+      // find segments smaller than current target size
+      int minSegment = segmentInfos.size();
+      int mergeDocs = 0;
+      while (--minSegment >= 0) {
+	SegmentInfo si = segmentInfos.info(minSegment);
+	if (si.docCount >= targetMergeDocs)
+	  break;
+	mergeDocs += si.docCount;
+      }
+
+      if (mergeDocs >= targetMergeDocs)		  // found a merge to do
+	mergeSegments(minSegment+1);
+      else
+	break;
+      
+      targetMergeDocs *= mergeFactor;		  // increase target size
+    }
+  }
+
+  /**
+   * Merges the segments from minSegment up to the top of the segmentInfos
+   * stack into one segment and deletes the files of the merged segments.
+   *
+   * @param minSegment index of the lowest segment to merge
+   * @throws IOException if the merge fails
+   */
+  private final void mergeSegments(int minSegment) throws IOException {
+    final boolean deleteMerged = true;
+    mergeSegments(minSegment, deleteMerged);
+  }
+
+  /** Pops segments off of segmentInfos stack down to minSegment, merges them,
+   * and pushes the merged index onto the top of the segmentInfos stack.  The
+   * new segment list is written to the index directory before any files of
+   * the merged segments are deleted.
+   *
+   * @param minSegment index of the lowest segment to merge
+   * @param delete if true, the files of the merged segments are deleted once
+   *   the new segment list has been written
+   * @throws IOException if reading, merging, committing or deleting fails
+   */
+  private final void mergeSegments(int minSegment, boolean delete)
+      throws IOException {
+    String mergedName = newSegmentName();
+    int mergedDocCount = 0;
+    if (infoStream != null) infoStream.print("merging segments");
+    SegmentMerger merger = new SegmentMerger(directory, mergedName);
+    Vector segmentsToDelete = new Vector();
+    for (int i = minSegment; i < segmentInfos.size(); i++) {
+      SegmentInfo si = segmentInfos.info(i);
+      if (infoStream != null)
+        infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
+      SegmentReader reader = new SegmentReader(si);
+      merger.add(reader);
+      if (delete)
+        segmentsToDelete.addElement(reader);	  // queue for deletion
+      // The merger skips deleted documents, so the merged segment holds only
+      // the live documents of each source.  Count those rather than
+      // si.docCount, which would overstate the size of the new segment.
+      mergedDocCount += reader.numDocs();
+    }
+    if (infoStream != null) {
+      infoStream.println();
+      infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
+    }
+    merger.merge();				  // also closes the readers
+
+    segmentInfos.setSize(minSegment);		  // pop old infos & add new
+    segmentInfos.addElement(new SegmentInfo(mergedName, mergedDocCount,
+                                            directory));
+
+    synchronized (directory) {
+      segmentInfos.write(directory);		  // commit before deleting
+      deleteSegments(segmentsToDelete);		  // delete now-unused segments
+    }
+  }
+
+  /* Some operating systems (e.g. Windows) don't permit a file to be deleted
+     while it is opened for read (e.g. by another process or thread).  So we
+     assume that when a delete fails it is because the file is open in another
+     process, and queue the file for subsequent deletion. */
+
+  private final void deleteSegments(Vector segments) throws IOException {
+    Vector deletable = new Vector();
+
+    deleteFiles(readDeleteableFiles(), deletable); // try to delete deleteable
+    
+    for (int i = 0; i < segments.size(); i++) {
+      SegmentReader reader = (SegmentReader)segments.elementAt(i);
+      if (reader.directory == this.directory)
+	deleteFiles(reader.files(), deletable);	  // try to delete our files
+      else
+	deleteFiles(reader.files(), reader.directory); // delete, eg, RAM files
+    }
+
+    writeDeleteableFiles(deletable);		  // note files we can't delete
+  }
+
+  private final void deleteFiles(Vector files, Directory directory)
+       throws IOException {
+    for (int i = 0; i < files.size(); i++)
+      directory.deleteFile((String)files.elementAt(i));
+  }
+
+  /**
+   * Tries to delete every named file from the index directory.  When a
+   * delete fails and the file still exists, its name is appended to
+   * <code>deletable</code> so that it can be retried later.
+   *
+   * @param files names (Strings) of the files to delete
+   * @param deletable receives the names of files that could not be deleted
+   * @throws IOException if checking for a file's existence fails
+   */
+  private final void deleteFiles(Vector files, Vector deletable)
+       throws IOException {
+    for (int f = 0; f < files.size(); f++) {
+      final String name = (String)files.elementAt(f);
+      try {
+        directory.deleteFile(name);
+      } catch (IOException e) {
+        if (!directory.fileExists(name)) {
+          continue;				  // already gone, nothing to retry
+        }
+        if (infoStream != null) {
+          infoStream.println(e.getMessage() + "; Will re-try later.");
+        }
+        deletable.addElement(name);
+      }
+    }
+  }
+
+  private final Vector readDeleteableFiles() throws IOException {
+    Vector result = new Vector();
+    if (!directory.fileExists("deletable"))
+      return result;
+
+    InputStream input = directory.openFile("deletable");
+    try {
+      for (int i = input.readInt(); i > 0; i--)	  // read file names
+	result.addElement(input.readString());
+    } finally {
+      input.close();
+    }
+    return result;
+  }
+
+  private final void writeDeleteableFiles(Vector files) throws IOException {
+    OutputStream output = directory.createFile("deleteable.new");
+    try {
+      output.writeInt(files.size());
+      for (int i = 0; i < files.size(); i++)
+	output.writeString((String)files.elementAt(i));
+    } finally {
+      output.close();
+    }
+    directory.renameFile("deleteable.new", "deletable");
+  }
+}
--- a/src/java/org/apache/lucene/index/Makefile
+++ b/src/java/org/apache/lucene/index/Makefile
@ -0,0 +1,2 @@
+# sub-directory makefile for lucene
+include ../rules.mk
--- a/src/java/org/apache/lucene/index/SegmentInfo.java
+++ b/src/java/org/apache/lucene/index/SegmentInfo.java
@ -0,0 +1,69 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.store.Directory;
+
+/** Plain record describing one segment: its name, size and location. */
+final class SegmentInfo {
+  /** Unique name of the segment within its directory. */
+  public String name;
+  /** Number of documents in the segment. */
+  public int docCount;
+  /** Directory in which the segment resides. */
+  public Directory dir;
+
+  /** Stores the three given values in the corresponding fields. */
+  public SegmentInfo(String name, int docCount, Directory dir) {
+    this.dir = dir;
+    this.docCount = docCount;
+    this.name = name;
+  }
+}
--- a/src/java/org/apache/lucene/index/SegmentInfos.java
+++ b/src/java/org/apache/lucene/index/SegmentInfos.java
@ -0,0 +1,101 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.util.Vector;
+import java.io.IOException;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.InputStream;
+import org.apache.lucene.store.OutputStream;
+
+final class SegmentInfos extends Vector {
+  public int counter = 0;			  // used to name new segments
+  
+  public final SegmentInfo info(int i) {
+    return (SegmentInfo)elementAt(i);
+  }
+
+  public final void read(Directory directory) throws IOException {
+    InputStream input = directory.openFile("segments");
+    try {
+      counter = input.readInt();		  // read counter
+      for (int i = input.readInt(); i > 0; i--) { // read segmentInfos
+	SegmentInfo si = new SegmentInfo(input.readString(), input.readInt(),
+					 directory);
+	addElement(si);
+      }
+    } finally {
+      input.close();
+    }
+  }
+
+  public final void write(Directory directory) throws IOException {
+    OutputStream output = directory.createFile("segments.new");
+    try {
+      output.writeInt(counter);			  // write counter
+      output.writeInt(size());			  // write infos
+      for (int i = 0; i < size(); i++) {
+	SegmentInfo si = info(i);
+	output.writeString(si.name);
+	output.writeInt(si.docCount);
+      }
+    } finally {
+      output.close();
+    }
+
+    // install new segment info
+    directory.renameFile("segments.new", "segments");
+  }
+}
--- a/src/java/org/apache/lucene/index/SegmentMergeInfo.java
+++ b/src/java/org/apache/lucene/index/SegmentMergeInfo.java
@ -0,0 +1,106 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.util.BitVector;
+
+/**
+ * Per-segment state used while merging terms: the segment's term
+ * enumeration and its current term, a postings reader, the document number
+ * base assigned to the segment, and a map that renumbers documents around
+ * deletions.
+ */
+final class SegmentMergeInfo {
+  Term term;
+  int base;
+  SegmentTermEnum termEnum;
+  SegmentReader reader;
+  SegmentTermPositions postings;
+  int[] docMap = null;				  // null when nothing is deleted
+
+  SegmentMergeInfo(int b, SegmentTermEnum te, SegmentReader r)
+    throws IOException {
+    this.base = b;
+    this.reader = r;
+    this.termEnum = te;
+    this.term = te.term();
+    this.postings = new SegmentTermPositions(r);
+
+    final BitVector deleted = r.deletedDocs;
+    if (deleted == null) {
+      return;					  // no renumbering needed
+    }
+
+    // deleted documents map to -1; the others are numbered consecutively
+    final int limit = r.maxDoc();
+    final int[] map = new int[limit];
+    int live = 0;
+    for (int doc = 0; doc < limit; doc++) {
+      map[doc] = deleted.get(doc) ? -1 : live++;
+    }
+    this.docMap = map;
+  }
+
+  /**
+   * Advances the term enumeration.  Sets <code>term</code> to the new
+   * current term, or to null when the enumeration is exhausted.
+   *
+   * @return true if another term was available
+   */
+  final boolean next() throws IOException {
+    final boolean advanced = termEnum.next();
+    term = advanced ? termEnum.term() : null;
+    return advanced;
+  }
+
+  /** Closes the term enumeration and then the postings reader. */
+  final void close() throws IOException {
+    termEnum.close();
+    postings.close();
+  }
+}
+
--- a/src/java/org/apache/lucene/index/SegmentMergeQueue.java
+++ b/src/java/org/apache/lucene/index/SegmentMergeQueue.java
@ -0,0 +1,80 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.util.PriorityQueue;
+
+/**
+ * Priority queue of SegmentMergeInfo entries ordered by current term, with
+ * ties between equal terms broken by the smaller document base.
+ */
+final class SegmentMergeQueue extends PriorityQueue {
+  SegmentMergeQueue(int size) {
+    initialize(size);
+  }
+
+  /** Compares two SegmentMergeInfo entries by term, then by base. */
+  protected final boolean lessThan(Object a, Object b) {
+    final SegmentMergeInfo left = (SegmentMergeInfo)a;
+    final SegmentMergeInfo right = (SegmentMergeInfo)b;
+    final int order = left.term.compareTo(right.term);
+    return order != 0 ? order < 0 : left.base < right.base;
+  }
+
+  /** Pops and closes every entry until the queue is empty. */
+  final void close() throws IOException {
+    for (; top() != null; ) {
+      SegmentMergeInfo finished = (SegmentMergeInfo)pop();
+      finished.close();
+    }
+  }
+
+}
--- a/src/java/org/apache/lucene/index/SegmentMerger.java
+++ b/src/java/org/apache/lucene/index/SegmentMerger.java
@ -0,0 +1,275 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.util.Vector;
+import java.io.IOException;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.OutputStream;
+import org.apache.lucene.store.InputStream;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.BitVector;
+
+/**
+ * Combines the segments read by a set of SegmentReaders into one new segment
+ * written to a target directory.  Readers are registered with add(); merge()
+ * then writes the field infos and stored fields, the term dictionary with
+ * its frequency and position files, and the norms, leaving out documents
+ * marked as deleted.
+ */
+final class SegmentMerger {
+  private Directory directory;			  // where the new segment goes
+  private String segment;			  // name of the new segment
+
+  private Vector readers = new Vector();	  // SegmentReaders to merge
+  private FieldInfos fieldInfos;		  // set by mergeFields()
+  
+  SegmentMerger(Directory dir, String name) {
+    directory = dir;
+    segment = name;
+  }
+
+  /** Registers a segment to be included in the merge. */
+  final void add(SegmentReader reader) {
+    readers.addElement(reader);
+  }
+
+  /** Returns the i-th registered reader. */
+  final SegmentReader segmentReader(int i) {
+    return (SegmentReader)readers.elementAt(i);
+  }
+
+  /** Writes the merged segment, then closes every registered reader whether
+   * or not the merge succeeded. */
+  final void merge() throws IOException {
+    try {
+      // mergeFields() must run first: it builds the fieldInfos that the
+      // following two steps use
+      mergeFields();
+      mergeTerms();
+      mergeNorms();
+      
+    } finally {
+      for (int i = 0; i < readers.size(); i++) {  // close readers
+	SegmentReader reader = (SegmentReader)readers.elementAt(i);
+	reader.close();
+      }
+    }
+  }
+
+  /** Builds the combined field infos, writes them to the ".fnm" file, and
+   * copies the stored fields of every non-deleted document, reader by
+   * reader. */
+  private final void mergeFields() throws IOException {
+    fieldInfos = new FieldInfos();		  // merge field names
+    for (int i = 0; i < readers.size(); i++) {
+      SegmentReader reader = (SegmentReader)readers.elementAt(i);
+      fieldInfos.add(reader.fieldInfos);
+    }
+    fieldInfos.write(directory, segment + ".fnm");
+    
+    FieldsWriter fieldsWriter =			  // merge field values
+      new FieldsWriter(directory, segment, fieldInfos);
+    try {
+      for (int i = 0; i < readers.size(); i++) {
+	SegmentReader reader = (SegmentReader)readers.elementAt(i);
+	BitVector deletedDocs = reader.deletedDocs;
+	int maxDoc = reader.maxDoc();
+	for (int j = 0; j < maxDoc; j++)
+	  if (deletedDocs == null || !deletedDocs.get(j)) // skip deleted docs
+	    fieldsWriter.addDocument(reader.document(j));
+      }
+    } finally {
+      fieldsWriter.close();
+    }
+  }
+
+  // State shared by the term-merging methods below; set in mergeTerms() and
+  // mergeTermInfos().
+  private OutputStream freqOutput = null;
+  private OutputStream proxOutput = null;
+  private TermInfosWriter termInfosWriter = null;
+  private SegmentMergeQueue queue = null;
+
+  /** Creates the ".frq" and ".prx" files and the term infos writer, merges
+   * all terms into them, and closes whatever was opened. */
+  private final void mergeTerms() throws IOException {
+    try {
+      freqOutput = directory.createFile(segment + ".frq");
+      proxOutput = directory.createFile(segment + ".prx");
+      termInfosWriter =
+	new TermInfosWriter(directory, segment, fieldInfos);
+      
+      mergeTermInfos();
+      
+    } finally {
+      // each may still be null if an earlier step threw
+      if (freqOutput != null) 		freqOutput.close();
+      if (proxOutput != null) 		proxOutput.close();
+      if (termInfosWriter != null) 	termInfosWriter.close();
+      if (queue != null)		queue.close();
+    }
+  }
+
+  /** Merges the term enumerations of all readers through a priority queue.
+   * Each round pops every entry positioned on the same term as the first
+   * one popped, writes one merged entry for that term, and advances the
+   * popped enumerations. */
+  private final void mergeTermInfos() throws IOException {
+    queue = new SegmentMergeQueue(readers.size());
+    int base = 0;				  // doc number offset of reader i
+    for (int i = 0; i < readers.size(); i++) {
+      SegmentReader reader = (SegmentReader)readers.elementAt(i);
+      SegmentTermEnum termEnum = (SegmentTermEnum)reader.terms();
+      SegmentMergeInfo smi = new SegmentMergeInfo(base, termEnum, reader);
+      base += reader.numDocs();
+      if (smi.next())
+	queue.put(smi);				  // initialize queue
+      else
+	smi.close();
+    }
+
+    // at most one entry per reader can match a given term
+    SegmentMergeInfo[] match = new SegmentMergeInfo[readers.size()];
+    
+    while (queue.size() > 0) {
+      int matchSize = 0;			  // pop matching terms
+      match[matchSize++] = (SegmentMergeInfo)queue.pop();
+      Term term = match[0].term;
+      SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
+      
+      while (top != null && term.compareTo(top.term) == 0) {
+	match[matchSize++] = (SegmentMergeInfo)queue.pop();
+	top = (SegmentMergeInfo)queue.top();
+      }
+
+      mergeTermInfo(match, matchSize);		  // add new TermInfo
+      
+      while (matchSize > 0) {
+	SegmentMergeInfo smi = match[--matchSize];
+	if (smi.next())
+	  queue.put(smi);			  // restore queue
+	else
+	  smi.close();				  // done with a segment
+      }
+    }
+  }
+
+  // Scratch object reused for every term to avoid allocating one per term.
+  // appendPostings() overwrites it while reading; mergeTermInfo() then sets
+  // it to the values to be written.
+  private final TermInfo termInfo = new TermInfo(); // minimize consing
+
+  /** Appends the postings of one term from the first n entries of smis and,
+   * if any document was written, adds the term to the dictionary with
+   * pointers to where its data starts in the freq and prox files. */
+  private final void mergeTermInfo(SegmentMergeInfo[] smis, int n)
+       throws IOException {
+    // remember where this term's data begins before anything is appended
+    long freqPointer = freqOutput.getFilePointer();
+    long proxPointer = proxOutput.getFilePointer();
+
+    int df = appendPostings(smis, n);		  // append posting data
+
+    if (df > 0) {
+      // add an entry to the dictionary with pointers to prox and freq files
+      termInfo.set(df, freqPointer, proxPointer);
+      termInfosWriter.add(smis[0].term, termInfo);
+    }
+  }
+       
+  /** Writes the postings of the current term of each of the first n entries
+   * of smis to the freq and prox files.  Document numbers are offset by the
+   * entry's base (and re-mapped through its docMap when present) and written
+   * as deltas from the previous document.
+   *
+   * @return the number of documents written for the term
+   * @throws IllegalStateException if a document number is smaller than the
+   *   one written before it
+   */
+  private final int appendPostings(SegmentMergeInfo[] smis, int n)
+       throws IOException {
+    int lastDoc = 0;
+    int df = 0;					  // number of docs w/ term
+    for (int i = 0; i < n; i++) {
+      SegmentMergeInfo smi = smis[i];
+      SegmentTermPositions postings = smi.postings;
+      int base = smi.base;
+      int[] docMap = smi.docMap;
+      // load the source segment's info for this term into the shared
+      // termInfo and position the postings reader on it
+      smi.termEnum.termInfo(termInfo);
+      postings.seek(termInfo);
+      while (postings.next()) {
+	int doc;
+	if (docMap == null)
+	  doc = base + postings.doc;		  // no deletions
+	else
+	  doc = base + docMap[postings.doc];	  // re-map around deletions
+
+	if (doc < lastDoc)
+	  throw new IllegalStateException("docs out of order");
+
+	int docCode = (doc - lastDoc) << 1;	  // use low bit to flag freq=1
+	lastDoc = doc;
+
+	int freq = postings.freq;
+	if (freq == 1) {
+	  freqOutput.writeVInt(docCode | 1);	  // write doc & freq=1
+	} else {
+	  freqOutput.writeVInt(docCode);	  // write doc
+	  freqOutput.writeVInt(freq);		  // write frequency in doc
+	}
+	  
+	int lastPosition = 0;			  // write position deltas
+	for (int j = 0; j < freq; j++) {
+	  int position = postings.nextPosition();
+	  proxOutput.writeVInt(position - lastPosition);
+	  lastPosition = position;
+	}
+
+	df++;
+      }
+    }
+    return df;
+  }
+
+  /** For every indexed field, writes a file named segment + ".f" + field
+   * number holding one norm byte per non-deleted document, reader by
+   * reader.  A reader without a norm stream for the field contributes zero
+   * bytes as values. */
+  private final void mergeNorms() throws IOException {
+    for (int i = 0; i < fieldInfos.size(); i++) {
+      FieldInfo fi = fieldInfos.fieldInfo(i);
+      if (fi.isIndexed) {
+	OutputStream output = directory.createFile(segment + ".f" + i);
+	try {
+	  for (int j = 0; j < readers.size(); j++) {
+	    SegmentReader reader = (SegmentReader)readers.elementAt(j);
+	    BitVector deletedDocs = reader.deletedDocs;
+	    InputStream input = reader.normStream(fi.name);
+            int maxDoc = reader.maxDoc();
+	    try {
+	      for (int k = 0; k < maxDoc; k++) {
+		// read a byte even for deleted docs so the input stays
+		// aligned with the document number
+		byte norm = input != null ? input.readByte() : (byte)0;
+		if (deletedDocs == null || !deletedDocs.get(k))
+		  output.writeByte(norm);
+	      }
+	    } finally {
+	      if (input != null)
+		input.close();
+	    }
+	  }
+	} finally {
+	  output.close();
+	}
+      }
+    }
+  }
+}
--- a/src/java/org/apache/lucene/index/SegmentReader.java
+++ b/src/java/org/apache/lucene/index/SegmentReader.java
@ -0,0 +1,284 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Hashtable;
+import java.util.Enumeration;
+import java.util.Vector;
+
+import org.apache.lucene.util.BitVector;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.InputStream;
+import org.apache.lucene.document.Document;
+
+/**
+ * IndexReader over the files of a single index segment.
+ *
+ * <p>The constructor reads or opens everything it needs up front: field
+ * infos (".fnm"), the stored-fields reader, the term-infos reader, the
+ * optional deletions bit vector (".del"), the frequency (".frq") and
+ * proximity (".prx") streams, and one norm stream per indexed field.
+ * Deletions are recorded in memory and written back to ".del" by close().
+ */
+final class SegmentReader extends IndexReader {
+  Directory directory;
+  private boolean closeDirectory = false;         // close directory in close()?
+  private String segment;                         // segment name, prefix of all files
+
+  FieldInfos fieldInfos;
+  private FieldsReader fieldsReader;
+
+  TermInfosReader tis;
+
+  BitVector deletedDocs = null;                   // null until a deletion exists
+  private boolean deletedDocsDirty = false;       // unsaved deletions pending?
+
+  private InputStream freqStream;
+  private InputStream proxStream;
+
+
+  /** Holder for one field's open norm stream and its lazily read bytes. */
+  private static class Norm {
+    public InputStream in;
+    public byte[] bytes;
+    public Norm(InputStream in) { this.in = in; }
+  }
+  private Hashtable norms = new Hashtable();      // field name -> Norm
+
+  /** Opens the segment; when closeDir is true, close() also closes the
+   * segment's directory. */
+  SegmentReader(SegmentInfo si, boolean closeDir)
+       throws IOException {
+    this(si);
+    closeDirectory = closeDir;
+  }
+
+  /** Opens the segment described by si from si.dir. */
+  SegmentReader(SegmentInfo si)
+       throws IOException {
+    directory = si.dir;
+    segment = si.name;
+
+    fieldInfos = new FieldInfos(directory, segment + ".fnm");
+    fieldsReader = new FieldsReader(directory, segment, fieldInfos);
+
+    tis = new TermInfosReader(directory, segment, fieldInfos);
+
+    if (hasDeletions(si)) {
+      deletedDocs = new BitVector(directory, segment + ".del");
+    }
+
+    // make sure that all index files have been read or are kept open
+    // so that if an index update removes them we'll still have them
+    freqStream = directory.openFile(segment + ".frq");
+    proxStream = directory.openFile(segment + ".prx");
+    openNorms();
+  }
+
+  /** Saves pending deletions (written to a ".tmp" file and then renamed to
+   * ".del" while holding the directory's monitor), then closes all readers
+   * and streams, and finally the directory if this reader was asked to. */
+  public final synchronized void close() throws IOException {
+    if (deletedDocsDirty) {
+      final String tmpName = segment + ".tmp";
+      final String delName = segment + ".del";
+      synchronized (directory) {
+        deletedDocs.write(directory, tmpName);
+        directory.renameFile(tmpName, delName);
+      }
+      deletedDocsDirty = false;
+    }
+
+    fieldsReader.close();
+    tis.close();
+
+    if (freqStream != null) {
+      freqStream.close();
+    }
+    if (proxStream != null) {
+      proxStream.close();
+    }
+
+    closeNorms();
+
+    if (closeDirectory) {
+      directory.close();
+    }
+  }
+
+  /** True if a ".del" file exists for the given segment. */
+  final static boolean hasDeletions(SegmentInfo si) throws IOException {
+    final String delName = si.name + ".del";
+    return si.dir.fileExists(delName);
+  }
+
+  /** Marks docNum as deleted in memory; persisted when close() runs. */
+  public final synchronized void delete(int docNum) throws IOException {
+    if (deletedDocs == null) {
+      deletedDocs = new BitVector(maxDoc());      // first deletion: allocate bits
+    }
+    deletedDocsDirty = true;
+    deletedDocs.set(docNum);
+  }
+
+  /** Returns the names of the files that make up this segment: the seven
+   * fixed files, ".del" if it exists, and one ".fN" file per indexed field. */
+  final Vector files() throws IOException {
+    final String[] fixedSuffixes =
+      { ".fnm", ".fdx", ".fdt", ".tii", ".tis", ".frq", ".prx" };
+
+    Vector names = new Vector(16);
+    for (int s = 0; s < fixedSuffixes.length; s++) {
+      names.addElement(segment + fixedSuffixes[s]);
+    }
+
+    final String delName = segment + ".del";
+    if (directory.fileExists(delName)) {
+      names.addElement(delName);
+    }
+
+    for (int fieldNum = 0; fieldNum < fieldInfos.size(); fieldNum++) {
+      if (fieldInfos.fieldInfo(fieldNum).isIndexed) {
+        names.addElement(segment + ".f" + fieldNum);
+      }
+    }
+    return names;
+  }
+
+  /** Delegates to the term-infos reader. */
+  public final TermEnum terms() throws IOException {
+    return tis.terms();
+  }
+
+  /** Delegates to the term-infos reader. */
+  public final TermEnum terms(Term t) throws IOException {
+    return tis.terms(t);
+  }
+
+  /** Returns the stored fields of document n.
+   * @throws IllegalArgumentException if document n is marked deleted */
+  public final synchronized Document document(int n) throws IOException {
+    if (!isDeleted(n)) {
+      return fieldsReader.doc(n);
+    }
+    throw new IllegalArgumentException
+      ("attempt to access a deleted document");
+  }
+
+  /** True if document n is flagged in the deletions bit vector. */
+  public final synchronized boolean isDeleted(int n) {
+    if (deletedDocs == null) {
+      return false;
+    }
+    return deletedDocs.get(n);
+  }
+
+  /** Returns an enumerator over the documents containing t, or null when
+   * the term-infos reader has no entry for t. */
+  public final TermDocs termDocs(Term t) throws IOException {
+    TermInfo info = tis.get(t);
+    return info == null ? null : new SegmentTermDocs(this, info);
+  }
+
+  /** Returns an independent clone of the frequency stream. */
+  final InputStream getFreqStream () {
+    Object copy = freqStream.clone();
+    return (InputStream)copy;
+  }
+
+  /** Returns an enumerator over the positions of t, or null when the
+   * term-infos reader has no entry for t. */
+  public final TermPositions termPositions(Term t) throws IOException {
+    TermInfo info = tis.get(t);
+    return info == null ? null : new SegmentTermPositions(this, info);
+  }
+
+  /** Returns an independent clone of the proximity stream. */
+  final InputStream getProxStream () {
+    Object copy = proxStream.clone();
+    return (InputStream)copy;
+  }
+
+  /** Returns the docFreq recorded for t, or 0 when t has no entry. */
+  public final int docFreq(Term t) throws IOException {
+    TermInfo info = tis.get(t);
+    return info == null ? 0 : info.docFreq;
+  }
+
+  /** maxDoc() minus the number of set bits in the deletions vector. */
+  public final int numDocs() {
+    int live = maxDoc();
+    if (deletedDocs != null) {
+      live -= deletedDocs.count();
+    }
+    return live;
+  }
+
+  /** The size reported by the stored-fields reader. */
+  public final int maxDoc() {
+    return fieldsReader.size();
+  }
+
+  /** Returns the norm bytes of a field, reading them on first request and
+   * caching them afterwards; null when the field has no norm stream. */
+  public final byte[] norms(String field) throws IOException {
+    Norm entry = (Norm)norms.get(field);
+    if (entry == null) {
+      return null;
+    }
+    if (entry.bytes == null) {                    // first request: load and cache
+      byte[] loaded = new byte[maxDoc()];
+      norms(field, loaded, 0);
+      entry.bytes = loaded;
+    }
+    return entry.bytes;
+  }
+
+  /** Copies maxDoc() norm bytes of a field into bytes starting at offset;
+   * leaves bytes untouched when the field has no norm stream. */
+  final void norms(String field, byte[] bytes, int offset) throws IOException {
+    InputStream in = normStream(field);
+    if (in != null) {
+      try {
+        in.readBytes(bytes, offset, maxDoc());
+      } finally {
+        in.close();
+      }
+    }
+    // otherwise: use zeros in array
+  }
+
+  /** Returns a clone of the field's norm stream positioned at 0, or null
+   * when the field has no norm stream. */
+  final InputStream normStream(String field) throws IOException {
+    Norm entry = (Norm)norms.get(field);
+    if (entry == null) {
+      return null;
+    }
+    InputStream copy = (InputStream)entry.in.clone();
+    copy.seek(0);
+    return copy;
+  }
+
+  /** Opens the norm file of every indexed field and registers it by name. */
+  private final void openNorms() throws IOException {
+    for (int i = 0; i < fieldInfos.size(); i++) {
+      FieldInfo info = fieldInfos.fieldInfo(i);
+      if (!info.isIndexed) {
+        continue;
+      }
+      InputStream in = directory.openFile(segment + ".f" + info.number);
+      norms.put(info.name, new Norm(in));
+    }
+  }
+
+  /** Closes every registered norm stream. */
+  private final void closeNorms() throws IOException {
+    synchronized (norms) {
+      for (Enumeration all = norms.elements(); all.hasMoreElements(); ) {
+        Norm entry = (Norm)all.nextElement();
+        entry.in.close();
+      }
+    }
+  }
+}
--- a/src/java/org/apache/lucene/index/SegmentTermDocs.java
+++ b/src/java/org/apache/lucene/index/SegmentTermDocs.java
@ -0,0 +1,150 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.util.BitVector;
+import org.apache.lucene.store.InputStream;
+
+/**
+ * Enumerates, for one term of one segment, the documents containing the
+ * term together with the term's frequency in each.  Entries are decoded
+ * from the stream obtained from the parent's getFreqStream(); documents
+ * set in the parent's deletion bit vector are passed over.
+ */
+class SegmentTermDocs implements TermDocs {
+  protected SegmentReader parent;
+  private InputStream freqStream;
+  private int freqCount;                          // entries still to decode
+  private BitVector deletedDocs;
+  int doc = 0;                                    // current document number
+  int freq;                                       // frequency in current doc
+
+  /** Creates an enumerator that is not yet positioned on a term. */
+  SegmentTermDocs(SegmentReader p) throws IOException {
+    parent = p;
+    freqStream = p.getFreqStream();
+    deletedDocs = p.deletedDocs;
+  }
+
+  /** Creates an enumerator positioned on the term described by ti. */
+  SegmentTermDocs(SegmentReader p, TermInfo ti) throws IOException {
+    this(p);
+    seek(ti);
+  }
+
+  /** Repositions on the term described by ti: ti.docFreq entries starting
+   * at ti.freqPointer, with document numbers counted up from zero. */
+  void seek(TermInfo ti) throws IOException {
+    freqCount = ti.docFreq;
+    doc = 0;
+    freqStream.seek(ti.freqPointer);
+  }
+
+  /** Closes the frequency stream held by this enumerator. */
+  public void close() throws IOException {
+    freqStream.close();
+  }
+
+  public final int doc() { return doc; }
+  public final int freq() { return freq; }
+
+  /** Hook invoked by next() for each deleted document it passes over;
+   * does nothing here. */
+  protected void skippingDoc() throws IOException {
+  }
+
+  /** Advances to the next document that is not deleted.
+   * @return false once all entries of the term have been consumed */
+  public boolean next() throws IOException {
+    while (freqCount != 0) {
+      final int code = freqStream.readVInt();
+      doc += code >>> 1;                          // upper bits: doc delta
+      // low bit set means freq is one, otherwise freq follows
+      freq = ((code & 1) == 0) ? freqStream.readVInt() : 1;
+
+      freqCount--;
+
+      final boolean live = deletedDocs == null || !deletedDocs.get(doc);
+      if (live)
+        return true;
+      skippingDoc();
+    }
+    return false;
+  }
+
+  /** Optimized implementation: fills docs and freqs with as many
+   * non-deleted entries as fit into docs and returns how many were stored. */
+  public int read(final int[] docs, final int[] freqs)
+      throws IOException {
+    final int capacity = docs.length;
+    int filled = 0;
+    while (filled < capacity && freqCount > 0) {
+
+      // same decoding as next(), kept inline for speed
+      final int code = freqStream.readVInt();
+      doc += code >>> 1;                          // upper bits: doc delta
+      freq = ((code & 1) == 0) ? freqStream.readVInt() : 1;
+      freqCount--;
+
+      final boolean deleted = deletedDocs != null && deletedDocs.get(doc);
+      if (deleted)
+        continue;
+
+      docs[filled] = doc;
+      freqs[filled] = freq;
+      filled++;
+    }
+    return filled;
+  }
+
+  /** As yet unoptimized implementation: calls next() at least once and
+   * keeps calling it until the current document is not below target. */
+  public boolean skipTo(int target) throws IOException {
+    while (next()) {
+      if (doc >= target)
+        return true;
+    }
+    return false;
+  }
+}
--- a/src/java/org/apache/lucene/index/SegmentTermEnum.java
+++ b/src/java/org/apache/lucene/index/SegmentTermEnum.java
@ -0,0 +1,184 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.store.InputStream;
+
+/**
+ * Enumerates the terms, and the TermInfo of each, stored in one term-infos
+ * stream.  The stream starts with an int entry count.  Each entry holds a
+ * term whose text is encoded relative to the previous term, followed by
+ * the doc freq and the freq/prox pointers encoded as deltas; when isIndex
+ * is set every entry additionally carries an index pointer delta.
+ */
+final class SegmentTermEnum extends TermEnum implements Cloneable {
+  private InputStream input;
+  private FieldInfos fieldInfos;
+  int size;                                       // entry count read from stream
+  int position = -1;                              // index of the current entry
+
+  private Term term = new Term("", "");
+  private TermInfo termInfo = new TermInfo();
+
+  boolean isIndex = false;
+  long indexPointer = 0;
+  Term prev;                                      // term before the current one
+
+  private char[] buffer = {};                     // text of the current term
+
+  /** Wraps the stream i and reads the entry count from its head.
+   * @param fis maps the field numbers stored in the stream to field names
+   * @param isi true if entries carry an index pointer */
+  SegmentTermEnum(InputStream i, FieldInfos fis, boolean isi)
+       throws IOException {
+    input = i;
+    fieldInfos = fis; 
+    size = input.readInt();
+    isIndex = isi;
+  }
+  
+  /** Returns a copy with its own stream clone, TermInfo and text buffer.
+   * Cloning also works once the enumeration is exhausted and the current
+   * term is null. */
+  protected Object clone() {
+    SegmentTermEnum clone;
+    try {
+      clone = (SegmentTermEnum)super.clone();
+    } catch (CloneNotSupportedException e) {
+      // not expected since this class is Cloneable; report it instead of
+      // failing with a NullPointerException on the next statement
+      throw new IllegalStateException(e.toString());
+    }
+
+    clone.input = (InputStream)input.clone();
+    clone.termInfo = new TermInfo(termInfo);
+    if (term != null)
+      clone.growBuffer(term.text.length());       // private copy of term text
+    else
+      clone.buffer = new char[0];                 // exhausted: nothing to copy
+
+    return clone;
+  }
+
+  /** Repositions the enumeration: moves the stream to pointer and makes
+   * entry number p, with term t and info ti, the current entry. */
+  final void seek(long pointer, int p, Term t, TermInfo ti)
+       throws IOException {
+    input.seek(pointer);
+    position = p;
+    term = t;
+    prev = null;
+    termInfo.set(ti);
+    growBuffer(term.text.length());		  // copy term text into buffer
+  }
+
+  /** Increments the enumeration to the next element.  True if one exists.*/
+  public final boolean next() throws IOException {
+    if (position++ >= size-1) {
+      term = null;
+      return false;
+    }
+
+    prev = term;
+    term = readTerm();
+
+    termInfo.docFreq = input.readVInt();	  // read doc freq
+    termInfo.freqPointer += input.readVLong();	  // read freq pointer
+    termInfo.proxPointer += input.readVLong();	  // read prox pointer
+    
+    if (isIndex)
+      indexPointer += input.readVLong();	  // read index pointer
+
+    return true;
+  }
+
+  /** Reads the next term: the first "start" chars are kept from the buffer
+   * (the previous term's text), "length" further chars are read from the
+   * stream, and a field number is then resolved to a field name. */
+  private final Term readTerm() throws IOException {
+    int start = input.readVInt();
+    int length = input.readVInt();
+    int totalLength = start + length;
+    if (buffer.length < totalLength)
+      growBuffer(totalLength);
+    
+    input.readChars(buffer, start, length);
+    return new Term(fieldInfos.fieldName(input.readVInt()),
+		    new String(buffer, 0, totalLength), false);
+  }
+
+  /** Replaces the buffer with a new one of the given length that starts
+   * with the text of the current term. */
+  private final void growBuffer(int length) {
+    buffer = new char[length];
+    for (int i = 0; i < term.text.length(); i++)  // copy contents
+      buffer[i] = term.text.charAt(i);
+  }
+
+  /** Returns the current Term in the enumeration.
+    Initially invalid, valid after next() called for the first time.*/
+  public final Term term() {
+    return term;
+  }
+
+  /** Returns the current TermInfo in the enumeration.
+    Initially invalid, valid after next() called for the first time.*/
+  final TermInfo termInfo() {
+    return new TermInfo(termInfo);
+  }
+
+  /** Sets the argument to the current TermInfo in the enumeration.
+    Initially invalid, valid after next() called for the first time.*/
+  final void termInfo(TermInfo ti) {
+    ti.set(termInfo);
+  }
+
+  /** Returns the docFreq from the current TermInfo in the enumeration.
+    Initially invalid, valid after next() called for the first time.*/
+  public final int docFreq() {
+    return termInfo.docFreq;
+  }
+
+  /** Returns the freqPointer from the current TermInfo in the enumeration.
+    Initially invalid, valid after next() called for the first time.*/
+  final long freqPointer() {
+    return termInfo.freqPointer;
+  }
+
+  /** Returns the proxPointer from the current TermInfo in the enumeration.
+    Initially invalid, valid after next() called for the first time.*/
+  final long proxPointer() {
+    return termInfo.proxPointer;
+  }
+
+  /** Closes the enumeration to further activity, freeing resources. */
+  public final void close() throws IOException {
+    input.close();
+  }
+}
--- a/src/java/org/apache/lucene/index/SegmentTermPositions.java
+++ b/src/java/org/apache/lucene/index/SegmentTermPositions.java
@ -0,0 +1,114 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.InputStream;
+
+final class SegmentTermPositions
+extends SegmentTermDocs implements TermPositions {
+  private InputStream proxStream;
+  private int proxCount;
+  private int position;
+  
+  SegmentTermPositions(SegmentReader p) throws IOException {
+    super(p);
+    proxStream = parent.getProxStream();
+  }
+
+  SegmentTermPositions(SegmentReader p, TermInfo ti)
+       throws IOException {
+    this(p);
+    seek(ti);
+  }
+
+  final void seek(TermInfo ti) throws IOException {
+    super.seek(ti);
+    proxStream.seek(ti.proxPointer);
+  }
+
+  public final void close() throws IOException {
+    super.close();
+    proxStream.close();
+  }
+
+  public final int nextPosition() throws IOException {
+    proxCount--;
+    return position += proxStream.readVInt();
+  }
+
+  protected final void skippingDoc() throws IOException {
+    for (int f = freq; f > 0; f--)		  // skip all positions
+      proxStream.readVInt();
+  }
+
+  public final boolean next() throws IOException {
+    for (int f = proxCount; f > 0; f--)		  // skip unread positions
+      proxStream.readVInt();
+
+    if (super.next()) {				  // run super
+      proxCount = freq;				  // note frequency
+      position = 0;				  // reset position
+      return true;
+    }
+    return false;
+  }
+
+  public final int read(final int[] docs, final int[] freqs)
+      throws IOException {
+    throw new RuntimeException();
+  }
+}
--- a/src/java/org/apache/lucene/index/SegmentsReader.java
+++ b/src/java/org/apache/lucene/index/SegmentsReader.java
@ -0,0 +1,329 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Hashtable;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.document.Document;
+
+/**
+ * IndexReader that presents several SegmentReaders as one index.  The
+ * documents of segment i are numbered from starts[i] upwards; every
+ * per-document call is dispatched to the owning segment with the document
+ * number made segment-relative.
+ */
+final class SegmentsReader extends IndexReader {
+  protected SegmentReader[] readers;
+  protected int[] starts;			  // 1st docno for each segment
+  private Hashtable normsCache = new Hashtable(); // field name -> byte[]
+  private int maxDoc = 0;
+  private int numDocs = -1;			  // -1 means not computed yet
+
+  /** Builds the starts array (one extra trailing element holds the total)
+   * and sums the maxDoc() of all readers. */
+  SegmentsReader(SegmentReader[] r) throws IOException {
+    readers = r;
+    starts = new int[readers.length + 1];	  // build starts array
+    for (int i = 0; i < readers.length; i++) {
+      starts[i] = maxDoc;
+      maxDoc += readers[i].maxDoc();		  // compute maxDocs
+    }
+    starts[readers.length] = maxDoc;
+  }
+
+  /** Sum of numDocs() over all segments, cached until the next delete(). */
+  public final int numDocs() {
+    if (numDocs == -1) {			  // check cache
+      int n = 0;				  // cache miss--recompute
+      for (int i = 0; i < readers.length; i++)
+	n += readers[i].numDocs();		  // sum from readers
+      numDocs = n;
+    }
+    return numDocs;
+  }
+
+  /** Sum of maxDoc() over all segments, computed at construction. */
+  public final int maxDoc() {
+    return maxDoc;
+  }
+
+  public final Document document(int n) throws IOException {
+    int i = readerIndex(n);			  // find segment num
+    return readers[i].document(n - starts[i]);	  // dispatch to segment reader
+  }
+
+  public final boolean isDeleted(int n) {
+    int i = readerIndex(n);			  // find segment num
+    return readers[i].isDeleted(n - starts[i]);	  // dispatch to segment reader
+  }
+
+  public final void delete(int n) throws IOException {
+    numDocs = -1;				  // invalidate cache
+    int i = readerIndex(n);			  // find segment num
+    readers[i].delete(n - starts[i]);		  // dispatch to segment reader
+  }
+
+  /** Finds the reader that holds document n: binary search over starts for
+   * the last element that is less than or equal to n.  A segment without
+   * documents has the same start as its successor, so on an exact match
+   * the search moves on to the last reader with that start; stopping at
+   * the first hit could select an empty segment. */
+  private final int readerIndex(int n) {	  // find reader for doc n:
+    int lo = 0;					  // search starts array
+    int hi = readers.length - 1;
+
+    while (hi >= lo) {
+      int mid = (lo + hi) >>> 1;
+      int midValue = starts[mid];
+      if (n < midValue)
+	hi = mid - 1;
+      else if (n > midValue)
+	lo = mid + 1;
+      else {					  // found a match
+	while (mid + 1 < readers.length && starts[mid + 1] == midValue)
+	  mid++;				  // scan to last match
+	return mid;
+      }
+    }
+    return hi;
+  }
+
+  /** Returns the norms of a field for the whole index: each segment copies
+   * its bytes into one array at its start offset.  The array is cached per
+   * field name. */
+  public final synchronized byte[] norms(String field) throws IOException {
+    byte[] bytes = (byte[])normsCache.get(field);
+    if (bytes != null)
+      return bytes;				  // cache hit
+
+    bytes = new byte[maxDoc()];
+    for (int i = 0; i < readers.length; i++)
+      readers[i].norms(field, bytes, starts[i]);
+    normsCache.put(field, bytes);		  // update cache
+    return bytes;
+  }
+
+  public final TermEnum terms() throws IOException {
+    return new SegmentsTermEnum(readers, starts, null);
+  }
+
+  public final TermEnum terms(Term term) throws IOException {
+    return new SegmentsTermEnum(readers, starts, term);
+  }
+
+  public final int docFreq(Term t) throws IOException {
+    int total = 0;				  // sum freqs in segments
+    for (int i = 0; i < readers.length; i++)
+      total += readers[i].docFreq(t);
+    return total;
+  }
+
+  public final TermDocs termDocs(Term term) throws IOException {
+    return new SegmentsTermDocs(readers, starts, term);
+  }
+
+  public final TermPositions termPositions(Term term) throws IOException {
+    return new SegmentsTermPositions(readers, starts, term);
+  }
+
+  /** Closes every segment reader in order. */
+  public final void close() throws IOException {
+    for (int i = 0; i < readers.length; i++)
+      readers[i].close();
+  }
+}
+
+/**
+ * Merges the term enumerations of several segments into one enumeration.
+ * The per-segment enumerations are held in a SegmentMergeQueue; next()
+ * removes every queue entry positioned on the top term and adds up their
+ * document frequencies.
+ */
+class SegmentsTermEnum extends TermEnum {
+  private SegmentMergeQueue queue;
+
+  private Term term;
+  private int docFreq;
+
+  /** Creates the merged enumeration.  With t == null the enumeration is
+   * initially invalid and next() must be called first.  With a non-null t
+   * each segment is asked for terms(t) and the enumeration is positioned
+   * on the first merged term straight away. */
+  SegmentsTermEnum(SegmentReader[] readers, int[] starts, Term t)
+       throws IOException {
+    queue = new SegmentMergeQueue(readers.length);
+    for (int i = 0; i < readers.length; i++) {
+      SegmentReader reader = readers[i];
+      SegmentTermEnum termEnum;
+
+      if (t != null) {
+	termEnum = (SegmentTermEnum)reader.terms(t);
+      } else
+	termEnum = (SegmentTermEnum)reader.terms();
+      
+      SegmentMergeInfo smi = new SegmentMergeInfo(starts[i], termEnum, reader);
+      if (t == null ? smi.next() : termEnum.term() != null)
+	queue.put(smi);				  // initialize queue
+      else
+	smi.close();
+    }
+
+    if (t != null && queue.size() > 0) {
+      // Consume the first term through next() so that docFreq is summed
+      // over all segments and the caller's first next() moves on to the
+      // following term instead of delivering this one a second time.
+      // NOTE(review): like the queue ordering, this relies on
+      // SegmentMergeInfo.term being set by its constructor -- confirm.
+      next();
+    }
+  }
+
+  /** Moves to the next distinct term over all segments.
+   * @return false, with term() null, once every segment is exhausted */
+  public final boolean next() throws IOException {
+    SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
+    if (top == null) {
+      term = null;
+      return false;
+    }
+      
+    term = top.term;
+    docFreq = 0;
+    
+    while (top != null && term.compareTo(top.term) == 0) {
+      queue.pop();
+      docFreq += top.termEnum.docFreq();	  // increment freq
+      if (top.next())
+	queue.put(top);				  // restore queue
+      else
+	top.close();				  // done with a segment
+      top = (SegmentMergeInfo)queue.top();
+    }
+    return true;
+  }
+
+  /** Returns the current term, or null when the enumeration is invalid. */
+  public final Term term() {
+    return term;
+  }
+
+  /** Returns the document frequency of the current term summed over the
+   * segments that contain it. */
+  public final int docFreq() {
+    return docFreq;
+  }
+
+  /** Closes the merge queue. */
+  public final void close() throws IOException {
+    queue.close();
+  }
+}
+
+class SegmentsTermDocs implements TermDocs {
+  protected SegmentReader[] readers;
+  protected int[] starts;
+  protected Term term;
+
+  protected int base = 0;
+  protected int pointer = 0;
+
+  SegmentsTermDocs(SegmentReader[] r, int[] s, Term t) {
+    readers = r;
+    starts = s;
+    term = t;
+  }
+
+  protected SegmentTermDocs current;
+  
+  public final int doc() {
+    return base + current.doc;
+  }
+  public final int freq() {
+    return current.freq;
+  }
+
+  public final boolean next() throws IOException {
+    if (current != null && current.next()) {
+      return true;
+    } else if (pointer < readers.length) {
+      if (current != null)
+	current.close();
+      base = starts[pointer];
+      current = termDocs(readers[pointer++]);
+      return next();
+    } else
+      return false;
+  }
+
+  /** Optimized implementation. */
+  public final int read(final int[] docs, final int[] freqs)
+      throws IOException {
+    while (true) {
+      while (current == null) {
+	if (pointer < readers.length) {		  // try next segment
+	  base = starts[pointer];
+	  current = termDocs(readers[pointer++]);
+	} else {
+	  return 0;
+	}
+      }
+      int end = current.read(docs, freqs);
+      if (end == 0) {				  // none left in segment
+	current.close();
+	current = null;
+      } else {					  // got some
+	final int b = base;			  // adjust doc numbers
+	for (int i = 0; i < end; i++)
+	  docs[i] += b;
+	return end;
+      }
+    }
+  }
+
+  /** As yet unoptimized implementation. */
+  public boolean skipTo(int target) throws IOException {
+    do {
+      if (!next())
+	return false;
+    } while (target > doc());
+    return true;
+  }
+
+  protected SegmentTermDocs termDocs(SegmentReader reader)
+       throws IOException {
+    return (SegmentTermDocs)reader.termDocs(term);
+  }
+
+  public final void close() throws IOException {
+    if (current != null)
+      current.close();
+  }
+}
+
+/**
+ * Multi-segment TermPositions: identical to SegmentsTermDocs except that
+ * the per-segment enumerators are obtained from termPositions().
+ */
+class SegmentsTermPositions extends SegmentsTermDocs implements TermPositions {
+  SegmentsTermPositions(SegmentReader[] r, int[] s, Term t) {
+    super(r,s,t);
+  }
+
+  /** Opens a position-aware enumerator for the term; may return null. */
+  protected final SegmentTermDocs termDocs(SegmentReader reader)
+       throws IOException {
+    TermPositions perSegment = reader.termPositions(term);
+    return (SegmentTermDocs)perSegment;
+  }
+
+  /** Next position of the term in the current document of the current
+   * segment. */
+  public final int nextPosition() throws IOException {
+    SegmentTermPositions positions = (SegmentTermPositions)current;
+    return positions.nextPosition();
+  }
+}
--- a/src/java/org/apache/lucene/index/Term.java
+++ b/src/java/org/apache/lucene/index/Term.java
@ -0,0 +1,122 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/**
+  A Term represents a word from text.  This is the unit of search.  It is
+  composed of two elements, the text of the word, as a string, and the name of
+  the field that the text occurred in, an interned string.
+
+  Note that terms may represent more than words from text fields, but also
+  things like dates, email addresses, urls, etc.  */
+
+public final class Term {
+  String field;
+  String text;
+
+  /** Constructs a Term with the given field and text.  The field name is
+    interned. */
+  public Term(String fld, String txt) {
+    this(fld, txt, true);
+  }
+
+  /** Constructs a Term, interning the field name only when <code>intern</code>
+    is true.  Pass false only when <code>fld</code> is already known to be
+    interned, since {@link #equals} and {@link #compareTo} compare field
+    names by identity. */
+  Term(String fld, String txt, boolean intern) {
+    field = intern ? fld.intern() : fld;          // field names are interned
+    text = txt;                                   // unless already known to be
+  }
+
+  /** Returns the field of this term, an interned string.   The field indicates
+    the part of a document which this term came from. */
+  public final String field() { return field; }
+
+  /** Returns the text of this term.  In the case of words, this is simply the
+    text of the word.  In the case of dates and other types, this is an
+    encoding of the object as a string.  */
+  public final String text() { return text; }
+
+  /** Compares two terms, returning true iff they have the same
+      field and text.  Returns false, rather than throwing, when the argument
+      is null or is not a Term. */
+  public final boolean equals(Object o) {
+    if (this == o)
+      return true;
+    if (!(o instanceof Term))                     // also rejects null
+      return false;
+    Term other = (Term)o;
+    return field == other.field && text.equals(other.text); // fields are interned
+  }
+
+  /** Combines the hashCode() of the field and the text. */
+  public final int hashCode() {
+    return field.hashCode() + text.hashCode();
+  }
+
+  /** Compares two terms, returning an integer which is less than zero iff this
+    term belongs before the argument, equal zero iff this term is equal to the
+    argument, and greater than zero iff this term belongs after the argument.
+
+    The ordering of terms is first by field, then by text.*/
+  public final int compareTo(Term other) {
+    if (field == other.field)                     // fields are interned
+      return text.compareTo(other.text);
+    else
+      return field.compareTo(other.field);
+  }
+
+  /** Resets the field and text of a Term.  The field name is stored as given,
+    without interning. */
+  final void set(String fld, String txt) {
+    field = fld;
+    text = txt;
+  }
+
+  public final String toString() {
+    return "Term<" + field + ":" + text + ">";
+  }
+}
--- a/src/java/org/apache/lucene/index/TermDocs.java
+++ b/src/java/org/apache/lucene/index/TermDocs.java
@ -0,0 +1,110 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.document.Document;
+
+/** TermDocs provides an interface for enumerating &lt;document, frequency&gt;
+  pairs for a term.  <p> The document portion names each document containing
+  the term.  Documents are indicated by number.  The frequency portion gives
+  the number of times the term occurred in each document.  <p> The pairs are
+  ordered by document number.
+
+  @see IndexReader#termDocs
+  */
+
+public interface TermDocs {
+  /** The number of the document the enumeration is positioned on.  <p> Not
+      meaningful before the first call to {@link #next()}.*/
+  int doc();
+
+  /** How many times the term occurs within the current document.  <p> Not
+    meaningful before the first call to {@link #next()}.*/
+  int freq();
+
+  /** Advances to the following pair of the enumeration.  <p> Returns true iff
+    such a pair exists. */
+  boolean next() throws IOException;
+
+  /** Bulk variant of {@link #next()}: reads as many entries as are available,
+   * but no more than the length of <i>docs</i>.  Document numbers go into
+   * <i>docs</i> and the matching term frequencies into <i>freqs</i>, which
+   * must be at least as long as <i>docs</i>.
+   *
+   * <p>Returns how many entries were read.  A return value of zero means the
+   * stream has been exhausted.  */
+  int read(int[] docs, int[] freqs) throws IOException;
+
+  /** Moves past the current entry to the first later one whose document
+   * number is greater than or equal to <i>target</i>. <p>Returns true iff
+   * such an entry exists.  <p>Behaves as if written: <pre>
+   *   public boolean skipTo(int target) {
+   *     do {
+   *       if (!next())
+   *         return false;
+   *     } while (target &gt; doc());
+   *     return true;
+   *   }
+   * </pre>
+   * Implementations are free to do this far more efficiently.
+   */
+  boolean skipTo(int target) throws IOException;
+
+  /** Releases the resources held by this enumeration. */
+  void close() throws IOException;
+}
+
+
--- a/src/java/org/apache/lucene/index/TermEnum.java
+++ b/src/java/org/apache/lucene/index/TermEnum.java
@ -0,0 +1,78 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+
+/** Abstract class for enumerating terms.
+
+  <p>Term enumerations are always ordered by Term.compareTo().  Each term in
+  the enumeration is greater than all that precede it.  */
+
+public abstract class TermEnum {
+  /** Advances the enumeration by one element.  Returns true if there was an
+    element to advance to.*/
+  public abstract boolean next() throws IOException;
+
+  /** The Term the enumeration is currently positioned on.
+    Not valid until next() has been called at least once.*/
+  public abstract Term term();
+
+  /** The docFreq of the Term the enumeration is currently positioned on.
+    Not valid until next() has been called at least once.*/
+  public abstract int docFreq();
+
+  /** Ends the enumeration and releases its resources. */
+  public abstract void close() throws IOException;
+}
--- a/src/java/org/apache/lucene/index/TermInfo.java
+++ b/src/java/org/apache/lucene/index/TermInfo.java
@ -0,0 +1,91 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/** A TermInfo is the record of information stored for a term.*/
+
+final class TermInfo {
+  /** The number of documents which contain the term. */
+  int docFreq;
+
+  // File pointers stored with the term; all three fields start out as zero.
+  long freqPointer;
+  long proxPointer;
+
+  /** Creates a record with every field zero. */
+  TermInfo() {}
+
+  /** Creates a record holding the given values. */
+  TermInfo(int df, long fp, long pp) {
+    set(df, fp, pp);
+  }
+
+  /** Creates a field-by-field copy of <code>ti</code>. */
+  TermInfo(TermInfo ti) {
+    this(ti.docFreq, ti.freqPointer, ti.proxPointer);
+  }
+
+  /** Overwrites all three fields with the given values. */
+  final void set(int df, long fp, long pp) {
+    this.docFreq = df;
+    this.freqPointer = fp;
+    this.proxPointer = pp;
+  }
+
+  /** Overwrites all three fields with those of <code>ti</code>. */
+  final void set(TermInfo ti) {
+    set(ti.docFreq, ti.freqPointer, ti.proxPointer);
+  }
+}
--- a/src/java/org/apache/lucene/index/TermInfosReader.java
+++ b/src/java/org/apache/lucene/index/TermInfosReader.java
@ -0,0 +1,222 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.InputStream;
+
+/** This stores a monotonically increasing set of &lt;Term, TermInfo&gt; pairs in a
+ * Directory.  Pairs are accessed either by Term or by ordinal position in the
+ * set.  */
+
+final class TermInfosReader {
+  private Directory directory;
+  private String segment;
+  private FieldInfos fieldInfos;
+
+  // Shared cursor over the segment's ".tis" file.  Called termEnum rather
+  // than enum because "enum" is a reserved word from Java 5 onwards.
+  private SegmentTermEnum termEnum;
+  private int size;
+
+  /** Opens the ".tis" file of segment <code>seg</code> in <code>dir</code> and
+    loads the ".tii" index into memory. */
+  TermInfosReader(Directory dir, String seg, FieldInfos fis)
+       throws IOException {
+    directory = dir;
+    segment = seg;
+    fieldInfos = fis;
+
+    termEnum = new SegmentTermEnum(directory.openFile(segment + ".tis"),
+                                   fieldInfos, false);
+    size = termEnum.size;
+    readIndex();
+  }
+
+  /** Closes the shared enumeration over the ".tis" file. */
+  final void close() throws IOException {
+    if (termEnum != null)
+      termEnum.close();
+  }
+
+  /** Returns the number of term/value pairs in the set. */
+  final int size() {
+    return size;
+  }
+
+  // In-memory copy of the ".tii" index, filled by readIndex().  The three
+  // arrays are parallel: entry i of each describes the same index term.
+  Term[] indexTerms = null;
+  TermInfo[] indexInfos;
+  long[] indexPointers;
+
+  /** Reads every entry of the ".tii" file into the index arrays, closing the
+    file afterwards even if reading fails. */
+  private final void readIndex() throws IOException {
+    SegmentTermEnum indexEnum =
+      new SegmentTermEnum(directory.openFile(segment + ".tii"),
+                          fieldInfos, true);
+    try {
+      int indexSize = indexEnum.size;
+
+      indexTerms = new Term[indexSize];
+      indexInfos = new TermInfo[indexSize];
+      indexPointers = new long[indexSize];
+
+      for (int i = 0; indexEnum.next(); i++) {
+        indexTerms[i] = indexEnum.term();
+        indexInfos[i] = indexEnum.termInfo();
+        indexPointers[i] = indexEnum.indexPointer;
+      }
+    } finally {
+      indexEnum.close();
+    }
+  }
+
+  /** Returns the offset of the greatest index entry which is less than or
+    equal to term. */
+  private final int getIndexOffset(Term term) throws IOException {
+    int lo = 0;                                   // binary search indexTerms[]
+    int hi = indexTerms.length - 1;
+
+    while (hi >= lo) {
+      int mid = (lo + hi) >>> 1;                  // unsigned shift: no overflow
+      int delta = term.compareTo(indexTerms[mid]);
+      if (delta < 0)
+        hi = mid - 1;
+      else if (delta > 0)
+        lo = mid + 1;
+      else
+        return mid;
+    }
+    return hi;
+  }
+
+  /** Positions the shared enumeration on the index entry at indexOffset. */
+  private final void seekEnum(int indexOffset) throws IOException {
+    termEnum.seek(indexPointers[indexOffset],
+                  (indexOffset * TermInfosWriter.INDEX_INTERVAL) - 1,
+                  indexTerms[indexOffset], indexInfos[indexOffset]);
+  }
+
+  /** Returns the TermInfo for a Term in the set, or null. */
+  final synchronized TermInfo get(Term term) throws IOException {
+    if (size == 0) return null;
+
+    // optimize sequential access: first try scanning cached enum w/o seeking
+    if (termEnum.term() != null                   // term is at or past current
+        && ((termEnum.prev != null && term.compareTo(termEnum.prev) > 0)
+            || term.compareTo(termEnum.term()) >= 0)) {
+      int enumOffset = (termEnum.position/TermInfosWriter.INDEX_INTERVAL)+1;
+      if (indexTerms.length == enumOffset         // but before end of block
+          || term.compareTo(indexTerms[enumOffset]) < 0)
+        return scanEnum(term);                    // no need to seek
+    }
+
+    // random-access: must seek
+    seekEnum(getIndexOffset(term));
+    return scanEnum(term);
+  }
+
+  /** Scans within block for matching term. */
+  private final TermInfo scanEnum(Term term) throws IOException {
+    while (term.compareTo(termEnum.term()) > 0 && termEnum.next()) {}
+    if (termEnum.term() != null && term.compareTo(termEnum.term()) == 0)
+      return termEnum.termInfo();
+    else
+      return null;
+  }
+
+  /** Returns the nth term in the set. */
+  final synchronized Term get(int position) throws IOException {
+    if (size == 0) return null;
+
+    if (termEnum != null && termEnum.term() != null
+        && position >= termEnum.position
+        && position < (termEnum.position + TermInfosWriter.INDEX_INTERVAL))
+      return scanEnum(position);                  // can avoid seek
+
+    seekEnum(position / TermInfosWriter.INDEX_INTERVAL); // must seek
+    return scanEnum(position);
+  }
+
+  /** Steps the shared enumeration forward to the given position and returns
+    the term found there, or null if the enumeration ends first. */
+  private final Term scanEnum(int position) throws IOException {
+    while(termEnum.position < position)
+      if (!termEnum.next())
+        return null;
+
+    return termEnum.term();
+  }
+
+  /** Returns the position of a Term in the set or -1. */
+  final synchronized int getPosition(Term term) throws IOException {
+    if (size == 0) return -1;
+
+    int indexOffset = getIndexOffset(term);
+    seekEnum(indexOffset);
+
+    while(term.compareTo(termEnum.term()) > 0 && termEnum.next()) {}
+
+    // Same null guard as scanEnum(Term): without it, a term sorting after
+    // every entry would be compared against a null current term.
+    if (termEnum.term() != null && term.compareTo(termEnum.term()) == 0)
+      return termEnum.position;
+    else
+      return -1;
+  }
+
+  /** Returns an enumeration of all the Terms and TermInfos in the set. */
+  final synchronized SegmentTermEnum terms() throws IOException {
+    if (termEnum.position != -1)                  // if not at start
+      seekEnum(0);                                // reset to start
+    return (SegmentTermEnum)termEnum.clone();
+  }
+
+  /** Returns an enumeration of terms starting at or after the named term. */
+  final synchronized SegmentTermEnum terms(Term term) throws IOException {
+    get(term);                                    // seek enum to term
+    return (SegmentTermEnum)termEnum.clone();
+  }
+
+
+}
--- a/src/java/org/apache/lucene/index/TermInfosWriter.java
+++ b/src/java/org/apache/lucene/index/TermInfosWriter.java
@ -0,0 +1,159 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.store.OutputStream;
+import org.apache.lucene.store.Directory;
+
+/** This stores a monotonically increasing set of &lt;Term, TermInfo&gt; pairs in a
+  Directory.  A TermInfos can be written once, in order.  */
+
+final class TermInfosWriter {
+  private FieldInfos fieldInfos;
+  private OutputStream output;
+  private Term lastTerm = new Term("", "");
+  private TermInfo lastTi = new TermInfo();
+  private int size = 0;
+  
+  static final int INDEX_INTERVAL = 128;
+  private long lastIndexPointer = 0;
+  private boolean isIndex = false;
+
+  private TermInfosWriter other = null;
+
+  TermInfosWriter(Directory directory, String segment, FieldInfos fis)
+       throws IOException, SecurityException {
+    initialize(directory, segment, fis, false);
+    other = new TermInfosWriter(directory, segment, fis, true);
+    other.other = this;
+  }
+
+  private TermInfosWriter(Directory directory, String segment, FieldInfos fis,
+			  boolean isIndex) throws IOException {
+    initialize(directory, segment, fis, isIndex);
+  }
+
+  private void initialize(Directory directory, String segment, FieldInfos fis,
+		     boolean isi) throws IOException {
+    fieldInfos = fis;
+    isIndex = isi;
+    output = directory.createFile(segment + (isIndex ? ".tii" : ".tis"));
+    output.writeInt(0);				  // leave space for size
+  }
+
+  /** Adds a new <Term, TermInfo> pair to the set.
+    Term must be lexicographically greater than all previous Terms added.
+    TermInfo pointers must be positive and greater than all previous.*/
+  final void add(Term term, TermInfo ti)
+       throws IOException, SecurityException {
+    if (!isIndex && term.compareTo(lastTerm) <= 0)
+      throw new IOException("term out of order");
+    if (ti.freqPointer < lastTi.freqPointer)
+      throw new IOException("freqPointer out of order");
+    if (ti.proxPointer < lastTi.proxPointer)
+      throw new IOException("proxPointer out of order");
+
+    if (!isIndex && size % INDEX_INTERVAL == 0)
+      other.add(lastTerm, lastTi);		  // add an index term
+
+    writeTerm(term);				  // write term
+    output.writeVInt(ti.docFreq);		  // write doc freq
+    output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
+    output.writeVLong(ti.proxPointer - lastTi.proxPointer);
+
+    if (isIndex) {
+      output.writeVLong(other.output.getFilePointer() - lastIndexPointer);
+      lastIndexPointer = other.output.getFilePointer(); // write pointer
+    }
+
+    lastTi.set(ti);
+    size++;
+  }
+
+  private final void writeTerm(Term term)
+       throws IOException {
+    int start = stringDifference(lastTerm.text, term.text);
+    int length = term.text.length() - start;
+    
+    output.writeVInt(start);			  // write shared prefix length
+    output.writeVInt(length);			  // write delta length
+    output.writeChars(term.text, start, length);  // write delta chars
+
+    output.writeVInt(fieldInfos.fieldNumber(term.field)); // write field num
+
+    lastTerm = term;
+  }
+
+  private static final int stringDifference(String s1, String s2) {
+    int len1 = s1.length();
+    int len2 = s2.length();
+    int len = len1 < len2 ? len1 : len2;
+    for (int i = 0; i < len; i++)
+      if (s1.charAt(i) != s2.charAt(i))
+	return i;
+    return len;
+  }
+
+  /** Called to complete TermInfos creation. */
+  final void close() throws IOException, SecurityException {
+    output.seek(0);				  // write size at start
+    output.writeInt(size);
+    output.close();
+    
+    if (!isIndex)
+      other.close();
+  }
+}
--- a/src/java/org/apache/lucene/index/TermPositions.java
+++ b/src/java/org/apache/lucene/index/TermPositions.java
@ -0,0 +1,75 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.document.Document;
+
+
+/** TermPositions provides an interface for enumerating the &lt;document,
+  frequency, &lt;position&gt;* &gt; tuples for a term.  <p> The document and
+  frequency are as for a TermDocs.  The positions portion lists the ordinal
+  positions of each occurrence of a term in a document.
+  @see IndexReader#termPositions
+  */
+
+public interface TermPositions extends TermDocs {
+  /** Returns next position in the current document.  It is an error to call
+    this more than {@link #freq()} times
+    without calling {@link #next()}.<p> This is
+    invalid until {@link #next()} is called for
+    the first time.
+    @return the next position of the term in the current document */
+  public int nextPosition() throws IOException;
+}  
--- a/src/java/org/apache/lucene/index/package.html
+++ b/src/java/org/apache/lucene/index/package.html
@ -0,0 +1,10 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<html>
+<head>
+   <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+   <meta name="Author" content="Doug Cutting">
+</head>
+<body>
+Code to maintain and access indices.
+</body>
+</html>
--- a/src/java/org/apache/lucene/manifest
+++ b/src/java/org/apache/lucene/manifest
@ -0,0 +1,8 @@
+
+Name: com/lucene
+Specification-Title: Lucene Search Engine
+Specification-Version: $Name$
+Specification-Vendor: Lucene
+Implementation-Title: com.lucene
+Implementation-Version: $Name$ $Date$
+Implementation-Vendor: Lucene
--- a/src/java/org/apache/lucene/queryParser/.cvsignore
+++ b/src/java/org/apache/lucene/queryParser/.cvsignore
@ -0,0 +1,6 @@
+QueryParser.java
+TokenMgrError.java
+ParseException.java
+Token.java
+TokenManager.java
+QueryParserConstants.java
--- a/src/java/org/apache/lucene/queryParser/Makefile
+++ b/src/java/org/apache/lucene/queryParser/Makefile
@ -0,0 +1,2 @@
+# sub-directory makefile for lucene
+# Defines nothing of its own: all variables and build rules are pulled in
+# from the shared rules.mk one directory up.
+include ../rules.mk
--- a/src/java/org/apache/lucene/queryParser/QueryParser.jj
+++ b/src/java/org/apache/lucene/queryParser/QueryParser.jj
@ -0,0 +1,366 @@
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+
+options {
+  STATIC= false;
+}
+
+PARSER_BEGIN(QueryParser)
+
+package org.apache.lucene.queryParser;
+
+import java.util.Vector;
+import java.io.*;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.analysis.*;
+import org.apache.lucene.search.*;
+
+/**
+ * This class is generated by JavaCC.  The only method that clients should need
+ * to call is {@link #parse(String, String, Analyzer)} (or
+ * {@link #parse(String)} on an existing instance).
+ *
+ * <p>The syntax for query strings is as follows:
+ * A Query is a series of clauses.
+ * A clause may be prefixed by:
+ * <ul>
+ * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
+ * that the clause is required or prohibited respectively; or
+ * <li> a term followed by a colon, indicating the field to be searched.
+ * This enables one to construct queries which search multiple fields.
+ * </ul>
+ *
+ * A clause may be either:
+ * <ul>
+ * <li> a term, indicating all the documents that contain this term; or
+ * <li> a nested query, enclosed in parentheses.  Note that this may be used
+ * with a <code>+</code>/<code>-</code> prefix to require any of a set of
+ * terms.
+ * </ul>
+ *
+ * Thus, in BNF, the query grammar is:
+ * <pre>
+ *   Query  ::= ( Clause )*
+ *   Clause ::= ["+", "-"] [&lt;TERM&gt; ":"] ( &lt;TERM&gt; | "(" Query ")" )
+ * </pre>
+ */
+
+public class QueryParser {
+  /** Parses a query string, returning a {@link Query}.  Convenience wrapper
+   *  that builds a one-shot parser and calls {@link #parse(String)} on it.
+   *  @param query    the query string to be parsed.
+   *  @param field    the default field for query terms.
+   *  @param analyzer used to find terms in the query text.
+   *  @throws ParseException if the generated parser rejects the query string.
+   */
+  static public Query parse(String query, String field, Analyzer analyzer)
+       throws ParseException {
+    QueryParser parser = new QueryParser(field, analyzer);
+    return parser.parse(query);
+  }
+
+  /** Analyzer used to break query text into terms. */
+  Analyzer analyzer;
+  /** Default field for terms that carry no explicit <code>field:</code> prefix. */
+  String field;
+  /** Slop applied to every phrase query built by this parser. */
+  int phraseSlop = 0;
+
+  /** Constructs a query parser.
+   *  @param f the default field for query terms.
+   *  @param a used to find terms in the query text.
+   */
+  public QueryParser(String f, Analyzer a) {
+    // Start the generated parser on empty input; parse() re-initialises it
+    // with the real query text.
+    this(new StringReader(""));
+    analyzer = a;
+    field = f;
+  }
+
+  /** Parses a query string, returning a {@link Query}.
+   *  @param query the query string to be parsed.
+   *  @throws ParseException if the generated parser rejects the query string.
+   */
+  public Query parse(String query) throws ParseException {
+    ReInit(new StringReader(query));
+    return Query(field);
+  }
+
+  /** Sets the default slop for phrases.  If zero, then exact phrase matches
+    are required.  Zero by default. */
+  public void setPhraseSlop(int s) { phraseSlop = s; }
+  /** Gets the default slop for phrases. */
+  public int getPhraseSlop() { return phraseSlop; }
+
+  /** Appends a {@link BooleanClause} for <code>q</code> to
+   *  <code>clauses</code>, deriving its required/prohibited flags from the
+   *  conjunction and modifier that introduced it.
+   *  @param clauses vector of {@link BooleanClause}s collected so far.
+   *  @param conj    one of the <code>CONJ_*</code> constants.
+   *  @param mods    one of the <code>MOD_*</code> constants.
+   *  @param q       the clause's query, or null if the analyzer removed it,
+   *                 in which case no clause is added.
+   */
+  private void addClause(Vector clauses, int conj, int mods,
+                        Query q) {
+    boolean required, prohibited;
+
+    // If this term is introduced by AND, make the preceding term required,
+    // unless it's already prohibited.  There may be no preceding clause at
+    // all when every earlier term was dropped by the analyzer (q == null
+    // below), so never index into an empty vector.
+    if (conj == CONJ_AND && clauses.size() > 0) {
+      BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
+      if (!c.prohibited)
+        c.required = true;
+    }
+
+    // We might have been passed a null query; the term might have been
+    // filtered away by the analyzer.
+    if (q == null)
+      return;
+
+    // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
+    // introduced by NOT or -; make sure not to set both.
+    prohibited = (mods == MOD_NOT);
+    required = (mods == MOD_REQ);
+    if (conj == CONJ_AND && !prohibited)
+      required = true;
+    clauses.addElement(new BooleanClause(q, required, prohibited));
+  }
+
+  /** Runs <code>queryText</code> through the analyzer and builds a query
+   *  from the resulting tokens.
+   *  @param field     the field the terms are searched in.
+   *  @param analyzer  used to tokenize <code>queryText</code>.
+   *  @param queryText raw text of a single term or quoted phrase.
+   *  @return null if the analyzer produced no tokens, a {@link TermQuery}
+   *          for exactly one token, otherwise a {@link PhraseQuery} over all
+   *          tokens using the current phrase slop.
+   */
+  private Query getFieldQuery(String field, Analyzer analyzer, String queryText) {
+    // Use the analyzer to get all the tokens, and then build a TermQuery,
+    // PhraseQuery, or nothing based on the term count
+
+    TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
+    Vector v = new Vector();
+    // Fully qualified: JavaCC generates its own Token class in this package.
+    org.apache.lucene.analysis.Token t;
+
+    while (true) {
+      try {
+        t = source.next();
+      }
+      catch (IOException e) {
+        // A read failure is treated like end of stream: keep what we have.
+        t = null;
+      }
+      if (t == null)
+        break;
+      v.addElement(t.termText());
+    }
+    if (v.size() == 0)
+      return null;
+    else if (v.size() == 1)
+      return new TermQuery(new Term(field, (String) v.elementAt(0)));
+    else {
+      PhraseQuery q = new PhraseQuery();
+      q.setSlop(phraseSlop);
+      for (int i=0; i<v.size(); i++) {
+        q.add(new Term(field, (String) v.elementAt(i)));
+      }
+      return q;
+    }
+  }
+
+  /** Command-line driver: parses <code>args[0]</code> against the default
+   *  field "field" with a SimpleAnalyzer and prints the resulting query. */
+  public static void main(String[] args) throws Exception {
+    QueryParser qp = new QueryParser("field",
+                                     new org.apache.lucene.analysis.SimpleAnalyzer());
+    Query q = qp.parse(args[0]);
+    System.out.println(q.toString("field"));
+  }
+
+  // How a clause was joined to the one before it.
+  private static final int CONJ_NONE   = 0;
+  private static final int CONJ_AND    = 1;
+  private static final int CONJ_OR     = 2;
+
+  // Modifier written directly in front of a clause.
+  private static final int MOD_NONE    = 0;
+  private static final int MOD_NOT     = 10;
+  private static final int MOD_REQ     = 11;
+}
+
+PARSER_END(QueryParser)
+
+/* ***************** */
+/* Token Definitions */
+/* ***************** */
+
+// Helper character classes.  The leading '#' makes each of these a private
+// regular expression: it can be referenced from other definitions but is
+// never produced as a token itself.  Only _NUM_CHAR, _IDENTIFIER_CHAR and
+// _WHITESPACE are referenced by the token and skip definitions in this file.
+<*> TOKEN : {
+  <#_ALPHA_CHAR: ["a"-"z", "A"-"Z"] >
+| <#_NUM_CHAR:   ["0"-"9"] >
+| <#_ALPHANUM_CHAR: [ "a"-"z", "A"-"Z", "0"-"9" ] >
+| <#_IDENTIFIER_CHAR: [ "a"-"z", "A"-"Z", "0"-"9", "_" ] >
+| <#_IDENTIFIER: <_ALPHA_CHAR> (<_IDENTIFIER_CHAR>)* >
+| <#_NEWLINE:    ( "\r\n" | "\r" | "\n" ) >
+| <#_WHITESPACE: ( " " | "\t" ) >
+| <#_QCHAR:      ( "\\" (<_NEWLINE> | ~["a"-"z", "A"-"Z", "0"-"9"] ) ) >
+| <#_RESTOFLINE: (~["\r", "\n"])* >
+}
+
+// Tokens of the query language.
+//  - QUOTED is a double-quoted string with at least one character between
+//    the quotes; it cannot contain a double quote.
+//  - NUMBER requires digits on both sides of a decimal point, so an integer
+//    such as "2" is not a NUMBER (it matches TERM instead).
+//  - TERM starts with an identifier character and continues up to the first
+//    quote, blank, tab, parenthesis, ':', '&', '|', '^', '*', '?' or '~'.
+//  - WILDTERM is like TERM but may contain '*' and '?' inside; it must both
+//    start and end with an identifier character.
+<DEFAULT> TOKEN : {
+  <AND:       ("AND" | "&&") >
+| <OR:        ("OR" | "||") >
+| <NOT:       ("NOT" | "!") >
+| <PLUS:      "+" >
+| <MINUS:     "-" >
+| <LPAREN:    "(" >
+| <RPAREN:    ")" >
+| <COLON:     ":" >
+| <CARAT:     "^" >
+| <STAR:      "*" >
+| <QUOTED:     "\"" (~["\""])+ "\"">
+| <NUMBER:    (<_NUM_CHAR>)+ "." (<_NUM_CHAR>)+ >
+| <TERM:      <_IDENTIFIER_CHAR> 
+              ( ~["\"", " ", "\t", "(", ")", ":", "&", "|", "^", "*", "?", "~" ] )* >
+| <FUZZY:     "~" >
+| <WILDTERM:  <_IDENTIFIER_CHAR>
+              ( ~["\"", " ", "\t", "(", ")", ":", "&", "|", "^", "~" ] )* <_IDENTIFIER_CHAR>>
+}
+
+// Blanks and tabs between tokens are discarded.
+<DEFAULT> SKIP : {
+  <<_WHITESPACE>>
+}
+
+// *   Query  ::= Modifiers Clause ( Conjunction Modifiers Clause )*
+// *   Clause ::= [ <TERM> ":" ] ( Term | "(" Query ")" )
+
+// Matches an optional AND / OR token and returns the corresponding CONJ_*
+// constant; returns CONJ_NONE when neither is present.
+int Conjunction() : { 
+  int ret = CONJ_NONE;
+}
+{
+  [ 
+    <AND> { ret = CONJ_AND; } 
+    | <OR>  { ret = CONJ_OR; }
+  ]
+  { return ret; }
+}
+
+// Matches an optional clause modifier and returns the corresponding MOD_*
+// constant: MOD_REQ for '+', MOD_NOT for '-' or the NOT token, and MOD_NONE
+// when no modifier is present.
+int Modifiers() : { 
+  int ret = MOD_NONE;
+}
+{
+  [ 
+     <PLUS> { ret = MOD_REQ; }  
+     | <MINUS> { ret = MOD_NOT; }
+     | <NOT> { ret = MOD_NOT; }
+  ]
+  { return ret; }
+}
+
+// Parses one or more clauses, each optionally preceded by a conjunction
+// (not for the first clause) and a modifier, and collects them with
+// addClause().  The result is always a BooleanQuery holding the collected
+// clauses, even when there is only one.
+// `field` is the default field handed down to every clause.
+Query Query(String field) :
+{
+  Vector clauses = new Vector();
+  Query q;
+  int conj, mods; 
+}
+{
+  mods=Modifiers() q=Clause(field) 
+  { addClause(clauses, CONJ_NONE, mods, q); }
+  ( 
+    conj=Conjunction() mods=Modifiers() q=Clause(field) 
+    { addClause(clauses, conj, mods, q); }
+  )*
+    {
+      BooleanQuery query = new BooleanQuery();
+      for (int i = 0; i < clauses.size(); i++)
+  	query.add((BooleanClause)clauses.elementAt(i));
+      return query;
+    }
+}
+
+// Parses a single clause: an optional "<TERM>:" field prefix followed by
+// either a term or a parenthesised sub-query.  When the prefix is present
+// its text replaces `field` for this clause, including any nested query.
+Query Clause(String field) : {
+  Query q;
+  Token fieldToken=null;
+}
+{
+  [
+    // Two tokens of lookahead are needed to tell "field:" from a plain term.
+    LOOKAHEAD(2)
+    fieldToken=<TERM> <COLON> { field = fieldToken.image; }
+  ]
+
+  (
+   q=Term(field) 
+   | <LPAREN> q=Query(field) <RPAREN>
+  )
+    {
+      return q;
+    }
+}
+    
+
+// Parses one term and turns it into a query on `field`.
+//  - A TERM, WILDTERM or NUMBER may be followed by '*' (prefix) or '~'
+//    (fuzzy) and then by '^' NUMBER (boost).  A WILDTERM becomes a
+//    WildcardQuery, otherwise '*' gives a PrefixQuery, otherwise '~' gives a
+//    FuzzyQuery; all three use the token text as-is, without the analyzer.
+//    Anything else goes through getFieldQuery(), which may return null.
+//  - A QUOTED string is passed to getFieldQuery() without its surrounding
+//    quotes.  The boost suffix belongs to the first alternative only, so a
+//    quoted string cannot be followed by '^' NUMBER here.
+Query Term(String field) : { 
+  Token term, boost=null;
+  boolean prefix = false;
+  boolean wildcard = false;
+  boolean fuzzy = false;
+  Query q;
+}
+{
+  ( 
+     (term=<TERM>|term=<WILDTERM>{wildcard=true;}|term=<NUMBER>)[<STAR>{prefix=true;}|<FUZZY>{fuzzy=true;}][<CARAT> boost=<NUMBER>]
+      { if (wildcard)
+          q = new WildcardQuery(new Term(field, term.image));
+        else if (prefix) 
+          q = new PrefixQuery(new Term(field, term.image));
+        else if (fuzzy)
+          q = new FuzzyQuery(new Term(field, term.image));
+        else
+          q = getFieldQuery(field, analyzer, term.image); }
+    | term=<QUOTED> 
+      { q = getFieldQuery(field, analyzer, 
+                          term.image.substring(1, term.image.length()-1)); }
+  )
+  { 
+    if (boost != null) {
+      // Fall back to a neutral boost of 1.0 if the number cannot be parsed.
+      float f = (float) 1.0;
+      try { 
+        f = Float.valueOf(boost.image).floatValue();
+      }
+      catch (Exception ignored) { }
+
+      // Only these query types receive the boost; for anything else
+      // (including a null q) the boost is silently dropped.
+      if (q instanceof TermQuery) 
+        ((TermQuery) q).setBoost(f);
+      else if (q instanceof PhraseQuery) 
+        ((PhraseQuery) q).setBoost(f);
+      else if (q instanceof MultiTermQuery)
+        ((MultiTermQuery) q).setBoost(f);
+    }
+    return q; 
+  }
+}
+
+
--- a/src/java/org/apache/lucene/queryParser/package.html
+++ b/src/java/org/apache/lucene/queryParser/package.html
@ -0,0 +1,15 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<html>
+<head>
+   <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
+   <meta name="Author" content="Doug Cutting">
+</head>
+<body>
+A simple query parser implemented with JavaCC.
+<p>Note that JavaCC defines lots of public classes, methods and fields
+that do not need to be public.&nbsp; These clutter the documentation.&nbsp;
+Sorry.
+<p>Note that because JavaCC defines a class named <tt>Token</tt>, <tt>org.apache.lucene.analysis.Token</tt>
+must always be fully qualified in source code in this package.
+</body>
+</html>
--- a/src/java/org/apache/lucene/rootrules.mk
+++ b/src/java/org/apache/lucene/rootrules.mk
@ -0,0 +1,58 @@
+# rules to enable the running of "make jar" and the like from any dir..
+#
+# NOTE(review): every path and package name below still uses the
+# com/lucene (com.lucene.*) layout, while this file itself lives under
+# org/apache/lucene -- confirm whether these need updating.
+
+# directories containing java source code
+DIRS = store util document analysis analysis/standard index search queryParser
+# dotted package names derived from DIRS, e.g. analysis/standard becomes
+# com.lucene.analysis.standard
+PACKAGES = $(subst /,.,$(patsubst %,com.lucene.%,$(DIRS)))
+
+# default javadoc -link target, used only when JAVALINK is not already set
+ifeq ($(JAVALINK),) 
+  JAVALINK = http://java.sun.com/products/jdk/1.3/docs/api/
+endif
+
+# OLDJAVA does not have a -link option
+# (when OLDJAVA is set the jar is also built without the manifest file)
+ifeq ($(OLDJAVA),)
+  JLINK_OPT = -link $(JAVALINK)
+  JAR_CMD = $(JAR) -cvfm lucene.jar com/lucene/manifest
+else
+  JAR_CMD = $(JAR) -cvf lucene.jar
+endif
+
+.PHONY: jar doc demo release
+
+# Build lucene.jar in $(ROOT) from the class files one and two directory
+# levels below com/lucene.
+jar:	all_classes
+	cd $(ROOT) && $(JAR_CMD) \
+	 `ls com/lucene/*/*.class` `ls com/lucene/*/*/*.class`
+
+# Regenerate the javadoc for $(PACKAGES) into a fresh $(ROOT)/doc/api.
+# mkdir is called without -p, so $(ROOT)/doc has to exist already.
+doc:	all_classes
+	if [ -d $(ROOT)/doc/api ]; then rm -rf $(ROOT)/doc/api ;fi
+	mkdir $(ROOT)/doc/api
+	$(JAVADOC) -classpath '$(CLASSPATH)' -author -version \
+	 -d $(ROOT)/doc/api $(JLINK_OPT) $(PACKAGES)
+
+# Build the demo, HTMLParser first.
+demo: all_classes
+	$(MAKE) -C $(ROOT)/demo/HTMLParser -w
+	$(MAKE) -C $(ROOT)/demo -w CLASSPATH=..
+
+# Bundle the jar, docs and demo files into lucene.tar.
+release: jar demo doc
+	cd $(ROOT) && tar cvf lucene.tar lucene.jar doc/*.html doc/api \
+	   demo/*.java demo/*.class demo/*.html demo/*.jhtml \
+	   demo/HTMLParser/*.class demo/HTMLParser/*.jj \
+	   demo/HTMLParser/*.java
+
+# make all the Lucene classes 
+# TARGET is a target-specific variable: it selects which goal the per-directory
+# sub-make below is asked to build.
+all_classes : TARGET = classes
+all_classes : $(DIRS)
+
+# Each source directory is built by a recursive make run inside it.
+.PHONY: $(DIRS)
+$(DIRS):
+	$(MAKE) -C $(ROOT)/com/lucene/$@ -w $(TARGET)
+
+# Removes all generated files from src directories.
+src_clean: TARGET = clean
+src_clean: $(DIRS) clean
+
+# Removes all generated files.
+# NOTE(review): target-specific variables normally take effect only inside
+# recipes, so the "DIRS += demo" lines presumably do not add the demo
+# directories to the prerequisite list below -- confirm against GNU make.
+real_clean: DIRS += demo
+real_clean: DIRS += demo/HTMLParser
+real_clean: TARGET = clean
+real_clean: $(DIRS) clean
+	cd $(ROOT) && rm -rf lucene.jar lucene.tar doc/api
--- a/src/java/org/apache/lucene/rules.mk
+++ b/src/java/org/apache/lucene/rules.mk
@ -0,0 +1,128 @@
+# GNU make rules for lucene
+#
+# Included by the makefile of every source directory.  Sets up platform
+# and JDK variables, the pattern rules for compiling .java and .jj files and
+# the per-directory "classes" and "clean" targets, then pulls in
+# rootrules.mk for the project-wide targets.
+#
+# NOTE(review): the include paths and package names below still use the
+# com/lucene (com.lucene.*) layout, while this file itself lives under
+# org/apache/lucene -- confirm whether these need updating.
+
+# determine whether we're on Win32 or Unix
+ifeq ($(findstring CYGWIN,$(shell uname)),CYGWIN)
+  OS = win32
+else
+  OS = unix
+endif
+
+# DOS compatibility:
+# These should be used in variables that end up in CLASSPATH.
+ifeq ($(OS),win32)
+  SLASH=\\
+  COLON=;
+else
+  SLASH=/
+  COLON=:
+endif
+
+# ROOT should be set to the root directory of the Lucene package
+# hierarchy.  This is typically ../../.., as most packages are of the
+# form com.lucene.<package>.
+# A ROOT supplied by the caller is rewritten to use the platform's SLASH.
+ifeq ($(ROOT),)
+  ROOT = ..$(SLASH)..$(SLASH)..
+else
+  ROOT := $(subst /,$(SLASH),$(ROOT))
+endif
+
+#include all the relevant variables
+include $(subst $(SLASH),/,$(ROOT))/com/lucene/variables.mk
+
+# directories containing java source code
+# (rootrules.mk, included at the end of this file, assigns the same values
+# to DIRS and PACKAGES again)
+DIRS = store util document analysis analysis/standard index search queryParser
+PACKAGES = $(subst /,.,$(patsubst %,com.lucene.%,$(DIRS)))
+
+# JDK location: an explicit JDK_HOME wins, then JAVA_HOME, then a
+# per-platform default.
+ifeq ($(JDK_HOME),)
+  ifneq ($(JAVA_HOME),)
+     JDK_HOME=$(JAVA_HOME)
+   else
+     ifeq ($(OS),win32)
+       JDK_HOME = C:/jdk1.3.1
+     else
+       JDK_HOME = /usr/local/java/jdk1.3.1
+     endif
+   endif
+endif
+
+# Location of JavaCC
+ifeq ($(JAVACC),)
+ ifeq ($(OS),win32)
+  JAVACC = C:/javacc2_0/bin/lib/JavaCC.zip
+ else
+  JAVACC = /usr/local/java/javacc2_0/bin/lib/JavaCC.zip
+ endif
+endif
+
+# JDK_HOME with any backslashes turned into forward slashes, for use in the
+# tool paths below
+JAVADIR = $(subst \,/,$(JDK_HOME))
+
+# The compiler executable.
+ifeq ($(JAVAC),)
+  JAVAC = $(JAVADIR)/bin/javac
+endif
+
+# The java executable
+JAVA = $(JAVADIR)/bin/java
+
+# The jar executable
+JAR = $(JAVADIR)/bin/jar
+
+# javadoc
+JAVADOC = $(JAVADIR)/bin/javadoc
+
+# Options to pass to Java compiler
+ifeq ($(JFLAGS),)
+  JFLAGS = -O
+endif
+
+
+# CLASSPATH
+# By default include the Lucene root, and Java's builtin classes
+# (rt.jar normally, classes.zip when OLDJAVA is set)
+ifeq ($(OLDJAVA),)
+  export CLASSPATH=$(PREPENDCLASSPATH)$(COLON)$(ROOT)$(COLON)$(JDK_HOME)$(SLASH)jre$(SLASH)lib$(SLASH)rt.jar
+else
+  export CLASSPATH=$(PREPENDCLASSPATH)$(COLON)$(ROOT)$(COLON)$(JDK_HOME)$(SLASH)lib$(SLASH)classes.zip
+endif
+
+# JIKESPATH overrides the classpath variable for jikes, so we need to set it
+# here to avoid problems with a jikes user
+export JIKESPATH=$(CLASSPATH)
+
+## Rules
+
+# Use JAVAC to compile .java files into .class files
+%.class : %.java
+	$(JAVAC) $(JFLAGS) $<
+
+# Compile .jj files to .java with JavaCC
+%.java : %.jj
+	$(JAVA) -classpath '$(CLASSPATH)$(COLON)$(JAVACC)' COM.sun.labs.javacc.Main $<
+
+# Add JavaCC generated files to 'classes' and 'clean' targets.
+# DIRT collects the generated sources that "clean" removes in addition to
+# the class files.
+JJFILES = $(wildcard *.jj)
+ifneq ($(JJFILES),)
+  CLASSES += $(patsubst %.jj,%.class,  $(JJFILES))
+  DIRT += $(patsubst %.jj,%.java, $(JJFILES))
+  DIRT += $(patsubst %.jj,%Constants.java, $(JJFILES))
+  DIRT += $(patsubst %.jj,%TokenManager.java, $(JJFILES))
+  DIRT += Token.java TokenMgrError.java TokenManager.java \
+          CharStream.java ASCII_CharStream.java ParseException.java
+endif
+
+
+# Don't delete parser's .java file -- it's needed by javadoc.
+.PRECIOUS: $(patsubst %.jj,%.java, $(JJFILES))
+
+
+# Assume all .java files should have a .class file.
+CLASSES += $(patsubst %.java,%.class,$(wildcard *.java))
+
+# default rule
+classes : $(CLASSES)
+
+# Removes all generated files from the connected src directory.
+clean:
+	rm -f *.class $(DIRT)
+
+# include all the rules for the root directory..
+include $(subst $(SLASH),/,$(ROOT))/com/lucene/rootrules.mk
--- a/src/java/org/apache/lucene/search/BooleanClause.java
+++ b/src/java/org/apache/lucene/search/BooleanClause.java
@ -0,0 +1,75 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/** A single clause of a {@link BooleanQuery}: a sub-query together with
+  the flags saying whether matching it is required or prohibited. */
+public final class BooleanClause {
+  /** The query whose matching documents are combined by the boolean query. */
+  public Query query;
+  /** If true, documents which <i>do not</i> match this sub-query will
+    <i>not</i> match the boolean query. */
+  public boolean required;
+  /** If true, documents which <i>do</i> match this sub-query will
+    <i>not</i> match the boolean query. */
+  public boolean prohibited;
+
+  /** Constructs a BooleanClause with query <code>q</code>, required
+    <code>r</code> and prohibited <code>p</code>. */
+  public BooleanClause(Query q, boolean r, boolean p) {
+    this.query = q;
+    this.required = r;
+    this.prohibited = p;
+  }
+}
--- a/src/java/org/apache/lucene/search/BooleanQuery.java
+++ b/src/java/org/apache/lucene/search/BooleanQuery.java
@ -0,0 +1,177 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Vector;
+import org.apache.lucene.index.IndexReader;
+
+/** A Query that matches documents matching boolean combinations of other
+  queries, typically {@link TermQuery}s or {@link PhraseQuery}s.
+  */
+final public class BooleanQuery extends Query {
+  /** The {@link BooleanClause}s of this query, in the order they were added. */
+  private Vector clauses = new Vector();
+
+  /** Constructs an empty boolean query. */
+  public BooleanQuery() {}
+
+  /** Adds a clause to a boolean query.  Clauses may be:
+    <ul>
+    <li><code>required</code> which means that documents which <i>do not</i>
+    match this sub-query will <i>not</i> match the boolean query;
+    <li><code>prohibited</code> which means that documents which <i>do</i>
+    match this sub-query will <i>not</i> match the boolean query; or
+    <li>neither, in which case matched documents are neither prohibited from
+    nor required to match the sub-query.
+    </ul>
+    It is an error to specify a clause as both <code>required</code> and
+    <code>prohibited</code>.
+    */
+  public final void add(Query query, boolean required, boolean prohibited) {
+    clauses.addElement(new BooleanClause(query, required, prohibited));
+  }
+
+  /** Adds a clause to a boolean query. */
+  public final void add(BooleanClause clause) {
+    clauses.addElement(clause);
+  }
+
+  /** Forwards <code>prepare</code> to the sub-query of every clause. */
+  void prepare(IndexReader reader) {
+    for (int i = 0 ; i < clauses.size(); i++) {
+      BooleanClause c = (BooleanClause)clauses.elementAt(i);
+      c.query.prepare(reader);
+    }
+  }
+
+  /** Returns the sum of the sub-queries' squared weights.  Prohibited
+    clauses are left out of the sum. */
+  final float sumOfSquaredWeights(Searcher searcher)
+       throws IOException {
+    float sum = 0.0f;
+
+    for (int i = 0 ; i < clauses.size(); i++) {
+      BooleanClause c = (BooleanClause)clauses.elementAt(i);
+      if (!c.prohibited)
+        sum += c.query.sumOfSquaredWeights(searcher); // sum sub-query weights
+    }
+
+    return sum;
+  }
+
+  /** Passes <code>norm</code> on to every sub-query that is not
+    prohibited. */
+  final void normalize(float norm) {
+    for (int i = 0 ; i < clauses.size(); i++) {
+      BooleanClause c = (BooleanClause)clauses.elementAt(i);
+      if (!c.prohibited)
+        c.query.normalize(norm);
+    }
+  }
+
+  /** Builds the scorer for this query.  A query with a single clause that is
+    not prohibited is scored directly by that clause's scorer; otherwise the
+    sub-scorers are combined in a {@link BooleanScorer}.
+    @return the scorer, or null if a required clause has no scorer
+    @throws IndexOutOfBoundsException if the query has too many required or
+    prohibited clauses */
+  final Scorer scorer(IndexReader reader)
+       throws IOException {
+
+    if (clauses.size() == 1) {                    // optimize 1-term queries
+      BooleanClause c = (BooleanClause)clauses.elementAt(0);
+      if (!c.prohibited)                          // just return term scorer
+        return c.query.scorer(reader);
+    }
+
+    BooleanScorer result = new BooleanScorer();
+
+    // One bit is used up for every required or prohibited clause; the bit
+    // shifts out of the int, leaving zero, once 32 of them have been seen.
+    int theMask = 1;
+    for (int i = 0 ; i < clauses.size(); i++) {
+      BooleanClause c = (BooleanClause)clauses.elementAt(i);
+      if (c.required || c.prohibited)
+        theMask = theMask << 1;
+
+      Scorer subScorer = c.query.scorer(reader);
+      if (subScorer != null)
+        result.add(subScorer, c.required, c.prohibited);
+      else if (c.required)
+        return null;
+    }
+    // NOTE(review): theMask is already zero after exactly 32 required or
+    // prohibited clauses, so that case is rejected too even though the
+    // message says "more than 32" -- confirm which limit is intended.
+    if (theMask == 0)
+      throw new IndexOutOfBoundsException
+        ("More than 32 required/prohibited clauses in query.");
+
+    return result;
+  }
+
+  /** Prints a user-readable version of this query. */
+  public String toString(String field) {
+    StringBuffer buffer = new StringBuffer();
+    for (int i = 0 ; i < clauses.size(); i++) {
+      BooleanClause c = (BooleanClause)clauses.elementAt(i);
+      if (c.prohibited)
+        buffer.append("-");
+      else if (c.required)
+        buffer.append("+");
+
+      Query subQuery = c.query;
+      if (subQuery instanceof BooleanQuery) {     // wrap sub-bools in parens
+        buffer.append("(");
+        buffer.append(subQuery.toString(field));
+        buffer.append(")");
+      } else
+        buffer.append(subQuery.toString(field));
+
+      if (i != clauses.size()-1)
+        buffer.append(" ");
+    }
+    return buffer.toString();
+  }
+
+}
--- a/src/java/org/apache/lucene/search/BooleanScorer.java
+++ b/src/java/org/apache/lucene/search/BooleanScorer.java
@ -0,0 +1,204 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.index.*;
+
+final class BooleanScorer extends Scorer {
+  private int currentDoc;
+
+  private SubScorer scorers = null;
+  private BucketTable bucketTable = new BucketTable(this);
+
+  private int maxCoord = 1;
+  private float[] coordFactors = null;
+
+  private int requiredMask = 0;
+  private int prohibitedMask = 0;
+  private int nextMask = 1;
+
+  static final class SubScorer {
+    public Scorer scorer;
+    public boolean required = false;
+    public boolean prohibited = false;
+    public HitCollector collector;
+    public SubScorer next;
+
+    public SubScorer(Scorer scorer, boolean required, boolean prohibited,
+		     HitCollector collector, SubScorer next) {
+      this.scorer = scorer;
+      this.required = required;
+      this.prohibited = prohibited;
+      this.collector = collector;
+      this.next = next;
+    }
+  }
+
+  final void add(Scorer scorer, boolean required, boolean prohibited) {
+    int mask = 0;
+    if (required || prohibited) {
+      if (nextMask == 0)
+	throw new IndexOutOfBoundsException
+	  ("More than 32 required/prohibited clauses in query.");
+      mask = nextMask;
+      nextMask = nextMask << 1;
+    } else
+      mask = 0;
+
+    if (!prohibited)
+      maxCoord++;
+
+    if (prohibited)
+      prohibitedMask |= mask;			  // update prohibited mask
+    else if (required)
+      requiredMask |= mask;			  // update required mask
+
+    scorers = new SubScorer(scorer, required, prohibited,
+			    bucketTable.newCollector(mask), scorers);
+  }
+
+  private final void computeCoordFactors() throws IOException {
+    coordFactors = new float[maxCoord];
+    for (int i = 0; i < maxCoord; i++)
+      coordFactors[i] = Similarity.coord(i, maxCoord);
+  }
+
+  final void score(HitCollector results, int maxDoc) throws IOException {
+    if (coordFactors == null)
+      computeCoordFactors();
+
+    while (currentDoc < maxDoc) {
+      currentDoc = Math.min(currentDoc+BucketTable.SIZE, maxDoc);
+      for (SubScorer t = scorers; t != null; t = t.next)
+	t.scorer.score(t.collector, currentDoc);
+      bucketTable.collectHits(results);
+    }
+  }
+
+  static final class Bucket {
+    int	doc = -1;				  // tells if bucket is valid
+    float	score;				  // incremental score
+    int	bits;					  // used for bool constraints
+    int	coord;					  // count of terms in score
+    Bucket 	next;				  // next valid bucket
+  }
+
+  /** A simple hash table of document scores within a range. */
+  static final class BucketTable {
+    public static final int SIZE = 1 << 10;
+    public static final int MASK = SIZE - 1;
+
+    final Bucket[] buckets = new Bucket[SIZE];
+    Bucket first = null;			  // head of valid list
+  
+    private BooleanScorer scorer;
+
+    public BucketTable(BooleanScorer scorer) {
+      this.scorer = scorer;
+    }
+
+    public final void collectHits(HitCollector results) {
+      final int required = scorer.requiredMask;
+      final int prohibited = scorer.prohibitedMask;
+      final float[] coord = scorer.coordFactors;
+
+      for (Bucket bucket = first; bucket!=null; bucket = bucket.next) {
+	if ((bucket.bits & prohibited) == 0 &&	  // check prohibited
+	    (bucket.bits & required) == required){// check required
+	  results.collect(bucket.doc,		  // add to results
+			  bucket.score * coord[bucket.coord]);
+	}
+      }
+      first = null;				  // reset for next round
+    }
+
+    public final int size() { return SIZE; }
+
+    public HitCollector newCollector(int mask) {
+      return new Collector(mask, this);
+    }
+  }
+
+  static final class Collector extends HitCollector {
+    private BucketTable bucketTable;
+    private int mask;
+    public Collector(int mask, BucketTable bucketTable) {
+      this.mask = mask;
+      this.bucketTable = bucketTable;
+    }
+    public final void collect(final int doc, final float score) {
+      final BucketTable table = bucketTable;
+      final int i = doc & BucketTable.MASK;
+      Bucket bucket = table.buckets[i];
+      if (bucket == null)
+	table.buckets[i] = bucket = new Bucket();
+      
+      if (bucket.doc != doc) {			  // invalid bucket
+	bucket.doc = doc;			  // set doc
+	bucket.score = score;			  // initialize score
+	bucket.bits = mask;			  // initialize mask
+	bucket.coord = 1;			  // initialize coord
+	
+	bucket.next = table.first;		  // push onto valid list
+	table.first = bucket;
+      } else {					  // valid bucket
+	bucket.score += score;			  // increment score
+	bucket.bits |= mask;			  // add bits in mask
+	bucket.coord++;				  // increment coord
+      }
+    }
+  }
+}
--- a/src/java/org/apache/lucene/search/DateFilter.java
+++ b/src/java/org/apache/lucene/search/DateFilter.java
@ -0,0 +1,161 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.util.BitSet;
+import java.util.Date;
+import java.io.IOException;
+
+import org.apache.lucene.document.DateField;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.IndexReader;
+
+/** A Filter that restricts search results to a range of time.
+
+   <p>For this to work, documents must have been indexed with a {@link
+   DateField}.  */
+
+public final class DateFilter extends Filter {
+  /** Name of the field holding the indexed date. */
+  String field;
+
+  /** Lower bound as a DateField string; defaults to the minimum date. */
+  String start = DateField.MIN_DATE_STRING();
+  /** Upper bound as a DateField string; defaults to the maximum date. */
+  String end = DateField.MAX_DATE_STRING();
+
+  /** Creates a filter on field <code>f</code> with both bounds left at
+    their defaults; used by the {@link #Before} and {@link #After}
+    factories. */
+  private DateFilter(String f) {
+    field = f;
+  }
+
+  /** Constructs a filter for field <code>f</code> matching dates between
+    <code>from</code> and <code>to</code>. */
+  public DateFilter(String f, Date from, Date to) {
+    field = f;
+    start = DateField.dateToString(from);
+    end = DateField.dateToString(to);
+  }
+  /** Constructs a filter for field <code>f</code> matching times between
+    <code>from</code> and <code>to</code>. */
+  public DateFilter(String f, long from, long to) {
+    field = f;
+    start = DateField.timeToString(from);
+    end = DateField.timeToString(to);
+  }
+
+  /** Constructs a filter for field <code>field</code> matching dates on or
+    before <code>date</code>. */
+  public static DateFilter Before(String field, Date date) {
+    DateFilter result = new DateFilter(field);
+    result.end = DateField.dateToString(date);
+    return result;
+  }
+  /** Constructs a filter for field <code>field</code> matching times on or
+    before <code>time</code>. */
+  public static DateFilter Before(String field, long time) {
+    DateFilter result = new DateFilter(field);
+    result.end = DateField.timeToString(time);
+    return result;
+  }
+
+  /** Constructs a filter for field <code>field</code> matching dates after
+    <code>date</code>. */
+  public static DateFilter After(String field, Date date) {
+    DateFilter result = new DateFilter(field);
+    result.start = DateField.dateToString(date);
+    return result;
+  }
+  /** Constructs a filter for field <code>field</code> matching times after
+    <code>time</code>. */
+  public static DateFilter After(String field, long time) {
+    DateFilter result = new DateFilter(field);
+    result.start = DateField.timeToString(time);
+    return result;
+  }
+
+  /** Returns a BitSet with true for documents which should be permitted in
+    search results, and false for those that should not.
+
+    <p>Walks the term enumeration obtained for <code>(field, start)</code>
+    and sets the bit of every document listed for each term that compares
+    less than or equal to <code>(field, end)</code>.
+
+    @throws IOException if reading terms or documents from the index fails
+    */
+  final public BitSet bits(IndexReader reader) throws IOException {
+    BitSet bits = new BitSet(reader.maxDoc());
+    // "enum" is a reserved word from Java 5 on, hence the longer name.
+    TermEnum termEnum = reader.terms(new Term(field, start));
+    try {
+      Term stop = new Term(field, end);
+      // A TermEnum may report a null term (FilteredTermEnum in this package
+      // does so when nothing matches); treat that as "no more terms"
+      // rather than dereferencing it.
+      Term current = termEnum.term();
+      while (current != null && current.compareTo(stop) <= 0) {
+        TermDocs termDocs = reader.termDocs(current);
+        try {
+          while (termDocs.next())
+            bits.set(termDocs.doc());
+        } finally {
+          termDocs.close();
+        }
+        if (!termEnum.next()) {
+          break;
+        }
+        current = termEnum.term();
+      }
+    } finally {
+      termEnum.close();
+    }
+    return bits;
+  }
+
+  /** Returns <code>field:start-end</code>, with both bounds converted back
+    to dates and printed with <code>Date.toString()</code>. */
+  public final String toString() {
+    StringBuffer buffer = new StringBuffer();
+    buffer.append(field);
+    buffer.append(":");
+    buffer.append(DateField.stringToDate(start).toString());
+    buffer.append("-");
+    buffer.append(DateField.stringToDate(end).toString());
+    return buffer.toString();
+  }
+}
--- a/src/java/org/apache/lucene/search/ExactPhraseScorer.java
+++ b/src/java/org/apache/lucene/search/ExactPhraseScorer.java
@ -0,0 +1,91 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Vector;
+import org.apache.lucene.util.*;
+import org.apache.lucene.index.*;
+
+/** A {@link PhraseScorer} whose phrase frequency is computed by
+ * {@link #phraseFreq()} below.
+ *
+ * <p>NOTE(review): <code>first</code>, <code>last</code>, <code>pq</code>,
+ * <code>pqToList()</code> and <code>firstToLast()</code> are inherited and
+ * not visible from this class; the comments below describe them only as far
+ * as their use here shows. */
+final class ExactPhraseScorer extends PhraseScorer {
+
+  /** Passes all arguments straight through to the PhraseScorer constructor. */
+  ExactPhraseScorer(TermPositions[] tps, byte[] n, float w)
+       throws IOException {
+    super(tps, n, w);
+  }
+
+  /** Counts the matches found while walking the position list and returns
+   * the count as a float.
+   *
+   * <p>A match is counted each time <code>first.position</code> is no longer
+   * below <code>last.position</code>; presumably the list is ordered by
+   * position, so that this means all entries are equal -- TODO confirm in
+   * PhraseScorer / PhrasePositions.  Counting stops as soon as either
+   * <code>first</code> or <code>last</code> has no further position. */
+  protected final float phraseFreq() throws IOException {
+    // sort list with pq
+    for (PhrasePositions pp = first; pp != null; pp = pp.next) {
+      pp.firstPosition();
+      pq.put(pp);				  // build pq from list
+    }
+    pqToList();					  // rebuild list from pq
+
+    int freq = 0;
+    do {					  // find position w/ all terms
+      while (first.position < last.position) {	  // scan forward in first
+	do {
+	  if (!first.nextPosition())		  // first exhausted: done
+	    return (float)freq;
+	} while (first.position < last.position);
+	firstToLast();				  // presumably moves first to the end of the list
+      }
+      freq++;					  // all equal: a match
+    } while (last.nextPosition());
+  
+    return (float)freq;
+  }
+}
--- a/src/java/org/apache/lucene/search/Filter.java
+++ b/src/java/org/apache/lucene/search/Filter.java
@ -0,0 +1,67 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.util.BitSet;
+import java.io.IOException;
+import org.apache.lucene.index.IndexReader;
+
+/** Base class for restricting a search to a subset of an index.
+
+  <p>Subclasses decide, per document, whether it may appear in search
+  results by implementing {@link #bits}. */
+public abstract class Filter {
+  /** Computes the set of permitted documents for <code>reader</code>.
+
+    @return a BitSet with true for documents which should be permitted in
+      search results, and false for those that should not
+    @throws IOException declared so that implementations may read the index
+    */
+  public abstract BitSet bits(IndexReader reader) throws IOException;
+}
--- a/src/java/org/apache/lucene/search/FilteredTermEnum.java
+++ b/src/java/org/apache/lucene/search/FilteredTermEnum.java
@ -0,0 +1,130 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+
+/** Abstract class for enumerating a subset of all terms. 
+
+  <p>Term enumerations are always ordered by Term.compareTo().  Each term in
+  the enumeration is greater than all that precede it.  */
+public abstract class FilteredTermEnum extends TermEnum {
+    /** Term most recently accepted by termCompare(), or null if none. */
+    private Term currentTerm = null;
+    /** Underlying enumeration; null until setEnum() and after close(). */
+    private TermEnum actualEnum = null;
+    
+    /** Does nothing with its arguments; subclasses are expected to call
+     * {@link #setEnum} to install the enumeration to filter. */
+    public FilteredTermEnum(IndexReader reader, Term term) throws IOException {}
+
+    /** Equality compare on the term */
+    protected abstract boolean termCompare(Term term);
+    
+    /** Equality measure on the term */
+    protected abstract float difference();
+
+    /** Indicates the end of the enumeration has been reached */
+    protected abstract boolean endEnum();
+    
+    /** Installs the enumeration to filter and positions this enumeration on
+     * its first accepted term, if any.
+     *
+     * @param actualEnum the enumeration to filter; must not be null
+     */
+    protected void setEnum(TermEnum actualEnum) throws IOException {
+        this.actualEnum = actualEnum;
+        // Find the first term that matches
+        Term term = actualEnum.term();
+        // The wrapped enumeration may have no current term at all; do not
+        // hand null to termCompare(), let next() look for a match instead.
+        if (term != null && termCompare(term)) 
+            currentTerm = term;
+        else next();
+    }
+    
+    /** 
+     * Returns the docFreq of the current Term in the enumeration, or -1 if
+     * no enumeration has been set.
+     * Initially invalid, valid after next() called for the first time. 
+     */
+    public int docFreq() {
+        if (actualEnum == null) return -1;
+        return actualEnum.docFreq();
+    }
+    
+    /** Increments the enumeration to the next element.  True if one exists. */
+    public boolean next() throws IOException {
+        if (actualEnum == null) return false; // the actual enumerator is not initialized!
+        currentTerm = null;
+        while (currentTerm == null) {
+            if (endEnum()) return false;
+            if (!actualEnum.next()) return false; // underlying terms exhausted
+            Term term = actualEnum.term();
+            if (termCompare(term)) {
+                currentTerm = term;
+                return true;
+            }
+        }
+        // Defensive: the loop above normally leaves only through a return.
+        currentTerm = null;
+        return false;
+    }
+    
+    /** Returns the current Term in the enumeration.
+     * Initially invalid, valid after next() called for the first time. */
+    public Term term() {
+        return currentTerm;
+    }
+    
+    /** Closes the enumeration to further activity, freeing resources.
+     * Safe to call when no enumeration was ever set, and more than once. */
+    public void close() throws IOException {
+        if (actualEnum != null)
+            actualEnum.close();
+        currentTerm = null;
+        actualEnum = null;
+    }
+}
--- a/src/java/org/apache/lucene/search/FuzzyQuery.java
+++ b/src/java/org/apache/lucene/search/FuzzyQuery.java
@ -0,0 +1,79 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import java.io.IOException;
+
+/** Implements the fuzzy search query */
+final public class FuzzyQuery extends MultiTermQuery {
+    private Term fuzzyTerm;
+    
+    public FuzzyQuery(Term term) {
+        super(term);
+        fuzzyTerm = term;
+    }
+    
+    final void prepare(IndexReader reader) {
+        try {
+            setEnum(new FuzzyTermEnum(reader, fuzzyTerm));
+        } catch (IOException e) {}
+    }
+    
+    public String toString(String field) {
+        return super.toString(field) + '~';
+    }
+}
--- a/src/java/org/apache/lucene/search/FuzzyTermEnum.java
+++ b/src/java/org/apache/lucene/search/FuzzyTermEnum.java
@ -0,0 +1,175 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+
+/** Subclass of FilteredTermEnum for enumerating all terms that are similar to the specified filter term.
+
+  <p>Term enumerations are always ordered by Term.compareTo().  Each term in
+  the enumeration is greater than all that precede it.  */
+final public class FuzzyTermEnum extends FilteredTermEnum {
+    /**
+     * Similarity of the term most recently passed to termCompare:
+     * 1 minus (edit distance / length of the shorter string).
+     */
+    double distance;
+    boolean fieldMatch = false;
+    /** Set once termCompare has seen a term from a different field. */
+    boolean endEnum = false;
+
+    Term searchTerm = null;
+    String field = "";
+    String text = "";
+    int textlen;
+    
+    /**
+     * Creates an enumeration positioned at the first term of the search
+     * term's field (the enumeration is opened at the empty text of that field).
+     *
+     * @param reader the index reader supplying the terms
+     * @param term   the term to which enumerated terms are compared
+     * @throws IOException if the underlying term enumeration cannot be opened
+     */
+    public FuzzyTermEnum(IndexReader reader, Term term) throws IOException {
+        super(reader, term);
+        searchTerm = term;
+        field = searchTerm.field();
+        text = searchTerm.text();
+        textlen = text.length();
+        setEnum(reader.terms(new Term(searchTerm.field(), "")));
+    }
+    
+    /**
+     The termCompare method in FuzzyTermEnum uses Levenshtein distance to 
+     calculate the distance between the given term and the comparing term. 
+     <p>Returns true when the resulting similarity is strictly greater than
+     FUZZY_THRESHOLD.  A term from another field marks the end of the
+     enumeration and is rejected.
+     */
+    final protected boolean termCompare(Term term) {
+        // Reference comparison: presumably field names are interned by Term
+        // -- TODO confirm before replacing with equals().
+        if (field == term.field()) {
+            String target = term.text();
+            int targetlen = target.length();
+            int dist = editDistance(text, target, textlen, targetlen);
+            // If either string is empty the divisor is 0; the double division
+            // then yields an infinity or NaN and the comparison below is false.
+            distance = 1 - ((double)dist / (double)Math.min(textlen, targetlen));
+            return (distance > FUZZY_THRESHOLD);
+        }
+        endEnum = true;
+        return false;
+    }
+    
+    /**
+     Returns how far the last computed similarity lies above FUZZY_THRESHOLD,
+     multiplied by SCALE_FACTOR.
+     */
+    final protected float difference() {
+        return (float)((distance - FUZZY_THRESHOLD) * SCALE_FACTOR);
+    }
+    
+    /** Returns true once a term from a different field has been compared. */
+    final public boolean endEnum() {
+        return endEnum;
+    }
+    
+    /******************************
+     * Compute Levenshtein distance
+     ******************************/
+    
+    /** Minimum similarity (exclusive) a term must reach to be accepted. */
+    public static final double FUZZY_THRESHOLD = 0.5;
+    /** Factor applied in difference() to the similarity above the threshold. */
+    public static final double SCALE_FACTOR = 1.0f / (1.0f - FUZZY_THRESHOLD);
+    
+    /**
+     Finds and returns the smallest of three integers 
+     */
+    private final static int min(int a, int b, int c) {
+        int t = (a < b) ? a : b;
+        return (t < c) ? t : c;
+    }
+    
+    /**
+     * This per-instance scratch matrix saves us from the time required to
+     * create a new array every time editDistance is called.  It is replaced
+     * by a larger one only when a comparison does not fit.
+     */
+    private int e[][] = new int[0][0];
+    
+    /**
+     Levenshtein distance also known as edit distance is a measure of similarity
+     between two strings where the distance is measured as the number of character 
+     deletions, insertions or substitutions required to transform one string to 
+     the other string. 
+     <p>This method takes in four parameters; two strings and their respective 
+     lengths to compute the Levenshtein distance between the two strings.
+     The result is returned as an integer.
+     */ 
+    private final int editDistance(String s, String t, int n, int m) {
+        // An empty string is exactly as many insertions away as the other is long.
+        if (n == 0) return m;
+        if (m == 0) return n;
+        
+        // Grow the scratch matrix when it is too small in either dimension.
+        // Each dimension is sized from its own current extent so that neither
+        // one ever shrinks (the column count used to be derived from the row
+        // count by mistake).
+        int rows = e.length;
+        int cols = (rows == 0) ? 0 : e[0].length;
+        if (rows <= n || cols <= m) {
+            e = new int[Math.max(rows, n + 1)][Math.max(cols, m + 1)];
+        }
+        int d[][] = e; // matrix
+        int i; // iterates through s
+        int j; // iterates through t
+        char s_i; // ith character of s
+        
+        // init matrix d: distance from the empty prefix is the prefix length
+        for (i = 0; i <= n; i++) d[i][0] = i;
+        for (j = 0; j <= m; j++) d[0][j] = j;
+        
+        // start computing edit distance
+        for (i = 1; i <= n; i++) {
+            s_i = s.charAt(i - 1);
+            for (j = 1; j <= m; j++) {
+                if (s_i != t.charAt(j-1))
+                    d[i][j] = min(d[i-1][j], d[i][j-1], d[i-1][j-1])+1;
+                else d[i][j] = min(d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1]);
+            }
+        }
+        
+        // we got the result!
+        return d[n][m];
+    }
+    
+    /** Closes the superclass and drops the references to the search term. */
+    public void close() throws IOException {
+        super.close();
+        searchTerm = null;
+        field = null;
+        text = null;
+    }
+}
--- a/src/java/org/apache/lucene/search/HitCollector.java
+++ b/src/java/org/apache/lucene/search/HitCollector.java
@ -0,0 +1,76 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/** Lower-level search API.
+ *
+ * <p>Subclasses receive matching documents one at a time through
+ * {@link #collect(int,float)}.
+ *
+ * @see IndexSearcher#search(Query,HitCollector)
+ */
+public abstract class HitCollector {
+  /** Called once for every non-zero scoring document, with the document number
+   * and its score.
+   *
+   * <P>If, for example, an application wished to collect all of the hits for a
+   * query in a BitSet, then it might:<pre>
+   *   Searcher searcher = new IndexSearcher(indexReader);
+   *   final BitSet bits = new BitSet(indexReader.maxDoc());
+   *   searcher.search(query, new HitCollector() {
+   *       public void collect(int doc, float score) {
+   *         bits.set(doc);
+   *       }
+   *     });
+   * </pre>
+   *
+   * @param doc   the number of the matching document
+   * @param score the score of that document
+   */
+  public abstract void collect(int doc, float score);
+}
--- a/src/java/org/apache/lucene/search/HitQueue.java
+++ b/src/java/org/apache/lucene/search/HitQueue.java
@ -0,0 +1,72 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.util.PriorityQueue;
+
+/** Priority queue of ScoreDoc entries ordered by score, then by document number. */
+final class HitQueue extends PriorityQueue {
+  /** Creates a queue initialized for the given size. */
+  HitQueue(int size) {
+    initialize(size);
+  }
+
+  /**
+   * Orders two ScoreDoc entries: the lower score is "less"; when the scores
+   * are equal, the entry with the larger document number is "less".
+   */
+  protected final boolean lessThan(Object a, Object b) {
+    ScoreDoc left = (ScoreDoc)a;
+    ScoreDoc right = (ScoreDoc)b;
+    if (left.score != right.score)
+      return left.score < right.score;
+    return left.doc > right.doc;              // tie: compare document numbers
+  }
+}
--- a/src/java/org/apache/lucene/search/Hits.java
+++ b/src/java/org/apache/lucene/search/Hits.java
@ -0,0 +1,188 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Vector;
+import java.util.BitSet;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+
+/** A ranked list of documents, used to hold search results. */
+public final class Hits {
+  private Query query;
+  private Searcher searcher;
+  private Filter filter = null;
+
+  private int length;                             // the total number of hits
+  private Vector hitDocs = new Vector();          // cache of hits retrieved
+
+  private HitDoc first;                           // head of LRU cache
+  private HitDoc last;                            // tail of LRU cache
+  private int numDocs = 0;                        // number cached
+  private int maxDocs = 200;                      // max to cache
+
+  /** Runs the query and retrieves the first batch of hits. */
+  Hits(Searcher s, Query q, Filter f) throws IOException {
+    query = q;
+    searcher = s;
+    filter = f;
+    getMoreDocs(50);                              // doubled below: asks for 100
+  }
+
+  // Tries to add new documents to hitDocs.
+  // Ensures that the hit numbered <code>min</code> has been retrieved.
+  private final void getMoreDocs(int min) throws IOException {
+    if (hitDocs.size() > min)
+      min = hitDocs.size();
+
+    int n = min * 2;                              // double # retrieved
+    TopDocs topDocs = searcher.search(query, filter, n);
+    length = topDocs.totalHits;
+    ScoreDoc[] scoreDocs = topDocs.scoreDocs;
+
+    // Scale scores down only when the first score exceeds 1.0, so that the
+    // first hit then gets exactly 1.0.
+    float scoreNorm = 1.0f;
+    if (length > 0 && scoreDocs[0].score > 1.0f)
+      scoreNorm = 1.0f / scoreDocs[0].score;
+
+    // Append only the hits beyond those already held in hitDocs.
+    int end = scoreDocs.length < length ? scoreDocs.length : length;
+    for (int i = hitDocs.size(); i < end; i++)
+      hitDocs.addElement(new HitDoc(scoreDocs[i].score*scoreNorm,
+                                    scoreDocs[i].doc));
+  }
+
+  /** Returns the total number of hits available in this set. */
+  public final int length() {
+    return length;
+  }
+
+  /** Returns the nth document in this set.
+    <p>Documents are cached, so that repeated requests for the same element may
+    return the same Document object.
+    @throws IOException if the document cannot be read; the cache is left
+      unchanged in that case */ 
+  public final Document doc(int n) throws IOException {
+    HitDoc hitDoc = hitDoc(n);
+
+    // A non-null doc is what marks an entry as linked into the LRU list.
+    // Read the document BEFORE touching the list: if the read throws, the
+    // entry must not be left linked with a null doc, because a later call
+    // would then link it a second time and corrupt the list and numDocs.
+    if (hitDoc.doc == null)
+      hitDoc.doc = searcher.doc(hitDoc.id);       // cache miss: read document
+    else
+      remove(hitDoc);                             // cached: unlink to re-add
+
+    // Update LRU cache of documents
+    addToFront(hitDoc);                           // add to front of list
+    if (numDocs > maxDocs) {                      // if cache is full
+      HitDoc oldLast = last;
+      remove(last);                               // flush last
+      oldLast.doc = null;                         // let doc get gc'd
+    }
+
+    return hitDoc.doc;
+  }
+
+  /** Returns the score for the nth document in this set. */ 
+  public final float score(int n) throws IOException {
+    return hitDoc(n).score;
+  }
+
+  // Returns the entry for hit n, retrieving more hits first when n lies
+  // beyond those fetched so far.
+  private final HitDoc hitDoc(int n) throws IOException {
+    if (n >= length)
+      throw new IndexOutOfBoundsException("Not a valid hit number: " + n);
+    if (n >= hitDocs.size())
+      getMoreDocs(n);
+
+    return (HitDoc)hitDocs.elementAt(n);
+  }
+
+  private final void addToFront(HitDoc hitDoc) {  // insert at front of cache
+    if (first == null)
+      last = hitDoc;
+    else
+      first.prev = hitDoc;
+    
+    hitDoc.next = first;
+    first = hitDoc;
+    hitDoc.prev = null;
+
+    numDocs++;
+  }
+
+  private final void remove(HitDoc hitDoc) {      // remove from cache
+    if (hitDoc.doc == null)                       // it's not in the list
+      return;                                     // abort
+
+    if (hitDoc.next == null)
+      last = hitDoc.prev;
+    else
+      hitDoc.next.prev = hitDoc.prev;
+    
+    if (hitDoc.prev == null)
+      first = hitDoc.next;
+    else
+      hitDoc.prev.next = hitDoc.next;
+
+    numDocs--;
+  }
+}
+
+/** One entry of a Hits list: a score, a document id and the cached document. */
+final class HitDoc {
+  HitDoc next;                                    // in doubly-linked cache
+  HitDoc prev;                                    // in doubly-linked cache
+
+  float score;
+  int id;
+  Document doc = null;                            // null until it is loaded
+
+  /** Creates an entry with the given score and id and no document yet. */
+  HitDoc(float score, int id) {
+    this.score = score;
+    this.id = id;
+  }
+}
--- a/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/src/java/org/apache/lucene/search/IndexSearcher.java
@ -0,0 +1,178 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.BitSet;
+
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.PriorityQueue;
+
+/** Implements search over a single IndexReader. */
+public final class IndexSearcher extends Searcher {
+  IndexReader reader;
+
+  /** Creates a searcher searching the index in the named directory. */
+  public IndexSearcher(String path) throws IOException {
+    this(IndexReader.open(path));
+  }
+    
+  /** Creates a searcher searching the index in the provided directory. */
+  public IndexSearcher(Directory directory) throws IOException {
+    this(IndexReader.open(directory));
+  }
+    
+  /** Creates a searcher searching the provided index. */
+  public IndexSearcher(IndexReader r) {
+    reader = r;
+  }
+    
+  /** Frees resources associated with this Searcher. */
+  public final void close() throws IOException {
+    reader.close();
+  }
+
+  final int docFreq(Term term) throws IOException {
+    return reader.docFreq(term);
+  }
+
+  final Document doc(int i) throws IOException {
+    return reader.document(i);
+  }
+
+  final int maxDoc() throws IOException {
+    return reader.maxDoc();
+  }
+
+  final TopDocs search(Query query, Filter filter, final int nDocs)
+       throws IOException {
+    Scorer scorer = Query.scorer(query, this, reader);
+    if (scorer == null)
+      return new TopDocs(0, new ScoreDoc[0]);
+
+    final BitSet bits = filter != null ? filter.bits(reader) : null;
+    final HitQueue hq = new HitQueue(nDocs);
+    final int[] totalHits = new int[1];
+    scorer.score(new HitCollector() {
+	private float minScore = 0.0f;
+	public final void collect(int doc, float score) {
+	  if (score > 0.0f &&			  // ignore zeroed buckets
+	      (bits==null || bits.get(doc))) {	  // skip docs not in bits
+	    totalHits[0]++;
+	    if (score >= minScore) {
+	      hq.put(new ScoreDoc(doc, score));	  // update hit queue
+	      if (hq.size() > nDocs) {		  // if hit queue overfull
+		hq.pop();			  // remove lowest in hit queue
+		minScore = ((ScoreDoc)hq.top()).score; // reset minScore
+	      }
+	    }
+	  }
+	}
+      }, reader.maxDoc());
+
+    ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
+    for (int i = hq.size()-1; i >= 0; i--)	  // put docs in array
+      scoreDocs[i] = (ScoreDoc)hq.pop();
+    
+    return new TopDocs(totalHits[0], scoreDocs);
+  }
+
+  /** Lower-level search API.
+   *
+   * <p>{@link HitCollector#collect(int,float)} is called for every non-zero
+   * scoring document.
+   *
+   * <p>Applications should only use this if they need <it>all</it> of the
+   * matching documents.  The high-level search API ({@link
+   * Searcher#search(Query)}) is usually more efficient, as it skips
+   * non-high-scoring hits.  */
+  public final void search(Query query, HitCollector results)
+      throws IOException {
+    search(query, null, results);
+  }
+
+  /** Lower-level search API.
+   *
+   * <p>{@link HitCollector#collect(int,float)} is called for every non-zero
+   * scoring document.
+   *
+   * <p>Applications should only use this if they need <it>all</it> of the
+   * matching documents.  The high-level search API ({@link
+   * Searcher#search(Query)}) is usually more efficient, as it skips
+   * non-high-scoring hits.  */
+  public final void search(Query query, Filter filter,
+			   final HitCollector results) throws IOException {
+    HitCollector collector = results;
+    if (filter != null) {
+      final BitSet bits = filter.bits(reader);
+      collector = new HitCollector() {
+	  public final void collect(int doc, float score) {
+	    if (bits.get(doc)) {		  // skip docs not in bits
+	      results.collect(doc, score);
+	    }
+	  }
+	};
+    }
+
+    Scorer scorer = Query.scorer(query, this, reader);
+    if (scorer == null)
+      return;
+    scorer.score(collector, reader.maxDoc());
+  }
+
+}
--- a/src/java/org/apache/lucene/search/Makefile
+++ b/src/java/org/apache/lucene/search/Makefile
@ -0,0 +1,2 @@
+# sub-directory makefile for lucene
+include ../rules.mk
--- a/src/java/org/apache/lucene/search/MultiSearcher.java
+++ b/src/java/org/apache/lucene/search/MultiSearcher.java
@ -0,0 +1,152 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Vector;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.PriorityQueue;
+
+/** Implements search over a set of Searcher's. */
+public final class MultiSearcher extends Searcher {
+  private Searcher[] searchers;
+  private int[] starts;
+  private int maxDoc = 0;
+
+  /** Creates a searcher which searches <i>searchers</i>. */
+  public MultiSearcher(Searcher[] searchers) throws IOException {
+    this.searchers = searchers;
+
+    starts = new int[searchers.length + 1];	  // build starts array
+    for (int i = 0; i < searchers.length; i++) {
+      starts[i] = maxDoc;
+      maxDoc += searchers[i].maxDoc();		  // compute maxDocs
+    }
+    starts[searchers.length] = maxDoc;
+  }
+    
+  /** Frees resources associated with this Searcher. */
+  public final void close() throws IOException {
+    for (int i = 0; i < searchers.length; i++)
+      searchers[i].close();
+  }
+
+  final int docFreq(Term term) throws IOException {
+    int docFreq = 0;
+    for (int i = 0; i < searchers.length; i++)
+      docFreq += searchers[i].docFreq(term);
+    return docFreq;
+  }
+
+  final Document doc(int n) throws IOException {
+    int i = searcherIndex(n);			  // find searcher index
+    return searchers[i].doc(n - starts[i]);	  // dispatch to searcher
+  }
+
+  // replace w/ call to Arrays.binarySearch in Java 1.2
+  private final int searcherIndex(int n) {	  // find searcher for doc n:
+    int lo = 0;					  // search starts array
+    int hi = searchers.length - 1;		  // for first element less
+						  // than n, return its index
+    while (hi >= lo) {
+      int mid = (lo + hi) >> 1;
+      int midValue = starts[mid];
+      if (n < midValue)
+	hi = mid - 1;
+      else if (n > midValue)
+	lo = mid + 1;
+      else
+	return mid;
+    }
+    return hi;
+  }
+
+  final int maxDoc() throws IOException {
+    return maxDoc;
+  }
+
+  final TopDocs search(Query query, Filter filter, int nDocs)
+       throws IOException {
+    HitQueue hq = new HitQueue(nDocs);
+    float minScore = 0.0f;
+    int totalHits = 0;
+
+    for (int i = 0; i < searchers.length; i++) {  // search each searcher
+      TopDocs docs = searchers[i].search(query, filter, nDocs);
+      totalHits += docs.totalHits;		  // update totalHits
+      ScoreDoc[] scoreDocs = docs.scoreDocs;
+      for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq
+	ScoreDoc scoreDoc = scoreDocs[j];
+	if (scoreDoc.score >= minScore) {
+	  scoreDoc.doc += starts[i];		  // convert doc
+	  hq.put(scoreDoc);			  // update hit queue
+	  if (hq.size() > nDocs) {		  // if hit queue overfull
+	    hq.pop();				  // remove lowest in hit queue
+	    minScore = ((ScoreDoc)hq.top()).score; // reset minScore
+	  }
+	} else
+	  break;				  // no more scores > minScore
+      }
+    }
+    
+    ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
+    for (int i = hq.size()-1; i >= 0; i--)	  // put docs in array
+      scoreDocs[i] = (ScoreDoc)hq.pop();
+    
+    return new TopDocs(totalHits, scoreDocs);
+  }
+}
--- a/src/java/org/apache/lucene/search/MultiTermQuery.java
+++ b/src/java/org/apache/lucene/search/MultiTermQuery.java
@ -0,0 +1,161 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Vector;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+
+/** A Query that matches documents containing a subset of terms provided by a
+ FilteredTermEnum enumeration. MultiTermQuery is not designed to be used by
+ itself, because it is not initialized with a FilteredTermEnum enumeration:
+ one must be supplied through <code>setEnum</code> before the query is used.
+ For example, WildcardQuery and FuzzyQuery extend MultiTermQuery to provide
+ WildcardTermEnum and FuzzyTermEnum respectively. */
+public class MultiTermQuery extends Query {
+    private Term term;
+    // Called termEnum, not enum: "enum" is a reserved word from Java 5 on.
+    private FilteredTermEnum termEnum;
+    private IndexReader reader;     // not referenced by any method of this class
+    private float boost = 1.0f;
+    private BooleanQuery query;     // expansion, built lazily by getQuery()
+
+    /** Enable or disable lucene style toString(field) format */
+    private static boolean LUCENE_STYLE_TOSTRING = false;
+
+    /** Constructs a query for terms matching <code>term</code>.  The term
+     * enumeration has to be supplied afterwards with <code>setEnum</code>;
+     * the expanded query is left unset until it is first needed. */
+    public MultiTermQuery(Term term) {
+        this.term = term;
+    }
+
+    /** Set the TermEnum to be used */
+    protected void setEnum(FilteredTermEnum termEnum) {
+        this.termEnum = termEnum;
+    }
+
+    /** Sets the boost for this term to <code>b</code>.  Documents containing
+     * this term will (in addition to the normal weightings) have their score
+     * multiplied by <code>boost</code>. */
+    final public void setBoost(float boost) {
+        this.boost = boost;
+    }
+
+    /** Returns the boost for this term. */
+    final public float getBoost() {
+        return boost;
+    }
+
+    /** Delegates to the expanded BooleanQuery. */
+    final float sumOfSquaredWeights(Searcher searcher) throws IOException {
+        return getQuery().sumOfSquaredWeights(searcher);
+    }
+
+    /** Delegates to the expanded BooleanQuery.  This method declares no
+     * checked exception, so an IOException raised while expanding the query
+     * is rethrown as a RuntimeException carrying its text. */
+    final void normalize(float norm) {
+        try {
+            getQuery().normalize(norm);
+        } catch (IOException e) {
+            throw new RuntimeException(e.toString());
+        }
+    }
+
+    /** Delegates to the expanded BooleanQuery. */
+    final Scorer scorer(IndexReader reader) throws IOException {
+        return getQuery().scorer(reader);
+    }
+
+    /** Returns the expanded query, building it on first use: one TermQuery
+     * per non-null term of the enumeration, boosted by this query's boost
+     * times <code>difference()</code> of the enumeration, added to a
+     * BooleanQuery.  The enumeration is closed once it has been walked, even
+     * if walking it fails. */
+    final private BooleanQuery getQuery() throws IOException {
+        if (query == null) {
+            BooleanQuery q = new BooleanQuery();
+            try {
+                do {
+                    Term t = termEnum.term();
+                    if (t != null) {
+                        TermQuery tq = new TermQuery(t);            // found a match
+                        tq.setBoost(boost * termEnum.difference()); // set the boost
+                        q.add(tq, false, false);                    // add to q
+                    }
+                } while (termEnum.next());
+            } finally {
+                termEnum.close();
+            }
+            query = q;
+        }
+        return query;
+    }
+
+    /** Prints a user-readable version of this query.  Unless the lucene
+     * style format is enabled, this is the expanded query in parentheses;
+     * if the expansion is unavailable, or the lucene style is enabled, it is
+     * <code>field:text^boost</code> with the field omitted when it equals
+     * <code>field</code> and the boost omitted when it is 1.0. */
+    public String toString(String field) {
+        if (!LUCENE_STYLE_TOSTRING) {
+            Query q = null;
+            try {
+                q = getQuery();
+            } catch (Exception ignored) {
+                // best effort: fall through to the plain term rendering below
+            }
+            if (q != null) {
+                return "(" + q.toString(field) + ")";
+            }
+        }
+        StringBuffer buffer = new StringBuffer();
+        if (!term.field().equals(field)) {
+            buffer.append(term.field());
+            buffer.append(":");
+        }
+        buffer.append(term.text());
+        if (boost != 1.0f) {
+            buffer.append("^");
+            buffer.append(Float.toString(boost));
+        }
+        return buffer.toString();
+    }
+}
--- a/src/java/org/apache/lucene/search/PhrasePositions.java
+++ b/src/java/org/apache/lucene/search/PhrasePositions.java
@ -0,0 +1,96 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.index.*;
+
+/** Cursor over one phrase term's TermPositions stream.  It tracks the current
+ * document and the current position within it, the latter shifted back by
+ * the term's offset in the phrase. */
+final class PhrasePositions {
+  int doc;                                        // current doc
+  int position;                                   // position in doc, minus offset
+  int count;                                      // positions left in this doc
+  int offset;                                     // position in phrase
+  TermPositions tp;                               // stream of positions
+  PhrasePositions next;                           // link used to make lists
+
+  /** Wraps stream <code>t</code> for the term at phrase offset <code>o</code>
+   * and advances to the stream's first document. */
+  PhrasePositions(TermPositions t, int o) throws IOException {
+    this.tp = t;
+    this.offset = o;
+    next();
+  }
+
+  /** Advances to the next document.  When the stream has none left it is
+   * closed and <code>doc</code> becomes <code>Integer.MAX_VALUE</code>. */
+  final void next() throws IOException {
+    if (tp.next()) {
+      doc = tp.doc();
+      position = 0;
+    } else {
+      tp.close();                                 // stream is finished
+      doc = Integer.MAX_VALUE;                    // sentinel value
+    }
+  }
+
+  /** Loads the frequency of the current document into <code>count</code>
+   * and reads the first position. */
+  final void firstPosition() throws IOException {
+    count = tp.freq();
+    nextPosition();
+  }
+
+  /** Reads the next position of the current document, if one is left.
+   * <code>count</code> is decremented on every call, whether or not a
+   * position was read.
+   *
+   * @return true if a position was read, false if none were left
+   */
+  final boolean nextPosition() throws IOException {
+    final boolean more = count > 0;
+    count--;
+    if (more)
+      position = tp.nextPosition() - offset;
+    return more;
+  }
+}
--- a/src/java/org/apache/lucene/search/PhraseQuery.java
+++ b/src/java/org/apache/lucene/search/PhraseQuery.java
@ -0,0 +1,183 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Vector;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.TermPositions;
+import org.apache.lucene.index.IndexReader;
+
+/** A Query that matches documents containing a particular sequence of terms.
+  This may be combined with other terms with a {@link BooleanQuery}.
+  */
+final public class PhraseQuery extends Query {
+  private String field;                           // field shared by all terms
+  private Vector terms = new Vector();
+  private float idf = 0.0f;                       // set by sumOfSquaredWeights
+  private float weight = 0.0f;
+
+  private float boost = 1.0f;
+  private int slop = 0;
+
+
+  /** Constructs an empty phrase query. */
+  public PhraseQuery() {
+  }
+
+  /** Sets the boost for this term to <code>b</code>.  Documents containing
+    this term will (in addition to the normal weightings) have their score
+    multiplied by <code>b</code>. */
+  public final void setBoost(float b) { boost = b; }
+  /** Gets the boost for this term.  Documents containing
+    this term will (in addition to the normal weightings) have their score
+    multiplied by <code>b</code>.   The boost is 1.0 by default.  */
+  public final float getBoost() { return boost; }
+
+  /** Sets the number of other words permitted between words in query phrase.
+    If zero, then this is an exact phrase search.  For larger values this works
+    like a <code>WITHIN</code> or <code>NEAR</code> operator.
+
+    <p>The slop is in fact an edit-distance, where the units correspond to
+    moves of terms in the query phrase out of position.  For example, to switch
+    the order of two words requires two moves (the first move places the words
+    atop one another), so to permit re-orderings of phrases, the slop must be
+    at least two.
+
+    <p>More exact matches are scored higher than sloppier matches, thus search
+    results are sorted by exactness.
+
+    <p>The slop is zero by default, requiring exact matches.*/
+  public final void setSlop(int s) { slop = s; }
+  /** Returns the slop.  See setSlop(). */
+  public final int getSlop() { return slop; }
+
+  /** Adds a term to the end of the query phrase.  The first term fixes the
+    field of the phrase; every later term must name the same field.  Field
+    names are compared by value, so two equal names need not be the same
+    String object.
+    @throws IllegalArgumentException if <code>term</code> is in another field
+    */
+  public final void add(Term term) {
+    String termField = term.field();
+    if (terms.size() == 0)
+      field = termField;
+    else if (termField != field
+             && (termField == null || !termField.equals(field)))
+      throw new IllegalArgumentException
+        ("All phrase terms must be in the same field: " + term);
+
+    terms.addElement(term);
+  }
+
+  /** Sums the IDFs of the phrase terms, multiplies the sum by the boost to
+    get the weight, and returns the squared weight.  The sum starts from zero
+    on every call, so using one query instance for several searches does not
+    inflate it. */
+  final float sumOfSquaredWeights(Searcher searcher) throws IOException {
+    idf = 0.0f;                                   // do not carry over old sum
+    for (int i = 0; i < terms.size(); i++)        // sum term IDFs
+      idf += Similarity.idf((Term)terms.elementAt(i), searcher);
+
+    weight = idf * boost;
+    return weight * weight;                       // square term weights
+  }
+
+  /** Multiplies the weight by <code>norm</code> and then by the summed IDF. */
+  final void normalize(float norm) {
+    weight *= norm;                               // normalize for query
+    weight *= idf;                                // factor from document
+  }
+
+  /** Creates the scorer for this phrase against <code>reader</code>.
+    A single-term phrase gets a TermScorer; otherwise an ExactPhraseScorer is
+    used when the slop is zero and a SloppyPhraseScorer when it is not.
+    @return the scorer, or null if the phrase is empty or the reader returns
+      no postings for one of its terms
+    */
+  final Scorer scorer(IndexReader reader) throws IOException {
+    if (terms.size() == 0)                        // optimize zero-term case
+      return null;
+    if (terms.size() == 1) {                      // optimize one-term case
+      Term term = (Term)terms.elementAt(0);
+      TermDocs docs = reader.termDocs(term);
+      if (docs == null)
+        return null;
+      return new TermScorer(docs, reader.norms(term.field()), weight);
+    }
+
+    TermPositions[] tps = new TermPositions[terms.size()];
+    for (int i = 0; i < terms.size(); i++) {
+      TermPositions p = reader.termPositions((Term)terms.elementAt(i));
+      if (p == null) {
+        for (int j = 0; j < i; j++)               // no scorer will own these,
+          tps[j].close();                         // so release them here
+        return null;
+      }
+      tps[i] = p;
+    }
+
+    if (slop == 0)                                // optimize exact case
+      return new ExactPhraseScorer(tps, reader.norms(field), weight);
+    else
+      return
+        new SloppyPhraseScorer(tps, slop, reader.norms(field), weight);
+
+  }
+
+  /** Prints a user-readable version of this query: the quoted terms,
+    preceded by <code>field:</code> unless the field equals <code>f</code>,
+    and followed by <code>^boost</code> unless the boost is 1.0.  A phrase
+    without terms has no field yet and prints as an empty quoted string. */
+  public final String toString(String f) {
+    StringBuffer buffer = new StringBuffer();
+    if (field != null && !field.equals(f)) {
+      buffer.append(field);
+      buffer.append(":");
+    }
+
+    buffer.append("\"");
+    for (int i = 0; i < terms.size(); i++) {
+      buffer.append(((Term)terms.elementAt(i)).text());
+      if (i != terms.size()-1)
+        buffer.append(" ");
+    }
+    buffer.append("\"");
+
+    if (boost != 1.0f) {
+      buffer.append("^");
+      buffer.append(Float.toString(boost));
+    }
+
+    return buffer.toString();
+  }
+}
--- a/src/java/org/apache/lucene/search/PhraseQueue.java
+++ b/src/java/org/apache/lucene/search/PhraseQueue.java
@ -0,0 +1,72 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import org.apache.lucene.util.PriorityQueue;
+
+/** PriorityQueue of PhrasePositions, compared by <code>doc</code> first and
+ * by <code>position</code> when the docs are equal. */
+final class PhraseQueue extends PriorityQueue {
+  /** Creates a queue initialized for <code>size</code> entries. */
+  PhraseQueue(int size) {
+    initialize(size);
+  }
+
+  /** Returns true when <code>o1</code> is on an earlier doc than
+   * <code>o2</code>, or on the same doc at an earlier position. */
+  protected final boolean lessThan(Object o1, Object o2) {
+    final PhrasePositions a = (PhrasePositions)o1;
+    final PhrasePositions b = (PhrasePositions)o2;
+    if (a.doc != b.doc)
+      return a.doc < b.doc;
+    return a.position < b.position;
+  }
+}
--- a/src/java/org/apache/lucene/search/PhraseScorer.java
+++ b/src/java/org/apache/lucene/search/PhraseScorer.java
@ -0,0 +1,124 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Vector;
+import org.apache.lucene.util.*;
+import org.apache.lucene.index.*;
+
+/** Base class for phrase scorers.  It keeps one PhrasePositions per phrase
+ * term in a singly linked list (<code>first</code> .. <code>last</code>),
+ * walks that list to find documents on which the head and tail agree, and
+ * leaves the phrase-frequency computation to subclasses through
+ * <code>phraseFreq()</code>. */
+abstract class PhraseScorer extends Scorer {
+  protected byte[] norms;			  // indexed by doc number in score()
+  protected float weight;			  // multiplied into every score
+
+  protected PhraseQueue pq;			  // used to order PhrasePositions
+  protected PhrasePositions first, last;	  // head and tail of the list
+
+  /** Creates one PhrasePositions per entry of <code>tps</code>, using the
+   * array index as the term's offset in the phrase, and links them into the
+   * list in the order the queue pops them. */
+  PhraseScorer(TermPositions[] tps, byte[] n, float w) throws IOException {
+    norms = n;
+    weight = w;
+
+    // use PQ to build a sorted list of PhrasePositions
+    pq = new PhraseQueue(tps.length);
+    for (int i = 0; i < tps.length; i++)
+      pq.put(new PhrasePositions(tps[i], i));
+    pqToList();
+  }
+
+  /** Scores documents while <code>last.doc</code> is below <code>end</code>.
+   * Each candidate with a phrase frequency above zero is passed to
+   * <code>results</code> with the score
+   * <code>Similarity.tf(freq) * weight * Similarity.norm(norms[doc])</code>.
+   * An exhausted PhrasePositions reports <code>Integer.MAX_VALUE</code> as
+   * its doc, which ends the outer loop for any <code>end</code>. */
+  final void score(HitCollector results, int end) throws IOException {
+    while (last.doc < end) {			  // find doc w/ all the terms
+      while (first.doc < last.doc) {		  // scan forward in first
+	do {
+	  first.next();
+	} while (first.doc < last.doc);
+	firstToLast();				  // the advanced entry becomes the tail
+	if (last.doc >= end)
+	  return;
+      }
+
+      // found doc with all terms
+      // NOTE(review): this presumably relies on the list staying ordered by
+      // doc, so that head == tail means every entry is on this doc -- confirm.
+      float freq = phraseFreq();		  // check for phrase
+
+      if (freq > 0.0) {
+	float score = Similarity.tf(freq)*weight; // compute score
+	score *= Similarity.norm(norms[first.doc]); // normalize
+	results.collect(first.doc, score);	  // add to results
+      }
+      last.next();				  // resume scanning
+    }
+  }
+
+  /** Returns the phrase frequency for the current document; score() only
+   * collects the document when the value is greater than zero. */
+  abstract protected float phraseFreq() throws IOException;
+
+  /** Empties <code>pq</code> into the linked list: the first entry popped
+   * becomes <code>first</code>, the last one popped becomes
+   * <code>last</code>. */
+  protected final void pqToList() {
+    last = first = null;
+    while (pq.top() != null) {
+      PhrasePositions pp = (PhrasePositions)pq.pop();
+      if (last != null) {			  // add next to end of list
+	last.next = pp;
+      } else
+	first = pp;
+      last = pp;
+      pp.next = null;
+    }
+  }
+
+  /** Unlinks the head of the list and appends it as the new tail. */
+  protected final void firstToLast() {
+    last.next = first;			  // move first to end of list
+    last = first;
+    first = first.next;
+    last.next = null;
+  }
+}
--- a/src/java/org/apache/lucene/search/PrefixQuery.java
+++ b/src/java/org/apache/lucene/search/PrefixQuery.java
@ -0,0 +1,153 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.index.IndexReader;
+
+/** A Query that matches documents containing terms with a specified prefix. */
+final public class PrefixQuery extends Query {
+  private Term prefix;
+  private IndexReader reader;                     // supplied by prepare()
+  private float boost = 1.0f;
+  private BooleanQuery query;                     // expansion, built lazily
+
+  /** Constructs a query for terms starting with <code>prefix</code>.  The
+    reader used to expand the prefix is supplied later through
+    <code>prepare</code>. */
+  public PrefixQuery(Term prefix) {
+    this.prefix = prefix;
+  }
+
+  /** Sets the boost for this term to <code>b</code>.  Documents containing
+    this term will (in addition to the normal weightings) have their score
+    multiplied by <code>boost</code>. */
+  public void setBoost(float boost) {
+    this.boost = boost;
+  }
+
+  /** Returns the boost for this term. */
+  public float getBoost() {
+    return boost;
+  }
+
+  /** Records the reader to expand against and discards any expansion built
+    for an earlier reader. */
+  final void prepare(IndexReader reader) {
+    this.query = null;
+    this.reader = reader;
+  }
+
+  /** Delegates to the expanded BooleanQuery. */
+  final float sumOfSquaredWeights(Searcher searcher)
+    throws IOException {
+    return getQuery().sumOfSquaredWeights(searcher);
+  }
+
+  /** Delegates to the expanded BooleanQuery.  This method declares no
+    checked exception, so an IOException raised while expanding the prefix is
+    rethrown as a RuntimeException carrying its text. */
+  void normalize(float norm) {
+    try {
+      getQuery().normalize(norm);
+    } catch (IOException e) {
+      throw new RuntimeException(e.toString());
+    }
+  }
+
+  /** Delegates to the expanded BooleanQuery. */
+  Scorer scorer(IndexReader reader) throws IOException {
+    return getQuery().scorer(reader);
+  }
+
+  /** Returns the expanded query, building it on first use.  Terms are read
+    from <code>reader.terms(prefix)</code>; each one that is in the prefix's
+    field and starts with the prefix text becomes a TermQuery carrying this
+    query's boost.  Reading stops at the first term that does not qualify,
+    and the enumeration is closed in every case.  Field names are compared by
+    value, so two equal names need not be the same String object. */
+  private BooleanQuery getQuery() throws IOException {
+    if (query == null) {
+      BooleanQuery q = new BooleanQuery();
+      // Called termEnum, not enum: "enum" is a reserved word from Java 5 on.
+      TermEnum termEnum = reader.terms(prefix);
+      try {
+        String prefixText = prefix.text();
+        String prefixField = prefix.field();
+        do {
+          Term term = termEnum.term();
+          if (term != null &&
+              term.text().startsWith(prefixText) &&
+              sameField(term.field(), prefixField)) {
+            TermQuery tq = new TermQuery(term);   // found a match
+            tq.setBoost(boost);                   // set the boost
+            q.add(tq, false, false);              // add to q
+          } else {
+            break;
+          }
+        } while (termEnum.next());
+      } finally {
+        termEnum.close();
+      }
+      query = q;
+    }
+    return query;
+  }
+
+  /** Returns true if both field names are the same object or equal text. */
+  private static boolean sameField(String a, String b) {
+    return a == b || (a != null && a.equals(b));
+  }
+
+  /** Prints a user-readable version of this query. */
+  public String toString(String field) {
+    StringBuffer buffer = new StringBuffer();
+    if (!prefix.field().equals(field)) {
+      buffer.append(prefix.field());
+      buffer.append(":");
+    }
+    buffer.append(prefix.text());
+    buffer.append('*');
+    if (boost != 1.0f) {
+      buffer.append("^");
+      buffer.append(Float.toString(boost));
+    }
+    return buffer.toString();
+  }
+}
--- a/src/java/org/apache/lucene/search/Query.java
+++ b/src/java/org/apache/lucene/search/Query.java
@ -0,0 +1,101 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Hashtable;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+
+/** The abstract base class for queries.
+  <p>Instantiable subclasses are:
+  <ul>
+  <li> {@link TermQuery}
+  <li> {@link PhraseQuery}
+  <li> {@link BooleanQuery}
+  </ul>
+  <p>A parser for queries is contained in:
+  <ul>
+  <li><a href="doc/lucene.queryParser.QueryParser.html">QueryParser</a>
+  </ul>
+  */
+public abstract class Query {
+
+  // query weighting
+
+  /** Returns the sum of squared weights of this query; the static scorer
+    method derives the normalization factor from it. */
+  abstract float sumOfSquaredWeights(Searcher searcher) throws IOException;
+
+  /** Applies the normalization factor <code>norm</code> to this query. */
+  abstract void normalize(float norm);
+
+  // query evaluation
+
+  /** Builds the scorer for this query against <code>reader</code>. */
+  abstract Scorer scorer(IndexReader reader) throws IOException;
+
+  /** Hook called before weighting; does nothing unless a subclass overrides
+    it. */
+  void prepare(IndexReader reader) {}
+
+  /** Runs the whole set-up sequence for <code>query</code> and returns its
+    scorer: prepare, compute the sum of squared weights, normalize by
+    <code>1 / sqrt(sum)</code>, then build the scorer. The steps run in
+    exactly this order.
+    <p>NOTE(review): a sum of zero yields an infinite normalization factor,
+    because no guard is applied here. */
+  static Scorer scorer(Query query, Searcher searcher, IndexReader reader)
+    throws IOException {
+    query.prepare(reader);
+    float sumOfSquares = query.sumOfSquaredWeights(searcher);
+    query.normalize(1.0f / (float)Math.sqrt(sumOfSquares));
+    return query.scorer(reader);
+  }
+
+  /** Prints a query to a string, using <code>field</code> as the default
+    field for terms.
+    <p>The representation used is one that is readable by
+    <a href="doc/lucene.queryParser.QueryParser.html">QueryParser</a>
+    (although, if the query was created by the parser, the printed
+    representation may not be exactly what was parsed). */
+  public abstract String toString(String field);
+}
--- a/src/java/org/apache/lucene/search/ScoreDoc.java
+++ b/src/java/org/apache/lucene/search/ScoreDoc.java
@ -0,0 +1,65 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/** A plain holder that pairs a <code>doc</code> value with the
+  <code>score</code> assigned to it. Both fields are mutable and
+  package-visible. */
+final class ScoreDoc {
+  int doc;
+  float score;
+
+  /** Creates a holder with <code>doc = d</code> and <code>score = s</code>. */
+  ScoreDoc(int d, float s) {
+    this.score = s;
+    this.doc = d;
+  }
+}
--- a/Show More
+++ b/Show More