From 6f8347c6fef2877bba40fe328392e32c6d76d27d Mon Sep 17 00:00:00 2001
From: Doug Cutting <cutting@apache.org>
Date: Thu, 15 Jan 2004 22:42:35 +0000
Subject: [PATCH] Optimized TermDocs.skipTo() and changed scorers to take
 advantage of it.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150170 13f79535-47bb-0310-9956-ffa450edef68
---
 CHANGES.txt                                   |  30 ++++
 .../apache/lucene/index/DocumentWriter.java   |   2 +-
 .../apache/lucene/index/SegmentMerger.java    |  49 ++++-
 .../apache/lucene/index/SegmentTermDocs.java  |  87 +++++++--
 .../apache/lucene/index/SegmentTermEnum.java  |  43 ++++-
 .../lucene/index/SegmentTermPositions.java    |   7 +
 .../org/apache/lucene/index/TermInfo.java     |  13 +-
 .../apache/lucene/index/TermInfosReader.java  |  22 ++-
 .../apache/lucene/index/TermInfosWriter.java  |  44 ++++-
 .../apache/lucene/search/BooleanQuery.java    |  31 ++++
 .../apache/lucene/search/BooleanScorer.java   |  60 +++++--
 .../lucene/search/ConjunctionScorer.java      | 155 ++++++++++++++++
 .../apache/lucene/search/IndexSearcher.java   |   4 +-
 .../apache/lucene/search/PhrasePositions.java |  18 +-
 .../apache/lucene/search/PhraseScorer.java    | 170 +++++++++++-------
 src/java/org/apache/lucene/search/Scorer.java |  36 +++-
 .../org/apache/lucene/search/TermScorer.java  |  76 ++++----
 .../org/apache/lucene/store/RAMDirectory.java |  95 ----------
 src/java/org/apache/lucene/store/RAMFile.java |  63 +++++++
 .../apache/lucene/store/RAMInputStream.java   |  95 ++++++++++
 .../apache/lucene/store/RAMOutputStream.java  | 145 +++++++++++++++
 .../org/apache/lucene/ThreadSafetyTest.java   |  77 +-------
 .../org/apache/lucene/search/TestBasics.java  | 135 ++++++++++++++
 src/test/org/apache/lucene/util/English.java  | 140 +++++++++++++++
 24 files changed, 1272 insertions(+), 325 deletions(-)
 create mode 100644 src/java/org/apache/lucene/search/ConjunctionScorer.java
 create mode 100644 src/java/org/apache/lucene/store/RAMFile.java
 create mode 100644 src/java/org/apache/lucene/store/RAMInputStream.java
 create mode 100644 src/java/org/apache/lucene/store/RAMOutputStream.java
 create mode 100644 src/test/org/apache/lucene/search/TestBasics.java
 create mode 100644 src/test/org/apache/lucene/util/English.java

diff --git a/CHANGES.txt b/CHANGES.txt
index 47fa790b523..42be8b0d855 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -2,6 +2,36 @@ Lucene Change Log
 
 $Id$
 
+1.4 RC1
+
+ 1. Changed the format of the .tis file, so that:
+
+    - it has a format version number, which makes it easier to
+      back-compatibly change file formats in the future.
+
+    - the term count is now stored as a long.  This was the one aspect
+      of Lucene's file formats which limited index size.
+
+    - a few internal index parameters are now stored in the index, so
+      that they can (in theory) now be changed from index to index,
+      although there is not yet an API to do so.
+
+    These changes are back compatible.  The new code can read old
+    indexes.  But old code will not be able to read new indexes. (cutting)
+
+ 2. Added an optimized implementation of TermDocs.skipTo().  A skip
+    table is now stored for each term in the .frq file.  This only
+    adds a percent or two to overall index size, but can substantially
+    speed up many searches.  (cutting)
+
+ 3. Restructured the Scorer API and all Scorer implementations to take
+    advantage of an optimized TermDocs.skipTo() implementation.  In
+    particular, PhraseQuerys and conjunctive BooleanQuerys are
+    faster when one clause has substantially fewer matches than the
+    others.  (A conjunctive BooleanQuery is a BooleanQuery where all
+    clauses are required.)  (cutting)
+
+
 1.3 final
 
  1. Added catch of BooleanQuery$TooManyClauses in QueryParser to
diff --git a/src/java/org/apache/lucene/index/DocumentWriter.java b/src/java/org/apache/lucene/index/DocumentWriter.java
index eb4c822d6ca..bc9c1454b6b 100644
--- a/src/java/org/apache/lucene/index/DocumentWriter.java
+++ b/src/java/org/apache/lucene/index/DocumentWriter.java
@@ -291,7 +291,7 @@ final class DocumentWriter {
         Posting posting = postings[i];
 
         // add an entry to the dictionary with pointers to prox and freq files
-        ti.set(1, freq.getFilePointer(), prox.getFilePointer());
+        ti.set(1, freq.getFilePointer(), prox.getFilePointer(), -1);
         tis.add(posting.term, ti);
 
         // add an entry to the freq file
diff --git a/src/java/org/apache/lucene/index/SegmentMerger.java b/src/java/org/apache/lucene/index/SegmentMerger.java
index 76a42c7037c..08afc05260a 100644
--- a/src/java/org/apache/lucene/index/SegmentMerger.java
+++ b/src/java/org/apache/lucene/index/SegmentMerger.java
@@ -62,6 +62,7 @@ import java.io.IOException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.OutputStream;
 import org.apache.lucene.store.InputStream;
+import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.BitVector;
 
 final class SegmentMerger {
@@ -246,17 +247,21 @@ final class SegmentMerger {
 
     int df = appendPostings(smis, n);		  // append posting data
 
+    long skipPointer = writeSkip();
+
     if (df > 0) {
       // add an entry to the dictionary with pointers to prox and freq files
-      termInfo.set(df, freqPointer, proxPointer);
+      termInfo.set(df, freqPointer, proxPointer, (int)(skipPointer-freqPointer));
       termInfosWriter.add(smis[0].term, termInfo);
     }
   }
 
   private final int appendPostings(SegmentMergeInfo[] smis, int n)
        throws IOException {
+    final int skipInterval = termInfosWriter.skipInterval;
     int lastDoc = 0;
     int df = 0;					  // number of docs w/ term
+    resetSkip();
     for (int i = 0; i < n; i++) {
       SegmentMergeInfo smi = smis[i];
       TermPositions postings = smi.postings;
@@ -272,6 +277,12 @@ final class SegmentMerger {
         if (doc < lastDoc)
           throw new IllegalStateException("docs out of order");
 
+        df++;
+
+        if ((df % skipInterval) == 0) {
+          bufferSkip(lastDoc);
+        }
+
         int docCode = (doc - lastDoc) << 1;	  // use low bit to flag freq=1
         lastDoc = doc;
         
@@ -289,13 +300,43 @@ final class SegmentMerger {
           proxOutput.writeVInt(position - lastPosition);
           lastPosition = position;
         }
-
-        df++;
       }
     }
     return df;
   }
-  private final void mergeNorms() throws IOException {
+
+  private RAMOutputStream skipBuffer = new RAMOutputStream();
+  private int lastSkipDoc;
+  private long lastSkipFreqPointer;
+  private long lastSkipProxPointer;
+
+  private void resetSkip() throws IOException {
+    skipBuffer.reset();
+    lastSkipDoc = 0;
+    lastSkipFreqPointer = freqOutput.getFilePointer();
+    lastSkipProxPointer = proxOutput.getFilePointer();
+  }
+
+  private void bufferSkip(int doc) throws IOException {
+    long freqPointer = freqOutput.getFilePointer();
+    long proxPointer = proxOutput.getFilePointer();
+
+    skipBuffer.writeVInt(doc - lastSkipDoc); 
+    skipBuffer.writeVInt((int)(freqPointer - lastSkipFreqPointer));
+    skipBuffer.writeVInt((int)(proxPointer - lastSkipProxPointer));
+
+    lastSkipDoc = doc;
+    lastSkipFreqPointer = freqPointer;
+    lastSkipProxPointer = proxPointer;
+  }
+
+  private long writeSkip() throws IOException {
+    long skipPointer = freqOutput.getFilePointer();
+    skipBuffer.writeTo(freqOutput);
+    return skipPointer;
+  }
+
+  private void mergeNorms() throws IOException {
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
       if (fi.isIndexed) {
diff --git a/src/java/org/apache/lucene/index/SegmentTermDocs.java b/src/java/org/apache/lucene/index/SegmentTermDocs.java
index cd7a9384033..44dffadc529 100644
--- a/src/java/org/apache/lucene/index/SegmentTermDocs.java
+++ b/src/java/org/apache/lucene/index/SegmentTermDocs.java
@@ -61,16 +61,27 @@ import org.apache.lucene.store.InputStream;
 class SegmentTermDocs implements TermDocs {
   protected SegmentReader parent;
   private InputStream freqStream;
-  private int freqCount;
+  private int count;
+  private int df;
   private BitVector deletedDocs;
   int doc = 0;
   int freq;
 
+  private int skipInterval;
+  private int skipCount;
+  private InputStream skipStream;
+  private int skipDoc;
+  private long freqPointer;
+  private long proxPointer;
+  private long skipPointer;
+  private boolean haveSkipped;
+
   SegmentTermDocs(SegmentReader parent)
     throws IOException {
     this.parent = parent;
     this.freqStream = (InputStream)parent.freqStream.clone();
     this.deletedDocs = parent.deletedDocs;
+    this.skipInterval = parent.tis.getSkipInterval();
   }
   
   public void seek(Term term) throws IOException {
@@ -88,12 +99,19 @@ class SegmentTermDocs implements TermDocs {
   }
   
   void seek(TermInfo ti) throws IOException {
+    count = 0;
     if (ti == null) {
-      freqCount = 0;
+      df = 0;
     } else {
-      freqCount = ti.docFreq;
+      df = ti.docFreq;
       doc = 0;
-      freqStream.seek(ti.freqPointer);
+      skipDoc = 0;
+      skipCount = 0;
+      freqPointer = ti.freqPointer;
+      proxPointer = ti.proxPointer;
+      skipPointer = freqPointer + ti.skipOffset;
+      freqStream.seek(freqPointer);
+      haveSkipped = false;
     }
   }
   
@@ -109,7 +127,7 @@ class SegmentTermDocs implements TermDocs {
 
   public boolean next() throws IOException {
     while (true) {
-      if (freqCount == 0)
+      if (count == df)
 	return false;
 
       int docCode = freqStream.readVInt();
@@ -119,7 +137,7 @@ class SegmentTermDocs implements TermDocs {
       else
 	freq = freqStream.readVInt();		  // else read freq
  
-      freqCount--;
+      count++;
     
       if (deletedDocs == null || !deletedDocs.get(doc))
 	break;
@@ -131,9 +149,9 @@ class SegmentTermDocs implements TermDocs {
   /** Optimized implementation. */
   public int read(final int[] docs, final int[] freqs)
       throws IOException {
-    final int end = docs.length;
+    final int length = docs.length;
     int i = 0;
-    while (i < end && freqCount > 0) {
+    while (i < length && count < df) {
 
       // manually inlined call to next() for speed
       final int docCode = freqStream.readVInt();
@@ -142,7 +160,7 @@ class SegmentTermDocs implements TermDocs {
 	freq = 1;				  // freq is one
       else
 	freq = freqStream.readVInt();		  // else read freq
-      freqCount--;
+      count++;
    
       if (deletedDocs == null || !deletedDocs.get(doc)) {
 	docs[i] = doc;
@@ -153,12 +171,61 @@ class SegmentTermDocs implements TermDocs {
     return i;
   }
 
-  /** As yet unoptimized implementation. */
+  /** Overridden by SegmentTermPositions to skip in prox stream. */
+  protected void skipProx(long proxPointer) throws IOException {}
+
+  /** Optimized implementation. */
   public boolean skipTo(int target) throws IOException {
+    if (df > skipInterval) {                      // optimized case
+
+      if (skipStream == null)
+        skipStream = (InputStream)freqStream.clone(); // lazily clone
+
+      if (!haveSkipped) {                          // lazily seek skip stream
+        skipStream.seek(skipPointer);
+        haveSkipped = true;
+      }
+
+      // scan skip data
+      int lastSkipDoc = skipDoc;
+      long lastFreqPointer = freqStream.getFilePointer();
+      long lastProxPointer = -1;
+      int numSkipped = -1 -(count % skipInterval);
+      
+      while (target > skipDoc) {
+        lastSkipDoc = skipDoc;
+        lastFreqPointer = freqPointer;
+        lastProxPointer = proxPointer;
+        if (skipDoc >= doc)
+          numSkipped += skipInterval;
+        
+        if ((count + numSkipped + skipInterval) > df)
+          break;                                  // no more skips
+
+        skipDoc += skipStream.readVInt();
+        freqPointer += skipStream.readVInt();
+        proxPointer += skipStream.readVInt();
+        
+        skipCount++;
+      }
+      
+      // if we found something to skip, then skip it
+      if (lastFreqPointer > freqStream.getFilePointer()) {
+        freqStream.seek(lastFreqPointer);
+        skipProx(lastProxPointer);
+        
+        doc = lastSkipDoc;
+        count += numSkipped;
+      }
+
+    }
+
+    // done skipping, now just scan
     do {
       if (!next())
 	return false;
     } while (target > doc);
     return true;
   }
+
 }
diff --git a/src/java/org/apache/lucene/index/SegmentTermEnum.java b/src/java/org/apache/lucene/index/SegmentTermEnum.java
index ef8e234a9fb..17262832516 100644
--- a/src/java/org/apache/lucene/index/SegmentTermEnum.java
+++ b/src/java/org/apache/lucene/index/SegmentTermEnum.java
@@ -60,14 +60,17 @@ import org.apache.lucene.store.InputStream;
 final class SegmentTermEnum extends TermEnum implements Cloneable {
   private InputStream input;
   private FieldInfos fieldInfos;
-  int size;
-  int position = -1;
+  long size;
+  long position = -1;
 
   private Term term = new Term("", "");
   private TermInfo termInfo = new TermInfo();
 
-  boolean isIndex = false;
+  private int format;
+  private boolean isIndex = false;
   long indexPointer = 0;
+  int indexInterval;
+  int skipInterval;
   Term prev;
 
   private char[] buffer = {};
@@ -76,8 +79,34 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
        throws IOException {
     input = i;
     fieldInfos = fis; 
-    size = input.readInt();
     isIndex = isi;
+
+    int firstInt = input.readInt();
+    if (firstInt >= 0) {
+      // original-format file, without explicit format version number
+      format = 0;
+      size = firstInt;
+
+      // back-compatible settings
+      indexInterval = 128;
+      skipInterval = Integer.MAX_VALUE;
+
+    } else {
+      // we have a format version number
+      format = firstInt;
+
+      // check that it is a format we can understand
+      if (format < TermInfosWriter.FORMAT)
+        throw new IOException("Unknown format version:" + format);
+      
+      size = input.readLong();                    // read the size
+      
+      if (!isIndex) {
+        indexInterval = input.readInt();
+        skipInterval = input.readInt();
+      }
+    }
+    
   }
   
   protected Object clone() {
@@ -117,6 +146,12 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
     termInfo.freqPointer += input.readVLong();	  // read freq pointer
     termInfo.proxPointer += input.readVLong();	  // read prox pointer
     
+    if (!isIndex) {
+      if (termInfo.docFreq > skipInterval) {
+        termInfo.skipOffset = input.readVInt();
+      }
+    }
+
     if (isIndex)
       indexPointer += input.readVLong();	  // read index pointer
 
diff --git a/src/java/org/apache/lucene/index/SegmentTermPositions.java b/src/java/org/apache/lucene/index/SegmentTermPositions.java
index 5fc487ccbb8..6b148a4e0ba 100644
--- a/src/java/org/apache/lucene/index/SegmentTermPositions.java
+++ b/src/java/org/apache/lucene/index/SegmentTermPositions.java
@@ -109,4 +109,11 @@ extends SegmentTermDocs implements TermPositions {
     throw new UnsupportedOperationException();
   }
 
+
+  /** Called by super.skipTo(). */
+  protected void skipProx(long proxPointer) throws IOException {
+    proxStream.seek(proxPointer);
+    proxCount = 0;
+  }
+
 }
diff --git a/src/java/org/apache/lucene/index/TermInfo.java b/src/java/org/apache/lucene/index/TermInfo.java
index 91c974aa6d4..f13a9b7c6e1 100644
--- a/src/java/org/apache/lucene/index/TermInfo.java
+++ b/src/java/org/apache/lucene/index/TermInfo.java
@@ -62,6 +62,7 @@ final class TermInfo {
 
   long freqPointer = 0;
   long proxPointer = 0;
+  int skipOffset;
 
   TermInfo() {}
 
@@ -75,17 +76,21 @@ final class TermInfo {
     docFreq = ti.docFreq;
     freqPointer = ti.freqPointer;
     proxPointer = ti.proxPointer;
+    skipOffset = ti.skipOffset;
   }
 
-  final void set(int df, long fp, long pp) {
-    docFreq = df;
-    freqPointer = fp;
-    proxPointer = pp;
+  final void set(int docFreq,
+                 long freqPointer, long proxPointer, int skipOffset) {
+    this.docFreq = docFreq;
+    this.freqPointer = freqPointer;
+    this.proxPointer = proxPointer;
+    this.skipOffset = skipOffset;
   }
 
   final void set(TermInfo ti) {
     docFreq = ti.docFreq;
     freqPointer = ti.freqPointer;
     proxPointer = ti.proxPointer;
+    skipOffset = ti.skipOffset;
   }
 }
diff --git a/src/java/org/apache/lucene/index/TermInfosReader.java b/src/java/org/apache/lucene/index/TermInfosReader.java
index c544b619e48..5393d55974e 100644
--- a/src/java/org/apache/lucene/index/TermInfosReader.java
+++ b/src/java/org/apache/lucene/index/TermInfosReader.java
@@ -68,7 +68,7 @@ final class TermInfosReader {
   private FieldInfos fieldInfos;
 
   private SegmentTermEnum enumerator;
-  private int size;
+  private long size;
 
   TermInfosReader(Directory dir, String seg, FieldInfos fis)
        throws IOException {
@@ -82,13 +82,17 @@ final class TermInfosReader {
     readIndex();
   }
 
+  public int getSkipInterval() {
+    return enumerator.skipInterval;
+  }
+
   final void close() throws IOException {
     if (enumerator != null)
       enumerator.close();
   }
 
   /** Returns the number of term/value pairs in the set. */
-  final int size() {
+  final long size() {
     return size;
   }
 
@@ -101,7 +105,7 @@ final class TermInfosReader {
       new SegmentTermEnum(directory.openFile(segment + ".tii"),
 			  fieldInfos, true);
     try {
-      int indexSize = indexEnum.size;
+      int indexSize = (int)indexEnum.size;
 
       indexTerms = new Term[indexSize];
       indexInfos = new TermInfo[indexSize];
@@ -137,7 +141,7 @@ final class TermInfosReader {
 
   private final void seekEnum(int indexOffset) throws IOException {
     enumerator.seek(indexPointers[indexOffset],
-	      (indexOffset * TermInfosWriter.INDEX_INTERVAL) - 1,
+	      (indexOffset * enumerator.indexInterval) - 1,
 	      indexTerms[indexOffset], indexInfos[indexOffset]);
   }
 
@@ -146,10 +150,10 @@ final class TermInfosReader {
     if (size == 0) return null;
 
     // optimize sequential access: first try scanning cached enumerator w/o seeking
-    if (enumerator.term() != null			  // term is at or past current
+    if (enumerator.term() != null                 // term is at or past current
 	&& ((enumerator.prev != null && term.compareTo(enumerator.prev) > 0)
 	    || term.compareTo(enumerator.term()) >= 0)) {
-      int enumOffset = (enumerator.position/TermInfosWriter.INDEX_INTERVAL)+1;
+      int enumOffset = (int)(enumerator.position/enumerator.indexInterval)+1;
       if (indexTerms.length == enumOffset	  // but before end of block
 	  || term.compareTo(indexTerms[enumOffset]) < 0)
 	return scanEnum(term);			  // no need to seek
@@ -174,10 +178,10 @@ final class TermInfosReader {
     if (size == 0) return null;
 
     if (enumerator != null && enumerator.term() != null && position >= enumerator.position &&
-	position < (enumerator.position + TermInfosWriter.INDEX_INTERVAL))
+	position < (enumerator.position + enumerator.indexInterval))
       return scanEnum(position);		  // can avoid seek
 
-    seekEnum(position / TermInfosWriter.INDEX_INTERVAL); // must seek
+    seekEnum(position / enumerator.indexInterval); // must seek
     return scanEnum(position);
   }
 
@@ -190,7 +194,7 @@ final class TermInfosReader {
   }
 
   /** Returns the position of a Term in the set or -1. */
-  final synchronized int getPosition(Term term) throws IOException {
+  final synchronized long getPosition(Term term) throws IOException {
     if (size == 0) return -1;
 
     int indexOffset = getIndexOffset(term);
diff --git a/src/java/org/apache/lucene/index/TermInfosWriter.java b/src/java/org/apache/lucene/index/TermInfosWriter.java
index a8a79f769e4..684ec9d055c 100644
--- a/src/java/org/apache/lucene/index/TermInfosWriter.java
+++ b/src/java/org/apache/lucene/index/TermInfosWriter.java
@@ -62,13 +62,36 @@ import org.apache.lucene.store.Directory;
   Directory.  A TermInfos can be written once, in order.  */
 
 final class TermInfosWriter {
+  /** The file format version, a negative number. */
+  public static final int FORMAT = -1;
+
   private FieldInfos fieldInfos;
   private OutputStream output;
   private Term lastTerm = new Term("", "");
   private TermInfo lastTi = new TermInfo();
   private int size = 0;
 
-  static final int INDEX_INTERVAL = 128;
+  // TODO: the default values for these two parameters should be settable from
+  // IndexWriter.  However, once that's done, folks will start setting them to
+  // ridiculous values and complaining that things don't work well, as with
+  // mergeFactor.  So, let's wait until a number of folks find that alternate
+  // values work better.  Note that both of these values are stored in the
+  // segment, so that it's safe to change these w/o rebuilding all indexes.
+
+  /** Expert: The fraction of terms in the "dictionary" which should be stored
+   * in RAM.  Smaller values use more memory, but make searching slightly
+   * faster, while larger values use less memory and make searching slightly
+   * slower.  Searching is typically not dominated by dictionary lookup, so
+   * tweaking this is rarely useful.*/
+  int indexInterval = 128;
+
+  /** Expert: The fraction of {@link TermDocs} entries stored in skip tables,
+   * used to accelerate {@link TermDocs#skipTo(int)}.  Larger values result in
+   * smaller indexes, greater acceleration, but fewer accelerable cases, while
+   * smaller values result in bigger indexes, less acceleration and more
+   * accelerable cases. More detailed experiments would be useful here. */
+  int skipInterval = 16;
+
   private long lastIndexPointer = 0;
   private boolean isIndex = false;
 
@@ -91,7 +114,12 @@ final class TermInfosWriter {
     fieldInfos = fis;
     isIndex = isi;
     output = directory.createFile(segment + (isIndex ? ".tii" : ".tis"));
-    output.writeInt(0);				  // leave space for size
+    output.writeInt(FORMAT);                      // write format
+    output.writeLong(0);                          // leave space for size
+    if (!isIndex) {
+      output.writeInt(indexInterval);             // write indexInterval
+      output.writeInt(skipInterval);              // write skipInterval
+    }
   }
 
   /** Adds a new <Term, TermInfo> pair to the set.
@@ -106,7 +134,7 @@ final class TermInfosWriter {
     if (ti.proxPointer < lastTi.proxPointer)
       throw new IOException("proxPointer out of order");
 
-    if (!isIndex && size % INDEX_INTERVAL == 0)
+    if (!isIndex && size % indexInterval == 0)
       other.add(lastTerm, lastTi);		  // add an index term
 
     writeTerm(term);				  // write term
@@ -114,6 +142,12 @@ final class TermInfosWriter {
     output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
     output.writeVLong(ti.proxPointer - lastTi.proxPointer);
 
+    if (!isIndex) {
+      if (ti.docFreq > skipInterval) {
+        output.writeVInt(ti.skipOffset);
+      }
+    }
+
     if (isIndex) {
       output.writeVLong(other.output.getFilePointer() - lastIndexPointer);
       lastIndexPointer = other.output.getFilePointer(); // write pointer
@@ -149,8 +183,8 @@ final class TermInfosWriter {
 
   /** Called to complete TermInfos creation. */
   final void close() throws IOException {
-    output.seek(0);				  // write size at start
-    output.writeInt(size);
+    output.seek(4);				  // write size after format
+    output.writeLong(size);
     output.close();
 
     if (!isIndex)
diff --git a/src/java/org/apache/lucene/search/BooleanQuery.java b/src/java/org/apache/lucene/search/BooleanQuery.java
index 66fa15d0d39..8cae4e632ff 100644
--- a/src/java/org/apache/lucene/search/BooleanQuery.java
+++ b/src/java/org/apache/lucene/search/BooleanQuery.java
@@ -158,6 +158,37 @@ public class BooleanQuery extends Query {
     }
 
     public Scorer scorer(IndexReader reader) throws IOException {
+      // First see if the (faster) ConjunctionScorer will work.  This can be
+      // used when all clauses are required.  Also, at this point a
+      // BooleanScorer cannot be embedded in a ConjunctionScorer, as the hits
+      // from a BooleanScorer are not always sorted by document number (sigh)
+      // and hence BooleanScorer cannot implement skipTo() correctly, which is
+      // required by ConjunctionScorer.
+      boolean allRequired = true;      
+      boolean noneBoolean = true;
+      for (int i = 0 ; i < weights.size(); i++) {
+        BooleanClause c = (BooleanClause)clauses.elementAt(i);
+        if (!c.required)
+          allRequired = false;
+        if (c.query instanceof BooleanQuery)
+          noneBoolean = false;
+      }
+
+      if (allRequired && noneBoolean) {           // ConjunctionScorer is okay
+        ConjunctionScorer result =
+          new ConjunctionScorer(searcher.getSimilarity());
+        for (int i = 0 ; i < weights.size(); i++) {
+          BooleanClause c = (BooleanClause)clauses.elementAt(i);
+          Weight w = (Weight)weights.elementAt(i);
+          Scorer subScorer = w.scorer(reader);
+          if (subScorer == null)
+            return null;
+          result.add(subScorer);
+        }
+        return result;
+      }
+
+      // Use good-old BooleanScorer instead.
       BooleanScorer result = new BooleanScorer(searcher.getSimilarity());
 
       for (int i = 0 ; i < weights.size(); i++) {
diff --git a/src/java/org/apache/lucene/search/BooleanScorer.java b/src/java/org/apache/lucene/search/BooleanScorer.java
index 8d2bf0eae84..d822780d358 100644
--- a/src/java/org/apache/lucene/search/BooleanScorer.java
+++ b/src/java/org/apache/lucene/search/BooleanScorer.java
@@ -76,14 +76,17 @@ final class BooleanScorer extends Scorer {
 
   static final class SubScorer {
     public Scorer scorer;
+    public boolean done;
     public boolean required = false;
     public boolean prohibited = false;
     public HitCollector collector;
     public SubScorer next;
 
     public SubScorer(Scorer scorer, boolean required, boolean prohibited,
-		     HitCollector collector, SubScorer next) {
+		     HitCollector collector, SubScorer next)
+      throws IOException {
       this.scorer = scorer;
+      this.done = !scorer.next();
       this.required = required;
       this.prohibited = prohibited;
       this.collector = collector;
@@ -91,7 +94,8 @@ final class BooleanScorer extends Scorer {
     }
   }
 
-  final void add(Scorer scorer, boolean required, boolean prohibited) {
+  final void add(Scorer scorer, boolean required, boolean prohibited)
+    throws IOException {
     int mask = 0;
     if (required || prohibited) {
       if (nextMask == 0)
@@ -120,17 +124,45 @@ final class BooleanScorer extends Scorer {
       coordFactors[i] = getSimilarity().coord(i, maxCoord-1);
   }
 
-  public final void score(HitCollector results, int maxDoc)
-    throws IOException {
+  private int end;
+  private Bucket current;
+
+  public int doc() { return current.doc; }
+
+  public boolean next() throws IOException {
+    boolean more = false;
+    do {
+      while (bucketTable.first != null) {         // more queued
+        current = bucketTable.first;
+        bucketTable.first = current.next;         // pop the queue
+
+        // check prohibited & required
+        if ((current.bits & prohibitedMask) == 0 && 
+            (current.bits & requiredMask) == requiredMask) {
+          return true;
+        }
+      }
+
+      // refill the queue
+      end += BucketTable.SIZE;
+      for (SubScorer sub = scorers; sub != null; sub = sub.next) {
+        Scorer scorer = sub.scorer;
+        while (!sub.done && scorer.doc() < end) {
+          sub.collector.collect(scorer.doc(), scorer.score());
+          sub.done = !scorer.next();
+        }
+        if (!sub.done) {
+          more  = true;
+        }
+      }
+    } while (bucketTable.first != null | more);
+    return false;
+  }
+
+  public float score() throws IOException {
     if (coordFactors == null)
       computeCoordFactors();
-
-    while (currentDoc < maxDoc) {
-      currentDoc = Math.min(currentDoc+BucketTable.SIZE, maxDoc);
-      for (SubScorer t = scorers; t != null; t = t.next)
-	t.scorer.score(t.collector, currentDoc);
-      bucketTable.collectHits(results);
-    }
+    return current.score * coordFactors[current.coord];
   }
 
   static final class Bucket {
@@ -196,7 +228,7 @@ final class BooleanScorer extends Scorer {
 	bucket.score = score;			  // initialize score
 	bucket.bits = mask;			  // initialize mask
 	bucket.coord = 1;			  // initialize coord
-	
+
 	bucket.next = table.first;		  // push onto valid list
 	table.first = bucket;
       } else {					  // valid bucket
@@ -207,6 +239,10 @@ final class BooleanScorer extends Scorer {
     }
   }
 
+  public boolean skipTo(int target) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
   public Explanation explain(int doc) throws IOException {
     throw new UnsupportedOperationException();
   }
diff --git a/src/java/org/apache/lucene/search/ConjunctionScorer.java b/src/java/org/apache/lucene/search/ConjunctionScorer.java
new file mode 100644
index 00000000000..57faf0982f0
--- /dev/null
+++ b/src/java/org/apache/lucene/search/ConjunctionScorer.java
@@ -0,0 +1,155 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2004 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.*;
+import org.apache.lucene.index.*;
+
+/** Scorer for conjunctions, sets of queries, all of which are required. */
+final class ConjunctionScorer extends Scorer {
+  private LinkedList scorers = new LinkedList(); // clauses, kept sorted by doc()
+  private boolean firstTime = true;              // until next()/skipTo() has run
+  private boolean more = true;                   // false once a clause runs out
+  private float coord;                           // set by init()
+
+  public ConjunctionScorer(Similarity similarity) {
+    super(similarity);
+  }
+
+  final void add(Scorer scorer) throws IOException {
+    scorers.addLast(scorer);
+  }
+
+  private Scorer first() { return (Scorer)scorers.getFirst(); }
+  private Scorer last() { return (Scorer)scorers.getLast(); }
+
+  public int doc() { return first().doc(); }
+
+  public boolean next() throws IOException {
+    if (firstTime) {
+      init(true);
+    } else if (more) {
+      more = last().next();                       // trigger further scanning
+    }
+    return doNext();
+  }
+
+  /** Lines all clauses up on one doc without advancing any of them first. */
+  private boolean doNext() throws IOException {
+    while (more && first().doc() < last().doc()) { // find doc w/ all clauses
+      more = first().skipTo(last().doc());      // skip first upto last
+      scorers.addLast(scorers.removeFirst());   // move first to last
+    }
+    return more;                                // found a doc with all clauses
+  }
+
+  /** Skips every clause to target, then on to the first doc they all share. */
+  public boolean skipTo(int target) throws IOException {
+    if (firstTime)
+      init(false);                                // coord only, keep positions
+    Iterator i = scorers.iterator();
+    while (more && i.hasNext()) {
+      more = ((Scorer)i.next()).skipTo(target);
+    }
+    if (more)
+      sortScorers();                              // re-sort scorers
+    return doNext();                              // stop only on a full match
+  }
+
+  public float score() throws IOException {
+    float score = 0.0f;                           // sum scores
+    Iterator i = scorers.iterator();
+    while (i.hasNext())
+      score += ((Scorer)i.next()).score();
+    score *= coord;
+    return score;
+  }
+
+  /** Computes coord; if advance is set, steps each clause to its first doc. */
+  private void init(boolean advance) throws IOException {
+    more = scorers.size() > 0;
+    coord = getSimilarity().coord(scorers.size(), scorers.size());
+    Iterator i = scorers.iterator();
+    while (advance && more && i.hasNext()) {
+      more = ((Scorer)i.next()).next();
+    }
+    if (advance && more)
+      sortScorers();                              // initial sort of list
+
+    firstTime = false;
+  }
+
+  /** Re-orders the clause list by ascending current doc. */
+  private void sortScorers() throws IOException {
+    Scorer[] array = (Scorer[])scorers.toArray(new Scorer[scorers.size()]);
+    scorers.clear();                              // empty the list
+
+    Arrays.sort(array, new Comparator() {         // sort the array
+        public int compare(Object o1, Object o2) {
+          return ((Scorer)o1).doc() - ((Scorer)o2).doc();
+        }
+      });
+
+    for (int i = 0; i < array.length; i++) {
+      scorers.addLast(array[i]);                  // re-build list, now sorted
+    }
+  }
+
+  public Explanation explain(int doc) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
+}
diff --git a/src/java/org/apache/lucene/search/IndexSearcher.java b/src/java/org/apache/lucene/search/IndexSearcher.java
index b03094ca56e..f152bfe3114 100644
--- a/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/src/java/org/apache/lucene/search/IndexSearcher.java
@@ -140,7 +140,7 @@ public class IndexSearcher extends Searcher {
             hq.insert(new ScoreDoc(doc, score));
 	  }
 	}
-      }, reader.maxDoc());
+      });
 
     ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
     for (int i = hq.size()-1; i >= 0; i--)	  // put docs in array
@@ -180,7 +180,7 @@ public class IndexSearcher extends Searcher {
     Scorer scorer = query.weight(this).scorer(reader);
     if (scorer == null)
       return;
-    scorer.score(collector, reader.maxDoc());
+    scorer.score(collector);
   }
 
   public Query rewrite(Query original) throws IOException {
diff --git a/src/java/org/apache/lucene/search/PhrasePositions.java b/src/java/org/apache/lucene/search/PhrasePositions.java
index adfb59e63b2..41c8b961da1 100644
--- a/src/java/org/apache/lucene/search/PhrasePositions.java
+++ b/src/java/org/apache/lucene/search/PhrasePositions.java
@@ -68,19 +68,31 @@ final class PhrasePositions {
   PhrasePositions(TermPositions t, int o) throws IOException {
     tp = t;
     offset = o;
-    next();
   }
 
-  final void next() throws IOException {	  // increments to next doc
+  final boolean next() throws IOException {	  // increments to next doc
     if (!tp.next()) {
       tp.close();				  // close stream
       doc = Integer.MAX_VALUE;			  // sentinel value
-      return;
+      return false;
     }
     doc = tp.doc();
     position = 0;
+    return true;
   }
 
+  final boolean skipTo(int target) throws IOException {
+    final boolean found = tp.skipTo(target);      // seek underlying positions
+    if (found) {
+      doc = tp.doc();
+      position = 0;
+    } else {
+      tp.close();                                 // close stream
+      doc = Integer.MAX_VALUE;                    // sentinel value
+    }
+    return found;
+  }
+
   final void firstPosition() throws IOException {
     count = tp.freq();				  // read first pos
     nextPosition();
diff --git a/src/java/org/apache/lucene/search/PhraseScorer.java b/src/java/org/apache/lucene/search/PhraseScorer.java
index 00ae443b970..82b152d6b8c 100644
--- a/src/java/org/apache/lucene/search/PhraseScorer.java
+++ b/src/java/org/apache/lucene/search/PhraseScorer.java
@@ -60,89 +60,127 @@ import org.apache.lucene.util.*;
 import org.apache.lucene.index.*;
 
 abstract class PhraseScorer extends Scorer {
-    private Weight weight;
-    protected byte[] norms;
-    protected float value;
+  private Weight weight;
+  protected byte[] norms;
+  protected float value;
 
-    protected PhraseQueue pq;
-    protected PhrasePositions first, last;
+  private boolean firstTime = true;
+  private boolean more = true;
+  protected PhraseQueue pq;
+  protected PhrasePositions first, last;
 
-    private float freq;
+  private float freq;
 
-    PhraseScorer(Weight weight, TermPositions[] tps, Similarity similarity,
-                 byte[] norms) throws IOException {
-        super(similarity);
-        this.norms = norms;
-        this.weight = weight;
-        this.value = weight.getValue();
+  PhraseScorer(Weight weight, TermPositions[] tps, Similarity similarity,
+               byte[] norms) throws IOException {
+    super(similarity);
+    this.norms = norms;
+    this.weight = weight;
+    this.value = weight.getValue();
 
-        // use PQ to build a sorted list of PhrasePositions
-        pq = new PhraseQueue(tps.length);
-        for (int i = 0; i < tps.length; i++) {
-            pq.put(new PhrasePositions(tps[i], i));
-        }
-        pqToList();
+    // convert tps to a list
+    for (int i = 0; i < tps.length; i++) {
+      PhrasePositions pp = new PhrasePositions(tps[i], i);
+      if (last != null) {			  // add next to end of list
+        last.next = pp;
+      } else
+        first = pp;
+      last = pp;
     }
 
-    public final void score(HitCollector results, int end) throws IOException {
-        Similarity similarity = getSimilarity();
-        while (last.doc < end) {			  // find doc w/ all the terms
-            while (first.doc < last.doc) {		  // scan forward in first
-                do {
-                    first.next();
-                } while (first.doc < last.doc);
-                firstToLast();
-                if (last.doc >= end)
-                    return;
-            }
+    pq = new PhraseQueue(tps.length);             // construct empty pq
 
-            // found doc with all terms
-            freq = phraseFreq();                        // check for phrase
+  }
 
-            if (freq > 0.0) {
-                float score = similarity.tf(freq) * value;  // compute score
-                score *= Similarity.decodeNorm(norms[first.doc]); // normalize
-                results.collect(first.doc, score);	  // add to results
-            }
-            last.next();				  // resume scanning
-        }
+  public int doc() { return first.doc; }
+
+  public boolean next() throws IOException {
+    if (firstTime) {
+      sort();
+      firstTime = false;
+    } else if (more) {
+      more = last.next();                         // trigger further scanning
     }
 
-    protected abstract float phraseFreq() throws IOException;
+    while (more) {
+      while (more && first.doc < last.doc) {      // find doc w/ all the terms
+        more = first.skipTo(last.doc);            // skip first upto last
+        firstToLast();                            // and move it to the end
+      }
 
-    protected final void pqToList() {
-        last = first = null;
-        while (pq.top() != null) {
-            PhrasePositions pp = (PhrasePositions) pq.pop();
-            if (last != null) {			  // add next to end of list
-                last.next = pp;
-            } else
-                first = pp;
-            last = pp;
-            pp.next = null;
-        }
+      if (more) {
+        // found a doc with all of the terms
+        freq = phraseFreq();                      // check for phrase
+        if (freq == 0.0f)                         // no match
+          more = last.next();                     // trigger further scanning
+        else
+          return true;                            // found a match
+      }
     }
+    return false;                                 // no more matches
+  }
 
-    protected final void firstToLast() {
-        last.next = first;			  // move first to end of list
-        last = first;
-        first = first.next;
-        last.next = null;
+  /** Scores the current doc: tf(freq) * value, scaled by the doc's norm. */
+  public float score() throws IOException {
+    float unnormalized = getSimilarity().tf(freq) * value;
+    return unnormalized * Similarity.decodeNorm(norms[first.doc]);
+  }
+
+  /** Scans forward with next() to the first phrase match at or past target. */
+  public boolean skipTo(int target) throws IOException {
+    while (next()) {                              // next() stops only on real
+      if (doc() >= target)                        // matches and sets freq
+        return true;
     }
+    return false;                                 // ran out of matches
+  }
 
-    public Explanation explain(final int doc) throws IOException {
-        Explanation tfExplanation = new Explanation();
 
-        score(new HitCollector() {
-            public final void collect(int d, float score) {
-            }
-        }, doc + 1);
+  protected abstract float phraseFreq() throws IOException;
 
-        float phraseFreq = (first.doc == doc) ? freq : 0.0f;
-        tfExplanation.setValue(getSimilarity().tf(phraseFreq));
-        tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");
-
-        return tfExplanation;
+  /** Steps every PhrasePositions to its next doc and rebuilds the list from
+   * pq; gives up, leaving the list as it was, once any of them is exhausted. */
+  private void sort() throws IOException {
+    pq.clear();
+    for (PhrasePositions pp = first; more && pp != null; pp = pp.next) {
+      more = pp.next();
+      if (!more)
+        return;
+      pq.put(pp);
     }
+    pqToList();
+  }
+
+  /** Pops everything off pq and relinks it as the first..last list, in the
+   * order pq hands the entries back. */
+  protected final void pqToList() {
+    first = last = null;
+    while (pq.top() != null) {
+      PhrasePositions popped = (PhrasePositions) pq.pop();
+      if (last == null) first = popped;           // list was empty
+      else last.next = popped;                    // append after current tail
+      last = popped;
+      popped.next = null;                         // popped is the new tail
+    }
+  }
+
+  protected final void firstToLast() {            // rotates the list by one
+    last.next = first;                            // old head goes after the tail
+    last = first;                                 // ...and becomes the new tail
+    first = first.next;                           // second entry is the new head
+    last.next = null;                             // cut the link tail -> new head
+  }
+
+  public Explanation explain(final int doc) throws IOException {
+    Explanation tfExplanation = new Explanation();
+
+    boolean found;                                // doc()/freq are stale if false
+    while ((found = next()) && doc() < doc) {}
+
+    float phraseFreq = (found && doc() == doc) ? freq : 0.0f;
+    tfExplanation.setValue(getSimilarity().tf(phraseFreq));
+    tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");
+    return tfExplanation;
+  }
 
 }
diff --git a/src/java/org/apache/lucene/search/Scorer.java b/src/java/org/apache/lucene/search/Scorer.java
index 68d03a41de0..0117f91e2b9 100644
--- a/src/java/org/apache/lucene/search/Scorer.java
+++ b/src/java/org/apache/lucene/search/Scorer.java
@@ -70,11 +70,39 @@ public abstract class Scorer {
     return this.similarity;
   }
 
-  /** Scores hits and passes them to a collector.  Stops at the last document
-   * before <code>maxDoc</code>.  If called repeatedly, will restart at point
-   * where it last left off.
+  /** Feeds every remaining match and its score to the collector. */
+  public void score(HitCollector hc) throws IOException {
+    for (boolean found = next(); found; found = next()) {
+      hc.collect(doc(), score());
+    }
+  }
+
+  /** Advance to the next document matching the query.  Returns true iff there
+   * is another match. */
+  public abstract boolean next() throws IOException;
+
+  /** Returns the current document number.  Initially invalid, until {@link
+   * #next()} is called the first time. */
+  public abstract int doc();
+
+  /** Returns the score of the current document.  Initially invalid, until
+   * {@link #next()} is called the first time. */
+  public abstract float score() throws IOException;
+
+  /** Skips to the first match beyond the current whose document number is
+   * greater than or equal to <i>target</i>. <p>Returns true iff there is such
+   * a match.  <p>Behaves as if written: <pre>
+   *   boolean skipTo(int target) {
+   *     do {
+   *       if (!next())
+   * 	     return false;
+   *     } while (target > doc());
+   *     return true;
+   *   }
+   * </pre>
+   * Most implementations are considerably more efficient than that.
    */
-  public abstract void score(HitCollector hc, int maxDoc) throws IOException;
+  public abstract boolean skipTo(int target) throws IOException;
 
   /** Returns an explanation of the score for <code>doc</code>. */
   public abstract Explanation explain(int doc) throws IOException;
diff --git a/src/java/org/apache/lucene/search/TermScorer.java b/src/java/org/apache/lucene/search/TermScorer.java
index d14d8f76d20..8a32b4bfdd3 100644
--- a/src/java/org/apache/lucene/search/TermScorer.java
+++ b/src/java/org/apache/lucene/search/TermScorer.java
@@ -83,44 +83,56 @@ final class TermScorer extends Scorer {
 
     for (int i = 0; i < SCORE_CACHE_SIZE; i++)
       scoreCache[i] = getSimilarity().tf(i) * weightValue;
-
-    pointerMax = termDocs.read(docs, freqs);	  // fill buffers
-
-    if (pointerMax != 0)
-      doc = docs[0];
-    else {
-      termDocs.close();				  // close stream
-      doc = Integer.MAX_VALUE;			  // set to sentinel value
-    }
   }
 
-  public final void score(HitCollector c, final int end) throws IOException {
-    int d = doc;				  // cache doc in local
-    Similarity similarity = getSimilarity();      // cache sim in local
-    while (d < end) {				  // for docs in window
-      final int f = freqs[pointer];
-      float score =				  // compute tf(f)*weight
-	f < SCORE_CACHE_SIZE			  // check cache
-	 ? scoreCache[f]			  // cache hit
-	 : similarity.tf(f)*weightValue;          // cache miss
+  public int doc() { return doc; }
 
-      score *= Similarity.decodeNorm(norms[d]);	  // normalize for field
+  /** Steps to the next buffered doc, refilling the buffers from termDocs
+   * once they are used up. */
+  public boolean next() throws IOException {
+    if (++pointer >= pointerMax) {                // buffer used up
+      pointerMax = termDocs.read(docs, freqs);    // refill buffer
+      if (pointerMax == 0) {                      // nothing more to read
+        termDocs.close();                         // close stream
+        doc = Integer.MAX_VALUE;                  // set to sentinel value
+        return false;
+      }
+      pointer = 0;                                // restart at head of buffer
+    }
+    doc = docs[pointer];
+    return true;
+  }
 
-      c.collect(d, score);			  // collect score
+  public float score() throws IOException {
+    int f = freqs[pointer];
+    float raw =                                   // compute tf(f)*weight
+      f < SCORE_CACHE_SIZE			  // check cache
+      ? scoreCache[f]                             // cache hit
+      : getSimilarity().tf(f)*weightValue;        // cache miss
 
-      if (++pointer == pointerMax) {
-	pointerMax = termDocs.read(docs, freqs);  // refill buffers
-	if (pointerMax != 0) {
-	  pointer = 0;
-	} else {
-	  termDocs.close();			  // close stream
-	  doc = Integer.MAX_VALUE;		  // set to sentinel value
-	  return;
-	}
-      } 
-      d = docs[pointer];
+    return raw * Similarity.decodeNorm(norms[doc]); // normalize for field
+  }
+
+  public boolean skipTo(int target) throws IOException {
+    // first scan in cache
+    for (pointer++; pointer < pointerMax; pointer++) {
+      if (!(target > docs[pointer])) {
+        doc = docs[pointer];
+        return true;
+      }
     }
-    doc = d;					  // flush cache
+
+    // not found in cache, seek underlying stream
+    boolean result = termDocs.skipTo(target);
+    if (result) {
+      pointerMax = 1;
+      pointer = 0;
+      docs[pointer] = doc = termDocs.doc();
+      freqs[pointer] = termDocs.freq();
+    } else {
+      doc = Integer.MAX_VALUE;
+    }
+    return result;
   }
 
   public Explanation explain(int doc) throws IOException {
diff --git a/src/java/org/apache/lucene/store/RAMDirectory.java b/src/java/org/apache/lucene/store/RAMDirectory.java
index 0faaff869e1..c0d7c1029de 100644
--- a/src/java/org/apache/lucene/store/RAMDirectory.java
+++ b/src/java/org/apache/lucene/store/RAMDirectory.java
@@ -226,98 +226,3 @@ public final class RAMDirectory extends Directory {
   public final void close() {
   }
 }
-
-
-final class RAMInputStream extends InputStream implements Cloneable {
-  RAMFile file;
-  int pointer = 0;
-
-  public RAMInputStream(RAMFile f) {
-    file = f;
-    length = file.length;
-  }
-
-  /** InputStream methods */
-  public final void readInternal(byte[] dest, int destOffset, int len) {
-    int remainder = len;
-    int start = pointer;
-    while (remainder != 0) {
-      int bufferNumber = start/InputStream.BUFFER_SIZE;
-      int bufferOffset = start%InputStream.BUFFER_SIZE;
-      int bytesInBuffer = InputStream.BUFFER_SIZE - bufferOffset;
-      int bytesToCopy = bytesInBuffer >= remainder ? remainder : bytesInBuffer;
-      byte[] buffer = (byte[])file.buffers.elementAt(bufferNumber);
-      System.arraycopy(buffer, bufferOffset, dest, destOffset, bytesToCopy);
-      destOffset += bytesToCopy;
-      start += bytesToCopy;
-      remainder -= bytesToCopy;
-    }
-    pointer += len;
-  }
-
-  public final void close() {
-  }
-
-  /** Random-access methods */
-  public final void seekInternal(long pos) {
-    pointer = (int)pos;
-  }
-}
-
-
-final class RAMOutputStream extends OutputStream {
-  RAMFile file;
-  int pointer = 0;
-
-  public RAMOutputStream(RAMFile f) {
-    file = f;
-  }
-
-  /** output methods: */
-  public final void flushBuffer(byte[] src, int len) {
-    int bufferNumber = pointer/OutputStream.BUFFER_SIZE;
-    int bufferOffset = pointer%OutputStream.BUFFER_SIZE;
-    int bytesInBuffer = OutputStream.BUFFER_SIZE - bufferOffset;
-    int bytesToCopy = bytesInBuffer >= len ? len : bytesInBuffer;
-
-    if (bufferNumber == file.buffers.size())
-      file.buffers.addElement(new byte[OutputStream.BUFFER_SIZE]);
-
-    byte[] buffer = (byte[])file.buffers.elementAt(bufferNumber);
-    System.arraycopy(src, 0, buffer, bufferOffset, bytesToCopy);
-
-    if (bytesToCopy < len) {			  // not all in one buffer
-      int srcOffset = bytesToCopy;
-      bytesToCopy = len - bytesToCopy;		  // remaining bytes
-      bufferNumber++;
-      if (bufferNumber == file.buffers.size())
-        file.buffers.addElement(new byte[OutputStream.BUFFER_SIZE]);
-      buffer = (byte[])file.buffers.elementAt(bufferNumber);
-      System.arraycopy(src, srcOffset, buffer, 0, bytesToCopy);
-    }
-    pointer += len;
-    if (pointer > file.length)
-      file.length = pointer;
-
-    file.lastModified = System.currentTimeMillis();
-  }
-
-  public final void close() throws IOException {
-    super.close();
-  }
-
-  /** Random-access methods */
-  public final void seek(long pos) throws IOException {
-    super.seek(pos);
-    pointer = (int)pos;
-  }
-  public final long length() throws IOException {
-    return file.length;
-  }
-}
-
-final class RAMFile {
-  Vector buffers = new Vector();
-  long length;
-  long lastModified = System.currentTimeMillis();
-}
diff --git a/src/java/org/apache/lucene/store/RAMFile.java b/src/java/org/apache/lucene/store/RAMFile.java
new file mode 100644
index 00000000000..c151e63c349
--- /dev/null
+++ b/src/java/org/apache/lucene/store/RAMFile.java
@@ -0,0 +1,63 @@
+package org.apache.lucene.store;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001, 2004 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.util.Vector;
+
+class RAMFile {                                   // state of one in-memory file
+  Vector buffers = new Vector();                  // holds byte[] blocks
+  long length;                                    // presumably bytes written -- TODO confirm
+  long lastModified = System.currentTimeMillis(); // starts at creation time
+}
diff --git a/src/java/org/apache/lucene/store/RAMInputStream.java b/src/java/org/apache/lucene/store/RAMInputStream.java
new file mode 100644
index 00000000000..b6038b68e46
--- /dev/null
+++ b/src/java/org/apache/lucene/store/RAMInputStream.java
@@ -0,0 +1,95 @@
+package org.apache.lucene.store;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001, 2004 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+/**
+ * A memory-resident {@link InputStream} implementation.
+ *
+ * @version $Id$
+ */
+
+class RAMInputStream extends InputStream implements Cloneable {
+  private RAMFile file;                           // source of the byte[] blocks
+  private int pointer = 0;                        // next byte to read
+
+  public RAMInputStream(RAMFile f) {
+    file = f;
+    length = file.length;
+  }
+
+  /** Copies len bytes starting at pointer into dest, one block at a time. */
+  public void readInternal(byte[] dest, int destOffset, int len) {
+    int from = pointer;
+    for (int left = len; left != 0; ) {
+      int offsetInBlock = from % BUFFER_SIZE;
+      int chunk = Math.min(BUFFER_SIZE - offsetInBlock, left);
+      byte[] block = (byte[])file.buffers.elementAt(from / BUFFER_SIZE);
+      System.arraycopy(block, offsetInBlock, dest, destOffset, chunk);
+      destOffset += chunk;
+      from += chunk;
+      left -= chunk;
+    }
+    pointer += len;
+  }
+
+  /** Does nothing. */
+  public void close() {
+  }
+
+  /** Repositions the read pointer; pos is narrowed to int. */
+  public void seekInternal(long pos) {
+    pointer = (int)pos;
+  }
+}
diff --git a/src/java/org/apache/lucene/store/RAMOutputStream.java b/src/java/org/apache/lucene/store/RAMOutputStream.java
new file mode 100644
index 00000000000..7d4c5d5375e
--- /dev/null
+++ b/src/java/org/apache/lucene/store/RAMOutputStream.java
@@ -0,0 +1,145 @@
+package org.apache.lucene.store;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001, 2004 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+
+/**
+ * A memory-resident {@link OutputStream} implementation.
+ *
+ * @version $Id$
+ */
+
+public class RAMOutputStream extends OutputStream {
+  private RAMFile file;
+  private int pointer = 0;
+
+  /** Creates an output stream backed by a newly allocated {@link RAMFile}. */
+  public RAMOutputStream() {
+    this(new RAMFile());
+  }
+
+  /** Creates a stream that writes into <code>f</code>, starting at offset 0. */
+  RAMOutputStream(RAMFile f) {
+    file = f;
+  }
+
+  /**
+   * Calls <code>flush()</code>, then hands the first <code>file.length</code>
+   * bytes of the file to <code>out</code>, one buffer at a time.
+   */
+  public void writeTo(OutputStream out) throws IOException {
+    flush();
+    final long end = file.length;
+    int index = 0;
+    for (long pos = 0; pos < end; pos += BUFFER_SIZE) {
+      long left = end - pos;
+      int count = left < BUFFER_SIZE ? (int)left : BUFFER_SIZE;
+      out.writeBytes((byte[])file.buffers.elementAt(index++), count);
+    }
+  }
+
+  /** Seeks back to position 0 and sets the file's length to zero. */
+  public void reset() {
+    try {
+      seek(0);
+    } catch (IOException e) {                     // should never happen
+      throw new RuntimeException(e.toString());
+    }
+    file.length = 0;
+  }
+
+  /**
+   * Copies the first <code>len</code> bytes of <code>src</code> into the file
+   * at the current position, then advances the position, extends the file
+   * length if the write went past it, and stamps the modification time.
+   * Handles a write that crosses into at most one following buffer.
+   */
+  public void flushBuffer(byte[] src, int len) {
+    int index = pointer / BUFFER_SIZE;
+    int offset = pointer % BUFFER_SIZE;
+    int head = Math.min(len, BUFFER_SIZE - offset); // part that fits here
+    System.arraycopy(src, 0, bufferAt(index), offset, head);
+    if (head < len) {                             // rest goes to next buffer
+      System.arraycopy(src, head, bufferAt(index + 1), 0, len - head);
+    }
+    pointer += len;
+    if (pointer > file.length)
+      file.length = pointer;
+    file.lastModified = System.currentTimeMillis();
+  }
+
+  /** Returns buffer <code>index</code>, appending it if it is the next one. */
+  private byte[] bufferAt(int index) {
+    if (index == file.buffers.size())
+      file.buffers.addElement(new byte[BUFFER_SIZE]);
+    return (byte[])file.buffers.elementAt(index);
+  }
+
+  public void close() throws IOException {
+    super.close();
+  }
+
+  /** Seeks via the superclass, then moves the file position to match. */
+  public void seek(long pos) throws IOException {
+    super.seek(pos);
+    pointer = (int)pos;
+  }
+
+  /** Returns the current length of the underlying file. */
+  public long length() {
+    return file.length;
+  }
+}
diff --git a/src/test/org/apache/lucene/ThreadSafetyTest.java b/src/test/org/apache/lucene/ThreadSafetyTest.java
index 017c92e72a5..d3638697495 100644
--- a/src/test/org/apache/lucene/ThreadSafetyTest.java
+++ b/src/test/org/apache/lucene/ThreadSafetyTest.java
@@ -54,6 +54,7 @@ package org.apache.lucene;
  * <http://www.apache.org/>.
  */
 
+import org.apache.lucene.util.*;
 import org.apache.lucene.store.*;
 import org.apache.lucene.document.*;
 import org.apache.lucene.analysis.*;
@@ -93,7 +94,7 @@ class ThreadSafetyTest {
           Document d = new Document();
           int n = RANDOM.nextInt();
           d.add(Field.Keyword("id", Integer.toString(n)));
-          d.add(Field.UnStored("contents", intToEnglish(n)));
+          d.add(Field.UnStored("contents", English.intToEnglish(n)));
           System.out.println("Adding " + n);
           
           // Switch between single and multiple file segments
@@ -151,7 +152,7 @@ class ThreadSafetyTest {
       throws Exception {
       System.out.println("Searching for " + n);
       Hits hits =
-        searcher.search(QueryParser.parse(intToEnglish(n), "contents",
+        searcher.search(QueryParser.parse(English.intToEnglish(n), "contents",
                                           ANALYZER));
       System.out.println("Search for " + n + ": total=" + hits.length());
       for (int j = 0; j < Math.min(3, hits.length()); j++) {
@@ -197,76 +198,4 @@ class ThreadSafetyTest {
     SearcherThread searcherThread3 = new SearcherThread(true);
     searcherThread3.start();
   }
-
-  private static String intToEnglish(int i) {
-    StringBuffer result = new StringBuffer();
-    intToEnglish(i, result);
-    return result.toString();
-  }
-
-  private static void intToEnglish(int i, StringBuffer result) {
-    if (i < 0) {
-      result.append("minus ");
-      i = -i;
-    }
-    if (i >= 1000000000) {			  // billions
-      intToEnglish(i/1000000000, result);
-      result.append("billion, ");
-      i = i%1000000000;
-    }
-    if (i >= 1000000) {				  // millions
-      intToEnglish(i/1000000, result);
-      result.append("million, ");
-      i = i%1000000;
-    }
-    if (i >= 1000) {				  // thousands
-      intToEnglish(i/1000, result);
-      result.append("thousand, ");
-      i = i%1000;
-    }
-    if (i >= 100) {				  // hundreds
-      intToEnglish(i/100, result);
-      result.append("hundred ");
-      i = i%100;
-    }
-    if (i >= 20) {
-      switch (i/10) {
-      case 9 : result.append("ninety"); break;
-      case 8 : result.append("eighty"); break;
-      case 7 : result.append("seventy"); break;
-      case 6 : result.append("sixty"); break;
-      case 5 : result.append("fifty"); break;
-      case 4 : result.append("forty"); break;
-      case 3 : result.append("thirty"); break;
-      case 2 : result.append("twenty"); break;
-      }
-      i = i%10;
-      if (i == 0)
-        result.append(" ");
-      else 
-        result.append("-");
-    }
-    switch (i) {
-    case 19 : result.append("nineteen "); break;
-    case 18 : result.append("eighteen "); break;
-    case 17 : result.append("seventeen "); break;
-    case 16 : result.append("sixteen "); break;
-    case 15 : result.append("fifteen "); break;
-    case 14 : result.append("fourteen "); break;
-    case 13 : result.append("thirteen "); break;
-    case 12 : result.append("twelve "); break;
-    case 11 : result.append("eleven "); break;
-    case 10 : result.append("ten "); break;
-    case 9 : result.append("nine "); break;
-    case 8 : result.append("eight "); break;
-    case 7 : result.append("seven "); break;
-    case 6 : result.append("six "); break;
-    case 5 : result.append("five "); break;
-    case 4 : result.append("four "); break;
-    case 3 : result.append("three "); break;
-    case 2 : result.append("two "); break;
-    case 1 : result.append("one "); break;
-    case 0 : result.append(""); break;
-    }
-  }
 }
diff --git a/src/test/org/apache/lucene/search/TestBasics.java b/src/test/org/apache/lucene/search/TestBasics.java
new file mode 100644
index 00000000000..21d6a5e943d
--- /dev/null
+++ b/src/test/org/apache/lucene/search/TestBasics.java
@@ -0,0 +1,135 @@
+package org.apache.lucene.search;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001, 2004 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import junit.framework.TestCase;
+import org.apache.lucene.util.English;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.RAMDirectory;
+
+/**
+ * Tests basic search capabilities.
+ *
+ * @author Doug Cutting
+ */
+public class TestBasics extends TestCase {
+  private IndexSearcher searcher;
+
+  /** Indexes the English spellings of 0..999, one document per number. */
+  public void setUp() throws Exception {
+    RAMDirectory directory = new RAMDirectory();
+    IndexWriter writer
+      = new IndexWriter(directory, new SimpleAnalyzer(), true);
+    for (int i = 0; i < 1000; i++) {
+      Document doc = new Document();
+      doc.add(Field.Text("field", English.intToEnglish(i)));
+      writer.addDocument(doc);
+    }
+    writer.close();
+    searcher = new IndexSearcher(directory);
+  }
+
+  public void tearDown() throws Exception {
+    searcher.close();                             // opened in setUp
+  }
+
+  public void testTerm() throws Exception {
+    Query query = new TermQuery(new Term("field", "seventy"));
+    Hits hits = searcher.search(query);
+    assertEquals(100, hits.length());             // 70..79 in each hundred
+  }
+
+  public void testTerm2() throws Exception {
+    Query query = new TermQuery(new Term("field", "seventish"));
+    Hits hits = searcher.search(query);
+    assertEquals(0, hits.length());               // term is never indexed
+  }
+
+  public void testPhrase() throws Exception {
+    PhraseQuery query = new PhraseQuery();
+    query.add(new Term("field", "seventy"));
+    query.add(new Term("field", "seven"));
+    Hits hits = searcher.search(query);
+    assertEquals(10, hits.length());              // 77, 177, ... 977
+  }
+
+  public void testPhrase2() throws Exception {
+    PhraseQuery query = new PhraseQuery();
+    query.add(new Term("field", "seventish"));
+    query.add(new Term("field", "sevenon"));
+    Hits hits = searcher.search(query);
+    assertEquals(0, hits.length());               // terms are never indexed
+  }
+
+  public void testBoolean() throws Exception {
+    BooleanQuery query = new BooleanQuery();
+    query.add(new TermQuery(new Term("field", "seventy")), true, false);
+    query.add(new TermQuery(new Term("field", "seven")), true, false);
+    Hits hits = searcher.search(query);
+    assertEquals(19, hits.length());              // x77 and 77x; 777 once
+  }
+
+  public void testBoolean2() throws Exception {
+    BooleanQuery query = new BooleanQuery();
+    query.add(new TermQuery(new Term("field", "sevento")), true, false);
+    query.add(new TermQuery(new Term("field", "sevenly")), true, false);
+    Hits hits = searcher.search(query);
+    assertEquals(0, hits.length());               // terms are never indexed
+  }
+}
diff --git a/src/test/org/apache/lucene/util/English.java b/src/test/org/apache/lucene/util/English.java
new file mode 100644
index 00000000000..1072d54b394
--- /dev/null
+++ b/src/test/org/apache/lucene/util/English.java
@@ -0,0 +1,140 @@
+package org.apache.lucene.util;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001, 2004 The Apache Software Foundation.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ *    if any, must include the following acknowledgment:
+ *       "This product includes software developed by the
+ *        Apache Software Foundation (http://www.apache.org/)."
+ *    Alternately, this acknowledgment may appear in the software itself,
+ *    if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ *    "Apache Lucene" must not be used to endorse or promote products
+ *    derived from this software without prior written permission. For
+ *    written permission, please contact apache@apache.org.
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ *    "Apache Lucene", nor may "Apache" appear in their name, without
+ *    prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+
+/**
+ * Test helper that spells out <code>int</code> values as English words.
+ */
+public class English {
+
+  /** Words for 1..19, indexed by value; index 0 is unused padding. */
+  private static final String[] SMALL = {
+    "", "one", "two", "three", "four", "five", "six", "seven", "eight",
+    "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
+    "sixteen", "seventeen", "eighteen", "nineteen"
+  };
+
+  /** Words for 20, 30, ... 90, indexed by tens digit; 0 and 1 are unused. */
+  private static final String[] TENS = {
+    "", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy",
+    "eighty", "ninety"
+  };
+
+  /**
+   * Returns the English spelling of <code>i</code>, e.g. "seventy-seven "
+   * for 77.  Every spelling except "zero" ends with a single space.
+   */
+  public static String intToEnglish(int i) {
+    StringBuffer result = new StringBuffer();
+    intToEnglish(i, result);
+    return result.toString();
+  }
+
+  /**
+   * Appends the English spelling of <code>i</code> to <code>result</code>.
+   * Zero is spelled "zero"; negative values are prefixed with "minus ".
+   */
+  public static void intToEnglish(int i, StringBuffer result) {
+    if (i == 0) {
+      result.append("zero");
+      return;
+    }
+    long n = i;                                   // widen before negating:
+    if (n < 0) {                                  // -Integer.MIN_VALUE does
+      result.append("minus ");                    // not fit in an int
+      n = -n;
+    }
+    appendPositive(n, result);
+  }
+
+  /** Appends the spelling of <code>n</code>, which must be positive. */
+  private static void appendPositive(long n, StringBuffer result) {
+    if (n >= 1000000000) {                        // billions
+      appendPositive(n/1000000000, result);
+      result.append("billion, ");
+      n = n%1000000000;
+    }
+    if (n >= 1000000) {                           // millions
+      appendPositive(n/1000000, result);
+      result.append("million, ");
+      n = n%1000000;
+    }
+    if (n >= 1000) {                              // thousands
+      appendPositive(n/1000, result);
+      result.append("thousand, ");
+      n = n%1000;
+    }
+    if (n >= 100) {                               // hundreds
+      appendPositive(n/100, result);
+      result.append("hundred ");
+      n = n%100;
+    }
+    if (n >= 20) {
+      result.append(TENS[(int)(n/10)]);
+      n = n%10;
+      result.append(n == 0 ? " " : "-");          // "seventy " vs "seventy-"
+    }
+    if (n > 0) {                                  // 1..19; zero adds nothing
+      result.append(SMALL[(int)n]).append(" ");
+    }
+  }
+
+  /** Prints the spelling of the integer given as the first argument. */
+  public static void main(String[] args) {
+    System.out.println(intToEnglish(Integer.parseInt(args[0])));
+  }
+
+}