Optimized TermDocs.skipTo() and changed scorers to take advantage of it.

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150170 13f79535-47bb-0310-9956-ffa450edef68
2004-01-15 22:42:35 +00:00 · 2004-01-15 22:42:35 +00:00 · 6f8347c6fe
parent 07829a37a7
commit 6f8347c6fe
24 changed files with 1272 additions and 325 deletions
--- a/CHANGES.txt
+++ b/CHANGES.txt
@ -2,6 +2,36 @@ Lucene Change Log
 $Id$
 1.4 RC1
 1. Changed the format of the .tis file, so that:
    - it has a format version number, which makes it easier to
      back-compatibly change file formats in the future.
    - the term count is now stored as a long.  This was the one aspect
      of Lucene's file formats that limited index size.
    - a few internal index parameters are now stored in the index, so
      that they can (in theory) now be changed from index to index,
      although there is not yet an API to do so.
    These changes are backward compatible.  The new code can read old
    indexes.  But old code will not be able to read new indexes. (cutting)
 2. Added an optimized implementation of TermDocs.skipTo().  A skip
    table is now stored for each term in the .frq file.  This only
    adds a percent or two to overall index size, but can substantially
    speed up many searches.  (cutting)
 3. Restructured the Scorer API and all Scorer implementations to take
    advantage of an optimized TermDocs.skipTo() implementation.  In
    particular, PhraseQuerys and conjunctive BooleanQuerys are
    faster when one clause has substantially fewer matches than the
    others.  (A conjunctive BooleanQuery is a BooleanQuery where all
    clauses are required.)  (cutting)
 1.3 final
 1. Added catch of BooleanQuery$TooManyClauses in QueryParser to
--- a/src/java/org/apache/lucene/index/DocumentWriter.java
+++ b/src/java/org/apache/lucene/index/DocumentWriter.java
@ -291,7 +291,7 @@ final class DocumentWriter {
        Posting posting = postings[i];
        // add an entry to the dictionary with pointers to prox and freq files
-        ti.set(1, freq.getFilePointer(), prox.getFilePointer());
+        ti.set(1, freq.getFilePointer(), prox.getFilePointer(), -1);
        tis.add(posting.term, ti);
        // add an entry to the freq file
--- a/src/java/org/apache/lucene/index/SegmentMerger.java
+++ b/src/java/org/apache/lucene/index/SegmentMerger.java
@ -62,6 +62,7 @@ import java.io.IOException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.OutputStream;
 import org.apache.lucene.store.InputStream;
 import org.apache.lucene.store.RAMOutputStream;
 import org.apache.lucene.util.BitVector;
 final class SegmentMerger {
@ -246,17 +247,21 @@ final class SegmentMerger {
    int df = appendPostings(smis, n);		  // append posting data
    long skipPointer = writeSkip();
    if (df > 0) {
      // add an entry to the dictionary with pointers to prox and freq files
-      termInfo.set(df, freqPointer, proxPointer);
+      termInfo.set(df, freqPointer, proxPointer, (int)(skipPointer-freqPointer));
      termInfosWriter.add(smis[0].term, termInfo);
    }
  }
  private final int appendPostings(SegmentMergeInfo[] smis, int n)
       throws IOException {
    final int skipInterval = termInfosWriter.skipInterval;
    int lastDoc = 0;
    int df = 0;					  // number of docs w/ term
    resetSkip();
    for (int i = 0; i < n; i++) {
      SegmentMergeInfo smi = smis[i];
      TermPositions postings = smi.postings;
@ -272,6 +277,12 @@ final class SegmentMerger {
        if (doc < lastDoc)
          throw new IllegalStateException("docs out of order");
        df++;
        if ((df % skipInterval) == 0) {
          bufferSkip(lastDoc);
        }
        int docCode = (doc - lastDoc) << 1;	  // use low bit to flag freq=1
        lastDoc = doc;
@ -289,13 +300,43 @@ final class SegmentMerger {
          proxOutput.writeVInt(position - lastPosition);
          lastPosition = position;
        }
        df++;
      }
    }
    return df;
  }
-  private final void mergeNorms() throws IOException {
+
  private RAMOutputStream skipBuffer = new RAMOutputStream();
  private int lastSkipDoc;
  private long lastSkipFreqPointer;
  private long lastSkipProxPointer;
  /** Prepares skip buffering for a new term: empties the in-memory skip
   * buffer, resets the last buffered doc to 0, and records the current
   * positions of the freq and prox outputs as the base against which the
   * first skip entry's pointer deltas are computed. */
  private void resetSkip() throws IOException {
    skipBuffer.reset();
    lastSkipDoc = 0;
    lastSkipFreqPointer = freqOutput.getFilePointer();
    lastSkipProxPointer = proxOutput.getFilePointer();
  }
  /** Appends one skip entry to the in-memory skip buffer.  An entry is three
   * VInts, each a delta from the previously buffered entry: the document
   * number, the freq output position and the prox output position.
   * @param doc the document number recorded for this skip entry
   */
  private void bufferSkip(int doc) throws IOException {
    final long currentFreq = freqOutput.getFilePointer();
    final long currentProx = proxOutput.getFilePointer();

    // deltas relative to the previous skip entry (or to resetSkip() state)
    final int docDelta = doc - lastSkipDoc;
    final int freqDelta = (int)(currentFreq - lastSkipFreqPointer);
    final int proxDelta = (int)(currentProx - lastSkipProxPointer);

    skipBuffer.writeVInt(docDelta);
    skipBuffer.writeVInt(freqDelta);
    skipBuffer.writeVInt(proxDelta);

    // remember this entry as the base for the next one
    lastSkipDoc = doc;
    lastSkipFreqPointer = currentFreq;
    lastSkipProxPointer = currentProx;
  }
  /** Copies the buffered skip entries to the end of the freq output.
   * @return the freq output position at which the skip data begins
   */
  private long writeSkip() throws IOException {
    final long skipStart = freqOutput.getFilePointer(); // before skip data
    skipBuffer.writeTo(freqOutput);
    return skipStart;
  }
  private void mergeNorms() throws IOException {
    for (int i = 0; i < fieldInfos.size(); i++) {
      FieldInfo fi = fieldInfos.fieldInfo(i);
      if (fi.isIndexed) {
--- a/src/java/org/apache/lucene/index/SegmentTermDocs.java
+++ b/src/java/org/apache/lucene/index/SegmentTermDocs.java
@ -61,16 +61,27 @@ import org.apache.lucene.store.InputStream;
 class SegmentTermDocs implements TermDocs {
  protected SegmentReader parent;
  private InputStream freqStream;
-  private int freqCount;
+  private int count;
  private int df;
  private BitVector deletedDocs;
  int doc = 0;
  int freq;
  private int skipInterval;
  private int skipCount;
  private InputStream skipStream;
  private int skipDoc;
  private long freqPointer;
  private long proxPointer;
  private long skipPointer;
  private boolean haveSkipped;
  /** Creates a postings enumerator for the given segment reader.  Takes a
   * private clone of the parent's freq stream, keeps a reference to the
   * parent's deleted-docs bit vector, and reads the skip interval from the
   * parent's term infos reader. */
  SegmentTermDocs(SegmentReader parent)
    throws IOException {
    this.parent = parent;
    this.freqStream = (InputStream)parent.freqStream.clone();
    this.deletedDocs = parent.deletedDocs;
    this.skipInterval = parent.tis.getSkipInterval();
  }
  public void seek(Term term) throws IOException {
@ -88,12 +99,19 @@ class SegmentTermDocs implements TermDocs {
  }
  void seek(TermInfo ti) throws IOException {
    count = 0;
    if (ti == null) {
-      freqCount = 0;
+      df = 0;
    } else {
-      freqCount = ti.docFreq;
+      df = ti.docFreq;
      doc = 0;
-      freqStream.seek(ti.freqPointer);
+      skipDoc = 0;
      skipCount = 0;
      freqPointer = ti.freqPointer;
      proxPointer = ti.proxPointer;
      skipPointer = freqPointer + ti.skipOffset;
      freqStream.seek(freqPointer);
      haveSkipped = false;
    }
  }
@ -109,7 +127,7 @@ class SegmentTermDocs implements TermDocs {
  public boolean next() throws IOException {
    while (true) {
-      if (freqCount == 0)
+      if (count == df)
 	return false;
      int docCode = freqStream.readVInt();
@ -119,7 +137,7 @@ class SegmentTermDocs implements TermDocs {
      else
 	freq = freqStream.readVInt();		  // else read freq
-      freqCount--;
+      count++;
      if (deletedDocs == null || !deletedDocs.get(doc))
 	break;
@ -131,9 +149,9 @@ class SegmentTermDocs implements TermDocs {
  /** Optimized implementation. */
  public int read(final int[] docs, final int[] freqs)
      throws IOException {
-    final int end = docs.length;
+    final int length = docs.length;
    int i = 0;
-    while (i < end && freqCount > 0) {
+    while (i < length && count < df) {
      // manually inlined call to next() for speed
      final int docCode = freqStream.readVInt();
@ -142,7 +160,7 @@ class SegmentTermDocs implements TermDocs {
 	freq = 1;				  // freq is one
      else
 	freq = freqStream.readVInt();		  // else read freq
-      freqCount--;
+      count++;
      if (deletedDocs == null || !deletedDocs.get(doc)) {
 	docs[i] = doc;
@ -153,12 +171,61 @@ class SegmentTermDocs implements TermDocs {
    return i;
  }
-  /** As yet unoptimized implementation. */
+  /** Overridden by SegmentTermPositions to skip in prox stream. */
  protected void skipProx(long proxPointer) throws IOException {}
  /** Optimized implementation.  When the term's document frequency exceeds
   * the skip interval, skip entries are scanned to find the last one whose
   * document number is below <code>target</code>, and the freq stream (and,
   * via {@link #skipProx(long)}, the prox stream) is repositioned there.
   * The remaining distance is always covered by calling {@link #next()}
   * until the current document is at or past <code>target</code>.
   * @param target the document number to skip to
   * @return true if a document at or past <code>target</code> was found,
   *   false if the postings were exhausted first
   */
  public boolean skipTo(int target) throws IOException {
    if (df > skipInterval) {                      // optimized case
      if (skipStream == null)
        skipStream = (InputStream)freqStream.clone(); // lazily clone
      if (!haveSkipped) {                          // lazily seek skip stream
        skipStream.seek(skipPointer);
        haveSkipped = true;
      }
      // scan skip data
      // last* hold the most recent skip entry known to be before target
      int lastSkipDoc = skipDoc;
      long lastFreqPointer = freqStream.getFilePointer();
      long lastProxPointer = -1;
      // NOTE(review): presumably offsets the count for entries already read
      // within the current skip interval -- confirm
      int numSkipped = -1 -(count % skipInterval);
      while (target > skipDoc) {
        lastSkipDoc = skipDoc;
        lastFreqPointer = freqPointer;
        lastProxPointer = proxPointer;
        if (skipDoc >= doc)
          numSkipped += skipInterval;
        if ((count + numSkipped + skipInterval) > df)
          break;                                  // no more skips
        // each skip entry is three VInt deltas: doc, freq pointer, prox pointer
        skipDoc += skipStream.readVInt();
        freqPointer += skipStream.readVInt();
        proxPointer += skipStream.readVInt();
        skipCount++;
      }
      // if we found something to skip, then skip it
      // (only ever seek forward of the current freq stream position)
      if (lastFreqPointer > freqStream.getFilePointer()) {
        freqStream.seek(lastFreqPointer);
        skipProx(lastProxPointer);
        doc = lastSkipDoc;
        count += numSkipped;
      }
    }
    // done skipping, now just scan
    do {
      if (!next())
 	return false;
    } while (target > doc);
    return true;
  }
 }
--- a/src/java/org/apache/lucene/index/SegmentTermEnum.java
+++ b/src/java/org/apache/lucene/index/SegmentTermEnum.java
@ -60,14 +60,17 @@ import org.apache.lucene.store.InputStream;
 final class SegmentTermEnum extends TermEnum implements Cloneable {
  private InputStream input;
  private FieldInfos fieldInfos;
-  int size;
+  long size;
-  int position = -1;
+  long position = -1;
  private Term term = new Term("", "");
  private TermInfo termInfo = new TermInfo();
-  boolean isIndex = false;
+  private int format;
  private boolean isIndex = false;
  long indexPointer = 0;
  int indexInterval;
  int skipInterval;
  Term prev;
  private char[] buffer = {};
@ -76,8 +79,34 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
       throws IOException {
    input = i;
    fieldInfos = fis; 
    size = input.readInt();
    isIndex = isi;
    int firstInt = input.readInt();
    if (firstInt >= 0) {
      // original-format file, without explicit format version number
      format = 0;
      size = firstInt;
      // back-compatible settings
      indexInterval = 128;
      skipInterval = Integer.MAX_VALUE;
    } else {
      // we have a format version number
      format = firstInt;
      // check that it is a format we can understand
      if (format < TermInfosWriter.FORMAT)
        throw new IOException("Unknown format version:" + format);
      size = input.readLong();                    // read the size
      if (!isIndex) {
        indexInterval = input.readInt();
        skipInterval = input.readInt();
      }
    }
  }
  protected Object clone() {
@ -117,6 +146,12 @@ final class SegmentTermEnum extends TermEnum implements Cloneable {
    termInfo.freqPointer += input.readVLong();	  // read freq pointer
    termInfo.proxPointer += input.readVLong();	  // read prox pointer
    if (!isIndex) {
      if (termInfo.docFreq > skipInterval) {
        termInfo.skipOffset = input.readVInt();
      }
    }
    if (isIndex)
      indexPointer += input.readVLong();	  // read index pointer
--- a/src/java/org/apache/lucene/index/SegmentTermPositions.java
+++ b/src/java/org/apache/lucene/index/SegmentTermPositions.java
@ -109,4 +109,11 @@ extends SegmentTermDocs implements TermPositions {
    throw new UnsupportedOperationException();
  }
  /** Called by super.skipTo().  Repositions the prox stream at the given
   * pointer and resets <code>proxCount</code> to zero.
   * @param proxPointer the position in the prox stream to seek to
   */
  protected void skipProx(long proxPointer) throws IOException {
    proxStream.seek(proxPointer);
    proxCount = 0;
  }
 }
--- a/src/java/org/apache/lucene/index/TermInfo.java
+++ b/src/java/org/apache/lucene/index/TermInfo.java
@ -62,6 +62,7 @@ final class TermInfo {
  long freqPointer = 0;
  long proxPointer = 0;
  int skipOffset;
  TermInfo() {}
@ -75,17 +76,21 @@ final class TermInfo {
    docFreq = ti.docFreq;
    freqPointer = ti.freqPointer;
    proxPointer = ti.proxPointer;
    skipOffset = ti.skipOffset;
  }
-  final void set(int df, long fp, long pp) {
+  final void set(int docFreq,
-    docFreq = df;
+                 long freqPointer, long proxPointer, int skipOffset) {
-    freqPointer = fp;
+    this.docFreq = docFreq;
-    proxPointer = pp;
+    this.freqPointer = freqPointer;
    this.proxPointer = proxPointer;
    this.skipOffset = skipOffset;
  }
  /** Copies every field of <code>ti</code> (document frequency, freq and
   * prox pointers, and skip offset) into this instance.
   * @param ti the term info to copy from
   */
  final void set(TermInfo ti) {
    // delegate to the field-wise setter so both overloads stay in sync
    set(ti.docFreq, ti.freqPointer, ti.proxPointer, ti.skipOffset);
  }
 }
--- a/src/java/org/apache/lucene/index/TermInfosReader.java
+++ b/src/java/org/apache/lucene/index/TermInfosReader.java
@ -68,7 +68,7 @@ final class TermInfosReader {
  private FieldInfos fieldInfos;
  private SegmentTermEnum enumerator;
-  private int size;
+  private long size;
  TermInfosReader(Directory dir, String seg, FieldInfos fis)
       throws IOException {
@ -82,13 +82,17 @@ final class TermInfosReader {
    readIndex();
  }
  /** Returns the skip interval held by this reader's term enumerator. */
  public int getSkipInterval() {
    return enumerator.skipInterval;
  }
  final void close() throws IOException {
    if (enumerator != null)
      enumerator.close();
  }
  /** Returns the number of term/value pairs in the set. */
-  final int size() {
+  final long size() {
    return size;
  }
@ -101,7 +105,7 @@ final class TermInfosReader {
      new SegmentTermEnum(directory.openFile(segment + ".tii"),
 			  fieldInfos, true);
    try {
-      int indexSize = indexEnum.size;
+      int indexSize = (int)indexEnum.size;
      indexTerms = new Term[indexSize];
      indexInfos = new TermInfo[indexSize];
@ -137,7 +141,7 @@ final class TermInfosReader {
  private final void seekEnum(int indexOffset) throws IOException {
    enumerator.seek(indexPointers[indexOffset],
-	      (indexOffset * TermInfosWriter.INDEX_INTERVAL) - 1,
+	      (indexOffset * enumerator.indexInterval) - 1,
 	      indexTerms[indexOffset], indexInfos[indexOffset]);
  }
@ -146,10 +150,10 @@ final class TermInfosReader {
    if (size == 0) return null;
    // optimize sequential access: first try scanning cached enumerator w/o seeking
-    if (enumerator.term() != null			  // term is at or past current
+    if (enumerator.term() != null                 // term is at or past current
 	&& ((enumerator.prev != null && term.compareTo(enumerator.prev) > 0)
 	    || term.compareTo(enumerator.term()) >= 0)) {
-      int enumOffset = (enumerator.position/TermInfosWriter.INDEX_INTERVAL)+1;
+      int enumOffset = (int)(enumerator.position/enumerator.indexInterval)+1;
      if (indexTerms.length == enumOffset	  // but before end of block
 	  || term.compareTo(indexTerms[enumOffset]) < 0)
 	return scanEnum(term);			  // no need to seek
@ -174,10 +178,10 @@ final class TermInfosReader {
    if (size == 0) return null;
    if (enumerator != null && enumerator.term() != null && position >= enumerator.position &&
-	position < (enumerator.position + TermInfosWriter.INDEX_INTERVAL))
+	position < (enumerator.position + enumerator.indexInterval))
      return scanEnum(position);		  // can avoid seek
-    seekEnum(position / TermInfosWriter.INDEX_INTERVAL); // must seek
+    seekEnum(position / enumerator.indexInterval); // must seek
    return scanEnum(position);
  }
@ -190,7 +194,7 @@ final class TermInfosReader {
  }
  /** Returns the position of a Term in the set or -1. */
-  final synchronized int getPosition(Term term) throws IOException {
+  final synchronized long getPosition(Term term) throws IOException {
    if (size == 0) return -1;
    int indexOffset = getIndexOffset(term);
--- a/src/java/org/apache/lucene/index/TermInfosWriter.java
+++ b/src/java/org/apache/lucene/index/TermInfosWriter.java
@ -62,13 +62,36 @@ import org.apache.lucene.store.Directory;
  Directory.  A TermInfos can be written once, in order.  */
 final class TermInfosWriter {
  /** The file format version, a negative number. */
  public static final int FORMAT = -1;
  private FieldInfos fieldInfos;
  private OutputStream output;
  private Term lastTerm = new Term("", "");
  private TermInfo lastTi = new TermInfo();
  private int size = 0;
-  static final int INDEX_INTERVAL = 128;
+  // TODO: the default values for these two parameters should be settable from
  // IndexWriter.  However, once that's done, folks will start setting them to
  // ridiculous values and complaining that things don't work well, as with
  // mergeFactor.  So, let's wait until a number of folks find that alternate
  // values work better.  Note that both of these values are stored in the
  // segment, so that it's safe to change these w/o rebuilding all indexes.
  /** Expert: The fraction of terms in the "dictionary" which should be stored
   * in RAM.  Smaller values use more memory, but make searching slightly
   * faster, while larger values use less memory and make searching slightly
   * slower.  Searching is typically not dominated by dictionary lookup, so
   * tweaking this is rarely useful.*/
  int indexInterval = 128;
  /** Expert: The fraction of {@link TermDocs} entries stored in skip tables,
   * used to accelerate {@link TermDocs#skipTo(int)}.  Larger values result in
   * smaller indexes, greater acceleration, but fewer accelerable cases, while
   * smaller values result in bigger indexes, less acceleration and more
   * accelerable cases. More detailed experiments would be useful here. */
  int skipInterval = 16;
  private long lastIndexPointer = 0;
  private boolean isIndex = false;
@ -91,7 +114,12 @@ final class TermInfosWriter {
    fieldInfos = fis;
    isIndex = isi;
    output = directory.createFile(segment + (isIndex ? ".tii" : ".tis"));
-    output.writeInt(0);				  // leave space for size
+    output.writeInt(FORMAT);                      // write format
    output.writeLong(0);                          // leave space for size
    if (!isIndex) {
      output.writeInt(indexInterval);             // write indexInterval
      output.writeInt(skipInterval);              // write skipInterval
    }
  }
  /** Adds a new <Term, TermInfo> pair to the set.
@ -106,7 +134,7 @@ final class TermInfosWriter {
    if (ti.proxPointer < lastTi.proxPointer)
      throw new IOException("proxPointer out of order");
-    if (!isIndex && size % INDEX_INTERVAL == 0)
+    if (!isIndex && size % indexInterval == 0)
      other.add(lastTerm, lastTi);		  // add an index term
    writeTerm(term);				  // write term
@ -114,6 +142,12 @@ final class TermInfosWriter {
    output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers
    output.writeVLong(ti.proxPointer - lastTi.proxPointer);
    if (!isIndex) {
      if (ti.docFreq > skipInterval) {
        output.writeVInt(ti.skipOffset);
      }
    }
    if (isIndex) {
      output.writeVLong(other.output.getFilePointer() - lastIndexPointer);
      lastIndexPointer = other.output.getFilePointer(); // write pointer
@ -149,8 +183,8 @@ final class TermInfosWriter {
  /** Called to complete TermInfos creation. */
  final void close() throws IOException {
-    output.seek(0);				  // write size at start
+    output.seek(4);				  // write size after format
-    output.writeInt(size);
+    output.writeLong(size);
    output.close();
    if (!isIndex)
--- a/src/java/org/apache/lucene/search/BooleanQuery.java
+++ b/src/java/org/apache/lucene/search/BooleanQuery.java
@ -158,6 +158,37 @@ public class BooleanQuery extends Query {
    }
    public Scorer scorer(IndexReader reader) throws IOException {
      // First see if the (faster) ConjunctionScorer will work.  This can be
      // used when all clauses are required.  Also, at this point a
      // BooleanScorer cannot be embedded in a ConjunctionScorer, as the hits
      // from a BooleanScorer are not always sorted by document number (sigh)
      // and hence BooleanScorer cannot implement skipTo() correctly, which is
      // required by ConjunctionScorer.
      boolean allRequired = true;      
      boolean noneBoolean = true;
      for (int i = 0 ; i < weights.size(); i++) {
        BooleanClause c = (BooleanClause)clauses.elementAt(i);
        if (!c.required)
          allRequired = false;
        if (c.query instanceof BooleanQuery)
          noneBoolean = false;
      }
      if (allRequired && noneBoolean) {           // ConjunctionScorer is okay
        ConjunctionScorer result =
          new ConjunctionScorer(searcher.getSimilarity());
        for (int i = 0 ; i < weights.size(); i++) {
          BooleanClause c = (BooleanClause)clauses.elementAt(i);
          Weight w = (Weight)weights.elementAt(i);
          Scorer subScorer = w.scorer(reader);
          if (subScorer == null)
            return null;
          result.add(subScorer);
        }
        return result;
      }
      // Use good-old BooleanScorer instead.
      BooleanScorer result = new BooleanScorer(searcher.getSimilarity());
      for (int i = 0 ; i < weights.size(); i++) {
--- a/src/java/org/apache/lucene/search/BooleanScorer.java
+++ b/src/java/org/apache/lucene/search/BooleanScorer.java
@ -76,14 +76,17 @@ final class BooleanScorer extends Scorer {
  static final class SubScorer {
    public Scorer scorer;
    public boolean done;
    public boolean required = false;
    public boolean prohibited = false;
    public HitCollector collector;
    public SubScorer next;
    public SubScorer(Scorer scorer, boolean required, boolean prohibited,
-		     HitCollector collector, SubScorer next) {
+		     HitCollector collector, SubScorer next)
      throws IOException {
      this.scorer = scorer;
      this.done = !scorer.next();
      this.required = required;
      this.prohibited = prohibited;
      this.collector = collector;
@ -91,7 +94,8 @@ final class BooleanScorer extends Scorer {
    }
  }
-  final void add(Scorer scorer, boolean required, boolean prohibited) {
+  final void add(Scorer scorer, boolean required, boolean prohibited)
    throws IOException {
    int mask = 0;
    if (required || prohibited) {
      if (nextMask == 0)
@ -120,17 +124,45 @@ final class BooleanScorer extends Scorer {
      coordFactors[i] = getSimilarity().coord(i, maxCoord-1);
  }
-  public final void score(HitCollector results, int maxDoc)
+  private int end;
-    throws IOException {
+  private Bucket current;
  public int doc() { return current.doc; }
  /** Advances to the next document that satisfies the prohibited and
   * required masks.  Buckets already queued in the bucket table are drained
   * first; when the queue is empty, each unfinished sub-scorer is advanced
   * through the next window of <code>BucketTable.SIZE</code> documents and
   * its hits are handed to that sub-scorer's collector.
   * @return true if positioned on a matching document, false when all
   *   sub-scorers are exhausted and no queued bucket matches
   */
  public boolean next() throws IOException {
    boolean more;
    do {
      while (bucketTable.first != null) {         // more queued
        current = bucketTable.first;
        bucketTable.first = current.next;         // pop the queue
        // check prohibited & required
        if ((current.bits & prohibitedMask) == 0 &&
            (current.bits & requiredMask) == requiredMask) {
          return true;
        }
      }
      // refill the queue
      // Reset on every pass: if the flag kept a stale 'true' from an earlier
      // window, the loop would never terminate once all sub-scorers are done.
      more = false;
      end += BucketTable.SIZE;
      for (SubScorer sub = scorers; sub != null; sub = sub.next) {
        Scorer scorer = sub.scorer;
        while (!sub.done && scorer.doc() < end) {
          sub.collector.collect(scorer.doc(), scorer.score());
          sub.done = !scorer.next();
        }
        if (!sub.done) {
          more = true;
        }
      }
    } while (bucketTable.first != null || more);
    return false;
  }
  public float score() throws IOException {
    if (coordFactors == null)
      computeCoordFactors();
-
+    return current.score * coordFactors[current.coord];
    while (currentDoc < maxDoc) {
      currentDoc = Math.min(currentDoc+BucketTable.SIZE, maxDoc);
      for (SubScorer t = scorers; t != null; t = t.next)
 	t.scorer.score(t.collector, currentDoc);
      bucketTable.collectHits(results);
    }
  }
  static final class Bucket {
@ -207,6 +239,10 @@ final class BooleanScorer extends Scorer {
    }
  }
  /** Not supported by this scorer.
   * @throws UnsupportedOperationException always
   */
  public boolean skipTo(int target) throws IOException {
    throw new UnsupportedOperationException();
  }
  public Explanation explain(int doc) throws IOException {
    throw new UnsupportedOperationException();
  }
--- a/src/java/org/apache/lucene/search/ConjunctionScorer.java
+++ b/src/java/org/apache/lucene/search/ConjunctionScorer.java
@ -0,0 +1,155 @@
 package org.apache.lucene.search;
 /* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2004 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Lucene" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Lucene", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */
 import java.io.IOException;
 import java.util.*;
 import org.apache.lucene.index.*;
 /** Scorer for conjunctions, sets of queries, all of which are required.
  *
  * <p>Sub-scorers are kept in a list ordered by their current document
  * number.  A document matches when the first and last sub-scorers (and
  * therefore all of them) are positioned on the same document.  Both
  * {@link #next()} and {@link #skipTo(int)} leave the scorer positioned on
  * such a document, or return false when any sub-scorer is exhausted.
  */
 final class ConjunctionScorer extends Scorer {
  private LinkedList scorers = new LinkedList();
  private boolean firstTime = true;
  private boolean more = true;
  private float coord;

  public ConjunctionScorer(Similarity similarity) {
    super(similarity);
  }

  /** Adds a required sub-scorer.  Intended to be called before iteration
   * starts. */
  final void add(Scorer scorer) throws IOException {
    scorers.addLast(scorer);
  }

  private Scorer first() { return (Scorer)scorers.getFirst(); }
  private Scorer last() { return (Scorer)scorers.getLast(); }

  /** Returns the current document; only meaningful after a call to
   * {@link #next()} or {@link #skipTo(int)} has returned true. */
  public int doc() { return first().doc(); }

  /** Advances to the next document matched by every sub-scorer.
   * @return true if such a document was found
   */
  public boolean next() throws IOException {
    if (firstTime) {
      init();
    } else if (more) {
      more = last().next();                       // trigger further scanning
    }
    return doNext();
  }

  /** Advances the lagging sub-scorers until all are on the same document.
   * Requires the list to be ordered by current document number.
   * @return true if all sub-scorers are on one document, false if any ran out
   */
  private boolean doNext() throws IOException {
    while (more && first().doc() < last().doc()) { // find doc w/ all clauses
      more = first().skipTo(last().doc());      // skip first upto last
      scorers.addLast(scorers.removeFirst());   // move first to last
    }
    return more;                                // found a doc with all clauses
  }

  /** Skips every sub-scorer to <code>target</code> and then aligns them on
   * the first document they all contain.
   * @param target the document number to skip to
   * @return true if a document matched by all sub-scorers was found
   */
  public boolean skipTo(int target) throws IOException {
    if (firstTime)
      start();                                    // skipTo() may be called first
    Iterator i = scorers.iterator();
    while (more && i.hasNext()) {
      more = ((Scorer)i.next()).skipTo(target);
    }
    if (more)
      sortScorers();                              // re-sort scorers
    // the sub-scorers may have landed on different docs; align them
    return doNext();
  }

  /** Returns the sum of the sub-scorers' scores for the current document,
   * multiplied by the coordination factor. */
  public float score() throws IOException {
    float score = 0.0f;                           // sum scores
    Iterator i = scorers.iterator();
    while (i.hasNext())
      score += ((Scorer)i.next()).score();
    score *= coord;
    return score;
  }

  /** One-time setup shared by next() and skipTo(): decides whether there is
   * anything to iterate and computes the coord factor. */
  private void start() throws IOException {
    more = scorers.size() > 0;
    // compute coord factor
    coord = getSimilarity().coord(scorers.size(), scorers.size());
    firstTime = false;
  }

  /** First-call setup for next(): moves each sub-scorer to its first entry
   * and orders the list by document number. */
  private void init() throws IOException {
    start();
    // move each scorer to its first entry
    Iterator i = scorers.iterator();
    while (more && i.hasNext()) {
      more = ((Scorer)i.next()).next();
    }
    if (more)
      sortScorers();                              // initial sort of list
  }

  /** Re-orders the list of sub-scorers by ascending current document. */
  private void sortScorers() throws IOException {
    // move scorers to an array
    Scorer[] array = (Scorer[])scorers.toArray(new Scorer[scorers.size()]);
    scorers.clear();                              // empty the list
    Arrays.sort(array, new Comparator() {         // sort the array
        public int compare(Object o1, Object o2) {
          int d1 = ((Scorer)o1).doc();
          int d2 = ((Scorer)o2).doc();
          return d1 < d2 ? -1 : (d1 == d2 ? 0 : 1);
        }
      });
    for (int i = 0; i < array.length; i++) {
      scorers.addLast(array[i]);                  // re-build list, now sorted
    }
  }

  /** Not supported by this scorer.
   * @throws UnsupportedOperationException always
   */
  public Explanation explain(int doc) throws IOException {
    throw new UnsupportedOperationException();
  }
 }
--- a/src/java/org/apache/lucene/search/IndexSearcher.java
+++ b/src/java/org/apache/lucene/search/IndexSearcher.java
@ -140,7 +140,7 @@ public class IndexSearcher extends Searcher {
            hq.insert(new ScoreDoc(doc, score));
 	  }
 	}
-      }, reader.maxDoc());
+      });
    ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()];
    for (int i = hq.size()-1; i >= 0; i--)	  // put docs in array
@ -180,7 +180,7 @@ public class IndexSearcher extends Searcher {
    Scorer scorer = query.weight(this).scorer(reader);
    if (scorer == null)
      return;
-    scorer.score(collector, reader.maxDoc());
+    scorer.score(collector);
  }
  public Query rewrite(Query original) throws IOException {
--- a/src/java/org/apache/lucene/search/PhrasePositions.java
+++ b/src/java/org/apache/lucene/search/PhrasePositions.java
@ -68,19 +68,31 @@ final class PhrasePositions {
  PhrasePositions(TermPositions t, int o) throws IOException {
    tp = t;
    offset = o;
    next();
  }
-  final void next() throws IOException {	  // increments to next doc
+  final boolean next() throws IOException {	  // increments to next doc
    if (!tp.next()) {
      tp.close();				  // close stream
      doc = Integer.MAX_VALUE;			  // sentinel value
-      return;
+      return false;
    }
    doc = tp.doc();
    position = 0;
    return true;
  }
  /** Skips the underlying term positions to <code>target</code>.  On success
   * records the new document and resets the position to zero; on failure
   * closes the stream and sets the document to a sentinel value.
   * @param target the document number to skip to
   * @return true if the underlying skipTo succeeded
   */
  final boolean skipTo(int target) throws IOException {
    final boolean found = tp.skipTo(target);
    if (found) {
      doc = tp.doc();
      position = 0;
    } else {
      tp.close();                                 // close stream
      doc = Integer.MAX_VALUE;                    // sentinel value
    }
    return found;
  }
  final void firstPosition() throws IOException {
    count = tp.freq();				  // read first pos
    nextPosition();
--- a/src/java/org/apache/lucene/search/PhraseScorer.java
+++ b/src/java/org/apache/lucene/search/PhraseScorer.java
@ -60,89 +60,127 @@ import org.apache.lucene.util.*;
 import org.apache.lucene.index.*;
 abstract class PhraseScorer extends Scorer {
-    private Weight weight;
+  private Weight weight;
-    protected byte[] norms;
+  protected byte[] norms;
-    protected float value;
+  protected float value;
-    protected PhraseQueue pq;
+  private boolean firstTime = true;
-    protected PhrasePositions first, last;
+  private boolean more = true;
  protected PhraseQueue pq;
  protected PhrasePositions first, last;
-    private float freq;
+  private float freq;
-    PhraseScorer(Weight weight, TermPositions[] tps, Similarity similarity,
+  PhraseScorer(Weight weight, TermPositions[] tps, Similarity similarity,
-                 byte[] norms) throws IOException {
+               byte[] norms) throws IOException {
-        super(similarity);
+    super(similarity);
-        this.norms = norms;
+    this.norms = norms;
-        this.weight = weight;
+    this.weight = weight;
-        this.value = weight.getValue();
+    this.value = weight.getValue();
-        // use PQ to build a sorted list of PhrasePositions
+    // convert tps to a list
-        pq = new PhraseQueue(tps.length);
+    for (int i = 0; i < tps.length; i++) {
-        for (int i = 0; i < tps.length; i++) {
+      PhrasePositions pp = new PhrasePositions(tps[i], i);
-            pq.put(new PhrasePositions(tps[i], i));
+      if (last != null) {			  // add next to end of list
-        }
+        last.next = pp;
-        pqToList();
+      } else
        first = pp;
      last = pp;
    }
-    public final void score(HitCollector results, int end) throws IOException {
+    pq = new PhraseQueue(tps.length);             // construct empty pq
        Similarity similarity = getSimilarity();
        while (last.doc < end) {			  // find doc w/ all the terms
            while (first.doc < last.doc) {		  // scan forward in first
                do {
                    first.next();
                } while (first.doc < last.doc);
                firstToLast();
                if (last.doc >= end)
                    return;
            }
-            // found doc with all terms
+  }
            freq = phraseFreq();                        // check for phrase
-            if (freq > 0.0) {
+  public int doc() { return first.doc; }
-                float score = similarity.tf(freq) * value;  // compute score
+
-                score *= Similarity.decodeNorm(norms[first.doc]); // normalize
+  public boolean next() throws IOException {
-                results.collect(first.doc, score);	  // add to results
+    if (firstTime) {
-            }
+      sort();
-            last.next();				  // resume scanning
+      firstTime = false;
-        }
+    } else if (more) {
      more = last.next();                         // trigger further scanning
    }
-    protected abstract float phraseFreq() throws IOException;
+    while (more) {
      while (more && first.doc < last.doc) {      // find doc w/ all the terms
        more = first.skipTo(last.doc);            // skip first upto last
        firstToLast();                            // and move it to the end
      }
-    protected final void pqToList() {
+      if (more) {
-        last = first = null;
+        // found a doc with all of the terms
-        while (pq.top() != null) {
+        freq = phraseFreq();                      // check for phrase
-            PhrasePositions pp = (PhrasePositions) pq.pop();
+        if (freq == 0.0f)                         // no match
-            if (last != null) {			  // add next to end of list
+          more = last.next();                     // trigger further scanning
-                last.next = pp;
+        else
-            } else
+          return true;                            // found a match
-                first = pp;
+      }
            last = pp;
            pp.next = null;
        }
    }
    return false;                                 // no more matches
  }
-    protected final void firstToLast() {
+  public float score() throws IOException {
-        last.next = first;			  // move first to end of list
+    //System.out.println("scoring " + first.doc);
-        last = first;
+    float raw = getSimilarity().tf(freq) * value; // raw score
-        first = first.next;
+    return raw * Similarity.decodeNorm(norms[first.doc]); // normalize
-        last.next = null;
+  }
  /**
   * Skips every term's positions to <code>target</code>, stopping at the
   * first one that runs out, and re-sorts the list if none did.
   *
   * NOTE(review): this returns once the streams are positioned and sorted; it
   * does not itself look for a phrase match at the resulting document --
   * confirm that this is what callers of skipTo expect.
   *
   * @return the value of <code>more</code> after skipping and sorting
   */
  public boolean skipTo(int target) throws IOException {
    PhrasePositions current = first;
    while (more && current != null) {
      more = current.skipTo(target);
      current = current.next;
    }
    if (!more) {
      return false;                               // some term is exhausted
    }
    sort();                                       // may update 'more'
    return more;
  }
    public Explanation explain(final int doc) throws IOException {
        Explanation tfExplanation = new Explanation();
-        score(new HitCollector() {
+  protected abstract float phraseFreq() throws IOException;
            public final void collect(int d, float score) {
            }
        }, doc + 1);
-        float phraseFreq = (first.doc == doc) ? freq : 0.0f;
+  private void sort() throws IOException {
-        tfExplanation.setValue(getSimilarity().tf(phraseFreq));
+    pq.clear();
-        tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")");
+    for (PhrasePositions pp = first; more && pp != null; pp = pp.next) {
-
+      more = pp.next();
-        return tfExplanation;
+      if (more) {
        pq.put(pp);
      } else {
        return;
      }
    }
    pqToList();
  }
  /**
   * Empties <code>pq</code> into the linked list headed by <code>first</code>
   * and ending at <code>last</code>, in the order the queue pops its entries.
   */
  protected final void pqToList() {
    first = null;
    last = null;
    while (pq.top() != null) {
      PhrasePositions popped = (PhrasePositions) pq.pop();
      popped.next = null;                         // popped becomes the new tail
      if (last == null) {
        first = popped;                           // list was empty
      } else {
        last.next = popped;                       // append after current tail
      }
      last = popped;
    }
  }
  /**
   * Moves the head of the list to its tail.  The order of the assignments
   * matters: linking the old head behind the tail before reading its
   * successor keeps a one-element list intact.
   */
  protected final void firstToLast() {
    PhrasePositions moved = first;
    last.next = moved;                            // link old head after tail
    first = moved.next;                           // successor becomes head
    moved.next = null;                            // old head now ends the list
    last = moved;
  }
  /**
   * Builds the tf part of an explanation for <code>doc</code>.  Advances this
   * scorer with {@link #next()} until it is exhausted or has reached a
   * document numbered <code>doc</code> or higher, then reports the phrase
   * frequency found there, or zero if the scorer is not on <code>doc</code>.
   *
   * @param doc the document to explain
   * @return an explanation whose value is <code>tf(phraseFreq)</code>
   */
  public Explanation explain(final int doc) throws IOException {
    Explanation result = new Explanation();

    boolean advanced = next();
    while (advanced && doc() < doc) {
      advanced = next();
    }

    float phraseFreq;
    if (doc() == doc) {
      phraseFreq = freq;
    } else {
      phraseFreq = 0.0f;
    }

    result.setValue(getSimilarity().tf(phraseFreq));
    result.setDescription("tf(phraseFreq=" + phraseFreq + ")");
    return result;
  }
 }
--- a/src/java/org/apache/lucene/search/Scorer.java
+++ b/src/java/org/apache/lucene/search/Scorer.java
@ -70,11 +70,39 @@ public abstract class Scorer {
    return this.similarity;
  }
-  /** Scores hits and passes them to a collector.  Stops at the last document
+  /** Scores all documents and passes them to a collector. */
-   * before <code>maxDoc</code>.  If called repeatedly, will restart at point
+  public void score(HitCollector hc) throws IOException {
-   * where it last left off.
+    while (next()) {
      hc.collect(doc(), score());
    }
  }
  /** Advance to the next document matching the query.  Returns true iff there
   * is another match. */
  public abstract boolean next() throws IOException;
  /** Returns the current document number.  Initially invalid, until {@link
   * #next()} is called the first time. */
  public abstract int doc();
  /** Returns the score of the current document.  Initially invalid, until
   * {@link #next()} is called the first time. */
  public abstract float score() throws IOException;
  /** Skips to the first match beyond the current whose document number is
   * greater than or equal to <i>target</i>. <p>Returns true iff there is such
   * a match.  <p>Behaves as if written: <pre>
   *   boolean skipTo(int target) {
   *     do {
   *       if (!next())
   * 	     return false;
   *     } while (target > doc());
   *     return true;
   *   }
   * </pre>
   * Most implementations are considerably more efficient than that.
   */
-  public abstract void score(HitCollector hc, int maxDoc) throws IOException;
+  public abstract boolean skipTo(int target) throws IOException;
  /** Returns an explanation of the score for <code>doc</code>. */
  public abstract Explanation explain(int doc) throws IOException;
--- a/src/java/org/apache/lucene/search/TermScorer.java
+++ b/src/java/org/apache/lucene/search/TermScorer.java
@ -83,44 +83,56 @@ final class TermScorer extends Scorer {
    for (int i = 0; i < SCORE_CACHE_SIZE; i++)
      scoreCache[i] = getSimilarity().tf(i) * weightValue;
    pointerMax = termDocs.read(docs, freqs);	  // fill buffers
    if (pointerMax != 0)
      doc = docs[0];
    else {
      termDocs.close();				  // close stream
      doc = Integer.MAX_VALUE;			  // set to sentinel value
    }
  }
-  public final void score(HitCollector c, final int end) throws IOException {
+  public int doc() { return doc; }
    int d = doc;				  // cache doc in local
    Similarity similarity = getSimilarity();      // cache sim in local
    while (d < end) {				  // for docs in window
      final int f = freqs[pointer];
      float score =				  // compute tf(f)*weight
 	f < SCORE_CACHE_SIZE			  // check cache
 	 ? scoreCache[f]			  // cache hit
 	 : similarity.tf(f)*weightValue;          // cache miss
-      score *= Similarity.decodeNorm(norms[d]);	  // normalize for field
+  public boolean next() throws IOException {
-
+    pointer++;
-      c.collect(d, score);			  // collect score
+    if (pointer >= pointerMax) {
-
+      pointerMax = termDocs.read(docs, freqs);    // refill buffer
-      if (++pointer == pointerMax) {
+      if (pointerMax != 0) {
-	pointerMax = termDocs.read(docs, freqs);  // refill buffers
+        pointer = 0;
-	if (pointerMax != 0) {
+      } else {
-	  pointer = 0;
+        termDocs.close();			  // close stream
-	} else {
+        doc = Integer.MAX_VALUE;		  // set to sentinel value
-	  termDocs.close();			  // close stream
+        return false;
 	  doc = Integer.MAX_VALUE;		  // set to sentinel value
 	  return;
 	}
      }
      d = docs[pointer];
    } 
-    doc = d;					  // flush cache
+    doc = docs[pointer];
    return true;
  }
  /**
   * Computes the score at the current buffer position: tf(freq) * weight,
   * read from <code>scoreCache</code> when the frequency is below
   * <code>SCORE_CACHE_SIZE</code>, multiplied by the decoded norm of the
   * current document.
   */
  public float score() throws IOException {
    final int termFreq = freqs[pointer];
    final float raw;
    if (termFreq < SCORE_CACHE_SIZE) {
      raw = scoreCache[termFreq];                       // cache hit
    } else {
      raw = getSimilarity().tf(termFreq) * weightValue; // cache miss
    }
    final float norm = Similarity.decodeNorm(norms[doc]);
    return raw * norm;
  }
  /**
   * Moves to an entry whose document number is not less than
   * <code>target</code>.  The entries already buffered after the current
   * position are checked first; if none qualifies, the request is handed to
   * <code>termDocs.skipTo(target)</code> and, on success, the buffer is
   * replaced by the single entry the stream now points at.
   *
   * @return true if such an entry was found; otherwise false, with
   *         <code>doc</code> set to <code>Integer.MAX_VALUE</code>
   */
  public boolean skipTo(int target) throws IOException {
    // look through what is left of the buffer
    pointer++;
    while (pointer < pointerMax) {
      if (docs[pointer] >= target) {
        doc = docs[pointer];
        return true;
      }
      pointer++;
    }

    // buffer used up: ask the underlying stream
    if (!termDocs.skipTo(target)) {
      doc = Integer.MAX_VALUE;                    // sentinel value
      return false;
    }

    // buffer now holds exactly the entry the stream is positioned on
    pointerMax = 1;
    pointer = 0;
    doc = termDocs.doc();
    docs[0] = doc;
    freqs[0] = termDocs.freq();
    return true;
  }
  public Explanation explain(int doc) throws IOException {
--- a/src/java/org/apache/lucene/store/RAMDirectory.java
+++ b/src/java/org/apache/lucene/store/RAMDirectory.java
@ -226,98 +226,3 @@ public final class RAMDirectory extends Directory {
  public final void close() {
  }
 }
 final class RAMInputStream extends InputStream implements Cloneable {
  RAMFile file;
  int pointer = 0;
  public RAMInputStream(RAMFile f) {
    file = f;
    length = file.length;
  }
  /** InputStream methods */
  public final void readInternal(byte[] dest, int destOffset, int len) {
    int remainder = len;
    int start = pointer;
    while (remainder != 0) {
      int bufferNumber = start/InputStream.BUFFER_SIZE;
      int bufferOffset = start%InputStream.BUFFER_SIZE;
      int bytesInBuffer = InputStream.BUFFER_SIZE - bufferOffset;
      int bytesToCopy = bytesInBuffer >= remainder ? remainder : bytesInBuffer;
      byte[] buffer = (byte[])file.buffers.elementAt(bufferNumber);
      System.arraycopy(buffer, bufferOffset, dest, destOffset, bytesToCopy);
      destOffset += bytesToCopy;
      start += bytesToCopy;
      remainder -= bytesToCopy;
    }
    pointer += len;
  }
  public final void close() {
  }
  /** Random-access methods */
  public final void seekInternal(long pos) {
    pointer = (int)pos;
  }
 }
 final class RAMOutputStream extends OutputStream {
  RAMFile file;
  int pointer = 0;
  public RAMOutputStream(RAMFile f) {
    file = f;
  }
  /** output methods: */
  public final void flushBuffer(byte[] src, int len) {
    int bufferNumber = pointer/OutputStream.BUFFER_SIZE;
    int bufferOffset = pointer%OutputStream.BUFFER_SIZE;
    int bytesInBuffer = OutputStream.BUFFER_SIZE - bufferOffset;
    int bytesToCopy = bytesInBuffer >= len ? len : bytesInBuffer;
    if (bufferNumber == file.buffers.size())
      file.buffers.addElement(new byte[OutputStream.BUFFER_SIZE]);
    byte[] buffer = (byte[])file.buffers.elementAt(bufferNumber);
    System.arraycopy(src, 0, buffer, bufferOffset, bytesToCopy);
    if (bytesToCopy < len) {			  // not all in one buffer
      int srcOffset = bytesToCopy;
      bytesToCopy = len - bytesToCopy;		  // remaining bytes
      bufferNumber++;
      if (bufferNumber == file.buffers.size())
        file.buffers.addElement(new byte[OutputStream.BUFFER_SIZE]);
      buffer = (byte[])file.buffers.elementAt(bufferNumber);
      System.arraycopy(src, srcOffset, buffer, 0, bytesToCopy);
    }
    pointer += len;
    if (pointer > file.length)
      file.length = pointer;
    file.lastModified = System.currentTimeMillis();
  }
  public final void close() throws IOException {
    super.close();
  }
  /** Random-access methods */
  public final void seek(long pos) throws IOException {
    super.seek(pos);
    pointer = (int)pos;
  }
  public final long length() throws IOException {
    return file.length;
  }
 }
 final class RAMFile {
  Vector buffers = new Vector();
  long length;
  long lastModified = System.currentTimeMillis();
 }
--- a/src/java/org/apache/lucene/store/RAMFile.java
+++ b/src/java/org/apache/lucene/store/RAMFile.java
@ -0,0 +1,63 @@
 package org.apache.lucene.store;
 /* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001, 2004 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Lucene" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Lucene", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */
 import java.util.Vector;
 /**
  * Holder for the state of one memory-resident file: its buffers, its
  * length, and the time it was last modified.
  */
 class RAMFile {
  /** Buffers making up the file's contents; starts out empty. */
  Vector buffers;
  /** Length of the file; zero for a new file. */
  long length;
  /** Time of last modification, in milliseconds; set at creation. */
  long lastModified;

  /** Creates an empty file stamped with the current time. */
  RAMFile() {
    buffers = new Vector();
    lastModified = System.currentTimeMillis();
  }
 }
--- a/src/java/org/apache/lucene/store/RAMInputStream.java
+++ b/src/java/org/apache/lucene/store/RAMInputStream.java
@ -0,0 +1,95 @@
 package org.apache.lucene.store;
 /* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001, 2004 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Lucene" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Lucene", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */
 /**
  * A memory-resident {@link InputStream} implementation that reads from the
  * buffers of a {@link RAMFile}.
  *
  * @version $Id$
  */
 class RAMInputStream extends InputStream implements Cloneable {
  private RAMFile file;
  private int pointer = 0;

  /** Opens a stream over <code>f</code>, taking its current length. */
  public RAMInputStream(RAMFile f) {
    length = f.length;
    file = f;
  }

  /**
   * Copies <code>len</code> bytes, starting at the current read position,
   * into <code>dest</code> at <code>destOffset</code>, one file buffer at a
   * time, then advances the read position by <code>len</code>.
   */
  public void readInternal(byte[] dest, int destOffset, int len) {
    int readPos = pointer;
    int writePos = destOffset;
    int left = len;
    while (left != 0) {
      int offsetInBuffer = readPos % BUFFER_SIZE;
      byte[] source = (byte[]) file.buffers.elementAt(readPos / BUFFER_SIZE);
      // take the rest of this buffer, or less if that is all we still need
      int chunk = Math.min(BUFFER_SIZE - offsetInBuffer, left);
      System.arraycopy(source, offsetInBuffer, dest, writePos, chunk);
      readPos += chunk;
      writePos += chunk;
      left -= chunk;
    }
    pointer += len;
  }

  /** Does nothing. */
  public void close() {
  }

  /** Sets the read position to <code>pos</code>, narrowed to an int. */
  public void seekInternal(long pos) {
    pointer = (int) pos;
  }
 }
--- a/src/java/org/apache/lucene/store/RAMOutputStream.java
+++ b/src/java/org/apache/lucene/store/RAMOutputStream.java
@ -0,0 +1,145 @@
 package org.apache.lucene.store;
 /* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001, 2004 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Lucene" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Lucene", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */
 import java.io.IOException;
 /**
  * A memory-resident {@link OutputStream} implementation that writes into
  * the buffers of a {@link RAMFile}.
  *
  * @version $Id$
  */
 public class RAMOutputStream extends OutputStream {
  private RAMFile file;
  private int pointer = 0;

  /** Construct an empty output buffer. */
  public RAMOutputStream() {
    this(new RAMFile());
  }

  /** Construct an output stream that writes into <code>f</code>. */
  RAMOutputStream(RAMFile f) {
    file = f;
  }

  /** Copy the current contents of this buffer to the named output. */
  public void writeTo(OutputStream out) throws IOException {
    flush();
    final long total = file.length;
    int index = 0;
    for (long written = 0; written < total; written += BUFFER_SIZE) {
      // every buffer is written whole except possibly the last one
      int chunk = (int) Math.min((long) BUFFER_SIZE, total - written);
      out.writeBytes((byte[]) file.buffers.elementAt(index), chunk);
      index++;
    }
  }

  /** Resets this to an empty buffer. */
  public void reset() {
    try {
      seek(0);
    } catch (IOException e) {                     // should never happen
      throw new RuntimeException(e.toString());
    }
    file.length = 0;
  }

  /**
   * Stores the first <code>len</code> bytes of <code>src</code> at the
   * current write position, adding a buffer to the file when the position
   * reaches the end of the existing ones.  The data may spill over into one
   * following buffer.  Afterwards the write position, the file length (if it
   * grew), and the file's modification time are updated.
   */
  public void flushBuffer(byte[] src, int len) {
    int index = pointer / BUFFER_SIZE;
    int offset = pointer % BUFFER_SIZE;
    int firstChunk = Math.min(BUFFER_SIZE - offset, len);

    if (index == file.buffers.size()) {
      file.buffers.addElement(new byte[BUFFER_SIZE]);
    }
    byte[] target = (byte[]) file.buffers.elementAt(index);
    System.arraycopy(src, 0, target, offset, firstChunk);

    int spill = len - firstChunk;                 // bytes that did not fit
    if (spill > 0) {
      index++;
      if (index == file.buffers.size()) {
        file.buffers.addElement(new byte[BUFFER_SIZE]);
      }
      target = (byte[]) file.buffers.elementAt(index);
      System.arraycopy(src, firstChunk, target, 0, spill);
    }

    pointer += len;
    if (pointer > file.length) {
      file.length = pointer;
    }
    file.lastModified = System.currentTimeMillis();
  }

  /** Closes this stream by delegating to the superclass. */
  public void close() throws IOException {
    super.close();
  }

  /** Seeks via the superclass, then moves the write position to <code>pos</code>. */
  public void seek(long pos) throws IOException {
    super.seek(pos);
    pointer = (int) pos;
  }

  /** Returns the length recorded in the underlying file. */
  public long length() {
    return file.length;
  }
 }
--- a/src/test/org/apache/lucene/ThreadSafetyTest.java
+++ b/src/test/org/apache/lucene/ThreadSafetyTest.java
@ -54,6 +54,7 @@ package org.apache.lucene;
 * <http://www.apache.org/>.
 */
 import org.apache.lucene.util.*;
 import org.apache.lucene.store.*;
 import org.apache.lucene.document.*;
 import org.apache.lucene.analysis.*;
@ -93,7 +94,7 @@ class ThreadSafetyTest {
          Document d = new Document();
          int n = RANDOM.nextInt();
          d.add(Field.Keyword("id", Integer.toString(n)));
-          d.add(Field.UnStored("contents", intToEnglish(n)));
+          d.add(Field.UnStored("contents", English.intToEnglish(n)));
          System.out.println("Adding " + n);
          // Switch between single and multiple file segments
@ -151,7 +152,7 @@ class ThreadSafetyTest {
      throws Exception {
      System.out.println("Searching for " + n);
      Hits hits =
-        searcher.search(QueryParser.parse(intToEnglish(n), "contents",
+        searcher.search(QueryParser.parse(English.intToEnglish(n), "contents",
                                          ANALYZER));
      System.out.println("Search for " + n + ": total=" + hits.length());
      for (int j = 0; j < Math.min(3, hits.length()); j++) {
@ -197,76 +198,4 @@ class ThreadSafetyTest {
    SearcherThread searcherThread3 = new SearcherThread(true);
    searcherThread3.start();
  }
  private static String intToEnglish(int i) {
    StringBuffer result = new StringBuffer();
    intToEnglish(i, result);
    return result.toString();
  }
  private static void intToEnglish(int i, StringBuffer result) {
    if (i < 0) {
      result.append("minus ");
      i = -i;
    }
    if (i >= 1000000000) {			  // billions
      intToEnglish(i/1000000000, result);
      result.append("billion, ");
      i = i%1000000000;
    }
    if (i >= 1000000) {				  // millions
      intToEnglish(i/1000000, result);
      result.append("million, ");
      i = i%1000000;
    }
    if (i >= 1000) {				  // thousands
      intToEnglish(i/1000, result);
      result.append("thousand, ");
      i = i%1000;
    }
    if (i >= 100) {				  // hundreds
      intToEnglish(i/100, result);
      result.append("hundred ");
      i = i%100;
    }
    if (i >= 20) {
      switch (i/10) {
      case 9 : result.append("ninety"); break;
      case 8 : result.append("eighty"); break;
      case 7 : result.append("seventy"); break;
      case 6 : result.append("sixty"); break;
      case 5 : result.append("fifty"); break;
      case 4 : result.append("forty"); break;
      case 3 : result.append("thirty"); break;
      case 2 : result.append("twenty"); break;
      }
      i = i%10;
      if (i == 0)
        result.append(" ");
      else 
        result.append("-");
    }
    switch (i) {
    case 19 : result.append("nineteen "); break;
    case 18 : result.append("eighteen "); break;
    case 17 : result.append("seventeen "); break;
    case 16 : result.append("sixteen "); break;
    case 15 : result.append("fifteen "); break;
    case 14 : result.append("fourteen "); break;
    case 13 : result.append("thirteen "); break;
    case 12 : result.append("twelve "); break;
    case 11 : result.append("eleven "); break;
    case 10 : result.append("ten "); break;
    case 9 : result.append("nine "); break;
    case 8 : result.append("eight "); break;
    case 7 : result.append("seven "); break;
    case 6 : result.append("six "); break;
    case 5 : result.append("five "); break;
    case 4 : result.append("four "); break;
    case 3 : result.append("three "); break;
    case 2 : result.append("two "); break;
    case 1 : result.append("one "); break;
    case 0 : result.append(""); break;
    }
  }
 }
--- a/src/test/org/apache/lucene/search/TestBasics.java
+++ b/src/test/org/apache/lucene/search/TestBasics.java
@ -0,0 +1,135 @@
 package org.apache.lucene.search;
 /* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001, 2004 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Lucene" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Lucene", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */
 import junit.framework.TestCase;
 import org.apache.lucene.util.English;
 import org.apache.lucene.analysis.SimpleAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.RAMDirectory;
 /**
 * Tests basic search capabilities.
 *
 * @author Doug Cutting
 */
 public class TestBasics extends TestCase {

  /** Searcher over the in-memory index built by {@link #setUp()}. */
  private IndexSearcher searcher;

  /**
   * Builds a RAM-resident index of 1000 documents, one per integer in
   * [0, 1000), whose single "field" holds the English spelling of that
   * integer as produced by {@link English#intToEnglish(int)}, and opens a
   * searcher over it.
   */
  public void setUp() throws Exception {
    RAMDirectory directory = new RAMDirectory();
    IndexWriter writer
      = new IndexWriter(directory, new SimpleAnalyzer(), true);
    //writer.infoStream = System.out;
    try {
      for (int i = 0; i < 1000; i++) {
        Document doc = new Document();
        doc.add(Field.Text("field", English.intToEnglish(i)));
        writer.addDocument(doc);
      }
    } finally {
      // close the writer even if indexing fails part-way through
      writer.close();
    }
    searcher = new IndexSearcher(directory);
  }

  /** Releases the searcher opened by {@link #setUp()}. */
  public void tearDown() throws Exception {
    if (searcher != null) {
      searcher.close();
      searcher = null;
    }
  }

  /**
   * "seventy" is spelled out for 70..79 within each of the ten hundreds
   * below 1000, hence 100 matching documents.
   */
  public void testTerm() throws Exception {
    Query query = new TermQuery(new Term("field", "seventy"));
    Hits hits = searcher.search(query);
    assertEquals(100, hits.length());
  }

  /** A term that {@link English} never produces must match nothing. */
  public void testTerm2() throws Exception {
    Query query = new TermQuery(new Term("field", "seventish"));
    Hits hits = searcher.search(query);
    assertEquals(0, hits.length());
  }

  /**
   * "seventy" immediately followed by "seven" is expected only for 77, 177,
   * ..., 977: ten documents.  NOTE(review): this relies on the analyzer
   * splitting "seventy-seven" into two adjacent tokens -- confirm against
   * SimpleAnalyzer.
   */
  public void testPhrase() throws Exception {
    PhraseQuery query = new PhraseQuery();
    query.add(new Term("field", "seventy"));
    query.add(new Term("field", "seven"));
    Hits hits = searcher.search(query);
    assertEquals(10, hits.length());
  }

  /** A phrase made of terms that never occur must match nothing. */
  public void testPhrase2() throws Exception {
    PhraseQuery query = new PhraseQuery();
    query.add(new Term("field", "seventish"));
    query.add(new Term("field", "sevenon"));
    Hits hits = searcher.search(query);
    assertEquals(0, hits.length());
  }

  /**
   * Both clauses are added with the flags (true, false).  Documents holding
   * both "seventy" and "seven" are 77, 177, ..., 977 (ten) plus 770..779
   * (ten); 777 is in both groups, giving 19.
   */
  public void testBoolean() throws Exception {
    BooleanQuery query = new BooleanQuery();
    query.add(new TermQuery(new Term("field", "seventy")), true, false);
    query.add(new TermQuery(new Term("field", "seven")), true, false);
    Hits hits = searcher.search(query);
    assertEquals(19, hits.length());
  }

  /** Two clauses on terms that never occur must match nothing. */
  public void testBoolean2() throws Exception {
    BooleanQuery query = new BooleanQuery();
    query.add(new TermQuery(new Term("field", "sevento")), true, false);
    query.add(new TermQuery(new Term("field", "sevenly")), true, false);
    Hits hits = searcher.search(query);
    assertEquals(0, hits.length());
  }
 }
--- a/src/test/org/apache/lucene/util/English.java
+++ b/src/test/org/apache/lucene/util/English.java
@ -0,0 +1,140 @@
 package org.apache.lucene.util;
 /* ====================================================================
 * The Apache Software License, Version 1.1
 *
 * Copyright (c) 2001, 2004 The Apache Software Foundation.  All rights
 * reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Apache" and "Apache Software Foundation" and
 *    "Apache Lucene" must not be used to endorse or promote products
 *    derived from this software without prior written permission. For
 *    written permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    "Apache Lucene", nor may "Apache" appear in their name, without
 *    prior written permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */
 /**
  * Spells integers out in English words, e.g. 21 becomes "twenty-one ".
  *
  * <p>Except for zero ("zero"), every result ends with a separator: a
  * single space after the last number word, or ", " after a bare group name
  * (e.g. 1000 becomes "one thousand, ").
  */
 public class English {

  /**
   * Words for 0..19, indexed by value.  Each carries its trailing space;
   * index 0 is empty because a zero remainder contributes no word.
   */
  private static final String[] UNITS = {
    "", "one ", "two ", "three ", "four ", "five ", "six ", "seven ",
    "eight ", "nine ", "ten ", "eleven ", "twelve ", "thirteen ",
    "fourteen ", "fifteen ", "sixteen ", "seventeen ", "eighteen ",
    "nineteen "
  };

  /** Words for the tens 20..90, indexed by the tens digit (0 and 1 unused). */
  private static final String[] TENS = {
    "", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy",
    "eighty", "ninety"
  };

  /**
   * Returns the English spelling of <code>i</code>.
   *
   * @param i the value to spell out; any int, including Integer.MIN_VALUE
   * @return the words for <code>i</code>, prefixed with "minus " if negative
   */
  public static String intToEnglish(int i) {
    StringBuffer result = new StringBuffer();
    intToEnglish(i, result);
    return result.toString();
  }

  /**
   * Appends the English spelling of <code>i</code> to <code>result</code>.
   *
   * @param i the value to spell out; any int, including Integer.MIN_VALUE
   * @param result buffer the words are appended to
   */
  public static void intToEnglish(int i, StringBuffer result) {
    if (i == 0) {
      result.append("zero");
      return;
    }
    // Widen before negating: -Integer.MIN_VALUE overflows back to itself
    // in int arithmetic, which used to yield just "minus ".
    long magnitude = i;
    if (magnitude < 0) {
      result.append("minus ");
      magnitude = -magnitude;
    }
    appendMagnitude(magnitude, result);
  }

  /** Appends the words for a non-negative value; zero appends nothing. */
  private static void appendMagnitude(long n, StringBuffer result) {
    if (n >= 1000000000L) {                       // billions
      appendMagnitude(n / 1000000000L, result);
      result.append("billion, ");
      n %= 1000000000L;
    }
    if (n >= 1000000L) {                          // millions
      appendMagnitude(n / 1000000L, result);
      result.append("million, ");
      n %= 1000000L;
    }
    if (n >= 1000L) {                             // thousands
      appendMagnitude(n / 1000L, result);
      result.append("thousand, ");
      n %= 1000L;
    }
    if (n >= 100L) {                              // hundreds
      appendMagnitude(n / 100L, result);
      result.append("hundred ");
      n %= 100L;
    }
    if (n >= 20L) {                               // tens
      result.append(TENS[(int) (n / 10L)]);
      n %= 10L;
      // a hyphen joins the tens word to a following unit ("forty-two")
      result.append(n == 0L ? " " : "-");
    }
    result.append(UNITS[(int) n]);                // 0..19
  }

  /**
   * Prints the English spelling of the integer given as the first
   * command-line argument.
   */
  public static void main(String[] args) {
    if (args.length == 0) {
      System.err.println("Usage: java org.apache.lucene.util.English <integer>");
      return;
    }
    System.out.println(intToEnglish(Integer.parseInt(args[0])));
  }
 }