Implementation of compound files. This reduces the number of files used by
Lucene to 1 per index segment (2 when deleted documents exist).
Test cases modified and added to go with this code.


git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@150067 13f79535-47bb-0310-9956-ffa450edef68
Dmitry Serebrennikov 2003-09-25 22:01:51 +00:00
parent 4e84ddc3f5
commit e2559e4003
12 changed files with 2142 additions and 192 deletions
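
The feature is driven by the new IndexWriter.setUseCompoundFile setting added
below. A minimal usage sketch, using only APIs that appear elsewhere in this
commit (RAMDirectory, SimpleAnalyzer, Field.Text, Directory.list); the class
name CompoundFileDemo is invented for illustration:

import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class CompoundFileDemo {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new SimpleAnalyzer(), true);
    writer.setUseCompoundFile(true);   // pack each new segment into one .cfs

    Document d = new Document();
    d.add(Field.Text("contents", "a b c d e"));
    writer.addDocument(d);
    writer.close();

    // Each segment now shows up as a single <name>.cfs file (plus <name>.del
    // once documents are deleted); the "segments" file is written separately.
    String[] files = dir.list();
    for (int i = 0; i < files.length; i++)
      System.out.println(files[i]);
  }
}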


@@ -0,0 +1,247 @@
package org.apache.lucene.index;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.InputStream;
import org.apache.lucene.store.OutputStream;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Lock;
import java.util.HashMap;
import java.util.Iterator;
import java.io.IOException;
/** Class for accessing a compound stream.
* This class implements a directory, but is limited to only read operations.
* Directory methods that would normally modify data throw an exception.
*/
public class CompoundFileReader extends Directory {
private static final class FileEntry {
long offset;
long length;
}
// Base info
private Directory directory;
private String fileName;
// Reference count
private boolean open;
private InputStream stream;
private HashMap entries = new HashMap();
public CompoundFileReader(Directory dir, String name)
throws IOException
{
directory = dir;
fileName = name;
boolean success = false;
try {
stream = dir.openFile(name);
// read the directory and init files
int count = stream.readVInt();
FileEntry entry = null;
for (int i=0; i<count; i++) {
long offset = stream.readLong();
String id = stream.readString();
if (entry != null) {
// set length of the previous entry
entry.length = offset - entry.offset;
}
entry = new FileEntry();
entry.offset = offset;
entries.put(id, entry);
}
// set the length of the final entry
if (entry != null) {
entry.length = stream.length() - entry.offset;
}
success = true;
} finally {
if (! success) {
try {
stream.close();
} catch (IOException e) { }
}
}
}
public Directory getDirectory() {
return directory;
}
public String getName() {
return fileName;
}
public synchronized void close() throws IOException {
if (stream == null)
throw new IOException("Already closed");
entries.clear();
stream.close();
stream = null;
}
public synchronized InputStream openFile(String id)
throws IOException
{
if (stream == null)
throw new IOException("Stream closed");
FileEntry entry = (FileEntry) entries.get(id);
if (entry == null)
throw new IOException("No sub-file with id " + id + " found");
return new CSInputStream(stream, entry.offset, entry.length);
}
/** Returns an array of strings, one for each file in the directory. */
public String[] list() {
String res[] = new String[entries.size()];
return (String[]) entries.keySet().toArray(res);
}
/** Returns true iff a file with the given name exists. */
public boolean fileExists(String name) {
return entries.containsKey(name);
}
/** Returns the time the named file was last modified. */
public long fileModified(String name) throws IOException {
return directory.fileModified(fileName);
}
/** Set the modified time of an existing file to now. */
public void touchFile(String name) throws IOException {
directory.touchFile(fileName);
}
/** Removes an existing file in the directory. */
public void deleteFile(String name)
{
throw new UnsupportedOperationException();
}
/** Renames an existing file in the directory.
If a file already exists with the new name, then it is replaced.
This replacement should be atomic. */
public void renameFile(String from, String to)
{
throw new UnsupportedOperationException();
}
/** Returns the length of a file in the directory. */
public long fileLength(String name)
throws IOException
{
FileEntry e = (FileEntry) entries.get(name);
if (e == null)
throw new IOException("File " + name + " does not exist");
return e.length;
}
/** Creates a new, empty file in the directory with the given name.
Returns a stream writing this file. */
public OutputStream createFile(String name)
{
throw new UnsupportedOperationException();
}
/** Construct a {@link Lock}.
* @param name the name of the lock file
*/
public Lock makeLock(String name)
{
throw new UnsupportedOperationException();
}
/** Implementation of an InputStream that reads from a portion of the
* compound file. The visibility is left as "package" *only* because
* this helps with testing since JUnit test cases in a different class
* can then access package fields of this class.
*/
static final class CSInputStream extends InputStream {
InputStream base;
long fileOffset;
CSInputStream(final InputStream base,
final long fileOffset,
final long length)
throws IOException
{
this.base = (InputStream) base.clone();
this.fileOffset = fileOffset;
this.length = length; // variable in the superclass
seekInternal(0); // position to the adjusted 0th byte
}
/** Expert: implements buffer refill. Reads bytes from the current
* position in the input.
* @param b the array to read bytes into
* @param offset the offset in the array to start storing bytes
* @param len the number of bytes to read
*/
protected void readInternal(byte[] b, int offset, int len)
throws IOException
{
base.readBytes(b, offset, len);
}
/** Expert: implements seek. Sets current position in this file, where
* the next {@link #readInternal(byte[],int,int)} will occur.
* @see #readInternal(byte[],int,int)
*/
protected void seekInternal(long pos) throws IOException
{
if (pos > 0 && pos >= length)
throw new IOException("Seek past the end of file");
if (pos < 0)
throw new IOException("Seek to a negative offset");
base.seek(fileOffset + pos);
}
/** Closes the stream to further operations. */
public void close() throws IOException
{
base.close();
}
/** Returns a clone of this stream.
*
* <p>Clones of a stream access the same data, and are positioned at the same
* point as the stream they were cloned from.
*
* <p>Expert: Subclasses must ensure that clones may be positioned at
* different points in the input from each other and from the stream they
* were cloned from.
*/
public Object clone() {
CSInputStream other = (CSInputStream) super.clone();
other.base = (InputStream) base.clone();
return other;
}
}
}
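
For reference, reading a packed sub-file back out through CompoundFileReader
looks like the sketch below. The index path "index" and the segment name
"_1" are hypothetical placeholders for whatever exists on disk.

import org.apache.lucene.index.CompoundFileReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.InputStream;

public class ReadCfsDemo {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.getDirectory("index", false);
    CompoundFileReader cfs = new CompoundFileReader(dir, "_1.cfs");
    try {
      String[] ids = cfs.list();          // ids of the packed sub-files
      for (int i = 0; i < ids.length; i++) {
        InputStream in = cfs.openFile(ids[i]);
        System.out.println(ids[i] + ": " + in.length() + " bytes");
        in.close();                       // closes this sub-stream's own clone
      }
    } finally {
      cfs.close();                        // closes the shared underlying stream
    }
  }
}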


@@ -0,0 +1,210 @@
package org.apache.lucene.index;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.OutputStream;
import org.apache.lucene.store.InputStream;
import java.util.LinkedList;
import java.util.HashSet;
import java.util.Iterator;
import java.io.IOException;
/** Combines multiple files into a single compound file.
* The file format:<br>
* <ul>
* <li>VInt fileCount</li>
* <li>{Directory}
* fileCount entries with the following structure:</li>
* <ul>
* <li>long dataOffset</li>
* <li>UTFString extension</li>
* </ul>
* <li>{File Data}
* fileCount entries with the raw data of the corresponding file</li>
* </ul>
*
* The fileCount integer indicates how many files are contained in this compound
* file. The {directory} that follows has that many entries. Each directory entry
* contains a long pointer to the start of this file's data section, and a UTF
* String with that file's extension.
*/
final class CompoundFileWriter {
private static final class FileEntry {
/** source file */
String file;
/** temporary holder for the start of directory entry for this file */
long directoryOffset;
/** temporary holder for the start of this file's data section */
long dataOffset;
}
private Directory directory;
private String fileName;
private HashSet ids;
private LinkedList entries;
private boolean merged = false;
/** Create the compound stream in the specified file. The file name is the
* entire name (no extensions are added).
*/
public CompoundFileWriter(Directory dir, String name) {
if (dir == null)
throw new IllegalArgumentException("Missing directory");
if (name == null)
throw new IllegalArgumentException("Missing name");
directory = dir;
fileName = name;
ids = new HashSet();
entries = new LinkedList();
}
/** Returns the directory of the compound file. */
public Directory getDirectory() {
return directory;
}
/** Returns the name of the compound file. */
public String getName() {
return fileName;
}
/** Add a source file. The file name is the string by which the
* sub-stream will be known in the compound stream; the caller must
* ensure that it is unique. Files are read from the directory in
* which this compound stream is being created.
*/
public void addFile(String file) {
if (merged)
throw new IllegalStateException(
"Can't add extensions after merge has been called");
if (file == null)
throw new IllegalArgumentException(
"Missing source file");
if (! ids.add(file))
throw new IllegalArgumentException(
"File " + file + " already added");
FileEntry entry = new FileEntry();
entry.file = file;
entries.add(entry);
}
/** Merge the files added up to now. All added files are combined
* sequentially into the compound stream. This writer does not delete
* the source files; callers (see SegmentMerger.createCompoundFile)
* remove them after a successful merge.
*/
public void close() throws IOException {
if (merged)
throw new IllegalStateException(
"Merge already performed");
if (entries.isEmpty())
throw new IllegalStateException(
"No entries to merge have been defined");
merged = true;
// open the compound stream
OutputStream os = null;
try {
os = directory.createFile(fileName);
// Write the number of entries
os.writeVInt(entries.size());
// Write the directory with all offsets at 0.
// Remember the positions of directory entries so that we can
// adjust the offsets later
Iterator it = entries.iterator();
while(it.hasNext()) {
FileEntry fe = (FileEntry) it.next();
fe.directoryOffset = os.getFilePointer();
os.writeLong(0); // for now
os.writeString(fe.file);
}
// Open the files and copy their data into the stream.
// Remember the locations of each file's data section.
byte buffer[] = new byte[1024];
it = entries.iterator();
while(it.hasNext()) {
FileEntry fe = (FileEntry) it.next();
fe.dataOffset = os.getFilePointer();
copyFile(fe, os, buffer);
}
// Write the data offsets into the directory of the compound stream
it = entries.iterator();
while(it.hasNext()) {
FileEntry fe = (FileEntry) it.next();
os.seek(fe.directoryOffset);
os.writeLong(fe.dataOffset);
}
// Close the output stream. Set the os to null before trying to
// close so that if an exception occurs during the close, the
// finally clause below will not attempt to close the stream
// the second time.
OutputStream tmp = os;
os = null;
tmp.close();
} finally {
if (os != null) try { os.close(); } catch (IOException e) { }
}
}
/** Copy the contents of the specified source file into the
* provided output stream. Use the provided buffer for moving data
* to reduce memory allocation.
*/
private void copyFile(FileEntry source, OutputStream os, byte buffer[])
throws IOException
{
InputStream is = null;
try {
long startPtr = os.getFilePointer();
is = directory.openFile(source.file);
long length = is.length();
long remainder = length;
int chunk = buffer.length;
while(remainder > 0) {
int len = (int) Math.min(chunk, remainder);
is.readBytes(buffer, 0, len);
os.writeBytes(buffer, len);
remainder -= len;
}
// Verify that remainder is 0
if (remainder != 0)
throw new IOException(
"Non-zero remainder length after copying: " + remainder
+ " (id: " + source.file + ", length: " + length
+ ", buffer size: " + chunk + ")");
// Verify that the output length diff is equal to original file
long endPtr = os.getFilePointer();
long diff = endPtr - startPtr;
if (diff != length)
throw new IOException(
"Difference in the output file offsets " + diff
+ " does not match the original file length " + length);
} finally {
if (is != null) is.close();
}
}
}
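
Driving the writer directly looks like the sketch below. Note that
CompoundFileWriter is package-private, so such code must live in
org.apache.lucene.index (as SegmentMerger.createCompoundFile does later in
this commit); the file names are invented for the example.

package org.apache.lucene.index;  // required: CompoundFileWriter is package-private

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.OutputStream;
import org.apache.lucene.store.RAMDirectory;

class PackDemo {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();

    // Create two small source files to pack.
    OutputStream a = dir.createFile("_demo.fnm");
    a.writeString("field names");
    a.close();
    OutputStream b = dir.createFile("_demo.frq");
    b.writeString("frequencies");
    b.close();

    CompoundFileWriter cfw = new CompoundFileWriter(dir, "_demo.cfs");
    cfw.addFile("_demo.fnm");
    cfw.addFile("_demo.frq");
    cfw.close();                 // close() performs the actual merge

    // The writer leaves the source files in place; callers such as
    // SegmentMerger.createCompoundFile() delete them afterwards.
    dir.deleteFile("_demo.fnm");
    dir.deleteFile("_demo.frq");
  }
}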


@@ -100,6 +100,29 @@ public class IndexWriter {
   private Lock writeLock;

+  /** Use compound file setting. Defaults to false to maintain multiple files
+   *  per segment behavior.
+   */
+  private boolean useCompoundFile = false;
+
+  /** Setting to turn on usage of a compound file. When on, multiple files
+   *  for each segment are merged into a single file once the segment creation
+   *  is finished. This is done regardless of what directory is in use.
+   */
+  public boolean getUseCompoundFile() {
+    return useCompoundFile;
+  }
+
+  /** Setting to turn on usage of a compound file. When on, multiple files
+   *  for each segment are merged into a single file once the segment creation
+   *  is finished. This is done regardless of what directory is in use.
+   */
+  public void setUseCompoundFile(boolean value) {
+    useCompoundFile = value;
+  }
+
   /** Expert: Set the Similarity implementation used by this IndexWriter.
    *
    * @see Similarity#setDefault(Similarity)
@@ -150,14 +173,14 @@ public class IndexWriter {
     synchronized (directory) {              // in- & inter-process sync
       new Lock.With(directory.makeLock("commit.lock"), COMMIT_LOCK_TIMEOUT) {
         public Object doBody() throws IOException {
           if (create)
             segmentInfos.write(directory);
           else
             segmentInfos.read(directory);
           return null;
         }
       }.run();
     }
   }
@@ -266,12 +289,14 @@ public class IndexWriter {
   public synchronized void optimize() throws IOException {
     flushRamSegments();
     while (segmentInfos.size() > 1 ||
            (segmentInfos.size() == 1 &&
             (SegmentReader.hasDeletions(segmentInfos.info(0)) ||
-             segmentInfos.info(0).dir != directory))) {
+             (useCompoundFile &&
+              !SegmentReader.usesCompoundFile(segmentInfos.info(0))) ||
+             segmentInfos.info(0).dir != directory))) {
       int minSegment = segmentInfos.size() - mergeFactor;
       mergeSegments(minSegment < 0 ? 0 : minSegment);
     }
   }

   /** Merges all segments from an array of indexes into this index.
@@ -290,7 +315,7 @@ public class IndexWriter {
       SegmentInfos sis = new SegmentInfos();    // read infos from dir
       sis.read(dirs[i]);
       for (int j = 0; j < sis.size(); j++) {
         segmentInfos.addElement(sis.info(j));   // add each info
       }
     }
     optimize();                                 // final cleanup
@@ -301,13 +326,13 @@ public class IndexWriter {
     int minSegment = segmentInfos.size()-1;
     int docCount = 0;
     while (minSegment >= 0 &&
            (segmentInfos.info(minSegment)).dir == ramDirectory) {
       docCount += segmentInfos.info(minSegment).docCount;
       minSegment--;
     }
     if (minSegment < 0 ||                       // add one FS segment?
         (docCount + segmentInfos.info(minSegment).docCount) > mergeFactor ||
         !(segmentInfos.info(segmentInfos.size()-1).dir == ramDirectory))
       minSegment++;
     if (minSegment >= segmentInfos.size())
       return;                                   // none to merge
@@ -322,16 +347,16 @@ public class IndexWriter {
       int minSegment = segmentInfos.size();
       int mergeDocs = 0;
       while (--minSegment >= 0) {
         SegmentInfo si = segmentInfos.info(minSegment);
         if (si.docCount >= targetMergeDocs)
           break;
         mergeDocs += si.docCount;
       }

       if (mergeDocs >= targetMergeDocs)         // found a merge to do
         mergeSegments(minSegment+1);
       else
         break;

       targetMergeDocs *= mergeFactor;           // increase target size
     }
@@ -344,17 +369,19 @@ public class IndexWriter {
     String mergedName = newSegmentName();
     int mergedDocCount = 0;
     if (infoStream != null) infoStream.print("merging segments");
-    SegmentMerger merger = new SegmentMerger(directory, mergedName);
+    SegmentMerger merger =
+        new SegmentMerger(directory, mergedName, useCompoundFile);
     final Vector segmentsToDelete = new Vector();
     for (int i = minSegment; i < segmentInfos.size(); i++) {
       SegmentInfo si = segmentInfos.info(i);
       if (infoStream != null)
         infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
       SegmentReader reader = new SegmentReader(si);
       merger.add(reader);
       if ((reader.directory == this.directory) || // if we own the directory
           (reader.directory == this.ramDirectory))
         segmentsToDelete.addElement(reader);    // queue segment for deletion
       mergedDocCount += reader.numDocs();
     }
     if (infoStream != null) {
@@ -362,19 +389,19 @@ public class IndexWriter {
       infoStream.println(" into "+mergedName+" ("+mergedDocCount+" docs)");
     }
     merger.merge();

     segmentInfos.setSize(minSegment);           // pop old infos & add new
     segmentInfos.addElement(new SegmentInfo(mergedName, mergedDocCount,
                                             directory));

     synchronized (directory) {                  // in- & inter-process sync
       new Lock.With(directory.makeLock("commit.lock"), COMMIT_LOCK_TIMEOUT) {
         public Object doBody() throws IOException {
           segmentInfos.write(directory);        // commit before deleting
           deleteSegments(segmentsToDelete);     // delete now-unused segments
           return null;
         }
       }.run();
     }
   }
@@ -391,9 +418,9 @@ public class IndexWriter {
     for (int i = 0; i < segments.size(); i++) {
       SegmentReader reader = (SegmentReader)segments.elementAt(i);
       if (reader.directory == this.directory)
         deleteFiles(reader.files(), deletable); // try to delete our files
       else
         deleteFiles(reader.files(), reader.directory); // delete, eg, RAM files
     }

     writeDeleteableFiles(deletable);            // note files we can't delete
@@ -410,13 +437,13 @@ public class IndexWriter {
     for (int i = 0; i < files.size(); i++) {
       String file = (String)files.elementAt(i);
       try {
         directory.deleteFile(file);             // try to delete each file
       } catch (IOException e) {                 // if delete fails
         if (directory.fileExists(file)) {
           if (infoStream != null)
             infoStream.println(e.getMessage() + "; Will re-try later.");
           deletable.addElement(file);           // add to deletable
         }
       }
     }
   }
@@ -429,7 +456,7 @@ public class IndexWriter {
     InputStream input = directory.openFile("deletable");
     try {
       for (int i = input.readInt(); i > 0; i--) // read file names
         result.addElement(input.readString());
     } finally {
       input.close();
     }
@@ -441,7 +468,7 @@ public class IndexWriter {
     try {
       output.writeInt(files.size());
       for (int i = 0; i < files.size(); i++)
         output.writeString((String)files.elementAt(i));
     } finally {
       output.close();
     }


@@ -55,6 +55,8 @@ package org.apache.lucene.index;
  */

 import java.util.Vector;
+import java.util.ArrayList;
+import java.util.Iterator;
 import java.io.IOException;

 import org.apache.lucene.store.Directory;
@@ -63,15 +65,17 @@ import org.apache.lucene.store.InputStream;
 import org.apache.lucene.util.BitVector;

 final class SegmentMerger {
+  private boolean useCompoundFile;
   private Directory directory;
   private String segment;

   private Vector readers = new Vector();
   private FieldInfos fieldInfos;

-  SegmentMerger(Directory dir, String name) {
+  SegmentMerger(Directory dir, String name, boolean compoundFile) {
     directory = dir;
     segment = name;
+    useCompoundFile = compoundFile;
   }

   final void add(SegmentReader reader) {
@@ -90,12 +94,62 @@ final class SegmentMerger {
     } finally {
       for (int i = 0; i < readers.size(); i++) {  // close readers
         SegmentReader reader = (SegmentReader)readers.elementAt(i);
         reader.close();
       }
     }
+
+    if (useCompoundFile)
+      createCompoundFile();
   }
+
+  // Add the fixed files
+  private final String COMPOUND_EXTENSIONS[] = new String[] {
+    "fnm", "frq", "prx", "fdx", "fdt", "tii", "tis"
+  };
+
+  private final void createCompoundFile()
+          throws IOException
+  {
+    CompoundFileWriter oneWriter =
+        new CompoundFileWriter(directory, segment + ".cfs");
+
+    ArrayList files =
+        new ArrayList(COMPOUND_EXTENSIONS.length + fieldInfos.size());
+
+    // Basic files
+    for (int i=0; i<COMPOUND_EXTENSIONS.length; i++) {
+      files.add(segment + "." + COMPOUND_EXTENSIONS[i]);
+    }
+
+    // Field norm files
+    for (int i = 0; i < fieldInfos.size(); i++) {
+      FieldInfo fi = fieldInfos.fieldInfo(i);
+      if (fi.isIndexed) {
+        files.add(segment + ".f" + i);
+      }
+    }
+
+    // Now merge all added files
+    Iterator it = files.iterator();
+    while(it.hasNext()) {
+      oneWriter.addFile((String) it.next());
+    }
+
+    // Perform the merge
+    oneWriter.close();
+
+    // Now delete the source files
+    it = files.iterator();
+    while(it.hasNext()) {
+      directory.deleteFile((String) it.next());
+    }
+  }

   private final void mergeFields() throws IOException {
     fieldInfos = new FieldInfos();              // merge field names
     for (int i = 0; i < readers.size(); i++) {
@@ -108,12 +162,12 @@ final class SegmentMerger {
         new FieldsWriter(directory, segment, fieldInfos);
     try {
       for (int i = 0; i < readers.size(); i++) {
         SegmentReader reader = (SegmentReader)readers.elementAt(i);
         BitVector deletedDocs = reader.deletedDocs;
         int maxDoc = reader.maxDoc();
         for (int j = 0; j < maxDoc; j++)
           if (deletedDocs == null || !deletedDocs.get(j)) // skip deleted docs
             fieldsWriter.addDocument(reader.document(j));
       }
     } finally {
       fieldsWriter.close();
@@ -130,7 +184,7 @@ final class SegmentMerger {
       freqOutput = directory.createFile(segment + ".frq");
       proxOutput = directory.createFile(segment + ".prx");
       termInfosWriter =
           new TermInfosWriter(directory, segment, fieldInfos);

       mergeTermInfos();
@@ -151,9 +205,9 @@ final class SegmentMerger {
       SegmentMergeInfo smi = new SegmentMergeInfo(base, termEnum, reader);
       base += reader.numDocs();
       if (smi.next())
         queue.put(smi);                         // initialize queue
       else
         smi.close();
     }

     SegmentMergeInfo[] match = new SegmentMergeInfo[readers.size()];
@@ -165,18 +219,18 @@ final class SegmentMerger {
       SegmentMergeInfo top = (SegmentMergeInfo)queue.top();
       while (top != null && term.compareTo(top.term) == 0) {
         match[matchSize++] = (SegmentMergeInfo)queue.pop();
         top = (SegmentMergeInfo)queue.top();
       }

       mergeTermInfo(match, matchSize);          // add new TermInfo

       while (matchSize > 0) {
         SegmentMergeInfo smi = match[--matchSize];
         if (smi.next())
           queue.put(smi);                       // restore queue
         else
           smi.close();                          // done with a segment
       }
     }
   }
@@ -209,34 +263,34 @@ final class SegmentMerger {
       smi.termEnum.termInfo(termInfo);
       postings.seek(termInfo);
       while (postings.next()) {
         int doc;
         if (docMap == null)
           doc = base + postings.doc;            // no deletions
         else
           doc = base + docMap[postings.doc];    // re-map around deletions
         if (doc < lastDoc)
           throw new IllegalStateException("docs out of order");

         int docCode = (doc - lastDoc) << 1;     // use low bit to flag freq=1
         lastDoc = doc;

         int freq = postings.freq;
         if (freq == 1) {
           freqOutput.writeVInt(docCode | 1);    // write doc & freq=1
         } else {
           freqOutput.writeVInt(docCode);        // write doc
           freqOutput.writeVInt(freq);           // write frequency in doc
         }

         int lastPosition = 0;                   // write position deltas
         for (int j = 0; j < freq; j++) {
           int position = postings.nextPosition();
           proxOutput.writeVInt(position - lastPosition);
           lastPosition = position;
         }

         df++;
       }
     }
     return df;
@@ -246,27 +300,27 @@ final class SegmentMerger {
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
       if (fi.isIndexed) {
         OutputStream output = directory.createFile(segment + ".f" + i);
         try {
           for (int j = 0; j < readers.size(); j++) {
             SegmentReader reader = (SegmentReader)readers.elementAt(j);
             BitVector deletedDocs = reader.deletedDocs;
             InputStream input = reader.normStream(fi.name);
             int maxDoc = reader.maxDoc();
             try {
               for (int k = 0; k < maxDoc; k++) {
                 byte norm = input != null ? input.readByte() : (byte)0;
                 if (deletedDocs == null || !deletedDocs.get(k))
                   output.writeByte(norm);
               }
             } finally {
               if (input != null)
                 input.close();
             }
           }
         } finally {
           output.close();
         }
       }
     }
   }


@@ -65,6 +65,7 @@ import java.util.Vector;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.store.InputStream;
 import org.apache.lucene.store.Lock;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BitVector;

 final class SegmentReader extends IndexReader {
@@ -81,7 +82,9 @@ final class SegmentReader extends IndexReader {
   InputStream freqStream;
   InputStream proxStream;

+  // Compound File Reader when based on a compound file segment
+  CompoundFileReader cfsReader;
+
   private static class Norm {
     public Norm(InputStream in) { this.in = in; }
@@ -101,32 +104,42 @@ final class SegmentReader extends IndexReader {
     super(si.dir);
     segment = si.name;

-    fieldInfos = new FieldInfos(directory, segment + ".fnm");
-    fieldsReader = new FieldsReader(directory, segment, fieldInfos);
-
-    tis = new TermInfosReader(directory, segment, fieldInfos);
+    // Use compound file directory for some files, if it exists
+    Directory cfsDir = directory;
+    if (directory.fileExists(segment + ".cfs")) {
+      cfsReader = new CompoundFileReader(directory, segment + ".cfs");
+      cfsDir = cfsReader;
+    }
+
+    // No compound file exists - use the multi-file format
+    fieldInfos = new FieldInfos(cfsDir, segment + ".fnm");
+    fieldsReader = new FieldsReader(cfsDir, segment, fieldInfos);
+
+    tis = new TermInfosReader(cfsDir, segment, fieldInfos);

+    // NOTE: the bitvector is stored using the regular directory, not cfs
     if (hasDeletions(si))
       deletedDocs = new BitVector(directory, segment + ".del");

     // make sure that all index files have been read or are kept open
     // so that if an index update removes them we'll still have them
-    freqStream = directory.openFile(segment + ".frq");
-    proxStream = directory.openFile(segment + ".prx");
-    openNorms();
+    freqStream = cfsDir.openFile(segment + ".frq");
+    proxStream = cfsDir.openFile(segment + ".prx");
+    openNorms(cfsDir);
   }

   final synchronized void doClose() throws IOException {
     if (deletedDocsDirty) {
       synchronized (directory) {                // in- & inter-process sync
         new Lock.With(directory.makeLock("commit.lock"), IndexWriter.COMMIT_LOCK_TIMEOUT) {
           public Object doBody() throws IOException {
             deletedDocs.write(directory, segment + ".tmp");
             directory.renameFile(segment + ".tmp", segment + ".del");
             directory.touchFile("segments");
             return null;
           }
         }.run();
       }
       deletedDocsDirty = false;
     }
@@ -140,6 +153,9 @@ final class SegmentReader extends IndexReader {
     proxStream.close();
     closeNorms();

+    if (cfsReader != null)
+      cfsReader.close();
+
     if (closeDirectory)
       directory.close();
@@ -149,6 +165,10 @@ final class SegmentReader extends IndexReader {
     return si.dir.fileExists(si.name + ".del");
   }

+  static final boolean usesCompoundFile(SegmentInfo si) throws IOException {
+    return si.dir.fileExists(si.name + ".cfs");
+  }
+
   final synchronized void doDelete(int docNum) throws IOException {
     if (deletedDocs == null)
       deletedDocs = new BitVector(maxDoc());
@@ -158,21 +178,20 @@ final class SegmentReader extends IndexReader {
   final Vector files() throws IOException {
     Vector files = new Vector(16);
-    files.addElement(segment + ".fnm");
-    files.addElement(segment + ".fdx");
-    files.addElement(segment + ".fdt");
-    files.addElement(segment + ".tii");
-    files.addElement(segment + ".tis");
-    files.addElement(segment + ".frq");
-    files.addElement(segment + ".prx");
-
-    if (directory.fileExists(segment + ".del"))
-      files.addElement(segment + ".del");
-
+    final String ext[] = new String[] {
+      "cfs", "fnm", "fdx", "fdt", "tii", "tis", "frq", "prx", "del"
+    };
+
+    for (int i=0; i<ext.length; i++) {
+      String name = segment + "." + ext[i];
+      if (directory.fileExists(name))
+        files.addElement(name);
+    }
+
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
       if (fi.isIndexed)
         files.addElement(segment + ".f" + i);
     }
     return files;
   }
@@ -188,7 +207,7 @@ final class SegmentReader extends IndexReader {
   public final synchronized Document document(int n) throws IOException {
     if (isDeleted(n))
       throw new IllegalArgumentException
               ("attempt to access a deleted document");
     return fieldsReader.doc(n);
   }
@@ -282,12 +301,12 @@ final class SegmentReader extends IndexReader {
     return result;
   }

-  private final void openNorms() throws IOException {
+  private final void openNorms(Directory useDir) throws IOException {
     for (int i = 0; i < fieldInfos.size(); i++) {
       FieldInfo fi = fieldInfos.fieldInfo(i);
       if (fi.isIndexed)
         norms.put(fi.name,
-                  new Norm(directory.openFile(segment + ".f" + fi.number)));
+                  new Norm(useDir.openFile(segment + ".f" + fi.number)));
     }
   }

@@ -295,8 +314,8 @@ final class SegmentReader extends IndexReader {
     synchronized (norms) {
       Enumeration enumerator = norms.elements();
       while (enumerator.hasMoreElements()) {
         Norm norm = (Norm)enumerator.nextElement();
         norm.in.close();
       }
     }
   }
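
The constructor's fallback can be read in isolation: use the compound file as
the effective directory when it exists, otherwise fall back to the loose
per-extension files. A standalone sketch of that pattern (openSegmentFile and
its arguments are hypothetical, and a real caller must also close the
CompoundFileReader, as doClose() does above):

import java.io.IOException;
import org.apache.lucene.index.CompoundFileReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.InputStream;

class SegmentFileOpener {
  // Prefer the packed .cfs "directory" when present, else the plain file.
  static InputStream openSegmentFile(Directory directory, String segment,
                                     String extension) throws IOException {
    Directory cfsDir = directory;
    if (directory.fileExists(segment + ".cfs"))
      cfsDir = new CompoundFileReader(directory, segment + ".cfs");
    return cfsDir.openFile(segment + "." + extension);
  }
}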


@@ -0,0 +1,178 @@
package org.apache.lucene;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import java.util.GregorianCalendar;
import java.io.PrintWriter;
import java.io.StringWriter;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;
import org.apache.lucene.store.*;
import org.apache.lucene.document.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.queryParser.*;
/** JUnit adaptation of an older test case SearchTest.
* @author dmitrys@earthlink.net
* @version $Id$
*/
public class TestSearch extends TestCase {
/** Main for running test case by itself. */
public static void main(String args[]) {
TestRunner.run (new TestSuite(TestSearch.class));
}
/** This test performs a number of searches. It also compares output
* of searches using multi-file index segments with single-file
* index segments.
*
* TODO: someone should check that the results of the searches are
* still correct by adding assert statements. Right now, the test
* passes if the results are the same between multi-file and
* single-file formats, even if the results are wrong.
*/
public void testSearch() throws Exception {
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw, true);
doTestSearch(pw, false);
pw.close();
sw.close();
String multiFileOutput = sw.getBuffer().toString();
System.out.println(multiFileOutput);
sw = new StringWriter();
pw = new PrintWriter(sw, true);
doTestSearch(pw, true);
pw.close();
sw.close();
String singleFileOutput = sw.getBuffer().toString();
assertEquals(multiFileOutput, singleFileOutput);
}
private void doTestSearch(PrintWriter out, boolean useCompoundFile)
throws Exception
{
Directory directory = new RAMDirectory();
Analyzer analyzer = new SimpleAnalyzer();
IndexWriter writer = new IndexWriter(directory, analyzer, true);
writer.setUseCompoundFile(useCompoundFile);
String[] docs = {
"a b c d e",
"a b c d e a b c d e",
"a b c d e f g h i j",
"a c e",
"e c a",
"a c e a c e",
"a c e a b c"
};
for (int j = 0; j < docs.length; j++) {
Document d = new Document();
d.add(Field.Text("contents", docs[j]));
writer.addDocument(d);
}
writer.close();
Searcher searcher = new IndexSearcher(directory);
String[] queries = {
"a b",
"\"a b\"",
"\"a b c\"",
"a c",
"\"a c\"",
"\"a c e\"",
};
Hits hits = null;
QueryParser parser = new QueryParser("contents", analyzer);
parser.setPhraseSlop(4);
for (int j = 0; j < queries.length; j++) {
Query query = parser.parse(queries[j]);
out.println("Query: " + query.toString("contents"));
//DateFilter filter =
// new DateFilter("modified", Time(1997,0,1), Time(1998,0,1));
//DateFilter filter = DateFilter.Before("modified", Time(1997,00,01));
//System.out.println(filter);
hits = searcher.search(query);
out.println(hits.length() + " total results");
for (int i = 0 ; i < hits.length() && i < 10; i++) {
Document d = hits.doc(i);
out.println(i + " " + hits.score(i)
// + " " + DateField.stringToDate(d.get("modified"))
+ " " + d.get("contents"));
}
}
searcher.close();
}
static long Time(int year, int month, int day) {
GregorianCalendar calendar = new GregorianCalendar();
calendar.set(year, month, day);
return calendar.getTime().getTime();
}
}


@@ -0,0 +1,190 @@
package org.apache.lucene;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import org.apache.lucene.store.*;
import org.apache.lucene.document.*;
import org.apache.lucene.analysis.*;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.queryParser.*;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;
/** JUnit adaptation of an older test case DocTest.
* @author dmitrys@earthlink.net
* @version $Id$
*/
public class TestSearchForDuplicates extends TestCase {
/** Main for running test case by itself. */
public static void main(String args[]) {
TestRunner.run (new TestSuite(TestSearchForDuplicates.class));
}
static final String PRIORITY_FIELD ="priority";
static final String ID_FIELD ="id";
static final String HIGH_PRIORITY ="high";
static final String MED_PRIORITY ="medium";
static final String LOW_PRIORITY ="low";
/** This test compares search results when using and not using compound
* files.
*
* TODO: There is rudimentary search result validation as well, but it is
* simply based on asserting the output observed in the old test case,
* without really knowing if the output is correct. Someone needs to
* validate this output and make any changes to the checkHits method.
*/
public void testRun() throws Exception {
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw, true);
doTest(pw, false);
pw.close();
sw.close();
String multiFileOutput = sw.getBuffer().toString();
System.out.println(multiFileOutput);
sw = new StringWriter();
pw = new PrintWriter(sw, true);
doTest(pw, true);
pw.close();
sw.close();
String singleFileOutput = sw.getBuffer().toString();
assertEquals(multiFileOutput, singleFileOutput);
}
private void doTest(PrintWriter out, boolean useCompoundFiles) throws Exception {
Directory directory = new RAMDirectory();
Analyzer analyzer = new SimpleAnalyzer();
IndexWriter writer = new IndexWriter(directory, analyzer, true);
writer.setUseCompoundFile(useCompoundFiles);
final int MAX_DOCS = 225;
for (int j = 0; j < MAX_DOCS; j++) {
Document d = new Document();
d.add(Field.Text(PRIORITY_FIELD, HIGH_PRIORITY));
d.add(Field.Text(ID_FIELD, Integer.toString(j)));
writer.addDocument(d);
}
writer.close();
// try a search without OR
Searcher searcher = new IndexSearcher(directory);
Hits hits = null;
QueryParser parser = new QueryParser(PRIORITY_FIELD, analyzer);
Query query = parser.parse(HIGH_PRIORITY);
out.println("Query: " + query.toString(PRIORITY_FIELD));
hits = searcher.search(query);
printHits(out, hits);
checkHits(hits, MAX_DOCS);
searcher.close();
// try a new search with OR
searcher = new IndexSearcher(directory);
hits = null;
parser = new QueryParser(PRIORITY_FIELD, analyzer);
query = parser.parse(HIGH_PRIORITY + " OR " + MED_PRIORITY);
out.println("Query: " + query.toString(PRIORITY_FIELD));
hits = searcher.search(query);
printHits(out, hits);
checkHits(hits, MAX_DOCS);
searcher.close();
}
private void printHits(PrintWriter out, Hits hits ) throws IOException {
out.println(hits.length() + " total results\n");
for (int i = 0 ; i < hits.length(); i++) {
if ( i < 10 || (i > 94 && i < 105) ) {
Document d = hits.doc(i);
out.println(i + " " + d.get(ID_FIELD));
}
}
}
private void checkHits(Hits hits, int expectedCount) throws IOException {
assertEquals("total results", expectedCount, hits.length());
for (int i = 0 ; i < hits.length(); i++) {
if ( i < 10 || (i > 94 && i < 105) ) {
Document d = hits.doc(i);
assertEquals("check " + i, String.valueOf(i), d.get(ID_FIELD));
}
}
}
}


@@ -62,6 +62,7 @@ import org.apache.lucene.search.*;
 import org.apache.lucene.queryParser.*;

 import java.util.Random;
+import java.io.File;

 class ThreadSafetyTest {
   private static final Analyzer ANALYZER = new SimpleAnalyzer();
@@ -86,26 +87,33 @@ class ThreadSafetyTest {
     public void run() {
       try {
-        for (int i = 0; i < 1024*ITERATIONS; i++) {
-          Document d = new Document();
-          int n = RANDOM.nextInt();
-          d.add(Field.Keyword("id", Integer.toString(n)));
-          d.add(Field.UnStored("contents", intToEnglish(n)));
-          System.out.println("Adding " + n);
-          writer.addDocument(d);
+        boolean useCompoundFiles = false;
+
+        for (int i = 0; i < 1024*ITERATIONS; i++) {
+          Document d = new Document();
+          int n = RANDOM.nextInt();
+          d.add(Field.Keyword("id", Integer.toString(n)));
+          d.add(Field.UnStored("contents", intToEnglish(n)));
+          System.out.println("Adding " + n);
+
+          // Switch between single and multiple file segments
+          useCompoundFiles = Math.random() < 0.5;
+          writer.setUseCompoundFile(useCompoundFiles);
+
+          writer.addDocument(d);

           if (i%reopenInterval == 0) {
             writer.close();
             writer = new IndexWriter("index", ANALYZER, false);
           }
         }

         writer.close();
       } catch (Exception e) {
         System.out.println(e.toString());
         e.printStackTrace();
         System.exit(0);
       }
     }
   }
@@ -116,26 +124,26 @@ class ThreadSafetyTest {
     public SearcherThread(boolean useGlobal) throws java.io.IOException {
       if (!useGlobal)
         this.searcher = new IndexSearcher("index");
     }

     public void run() {
       try {
         for (int i = 0; i < 512*ITERATIONS; i++) {
           searchFor(RANDOM.nextInt(), (searcher==null)?SEARCHER:searcher);
           if (i%reopenInterval == 0) {
             if (searcher == null) {
               SEARCHER = new IndexSearcher("index");
             } else {
               searcher.close();
               searcher = new IndexSearcher("index");
             }
           }
         }
       } catch (Exception e) {
         System.out.println(e.toString());
         e.printStackTrace();
         System.exit(0);
       }
     }
@@ -143,11 +151,11 @@ class ThreadSafetyTest {
       throws Exception {
       System.out.println("Searching for " + n);
       Hits hits =
         searcher.search(QueryParser.parse(intToEnglish(n), "contents",
                                           ANALYZER));
       System.out.println("Search for " + n + ": total=" + hits.length());
       for (int j = 0; j < Math.min(3, hits.length()); j++) {
         System.out.println("Hit for " + n + ": " + hits.doc(j).get("id"));
       }
     }
   }
@@ -159,15 +167,18 @@ class ThreadSafetyTest {
     for (int i = 0; i < args.length; i++) {
       if ("-ro".equals(args[i]))
         readOnly = true;
       if ("-add".equals(args[i]))
         add = true;
     }

-    IndexReader.unlock(FSDirectory.getDirectory("index", false));
+    File indexDir = new File("index");
+    if (! indexDir.exists()) indexDir.mkdirs();
+
+    IndexReader.unlock(FSDirectory.getDirectory(indexDir, false));

     if (!readOnly) {
-      IndexWriter writer = new IndexWriter("index", ANALYZER, !add);
+      IndexWriter writer = new IndexWriter(indexDir, ANALYZER, !add);

       Thread indexerThread = new IndexerThread(writer);
       indexerThread.start();
@@ -178,7 +189,7 @@ class ThreadSafetyTest {
       SearcherThread searcherThread1 = new SearcherThread(false);
       searcherThread1.start();

-      SEARCHER = new IndexSearcher("index");
+      SEARCHER = new IndexSearcher(indexDir.toString());

       SearcherThread searcherThread2 = new SearcherThread(true);
       searcherThread2.start();
@@ -231,9 +242,9 @@ class ThreadSafetyTest {
       }
       i = i%10;
       if (i == 0)
         result.append(" ");
       else
         result.append("-");
     }

     switch (i) {
     case 19 : result.append("nineteen "); break;


@@ -87,7 +87,8 @@ class DocTest {
     } catch (Exception e) {
       System.out.println(" caught a " + e.getClass() +
                          "\n with message: " + e.getMessage());
+      e.printStackTrace();
     }
   }

@@ -113,7 +114,7 @@ class DocTest {
     SegmentReader r1 = new SegmentReader(new SegmentInfo(seg1, 1, directory));
     SegmentReader r2 = new SegmentReader(new SegmentInfo(seg2, 1, directory));

-    SegmentMerger merger = new SegmentMerger(directory, merged);
+    SegmentMerger merger = new SegmentMerger(directory, merged, false);
     merger.add(r1);
     merger.add(r2);
     merger.merge();
@@ -137,17 +138,17 @@ class DocTest {
       TermPositions positions = reader.termPositions(tis.term());
       try {
         while (positions.next()) {
           System.out.print(" doc=" + positions.doc());
           System.out.print(" TF=" + positions.freq());
           System.out.print(" pos=");
           System.out.print(positions.nextPosition());
           for (int j = 1; j < positions.freq(); j++)
             System.out.print("," + positions.nextPosition());
           System.out.println("");
         }
       } finally {
         positions.close();
       }
     }
     tis.close();


@@ -0,0 +1,701 @@
package org.apache.lucene.index;
import java.io.IOException;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;
import org.apache.lucene.store.*;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
/**
* @author dmitrys@earthlink.net
* @version $Id$
*/
public class TestCompoundFile extends TestCase
{
/** Main for running test case by itself. */
public static void main(String args[]) {
TestRunner.run (new TestSuite(TestCompoundFile.class));
// TestRunner.run (new TestCompoundFile("testSingleFile"));
// TestRunner.run (new TestCompoundFile("testTwoFiles"));
// TestRunner.run (new TestCompoundFile("testRandomFiles"));
// TestRunner.run (new TestCompoundFile("testClonedStreamsClosing"));
// TestRunner.run (new TestCompoundFile("testReadAfterClose"));
// TestRunner.run (new TestCompoundFile("testRandomAccess"));
// TestRunner.run (new TestCompoundFile("testRandomAccessClones"));
// TestRunner.run (new TestCompoundFile("testFileNotFound"));
// TestRunner.run (new TestCompoundFile("testReadPastEOF"));
// TestRunner.run (new TestCompoundFile("testIWCreate"));
}
public TestCompoundFile() {
super();
}
public TestCompoundFile(String name) {
super(name);
}
private Directory dir;
public void setUp() throws IOException {
//dir = new RAMDirectory();
dir = FSDirectory.getDirectory("testIndex", true);
}
/** Creates a file of the specified size with random data. */
private void createRandomFile(Directory dir, String name, int size)
throws IOException
{
OutputStream os = dir.createFile(name);
for (int i=0; i<size; i++) {
byte b = (byte) (Math.random() * 256);
os.writeByte(b);
}
os.close();
}
/** Creates a file of the specified size with sequential data. The first
* byte is written as the start byte provided. Each subsequent byte is
* computed as start + offset, where offset is the byte's position in the
* file.
*/
private void createSequenceFile(Directory dir,
String name,
byte start,
int size)
throws IOException
{
OutputStream os = dir.createFile(name);
for (int i=0; i < size; i++) {
os.writeByte(start);
start ++;
}
os.close();
}
private void assertSameStreams(String msg,
InputStream expected,
InputStream test)
throws IOException
{
assertNotNull(msg + " null expected", expected);
assertNotNull(msg + " null test", test);
assertEquals(msg + " length", expected.length(), test.length());
assertEquals(msg + " position", expected.getFilePointer(),
test.getFilePointer());
byte expectedBuffer[] = new byte[512];
byte testBuffer[] = new byte[expectedBuffer.length];
long remainder = expected.length() - expected.getFilePointer();
while(remainder > 0) {
int readLen = (int) Math.min(remainder, expectedBuffer.length);
expected.readBytes(expectedBuffer, 0, readLen);
test.readBytes(testBuffer, 0, readLen);
assertEqualArrays(msg + ", remainder " + remainder, expectedBuffer,
testBuffer, 0, readLen);
remainder -= readLen;
}
}
private void assertSameStreams(String msg,
InputStream expected,
InputStream actual,
long seekTo)
throws IOException
{
if (seekTo < 0) {
try {
actual.seek(seekTo);
fail(msg + ", " + seekTo + ", negative seek");
} catch (IOException e) {
/* success */
//System.out.println("SUCCESS: Negative seek: " + e);
}
} else if (seekTo > 0 && seekTo >= expected.length()) {
try {
actual.seek(seekTo);
fail(msg + ", " + seekTo + ", seek past EOF");
} catch (IOException e) {
/* success */
//System.out.println("SUCCESS: Seek past EOF: " + e);
}
} else {
expected.seek(seekTo);
actual.seek(seekTo);
assertSameStreams(msg + ", seek(mid)", expected, actual);
}
}
private void assertSameSeekBehavior(String msg,
InputStream expected,
InputStream actual)
throws IOException
{
// seek to 0
long point = 0;
assertSameStreams(msg + ", seek(0)", expected, actual, point);
// seek to middle
point = expected.length() / 2L;
assertSameStreams(msg + ", seek(mid)", expected, actual, point);
// seek to end - 2
point = expected.length() - 2;
assertSameStreams(msg + ", seek(end-2)", expected, actual, point);
// seek to end - 1
point = expected.length() - 1;
assertSameStreams(msg + ", seek(end-1)", expected, actual, point);
// seek to the end
point = expected.length();
assertSameStreams(msg + ", seek(end)", expected, actual, point);
// seek past end
point = expected.length() + 1;
assertSameStreams(msg + ", seek(end+1)", expected, actual, point);
}
private void assertEqualArrays(String msg,
byte[] expected,
byte[] test,
int start,
int len)
{
assertNotNull(msg + " null expected", expected);
assertNotNull(msg + " null test", test);
for (int i=start; i<start+len; i++) {
assertEquals(msg + " " + i, expected[i], test[i]);
}
}
// ===========================================================
// Tests of the basic CompoundFile functionality
// ===========================================================
/** This test creates a compound file based on a single file.
* Files of different sizes are tested: 0, 1, 10, 100 bytes.
*/
public void testSingleFile() throws IOException {
int data[] = new int[] { 0, 1, 10, 100 };
for (int i=0; i<data.length; i++) {
String name = "t" + data[i];
createSequenceFile(dir, name, (byte) 0, data[i]);
CompoundFileWriter csw = new CompoundFileWriter(dir, name + ".cfs");
csw.addFile(name);
csw.close();
CompoundFileReader csr = new CompoundFileReader(dir, name + ".cfs");
InputStream expected = dir.openFile(name);
InputStream actual = csr.openFile(name);
assertSameStreams(name, expected, actual);
assertSameSeekBehavior(name, expected, actual);
expected.close();
actual.close();
csr.close();
}
}
/** This test creates a compound file based on two files. */
public void testTwoFiles() throws IOException {
createSequenceFile(dir, "d1", (byte) 0, 15);
createSequenceFile(dir, "d2", (byte) 0, 114);
CompoundFileWriter csw = new CompoundFileWriter(dir, "d.csf");
csw.addFile("d1");
csw.addFile("d2");
csw.close();
CompoundFileReader csr = new CompoundFileReader(dir, "d.csf");
InputStream expected = dir.openFile("d1");
InputStream actual = csr.openFile("d1");
assertSameStreams("d1", expected, actual);
assertSameSeekBehavior("d1", expected, actual);
expected.close();
actual.close();
expected = dir.openFile("d2");
actual = csr.openFile("d2");
assertSameStreams("d2", expected, actual);
assertSameSeekBehavior("d2", expected, actual);
expected.close();
actual.close();
csr.close();
}
/** This test creates a compound file based on a large number of files of
* various lengths. The file content is generated randomly. The sizes range
* from 0 to 1Mb. Some of the sizes are selected to test the buffering
* logic in the file reading code. For this, the chunk variable is set to
* the length of the buffer used internally by the compound file logic.
*/
public void testRandomFiles() throws IOException {
// Setup the test segment
String segment = "test";
int chunk = 1024; // internal buffer size used by the stream
createRandomFile(dir, segment + ".zero", 0);
createRandomFile(dir, segment + ".one", 1);
createRandomFile(dir, segment + ".ten", 10);
createRandomFile(dir, segment + ".hundred", 100);
createRandomFile(dir, segment + ".big1", chunk);
createRandomFile(dir, segment + ".big2", chunk - 1);
createRandomFile(dir, segment + ".big3", chunk + 1);
createRandomFile(dir, segment + ".big4", 3 * chunk);
createRandomFile(dir, segment + ".big5", 3 * chunk - 1);
createRandomFile(dir, segment + ".big6", 3 * chunk + 1);
createRandomFile(dir, segment + ".big7", 1000 * chunk);
// Setup extraneous files
createRandomFile(dir, "onetwothree", 100);
createRandomFile(dir, segment + ".notIn", 50);
createRandomFile(dir, segment + ".notIn2", 51);
// Now test
CompoundFileWriter csw = new CompoundFileWriter(dir, "test.cfs");
final String data[] = new String[] {
".zero", ".one", ".ten", ".hundred", ".big1", ".big2", ".big3",
".big4", ".big5", ".big6", ".big7"
};
for (int i=0; i<data.length; i++) {
csw.addFile(segment + data[i]);
}
csw.close();
CompoundFileReader csr = new CompoundFileReader(dir, "test.cfs");
for (int i=0; i<data.length; i++) {
InputStream check = dir.openFile(segment + data[i]);
InputStream test = csr.openFile(segment + data[i]);
assertSameStreams(data[i], check, test);
assertSameSeekBehavior(data[i], check, test);
test.close();
check.close();
}
csr.close();
}
/** Sets up a larger compound file with a number of components, each of
* which is a sequential file (so that we can easily tell that we are
* reading the right bytes). The method sets up 20 files - f0 to f19;
* each file is 2000 bytes long.
*/
private void setUp_2() throws IOException {
CompoundFileWriter cw = new CompoundFileWriter(dir, "f.comp");
for (int i=0; i<20; i++) {
createSequenceFile(dir, "f" + i, (byte) 0, 2000);
cw.addFile("f" + i);
}
cw.close();
}
public void testReadAfterClose() throws IOException {
demo_FSInputStreamBug((FSDirectory) dir, "test");
}
private void demo_FSInputStreamBug(FSDirectory fsdir, String file)
throws IOException
{
// Setup the test file - we need more than 1024 bytes
OutputStream os = fsdir.createFile(file);
for(int i=0; i<2000; i++) {
os.writeByte((byte) i);
}
os.close();
InputStream in = fsdir.openFile(file);
// This read primes the buffer in InputStream
byte b = in.readByte();
// Close the file
in.close();
// ERROR: this call should fail, but succeeds because the buffer
// is still filled
b = in.readByte();
// ERROR: this call should fail, but succeeds for some reason as well
in.seek(1099);
try {
// OK: this call correctly fails. We are now past the 1024 internal
// buffer, so an actual IO is attempted, which fails
b = in.readByte();
} catch (IOException e) {
}
}
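// A possible follow-up check -- a sketch only; the helper name below is
// hypothetical, and it assumes the named file already exists and is
// non-empty. Once FSInputStream enforces close(), the scenario
// demonstrated above should be assertable directly: every post-close
// read must raise an IOException.
private void assertReadFailsAfterClose(FSDirectory fsdir, String file)
    throws IOException
{
    InputStream in = fsdir.openFile(file);
    in.readByte();          // primes the internal buffer
    in.close();
    try {
        in.readByte();      // must not be served from the stale buffer
        fail("read after close should throw");
    } catch (IOException e) {
        /* success */
    }
}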
static boolean isCSInputStream(InputStream is) {
return is instanceof CompoundFileReader.CSInputStream;
}
static boolean isCSInputStreamOpen(InputStream is) throws IOException {
if (isCSInputStream(is)) {
CompoundFileReader.CSInputStream cis =
(CompoundFileReader.CSInputStream) is;
return _TestHelper.isFSInputStreamOpen(cis.base);
} else {
return false;
}
}
public void testClonedStreamsClosing() throws IOException {
setUp_2();
CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
// basic clone
InputStream expected = dir.openFile("f11");
assertTrue(_TestHelper.isFSInputStreamOpen(expected));
InputStream one = cr.openFile("f11");
assertTrue(isCSInputStreamOpen(one));
InputStream two = (InputStream) one.clone();
assertTrue(isCSInputStreamOpen(two));
assertSameStreams("basic clone one", expected, one);
expected.seek(0);
assertSameStreams("basic clone two", expected, two);
// Now close the first stream
one.close();
assertTrue("Only close when cr is closed", isCSInputStreamOpen(one));
// The following should really fail, since we should not be able to
// access a file once close has been called on it (regardless of
// buffering and/or clone magic)
expected.seek(0);
two.seek(0);
assertSameStreams("basic clone two/2", expected, two);
// Now close the compound reader
cr.close();
assertFalse("Now closed one", isCSInputStreamOpen(one));
assertFalse("Now closed two", isCSInputStreamOpen(two));
// The following may also fail since the compound stream is closed
expected.seek(0);
two.seek(0);
//assertSameStreams("basic clone two/3", expected, two);
// Now close the second clone
two.close();
expected.seek(0);
two.seek(0);
//assertSameStreams("basic clone two/4", expected, two);
expected.close();
}
/** This test opens two files from a compound stream and verifies that
* their file positions are independent of each other.
*/
public void testRandomAccess() throws IOException {
setUp_2();
CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
// Open two files
InputStream e1 = dir.openFile("f11");
InputStream e2 = dir.openFile("f3");
InputStream a1 = cr.openFile("f11");
InputStream a2 = dir.openFile("f3");
// Seek the first pair
e1.seek(100);
a1.seek(100);
assertEquals(100, e1.getFilePointer());
assertEquals(100, a1.getFilePointer());
byte be1 = e1.readByte();
byte ba1 = a1.readByte();
assertEquals(be1, ba1);
// Now seek the second pair
e2.seek(1027);
a2.seek(1027);
assertEquals(1027, e2.getFilePointer());
assertEquals(1027, a2.getFilePointer());
byte be2 = e2.readByte();
byte ba2 = a2.readByte();
assertEquals(be2, ba2);
// Now make sure the first one didn't move
assertEquals(101, e1.getFilePointer());
assertEquals(101, a1.getFilePointer());
be1 = e1.readByte();
ba1 = a1.readByte();
assertEquals(be1, ba1);
// Now move the first one again, past the buffer length
e1.seek(1910);
a1.seek(1910);
assertEquals(1910, e1.getFilePointer());
assertEquals(1910, a1.getFilePointer());
be1 = e1.readByte();
ba1 = a1.readByte();
assertEquals(be1, ba1);
// Now make sure the second set didn't move
assertEquals(1028, e2.getFilePointer());
assertEquals(1028, a2.getFilePointer());
be2 = e2.readByte();
ba2 = a2.readByte();
assertEquals(be2, ba2);
// Move the second set back, again cross the buffer size
e2.seek(17);
a2.seek(17);
assertEquals(17, e2.getFilePointer());
assertEquals(17, a2.getFilePointer());
be2 = e2.readByte();
ba2 = a2.readByte();
assertEquals(be2, ba2);
// Finally, make sure the first set didn't move
assertEquals(1911, e1.getFilePointer());
assertEquals(1911, a1.getFilePointer());
be1 = e1.readByte();
ba1 = a1.readByte();
assertEquals(be1, ba1);
e1.close();
e2.close();
a1.close();
a2.close();
cr.close();
}
/** This test opens two files from a compound stream, clones each of them,
* and verifies that the clones' file positions are independent of each
* other.
*/
public void testRandomAccessClones() throws IOException {
setUp_2();
CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
// Open two files
InputStream e1 = cr.openFile("f11");
InputStream e2 = cr.openFile("f3");
InputStream a1 = (InputStream) e1.clone();
InputStream a2 = (InputStream) e2.clone();
// Seek the first pair
e1.seek(100);
a1.seek(100);
assertEquals(100, e1.getFilePointer());
assertEquals(100, a1.getFilePointer());
byte be1 = e1.readByte();
byte ba1 = a1.readByte();
assertEquals(be1, ba1);
// Now seek the second pair
e2.seek(1027);
a2.seek(1027);
assertEquals(1027, e2.getFilePointer());
assertEquals(1027, a2.getFilePointer());
byte be2 = e2.readByte();
byte ba2 = a2.readByte();
assertEquals(be2, ba2);
// Now make sure the first one didn't move
assertEquals(101, e1.getFilePointer());
assertEquals(101, a1.getFilePointer());
be1 = e1.readByte();
ba1 = a1.readByte();
assertEquals(be1, ba1);
// Now move the first one again, past the buffer length
e1.seek(1910);
a1.seek(1910);
assertEquals(1910, e1.getFilePointer());
assertEquals(1910, a1.getFilePointer());
be1 = e1.readByte();
ba1 = a1.readByte();
assertEquals(be1, ba1);
// Now make sure the second set didn't move
assertEquals(1028, e2.getFilePointer());
assertEquals(1028, a2.getFilePointer());
be2 = e2.readByte();
ba2 = a2.readByte();
assertEquals(be2, ba2);
// Move the second set back, again cross the buffer size
e2.seek(17);
a2.seek(17);
assertEquals(17, e2.getFilePointer());
assertEquals(17, a2.getFilePointer());
be2 = e2.readByte();
ba2 = a2.readByte();
assertEquals(be2, ba2);
// Finally, make sure the first set didn't move
assertEquals(1911, e1.getFilePointer());
assertEquals(1911, a1.getFilePointer());
be1 = e1.readByte();
ba1 = a1.readByte();
assertEquals(be1, ba1);
e1.close();
e2.close();
a1.close();
a2.close();
cr.close();
}
public void testFileNotFound() throws IOException {
setUp_2();
CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
// Opening a non-existent file should raise an IOException
try {
InputStream e1 = cr.openFile("bogus");
fail("opening a non-existent file should have failed");
} catch (IOException e) {
/* success */
//System.out.println("SUCCESS: File Not Found: " + e);
}
cr.close();
}
public void testReadPastEOF() throws IOException {
setUp_2();
CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
InputStream is = cr.openFile("f2");
is.seek(is.length() - 10);
byte b[] = new byte[100];
is.readBytes(b, 0, 10);
try {
byte test = is.readByte();
fail("Single byte read past end of file");
} catch (IOException e) {
/* success */
//System.out.println("SUCCESS: single byte read past end of file: " + e);
}
is.seek(is.length() - 10);
try {
is.readBytes(b, 0, 50);
fail("Block read past end of file");
} catch (IOException e) {
/* success */
//System.out.println("SUCCESS: block read past end of file: " + e);
}
is.close();
cr.close();
}
// ===========================================================
// More extensive tests involving an IndexWriter
// ===========================================================
public void testIWCreate() throws IOException {
// create index writer
IndexWriter iw = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
int created = 0;
for (int i=0; i<150; i++) {
iw.addDocument(createTestDoc(String.valueOf(i)));
created ++;
}
assertEquals(created, iw.docCount());
iw.close();
// delete every seventh document, starting from doc 10
IndexReader reader = IndexReader.open(dir);
int deleted = 0;
for (int i = 10; i < created-7; i+=7) {
reader.delete(i);
deleted ++;
}
reader.close();
int remains = created - deleted;
iw = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
assertEquals(created, iw.docCount());
iw.close();
reader = IndexReader.open(dir);
assertEquals(created, reader.maxDoc());
assertEquals(remains, reader.numDocs());
for (int i = 10; i < created-7; i+=7) {
assertTrue("deleted: " + i, reader.isDeleted(i));
assertFalse("deleted+1: " + i, reader.isDeleted(i + 1));
assertFalse("deleted-1: " + i, reader.isDeleted(i - 1));
}
reader.close();
iw = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
iw.optimize();
assertEquals(remains, iw.docCount());
iw.close();
reader = IndexReader.open(dir);
assertEquals(remains, reader.maxDoc());
assertEquals(remains, reader.numDocs());
reader.close();
}
private Document createTestDoc(String id) {
Document doc = new Document();
doc.add(Field.Keyword("keyword_id", id));
doc.add(Field.Text("text_id", id));
doc.add(Field.Keyword("keyword_text", "KeywordText"));
doc.add(Field.Text("text", "This is a text field"));
doc.add(Field.UnIndexed("unindexed", "This is some payload unindexed text"));
doc.add(Field.UnStored("unstored", "This is unstored text"));
return doc;
}
private void verifyDoc(Document doc, String id) {
assertEquals("keyword_id", doc.get("keyword_id"), id);
assertEquals("text_id", id);
assertEquals("keyword_text", doc.get("keyword_text"), "KeywordText");
assertEquals("text", doc.get("text"), "This is some payload unindexed text");
assertEquals("unindexed", doc.get("unindexed"), "This is some payload unindexed text");
assertNull("unstored", doc.get("unstored"));
}
}


@@ -0,0 +1,265 @@
package org.apache.lucene.index;
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache Lucene" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache Lucene", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
import junit.framework.TestCase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.demo.FileDocument;
import java.io.*;
import java.util.*;
/** JUnit adaptation of an older test case DocTest.
* @author dmitrys@earthlink.net
* @version $Id$
*/
public class TestDoc extends TestCase {
/** Main for running test case by itself. */
public static void main(String args[]) {
TestRunner.run (new TestSuite(TestDoc.class));
}
private File workDir;
private File indexDir;
private LinkedList files;
/** Sets up the test case. This test case needs
* a few text files created in the current working directory.
*/
public void setUp() throws IOException {
workDir = new File("TestDoc");
workDir.mkdirs();
indexDir = new File(workDir, "testIndex");
indexDir.mkdirs();
Directory directory = FSDirectory.getDirectory(indexDir, true);
directory.close();
files = new LinkedList();
files.add(createFile("test.txt",
"This is the first test file"
));
files.add(createFile("test2.txt",
"This is the second test file"
));
}
private File createFile(String name, String text) throws IOException {
FileWriter fw = null;
PrintWriter pw = null;
try {
File f = new File(workDir, name);
if (f.exists()) f.delete();
fw = new FileWriter(f);
pw = new PrintWriter(fw);
pw.println(text);
return f;
} finally {
if (pw != null) pw.close();
if (fw != null) fw.close();
}
}
/** This test executes a number of merges and compares the contents of
* the segments created when using a compound file and when not using one.
*
* TODO: the original test used to print the segment contents to System.out
* for visual validation. To have the same effect, a new method
* checkSegment(String name, ...) should be created that would
* assert various things about the segment (a possible shape is
* sketched after printSegment below).
*/
public void testIndexAndMerge() throws Exception {
StringWriter sw = new StringWriter();
PrintWriter out = new PrintWriter(sw, true);
Directory directory = FSDirectory.getDirectory(indexDir, true);  // create == true wipes any previous index
directory.close();
indexDoc("one", "test.txt");
printSegment(out, "one");
indexDoc("two", "test2.txt");
printSegment(out, "two");
merge("one", "two", "merge", false);
printSegment(out, "merge");
merge("one", "two", "merge2", false);
printSegment(out, "merge2");
merge("merge", "merge2", "merge3", false);
printSegment(out, "merge3");
out.close();
sw.close();
String multiFileOutput = sw.getBuffer().toString();
System.out.println(multiFileOutput);
sw = new StringWriter();
out = new PrintWriter(sw, true);
directory = FSDirectory.getDirectory(indexDir, true);  // wipe the index again for the compound-file pass
directory.close();
indexDoc("one", "test.txt");
printSegment(out, "one");
indexDoc("two", "test2.txt");
printSegment(out, "two");
merge("one", "two", "merge", true);
printSegment(out, "merge");
merge("one", "two", "merge2", true);
printSegment(out, "merge2");
merge("merge", "merge2", "merge3", true);
printSegment(out, "merge3");
out.close();
sw.close();
String singleFileOutput = sw.getBuffer().toString();
assertEquals(multiFileOutput, singleFileOutput);
}
private void indexDoc(String segment, String fileName)
throws Exception
{
Directory directory = FSDirectory.getDirectory(indexDir, false);
Analyzer analyzer = new SimpleAnalyzer();
DocumentWriter writer =
new DocumentWriter(directory, analyzer, Similarity.getDefault(), 1000);
File file = new File(workDir, fileName);
Document doc = FileDocument.Document(file);
writer.addDocument(segment, doc);
directory.close();
}
private void merge(String seg1, String seg2, String merged, boolean useCompoundFile)
throws Exception {
Directory directory = FSDirectory.getDirectory(indexDir, false);
SegmentReader r1 = new SegmentReader(new SegmentInfo(seg1, 1, directory));
SegmentReader r2 = new SegmentReader(new SegmentInfo(seg2, 1, directory));
SegmentMerger merger =
new SegmentMerger(directory, merged, useCompoundFile);
merger.add(r1);
merger.add(r2);
merger.merge();
directory.close();
}
private void printSegment(PrintWriter out, String segment)
throws Exception {
Directory directory = FSDirectory.getDirectory(indexDir, false);
SegmentReader reader =
new SegmentReader(new SegmentInfo(segment, 1, directory));
for (int i = 0; i < reader.numDocs(); i++)
out.println(reader.document(i));
TermEnum tis = reader.terms();
while (tis.next()) {
out.print(tis.term());
out.println(" DF=" + tis.docFreq());
TermPositions positions = reader.termPositions(tis.term());
try {
while (positions.next()) {
out.print(" doc=" + positions.doc());
out.print(" TF=" + positions.freq());
out.print(" pos=");
out.print(positions.nextPosition());
for (int j = 1; j < positions.freq(); j++)
out.print("," + positions.nextPosition());
out.println("");
}
} finally {
positions.close();
}
}
tis.close();
reader.close();
directory.close();
}
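// A possible shape for the checkSegment method suggested in the TODO
// above -- a sketch only; the method name and the invariants asserted
// here are assumptions rather than part of this change.
private void checkSegment(String segment, int expectedDocCount)
    throws Exception
{
    Directory directory = FSDirectory.getDirectory(indexDir, false);
    SegmentReader reader =
        new SegmentReader(new SegmentInfo(segment, 1, directory));
    try {
        // the segment must hold the expected number of live documents
        assertEquals(expectedDocCount, reader.numDocs());
        TermEnum tis = reader.terms();
        while (tis.next()) {
            // every enumerated term must carry at least one posting
            assertTrue(tis.term().toString(), tis.docFreq() > 0);
        }
        tis.close();
    } finally {
        reader.close();
        directory.close();
    }
}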
}


@@ -0,0 +1,47 @@
package org.apache.lucene.store;
import java.io.RandomAccessFile;
import java.io.IOException;
/** This class provides access to package-level features defined in the
* store package. It is used for testing only.
*/
public class _TestHelper {
/** Returns true if the provided input stream is actually
* an FSInputStream.
*/
public static boolean isFSInputStream(InputStream is) {
return is instanceof FSInputStream;
}
/** Returns true if the provided input stream is an FSInputStream and
* is a clone, that is, it does not own its underlying file descriptor.
*/
public static boolean isFSInputStreamClone(InputStream is) {
if (isFSInputStream(is)) {
return ((FSInputStream) is).isClone;
} else {
return false;
}
}
/** Given an instance of FSDirectory.FSInputStream, this method returns
* true if the underlying file descriptor is valid, and false otherwise.
* This can be used to determine if the OS file has been closed.
* The descriptor becomes invalid when the non-clone instance of the
* FSInputStream that owns this descriptor is closed. However, the
* descriptor may possibly become invalid in other ways as well.
*/
public static boolean isFSInputStreamOpen(InputStream is)
throws IOException
{
if (isFSInputStream(is)) {
FSInputStream fis = (FSInputStream) is;
return fis.isFDValid();
} else {
return false;
}
}
}