From 674eae39ec8bee47dae47cb61e8784b472832881 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Fri, 25 Jan 2008 09:42:29 +0000 Subject: [PATCH] LUCENE-705: pre-set the CFS file length before building it git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@615160 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 7 +++++++ .../org/apache/lucene/index/CompoundFileWriter.java | 13 +++++++++++++ src/java/org/apache/lucene/store/FSDirectory.java | 4 +++- src/java/org/apache/lucene/store/IndexOutput.java | 12 +++++++++++- 4 files changed, 34 insertions(+), 2 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index f5c224507ce..43942fffeb1 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -20,6 +20,13 @@ New features Optimizations + 1. LUCENE-705: When building a compound file, use + RandomAccessFile.setLength() to tell the OS/filesystem to + pre-allocate space for the file. This may reduce fragmentation + in how the CFS file is stored, and allows us to detect an upcoming + disk-full situation before actually filling up the disk. (Mike + McCandless) + Documentation Build diff --git a/src/java/org/apache/lucene/index/CompoundFileWriter.java b/src/java/org/apache/lucene/index/CompoundFileWriter.java index fc8d2c82aaf..40700871f6f 100644 --- a/src/java/org/apache/lucene/index/CompoundFileWriter.java +++ b/src/java/org/apache/lucene/index/CompoundFileWriter.java @@ -156,13 +156,24 @@ final class CompoundFileWriter { // Remember the positions of directory entries so that we can // adjust the offsets later Iterator it = entries.iterator(); + long totalSize = 0; while(it.hasNext()) { FileEntry fe = (FileEntry) it.next(); fe.directoryOffset = os.getFilePointer(); os.writeLong(0); // for now os.writeString(fe.file); + totalSize += directory.fileLength(fe.file); } + // Pre-allocate size of file as optimization -- + // this can potentially help IO performance as + // we write the file and also later during + // searching. 
It also uncovers a disk-full + // situation earlier and hopefully without + // actually filling disk to 100%: + final long finalLength = totalSize+os.getFilePointer(); + os.setLength(finalLength); + // Open the files and copy their data into the stream. // Remember the locations of each file's data section. byte buffer[] = new byte[16384]; @@ -181,6 +192,8 @@ final class CompoundFileWriter { os.writeLong(fe.dataOffset); } + assert finalLength == os.length(); + // Close the output stream. Set the os to null before trying to // close so that if an exception occurs during the close, the // finally clause below will not attempt to close the stream diff --git a/src/java/org/apache/lucene/store/FSDirectory.java b/src/java/org/apache/lucene/store/FSDirectory.java index 6b23aca3c7a..727507022f0 100644 --- a/src/java/org/apache/lucene/store/FSDirectory.java +++ b/src/java/org/apache/lucene/store/FSDirectory.java @@ -615,6 +615,8 @@ public class FSDirectory extends Directory { public long length() throws IOException { return file.length(); } - + public void setLength(long length) throws IOException { + file.setLength(length); + } } } diff --git a/src/java/org/apache/lucene/store/IndexOutput.java b/src/java/org/apache/lucene/store/IndexOutput.java index 504934ec7b2..648355db08f 100644 --- a/src/java/org/apache/lucene/store/IndexOutput.java +++ b/src/java/org/apache/lucene/store/IndexOutput.java @@ -189,5 +189,15 @@ public abstract class IndexOutput { /** The number of bytes in the file. */ public abstract long length() throws IOException; - + /** Set the file length. By default, this method does + * nothing (it's optional for a Directory to implement + * it). But, certain Directory implementations (for + * example {@link FSDirectory}) can use this to inform the + * underlying IO system to pre-allocate the file to the + * specified size. If the length is longer than the + * current file length, the bytes added to the file are + * undefined. Otherwise the file is truncated. 
+ * @param length file length + */ + public void setLength(long length) throws IOException {}; }