LUCENE-3218: Improve CompoundFile Handling and make CFS append files directly

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1138063 13f79535-47bb-0310-9956-ffa450edef68
Simon Willnauer 2011-06-21 15:53:45 +00:00
parent c039e210b0
commit a97c53e7fd
29 changed files with 1375 additions and 671 deletions

View File: CHANGES.txt

@@ -458,6 +458,14 @@ Optimizations
   seek the term dictionary in TermQuery / TermWeight.
   (Simon Willnauer, Mike McCandless, Robert Muir)
 
+* LUCENE-3201, LUCENE-3218: CompoundFileSystem code has been consolidated
+  into a Directory implementation. Reading is optimized for MMapDirectory,
+  NIOFSDirectory and SimpleFSDirectory to only map requested parts of the
+  CFS into an IndexInput. Writing to a CFS now tries to append to the CF
+  directly if possible and merges separately written files on the fly instead
+  of during close. (Simon Willnauer, Robert Muir)
+
 Bug fixes
 
 * LUCENE-2633: PackedInts Packed32 and Packed64 did not support internal

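As a rough illustration of the consolidated API this entry describes: a minimal sketch, assuming an existing Directory named dir and illustrative segment file names; the methods createCompoundOutput, openCompoundInput and copy are the ones added in the diffs below.

// Sketch only: illustrative names, error handling elided.
// Writing: sub-files are appended to the .cfs directly when possible;
// close() merges any separately written files and writes the entry table.
CompoundFileDirectory cfsOut = dir.createCompoundOutput("_1.cfs");
for (String fileName : segmentFiles) { // hypothetical collection of file names
  dir.copy(cfsOut, fileName, fileName);
}
cfsOut.close();

// Reading: only the requested slice of the .cfs is exposed as an IndexInput.
CompoundFileDirectory cfsIn = dir.openCompoundInput("_1.cfs", BufferedIndexInput.BUFFER_SIZE);
IndexInput slice = cfsIn.openInput("_1.fdt");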
View File: NRTCachingDirectory.java

@@ -233,6 +233,25 @@ public class NRTCachingDirectory extends Directory {
     }
   }
 
+  @Override
+  public synchronized CompoundFileDirectory openCompoundInput(String name, int bufferSize) throws IOException {
+    if (cache.fileExists(name)) {
+      return cache.openCompoundInput(name, bufferSize);
+    } else {
+      return delegate.openCompoundInput(name, bufferSize);
+    }
+  }
+
+  @Override
+  public synchronized CompoundFileDirectory createCompoundOutput(String name) throws IOException {
+    if (cache.fileExists(name)) {
+      throw new IOException("File " + name + " already exists");
+    } else {
+      return delegate.createCompoundOutput(name);
+    }
+  }
+
   @Override
   public synchronized IndexInput openInput(String name, int bufferSize) throws IOException {
     if (cache.fileExists(name)) {

View File: TestIndexSplitter.java

@@ -78,7 +78,7 @@ public class TestIndexSplitter extends LuceneTestCase {
     _TestUtil.rmDir(destDir2);
     destDir2.mkdirs();
     IndexSplitter.main(new String[] {dir.getAbsolutePath(), destDir2.getAbsolutePath(), splitSegName});
-    assertEquals(3, destDir2.listFiles().length);
+    assertEquals(4, destDir2.listFiles().length);
     Directory fsDirDest2 = newFSDirectory(destDir2);
     r = IndexReader.open(fsDirDest2, true);
     assertEquals(50, r.maxDoc());

View File: org/apache/lucene/index/CompoundFileReader.java (deleted)

@@ -1,307 +0,0 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
import java.util.Collection;
import java.util.HashMap;
import java.io.FileNotFoundException;
import java.io.IOException;
/**
* Class for accessing a compound stream.
* This class implements a directory, but is limited to only read operations.
* Directory methods that would normally modify data throw an exception.
* @lucene.experimental
*/
public class CompoundFileReader extends Directory {
private int readBufferSize;
private static final class FileEntry {
long offset;
long length;
}
// Base info
private Directory directory;
private String fileName;
private IndexInput stream;
private HashMap<String,FileEntry> entries = new HashMap<String,FileEntry>();
public CompoundFileReader(Directory dir, String name) throws IOException {
this(dir, name, BufferedIndexInput.BUFFER_SIZE);
}
public CompoundFileReader(Directory dir, String name, int readBufferSize) throws IOException {
assert !(dir instanceof CompoundFileReader) : "compound file inside of compound file: " + name;
directory = dir;
fileName = name;
this.readBufferSize = readBufferSize;
boolean success = false;
try {
stream = dir.openInput(name, readBufferSize);
// read the first VInt. If it is negative, it's the version number
// otherwise it's the count (pre-3.1 indexes)
int firstInt = stream.readVInt();
final int count;
final boolean stripSegmentName;
if (firstInt < CompoundFileWriter.FORMAT_PRE_VERSION) {
if (firstInt < CompoundFileWriter.FORMAT_CURRENT) {
throw new CorruptIndexException("Incompatible format version: "
+ firstInt + " expected " + CompoundFileWriter.FORMAT_CURRENT);
}
// It's a post-3.1 index, read the count.
count = stream.readVInt();
stripSegmentName = false;
} else {
count = firstInt;
stripSegmentName = true;
}
// read the directory and init files
FileEntry entry = null;
for (int i=0; i<count; i++) {
long offset = stream.readLong();
String id = stream.readString();
if (stripSegmentName) {
// Fix the id to not include the segment names. This is relevant for
// pre-3.1 indexes.
id = IndexFileNames.stripSegmentName(id);
}
if (entry != null) {
// set length of the previous entry
entry.length = offset - entry.offset;
}
entry = new FileEntry();
entry.offset = offset;
entries.put(id, entry);
}
// set the length of the final entry
if (entry != null) {
entry.length = stream.length() - entry.offset;
}
success = true;
} finally {
if (!success && (stream != null)) {
try {
stream.close();
} catch (IOException e) { }
}
}
}
public Directory getDirectory() {
return directory;
}
public String getName() {
return fileName;
}
@Override
public synchronized void close() throws IOException {
if (stream == null)
throw new IOException("Already closed");
entries.clear();
stream.close();
stream = null;
}
@Override
public synchronized IndexInput openInput(String id) throws IOException {
// Default to readBufferSize passed in when we were opened
return openInput(id, readBufferSize);
}
@Override
public synchronized IndexInput openInput(String id, int readBufferSize) throws IOException {
if (stream == null)
throw new IOException("Stream closed");
id = IndexFileNames.stripSegmentName(id);
final FileEntry entry = entries.get(id);
if (entry == null)
throw new IOException("No sub-file with id " + id + " found (files: " + entries.keySet() + ")");
return new CSIndexInput(stream, entry.offset, entry.length, readBufferSize);
}
/** Returns an array of strings, one for each file in the directory. */
@Override
public String[] listAll() {
String[] res = entries.keySet().toArray(new String[entries.size()]);
// Add the segment name
String seg = fileName.substring(0, fileName.indexOf('.'));
for (int i = 0; i < res.length; i++) {
res[i] = seg + res[i];
}
return res;
}
/** Returns true iff a file with the given name exists. */
@Override
public boolean fileExists(String name) {
return entries.containsKey(IndexFileNames.stripSegmentName(name));
}
/** Returns the time the compound file was last modified. */
@Override
public long fileModified(String name) throws IOException {
return directory.fileModified(fileName);
}
/** Not implemented
* @throws UnsupportedOperationException */
@Override
public void deleteFile(String name) {
throw new UnsupportedOperationException();
}
/** Not implemented
* @throws UnsupportedOperationException */
public void renameFile(String from, String to) {
throw new UnsupportedOperationException();
}
/** Returns the length of a file in the directory.
* @throws IOException if the file does not exist */
@Override
public long fileLength(String name) throws IOException {
FileEntry e = entries.get(IndexFileNames.stripSegmentName(name));
if (e == null)
throw new FileNotFoundException(name);
return e.length;
}
/** Not implemented
* @throws UnsupportedOperationException */
@Override
public IndexOutput createOutput(String name) {
throw new UnsupportedOperationException();
}
@Override
public void sync(Collection<String> names) throws IOException {
}
/** Not implemented
* @throws UnsupportedOperationException */
@Override
public Lock makeLock(String name) {
throw new UnsupportedOperationException();
}
/** Implementation of an IndexInput that reads from a portion of the
* compound file. The visibility is left as "package" *only* because
* this helps with testing since JUnit test cases in a different class
* can then access package fields of this class.
*/
static final class CSIndexInput extends BufferedIndexInput {
IndexInput base;
long fileOffset;
long length;
CSIndexInput(final IndexInput base, final long fileOffset, final long length) {
this(base, fileOffset, length, BufferedIndexInput.BUFFER_SIZE);
}
CSIndexInput(final IndexInput base, final long fileOffset, final long length, int readBufferSize) {
super(readBufferSize);
this.base = (IndexInput)base.clone();
this.fileOffset = fileOffset;
this.length = length;
}
@Override
public Object clone() {
CSIndexInput clone = (CSIndexInput)super.clone();
clone.base = (IndexInput)base.clone();
clone.fileOffset = fileOffset;
clone.length = length;
return clone;
}
/** Expert: implements buffer refill. Reads bytes from the current
* position in the input.
* @param b the array to read bytes into
* @param offset the offset in the array to start storing bytes
* @param len the number of bytes to read
*/
@Override
protected void readInternal(byte[] b, int offset, int len) throws IOException {
long start = getFilePointer();
if(start + len > length)
throw new IOException("read past EOF");
base.seek(fileOffset + start);
base.readBytes(b, offset, len, false);
}
/** Expert: implements seek. Sets current position in this file, where
* the next {@link #readInternal(byte[],int,int)} will occur.
* @see #readInternal(byte[],int,int)
*/
@Override
protected void seekInternal(long pos) {}
/** Closes the stream to further operations. */
@Override
public void close() throws IOException {
base.close();
}
@Override
public long length() {
return length;
}
@Override
public void copyBytes(IndexOutput out, long numBytes) throws IOException {
// Copy first whatever is in the buffer
numBytes -= flushBuffer(out, numBytes);
// If there are more bytes left to copy, delegate the copy task to the
// base IndexInput, in case it can do an optimized copy.
if (numBytes > 0) {
long start = getFilePointer();
if (start + numBytes > length) {
throw new IOException("read past EOF");
}
base.seek(fileOffset + start);
base.copyBytes(out, numBytes);
}
}
}
}

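The CSIndexInput above exposes a sub-file as a slice of the single compound stream. A worked example of its offset translation, using hypothetical values:

// Suppose a sub-file slice with fileOffset = 100 and length = 50.
// readInternal at logical position start = 10 for len = 20 first checks
//   start + len <= length      (30 <= 50, ok; otherwise "read past EOF")
// and then positions the shared base input at the physical offset:
//   base.seek(fileOffset + start);   // base.seek(110)
// clone() re-clones the base input, so each clone seeks independently.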
View File: org/apache/lucene/index/CompoundFileWriter.java (deleted)

@@ -1,252 +0,0 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.LinkedList;
import org.apache.lucene.index.codecs.MergeState;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.IOUtils;
/**
* Combines multiple files into a single compound file.
* The file format:<br>
* <ul>
* <li>VInt fileCount</li>
* <li>{Directory}
* fileCount entries with the following structure:</li>
* <ul>
* <li>long dataOffset</li>
* <li>String fileName</li>
* </ul>
* <li>{File Data}
* fileCount entries with the raw data of the corresponding file</li>
* </ul>
*
* The fileCount integer indicates how many files are contained in this compound
* file. The {directory} that follows has that many entries. Each directory entry
* contains a long pointer to the start of this file's data section, and a String
* with that file's name.
*
* @lucene.internal
*/
public final class CompoundFileWriter {
private static final class FileEntry {
/** source file */
String file;
/** temporary holder for the start of directory entry for this file */
long directoryOffset;
/** temporary holder for the start of this file's data section */
long dataOffset;
/** the directory which contains the file. */
Directory dir;
}
// Before versioning started.
static final int FORMAT_PRE_VERSION = 0;
// Segment name is not written in the file names.
static final int FORMAT_NO_SEGMENT_PREFIX = -1;
// NOTE: if you introduce a new format, make it 1 lower
// than the current one, and always change this if you
// switch to a new format!
static final int FORMAT_CURRENT = FORMAT_NO_SEGMENT_PREFIX;
private Directory directory;
private String fileName;
private HashSet<String> ids;
private LinkedList<FileEntry> entries;
private boolean merged = false;
private MergeState.CheckAbort checkAbort;
/** Create the compound stream in the specified file. The file name is the
* entire name (no extensions are added).
* @throws NullPointerException if <code>dir</code> or <code>name</code> is null
*/
public CompoundFileWriter(Directory dir, String name) {
this(dir, name, null);
}
CompoundFileWriter(Directory dir, String name, MergeState.CheckAbort checkAbort) {
if (dir == null)
throw new NullPointerException("directory cannot be null");
if (name == null)
throw new NullPointerException("name cannot be null");
this.checkAbort = checkAbort;
directory = dir;
fileName = name;
ids = new HashSet<String>();
entries = new LinkedList<FileEntry>();
}
/** Returns the directory of the compound file. */
public Directory getDirectory() {
return directory;
}
/** Returns the name of the compound file. */
public String getName() {
return fileName;
}
/** Add a source stream. <code>file</code> is the string by which the
* sub-stream will be known in the compound stream.
*
* @throws IllegalStateException if this writer is closed
* @throws NullPointerException if <code>file</code> is null
* @throws IllegalArgumentException if a file with the same name
* has been added already
*/
public void addFile(String file) {
addFile(file, directory);
}
/**
* Same as {@link #addFile(String)}, only for files that are found in an
* external {@link Directory}.
*/
public void addFile(String file, Directory dir) {
if (merged)
throw new IllegalStateException(
"Can't add extensions after merge has been called");
if (file == null)
throw new NullPointerException(
"file cannot be null");
if (! ids.add(file))
throw new IllegalArgumentException(
"File " + file + " already added");
FileEntry entry = new FileEntry();
entry.file = file;
entry.dir = dir;
entries.add(entry);
}
/** Merge files with the extensions added up to now.
* All files with these extensions are combined sequentially into the
* compound stream.
* @throws IllegalStateException if close() had been called before or
* if no file has been added to this object
*/
public void close() throws IOException {
if (merged)
throw new IllegalStateException("Merge already performed");
if (entries.isEmpty())
throw new IllegalStateException("No entries to merge have been defined");
merged = true;
// open the compound stream
IndexOutput os = directory.createOutput(fileName);
IOException priorException = null;
try {
// Write the Version info - must be a VInt because CFR reads a VInt
// in older versions!
os.writeVInt(FORMAT_CURRENT);
// Write the number of entries
os.writeVInt(entries.size());
// Write the directory with all offsets at 0.
// Remember the positions of directory entries so that we can
// adjust the offsets later
long totalSize = 0;
for (FileEntry fe : entries) {
fe.directoryOffset = os.getFilePointer();
os.writeLong(0); // for now
os.writeString(IndexFileNames.stripSegmentName(fe.file));
totalSize += fe.dir.fileLength(fe.file);
}
// Pre-allocate size of file as optimization --
// this can potentially help IO performance as
// we write the file and also later during
// searching. It also uncovers a disk-full
// situation earlier and hopefully without
// actually filling disk to 100%:
final long finalLength = totalSize+os.getFilePointer();
os.setLength(finalLength);
// Open the files and copy their data into the stream.
// Remember the locations of each file's data section.
for (FileEntry fe : entries) {
fe.dataOffset = os.getFilePointer();
copyFile(fe, os);
}
// Write the data offsets into the directory of the compound stream
for (FileEntry fe : entries) {
os.seek(fe.directoryOffset);
os.writeLong(fe.dataOffset);
}
assert finalLength == os.length();
// Close the output stream. Set the os to null before trying to
// close so that if an exception occurs during the close, the
// finally clause below will not attempt to close the stream
// the second time.
IndexOutput tmp = os;
os = null;
tmp.close();
} catch (IOException e) {
priorException = e;
} finally {
IOUtils.closeSafely(priorException, os);
}
}
/**
* Copy the contents of the file with specified extension into the provided
* output stream.
*/
private void copyFile(FileEntry source, IndexOutput os) throws IOException {
IndexInput is = source.dir.openInput(source.file);
try {
long startPtr = os.getFilePointer();
long length = is.length();
os.copyBytes(is, length);
if (checkAbort != null) {
checkAbort.work(length);
}
// Verify that the output length diff is equal to original file
long endPtr = os.getFilePointer();
long diff = endPtr - startPtr;
if (diff != length)
throw new IOException("Difference in the output file offsets " + diff
+ " does not match the original file length " + length);
} finally {
is.close();
}
}
}

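For comparison with the replacement below, the on-disk layout this removed writer produced can be spelled out concretely. An illustration for a compound file holding two hypothetical sub-files, _1.fdt and _1.fdx:

// VInt   FORMAT_CURRENT (-1, FORMAT_NO_SEGMENT_PREFIX)
// VInt   2                      fileCount
// long   dataOffset of ".fdt"   written as 0 first, patched later via seek()
// String ".fdt"                 segment name stripped from "_1.fdt"
// long   dataOffset of ".fdx"
// String ".fdx"
// <raw bytes of _1.fdt>         offsets recorded while copying
// <raw bytes of _1.fdx>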
View File: IndexFileNames.java

@@ -69,6 +69,9 @@ public final class IndexFileNames {
   /** Extension of compound file */
   public static final String COMPOUND_FILE_EXTENSION = "cfs";
 
+  /** Extension of compound file entries */
+  public static final String COMPOUND_FILE_ENTRIES_EXTENSION = "cfe";
+
   /** Extension of compound file for doc store files */
   public static final String COMPOUND_FILE_STORE_EXTENSION = "cfx";
@@ -93,6 +96,7 @@ public final class IndexFileNames {
    */
   public static final String INDEX_EXTENSIONS[] = new String[] {
     COMPOUND_FILE_EXTENSION,
+    COMPOUND_FILE_ENTRIES_EXTENSION,
     FIELD_INFOS_EXTENSION,
     FIELDS_INDEX_EXTENSION,
     FIELDS_EXTENSION,
@@ -245,6 +249,14 @@ public final class IndexFileNames {
     return filename;
   }
 
+  public static String stripExtension(String filename) {
+    int idx = filename.indexOf('.');
+    if (idx != -1) {
+      filename = filename.substring(0, idx);
+    }
+    return filename;
+  }
+
   /**
    * Returns true if the given filename ends with the separate norms file
    * pattern: {@code SEPARATE_NORMS_EXTENSION + "[0-9]+"}.

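A quick note on the new stripExtension helper: it truncates at the first '.' in the file name, so for example:

// stripExtension("_0.cfs") -> "_0"
// stripExtension("_0.cfe") -> "_0"
// stripExtension("_0")     -> "_0"   (no dot: returned unchanged)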
View File: IndexReader.java

@@ -1432,14 +1432,14 @@ public abstract class IndexReader implements Cloneable, Closeable {
     }
 
     Directory dir = null;
-    CompoundFileReader cfr = null;
+    CompoundFileDirectory cfr = null;
 
     try {
       File file = new File(filename);
       String dirname = file.getAbsoluteFile().getParent();
       filename = file.getName();
       dir = FSDirectory.open(new File(dirname));
-      cfr = new CompoundFileReader(dir, filename);
+      cfr = dir.openCompoundInput(filename, BufferedIndexInput.BUFFER_SIZE);
 
       String [] files = cfr.listAll();
       ArrayUtil.mergeSort(files); // sort the array of filename so that the output is more readable

View File: IndexWriter.java

@ -46,12 +46,14 @@ import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.Lock; import org.apache.lucene.store.Lock;
import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BitVector; import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Constants; import org.apache.lucene.util.Constants;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.ThreadInterruptedException; import org.apache.lucene.util.ThreadInterruptedException;
import org.apache.lucene.util.MapBackedSet; import org.apache.lucene.util.MapBackedSet;
@@ -2190,13 +2192,19 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
       String compoundFileName = IndexFileNames.segmentFileName(newSegment.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
       message("creating compound file " + compoundFileName);
       // Now build compound file
-      CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName);
-      for(String fileName : newSegment.files()) {
-        cfsWriter.addFile(fileName);
+      final Directory cfsDir = directory.createCompoundOutput(compoundFileName);
+      IOException prior = null;
+      try {
+        for(String fileName : newSegment.files()) {
+          directory.copy(cfsDir, fileName, fileName);
+        }
+      } catch(IOException ex) {
+        prior = ex;
+      } finally {
+        IOUtils.closeSafely(prior, cfsDir);
       }
       // Perform the merge
-      cfsWriter.close();
 
       synchronized(this) {
         deleter.deleteNewFiles(newSegment.files());
       }
@@ -2502,21 +2510,22 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
   private void copySegmentIntoCFS(SegmentInfo info, String segName) throws IOException {
     String segFileName = IndexFileNames.segmentFileName(segName, "", IndexFileNames.COMPOUND_FILE_EXTENSION);
     Collection<String> files = info.files();
-    CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, segFileName);
-    for (String file : files) {
-      String newFileName = segName + IndexFileNames.stripSegmentName(file);
-      if (!IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
-          && !IndexFileNames.isSeparateNormsFile(file)) {
-        cfsWriter.addFile(file, info.dir);
-      } else {
-        assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
-        info.dir.copy(directory, file, newFileName);
+    final CompoundFileDirectory cfsdir = directory.createCompoundOutput(segFileName);
+    try {
+      for (String file : files) {
+        String newFileName = segName + IndexFileNames.stripSegmentName(file);
+        if (!IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
+            && !IndexFileNames.isSeparateNormsFile(file)) {
+          info.dir.copy(cfsdir, file, file);
+        } else {
+          assert !directory.fileExists(newFileName): "file \"" + newFileName + "\" already exists";
+          info.dir.copy(directory, file, newFileName);
+        }
       }
+    } finally {
+      IOUtils.closeSafely(true, cfsdir);
     }
-    // Create the .cfs
-    cfsWriter.close();
 
     info.dir = directory;
     info.name = segName;
     info.setUseCompoundFile(true);
@@ -3515,6 +3524,8 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
       synchronized(this) {
         deleter.deleteFile(compoundFileName);
+        deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
         deleter.deleteNewFiles(merge.info.files());
       }
     }

View File: SegmentCoreReaders.java

@@ -23,6 +23,7 @@ import java.util.concurrent.atomic.AtomicInteger;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.FieldsProducer;
 import org.apache.lucene.index.codecs.PerDocValues;
+import org.apache.lucene.store.CompoundFileDirectory;
 import org.apache.lucene.store.Directory;
 
 /** Holds core readers that are shared (unchanged) when
@@ -52,8 +53,8 @@ final class SegmentCoreReaders {
   FieldsReader fieldsReaderOrig;
   TermVectorsReader termVectorsReaderOrig;
-  CompoundFileReader cfsReader;
-  CompoundFileReader storeCFSReader;
+  CompoundFileDirectory cfsReader;
+  CompoundFileDirectory storeCFSReader;
@@ -73,7 +74,7 @@ final class SegmentCoreReaders {
     try {
       Directory dir0 = dir;
       if (si.getUseCompoundFile()) {
-        cfsReader = new CompoundFileReader(dir, IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_EXTENSION), readBufferSize);
+        cfsReader = dir.openCompoundInput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_EXTENSION), readBufferSize);
         dir0 = cfsReader;
       }
       cfsDir = dir0;
@@ -161,7 +162,7 @@ final class SegmentCoreReaders {
     if (si.getDocStoreOffset() != -1) {
       if (si.getDocStoreIsCompoundFile()) {
         assert storeCFSReader == null;
-        storeCFSReader = new CompoundFileReader(dir,
+        storeCFSReader = dir.openCompoundInput(
             IndexFileNames.segmentFileName(si.getDocStoreSegment(), "", IndexFileNames.COMPOUND_FILE_STORE_EXTENSION),
             readBufferSize);
         storeDir = storeCFSReader;
@@ -175,7 +176,7 @@ final class SegmentCoreReaders {
       // was not used, but then we are asked to open doc
       // stores after the segment has switched to CFS
       if (cfsReader == null) {
-        cfsReader = new CompoundFileReader(dir, IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_EXTENSION), readBufferSize);
+        cfsReader = dir.openCompoundInput(IndexFileNames.segmentFileName(segment, "", IndexFileNames.COMPOUND_FILE_EXTENSION), readBufferSize);
       }
       storeDir = cfsReader;
       assert storeDir != null;

View File: SegmentInfo.java

@@ -30,10 +30,12 @@ import java.util.Set;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
+import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.StringHelper;
 
 /**
  * Information about a segment such as its name, directory, and files related
@@ -245,7 +247,7 @@ public final class SegmentInfo implements Cloneable {
     }
     final Directory dirToTest;
     if (isCompoundFile) {
-      dirToTest = new CompoundFileReader(dir, IndexFileNames.segmentFileName(storesSegment, "", ext));
+      dirToTest = dir.openCompoundInput(IndexFileNames.segmentFileName(storesSegment, "", ext), BufferedIndexInput.BUFFER_SIZE);
     } else {
       dirToTest = dir;
     }
@@ -263,8 +265,8 @@ public final class SegmentInfo implements Cloneable {
     if (fieldInfos == null) {
       Directory dir0 = dir;
       if (isCompoundFile && checkCompoundFile) {
-        dir0 = new CompoundFileReader(dir, IndexFileNames.segmentFileName(name,
-            "", IndexFileNames.COMPOUND_FILE_EXTENSION));
+        dir0 = dir.openCompoundInput(IndexFileNames.segmentFileName(name,
+            "", IndexFileNames.COMPOUND_FILE_EXTENSION), BufferedIndexInput.BUFFER_SIZE);
       }
       try {
         fieldInfos = new FieldInfos(dir0, IndexFileNames.segmentFileName(name,
@@ -617,6 +619,10 @@ public final class SegmentInfo implements Cloneable {
     if (useCompoundFile) {
       fileSet.add(IndexFileNames.segmentFileName(name, "", IndexFileNames.COMPOUND_FILE_EXTENSION));
+      if (version != null && StringHelper.getVersionComparator().compare("4.0", version) <= 0) {
+        fileSet.add(IndexFileNames.segmentFileName(name, "",
+            IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
+      }
     } else {
       for(String ext : IndexFileNames.NON_STORE_INDEX_EXTENSIONS) {
         addIfExists(fileSet, IndexFileNames.segmentFileName(name, "", ext));

View File: SegmentMerger.java

@@ -31,6 +31,7 @@ import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.MergeState;
 import org.apache.lucene.index.codecs.PerDocConsumer;
 import org.apache.lucene.index.codecs.PerDocValues;
+import org.apache.lucene.store.CompoundFileDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
@@ -132,18 +133,20 @@ final class SegmentMerger {
     // Now merge all added files
     Collection<String> files = info.files();
-    CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, fileName, checkAbort);
-    for (String file : files) {
-      assert !IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
-          : ".del file is not allowed in .cfs: " + file;
-      assert !IndexFileNames.isSeparateNormsFile(file)
-          : "separate norms file (.s[0-9]+) is not allowed in .cfs: " + file;
-      cfsWriter.addFile(file);
+    CompoundFileDirectory cfsDir = directory.createCompoundOutput(fileName);
+    try {
+      for (String file : files) {
+        assert !IndexFileNames.matchesExtension(file, IndexFileNames.DELETES_EXTENSION)
+            : ".del file is not allowed in .cfs: " + file;
+        assert !IndexFileNames.isSeparateNormsFile(file)
+            : "separate norms file (.s[0-9]+) is not allowed in .cfs: " + file;
+        directory.copy(cfsDir, file, file);
+        checkAbort.work(directory.fileLength(file));
+      }
+    } finally {
+      cfsDir.close();
     }
-    // Perform the merge
-    cfsWriter.close();
 
     return files;
   }

View File: DefaultSegmentInfosReader.java

@@ -19,7 +19,6 @@ package org.apache.lucene.index.codecs;
 import java.io.IOException;
 
-import org.apache.lucene.index.CompoundFileReader;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.FieldsReader;
 import org.apache.lucene.index.IndexFileNames;
@@ -68,12 +67,12 @@ public class DefaultSegmentInfosReader extends SegmentInfosReader {
       Directory dir = directory;
       if (si.getDocStoreOffset() != -1) {
         if (si.getDocStoreIsCompoundFile()) {
-          dir = new CompoundFileReader(dir, IndexFileNames.segmentFileName(
+          dir = dir.openCompoundInput(IndexFileNames.segmentFileName(
               si.getDocStoreSegment(), "",
               IndexFileNames.COMPOUND_FILE_STORE_EXTENSION), 1024);
         }
       } else if (si.getUseCompoundFile()) {
-        dir = new CompoundFileReader(dir, IndexFileNames.segmentFileName(
+        dir = dir.openCompoundInput(IndexFileNames.segmentFileName(
             si.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), 1024);
       }

View File: PreFlexFields.java

@@ -25,7 +25,6 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.TreeMap;
 
-import org.apache.lucene.index.CompoundFileReader;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.FieldInfo;
@@ -37,6 +36,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.index.Terms;
 import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.codecs.FieldsProducer;
+import org.apache.lucene.store.CompoundFileDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.Bits;
@@ -177,8 +177,8 @@ public class PreFlexFields extends FieldsProducer {
       // terms reader with index, the segment has switched
       // to CFS
-      if (!(dir instanceof CompoundFileReader)) {
-        dir0 = cfsReader = new CompoundFileReader(dir, IndexFileNames.segmentFileName(si.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), readBufferSize);
+      if (!(dir instanceof CompoundFileDirectory)) {
+        dir0 = cfsReader = dir.openCompoundInput(IndexFileNames.segmentFileName(si.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), readBufferSize);
       } else {
         dir0 = dir;
       }

View File: org/apache/lucene/store/CompoundFileDirectory.java (new)

@@ -0,0 +1,313 @@
package org.apache.lucene.store;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
import org.apache.lucene.util.IOUtils;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.io.FileNotFoundException;
import java.io.IOException;
/**
* Class for accessing a compound stream.
* This class implements a directory, but is limited to only read operations.
* Directory methods that would normally modify data throw an exception.
* @lucene.experimental
*/
public abstract class CompoundFileDirectory extends Directory {
/** Offset/Length for a slice inside of a compound file */
public static final class FileEntry {
long offset;
long length;
}
private final Directory directory;
private final String fileName;
private final int readBufferSize;
private Map<String,FileEntry> entries;
private boolean openForWrite;
private static final Map<String,FileEntry> SENTINEL = Collections.emptyMap();
private CompoundFileWriter writer;
/**
* Create a new CompoundFileDirectory.
* <p>
* NOTE: subclasses must call {@link #initForRead(Map)} or {@link #initForWrite()} before the directory can be used.
*/
public CompoundFileDirectory(Directory directory, String fileName, int readBufferSize) throws IOException {
assert !(directory instanceof CompoundFileDirectory) : "compound file inside of compound file: " + fileName;
this.directory = directory;
this.fileName = fileName;
this.readBufferSize = readBufferSize;
this.isOpen = false;
}
/** Initialize with a map of filename->slices */
protected final void initForRead(Map<String,FileEntry> entries) {
this.entries = entries;
this.isOpen = true;
this.openForWrite = false;
}
protected final void initForWrite() {
this.entries = SENTINEL;
this.openForWrite = true;
this.isOpen = true;
}
/** Helper method that reads CFS entries from an input stream */
public static final Map<String,FileEntry> readEntries(IndexInput stream, Directory dir, String name) throws IOException {
// read the first VInt. If it is negative, it's the version number
// otherwise it's the count (pre-3.1 indexes)
final int firstInt = stream.readVInt();
if (firstInt == CompoundFileWriter.FORMAT_CURRENT) {
IndexInput input = null;
try {
input = dir.openInput(IndexFileNames.segmentFileName(IndexFileNames.stripExtension(name), "",
IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION));
final int readInt = input.readInt(); // unused right now
assert readInt == CompoundFileWriter.ENTRY_FORMAT_CURRENT;
final int numEntries = input.readVInt();
final Map<String, FileEntry> mapping = new HashMap<String, CompoundFileDirectory.FileEntry>(
numEntries);
for (int i = 0; i < numEntries; i++) {
final FileEntry fileEntry = new FileEntry();
mapping.put(input.readString(), fileEntry);
fileEntry.offset = input.readLong();
fileEntry.length = input.readLong();
}
return mapping;
} finally {
IOUtils.closeSafely(true, input);
}
}
// TODO remove once 3.x is not supported anymore
return readLegacyEntries(stream, firstInt);
}
private static Map<String, FileEntry> readLegacyEntries(IndexInput stream,
int firstInt) throws CorruptIndexException, IOException {
final Map<String,FileEntry> entries = new HashMap<String,FileEntry>();
final int count;
final boolean stripSegmentName;
if (firstInt < CompoundFileWriter.FORMAT_PRE_VERSION) {
if (firstInt < CompoundFileWriter.FORMAT_CURRENT) {
throw new CorruptIndexException("Incompatible format version: "
+ firstInt + " expected " + CompoundFileWriter.FORMAT_CURRENT);
}
// It's a post-3.1 index, read the count.
count = stream.readVInt();
stripSegmentName = false;
} else {
count = firstInt;
stripSegmentName = true;
}
// read the directory and init files
long streamLength = stream.length();
FileEntry entry = null;
for (int i=0; i<count; i++) {
long offset = stream.readLong();
if (offset < 0 || offset > streamLength) {
throw new CorruptIndexException("Invalid CFS entry offset: " + offset);
}
String id = stream.readString();
if (stripSegmentName) {
// Fix the id to not include the segment names. This is relevant for
// pre-3.1 indexes.
id = IndexFileNames.stripSegmentName(id);
}
if (entry != null) {
// set length of the previous entry
entry.length = offset - entry.offset;
}
entry = new FileEntry();
entry.offset = offset;
entries.put(id, entry);
}
// set the length of the final entry
if (entry != null) {
entry.length = streamLength - entry.offset;
}
return entries;
}
public Directory getDirectory() {
return directory;
}
public String getName() {
return fileName;
}
@Override
public synchronized void close() throws IOException {
ensureOpen();
entries = null;
isOpen = false;
if (writer != null) {
assert openForWrite;
writer.close();
}
}
@Override
public synchronized IndexInput openInput(String id) throws IOException {
// Default to readBufferSize passed in when we were opened
return openInput(id, readBufferSize);
}
@Override
public synchronized IndexInput openInput(String id, int readBufferSize) throws IOException {
ensureOpen();
assert !openForWrite;
id = IndexFileNames.stripSegmentName(id);
final FileEntry entry = entries.get(id);
if (entry == null)
throw new IOException("No sub-file with id " + id + " found (files: " + entries.keySet() + ")");
return openInputSlice(id, entry.offset, entry.length, readBufferSize);
}
/** Return an IndexInput that represents a "slice" or portion of the CFS file. */
public abstract IndexInput openInputSlice(String id, long offset, long length, int readBufferSize) throws IOException;
/** Returns an array of strings, one for each file in the directory. */
@Override
public String[] listAll() {
ensureOpen();
String[] res;
if (writer != null) {
res = writer.listAll();
} else {
res = entries.keySet().toArray(new String[entries.size()]);
// Add the segment name
String seg = fileName.substring(0, fileName.indexOf('.'));
for (int i = 0; i < res.length; i++) {
res[i] = seg + res[i];
}
}
return res;
}
/** Returns true iff a file with the given name exists. */
@Override
public boolean fileExists(String name) {
ensureOpen();
if (this.writer != null) {
return writer.fileExists(name);
}
return entries.containsKey(IndexFileNames.stripSegmentName(name));
}
/** Returns the time the compound file was last modified. */
@Override
public long fileModified(String name) throws IOException {
ensureOpen();
return directory.fileModified(fileName);
}
/** Not implemented
* @throws UnsupportedOperationException */
@Override
public void deleteFile(String name) {
throw new UnsupportedOperationException();
}
/** Not implemented
* @throws UnsupportedOperationException */
public void renameFile(String from, String to) {
throw new UnsupportedOperationException();
}
/** Returns the length of a file in the directory.
* @throws IOException if the file does not exist */
@Override
public long fileLength(String name) throws IOException {
ensureOpen();
if (this.writer != null) {
return writer.fileLength(name);
}
FileEntry e = entries.get(IndexFileNames.stripSegmentName(name));
if (e == null)
throw new FileNotFoundException(name);
return e.length;
}
@Override
public IndexOutput createOutput(String name) throws IOException {
ensureOpen();
initWriter();
return writer.createOutput(name);
}
@Override
public void sync(Collection<String> names) throws IOException {
throw new UnsupportedOperationException();
}
/** Not implemented
* @throws UnsupportedOperationException */
@Override
public Lock makeLock(String name) {
throw new UnsupportedOperationException();
}
/** Not implemented
* @throws UnsupportedOperationException */
@Override
public final CompoundFileDirectory openCompoundInput(String name, int bufferSize) throws IOException {
// NOTE: final to make nested compounding impossible.
throw new UnsupportedOperationException();
}
/** Not implemented
* @throws UnsupportedOperationException */
@Override
public final CompoundFileDirectory createCompoundOutput(String name)
throws IOException {
// NOTE: final to make nested compounding impossible.
throw new UnsupportedOperationException();
}
private final void initWriter() {
assert openForWrite;
assert entries == SENTINEL;
if (writer == null) {
writer = new CompoundFileWriter(directory, fileName);
}
}
}

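Taken together with the writer below, the new two-file layout reads as follows; a summary of readEntries and writeEntryTable, with illustrative file names:

// _1.cfs (data file):
//   VInt   FORMAT_APPEND_FILES (-2)
//   <raw bytes of each sub-file, appended as it is written>
//
// _1.cfe (entry table):
//   int    ENTRY_FORMAT_CURRENT (-1)
//   VInt   numEntries
//   per entry:
//     String fileName (segment name stripped)
//     long   dataOffset into the .cfs data file
//     long   dataLength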
View File: org/apache/lucene/store/CompoundFileWriter.java (new)

@@ -0,0 +1,364 @@
package org.apache.lucene.store;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Queue;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.MergePolicy.MergeAbortedException;
import org.apache.lucene.util.IOUtils;
/**
* Combines multiple files into a single compound file.
*
* The file format of the data file:<br>
* <ul>
* <li>VInt Version</li>
* <li>{File Data} fileCount entries with the raw data of the corresponding file</li>
* </ul>
*
* The file format of the entry table:<br>
* <ul>
* <li>int Version</li>
* <li>VInt fileCount - number of entries with the following structure:
* <ul>
* <li>String fileName</li>
* <li>long dataOffset</li>
* <li>long dataLength</li>
* </ul>
* </li>
* </ul>
*
* The fileCount integer indicates how many files are contained in this compound
* file. The entry table that follows has that many entries. Each directory entry
* contains a long pointer to the start of this file's data section, the file's
* length, and a String with that file's name.
*
* @lucene.internal
*/
final class CompoundFileWriter {
private static final class FileEntry {
/** source file */
String file;
long length;
/** temporary holder for the start of this file's data section */
long offset;
/** the directory which contains the file. */
Directory dir;
}
// Before versioning started.
static final int FORMAT_PRE_VERSION = 0;
// Segment name is not written in the file names.
static final int FORMAT_NO_SEGMENT_PREFIX = -1;
static final int FORMAT_APPEND_FILES = -2;
static final int ENTRY_FORMAT_CURRENT = -1;
// NOTE: if you introduce a new format, make it 1 lower
// than the current one, and always change this if you
// switch to a new format!
/** @lucene.internal */
static final int FORMAT_CURRENT = FORMAT_APPEND_FILES;
private final Directory directory;
private final Map<String, FileEntry> entries = new HashMap<String, FileEntry>();
// all entries that are written to a sep. file but not yet moved into CFS
private final Queue<FileEntry> pendingEntries = new LinkedList<FileEntry>();
private boolean closed = false;
private volatile IndexOutput dataOut;
private final AtomicBoolean outputTaken = new AtomicBoolean(false);
private final String entryTableName;
private final String dataFileName;
/**
* Create the compound stream in the specified file. The file name is the
* entire name (no extensions are added).
*
* @throws NullPointerException
* if <code>dir</code> or <code>name</code> is null
*/
CompoundFileWriter(Directory dir, String name) {
if (dir == null)
throw new NullPointerException("directory cannot be null");
if (name == null)
throw new NullPointerException("name cannot be null");
directory = dir;
entryTableName = IndexFileNames.segmentFileName(
IndexFileNames.stripExtension(name), "",
IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION);
dataFileName = name;
}
/** Returns the directory of the compound file. */
Directory getDirectory() {
return directory;
}
/** Returns the name of the compound file. */
String getName() {
return dataFileName;
}
/**
* Closes all resources and writes the entry table
*
* @throws IllegalStateException
* if close() had been called before or if no file has been added to
* this object
*/
void close() throws IOException {
if (closed) {
throw new IllegalStateException("already closed");
}
IOException priorException = null;
IndexOutput entryTableOut = null;
try {
if (entries.isEmpty()) {
throw new IllegalStateException("CFS has no entries");
}
if (!pendingEntries.isEmpty() || outputTaken.get()) {
throw new IllegalStateException("CFS has pending open files");
}
closed = true;
// open the compound stream
assert dataOut != null;
long finalLength = dataOut.getFilePointer();
assert assertFileLength(finalLength, dataOut);
entryTableOut = directory.createOutput(entryTableName);
writeEntryTable(entries.values(), entryTableOut);
} catch (IOException e) {
priorException = e;
} finally {
IOUtils.closeSafely(priorException, dataOut, entryTableOut);
}
}
private static boolean assertFileLength(long expected, IndexOutput out)
throws IOException {
out.flush();
assert expected == out.length() : "expected: " + expected + " was "
+ out.length();
return true;
}
private final void ensureOpen() {
if (closed) {
throw new IllegalStateException("CFS Directory is already closed");
}
}
/**
* Copy the contents of the file with specified extension into the provided
* output stream.
*/
private final long copyFileEntry(IndexOutput dataOut, FileEntry fileEntry)
throws IOException, MergeAbortedException {
final IndexInput is = fileEntry.dir.openInput(fileEntry.file);
try {
final long startPtr = dataOut.getFilePointer();
final long length = fileEntry.length;
dataOut.copyBytes(is, length);
// Verify that the output length diff is equal to original file
long endPtr = dataOut.getFilePointer();
long diff = endPtr - startPtr;
if (diff != length)
throw new IOException("Difference in the output file offsets " + diff
+ " does not match the original file length " + length);
fileEntry.offset = startPtr;
// copy successful - delete file
fileEntry.dir.deleteFile(fileEntry.file);
return length;
} finally {
is.close();
}
}
protected void writeEntryTable(Collection<FileEntry> entries,
IndexOutput entryOut) throws IOException {
entryOut.writeInt(ENTRY_FORMAT_CURRENT);
entryOut.writeVInt(entries.size());
for (FileEntry fe : entries) {
entryOut.writeString(IndexFileNames.stripSegmentName(fe.file));
entryOut.writeLong(fe.offset);
entryOut.writeLong(fe.length);
}
}
IndexOutput createOutput(String name) throws IOException {
ensureOpen();
boolean success = false;
try {
assert name != null : "name must not be null";
if (entries.containsKey(name)) {
throw new IllegalArgumentException("File " + name + " already exists");
}
final FileEntry entry = new FileEntry();
entry.file = name;
entries.put(name, entry);
final DirectCFSIndexOutput out;
if (outputTaken.compareAndSet(false, true)) {
initDataOut();
out = new DirectCFSIndexOutput(dataOut, entry, false);
} else {
entry.dir = this.directory;
out = new DirectCFSIndexOutput(directory.createOutput(name), entry,
true);
}
success = true;
return out;
} finally {
if (!success) {
entries.remove(name);
}
}
}
final void releaseOutputLock() {
outputTaken.compareAndSet(true, false);
}
private synchronized final void initDataOut() throws IOException {
if (dataOut == null) {
boolean success = false;
try {
dataOut = directory.createOutput(dataFileName);
dataOut.writeVInt(FORMAT_CURRENT);
success = true;
} finally {
if (!success) {
IOUtils.closeSafely(true, dataOut);
}
}
}
}
private final void prunePendingEntries() throws IOException {
// claim the output and copy all pending files in
if (outputTaken.compareAndSet(false, true)) {
try {
while (!pendingEntries.isEmpty()) {
FileEntry entry = pendingEntries.poll();
copyFileEntry(dataOut, entry);
entries.put(entry.file, entry);
}
} finally {
final boolean compareAndSet = outputTaken.compareAndSet(true, false);
assert compareAndSet;
}
}
}
long fileLength(String name) throws IOException {
FileEntry fileEntry = entries.get(name);
if (fileEntry == null) {
throw new FileNotFoundException(name + " does not exist");
}
return fileEntry.length;
}
boolean fileExists(String name) {
return entries.containsKey(name);
}
String[] listAll() {
return entries.keySet().toArray(new String[0]);
}
private final class DirectCFSIndexOutput extends IndexOutput {
private final IndexOutput delegate;
private final long offset;
private boolean closed;
private FileEntry entry;
private long writtenBytes;
private final boolean isSeparate;
DirectCFSIndexOutput(IndexOutput delegate, FileEntry entry,
boolean isSeparate) {
super();
this.delegate = delegate;
this.entry = entry;
entry.offset = offset = delegate.getFilePointer();
this.isSeparate = isSeparate;
}
@Override
public void flush() throws IOException {
delegate.flush();
}
@Override
public void close() throws IOException {
if (!closed) {
closed = true;
entry.length = writtenBytes;
if (isSeparate) {
// we are a separate file - push into the pending entries
pendingEntries.add(entry);
} else {
// we have been written into the CFS directly - release the lock
releaseOutputLock();
}
// now prune all pending entries and push them into the CFS
prunePendingEntries();
}
}
@Override
public long getFilePointer() {
return delegate.getFilePointer() - offset;
}
@Override
public void seek(long pos) throws IOException {
assert !closed;
delegate.seek(offset + pos);
}
@Override
public long length() throws IOException {
assert !closed;
return delegate.length() - offset;
}
@Override
public void writeByte(byte b) throws IOException {
assert !closed;
writtenBytes++;
delegate.writeByte(b);
}
@Override
public void writeBytes(byte[] b, int offset, int length) throws IOException {
assert !closed;
writtenBytes += length;
delegate.writeBytes(b, offset, length);
}
}
}

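A hedged usage sketch of the two write paths in createOutput above (dir is an assumed Directory; the class is package-private in this commit, so this is illustrative only):

CompoundFileWriter w = new CompoundFileWriter(dir, "_1.cfs");
IndexOutput a = w.createOutput("_1.fdt"); // wins outputTaken: appends straight into _1.cfs
IndexOutput b = w.createOutput("_1.fdx"); // data stream busy: written to a separate file
b.close(); // queued in pendingEntries; the data stream is still held by "a"
a.close(); // releases the lock; prunePendingEntries() copies _1.fdx into the .cfs and deletes it
w.close(); // writes the _1.cfe entry table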
View File: DataOutput.java

@@ -61,6 +61,14 @@ public abstract class DataOutput {
     writeByte((byte) i);
   }
 
+  /** Writes a short as two bytes.
+   * @see DataInput#readShort()
+   */
+  public void writeShort(short i) throws IOException {
+    writeByte((byte)(i >> 8));
+    writeByte((byte) i);
+  }
+
   /** Writes an int in a variable-length format. Writes between one and
    * five bytes. Smaller values take fewer bytes. Negative numbers are not
    * supported.

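A worked example of the byte order writeShort produces (high byte first, matching DataInput.readShort()):

// writeShort((short) 0x1234) emits:
//   writeByte((byte)(0x1234 >> 8))  -> 0x12
//   writeByte((byte) 0x1234)        -> 0x34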
View File: org/apache/lucene/store/DefaultCompoundFileDirectory.java (new)

@@ -0,0 +1,140 @@
package org.apache.lucene.store;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.util.IOUtils;
/**
* Default implementation of {@link CompoundFileDirectory}.
* <p>
* This implementation returns a BufferedIndexInput that wraps the underlying
* Directory's IndexInput for the compound file (using unbuffered reads).
* @lucene.experimental
*/
public class DefaultCompoundFileDirectory extends CompoundFileDirectory {
protected IndexInput stream;
public DefaultCompoundFileDirectory(Directory directory, String fileName, int readBufferSize, boolean writeable) throws IOException {
super(directory, fileName, readBufferSize);
if (!writeable) {
try {
stream = directory.openInput(fileName, readBufferSize);
initForRead(CompoundFileDirectory.readEntries(stream, directory, fileName));
} catch (IOException e) {
IOUtils.closeSafely(e, stream);
}
} else {
initForWrite();
}
}
@Override
public IndexInput openInputSlice(String id, long offset, long length, int readBufferSize) throws IOException {
return new CSIndexInput(stream, offset, length, readBufferSize);
}
@Override
public synchronized void close() throws IOException {
try {
IOUtils.closeSafely(false, stream);
} finally {
super.close();
}
}
/** Implementation of an IndexInput that reads from a portion of the
* compound file.
*/
static final class CSIndexInput extends BufferedIndexInput {
IndexInput base;
long fileOffset;
long length;
CSIndexInput(final IndexInput base, final long fileOffset, final long length) {
this(base, fileOffset, length, BufferedIndexInput.BUFFER_SIZE);
}
CSIndexInput(final IndexInput base, final long fileOffset, final long length, int readBufferSize) {
super(readBufferSize);
this.base = (IndexInput)base.clone();
this.fileOffset = fileOffset;
this.length = length;
}
@Override
public Object clone() {
CSIndexInput clone = (CSIndexInput)super.clone();
clone.base = (IndexInput)base.clone();
clone.fileOffset = fileOffset;
clone.length = length;
return clone;
}
/** Expert: implements buffer refill. Reads bytes from the current
* position in the input.
* @param b the array to read bytes into
* @param offset the offset in the array to start storing bytes
* @param len the number of bytes to read
*/
@Override
protected void readInternal(byte[] b, int offset, int len) throws IOException {
long start = getFilePointer();
if(start + len > length)
throw new IOException("read past EOF");
base.seek(fileOffset + start);
base.readBytes(b, offset, len, false);
}
/** Expert: implements seek. Sets current position in this file, where
* the next {@link #readInternal(byte[],int,int)} will occur.
* @see #readInternal(byte[],int,int)
*/
@Override
protected void seekInternal(long pos) {}
/** Closes the stream to further operations. */
@Override
public void close() throws IOException {
base.close();
}
@Override
public long length() {
return length;
}
@Override
public void copyBytes(IndexOutput out, long numBytes) throws IOException {
// Copy first whatever is in the buffer
numBytes -= flushBuffer(out, numBytes);
// If there are more bytes left to copy, delegate the copy task to the
// base IndexInput, in case it can do an optimized copy.
if (numBytes > 0) {
long start = getFilePointer();
if (start + numBytes > length) {
throw new IOException("read past EOF");
}
base.seek(fileOffset + start);
base.copyBytes(out, numBytes);
}
}
}
}
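To make the slice semantics of CSIndexInput concrete: a sub-file opened through a compound directory reports its own length, reads are translated by the stored offset, and anything beyond the slice surfaces as "read past EOF" even though the physical .cfs continues. A test-style sketch with invented names:

import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

public class SliceEofSketch {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    CompoundFileDirectory csw = dir.createCompoundOutput("x.cfs");
    IndexOutput out = csw.createOutput("sub");
    for (int i = 0; i < 10; i++) {
      out.writeByte((byte) i);
    }
    out.close();
    csw.close();

    CompoundFileDirectory csr = dir.openCompoundInput("x.cfs", 1024);
    IndexInput in = csr.openInput("sub");
    assert in.length() == 10;  // the slice's length, not the .cfs length
    in.seek(9);
    in.readByte();             // last byte of the slice: fine
    try {
      in.readByte();           // one past the slice, though the .cfs goes on
    } catch (java.io.IOException expected) {
      // "read past EOF"
    }
    in.close();
    csr.close();
    dir.close();
  }
}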

View File

@@ -112,12 +112,36 @@ public abstract class Directory implements Closeable {
   * implementation may ignore the buffer size.  Currently
   * the only Directory implementations that respect this
   * parameter are {@link FSDirectory} and {@link
-  * org.apache.lucene.index.CompoundFileReader}.
+  * CompoundFileDirectory}.
   */
  public IndexInput openInput(String name, int bufferSize) throws IOException {
    return openInput(name);
  }
/**
* Returns a {@link CompoundFileDirectory} capable of
* reading the Lucene compound file format.
* <p>
* The default implementation returns
* {@link DefaultCompoundFileDirectory}.
* @lucene.experimental
*/
public CompoundFileDirectory openCompoundInput(String name, int bufferSize) throws IOException {
return new DefaultCompoundFileDirectory(this, name, bufferSize, false);
}
/**
* Returns a {@link CompoundFileDirectory} capable of
* writing the Lucene compound file format.
* <p>
* The default implementation returns
* {@link DefaultCompoundFileDirectory}.
* @lucene.experimental
*/
public CompoundFileDirectory createCompoundOutput(String name) throws IOException {
return new DefaultCompoundFileDirectory(this, name, 1024, true);
}
  /** Construct a {@link Lock}.
   * @param name the name of the lock file
   */
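A minimal sketch of the intended call pattern for the two new hooks (segment and entry names invented); FSDirectory subclasses further down override openCompoundInput to hand back optimized slices:

import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

public class CompoundApiSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();

    CompoundFileDirectory csw = dir.createCompoundOutput("_0.cfs");
    IndexOutput out = csw.createOutput("_0.frq");
    out.writeVInt(42);
    out.close();
    csw.close();                     // entry table is written here

    CompoundFileDirectory csr = dir.openCompoundInput("_0.cfs", 1024);
    IndexInput in = csr.openInput("_0.frq");
    assert in.readVInt() == 42;      // run with -ea to check
    in.close();
    csr.close();
    dir.close();
  }
}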

View File

@@ -148,4 +148,14 @@ public class FileSwitchDirectory extends Directory {
  public IndexInput openInput(String name) throws IOException {
    return getDirectory(name).openInput(name);
  }
@Override
public CompoundFileDirectory openCompoundInput(String name, int bufferSize) throws IOException {
return getDirectory(name).openCompoundInput(name, bufferSize);
}
@Override
public CompoundFileDirectory createCompoundOutput(String name) throws IOException {
return getDirectory(name).createCompoundOutput(name);
}
}

View File

@@ -58,4 +58,5 @@ public abstract class IndexOutput extends DataOutput implements Closeable {
   * @param length file length
   */
  public void setLength(long length) throws IOException {}
}

View File

@@ -32,6 +32,7 @@ import java.security.PrivilegedActionException;
import java.lang.reflect.Method;

import org.apache.lucene.util.Constants;
+import org.apache.lucene.util.IOUtils;

/** File-based {@link Directory} implementation that uses
 * mmap for reading, and {@link
@@ -213,12 +214,50 @@ public class MMapDirectory extends FSDirectory {
    File f = new File(getDirectory(), name);
    RandomAccessFile raf = new RandomAccessFile(f, "r");
    try {
-     return new MMapIndexInput(raf, chunkSizePower);
+     return new MMapIndexInput(raf, 0, raf.length(), chunkSizePower);
    } finally {
      raf.close();
    }
  }
@Override
public CompoundFileDirectory openCompoundInput(String name, int bufferSize) throws IOException {
return new MMapCompoundFileDirectory(name, bufferSize);
}
private final class MMapCompoundFileDirectory extends CompoundFileDirectory {
private RandomAccessFile raf = null;
public MMapCompoundFileDirectory(String fileName, int readBufferSize) throws IOException {
super(MMapDirectory.this, fileName, readBufferSize);
IndexInput stream = null;
try {
File f = new File(MMapDirectory.this.getDirectory(), fileName);
raf = new RandomAccessFile(f, "r");
stream = new MMapIndexInput(raf, 0, raf.length(), chunkSizePower);
initForRead(CompoundFileDirectory.readEntries(stream, MMapDirectory.this, fileName));
stream.close();
} catch (IOException e) {
// throw our original exception
IOUtils.closeSafely(e, raf, stream);
}
}
@Override
public IndexInput openInputSlice(String id, long offset, long length, int readBufferSize) throws IOException {
return new MMapIndexInput(raf, offset, length, chunkSizePower);
}
@Override
public synchronized void close() throws IOException {
try {
raf.close();
} finally {
super.close();
}
}
}
  // Because Java's ByteBuffer uses an int to address the
  // values, it's necessary to access a file >
  // Integer.MAX_VALUE in size using multiple byte buffers.

@@ -235,8 +274,8 @@ public class MMapDirectory extends FSDirectory {
    private boolean isClone = false;

-   MMapIndexInput(RandomAccessFile raf, int chunkSizePower) throws IOException {
-     this.length = raf.length();
+   MMapIndexInput(RandomAccessFile raf, long offset, long length, int chunkSizePower) throws IOException {
+     this.length = length;
      this.chunkSizePower = chunkSizePower;
      this.chunkSize = 1L << chunkSizePower;
      this.chunkSizeMask = chunkSize - 1L;

@@ -261,7 +300,7 @@ public class MMapDirectory extends FSDirectory {
          ? chunkSize
          : (length - bufferStart)
        );
-       this.buffers[bufNr] = rafc.map(MapMode.READ_ONLY, bufferStart, bufSize);
+       this.buffers[bufNr] = rafc.map(MapMode.READ_ONLY, offset + bufferStart, bufSize);
        bufferStart += bufSize;
      }
      seek(0L);
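The heart of the MMap change is that each slice maps only its own region by passing a non-zero position to FileChannel#map. A standalone JDK-level sketch of that call, with an invented temp file and offsets standing in for the entry-table values:

import java.io.File;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;

public class MapRegionSketch {
  public static void main(String[] args) throws Exception {
    File f = File.createTempFile("cfs", "demo"); // stand-in for a .cfs file
    RandomAccessFile rw = new RandomAccessFile(f, "rw");
    rw.setLength(8192);                          // pretend entries live in here
    rw.close();

    RandomAccessFile raf = new RandomAccessFile(f, "r");
    try {
      FileChannel ch = raf.getChannel();
      long offset = 1024;  // sub-file start, as the entry table would record it
      long length = 4096;  // sub-file length
      // Mapping starts at the sub-file's offset, not at 0, so only the
      // requested region of the compound file is ever mapped:
      MappedByteBuffer bb = ch.map(MapMode.READ_ONLY, offset, length);
      System.out.println(bb.get(0)); // buffer position 0 == byte 1024 on disk
    } finally {
      raf.close();
      f.delete();
    }
  }
}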

View File

@@ -24,6 +24,9 @@ import java.nio.channels.ClosedChannelException; // javadoc @link
import java.nio.channels.FileChannel;
import java.util.concurrent.Future; // javadoc

+import org.apache.lucene.store.SimpleFSDirectory.SimpleFSIndexInput;
+import org.apache.lucene.util.IOUtils;

/**
 * An {@link FSDirectory} implementation that uses java.nio's FileChannel's
 * positional read, which allows multiple threads to read from the same file
@@ -78,6 +81,47 @@ public class NIOFSDirectory extends FSDirectory {
    return new NIOFSIndexInput(new File(getDirectory(), name), bufferSize, getReadChunkSize());
  }
@Override
public CompoundFileDirectory openCompoundInput(String name, int bufferSize) throws IOException {
return new NIOFSCompoundFileDirectory(name, bufferSize);
}
private final class NIOFSCompoundFileDirectory extends CompoundFileDirectory {
private SimpleFSIndexInput.Descriptor fd;
private FileChannel fc;
public NIOFSCompoundFileDirectory(String fileName, int readBufferSize) throws IOException {
super(NIOFSDirectory.this, fileName, readBufferSize);
IndexInput stream = null;
try {
File f = new File(NIOFSDirectory.this.getDirectory(), fileName);
fd = new SimpleFSIndexInput.Descriptor(f, "r");
fc = fd.getChannel();
stream = new NIOFSIndexInput(fd, fc, 0, fd.length, readBufferSize,
getReadChunkSize());
initForRead(CompoundFileDirectory.readEntries(stream, NIOFSDirectory.this, fileName));
stream.close();
} catch (IOException e) {
// throw our original exception
IOUtils.closeSafely(e, fc, fd, stream);
}
}
@Override
public IndexInput openInputSlice(String id, long offset, long length, int readBufferSize) throws IOException {
return new NIOFSIndexInput(fd, fc, offset, length, readBufferSize, getReadChunkSize());
}
@Override
public synchronized void close() throws IOException {
try {
IOUtils.closeSafely(false, fc, fd);
} finally {
super.close();
}
}
}
  protected static class NIOFSIndexInput extends SimpleFSDirectory.SimpleFSIndexInput {

    private ByteBuffer byteBuf; // wraps the buffer for NIO

@@ -92,6 +136,12 @@ public class NIOFSDirectory extends FSDirectory {
      channel = file.getChannel();
    }
public NIOFSIndexInput(Descriptor file, FileChannel fc, long off, long length, int bufferSize, int chunkSize) throws IOException {
super(file, off, length, bufferSize, chunkSize);
channel = fc;
isClone = true;
}
    @Override
    protected void newBuffer(byte[] newBuffer) {
      super.newBuffer(newBuffer);

@@ -145,7 +195,11 @@ public class NIOFSDirectory extends FSDirectory {
      int readLength = bb.limit() - readOffset;
      assert readLength == len;

-     long pos = getFilePointer();
+     long pos = getFilePointer() + off;
+     if (pos + len > end) {
+       throw new IOException("read past EOF");
+     }

      try {
        while (readLength > 0) {
@@ -159,9 +213,6 @@ public class NIOFSDirectory extends FSDirectory {
          }
          bb.limit(limit);
          int i = channel.read(bb, pos);
-         if (i == -1) {
-           throw new IOException("read past EOF");
-         }
          pos += i;
          readOffset += i;
          readLength -= i;
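A sketch of the positional-read idiom this class builds on: FileChannel#read(ByteBuffer, long) leaves the channel's own file pointer untouched, which is why the sliced inputs above can share one channel; it can also return -1 or a short count, hence the up-front bounds check that replaces the per-read EOF test. The temp file and position are invented:

import java.io.File;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

public class PositionalReadSketch {
  public static void main(String[] args) throws Exception {
    File f = File.createTempFile("cfs", "demo");
    RandomAccessFile rw = new RandomAccessFile(f, "rw");
    rw.setLength(1024);
    rw.close();

    RandomAccessFile raf = new RandomAccessFile(f, "r");
    try {
      FileChannel ch = raf.getChannel();
      ByteBuffer bb = ByteBuffer.allocate(16);
      long pos = 128;            // absolute position, independent per caller
      // Positional read: the channel's own file pointer never moves, so any
      // number of sliced inputs can share this one channel without seeking.
      int n = ch.read(bb, pos);  // may return -1 (EOF) or a short count,
      System.out.println(n);     // hence the bounds check before the loop
    } finally {
      raf.close();
      f.delete();
    }
  }
}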

View File

@@ -21,6 +21,8 @@ import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;

+import org.apache.lucene.util.IOUtils;

/** A straightforward implementation of {@link FSDirectory}
 * using java.io.RandomAccessFile.  However, this class has
 * poor concurrent performance (multiple threads will
@@ -56,6 +58,45 @@ public class SimpleFSDirectory extends FSDirectory {
    return new SimpleFSIndexInput(new File(directory, name), bufferSize, getReadChunkSize());
  }
@Override
public CompoundFileDirectory openCompoundInput(String name, int bufferSize) throws IOException {
return new SimpleFSCompoundFileDirectory(name, bufferSize);
}
private final class SimpleFSCompoundFileDirectory extends CompoundFileDirectory {
private SimpleFSIndexInput.Descriptor fd;
public SimpleFSCompoundFileDirectory(String fileName, int readBufferSize) throws IOException {
super(SimpleFSDirectory.this, fileName, readBufferSize);
IndexInput stream = null;
try {
final File f = new File(SimpleFSDirectory.this.getDirectory(), fileName);
fd = new SimpleFSIndexInput.Descriptor(f, "r");
stream = new SimpleFSIndexInput(fd, 0, fd.length, readBufferSize,
getReadChunkSize());
initForRead(CompoundFileDirectory.readEntries(stream, SimpleFSDirectory.this, fileName));
stream.close();
} catch (IOException e) {
// throw our original exception
IOUtils.closeSafely(e, fd, stream);
}
}
@Override
public IndexInput openInputSlice(String id, long offset, long length, int readBufferSize) throws IOException {
return new SimpleFSIndexInput(fd, offset, length, readBufferSize, getReadChunkSize());
}
@Override
public synchronized void close() throws IOException {
try {
fd.close();
} finally {
super.close();
}
}
}
  protected static class SimpleFSIndexInput extends BufferedIndexInput {

    protected static class Descriptor extends RandomAccessFile {

@@ -84,11 +125,24 @@ public class SimpleFSDirectory extends FSDirectory {
    boolean isClone;
    // LUCENE-1566 - maximum read length on a 32bit JVM to prevent incorrect OOM
    protected final int chunkSize;
+   protected final long off;
+   protected final long end;

    public SimpleFSIndexInput(File path, int bufferSize, int chunkSize) throws IOException {
      super(bufferSize);
-     file = new Descriptor(path, "r");
+     this.file = new Descriptor(path, "r");
      this.chunkSize = chunkSize;
+     this.off = 0L;
+     this.end = file.length;
+   }
+
+   public SimpleFSIndexInput(Descriptor file, long off, long length, int bufferSize, int chunkSize) throws IOException {
+     super(bufferSize);
+     this.file = file;
+     this.chunkSize = chunkSize;
+     this.off = off;
+     this.end = off + length;
+     this.isClone = true; // well, we are sorta?
    }

    /** IndexInput methods */
@@ -96,13 +150,17 @@ public class SimpleFSDirectory extends FSDirectory {
    protected void readInternal(byte[] b, int offset, int len)
         throws IOException {
      synchronized (file) {
-       long position = getFilePointer();
+       long position = off + getFilePointer();
        if (position != file.position) {
          file.seek(position);
          file.position = position;
        }
        int total = 0;

+       if (position + len > end) {
+         throw new IOException("read past EOF");
+       }

        try {
          do {
            final int readLength;
@@ -113,9 +171,6 @@ public class SimpleFSDirectory extends FSDirectory {
              readLength = chunkSize;
            }
            final int i = file.read(b, offset + total, readLength);
-           if (i == -1) {
-             throw new IOException("read past EOF");
-           }
            file.position += i;
            total += i;
          } while (total < len);

@@ -144,7 +199,7 @@ public class SimpleFSDirectory extends FSDirectory {
    @Override
    public long length() {
-     return file.length;
+     return end - off;
    }

    @Override
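The off/end arithmetic introduced above, in one self-contained sketch (numbers invented): a slice with off=100 and length=25 covers physical bytes [100, 125) but presents itself as an independent 25-byte file:

public class SliceBoundsSketch {
  public static void main(String[] args) {
    long off = 100, end = 125;              // end = off + length
    long logicalLength = end - off;         // 25: what length() now reports
    long position = off + 20;               // physical spot for logical pointer 20
    boolean pastEof = position + 10 > end;  // true: only 5 logical bytes remain
    System.out.println(logicalLength + " " + pastEof);
  }
}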

View File

@@ -0,0 +1,143 @@
package org.apache.lucene.store;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
public class MockCompoundFileDirectoryWrapper extends CompoundFileDirectory {
private final MockDirectoryWrapper parent;
private final CompoundFileDirectory delegate;
private final String name;
public MockCompoundFileDirectoryWrapper(String name, MockDirectoryWrapper parent, CompoundFileDirectory delegate) throws IOException {
super(parent, name, 1024);
this.name = name;
this.parent = parent;
this.delegate = delegate;
super.initForRead(Collections.<String,FileEntry>emptyMap());
parent.addFileHandle(this, name, true);
}
@Override
public Directory getDirectory() {
return delegate.getDirectory();
}
@Override
public String getName() {
return delegate.getName();
}
@Override
public synchronized void close() throws IOException {
try {
delegate.close();
parent.removeOpenFile(this, name);
} finally {
super.close();
}
}
@Override
public synchronized IndexInput openInput(String id, int readBufferSize) throws IOException {
return delegate.openInput(id, readBufferSize);
}
@Override
public String[] listAll() {
return delegate.listAll();
}
@Override
public boolean fileExists(String name) {
return delegate.fileExists(name);
}
@Override
public long fileModified(String name) throws IOException {
return delegate.fileModified(name);
}
@Override
public void deleteFile(String name) {
delegate.deleteFile(name);
}
@Override
public void renameFile(String from, String to) {
delegate.renameFile(from, to);
}
@Override
public long fileLength(String name) throws IOException {
return delegate.fileLength(name);
}
@Override
public IndexOutput createOutput(String name) throws IOException {
return delegate.createOutput(name);
}
@Override
public void sync(Collection<String> names) throws IOException {
delegate.sync(names);
}
@Override
public Lock makeLock(String name) {
return delegate.makeLock(name);
}
@Override
public void clearLock(String name) throws IOException {
delegate.clearLock(name);
}
@Override
public void setLockFactory(LockFactory lockFactory) throws IOException {
delegate.setLockFactory(lockFactory);
}
@Override
public LockFactory getLockFactory() {
return delegate.getLockFactory();
}
@Override
public String getLockID() {
return delegate.getLockID();
}
@Override
public String toString() {
return "MockCompoundFileDirectoryWrapper(" + super.toString() + ")";
}
@Override
public void copy(Directory to, String src, String dest) throws IOException {
delegate.copy(to, src, dest);
}
@Override
public IndexInput openInputSlice(String id, long offset, long length, int readBufferSize) throws IOException {
return delegate.openInputSlice(id, offset, length, readBufferSize);
}
}

View File

@@ -388,7 +388,7 @@ public class MockDirectoryWrapper extends Directory {
      }
    }

-   private void addFileHandle(Closeable c, String name, boolean input) {
+   void addFileHandle(Closeable c, String name, boolean input) {
      Integer v = openFiles.get(name);
      if (v != null) {
        v = Integer.valueOf(v.intValue()+1);

@@ -417,6 +417,12 @@ public class MockDirectoryWrapper extends Directory {
      return ii;
    }

+   @Override
+   public synchronized CompoundFileDirectory openCompoundInput(String name, int bufferSize) throws IOException {
+     maybeYield();
+     return new MockCompoundFileDirectoryWrapper(name, this, delegate.openCompoundInput(name, bufferSize));
+   }

    /** Provided for testing purposes. Use sizeInBytes() instead. */
    public synchronized final long getRecomputedSizeInBytes() throws IOException {
      if (!(delegate instanceof RAMDirectory))

@@ -481,7 +487,7 @@ public class MockDirectoryWrapper extends Directory {
      delegate.close();
    }
  }

- private synchronized void removeOpenFile(Closeable c, String name) {
+ synchronized void removeOpenFile(Closeable c, String name) {
    Integer v = openFiles.get(name);
    // Could be null when crash() was called
    if (v != null) {

View File

@@ -1075,8 +1075,8 @@ public class TestAddIndexes extends LuceneTestCase {
    IndexWriter w3 = new IndexWriter(dir, conf);
    w3.addIndexes(readers);
    w3.close();
-   // we should now see segments_X, segments.gen, _Y.cfs, _Z.fnx
-   assertEquals("Only one compound segment should exist", 4, dir.listAll().length);
+   // we should now see segments_X, segments.gen, _Y.cfs, _Y.cfe, _Z.fnx
+   assertEquals("Only one compound segment should exist", 5, dir.listAll().length);
  }

  // LUCENE-3126: tests that if a non-CFS segment is copied, it is converted to

View File

@@ -39,8 +39,8 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Similarity;
-import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Bits;

@@ -536,7 +536,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
    // figure out which field number corresponds to
    // "content", and then set our expected file names below
    // accordingly:
-   CompoundFileReader cfsReader = new CompoundFileReader(dir, "_0.cfs");
+   CompoundFileDirectory cfsReader = dir.openCompoundInput("_0.cfs", 1024);
    FieldInfos fieldInfos = new FieldInfos(cfsReader, "_0.fnm");
    int contentFieldIndex = -1;
    for (FieldInfo fi : fieldInfos) {

@@ -549,7 +549,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
    assertTrue("could not locate the 'content' field number in the _2.cfs segment", contentFieldIndex != -1);

    // Now verify file names:
-   String[] expected = new String[] {"_0.cfs",
+   String[] expected = new String[] {"_0.cfs", "_0.cfe",
                                      "_0_1.del",
                                      "_0_1.s" + contentFieldIndex,
                                      "segments_2",

View File

@@ -23,6 +23,8 @@ import java.io.File;
import org.apache.lucene.util.LuceneTestCase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;
+import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
@@ -200,11 +202,11 @@ public class TestCompoundFile extends LuceneTestCase
    for (int i=0; i<data.length; i++) {
      String name = "t" + data[i];
      createSequenceFile(dir, name, (byte) 0, data[i]);
-     CompoundFileWriter csw = new CompoundFileWriter(dir, name + ".cfs");
-     csw.addFile(name);
+     CompoundFileDirectory csw = dir.createCompoundOutput(name + ".cfs");
+     dir.copy(csw, name, name);
      csw.close();

-     CompoundFileReader csr = new CompoundFileReader(dir, name + ".cfs");
+     CompoundFileDirectory csr = dir.openCompoundInput(name + ".cfs", 1024);
      IndexInput expected = dir.openInput(name);
      IndexInput actual = csr.openInput(name);
      assertSameStreams(name, expected, actual);
@@ -223,12 +225,12 @@ public class TestCompoundFile extends LuceneTestCase
    createSequenceFile(dir, "d1", (byte) 0, 15);
    createSequenceFile(dir, "d2", (byte) 0, 114);

-   CompoundFileWriter csw = new CompoundFileWriter(dir, "d.csf");
-   csw.addFile("d1");
-   csw.addFile("d2");
+   CompoundFileDirectory csw = dir.createCompoundOutput("d.cfs");
+   dir.copy(csw, "d1", "d1");
+   dir.copy(csw, "d2", "d2");
    csw.close();

-   CompoundFileReader csr = new CompoundFileReader(dir, "d.csf");
+   CompoundFileDirectory csr = dir.openCompoundInput("d.cfs", 1024);
    IndexInput expected = dir.openInput("d1");
    IndexInput actual = csr.openInput("d1");
    assertSameStreams("d1", expected, actual);
@@ -273,17 +275,18 @@ public class TestCompoundFile extends LuceneTestCase
    createRandomFile(dir, segment + ".notIn2", 51);

    // Now test
-   CompoundFileWriter csw = new CompoundFileWriter(dir, "test.cfs");
+   CompoundFileDirectory csw = dir.createCompoundOutput("test.cfs");
    final String data[] = new String[] {
      ".zero", ".one", ".ten", ".hundred", ".big1", ".big2", ".big3",
      ".big4", ".big5", ".big6", ".big7"
    };
    for (int i=0; i<data.length; i++) {
-     csw.addFile(segment + data[i]);
+     String fileName = segment + data[i];
+     dir.copy(csw, fileName, fileName);
    }
    csw.close();

-   CompoundFileReader csr = new CompoundFileReader(dir, "test.cfs");
+   CompoundFileDirectory csr = dir.openCompoundInput("test.cfs", 1024);
    for (int i=0; i<data.length; i++) {
      IndexInput check = dir.openInput(segment + data[i]);
      IndexInput test = csr.openInput(segment + data[i]);
@@ -302,10 +305,11 @@ public class TestCompoundFile extends LuceneTestCase
   * the size of each file is 1000 bytes.
   */
  private void setUp_2() throws IOException {
-   CompoundFileWriter cw = new CompoundFileWriter(dir, "f.comp");
+   CompoundFileDirectory cw = dir.createCompoundOutput("f.comp");
    for (int i=0; i<20; i++) {
      createSequenceFile(dir, "f" + i, (byte) 0, 2000);
-     cw.addFile("f" + i);
+     String fileName = "f" + i;
+     dir.copy(cw, fileName, fileName);
    }
    cw.close();
  }
@@ -350,26 +354,9 @@ public class TestCompoundFile extends LuceneTestCase
      }
    }

-   static boolean isCSIndexInput(IndexInput is) {
-     return is instanceof CompoundFileReader.CSIndexInput;
-   }
-
-   static boolean isCSIndexInputOpen(IndexInput is) throws IOException {
-     if (isCSIndexInput(is)) {
-       CompoundFileReader.CSIndexInput cis =
-         (CompoundFileReader.CSIndexInput) is;
-       return _TestHelper.isSimpleFSIndexInputOpen(cis.base);
-     } else {
-       return false;
-     }
-   }

    public void testClonedStreamsClosing() throws IOException {
      setUp_2();
-     CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
+     CompoundFileDirectory cr = dir.openCompoundInput("f.comp", 1024);

      // basic clone
      IndexInput expected = dir.openInput("f11");
@@ -379,10 +366,8 @@ public class TestCompoundFile extends LuceneTestCase
    assertTrue(_TestHelper.isSimpleFSIndexInputOpen(expected));

    IndexInput one = cr.openInput("f11");
-   assertTrue(isCSIndexInputOpen(one));

    IndexInput two = (IndexInput) one.clone();
-   assertTrue(isCSIndexInputOpen(two));

    assertSameStreams("basic clone one", expected, one);
    expected.seek(0);
@@ -390,7 +375,6 @@ public class TestCompoundFile extends LuceneTestCase
    // Now close the first stream
    one.close();
-   assertTrue("Only close when cr is closed", isCSIndexInputOpen(one));

    // The following should really fail since we couldn't expect to
    // access a file once close has been called on it (regardless of

@@ -402,8 +386,6 @@ public class TestCompoundFile extends LuceneTestCase
    // Now close the compound reader
    cr.close();
-   assertFalse("Now closed one", isCSIndexInputOpen(one));
-   assertFalse("Now closed two", isCSIndexInputOpen(two));

    // The following may also fail since the compound stream is closed
    expected.seek(0);
@@ -426,7 +408,7 @@ public class TestCompoundFile extends LuceneTestCase
   */
  public void testRandomAccess() throws IOException {
    setUp_2();
-   CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
+   CompoundFileDirectory cr = dir.openCompoundInput("f.comp", 1024);

    // Open two files
    IndexInput e1 = dir.openInput("f11");
@@ -505,7 +487,7 @@ public class TestCompoundFile extends LuceneTestCase
   */
  public void testRandomAccessClones() throws IOException {
    setUp_2();
-   CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
+   CompoundFileDirectory cr = dir.openCompoundInput("f.comp", 1024);

    // Open two files
    IndexInput e1 = cr.openInput("f11");
@@ -582,7 +564,7 @@ public class TestCompoundFile extends LuceneTestCase
  public void testFileNotFound() throws IOException {
    setUp_2();
-   CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
+   CompoundFileDirectory cr = dir.openCompoundInput("f.comp", 1024);

    // Open two files
    try {
@@ -600,7 +582,7 @@ public class TestCompoundFile extends LuceneTestCase
  public void testReadPastEOF() throws IOException {
    setUp_2();
-   CompoundFileReader cr = new CompoundFileReader(dir, "f.comp");
+   CompoundFileDirectory cr = dir.openCompoundInput("f.comp", 1024);
    IndexInput is = cr.openInput("f2");
    is.seek(is.length() - 10);
    byte b[] = new byte[100];
@@ -653,11 +635,11 @@ public class TestCompoundFile extends LuceneTestCase
    createSequenceFile(dir, "d1", (byte) 0, 15);

    Directory newDir = newDirectory();
-   CompoundFileWriter csw = new CompoundFileWriter(newDir, "d.csf");
-   csw.addFile("d1", dir);
+   CompoundFileDirectory csw = newDir.createCompoundOutput("d.cfs");
+   dir.copy(csw, "d1", "d1");
    csw.close();

-   CompoundFileReader csr = new CompoundFileReader(newDir, "d.csf");
+   CompoundFileDirectory csr = newDir.openCompoundInput("d.cfs", 1024);
    IndexInput expected = dir.openInput("d1");
    IndexInput actual = csr.openInput("d1");
    assertSameStreams("d1", expected, actual);
public void testAppend() throws IOException {
Directory newDir = newDirectory();
CompoundFileDirectory csw = newDir.createCompoundOutput("d.cfs");
int size = 5 + random.nextInt(128);
for (int j = 0; j < 2; j++) {
IndexOutput os = csw.createOutput("seg" + j + "_foo.txt");
for (int i = 0; i < size; i++) {
os.writeInt(i);
}
os.close();
String[] listAll = newDir.listAll();
assertEquals(1, listAll.length);
assertEquals("d.cfs", listAll[0]);
}
createSequenceFile(dir, "d1", (byte) 0, 15);
dir.copy(csw, "d1", "d1");
String[] listAll = newDir.listAll();
assertEquals(1, listAll.length);
assertEquals("d.cfs", listAll[0]);
csw.close();
CompoundFileDirectory csr = newDir.openCompoundInput("d.cfs", 1024);
for (int j = 0; j < 2; j++) {
IndexInput openInput = csr.openInput("seg" + j + "_foo.txt");
assertEquals(size * 4, openInput.length());
for (int i = 0; i < size; i++) {
assertEquals(i, openInput.readInt());
}
openInput.close();
}
IndexInput expected = dir.openInput("d1");
IndexInput actual = csr.openInput("d1");
assertSameStreams("d1", expected, actual);
assertSameSeekBehavior("d1", expected, actual);
expected.close();
actual.close();
csr.close();
newDir.close();
}
public void testAppendTwice() throws IOException {
Directory newDir = newDirectory();
CompoundFileDirectory csw = newDir.createCompoundOutput("d.cfs");
createSequenceFile(newDir, "d1", (byte) 0, 15);
IndexOutput out = csw.createOutput("d.xyz");
out.writeInt(0);
try {
newDir.copy(csw, "d1", "d1");
fail("file does already exist");
} catch (IOException e) {
//
}
out.close();
assertEquals(1, csw.listAll().length);
assertEquals("d.xyz", csw.listAll()[0]);
csw.close();
CompoundFileDirectory cfr = newDir.openCompoundInput("d.cfs", 1024);
assertEquals(1, cfr.listAll().length);
assertEquals("d.xyz", cfr.listAll()[0]);
cfr.close();
newDir.close();
}
}

View File

@@ -20,6 +20,7 @@ package org.apache.lucene.index;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
+import org.apache.lucene.store.CompoundFileDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;

@@ -91,7 +92,7 @@ public class TestIndexFileDeleter extends LuceneTestCase {
    // figure out which field number corresponds to
    // "content", and then set our expected file names below
    // accordingly:
-   CompoundFileReader cfsReader = new CompoundFileReader(dir, "_2.cfs");
+   CompoundFileDirectory cfsReader = dir.openCompoundInput("_2.cfs", 1024);
    FieldInfos fieldInfos = new FieldInfos(cfsReader, "_2.fnm");
    int contentFieldIndex = -1;
    for (FieldInfo fi : fieldInfos) {