From 6456948a470d21f5d59439417ec7cb3550c876b6 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Thu, 2 Oct 2014 06:34:10 +0000 Subject: [PATCH] LUCENE-5969: start improving CFSDir git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5969@1628889 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene40/Lucene40DocValuesReader.java | 2 +- .../lucene40/Lucene40DocValuesWriter.java | 2 +- .../org/apache/lucene/index/IndexWriter.java | 2 +- .../lucene/index/SegmentCoreReaders.java | 2 +- .../apache/lucene/index/SegmentReader.java | 2 +- .../lucene/store/CompoundFileDirectory.java | 35 +- .../lucene/store/CompoundFileWriter.java | 20 +- .../index/TestAllFilesHaveChecksumFooter.java | 45 +- .../index/TestAllFilesHaveCodecHeader.java | 65 +- .../org/apache/lucene/index/TestCodecs.java | 11 +- .../apache/lucene/index/TestCompoundFile.java | 936 ++++++------------ .../lucene/index/TestCompoundFile2.java | 296 ++++++ .../lucene/store/TestNRTCachingDirectory.java | 3 +- .../lucene/index/CompoundFileExtractor.java | 134 --- .../lucene/store/BaseDirectoryTestCase.java | 6 +- 15 files changed, 712 insertions(+), 849 deletions(-) create mode 100644 lucene/core/src/test/org/apache/lucene/index/TestCompoundFile2.java delete mode 100644 lucene/misc/src/java/org/apache/lucene/index/CompoundFileExtractor.java diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java index d0a88252156..c9dca550004 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40DocValuesReader.java @@ -85,7 +85,7 @@ final class Lucene40DocValuesReader extends DocValuesProducer { Lucene40DocValuesReader(SegmentReadState state, String filename, String legacyKey) throws IOException { this.state = state; this.legacyKey = 
legacyKey; - this.dir = new CompoundFileDirectory(state.directory, filename, state.context, false); + this.dir = new CompoundFileDirectory(state.segmentInfo.getId(), state.directory, filename, state.context, false); ramBytesUsed = new AtomicLong(RamUsageEstimator.shallowSizeOf(getClass())); merging = false; } diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java index fa2b2d8a02c..4568d84de91 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/Lucene40DocValuesWriter.java @@ -51,7 +51,7 @@ final class Lucene40DocValuesWriter extends DocValuesConsumer { Lucene40DocValuesWriter(SegmentWriteState state, String filename, String legacyKey) throws IOException { this.state = state; this.legacyKey = legacyKey; - this.dir = new CompoundFileDirectory(state.directory, filename, state.context, true); + this.dir = new CompoundFileDirectory(state.segmentInfo.getId(), state.directory, filename, state.context, true); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 4300c6281b3..c6d95c6ab33 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -4459,7 +4459,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { } // Now merge all added files Collection files = info.files(); - CompoundFileDirectory cfsDir = new CompoundFileDirectory(directory, fileName, context, true); + CompoundFileDirectory cfsDir = new CompoundFileDirectory(info.getId(), directory, fileName, context, true); boolean success = false; try { for (String file : files) { diff --git 
a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java index 1f60465c50d..84e85ba272f 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentCoreReaders.java @@ -99,7 +99,7 @@ final class SegmentCoreReaders implements Accountable { try { if (si.info.getUseCompoundFile()) { - cfsDir = cfsReader = new CompoundFileDirectory(dir, IndexFileNames.segmentFileName(si.info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false); + cfsDir = cfsReader = new CompoundFileDirectory(si.info.getId(), dir, IndexFileNames.segmentFileName(si.info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), context, false); } else { cfsReader = null; cfsDir = dir; diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java index 0beba5ec64b..2b9ea1c030b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java @@ -202,7 +202,7 @@ public final class SegmentReader extends LeafReader implements Accountable { final boolean closeDir; if (info.getFieldInfosGen() == -1 && info.info.getUseCompoundFile()) { // no fieldInfos gen and segment uses a compound file - dir = new CompoundFileDirectory(info.info.dir, + dir = new CompoundFileDirectory(info.info.getId(), info.info.dir, IndexFileNames.segmentFileName(info.info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION), IOContext.READONCE, false); diff --git a/lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java b/lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java index d1d688a1446..0c570354d10 100644 --- a/lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java +++ b/lucene/core/src/java/org/apache/lucene/store/CompoundFileDirectory.java @@ -24,7 +24,9 
@@ import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.store.DataOutput; // javadocs import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.StringHelper; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; @@ -55,7 +57,7 @@ import java.io.IOException; *
  • Compound (.cfs) --> Header, FileData <sup>FileCount</sup>, Footer
  • *
  • Compound Entry Table (.cfe) --> Header, FileCount, <FileName, * DataOffset, DataLength> <sup>FileCount</sup>
  • - *
  • Header --> {@link CodecUtil#writeHeader CodecHeader}
  • + *
  • Header --> {@link CodecUtil#writeSegmentHeader SegmentHeader}
  • *
  • FileCount --> {@link DataOutput#writeVInt VInt}
  • *
  • DataOffset,DataLength,Checksum --> {@link DataOutput#writeLong UInt64}
  • *
  • FileName --> {@link DataOutput#writeString String}
  • @@ -89,12 +91,14 @@ public final class CompoundFileDirectory extends BaseDirectory { private final CompoundFileWriter writer; private final IndexInput handle; private int version; + private final byte[] segmentID; /** * Create a new CompoundFileDirectory. */ - public CompoundFileDirectory(Directory directory, String fileName, IOContext context, boolean openForWrite) throws IOException { + public CompoundFileDirectory(byte[] segmentID, Directory directory, String fileName, IOContext context, boolean openForWrite) throws IOException { this.directory = directory; + this.segmentID = segmentID; this.fileName = fileName; this.readBufferSize = BufferedIndexInput.bufferSize(context); this.isOpen = false; @@ -105,7 +109,17 @@ public final class CompoundFileDirectory extends BaseDirectory { try { this.entries = readEntries(directory, fileName); if (version >= CompoundFileWriter.VERSION_CHECKSUM) { - CodecUtil.checkHeader(handle, CompoundFileWriter.DATA_CODEC, version, version); + if (version >= CompoundFileWriter.VERSION_SEGMENTHEADER) { + // nocommit: remove this null "hack", its because old rw test codecs cant properly impersonate + if (segmentID == null) { + CodecUtil.checkHeader(handle, CompoundFileWriter.DATA_CODEC, version, version); + handle.skipBytes(StringHelper.ID_LENGTH); + } else { + CodecUtil.checkSegmentHeader(handle, CompoundFileWriter.DATA_CODEC, version, version, segmentID, ""); + } + } else { + CodecUtil.checkHeader(handle, CompoundFileWriter.DATA_CODEC, version, version); + } // NOTE: data file is too costly to verify checksum against all the bytes on open, // but for now we at least verify proper structure of the checksum footer: which looks // for FOOTER_MAGIC + algorithmID. 
This is cheap and can detect some forms of corruption @@ -124,7 +138,7 @@ public final class CompoundFileDirectory extends BaseDirectory { assert !(directory instanceof CompoundFileDirectory) : "compound file inside of compound file: " + fileName; this.entries = SENTINEL; this.isOpen = true; - writer = new CompoundFileWriter(directory, fileName); + writer = new CompoundFileWriter(segmentID, directory, fileName); handle = null; } } @@ -140,6 +154,19 @@ public final class CompoundFileDirectory extends BaseDirectory { IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION); entriesStream = dir.openChecksumInput(entriesFileName, IOContext.READONCE); version = CodecUtil.checkHeader(entriesStream, CompoundFileWriter.ENTRY_CODEC, CompoundFileWriter.VERSION_START, CompoundFileWriter.VERSION_CURRENT); + if (version >= CompoundFileWriter.VERSION_SEGMENTHEADER) { + byte id[] = new byte[StringHelper.ID_LENGTH]; + entriesStream.readBytes(id, 0, id.length); + // nocommit: remove this null "hack", its because old rw test codecs cant properly impersonate + if (segmentID != null && !Arrays.equals(id, segmentID)) { + throw new CorruptIndexException("file mismatch, expected segment id=" + StringHelper.idToString(segmentID) + + ", got=" + StringHelper.idToString(id), entriesStream); + } + byte suffixLength = entriesStream.readByte(); + if (suffixLength != 0) { + throw new CorruptIndexException("unexpected segment suffix, expected zero-length, got=" + (suffixLength & 0xFF), entriesStream); + } + } final int numEntries = entriesStream.readVInt(); mapping = new HashMap<>(numEntries); for (int i = 0; i < numEntries; i++) { diff --git a/lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java b/lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java index f5568711ac0..6d7254a498a 100644 --- a/lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java +++ b/lucene/core/src/java/org/apache/lucene/store/CompoundFileWriter.java @@ -55,7 +55,8 @@ final class 
CompoundFileWriter implements Closeable{ static final String DATA_CODEC = "CompoundFileWriterData"; static final int VERSION_START = 0; static final int VERSION_CHECKSUM = 1; - static final int VERSION_CURRENT = VERSION_CHECKSUM; + static final int VERSION_SEGMENTHEADER = 2; + static final int VERSION_CURRENT = VERSION_SEGMENTHEADER; // versioning for the .cfe file static final String ENTRY_CODEC = "CompoundFileWriterEntries"; @@ -70,6 +71,7 @@ final class CompoundFileWriter implements Closeable{ private final AtomicBoolean outputTaken = new AtomicBoolean(false); final String entryTableName; final String dataFileName; + final byte[] segmentID; /** * Create the compound stream in the specified file. The file name is the @@ -78,11 +80,17 @@ final class CompoundFileWriter implements Closeable{ * @throws NullPointerException * if dir or name is null */ - CompoundFileWriter(Directory dir, String name) { - if (dir == null) + CompoundFileWriter(byte segmentID[], Directory dir, String name) { + if (dir == null) { throw new NullPointerException("directory cannot be null"); - if (name == null) + } + if (name == null) { throw new NullPointerException("name cannot be null"); + } + if (segmentID == null) { + throw new NullPointerException("segmentID cannot be null"); + } + this.segmentID = segmentID; directory = dir; entryTableName = IndexFileNames.segmentFileName( IndexFileNames.stripExtension(name), "", @@ -96,7 +104,7 @@ final class CompoundFileWriter implements Closeable{ boolean success = false; try { dataOut = directory.createOutput(dataFileName, context); - CodecUtil.writeHeader(dataOut, DATA_CODEC, VERSION_CURRENT); + CodecUtil.writeSegmentHeader(dataOut, DATA_CODEC, VERSION_CURRENT, segmentID, ""); success = true; } finally { if (!success) { @@ -207,7 +215,7 @@ final class CompoundFileWriter implements Closeable{ protected void writeEntryTable(Collection entries, IndexOutput entryOut) throws IOException { - CodecUtil.writeHeader(entryOut, ENTRY_CODEC, VERSION_CURRENT); 
+ CodecUtil.writeSegmentHeader(entryOut, ENTRY_CODEC, VERSION_CURRENT, segmentID, ""); entryOut.writeVInt(entries.size()); for (FileEntry fe : entries) { entryOut.writeString(IndexFileNames.stripSegmentName(fe.file)); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java index 7862606ab31..cef548bd565 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveChecksumFooter.java @@ -28,7 +28,6 @@ import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.MockDirectoryWrapper; -import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; @@ -65,33 +64,33 @@ public class TestAllFilesHaveChecksumFooter extends LuceneTestCase { } } riw.close(); - checkHeaders(dir); + checkFooters(dir); dir.close(); } - private void checkHeaders(Directory dir) throws IOException { - for (String file : dir.listAll()) { - if (file.equals(IndexWriter.WRITE_LOCK_NAME)) { - continue; // write.lock has no footer, thats ok - } - if (file.endsWith(IndexFileNames.COMPOUND_FILE_EXTENSION)) { - CompoundFileDirectory cfsDir = new CompoundFileDirectory(dir, file, newIOContext(random()), false); - checkHeaders(cfsDir); // recurse into cfs - cfsDir.close(); - } - IndexInput in = null; - boolean success = false; - try { - in = dir.openInput(file, newIOContext(random())); - CodecUtil.checksumEntireFile(in); - success = true; - } finally { - if (success) { - IOUtils.close(in); - } else { - IOUtils.closeWhileHandlingException(in); + private void checkFooters(Directory dir) throws IOException { + SegmentInfos sis = new SegmentInfos(); + sis.read(dir); + checkFooter(dir, sis.getSegmentsFileName()); + + for 
(SegmentCommitInfo si : sis) { + for (String file : si.files()) { + checkFooter(dir, file); + if (file.endsWith(IndexFileNames.COMPOUND_FILE_EXTENSION)) { + // recurse into CFS + try (CompoundFileDirectory cfsDir = new CompoundFileDirectory(si.info.getId(), dir, file, newIOContext(random()), false)) { + for (String cfsFile : cfsDir.listAll()) { + checkFooter(cfsDir, cfsFile); + } + } } } } } + + private void checkFooter(Directory dir, String file) throws IOException { + try (IndexInput in = dir.openInput(file, newIOContext(random()))) { + CodecUtil.checksumEntireFile(in); + } + } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java index bbf58576762..fc6c1391d91 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestAllFilesHaveCodecHeader.java @@ -32,7 +32,6 @@ import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.MockDirectoryWrapper; -import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TestUtil; @@ -83,40 +82,40 @@ public class TestAllFilesHaveCodecHeader extends LuceneTestCase { } private void checkHeaders(Directory dir, Map namesToExtensions) throws IOException { - for (String file : dir.listAll()) { - if (file.equals(IndexWriter.WRITE_LOCK_NAME)) { - continue; // write.lock has no header, thats ok - } - if (file.endsWith(IndexFileNames.COMPOUND_FILE_EXTENSION)) { - CompoundFileDirectory cfsDir = new CompoundFileDirectory(dir, file, newIOContext(random()), false); - checkHeaders(cfsDir, namesToExtensions); // recurse into cfs - cfsDir.close(); - } - IndexInput in = null; - boolean success = false; - try { - in = dir.openInput(file, newIOContext(random())); - int val = 
in.readInt(); - assertEquals(file + " has no codec header, instead found: " + val, CodecUtil.CODEC_MAGIC, val); - String codecName = in.readString(); - assertFalse(codecName.isEmpty()); - String extension = IndexFileNames.getExtension(file); - if (extension == null) { - assertTrue(file.startsWith(IndexFileNames.SEGMENTS)); - extension = " (not a real extension, designates segments file)"; - } - String previous = namesToExtensions.put(codecName, extension); - if (previous != null && !previous.equals(extension)) { - fail("extensions " + previous + " and " + extension + " share same codecName " + codecName); - } - success = true; - } finally { - if (success) { - IOUtils.close(in); - } else { - IOUtils.closeWhileHandlingException(in); + SegmentInfos sis = new SegmentInfos(); + sis.read(dir); + checkHeader(dir, sis.getSegmentsFileName(), namesToExtensions); + + for (SegmentCommitInfo si : sis) { + for (String file : si.files()) { + checkHeader(dir, file, namesToExtensions); + if (file.endsWith(IndexFileNames.COMPOUND_FILE_EXTENSION)) { + // recurse into CFS + try (CompoundFileDirectory cfsDir = new CompoundFileDirectory(si.info.getId(), dir, file, newIOContext(random()), false)) { + for (String cfsFile : cfsDir.listAll()) { + checkHeader(cfsDir, cfsFile, namesToExtensions); + } + } } } } } + + private void checkHeader(Directory dir, String file, Map namesToExtensions) throws IOException { + try (IndexInput in = dir.openInput(file, newIOContext(random()))) { + int val = in.readInt(); + assertEquals(file + " has no codec header, instead found: " + val, CodecUtil.CODEC_MAGIC, val); + String codecName = in.readString(); + assertFalse(codecName.isEmpty()); + String extension = IndexFileNames.getExtension(file); + if (extension == null) { + assertTrue(file.startsWith(IndexFileNames.SEGMENTS)); + extension = " (not a real extension, designates segments file)"; + } + String previous = namesToExtensions.put(codecName, extension); + if (previous != null && 
!previous.equals(extension)) { + fail("extensions " + previous + " and " + extension + " share same codecName " + codecName); + } + } + } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java index 98311373b21..47823a94028 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestCodecs.java @@ -247,10 +247,10 @@ public class TestCodecs extends LuceneTestCase { final FieldData[] fields = new FieldData[] {field}; final FieldInfos fieldInfos = builder.finish(); final Directory dir = newDirectory(); - this.write(fieldInfos, dir, fields); Codec codec = Codec.getDefault(); final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, null, StringHelper.randomId()); - + + this.write(si, fieldInfos, dir, fields); final FieldsProducer reader = codec.postingsFormat().fieldsProducer(new SegmentReadState(dir, si, fieldInfos, newIOContext(random()))); final Iterator fieldsEnum = reader.iterator(); @@ -304,9 +304,9 @@ public class TestCodecs extends LuceneTestCase { System.out.println("TEST: now write postings"); } - this.write(fieldInfos, dir, fields); Codec codec = Codec.getDefault(); final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, null, StringHelper.randomId()); + this.write(si, fieldInfos, dir, fields); if (VERBOSE) { System.out.println("TEST: now read postings"); @@ -798,10 +798,9 @@ public class TestCodecs extends LuceneTestCase { } } - private void write(final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields) throws Throwable { + private void write(SegmentInfo si, final FieldInfos fieldInfos, final Directory dir, final FieldData[] fields) throws Throwable { - final Codec codec = Codec.getDefault(); - final SegmentInfo si = new SegmentInfo(dir, Version.LATEST, SEGMENT, 10000, false, codec, null, StringHelper.randomId()); + final 
Codec codec = si.getCodec(); final SegmentWriteState state = new SegmentWriteState(InfoStream.getDefault(), dir, si, fieldInfos, null, newIOContext(random())); Arrays.sort(fields); diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCompoundFile.java b/lucene/core/src/test/org/apache/lucene/index/TestCompoundFile.java index 82a3ece4b5d..c98dd204984 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestCompoundFile.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestCompoundFile.java @@ -25,613 +25,277 @@ import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.MockDirectoryWrapper; -import org.apache.lucene.store.SimpleFSDirectory; -import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.TestUtil; import java.io.IOException; import java.nio.file.Path; -public class TestCompoundFile extends LuceneTestCase -{ - private Directory dir; - - @Override - public void setUp() throws Exception { - super.setUp(); - Path file = createTempDir("testIndex"); - // use a simple FSDir here, to be sure to have SimpleFSInputs - dir = new SimpleFSDirectory(file,null); +public class TestCompoundFile extends LuceneTestCase { + private Directory dir; + + @Override + public void setUp() throws Exception { + super.setUp(); + Path file = createTempDir("testIndex"); + dir = newFSDirectory(file); + } + + @Override + public void tearDown() throws Exception { + dir.close(); + super.tearDown(); + } + + /** Creates a file of the specified size with random data. 
*/ + private void createRandomFile(Directory dir, String name, int size) throws IOException { + IndexOutput os = dir.createOutput(name, newIOContext(random())); + for (int i=0; i 0) { + int readLen = (int) Math.min(remainder, expectedBuffer.length); + expected.readBytes(expectedBuffer, 0, readLen); + test.readBytes(testBuffer, 0, readLen); + assertEqualArrays(msg + ", remainder " + remainder, expectedBuffer, testBuffer, 0, readLen); + remainder -= readLen; } - - /** Creates a file of the specified size with sequential data. The first - * byte is written as the start byte provided. All subsequent bytes are - * computed as start + offset where offset is the number of the byte. - */ - private void createSequenceFile(Directory dir, - String name, - byte start, - int size) - throws IOException - { - IndexOutput os = dir.createOutput(name, newIOContext(random())); - for (int i=0; i < size; i++) { - os.writeByte(start); - start ++; - } - os.close(); + } + + static void assertSameStreams(String msg, IndexInput expected, IndexInput actual, long seekTo) throws IOException { + if (seekTo >= 0 && seekTo < expected.length()) { + expected.seek(seekTo); + actual.seek(seekTo); + assertSameStreams(msg + ", seek(mid)", expected, actual); } - - - private void assertSameStreams(String msg, - IndexInput expected, - IndexInput test) - throws IOException - { - assertNotNull(msg + " null expected", expected); - assertNotNull(msg + " null test", test); - assertEquals(msg + " length", expected.length(), test.length()); - assertEquals(msg + " position", expected.getFilePointer(), - test.getFilePointer()); - - byte expectedBuffer[] = new byte[512]; - byte testBuffer[] = new byte[expectedBuffer.length]; - - long remainder = expected.length() - expected.getFilePointer(); - while(remainder > 0) { - int readLen = (int) Math.min(remainder, expectedBuffer.length); - expected.readBytes(expectedBuffer, 0, readLen); - test.readBytes(testBuffer, 0, readLen); - assertEqualArrays(msg + ", remainder " + 
remainder, expectedBuffer, - testBuffer, 0, readLen); - remainder -= readLen; - } + } + + static void assertSameSeekBehavior(String msg, IndexInput expected, IndexInput actual) throws IOException { + // seek to 0 + long point = 0; + assertSameStreams(msg + ", seek(0)", expected, actual, point); + + // seek to middle + point = expected.length() / 2l; + assertSameStreams(msg + ", seek(mid)", expected, actual, point); + + // seek to end - 2 + point = expected.length() - 2; + assertSameStreams(msg + ", seek(end-2)", expected, actual, point); + + // seek to end - 1 + point = expected.length() - 1; + assertSameStreams(msg + ", seek(end-1)", expected, actual, point); + + // seek to the end + point = expected.length(); + assertSameStreams(msg + ", seek(end)", expected, actual, point); + + // seek past end + point = expected.length() + 1; + assertSameStreams(msg + ", seek(end+1)", expected, actual, point); + } + + + static void assertEqualArrays(String msg, byte[] expected, byte[] test, int start, int len) { + assertNotNull(msg + " null expected", expected); + assertNotNull(msg + " null test", test); + + for (int i=start; i= 0 && seekTo < expected.length()) - { - expected.seek(seekTo); - actual.seek(seekTo); - assertSameStreams(msg + ", seek(mid)", expected, actual); - } + } + + + // =========================================================== + // Tests of the basic CompoundFile functionality + // =========================================================== + + + /** + * This test creates compound file based on a single file. + * Files of different sizes are tested: 0, 1, 10, 100 bytes. 
+ */ + public void testSingleFile() throws IOException { + int data[] = new int[] { 0, 1, 10, 100 }; + for (int i=0; i"); - return; - } - - Directory dir = null; - CompoundFileDirectory cfr = null; - IOContext context = IOContext.READ; - - try { - Path file = Paths.get(filename); - Path directory = file.toAbsolutePath().getParent(); - filename = file.getFileName().toString(); - if (dirImpl == null) { - dir = FSDirectory.open(directory); - } else { - dir = CommandLineUtil.newFSDirectory(dirImpl, directory); - } - - cfr = new CompoundFileDirectory(dir, filename, IOContext.DEFAULT, false); - - String [] files = cfr.listAll(); - ArrayUtil.timSort(files); // sort the array of filename so that the output is more readable - - for (int i = 0; i < files.length; ++i) { - long len = cfr.fileLength(files[i]); - - if (extract) { - System.out.println("extract " + files[i] + " with " + len + " bytes to local directory..."); - IndexInput ii = cfr.openInput(files[i], context); - - OutputStream f = Files.newOutputStream(Paths.get(files[i])); - - // read and write with a small buffer, which is more effective than reading byte by byte - byte[] buffer = new byte[1024]; - int chunk = buffer.length; - while(len > 0) { - final int bufLen = (int) Math.min(chunk, len); - ii.readBytes(buffer, 0, bufLen); - f.write(buffer, 0, bufLen); - len -= bufLen; - } - - f.close(); - ii.close(); - } - else - System.out.println(files[i] + ": " + len + " bytes"); - } - } catch (IOException ioe) { - ioe.printStackTrace(); - } - finally { - try { - if (dir != null) - dir.close(); - if (cfr != null) - cfr.close(); - } - catch (IOException ioe) { - ioe.printStackTrace(); - } - } - } -} diff --git a/lucene/test-framework/src/java/org/apache/lucene/store/BaseDirectoryTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/store/BaseDirectoryTestCase.java index daa23f59599..0edb393546c 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/store/BaseDirectoryTestCase.java +++ 
b/lucene/test-framework/src/java/org/apache/lucene/store/BaseDirectoryTestCase.java @@ -34,6 +34,7 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexNotFoundException; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.TestUtil; /** Base class for per-Directory tests. */ @@ -587,7 +588,8 @@ public abstract class BaseDirectoryTestCase extends LuceneTestCase { // LUCENE-3382 test that delegate compound files correctly. public void testCompoundFileAppendTwice() throws IOException { Directory newDir = getDirectory(createTempDir("testCompoundFileAppendTwice")); - CompoundFileDirectory csw = new CompoundFileDirectory(newDir, "d.cfs", newIOContext(random()), true); + byte id[] = StringHelper.randomId(); + CompoundFileDirectory csw = new CompoundFileDirectory(id, newDir, "d.cfs", newIOContext(random()), true); createSequenceFile(newDir, "d1", (byte) 0, 15); IndexOutput out = csw.createOutput("d.xyz", newIOContext(random())); out.writeInt(0); @@ -597,7 +599,7 @@ public abstract class BaseDirectoryTestCase extends LuceneTestCase { csw.close(); - CompoundFileDirectory cfr = new CompoundFileDirectory(newDir, "d.cfs", newIOContext(random()), false); + CompoundFileDirectory cfr = new CompoundFileDirectory(id, newDir, "d.cfs", newIOContext(random()), false); assertEquals(1, cfr.listAll().length); assertEquals("d.xyz", cfr.listAll()[0]); cfr.close();