From b028f191b377becab9e42414ba34b063af415e20 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Fri, 3 Oct 2014 17:55:52 +0000 Subject: [PATCH] LUCENE-5969: start porting over tests to BaseCompoundFormatTestCase git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5969@1629272 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene40/Lucene40CompoundReader.java | 2 +- .../lucene40/Lucene40CompoundWriter.java | 23 +- .../lucene40/TestLucene40CompoundFormat.java | 108 +++++++ .../lucene50/TestLucene50CompoundFormat.java | 31 ++ .../lucene/store/TestNRTCachingDirectory.java | 21 -- .../index/BaseCompoundFormatTestCase.java | 273 ++++++++++++++++++ .../lucene/store/BaseDirectoryTestCase.java | 63 ++-- 7 files changed, 449 insertions(+), 72 deletions(-) create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40CompoundFormat.java create mode 100644 lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50CompoundFormat.java create mode 100644 lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40CompoundReader.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40CompoundReader.java index 8bc94b725f4..f04d89365ca 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40CompoundReader.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40CompoundReader.java @@ -96,7 +96,7 @@ final class Lucene40CompoundReader extends BaseDirectory { assert !(directory instanceof Lucene40CompoundReader) : "compound file inside of compound file: " + fileName; this.entries = SENTINEL; this.isOpen = true; - writer = new Lucene40CompoundWriter(directory, fileName); + writer = new Lucene40CompoundWriter(directory, fileName, context); handle = null; } } diff --git a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40CompoundWriter.java b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40CompoundWriter.java index e95deb5bacf..49d82c15b1c 100644 --- a/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40CompoundWriter.java +++ b/lucene/backward-codecs/src/java/org/apache/lucene/codecs/lucene40/Lucene40CompoundWriter.java @@ -75,6 +75,10 @@ final class Lucene40CompoundWriter implements Closeable{ private final AtomicBoolean outputTaken = new AtomicBoolean(false); final String entryTableName; final String dataFileName; + + // preserve the IOContext we were originally passed + // previously this was not also passed to the .CFE + private final IOContext context; /** * Create the compound stream in the specified file. The file name is the @@ -83,7 +87,7 @@ final class Lucene40CompoundWriter implements Closeable{ * @throws NullPointerException * if dir or name is null */ - Lucene40CompoundWriter(Directory dir, String name) { + Lucene40CompoundWriter(Directory dir, String name, IOContext context) { if (dir == null) throw new NullPointerException("directory cannot be null"); if (name == null) @@ -93,14 +97,14 @@ final class Lucene40CompoundWriter implements Closeable{ IndexFileNames.stripExtension(name), "", Lucene40CompoundFormat.COMPOUND_FILE_ENTRIES_EXTENSION); dataFileName = name; - + this.context = context; } private synchronized IndexOutput getOutput(IOContext context) throws IOException { if (dataOut == null) { boolean success = false; try { - dataOut = directory.createOutput(dataFileName, context); + dataOut = directory.createOutput(dataFileName, this.context); CodecUtil.writeHeader(dataOut, DATA_CODEC, VERSION_CURRENT); success = true; } finally { @@ -143,10 +147,7 @@ final class Lucene40CompoundWriter implements Closeable{ throw new IllegalStateException("CFS has pending open files"); } closed = true; - // open the compound stream; we can safely use IOContext.DEFAULT - // here because this will only open the output if no file was - // added to the CFS - getOutput(IOContext.DEFAULT); + getOutput(this.context); assert dataOut != null; CodecUtil.writeFooter(dataOut); success = true; @@ -159,7 +160,7 @@ final class Lucene40CompoundWriter implements Closeable{ } success = false; try { - entryTableOut = directory.createOutput(entryTableName, IOContext.DEFAULT); + entryTableOut = directory.createOutput(entryTableName, this.context); writeEntryTable(entries.values(), entryTableOut); success = true; } finally { @@ -240,10 +241,10 @@ final class Lucene40CompoundWriter implements Closeable{ final DirectCFSIndexOutput out; if ((outputLocked = outputTaken.compareAndSet(false, true))) { - out = new DirectCFSIndexOutput(getOutput(context), entry, false); + out = new DirectCFSIndexOutput(getOutput(this.context), entry, false); } else { entry.dir = this.directory; - out = new DirectCFSIndexOutput(directory.createOutput(name, context), entry, + out = new DirectCFSIndexOutput(directory.createOutput(name, this.context), entry, true); } success = true; @@ -269,7 +270,7 @@ final class Lucene40CompoundWriter implements Closeable{ try { while (!pendingEntries.isEmpty()) { FileEntry entry = pendingEntries.poll(); - copyFileEntry(getOutput(new IOContext(new FlushInfo(0, entry.length))), entry); + copyFileEntry(getOutput(this.context), entry); entries.put(entry.file, entry); } } finally { diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40CompoundFormat.java b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40CompoundFormat.java new file mode 100644 index 00000000000..8d12b1be31a --- /dev/null +++ b/lucene/backward-codecs/src/test/org/apache/lucene/codecs/lucene40/TestLucene40CompoundFormat.java @@ -0,0 +1,108 @@ +package org.apache.lucene.codecs.lucene40; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.index.BaseCompoundFormatTestCase; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; + +public class TestLucene40CompoundFormat extends BaseCompoundFormatTestCase { + private final Codec codec = new Lucene40RWCodec(); + + @Override + protected Codec getCodec() { + return codec; + } + + // LUCENE-3382 test that delegate compound files correctly. + public void testCompoundFileAppendTwice() throws IOException { + Directory newDir = newFSDirectory(createTempDir("testCompoundFileAppendTwice")); + Directory csw = new Lucene40CompoundReader(newDir, "d.cfs", newIOContext(random()), true); + createSequenceFile(newDir, "d1", (byte) 0, 15); + IndexOutput out = csw.createOutput("d.xyz", newIOContext(random())); + out.writeInt(0); + out.close(); + assertEquals(1, csw.listAll().length); + assertEquals("d.xyz", csw.listAll()[0]); + + csw.close(); + + Directory cfr = new Lucene40CompoundReader(newDir, "d.cfs", newIOContext(random()), false); + assertEquals(1, cfr.listAll().length); + assertEquals("d.xyz", cfr.listAll()[0]); + cfr.close(); + newDir.close(); + } + + public void testReadNestedCFP() throws IOException { + Directory newDir = newDirectory(); + // manually manipulates directory + if (newDir instanceof MockDirectoryWrapper) { + ((MockDirectoryWrapper)newDir).setEnableVirusScanner(false); + } + Lucene40CompoundReader csw = new Lucene40CompoundReader(newDir, "d.cfs", newIOContext(random()), true); + Lucene40CompoundReader nested = new Lucene40CompoundReader(newDir, "b.cfs", newIOContext(random()), true); + IndexOutput out = nested.createOutput("b.xyz", newIOContext(random())); + IndexOutput out1 = nested.createOutput("b_1.xyz", newIOContext(random())); + out.writeInt(0); + out1.writeInt(1); + out.close(); + out1.close(); + nested.close(); + newDir.copy(csw, "b.cfs", "b.cfs", newIOContext(random())); + newDir.copy(csw, "b.cfe", "b.cfe", newIOContext(random())); + newDir.deleteFile("b.cfs"); + newDir.deleteFile("b.cfe"); + csw.close(); + + assertEquals(2, newDir.listAll().length); + csw = new Lucene40CompoundReader(newDir, "d.cfs", newIOContext(random()), false); + + assertEquals(2, csw.listAll().length); + nested = new Lucene40CompoundReader(csw, "b.cfs", newIOContext(random()), false); + + assertEquals(2, nested.listAll().length); + IndexInput openInput = nested.openInput("b.xyz", newIOContext(random())); + assertEquals(0, openInput.readInt()); + openInput.close(); + openInput = nested.openInput("b_1.xyz", newIOContext(random())); + assertEquals(1, openInput.readInt()); + openInput.close(); + nested.close(); + csw.close(); + newDir.close(); + } + + /** Creates a file of the specified size with sequential data. The first + * byte is written as the start byte provided. All subsequent bytes are + * computed as start + offset where offset is the number of the byte. + */ + private void createSequenceFile(Directory dir, String name, byte start, int size) throws IOException { + IndexOutput os = dir.createOutput(name, newIOContext(random())); + for (int i=0; i < size; i++) { + os.writeByte(start); + start ++; + } + os.close(); + } +} diff --git a/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50CompoundFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50CompoundFormat.java new file mode 100644 index 00000000000..27d091aca48 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene50/TestLucene50CompoundFormat.java @@ -0,0 +1,31 @@ +package org.apache.lucene.codecs.lucene50; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.index.BaseCompoundFormatTestCase; +import org.apache.lucene.util.TestUtil; + +public class TestLucene50CompoundFormat extends BaseCompoundFormatTestCase { + private final Codec codec = TestUtil.getDefaultCodec(); + + @Override + protected Codec getCodec() { + return codec; + } +} diff --git a/lucene/core/src/test/org/apache/lucene/store/TestNRTCachingDirectory.java b/lucene/core/src/test/org/apache/lucene/store/TestNRTCachingDirectory.java index e5aa7f935d4..5eaa2385cd1 100644 --- a/lucene/core/src/test/org/apache/lucene/store/TestNRTCachingDirectory.java +++ b/lucene/core/src/test/org/apache/lucene/store/TestNRTCachingDirectory.java @@ -35,7 +35,6 @@ import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LineFileDocs; -import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.TestUtil; public class TestNRTCachingDirectory extends BaseDirectoryTestCase { @@ -123,24 +122,4 @@ public class TestNRTCachingDirectory extends BaseDirectoryTestCase { writer.close(); cachedFSDir.close(); } - - // LUCENE-5724 - public void testLargeCFS() throws IOException { - // nocommit: make a fake .si etc - /* - Directory dir = new NRTCachingDirectory(newFSDirectory(createTempDir()), 2.0, 25.0); - IOContext context = new IOContext(new FlushInfo(0, 512*1024*1024)); - IndexOutput out = dir.createOutput("big.bin", context); - byte[] bytes = new byte[512]; - for(int i=0;i<1024*1024;i++) { - out.writeBytes(bytes, 0, bytes.length); - } - out.close(); - - Directory cfsDir = new CompoundFileDirectory(StringHelper.randomId(), dir, "big.cfs", context, true); - dir.copy(cfsDir, "big.bin", "big.bin", context); - cfsDir.close(); - dir.close(); - */ - } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java new file mode 100644 index 00000000000..d3f33474e4d --- /dev/null +++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java @@ -0,0 +1,273 @@ +package org.apache.lucene.index; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.nio.file.Path; +import java.util.Collections; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FilterDirectory; +import org.apache.lucene.store.FlushInfo; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.MockDirectoryWrapper; +import org.apache.lucene.store.NRTCachingDirectory; +import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.TestUtil; +import org.apache.lucene.util.Version; + +/** + * Abstract class to do basic tests for a compound format. + * NOTE: This test focuses on the compound impl, nothing else. + * The [stretch] goal is for this test to be + * so thorough in testing a new CompoundFormat that if this + * test passes, then all Lucene/Solr tests should also pass. Ie, + * if there is some bug in a given CompoundFormat that this + * test fails to catch then this test needs to be improved! */ +public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTestCase { + private Directory dir; + + @Override + public void setUp() throws Exception { + super.setUp(); + Path file = createTempDir("testIndex"); + dir = newFSDirectory(file); + } + + @Override + public void tearDown() throws Exception { + dir.close(); + super.tearDown(); + } + + // test that empty CFS is empty + public void testEmpty() throws IOException { + Directory dir = newDirectory(); + + final SegmentInfo si = newSegmentInfo(dir, "_123"); + si.getCodec().compoundFormat().write(dir, si, Collections.emptyList(), MergeState.CheckAbort.NONE, IOContext.DEFAULT); + Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT); + assertEquals(0, cfs.listAll().length); + cfs.close(); + dir.close(); + } + + // test that a second call to close() behaves according to Closeable + public void testDoubleClose() throws IOException { + final String testfile = "_123.test"; + + Directory dir = newDirectory(); + IndexOutput out = dir.createOutput(testfile, IOContext.DEFAULT); + out.writeInt(3); + out.close(); + + final SegmentInfo si = newSegmentInfo(dir, "_123"); + si.getCodec().compoundFormat().write(dir, si, Collections.singleton(testfile), MergeState.CheckAbort.NONE, IOContext.DEFAULT); + Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT); + assertEquals(1, cfs.listAll().length); + cfs.close(); + cfs.close(); // second close should not throw exception + dir.close(); + } + + // LUCENE-5724: things like NRTCachingDir rely upon IOContext being properly passed down + public void testPassIOContext() throws IOException { + final String testfile = "_123.test"; + final IOContext myContext = new IOContext(); + + Directory dir = new FilterDirectory(newDirectory()) { + @Override + public IndexOutput createOutput(String name, IOContext context) throws IOException { + assertSame(myContext, context); + return super.createOutput(name, context); + } + }; + IndexOutput out = dir.createOutput(testfile, myContext); + out.writeInt(3); + out.close(); + + final SegmentInfo si = newSegmentInfo(dir, "_123"); + si.getCodec().compoundFormat().write(dir, si, Collections.singleton(testfile), MergeState.CheckAbort.NONE, myContext); + dir.close(); + } + + // LUCENE-5724: actually test we play nice with NRTCachingDir and massive file + public void testLargeCFS() throws IOException { + final String testfile = "_123.test"; + IOContext context = new IOContext(new FlushInfo(0, 512*1024*1024)); + + Directory dir = new NRTCachingDirectory(newFSDirectory(createTempDir()), 2.0, 25.0); + + IndexOutput out = dir.createOutput(testfile, context); + byte[] bytes = new byte[512]; + for(int i=0;i<1024*1024;i++) { + out.writeBytes(bytes, 0, bytes.length); + } + out.close(); + + final SegmentInfo si = newSegmentInfo(dir, "_123"); + si.getCodec().compoundFormat().write(dir, si, Collections.singleton(testfile), MergeState.CheckAbort.NONE, context); + + dir.close(); + } + + // Just tests that we can open all files returned by listAll + public void testListAll() throws Exception { + Directory dir = newDirectory(); + if (dir instanceof MockDirectoryWrapper) { + // test lists files manually and tries to verify every .cfs it finds, + // but a virus scanner could leave some trash. + ((MockDirectoryWrapper)dir).setEnableVirusScanner(false); + } + // riw should sometimes create docvalues fields, etc + RandomIndexWriter riw = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + // these fields should sometimes get term vectors, etc + Field idField = newStringField("id", "", Field.Store.NO); + Field bodyField = newTextField("body", "", Field.Store.NO); + doc.add(idField); + doc.add(bodyField); + for (int i = 0; i < 100; i++) { + idField.setStringValue(Integer.toString(i)); + bodyField.setStringValue(TestUtil.randomUnicodeString(random())); + riw.addDocument(doc); + if (random().nextInt(7) == 0) { + riw.commit(); + } + } + riw.close(); + SegmentInfos infos = new SegmentInfos(); + infos.read(dir); + for (SegmentCommitInfo si : infos) { + if (si.info.getUseCompoundFile()) { + try (Directory cfsDir = si.info.getCodec().compoundFormat().getCompoundReader(dir, si.info, newIOContext(random()))) { + for (String cfsFile : cfsDir.listAll()) { + try (IndexInput cfsIn = cfsDir.openInput(cfsFile, IOContext.DEFAULT)) {} + } + } + } + } + dir.close(); + } + + /** Returns a new fake segment */ + static SegmentInfo newSegmentInfo(Directory dir, String name) { + return new SegmentInfo(dir, Version.LATEST, name, 10000, false, Codec.getDefault(), null, StringHelper.randomId()); + } + + /** Creates a file of the specified size with random data. */ + static void createRandomFile(Directory dir, String name, int size) throws IOException { + IndexOutput os = dir.createOutput(name, newIOContext(random())); + for (int i=0; i 0) { + int readLen = (int) Math.min(remainder, expectedBuffer.length); + expected.readBytes(expectedBuffer, 0, readLen); + test.readBytes(testBuffer, 0, readLen); + assertEqualArrays(msg + ", remainder " + remainder, expectedBuffer, testBuffer, 0, readLen); + remainder -= readLen; + } + } + + static void assertSameStreams(String msg, IndexInput expected, IndexInput actual, long seekTo) throws IOException { + if (seekTo >= 0 && seekTo < expected.length()) { + expected.seek(seekTo); + actual.seek(seekTo); + assertSameStreams(msg + ", seek(mid)", expected, actual); + } + } + + static void assertSameSeekBehavior(String msg, IndexInput expected, IndexInput actual) throws IOException { + // seek to 0 + long point = 0; + assertSameStreams(msg + ", seek(0)", expected, actual, point); + + // seek to middle + point = expected.length() / 2l; + assertSameStreams(msg + ", seek(mid)", expected, actual, point); + + // seek to end - 2 + point = expected.length() - 2; + assertSameStreams(msg + ", seek(end-2)", expected, actual, point); + + // seek to end - 1 + point = expected.length() - 1; + assertSameStreams(msg + ", seek(end-1)", expected, actual, point); + + // seek to the end + point = expected.length(); + assertSameStreams(msg + ", seek(end)", expected, actual, point); + + // seek past end + point = expected.length() + 1; + assertSameStreams(msg + ", seek(end+1)", expected, actual, point); + } + + static void assertEqualArrays(String msg, byte[] expected, byte[] test, int start, int len) { + assertNotNull(msg + " null expected", expected); + assertNotNull(msg + " null test", test); + + for (int i=start; i