diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index a0379bb69ee..f4fa8ba5c9b 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -9,6 +9,12 @@ Bug Fixes
* LUCENE-6662: Fixed potential resource leaks. (Rishabh Patel via Adrien Grand)
+Improvements
+
+* LUCENE-7323: Compound file writing now verifies the incoming
+ sub-files' checksums and segment IDs, to catch hardware issues or
+ filesystem bugs earlier (Robert Muir, Mike McCandless)
+
Other
* LUCENE-4787: Fixed some highlighting javadocs. (Michael Dodsworth via Adrien
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java
index 46ac9839fa6..a846dc9d36a 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextDocValuesFormat.java
@@ -122,7 +122,7 @@ import org.apache.lucene.index.SegmentWriteState;
* and saving the offset/etc for each field.
* @lucene.experimental
*/
-public class SimpleTextDocValuesFormat extends DocValuesFormat {
+class SimpleTextDocValuesFormat extends DocValuesFormat {
public SimpleTextDocValuesFormat() {
super("SimpleText");
diff --git a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPostingsFormat.java b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPostingsFormat.java
index a77050561c6..44371200585 100644
--- a/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPostingsFormat.java
+++ b/lucene/codecs/src/java/org/apache/lucene/codecs/simpletext/SimpleTextPostingsFormat.java
@@ -34,7 +34,7 @@ import org.apache.lucene.index.SegmentWriteState;
* any text editor, and even edit it to alter your index.
*
* @lucene.experimental */
-public final class SimpleTextPostingsFormat extends PostingsFormat {
+final class SimpleTextPostingsFormat extends PostingsFormat {
public SimpleTextPostingsFormat() {
super("SimpleText");
diff --git a/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat b/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat
index 3e7164d967e..daef7c58536 100644
--- a/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat
+++ b/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.DocValuesFormat
@@ -15,4 +15,3 @@
org.apache.lucene.codecs.memory.MemoryDocValuesFormat
org.apache.lucene.codecs.memory.DirectDocValuesFormat
-org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat
diff --git a/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat b/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
index b82f15600d8..753b6d7b3ca 100644
--- a/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
+++ b/lucene/codecs/src/resources/META-INF/services/org.apache.lucene.codecs.PostingsFormat
@@ -19,5 +19,4 @@ org.apache.lucene.codecs.memory.DirectPostingsFormat
org.apache.lucene.codecs.memory.FSTOrdPostingsFormat
org.apache.lucene.codecs.memory.FSTPostingsFormat
org.apache.lucene.codecs.memory.MemoryPostingsFormat
-org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat
org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat
diff --git a/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextCompoundFormat.java b/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextCompoundFormat.java
index ea38832593e..2f54e2c9159 100644
--- a/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextCompoundFormat.java
+++ b/lucene/codecs/src/test/org/apache/lucene/codecs/simpletext/TestSimpleTextCompoundFormat.java
@@ -27,4 +27,14 @@ public class TestSimpleTextCompoundFormat extends BaseCompoundFormatTestCase {
protected Codec getCodec() {
return codec;
}
+
+ @Override
+ public void testCorruptFilesAreCaught() {
+ // SimpleText does not catch broken sub-files in CFS!
+ }
+
+ @Override
+ public void testMissingCodecHeadersAreCaught() {
+ // SimpleText does not catch broken sub-files in CFS!
+ }
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
index 62bf2d58dc3..da487d00c91 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/CodecUtil.java
@@ -258,6 +258,57 @@ public final class CodecUtil {
return version;
}
+ /**
+ * Expert: verifies the incoming {@link IndexInput} has an index header
+ * and that its segment ID matches the expected one, and then copies
+ * that index header into the provided {@link DataOutput}. This is
+ * useful when building compound files.
+ *
+ * @param in Input stream, positioned at the point where the
+ * index header was previously written. Typically this is located
+ * at the beginning of the file.
+ * @param out Output stream, where the header will be copied to.
+ * @param expectedID Expected segment ID
+ * @throws CorruptIndexException If the file is too small to hold a header and footer,
+ * if the first four bytes are not {@link #CODEC_MAGIC}, or if the
+ * segment ID does not match expectedID.
+ * @throws IOException If there is an I/O error reading from the underlying medium.
+ *
+ * @lucene.internal
+ */
+ public static void verifyAndCopyIndexHeader(IndexInput in, DataOutput out, byte[] expectedID) throws IOException {
+ // reject files shorter than a footer plus a header with an empty codec name
+ if (in.length() < footerLength() + headerLength("")) {
+ throw new CorruptIndexException("compound sub-files must have a valid codec header and footer: file is too small (" + in.length() + " bytes)", in);
+ }
+
+ int actualHeader = in.readInt();
+ if (actualHeader != CODEC_MAGIC) {
+ throw new CorruptIndexException("compound sub-files must have a valid codec header and footer: codec header mismatch: actual header=" + actualHeader + " vs expected header=" + CodecUtil.CODEC_MAGIC, in);
+ }
+
+ // the codec name and version can't be verified here, so they are passed through as-is:
+ String codec = in.readString();
+ int version = in.readInt();
+
+ // verify the segment id read from the input against expectedID:
+ checkIndexHeaderID(in, expectedID);
+
+ // the suffix can't be verified either, so it is passed through (its length is one unsigned byte):
+ int suffixLength = in.readByte() & 0xFF;
+ byte[] suffixBytes = new byte[suffixLength];
+ in.readBytes(suffixBytes, 0, suffixLength);
+
+ // now re-write the header we just verified; the id bytes written are expectedID itself
+ out.writeInt(CodecUtil.CODEC_MAGIC);
+ out.writeString(codec);
+ out.writeInt(version);
+ out.writeBytes(expectedID, 0, expectedID.length);
+ out.writeByte((byte) suffixLength);
+ out.writeBytes(suffixBytes, 0, suffixLength);
+ }
+
+
/** Retrieves the full index header from the provided {@link IndexInput}.
* This throws {@link CorruptIndexException} if this file does
* not appear to be an index file. */
@@ -474,7 +525,7 @@ public final class CodecUtil {
* @throws CorruptIndexException if CRC is formatted incorrectly (wrong bits set)
* @throws IOException if an i/o error occurs
*/
- public static long readCRC(IndexInput input) throws IOException {
+ static long readCRC(IndexInput input) throws IOException {
long value = input.readLong();
if ((value & 0xFFFFFFFF00000000L) != 0) {
throw new CorruptIndexException("Illegal CRC-32 checksum: " + value, input);
@@ -487,7 +538,7 @@ public final class CodecUtil {
* @throws IllegalStateException if CRC is formatted incorrectly (wrong bits set)
* @throws IOException if an i/o error occurs
*/
- public static void writeCRC(IndexOutput output) throws IOException {
+ static void writeCRC(IndexOutput output) throws IOException {
long value = output.getChecksum();
if ((value & 0xFFFFFFFF00000000L) != 0) {
throw new IllegalStateException("Illegal CRC-32 checksum: " + value + " (resource=" + output + ")");
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/CompoundFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/CompoundFormat.java
index 954a78e0e7f..af1cc2af5e2 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/CompoundFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/CompoundFormat.java
@@ -43,7 +43,9 @@ public abstract class CompoundFormat {
public abstract Directory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) throws IOException;
/**
- * Packs the provided segment's files into a compound format.
+ * Packs the provided segment's files into a compound format. All files referenced
+ * by the provided {@link SegmentInfo} must have {@link CodecUtil#writeIndexHeader}
+ * and {@link CodecUtil#writeFooter}.
*/
public abstract void write(Directory dir, SegmentInfo si, IOContext context) throws IOException;
}
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundFormat.java
index 2a40bde2ed5..da2b93fcee1 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundFormat.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundFormat.java
@@ -18,17 +18,17 @@ package org.apache.lucene.codecs.lucene50;
import java.io.IOException;
-import java.util.Collection;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.CompoundFormat;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
+import org.apache.lucene.util.StringHelper;
/**
* Lucene 5.0 compound file format
@@ -76,6 +76,9 @@ public final class Lucene50CompoundFormat extends CompoundFormat {
String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION);
String entriesFile = IndexFileNames.segmentFileName(si.name, "", ENTRIES_EXTENSION);
+ byte[] expectedID = si.getId();
+ byte[] id = new byte[StringHelper.ID_LENGTH];
+
try (IndexOutput data = dir.createOutput(dataFile, context);
IndexOutput entries = dir.createOutput(entriesFile, context)) {
CodecUtil.writeIndexHeader(data, DATA_CODEC, VERSION_CURRENT, si.getId(), "");
@@ -87,8 +90,23 @@ public final class Lucene50CompoundFormat extends CompoundFormat {
// write bytes for file
long startOffset = data.getFilePointer();
- try (IndexInput in = dir.openInput(file, IOContext.READONCE)) {
- data.copyBytes(in, in.length());
+ try (ChecksumIndexInput in = dir.openChecksumInput(file, IOContext.READONCE)) {
+
+ // just copies the index header, verifying that its id matches what we expect
+ CodecUtil.verifyAndCopyIndexHeader(in, data, si.getId());
+
+ // copy all bytes except the footer
+ long numBytesToCopy = in.length() - CodecUtil.footerLength() - in.getFilePointer();
+ data.copyBytes(in, numBytesToCopy);
+
+ // verify footer (checksum) matches for the incoming file we are copying
+ long checksum = CodecUtil.checkFooter(in);
+
+ // this is poached from CodecUtil.writeFooter, but we need to use our own checksum, not data.getChecksum(), but I think
+ // adding a public method to CodecUtil to do that is somewhat dangerous:
+ data.writeInt(CodecUtil.FOOTER_MAGIC);
+ data.writeInt(0);
+ data.writeLong(checksum);
}
long endOffset = data.getFilePointer();
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java
index a4487826d62..f7de16915dc 100644
--- a/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java
+++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene50/Lucene50CompoundReader.java
@@ -100,7 +100,7 @@ final class Lucene50CompoundReader extends Directory {
}
/** Helper method that reads CFS entries from an input stream */
- private final Map readEntries(byte[] segmentID, Directory dir, String entriesFileName) throws IOException {
+ private Map readEntries(byte[] segmentID, Directory dir, String entriesFileName) throws IOException {
Map mapping = null;
try (ChecksumIndexInput entriesStream = dir.openChecksumInput(entriesFileName, IOContext.READONCE)) {
Throwable priorE = null;
@@ -140,7 +140,8 @@ final class Lucene50CompoundReader extends Directory {
final String id = IndexFileNames.stripSegmentName(name);
final FileEntry entry = entries.get(id);
if (entry == null) {
- throw new FileNotFoundException("No sub-file with id " + id + " found (fileName=" + name + " files: " + entries.keySet() + ")");
+ String datFileName = IndexFileNames.segmentFileName(segmentName, "", Lucene50CompoundFormat.DATA_EXTENSION);
+ throw new FileNotFoundException("No sub-file with id " + id + " found in compound file \"" + datFileName + "\" (fileName=" + name + " files: " + entries.keySet() + ")");
}
return handle.slice(name, entry.offset, entry.length);
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCodecUtil.java b/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java
similarity index 99%
rename from lucene/core/src/test/org/apache/lucene/index/TestCodecUtil.java
rename to lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java
index 9752ce3e752..d403f81b54f 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestCodecUtil.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/TestCodecUtil.java
@@ -14,13 +14,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.lucene.index;
+package org.apache.lucene.codecs;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.BufferedChecksumIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java
index 2eb0d1a94e4..1ebfb69304f 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldDocValuesFormat.java
@@ -79,7 +79,7 @@ public class TestPerFieldDocValuesFormat extends BaseDocValuesFormatTestCase {
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
final DocValuesFormat fast = TestUtil.getDefaultDocValuesFormat();
- final DocValuesFormat slow = DocValuesFormat.forName("SimpleText");
+ final DocValuesFormat slow = DocValuesFormat.forName("Memory");
iwc.setCodec(new AssertingCodec() {
@Override
public DocValuesFormat getDocValuesFormatForField(String field) {
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
index 67d61df375e..58c37fc525b 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldPostingsFormat2.java
@@ -24,8 +24,8 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.asserting.AssertingCodec;
import org.apache.lucene.codecs.blockterms.LuceneVarGapFixedInterval;
+import org.apache.lucene.codecs.memory.DirectPostingsFormat;
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
-import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
@@ -33,8 +33,8 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@@ -202,13 +202,13 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
public static class MockCodec extends AssertingCodec {
final PostingsFormat luceneDefault = TestUtil.getDefaultPostingsFormat();
- final PostingsFormat simpleText = new SimpleTextPostingsFormat();
+ final PostingsFormat direct = new DirectPostingsFormat();
final PostingsFormat memory = new MemoryPostingsFormat();
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if (field.equals("id")) {
- return simpleText;
+ return direct;
} else if (field.equals("content")) {
return memory;
} else {
@@ -219,12 +219,12 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
public static class MockCodec2 extends AssertingCodec {
final PostingsFormat luceneDefault = TestUtil.getDefaultPostingsFormat();
- final PostingsFormat simpleText = new SimpleTextPostingsFormat();
+ final PostingsFormat direct = new DirectPostingsFormat();
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if (field.equals("id")) {
- return simpleText;
+ return direct;
} else {
return luceneDefault;
}
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java
index d1148ef13a6..a78af965d97 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestAddIndexes.java
@@ -1084,14 +1084,14 @@ public class TestAddIndexes extends LuceneTestCase {
}
private static final class CustomPerFieldCodec extends AssertingCodec {
- private final PostingsFormat simpleTextFormat = PostingsFormat.forName("SimpleText");
+ private final PostingsFormat directFormat = PostingsFormat.forName("Direct");
private final PostingsFormat defaultFormat = TestUtil.getDefaultPostingsFormat();
private final PostingsFormat memoryFormat = PostingsFormat.forName("Memory");
@Override
public PostingsFormat getPostingsFormatForField(String field) {
if (field.equals("id")) {
- return simpleTextFormat;
+ return directFormat;
} else if (field.equals("content")) {
return memoryFormat;
} else {
diff --git a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
index 4d943e6a495..6b8793930e1 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/codecs/mockrandom/MockRandomPostingsFormat.java
@@ -19,6 +19,7 @@ package org.apache.lucene.codecs.mockrandom;
import java.io.IOException;
import java.util.Random;
+import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
@@ -47,6 +48,7 @@ import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
+import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
@@ -107,11 +109,10 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
}
final String seedFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT);
- final IndexOutput out = state.directory.createOutput(seedFileName, state.context);
- try {
+ try(IndexOutput out = state.directory.createOutput(seedFileName, state.context)) {
+ CodecUtil.writeIndexHeader(out, "MockRandomSeed", 0, state.segmentInfo.getId(), state.segmentSuffix);
out.writeLong(seed);
- } finally {
- out.close();
+ CodecUtil.writeFooter(out);
}
final Random random = new Random(seed);
@@ -267,8 +268,10 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
final String seedFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT);
- final IndexInput in = state.directory.openInput(seedFileName, state.context);
+ final ChecksumIndexInput in = state.directory.openChecksumInput(seedFileName, state.context);
+ CodecUtil.checkIndexHeader(in, "MockRandomSeed", 0, 0, state.segmentInfo.getId(), state.segmentSuffix);
final long seed = in.readLong();
+ CodecUtil.checkFooter(in);
if (LuceneTestCase.VERBOSE) {
System.out.println("MockRandomCodec: reading from seg=" + state.segmentInfo.name + " formatID=" + state.segmentSuffix + " seed=" + seed);
}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java
index f09be9d05a6..ab5d01d42ea 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/BaseCompoundFormatTestCase.java
@@ -25,6 +25,7 @@ import java.util.List;
import java.util.Random;
import org.apache.lucene.codecs.Codec;
+import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
@@ -72,9 +73,9 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
for (int i=0; i files = new ArrayList<>();
for (String file : dir.listAll()) {
@@ -345,7 +354,6 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
}
}
- SegmentInfo si = newSegmentInfo(dir, "_123");
si.setFiles(files);
si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
@@ -370,17 +378,19 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
final int FILE_COUNT = atLeast(500);
List files = new ArrayList<>();
+ SegmentInfo si = newSegmentInfo(dir, "_123");
for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
String file = "_123." + fileIdx;
files.add(file);
- IndexOutput out = dir.createOutput(file, newIOContext(random()));
- out.writeByte((byte) fileIdx);
- out.close();
+ try (IndexOutput out = dir.createOutput(file, newIOContext(random()))) {
+ CodecUtil.writeIndexHeader(out, "Foo", 0, si.getId(), "suffix");
+ out.writeByte((byte) fileIdx);
+ CodecUtil.writeFooter(out);
+ }
}
assertEquals(0, dir.getFileHandleCount());
- SegmentInfo si = newSegmentInfo(dir, "_123");
si.setFiles(files);
si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
@@ -388,6 +398,7 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
final IndexInput[] ins = new IndexInput[FILE_COUNT];
for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
ins[fileIdx] = cfs.openInput("_123." + fileIdx, newIOContext(random()));
+ CodecUtil.checkIndexHeader(ins[fileIdx], "Foo", 0, 0, si.getId(), "suffix");
}
assertEquals(1, dir.getFileHandleCount());
@@ -631,27 +642,31 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
}
/** Creates a file of the specified size with random data. */
- protected static void createRandomFile(Directory dir, String name, int size) throws IOException {
- IndexOutput os = dir.createOutput(name, newIOContext(random()));
+ protected static void createRandomFile(Directory dir, String name, int size, byte[] segId) throws IOException {
Random rnd = random();
- for (int i=0; i files = new ArrayList<>();
+ SegmentInfo si = newSegmentInfo(dir, "_123");
for (int i = 0; i < 20; i++) {
- createSequenceFile(dir, "_123.f" + i, (byte) 0, 2000);
+ createSequenceFile(dir, "_123.f" + i, (byte) 0, 2000, si.getId(), "suffix");
files.add("_123.f" + i);
}
- SegmentInfo si = newSegmentInfo(dir, "_123");
si.setFiles(files);
si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
@@ -750,9 +765,9 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
public void testResourceNameInsideCompoundFile() throws Exception {
Directory dir = newDirectory();
String subFile = "_123.xyz";
- createSequenceFile(dir, subFile, (byte) 0, 10);
-
SegmentInfo si = newSegmentInfo(dir, "_123");
+ createSequenceFile(dir, subFile, (byte) 0, 10, si.getId(), "suffix");
+
si.setFiles(Collections.singletonList(subFile));
si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
@@ -762,4 +777,48 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
cfs.close();
dir.close();
}
+
+ public void testMissingCodecHeadersAreCaught() throws Exception {
+ Directory dir = newDirectory();
+ String subFile = "_123.xyz";
+
+ // missing codec header
+ try (IndexOutput os = dir.createOutput(subFile, newIOContext(random()))) {
+ for (int i=0; i < 1024; i++) {
+ os.writeByte((byte) i);
+ }
+ }
+
+ SegmentInfo si = newSegmentInfo(dir, "_123");
+ si.setFiles(Collections.singletonList(subFile));
+ Exception e = expectThrows(CorruptIndexException.class, () -> si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT));
+ assertTrue(e.getMessage().contains("codec header mismatch"));
+ dir.close();
+ }
+
+ public void testCorruptFilesAreCaught() throws Exception {
+ Directory dir = newDirectory();
+ String subFile = "_123.xyz";
+
+ // wrong checksum
+ SegmentInfo si = newSegmentInfo(dir, "_123");
+ try (IndexOutput os = dir.createOutput(subFile, newIOContext(random()))) {
+ CodecUtil.writeIndexHeader(os, "Foo", 0, si.getId(), "suffix");
+ for (int i=0; i < 1024; i++) {
+ os.writeByte((byte) i);
+ }
+
+ // write footer w/ wrong checksum
+ os.writeInt(CodecUtil.FOOTER_MAGIC);
+ os.writeInt(0);
+
+ long checksum = os.getChecksum();
+ os.writeLong(checksum+1);
+ }
+
+ si.setFiles(Collections.singletonList(subFile));
+ Exception e = expectThrows(CorruptIndexException.class, () -> si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT));
+ assertTrue(e.getMessage().contains("checksum failed (hardware problem?)"));
+ dir.close();
+ }
}
diff --git a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
index c1c33f895c2..127549ff065 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/index/RandomCodec.java
@@ -22,7 +22,6 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
-import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.Set;
@@ -50,8 +49,6 @@ import org.apache.lucene.codecs.memory.FSTPostingsFormat;
import org.apache.lucene.codecs.memory.MemoryDocValuesFormat;
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
-import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat;
-import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@@ -159,10 +156,6 @@ public class RandomCodec extends AssertingCodec {
PostingsFormat codec = previousMappings.get(name);
if (codec == null) {
codec = formats.get(Math.abs(perFieldSeed ^ name.hashCode()) % formats.size());
- if (codec instanceof SimpleTextPostingsFormat && perFieldSeed % 5 != 0) {
- // make simpletext rarer, choose again
- codec = formats.get(Math.abs(perFieldSeed ^ name.toUpperCase(Locale.ROOT).hashCode()) % formats.size());
- }
previousMappings.put(name, codec);
// Safety:
assert previousMappings.size() < 10000: "test went insane";
@@ -175,10 +168,6 @@ public class RandomCodec extends AssertingCodec {
DocValuesFormat codec = previousDVMappings.get(name);
if (codec == null) {
codec = dvFormats.get(Math.abs(perFieldSeed ^ name.hashCode()) % dvFormats.size());
- if (codec instanceof SimpleTextDocValuesFormat && perFieldSeed % 5 != 0) {
- // make simpletext rarer, choose again
- codec = dvFormats.get(Math.abs(perFieldSeed ^ name.toUpperCase(Locale.ROOT).hashCode()) % dvFormats.size());
- }
previousDVMappings.put(name, codec);
// Safety:
assert previousDVMappings.size() < 10000: "test went insane";
@@ -214,7 +203,7 @@ public class RandomCodec extends AssertingCodec {
new LuceneFixedGap(TestUtil.nextInt(random, 1, 1000)),
new LuceneVarGapFixedInterval(TestUtil.nextInt(random, 1, 1000)),
new LuceneVarGapDocFreqInterval(TestUtil.nextInt(random, 1, 100), TestUtil.nextInt(random, 1, 1000)),
- random.nextInt(10) == 0 ? new SimpleTextPostingsFormat() : TestUtil.getDefaultPostingsFormat(),
+ TestUtil.getDefaultPostingsFormat(),
new AssertingPostingsFormat(),
new MemoryPostingsFormat(true, random.nextFloat()),
new MemoryPostingsFormat(false, random.nextFloat()));
@@ -223,7 +212,7 @@ public class RandomCodec extends AssertingCodec {
TestUtil.getDefaultDocValuesFormat(),
new DirectDocValuesFormat(), // maybe not a great idea...
new MemoryDocValuesFormat(),
- random.nextInt(10) == 0 ? new SimpleTextDocValuesFormat() : TestUtil.getDefaultDocValuesFormat(),
+ TestUtil.getDefaultDocValuesFormat(),
new AssertingDocValuesFormat());
Collections.shuffle(formats, random);
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml b/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml
index 629396a2117..8cd07297de4 100644
--- a/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/schema_codec.xml
@@ -17,7 +17,6 @@
-->
-
@@ -37,7 +36,6 @@
-
@@ -46,7 +44,6 @@
-
diff --git a/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java b/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java
index a1718e65d5c..0fe6a02dcca 100644
--- a/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java
+++ b/solr/core/src/test/org/apache/solr/core/TestCodecSupport.java
@@ -51,9 +51,6 @@ public class TestCodecSupport extends SolrTestCaseJ4 {
SchemaField schemaField = fields.get("string_direct_f");
PerFieldPostingsFormat format = (PerFieldPostingsFormat) codec.postingsFormat();
assertEquals("Direct", format.getPostingsFormatForField(schemaField.getName()).getName());
- schemaField = fields.get("string_simpletext_f");
- assertEquals("SimpleText",
- format.getPostingsFormatForField(schemaField.getName()).getName());
schemaField = fields.get("string_standard_f");
assertEquals(TestUtil.getDefaultPostingsFormat().getName(), format.getPostingsFormatForField(schemaField.getName()).getName());
schemaField = fields.get("string_f");
@@ -78,8 +75,6 @@ public class TestCodecSupport extends SolrTestCaseJ4 {
Codec codec = h.getCore().getCodec();
PerFieldPostingsFormat format = (PerFieldPostingsFormat) codec.postingsFormat();
- assertEquals("SimpleText", format.getPostingsFormatForField("foo_simple").getName());
- assertEquals("SimpleText", format.getPostingsFormatForField("bar_simple").getName());
assertEquals("Direct", format.getPostingsFormatForField("foo_direct").getName());
assertEquals("Direct", format.getPostingsFormatForField("bar_direct").getName());
assertEquals(TestUtil.getDefaultPostingsFormat().getName(), format.getPostingsFormatForField("foo_standard").getName());