mirror of https://github.com/apache/lucene.git
LUCENE-7323: compound file writing now verifies checksum and segment ID for the incoming sub-files, to catch hardware issues or filesystem bugs earlier
This commit is contained in:
parent
7e86ba8c73
commit
067fb25e43
|
@ -12,6 +12,12 @@ Bug Fixes
|
|||
|
||||
* LUCENE-6662: Fixed potential resource leaks. (Rishabh Patel via Adrien Grand)
|
||||
|
||||
Improvements
|
||||
|
||||
* LUCENE-7323: Compound file writing now verifies the incoming
|
||||
sub-files' checksums and segment IDs, to catch hardware issues or
|
||||
filesystem bugs earlier (Robert Muir, Mike McCandless)
|
||||
|
||||
Other
|
||||
|
||||
* LUCENE-4787: Fixed some highlighting javadocs. (Michael Dodsworth via Adrien
|
||||
|
|
|
@ -122,7 +122,7 @@ import org.apache.lucene.index.SegmentWriteState;
|
|||
* and saving the offset/etc for each field.
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class SimpleTextDocValuesFormat extends DocValuesFormat {
|
||||
class SimpleTextDocValuesFormat extends DocValuesFormat {
|
||||
|
||||
public SimpleTextDocValuesFormat() {
|
||||
super("SimpleText");
|
||||
|
|
|
@ -34,7 +34,7 @@ import org.apache.lucene.index.SegmentWriteState;
|
|||
* any text editor, and even edit it to alter your index.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public final class SimpleTextPostingsFormat extends PostingsFormat {
|
||||
final class SimpleTextPostingsFormat extends PostingsFormat {
|
||||
|
||||
public SimpleTextPostingsFormat() {
|
||||
super("SimpleText");
|
||||
|
|
|
@ -15,4 +15,3 @@
|
|||
|
||||
org.apache.lucene.codecs.memory.MemoryDocValuesFormat
|
||||
org.apache.lucene.codecs.memory.DirectDocValuesFormat
|
||||
org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat
|
||||
|
|
|
@ -19,5 +19,4 @@ org.apache.lucene.codecs.memory.DirectPostingsFormat
|
|||
org.apache.lucene.codecs.memory.FSTOrdPostingsFormat
|
||||
org.apache.lucene.codecs.memory.FSTPostingsFormat
|
||||
org.apache.lucene.codecs.memory.MemoryPostingsFormat
|
||||
org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat
|
||||
org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat
|
||||
|
|
|
@ -27,4 +27,14 @@ public class TestSimpleTextCompoundFormat extends BaseCompoundFormatTestCase {
|
|||
protected Codec getCodec() {
|
||||
return codec;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testCorruptFilesAreCaught() {
|
||||
// SimpleText does not catch broken sub-files in CFS!
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testMissingCodecHeadersAreCaught() {
|
||||
// SimpleText does not catch broken sub-files in CFS!
|
||||
}
|
||||
}
|
||||
|
|
|
@ -258,6 +258,57 @@ public final class CodecUtil {
|
|||
return version;
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: verifies the incoming {@link IndexInput} has an index header
|
||||
* and that its segment ID matches the expected one, and then copies
|
||||
* that index header into the provided {@link DataOutput}. This is
|
||||
* useful when building compound files.
|
||||
*
|
||||
* @param in Input stream, positioned at the point where the
|
||||
* index header was previously written. Typically this is located
|
||||
* at the beginning of the file.
|
||||
* @param out Output stream, where the header will be copied to.
|
||||
* @param expectedID Expected segment ID
|
||||
* @throws CorruptIndexException If the first four bytes are not
|
||||
* {@link #CODEC_MAGIC}, or if the <code>expectedID</code>
|
||||
* does not match.
|
||||
* @throws IOException If there is an I/O error reading from the underlying medium.
|
||||
*
|
||||
* @lucene.internal
|
||||
*/
|
||||
public static void verifyAndCopyIndexHeader(IndexInput in, DataOutput out, byte[] expectedID) throws IOException {
|
||||
// make sure it's large enough to have a header and footer
|
||||
if (in.length() < footerLength() + headerLength("")) {
|
||||
throw new CorruptIndexException("compound sub-files must have a valid codec header and footer: file is too small (" + in.length() + " bytes)", in);
|
||||
}
|
||||
|
||||
int actualHeader = in.readInt();
|
||||
if (actualHeader != CODEC_MAGIC) {
|
||||
throw new CorruptIndexException("compound sub-files must have a valid codec header and footer: codec header mismatch: actual header=" + actualHeader + " vs expected header=" + CodecUtil.CODEC_MAGIC, in);
|
||||
}
|
||||
|
||||
// we can't verify these, so we pass-through:
|
||||
String codec = in.readString();
|
||||
int version = in.readInt();
|
||||
|
||||
// verify id:
|
||||
checkIndexHeaderID(in, expectedID);
|
||||
|
||||
// we can't verify extension either, so we pass-through:
|
||||
int suffixLength = in.readByte() & 0xFF;
|
||||
byte[] suffixBytes = new byte[suffixLength];
|
||||
in.readBytes(suffixBytes, 0, suffixLength);
|
||||
|
||||
// now write the header we just verified
|
||||
out.writeInt(CodecUtil.CODEC_MAGIC);
|
||||
out.writeString(codec);
|
||||
out.writeInt(version);
|
||||
out.writeBytes(expectedID, 0, expectedID.length);
|
||||
out.writeByte((byte) suffixLength);
|
||||
out.writeBytes(suffixBytes, 0, suffixLength);
|
||||
}
|
||||
|
||||
|
||||
/** Retrieves the full index header from the provided {@link IndexInput}.
|
||||
* This throws {@link CorruptIndexException} if this file does
|
||||
* not appear to be an index file. */
|
||||
|
@ -474,7 +525,7 @@ public final class CodecUtil {
|
|||
* @throws CorruptIndexException if CRC is formatted incorrectly (wrong bits set)
|
||||
* @throws IOException if an i/o error occurs
|
||||
*/
|
||||
public static long readCRC(IndexInput input) throws IOException {
|
||||
static long readCRC(IndexInput input) throws IOException {
|
||||
long value = input.readLong();
|
||||
if ((value & 0xFFFFFFFF00000000L) != 0) {
|
||||
throw new CorruptIndexException("Illegal CRC-32 checksum: " + value, input);
|
||||
|
@ -487,7 +538,7 @@ public final class CodecUtil {
|
|||
* @throws IllegalStateException if CRC is formatted incorrectly (wrong bits set)
|
||||
* @throws IOException if an i/o error occurs
|
||||
*/
|
||||
public static void writeCRC(IndexOutput output) throws IOException {
|
||||
static void writeCRC(IndexOutput output) throws IOException {
|
||||
long value = output.getChecksum();
|
||||
if ((value & 0xFFFFFFFF00000000L) != 0) {
|
||||
throw new IllegalStateException("Illegal CRC-32 checksum: " + value + " (resource=" + output + ")");
|
||||
|
|
|
@ -43,7 +43,9 @@ public abstract class CompoundFormat {
|
|||
public abstract Directory getCompoundReader(Directory dir, SegmentInfo si, IOContext context) throws IOException;
|
||||
|
||||
/**
|
||||
* Packs the provided segment's files into a compound format.
|
||||
* Packs the provided segment's files into a compound format. All files referenced
|
||||
* by the provided {@link SegmentInfo} must have {@link CodecUtil#writeIndexHeader}
|
||||
* and {@link CodecUtil#writeFooter}.
|
||||
*/
|
||||
public abstract void write(Directory dir, SegmentInfo si, IOContext context) throws IOException;
|
||||
}
|
||||
|
|
|
@ -18,17 +18,17 @@ package org.apache.lucene.codecs.lucene50;
|
|||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.CompoundFormat;
|
||||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentInfo;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.DataOutput;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
||||
/**
|
||||
* Lucene 5.0 compound file format
|
||||
|
@ -76,6 +76,9 @@ public final class Lucene50CompoundFormat extends CompoundFormat {
|
|||
String dataFile = IndexFileNames.segmentFileName(si.name, "", DATA_EXTENSION);
|
||||
String entriesFile = IndexFileNames.segmentFileName(si.name, "", ENTRIES_EXTENSION);
|
||||
|
||||
byte[] expectedID = si.getId();
|
||||
byte[] id = new byte[StringHelper.ID_LENGTH];
|
||||
|
||||
try (IndexOutput data = dir.createOutput(dataFile, context);
|
||||
IndexOutput entries = dir.createOutput(entriesFile, context)) {
|
||||
CodecUtil.writeIndexHeader(data, DATA_CODEC, VERSION_CURRENT, si.getId(), "");
|
||||
|
@ -87,8 +90,23 @@ public final class Lucene50CompoundFormat extends CompoundFormat {
|
|||
|
||||
// write bytes for file
|
||||
long startOffset = data.getFilePointer();
|
||||
try (IndexInput in = dir.openInput(file, IOContext.READONCE)) {
|
||||
data.copyBytes(in, in.length());
|
||||
try (ChecksumIndexInput in = dir.openChecksumInput(file, IOContext.READONCE)) {
|
||||
|
||||
// just copies the index header, verifying that its id matches what we expect
|
||||
CodecUtil.verifyAndCopyIndexHeader(in, data, si.getId());
|
||||
|
||||
// copy all bytes except the footer
|
||||
long numBytesToCopy = in.length() - CodecUtil.footerLength() - in.getFilePointer();
|
||||
data.copyBytes(in, numBytesToCopy);
|
||||
|
||||
// verify footer (checksum) matches for the incoming file we are copying
|
||||
long checksum = CodecUtil.checkFooter(in);
|
||||
|
||||
// this is poached from CodecUtil.writeFooter, but we need to use our own checksum, not data.getChecksum(), but I think
|
||||
// adding a public method to CodecUtil to do that is somewhat dangerous:
|
||||
data.writeInt(CodecUtil.FOOTER_MAGIC);
|
||||
data.writeInt(0);
|
||||
data.writeLong(checksum);
|
||||
}
|
||||
long endOffset = data.getFilePointer();
|
||||
|
||||
|
|
|
@ -100,7 +100,7 @@ final class Lucene50CompoundReader extends Directory {
|
|||
}
|
||||
|
||||
/** Helper method that reads CFS entries from an input stream */
|
||||
private final Map<String, FileEntry> readEntries(byte[] segmentID, Directory dir, String entriesFileName) throws IOException {
|
||||
private Map<String, FileEntry> readEntries(byte[] segmentID, Directory dir, String entriesFileName) throws IOException {
|
||||
Map<String,FileEntry> mapping = null;
|
||||
try (ChecksumIndexInput entriesStream = dir.openChecksumInput(entriesFileName, IOContext.READONCE)) {
|
||||
Throwable priorE = null;
|
||||
|
@ -140,7 +140,8 @@ final class Lucene50CompoundReader extends Directory {
|
|||
final String id = IndexFileNames.stripSegmentName(name);
|
||||
final FileEntry entry = entries.get(id);
|
||||
if (entry == null) {
|
||||
throw new FileNotFoundException("No sub-file with id " + id + " found (fileName=" + name + " files: " + entries.keySet() + ")");
|
||||
String datFileName = IndexFileNames.segmentFileName(segmentName, "", Lucene50CompoundFormat.DATA_EXTENSION);
|
||||
throw new FileNotFoundException("No sub-file with id " + id + " found in compound file \"" + datFileName + "\" (fileName=" + name + " files: " + entries.keySet() + ")");
|
||||
}
|
||||
return handle.slice(name, entry.offset, entry.length);
|
||||
}
|
||||
|
|
|
@ -14,13 +14,14 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.lucene.index;
|
||||
|
||||
package org.apache.lucene.codecs;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.store.BufferedChecksumIndexInput;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.IndexInput;
|
|
@ -79,7 +79,7 @@ public class TestPerFieldDocValuesFormat extends BaseDocValuesFormatTestCase {
|
|||
// we don't use RandomIndexWriter because it might add more docvalues than we expect !!!!1
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
|
||||
final DocValuesFormat fast = TestUtil.getDefaultDocValuesFormat();
|
||||
final DocValuesFormat slow = DocValuesFormat.forName("SimpleText");
|
||||
final DocValuesFormat slow = DocValuesFormat.forName("Memory");
|
||||
iwc.setCodec(new AssertingCodec() {
|
||||
@Override
|
||||
public DocValuesFormat getDocValuesFormatForField(String field) {
|
||||
|
|
|
@ -24,8 +24,8 @@ import org.apache.lucene.codecs.Codec;
|
|||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
import org.apache.lucene.codecs.asserting.AssertingCodec;
|
||||
import org.apache.lucene.codecs.blockterms.LuceneVarGapFixedInterval;
|
||||
import org.apache.lucene.codecs.memory.DirectPostingsFormat;
|
||||
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
|
||||
import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
|
@ -33,8 +33,8 @@ import org.apache.lucene.document.TextField;
|
|||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.LogDocMergePolicy;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
|
@ -202,13 +202,13 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
|
|||
|
||||
public static class MockCodec extends AssertingCodec {
|
||||
final PostingsFormat luceneDefault = TestUtil.getDefaultPostingsFormat();
|
||||
final PostingsFormat simpleText = new SimpleTextPostingsFormat();
|
||||
final PostingsFormat direct = new DirectPostingsFormat();
|
||||
final PostingsFormat memory = new MemoryPostingsFormat();
|
||||
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
if (field.equals("id")) {
|
||||
return simpleText;
|
||||
return direct;
|
||||
} else if (field.equals("content")) {
|
||||
return memory;
|
||||
} else {
|
||||
|
@ -219,12 +219,12 @@ public class TestPerFieldPostingsFormat2 extends LuceneTestCase {
|
|||
|
||||
public static class MockCodec2 extends AssertingCodec {
|
||||
final PostingsFormat luceneDefault = TestUtil.getDefaultPostingsFormat();
|
||||
final PostingsFormat simpleText = new SimpleTextPostingsFormat();
|
||||
final PostingsFormat direct = new DirectPostingsFormat();
|
||||
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
if (field.equals("id")) {
|
||||
return simpleText;
|
||||
return direct;
|
||||
} else {
|
||||
return luceneDefault;
|
||||
}
|
||||
|
|
|
@ -1086,14 +1086,14 @@ public class TestAddIndexes extends LuceneTestCase {
|
|||
}
|
||||
|
||||
private static final class CustomPerFieldCodec extends AssertingCodec {
|
||||
private final PostingsFormat simpleTextFormat = PostingsFormat.forName("SimpleText");
|
||||
private final PostingsFormat directFormat = PostingsFormat.forName("Direct");
|
||||
private final PostingsFormat defaultFormat = TestUtil.getDefaultPostingsFormat();
|
||||
private final PostingsFormat memoryFormat = PostingsFormat.forName("Memory");
|
||||
|
||||
@Override
|
||||
public PostingsFormat getPostingsFormatForField(String field) {
|
||||
if (field.equals("id")) {
|
||||
return simpleTextFormat;
|
||||
return directFormat;
|
||||
} else if (field.equals("content")) {
|
||||
return memoryFormat;
|
||||
} else {
|
||||
|
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.codecs.mockrandom;
|
|||
import java.io.IOException;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.codecs.FieldsConsumer;
|
||||
import org.apache.lucene.codecs.FieldsProducer;
|
||||
import org.apache.lucene.codecs.PostingsFormat;
|
||||
|
@ -47,6 +48,7 @@ import org.apache.lucene.index.FieldInfo;
|
|||
import org.apache.lucene.index.IndexFileNames;
|
||||
import org.apache.lucene.index.SegmentReadState;
|
||||
import org.apache.lucene.index.SegmentWriteState;
|
||||
import org.apache.lucene.store.ChecksumIndexInput;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -107,11 +109,10 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
|
||||
final String seedFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT);
|
||||
final IndexOutput out = state.directory.createOutput(seedFileName, state.context);
|
||||
try {
|
||||
try(IndexOutput out = state.directory.createOutput(seedFileName, state.context)) {
|
||||
CodecUtil.writeIndexHeader(out, "MockRandomSeed", 0, state.segmentInfo.getId(), state.segmentSuffix);
|
||||
out.writeLong(seed);
|
||||
} finally {
|
||||
out.close();
|
||||
CodecUtil.writeFooter(out);
|
||||
}
|
||||
|
||||
final Random random = new Random(seed);
|
||||
|
@ -267,8 +268,10 @@ public final class MockRandomPostingsFormat extends PostingsFormat {
|
|||
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
|
||||
|
||||
final String seedFileName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, SEED_EXT);
|
||||
final IndexInput in = state.directory.openInput(seedFileName, state.context);
|
||||
final ChecksumIndexInput in = state.directory.openChecksumInput(seedFileName, state.context);
|
||||
CodecUtil.checkIndexHeader(in, "MockRandomSeed", 0, 0, state.segmentInfo.getId(), state.segmentSuffix);
|
||||
final long seed = in.readLong();
|
||||
CodecUtil.checkFooter(in);
|
||||
if (LuceneTestCase.VERBOSE) {
|
||||
System.out.println("MockRandomCodec: reading from seg=" + state.segmentInfo.name + " formatID=" + state.segmentSuffix + " seed=" + seed);
|
||||
}
|
||||
|
|
|
@ -25,6 +25,7 @@ import java.util.List;
|
|||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.codecs.CodecUtil;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StoredField;
|
||||
|
@ -72,9 +73,9 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
|
|||
for (int i=0; i<data.length; i++) {
|
||||
String testfile = "_" + i + ".test";
|
||||
Directory dir = newDirectory();
|
||||
createSequenceFile(dir, testfile, (byte) 0, data[i]);
|
||||
|
||||
SegmentInfo si = newSegmentInfo(dir, "_" + i);
|
||||
createSequenceFile(dir, testfile, (byte) 0, data[i], si.getId(), "suffix");
|
||||
|
||||
si.setFiles(Collections.singleton(testfile));
|
||||
si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
|
||||
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
|
||||
|
@ -96,10 +97,10 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
|
|||
public void testTwoFiles() throws IOException {
|
||||
String files[] = { "_123.d1", "_123.d2" };
|
||||
Directory dir = newDirectory();
|
||||
createSequenceFile(dir, files[0], (byte) 0, 15);
|
||||
createSequenceFile(dir, files[1], (byte) 0, 114);
|
||||
|
||||
SegmentInfo si = newSegmentInfo(dir, "_123");
|
||||
createSequenceFile(dir, files[0], (byte) 0, 15, si.getId(), "suffix");
|
||||
createSequenceFile(dir, files[1], (byte) 0, 114, si.getId(), "suffix");
|
||||
|
||||
si.setFiles(Arrays.asList(files));
|
||||
si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
|
||||
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
|
||||
|
@ -122,11 +123,13 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
|
|||
final String testfile = "_123.test";
|
||||
|
||||
Directory dir = newDirectory();
|
||||
IndexOutput out = dir.createOutput(testfile, IOContext.DEFAULT);
|
||||
out.writeInt(3);
|
||||
out.close();
|
||||
|
||||
SegmentInfo si = newSegmentInfo(dir, "_123");
|
||||
try (IndexOutput out = dir.createOutput(testfile, IOContext.DEFAULT)) {
|
||||
CodecUtil.writeIndexHeader(out, "Foo", 0, si.getId(), "suffix");
|
||||
out.writeInt(3);
|
||||
CodecUtil.writeFooter(out);
|
||||
}
|
||||
|
||||
si.setFiles(Collections.singleton(testfile));
|
||||
si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
|
||||
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
|
||||
|
@ -148,11 +151,13 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
|
|||
return super.createOutput(name, context);
|
||||
}
|
||||
};
|
||||
IndexOutput out = dir.createOutput(testfile, myContext);
|
||||
out.writeInt(3);
|
||||
out.close();
|
||||
|
||||
SegmentInfo si = newSegmentInfo(dir, "_123");
|
||||
try (IndexOutput out = dir.createOutput(testfile, myContext)) {
|
||||
CodecUtil.writeIndexHeader(out, "Foo", 0, si.getId(), "suffix");
|
||||
out.writeInt(3);
|
||||
CodecUtil.writeFooter(out);
|
||||
}
|
||||
|
||||
si.setFiles(Collections.singleton(testfile));
|
||||
si.getCodec().compoundFormat().write(dir, si, myContext);
|
||||
dir.close();
|
||||
|
@ -165,14 +170,16 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
|
|||
|
||||
Directory dir = new NRTCachingDirectory(newFSDirectory(createTempDir()), 2.0, 25.0);
|
||||
|
||||
IndexOutput out = dir.createOutput(testfile, context);
|
||||
byte[] bytes = new byte[512];
|
||||
for(int i=0;i<1024*1024;i++) {
|
||||
out.writeBytes(bytes, 0, bytes.length);
|
||||
}
|
||||
out.close();
|
||||
|
||||
SegmentInfo si = newSegmentInfo(dir, "_123");
|
||||
try (IndexOutput out = dir.createOutput(testfile, context)) {
|
||||
CodecUtil.writeIndexHeader(out, "Foo", 0, si.getId(), "suffix");
|
||||
byte[] bytes = new byte[512];
|
||||
for(int i=0;i<1024*1024;i++) {
|
||||
out.writeBytes(bytes, 0, bytes.length);
|
||||
}
|
||||
CodecUtil.writeFooter(out);
|
||||
}
|
||||
|
||||
si.setFiles(Collections.singleton(testfile));
|
||||
si.getCodec().compoundFormat().write(dir, si, context);
|
||||
|
||||
|
@ -326,17 +333,19 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
|
|||
// Setup the test segment
|
||||
String segment = "_123";
|
||||
int chunk = 1024; // internal buffer size used by the stream
|
||||
createRandomFile(dir, segment + ".zero", 0);
|
||||
createRandomFile(dir, segment + ".one", 1);
|
||||
createRandomFile(dir, segment + ".ten", 10);
|
||||
createRandomFile(dir, segment + ".hundred", 100);
|
||||
createRandomFile(dir, segment + ".big1", chunk);
|
||||
createRandomFile(dir, segment + ".big2", chunk - 1);
|
||||
createRandomFile(dir, segment + ".big3", chunk + 1);
|
||||
createRandomFile(dir, segment + ".big4", 3 * chunk);
|
||||
createRandomFile(dir, segment + ".big5", 3 * chunk - 1);
|
||||
createRandomFile(dir, segment + ".big6", 3 * chunk + 1);
|
||||
createRandomFile(dir, segment + ".big7", 1000 * chunk);
|
||||
SegmentInfo si = newSegmentInfo(dir, "_123");
|
||||
byte[] segId = si.getId();
|
||||
createRandomFile(dir, segment + ".zero", 0, segId);
|
||||
createRandomFile(dir, segment + ".one", 1, segId);
|
||||
createRandomFile(dir, segment + ".ten", 10, segId);
|
||||
createRandomFile(dir, segment + ".hundred", 100, segId);
|
||||
createRandomFile(dir, segment + ".big1", chunk, segId);
|
||||
createRandomFile(dir, segment + ".big2", chunk - 1, segId);
|
||||
createRandomFile(dir, segment + ".big3", chunk + 1, segId);
|
||||
createRandomFile(dir, segment + ".big4", 3 * chunk, segId);
|
||||
createRandomFile(dir, segment + ".big5", 3 * chunk - 1, segId);
|
||||
createRandomFile(dir, segment + ".big6", 3 * chunk + 1, segId);
|
||||
createRandomFile(dir, segment + ".big7", 1000 * chunk, segId);
|
||||
|
||||
List<String> files = new ArrayList<>();
|
||||
for (String file : dir.listAll()) {
|
||||
|
@ -345,7 +354,6 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
|
|||
}
|
||||
}
|
||||
|
||||
SegmentInfo si = newSegmentInfo(dir, "_123");
|
||||
si.setFiles(files);
|
||||
si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
|
||||
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
|
||||
|
@ -370,17 +378,19 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
|
|||
final int FILE_COUNT = atLeast(500);
|
||||
|
||||
List<String> files = new ArrayList<>();
|
||||
SegmentInfo si = newSegmentInfo(dir, "_123");
|
||||
for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
|
||||
String file = "_123." + fileIdx;
|
||||
files.add(file);
|
||||
IndexOutput out = dir.createOutput(file, newIOContext(random()));
|
||||
out.writeByte((byte) fileIdx);
|
||||
out.close();
|
||||
try (IndexOutput out = dir.createOutput(file, newIOContext(random()))) {
|
||||
CodecUtil.writeIndexHeader(out, "Foo", 0, si.getId(), "suffix");
|
||||
out.writeByte((byte) fileIdx);
|
||||
CodecUtil.writeFooter(out);
|
||||
}
|
||||
}
|
||||
|
||||
assertEquals(0, dir.getFileHandleCount());
|
||||
|
||||
SegmentInfo si = newSegmentInfo(dir, "_123");
|
||||
si.setFiles(files);
|
||||
si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
|
||||
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
|
||||
|
@ -388,6 +398,7 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
|
|||
final IndexInput[] ins = new IndexInput[FILE_COUNT];
|
||||
for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) {
|
||||
ins[fileIdx] = cfs.openInput("_123." + fileIdx, newIOContext(random()));
|
||||
CodecUtil.checkIndexHeader(ins[fileIdx], "Foo", 0, 0, si.getId(), "suffix");
|
||||
}
|
||||
|
||||
assertEquals(1, dir.getFileHandleCount());
|
||||
|
@ -631,27 +642,31 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
|
|||
}
|
||||
|
||||
/** Creates a file of the specified size with random data. */
|
||||
protected static void createRandomFile(Directory dir, String name, int size) throws IOException {
|
||||
IndexOutput os = dir.createOutput(name, newIOContext(random()));
|
||||
protected static void createRandomFile(Directory dir, String name, int size, byte[] segId) throws IOException {
|
||||
Random rnd = random();
|
||||
for (int i=0; i<size; i++) {
|
||||
byte b = (byte) rnd.nextInt(256);
|
||||
os.writeByte(b);
|
||||
try (IndexOutput os = dir.createOutput(name, newIOContext(random()))) {
|
||||
CodecUtil.writeIndexHeader(os, "Foo", 0, segId, "suffix");
|
||||
for (int i=0; i<size; i++) {
|
||||
byte b = (byte) rnd.nextInt(256);
|
||||
os.writeByte(b);
|
||||
}
|
||||
CodecUtil.writeFooter(os);
|
||||
}
|
||||
os.close();
|
||||
}
|
||||
|
||||
/** Creates a file of the specified size with sequential data. The first
|
||||
* byte is written as the start byte provided. All subsequent bytes are
|
||||
* computed as start + offset where offset is the number of the byte.
|
||||
*/
|
||||
protected static void createSequenceFile(Directory dir, String name, byte start, int size) throws IOException {
|
||||
IndexOutput os = dir.createOutput(name, newIOContext(random()));
|
||||
for (int i=0; i < size; i++) {
|
||||
os.writeByte(start);
|
||||
start ++;
|
||||
protected static void createSequenceFile(Directory dir, String name, byte start, int size, byte[] segID, String segSuffix) throws IOException {
|
||||
try (IndexOutput os = dir.createOutput(name, newIOContext(random()))) {
|
||||
CodecUtil.writeIndexHeader(os, "Foo", 0, segID, segSuffix);
|
||||
for (int i=0; i < size; i++) {
|
||||
os.writeByte(start);
|
||||
start ++;
|
||||
}
|
||||
CodecUtil.writeFooter(os);
|
||||
}
|
||||
os.close();
|
||||
}
|
||||
|
||||
protected static void assertSameStreams(String msg, IndexInput expected, IndexInput test) throws IOException {
|
||||
|
@ -724,12 +739,12 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
|
|||
*/
|
||||
protected static Directory createLargeCFS(Directory dir) throws IOException {
|
||||
List<String> files = new ArrayList<>();
|
||||
SegmentInfo si = newSegmentInfo(dir, "_123");
|
||||
for (int i = 0; i < 20; i++) {
|
||||
createSequenceFile(dir, "_123.f" + i, (byte) 0, 2000);
|
||||
createSequenceFile(dir, "_123.f" + i, (byte) 0, 2000, si.getId(), "suffix");
|
||||
files.add("_123.f" + i);
|
||||
}
|
||||
|
||||
SegmentInfo si = newSegmentInfo(dir, "_123");
|
||||
si.setFiles(files);
|
||||
si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
|
||||
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
|
||||
|
@ -750,9 +765,9 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
|
|||
public void testResourceNameInsideCompoundFile() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
String subFile = "_123.xyz";
|
||||
createSequenceFile(dir, subFile, (byte) 0, 10);
|
||||
|
||||
SegmentInfo si = newSegmentInfo(dir, "_123");
|
||||
createSequenceFile(dir, subFile, (byte) 0, 10, si.getId(), "suffix");
|
||||
|
||||
si.setFiles(Collections.singletonList(subFile));
|
||||
si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT);
|
||||
Directory cfs = si.getCodec().compoundFormat().getCompoundReader(dir, si, IOContext.DEFAULT);
|
||||
|
@ -762,4 +777,48 @@ public abstract class BaseCompoundFormatTestCase extends BaseIndexFileFormatTest
|
|||
cfs.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testMissingCodecHeadersAreCaught() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
String subFile = "_123.xyz";
|
||||
|
||||
// missing codec header
|
||||
try (IndexOutput os = dir.createOutput(subFile, newIOContext(random()))) {
|
||||
for (int i=0; i < 1024; i++) {
|
||||
os.writeByte((byte) i);
|
||||
}
|
||||
}
|
||||
|
||||
SegmentInfo si = newSegmentInfo(dir, "_123");
|
||||
si.setFiles(Collections.singletonList(subFile));
|
||||
Exception e = expectThrows(CorruptIndexException.class, () -> si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT));
|
||||
assertTrue(e.getMessage().contains("codec header mismatch"));
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testCorruptFilesAreCaught() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
String subFile = "_123.xyz";
|
||||
|
||||
// wrong checksum
|
||||
SegmentInfo si = newSegmentInfo(dir, "_123");
|
||||
try (IndexOutput os = dir.createOutput(subFile, newIOContext(random()))) {
|
||||
CodecUtil.writeIndexHeader(os, "Foo", 0, si.getId(), "suffix");
|
||||
for (int i=0; i < 1024; i++) {
|
||||
os.writeByte((byte) i);
|
||||
}
|
||||
|
||||
// write footer w/ wrong checksum
|
||||
os.writeInt(CodecUtil.FOOTER_MAGIC);
|
||||
os.writeInt(0);
|
||||
|
||||
long checksum = os.getChecksum();
|
||||
os.writeLong(checksum+1);
|
||||
}
|
||||
|
||||
si.setFiles(Collections.singletonList(subFile));
|
||||
Exception e = expectThrows(CorruptIndexException.class, () -> si.getCodec().compoundFormat().write(dir, si, IOContext.DEFAULT));
|
||||
assertTrue(e.getMessage().contains("checksum failed (hardware problem?)"));
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,7 +22,6 @@ import java.util.Collections;
|
|||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
@ -50,8 +49,6 @@ import org.apache.lucene.codecs.memory.FSTPostingsFormat;
|
|||
import org.apache.lucene.codecs.memory.MemoryDocValuesFormat;
|
||||
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
|
||||
import org.apache.lucene.codecs.mockrandom.MockRandomPostingsFormat;
|
||||
import org.apache.lucene.codecs.simpletext.SimpleTextDocValuesFormat;
|
||||
import org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat;
|
||||
import org.apache.lucene.index.PointValues.IntersectVisitor;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -159,10 +156,6 @@ public class RandomCodec extends AssertingCodec {
|
|||
PostingsFormat codec = previousMappings.get(name);
|
||||
if (codec == null) {
|
||||
codec = formats.get(Math.abs(perFieldSeed ^ name.hashCode()) % formats.size());
|
||||
if (codec instanceof SimpleTextPostingsFormat && perFieldSeed % 5 != 0) {
|
||||
// make simpletext rarer, choose again
|
||||
codec = formats.get(Math.abs(perFieldSeed ^ name.toUpperCase(Locale.ROOT).hashCode()) % formats.size());
|
||||
}
|
||||
previousMappings.put(name, codec);
|
||||
// Safety:
|
||||
assert previousMappings.size() < 10000: "test went insane";
|
||||
|
@ -175,10 +168,6 @@ public class RandomCodec extends AssertingCodec {
|
|||
DocValuesFormat codec = previousDVMappings.get(name);
|
||||
if (codec == null) {
|
||||
codec = dvFormats.get(Math.abs(perFieldSeed ^ name.hashCode()) % dvFormats.size());
|
||||
if (codec instanceof SimpleTextDocValuesFormat && perFieldSeed % 5 != 0) {
|
||||
// make simpletext rarer, choose again
|
||||
codec = dvFormats.get(Math.abs(perFieldSeed ^ name.toUpperCase(Locale.ROOT).hashCode()) % dvFormats.size());
|
||||
}
|
||||
previousDVMappings.put(name, codec);
|
||||
// Safety:
|
||||
assert previousDVMappings.size() < 10000: "test went insane";
|
||||
|
@ -214,7 +203,7 @@ public class RandomCodec extends AssertingCodec {
|
|||
new LuceneFixedGap(TestUtil.nextInt(random, 1, 1000)),
|
||||
new LuceneVarGapFixedInterval(TestUtil.nextInt(random, 1, 1000)),
|
||||
new LuceneVarGapDocFreqInterval(TestUtil.nextInt(random, 1, 100), TestUtil.nextInt(random, 1, 1000)),
|
||||
random.nextInt(10) == 0 ? new SimpleTextPostingsFormat() : TestUtil.getDefaultPostingsFormat(),
|
||||
TestUtil.getDefaultPostingsFormat(),
|
||||
new AssertingPostingsFormat(),
|
||||
new MemoryPostingsFormat(true, random.nextFloat()),
|
||||
new MemoryPostingsFormat(false, random.nextFloat()));
|
||||
|
@ -223,7 +212,7 @@ public class RandomCodec extends AssertingCodec {
|
|||
TestUtil.getDefaultDocValuesFormat(),
|
||||
new DirectDocValuesFormat(), // maybe not a great idea...
|
||||
new MemoryDocValuesFormat(),
|
||||
random.nextInt(10) == 0 ? new SimpleTextDocValuesFormat() : TestUtil.getDefaultDocValuesFormat(),
|
||||
TestUtil.getDefaultDocValuesFormat(),
|
||||
new AssertingDocValuesFormat());
|
||||
|
||||
Collections.shuffle(formats, random);
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
-->
|
||||
<schema name="codec" version="1.2">
|
||||
<fieldType name="string_direct" class="solr.StrField" postingsFormat="Direct"/>
|
||||
<fieldType name="string_simpletext" class="solr.StrField" postingsFormat="SimpleText"/>
|
||||
<fieldType name="string_standard" class="solr.StrField" postingsFormat="Lucene50"/>
|
||||
|
||||
<fieldType name="string_disk" class="solr.StrField" docValuesFormat="Lucene54"/>
|
||||
|
@ -37,7 +36,6 @@
|
|||
</fieldType>
|
||||
|
||||
<field name="string_direct_f" type="string_direct" indexed="true" stored="true"/>
|
||||
<field name="string_simpletext_f" type="string_simpletext" indexed="true" stored="true"/>
|
||||
<field name="string_standard_f" type="string_standard" indexed="true" stored="true"/>
|
||||
|
||||
<field name="string_disk_f" type="string_disk" indexed="false" stored="false" docValues="true" default=""/>
|
||||
|
@ -46,7 +44,6 @@
|
|||
<field name="string_f" type="string" indexed="true" stored="true" docValues="true" required="true"/>
|
||||
<field name="text" type="text_general" indexed="true" stored="true"/>
|
||||
|
||||
<dynamicField name="*_simple" type="string_simpletext" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_direct" type="string_direct" indexed="true" stored="true"/>
|
||||
<dynamicField name="*_standard" type="string_standard" indexed="true" stored="true"/>
|
||||
|
||||
|
|
|
@ -51,9 +51,6 @@ public class TestCodecSupport extends SolrTestCaseJ4 {
|
|||
SchemaField schemaField = fields.get("string_direct_f");
|
||||
PerFieldPostingsFormat format = (PerFieldPostingsFormat) codec.postingsFormat();
|
||||
assertEquals("Direct", format.getPostingsFormatForField(schemaField.getName()).getName());
|
||||
schemaField = fields.get("string_simpletext_f");
|
||||
assertEquals("SimpleText",
|
||||
format.getPostingsFormatForField(schemaField.getName()).getName());
|
||||
schemaField = fields.get("string_standard_f");
|
||||
assertEquals(TestUtil.getDefaultPostingsFormat().getName(), format.getPostingsFormatForField(schemaField.getName()).getName());
|
||||
schemaField = fields.get("string_f");
|
||||
|
@ -78,8 +75,6 @@ public class TestCodecSupport extends SolrTestCaseJ4 {
|
|||
Codec codec = h.getCore().getCodec();
|
||||
PerFieldPostingsFormat format = (PerFieldPostingsFormat) codec.postingsFormat();
|
||||
|
||||
assertEquals("SimpleText", format.getPostingsFormatForField("foo_simple").getName());
|
||||
assertEquals("SimpleText", format.getPostingsFormatForField("bar_simple").getName());
|
||||
assertEquals("Direct", format.getPostingsFormatForField("foo_direct").getName());
|
||||
assertEquals("Direct", format.getPostingsFormatForField("bar_direct").getName());
|
||||
assertEquals(TestUtil.getDefaultPostingsFormat().getName(), format.getPostingsFormatForField("foo_standard").getName());
|
||||
|
|
Loading…
Reference in New Issue