mirror of https://github.com/apache/lucene.git
LUCENE-6998: fix a couple places to better detect truncated index files; improve corruption testing
This commit is contained in:
parent
105c6dfe26
commit
af3a19574e
|
@ -211,6 +211,9 @@ Bug Fixes
|
||||||
* LUCENE-6984: SpanMultiTermQueryWrapper no longer modifies its wrapped query.
|
* LUCENE-6984: SpanMultiTermQueryWrapper no longer modifies its wrapped query.
|
||||||
(Alan Woodward, Adrien Grand)
|
(Alan Woodward, Adrien Grand)
|
||||||
|
|
||||||
|
* LUCENE-6998: Fix a couple places to better detect truncated index files
|
||||||
|
as corruption. (Robert Muir, Mike McCandless)
|
||||||
|
|
||||||
Other
|
Other
|
||||||
|
|
||||||
* LUCENE-6924: Upgrade randomizedtesting to 2.3.2. (Dawid Weiss)
|
* LUCENE-6924: Upgrade randomizedtesting to 2.3.2. (Dawid Weiss)
|
||||||
|
|
|
@ -17,7 +17,6 @@ package org.apache.lucene.codecs;
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.nio.charset.StandardCharsets;
|
import java.nio.charset.StandardCharsets;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
@ -397,6 +396,9 @@ public final class CodecUtil {
|
||||||
* @throws IOException if the footer is invalid
|
* @throws IOException if the footer is invalid
|
||||||
*/
|
*/
|
||||||
public static long retrieveChecksum(IndexInput in) throws IOException {
|
public static long retrieveChecksum(IndexInput in) throws IOException {
|
||||||
|
if (in.length() < footerLength()) {
|
||||||
|
throw new CorruptIndexException("misplaced codec footer (file truncated?): length=" + in.length() + " but footerLength==" + footerLength(), in);
|
||||||
|
}
|
||||||
in.seek(in.length() - footerLength());
|
in.seek(in.length() - footerLength());
|
||||||
validateFooter(in);
|
validateFooter(in);
|
||||||
return readCRC(in);
|
return readCRC(in);
|
||||||
|
|
|
@ -68,6 +68,13 @@ final class Lucene50CompoundReader extends Directory {
|
||||||
String entriesFileName = IndexFileNames.segmentFileName(segmentName, "", Lucene50CompoundFormat.ENTRIES_EXTENSION);
|
String entriesFileName = IndexFileNames.segmentFileName(segmentName, "", Lucene50CompoundFormat.ENTRIES_EXTENSION);
|
||||||
this.entries = readEntries(si.getId(), directory, entriesFileName);
|
this.entries = readEntries(si.getId(), directory, entriesFileName);
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
|
|
||||||
|
long expectedLength = CodecUtil.indexHeaderLength(Lucene50CompoundFormat.DATA_CODEC, "");
|
||||||
|
for(Map.Entry<String,FileEntry> ent : entries.entrySet()) {
|
||||||
|
expectedLength += ent.getValue().length;
|
||||||
|
}
|
||||||
|
expectedLength += CodecUtil.footerLength();
|
||||||
|
|
||||||
handle = directory.openInput(dataFileName, context);
|
handle = directory.openInput(dataFileName, context);
|
||||||
try {
|
try {
|
||||||
CodecUtil.checkIndexHeader(handle, Lucene50CompoundFormat.DATA_CODEC, version, version, si.getId(), "");
|
CodecUtil.checkIndexHeader(handle, Lucene50CompoundFormat.DATA_CODEC, version, version, si.getId(), "");
|
||||||
|
@ -77,6 +84,13 @@ final class Lucene50CompoundReader extends Directory {
|
||||||
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
||||||
// such as file truncation.
|
// such as file truncation.
|
||||||
CodecUtil.retrieveChecksum(handle);
|
CodecUtil.retrieveChecksum(handle);
|
||||||
|
|
||||||
|
// We also validate length, because e.g. if you strip 16 bytes off the .cfs we otherwise
|
||||||
|
// would not detect it:
|
||||||
|
if (handle.length() != expectedLength) {
|
||||||
|
throw new CorruptIndexException("length should be " + expectedLength + " bytes, but is " + handle.length() + " instead", handle);
|
||||||
|
}
|
||||||
|
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (!success) {
|
if (!success) {
|
||||||
|
|
|
@ -73,7 +73,8 @@ import org.apache.lucene.index.SegmentWriteState;
|
||||||
|
|
||||||
public final class Lucene60PointFormat extends PointFormat {
|
public final class Lucene60PointFormat extends PointFormat {
|
||||||
|
|
||||||
static final String CODEC_NAME = "Lucene60PointFormat";
|
static final String DATA_CODEC_NAME = "Lucene60PointFormatData";
|
||||||
|
static final String META_CODEC_NAME = "Lucene60PointFormatMeta";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Filename extension for the leaf blocks
|
* Filename extension for the leaf blocks
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
package org.apache.lucene.codecs.lucene60;
|
package org.apache.lucene.codecs.lucene60;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
@ -48,38 +47,69 @@ public class Lucene60PointReader extends PointReader implements Closeable {
|
||||||
/** Sole constructor */
|
/** Sole constructor */
|
||||||
public Lucene60PointReader(SegmentReadState readState) throws IOException {
|
public Lucene60PointReader(SegmentReadState readState) throws IOException {
|
||||||
this.readState = readState;
|
this.readState = readState;
|
||||||
String dataFileName = IndexFileNames.segmentFileName(readState.segmentInfo.name,
|
|
||||||
readState.segmentSuffix,
|
|
||||||
Lucene60PointFormat.DATA_EXTENSION);
|
|
||||||
dataIn = readState.directory.openInput(dataFileName, readState.context);
|
|
||||||
String indexFileName = IndexFileNames.segmentFileName(readState.segmentInfo.name,
|
String indexFileName = IndexFileNames.segmentFileName(readState.segmentInfo.name,
|
||||||
readState.segmentSuffix,
|
readState.segmentSuffix,
|
||||||
Lucene60PointFormat.INDEX_EXTENSION);
|
Lucene60PointFormat.INDEX_EXTENSION);
|
||||||
|
|
||||||
boolean success = false;
|
Map<Integer,Long> fieldToFileOffset = new HashMap<>();
|
||||||
|
|
||||||
// Read index file
|
// Read index file
|
||||||
try (ChecksumIndexInput indexIn = readState.directory.openChecksumInput(indexFileName, readState.context)) {
|
try (ChecksumIndexInput indexIn = readState.directory.openChecksumInput(indexFileName, readState.context)) {
|
||||||
CodecUtil.checkIndexHeader(indexIn,
|
Throwable priorE = null;
|
||||||
Lucene60PointFormat.CODEC_NAME,
|
try {
|
||||||
Lucene60PointFormat.INDEX_VERSION_START,
|
CodecUtil.checkIndexHeader(indexIn,
|
||||||
Lucene60PointFormat.INDEX_VERSION_START,
|
Lucene60PointFormat.META_CODEC_NAME,
|
||||||
|
Lucene60PointFormat.INDEX_VERSION_START,
|
||||||
|
Lucene60PointFormat.INDEX_VERSION_START,
|
||||||
|
readState.segmentInfo.getId(),
|
||||||
|
readState.segmentSuffix);
|
||||||
|
int count = indexIn.readVInt();
|
||||||
|
for(int i=0;i<count;i++) {
|
||||||
|
int fieldNumber = indexIn.readVInt();
|
||||||
|
long fp = indexIn.readVLong();
|
||||||
|
fieldToFileOffset.put(fieldNumber, fp);
|
||||||
|
}
|
||||||
|
} catch (Throwable t) {
|
||||||
|
priorE = t;
|
||||||
|
} finally {
|
||||||
|
CodecUtil.checkFooter(indexIn, priorE);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
String dataFileName = IndexFileNames.segmentFileName(readState.segmentInfo.name,
|
||||||
|
readState.segmentSuffix,
|
||||||
|
Lucene60PointFormat.DATA_EXTENSION);
|
||||||
|
boolean success = false;
|
||||||
|
dataIn = readState.directory.openInput(dataFileName, readState.context);
|
||||||
|
try {
|
||||||
|
|
||||||
|
CodecUtil.checkIndexHeader(dataIn,
|
||||||
|
Lucene60PointFormat.DATA_CODEC_NAME,
|
||||||
|
Lucene60PointFormat.DATA_VERSION_START,
|
||||||
|
Lucene60PointFormat.DATA_VERSION_START,
|
||||||
readState.segmentInfo.getId(),
|
readState.segmentInfo.getId(),
|
||||||
readState.segmentSuffix);
|
readState.segmentSuffix);
|
||||||
int count = indexIn.readVInt();
|
|
||||||
for(int i=0;i<count;i++) {
|
// NOTE: data file is too costly to verify checksum against all the bytes on open,
|
||||||
int fieldNumber = indexIn.readVInt();
|
// but for now we at least verify proper structure of the checksum footer: which looks
|
||||||
long fp = indexIn.readVLong();
|
// for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
|
||||||
|
// such as file truncation.
|
||||||
|
CodecUtil.retrieveChecksum(dataIn);
|
||||||
|
|
||||||
|
for(Map.Entry<Integer,Long> ent : fieldToFileOffset.entrySet()) {
|
||||||
|
int fieldNumber = ent.getKey();
|
||||||
|
long fp = ent.getValue();
|
||||||
dataIn.seek(fp);
|
dataIn.seek(fp);
|
||||||
BKDReader reader = new BKDReader(dataIn);
|
BKDReader reader = new BKDReader(dataIn);
|
||||||
readers.put(fieldNumber, reader);
|
readers.put(fieldNumber, reader);
|
||||||
//reader.verify(readState.segmentInfo.maxDoc());
|
|
||||||
}
|
}
|
||||||
CodecUtil.checkFooter(indexIn);
|
|
||||||
success = true;
|
success = true;
|
||||||
} finally {
|
} finally {
|
||||||
if (success == false) {
|
if (success == false) {
|
||||||
IOUtils.closeWhileHandlingException(dataIn);
|
IOUtils.closeWhileHandlingException(this);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,12 +27,12 @@ import java.util.Map;
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.codecs.PointReader;
|
import org.apache.lucene.codecs.PointReader;
|
||||||
import org.apache.lucene.codecs.PointWriter;
|
import org.apache.lucene.codecs.PointWriter;
|
||||||
import org.apache.lucene.index.PointValues.IntersectVisitor;
|
|
||||||
import org.apache.lucene.index.PointValues.Relation;
|
|
||||||
import org.apache.lucene.index.FieldInfo;
|
import org.apache.lucene.index.FieldInfo;
|
||||||
import org.apache.lucene.index.FieldInfos;
|
import org.apache.lucene.index.FieldInfos;
|
||||||
import org.apache.lucene.index.IndexFileNames;
|
import org.apache.lucene.index.IndexFileNames;
|
||||||
import org.apache.lucene.index.MergeState;
|
import org.apache.lucene.index.MergeState;
|
||||||
|
import org.apache.lucene.index.PointValues.IntersectVisitor;
|
||||||
|
import org.apache.lucene.index.PointValues.Relation;
|
||||||
import org.apache.lucene.index.SegmentWriteState;
|
import org.apache.lucene.index.SegmentWriteState;
|
||||||
import org.apache.lucene.store.IndexOutput;
|
import org.apache.lucene.store.IndexOutput;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
@ -62,7 +62,7 @@ public class Lucene60PointWriter extends PointWriter implements Closeable {
|
||||||
boolean success = false;
|
boolean success = false;
|
||||||
try {
|
try {
|
||||||
CodecUtil.writeIndexHeader(dataOut,
|
CodecUtil.writeIndexHeader(dataOut,
|
||||||
Lucene60PointFormat.CODEC_NAME,
|
Lucene60PointFormat.DATA_CODEC_NAME,
|
||||||
Lucene60PointFormat.DATA_VERSION_CURRENT,
|
Lucene60PointFormat.DATA_VERSION_CURRENT,
|
||||||
writeState.segmentInfo.getId(),
|
writeState.segmentInfo.getId(),
|
||||||
writeState.segmentSuffix);
|
writeState.segmentSuffix);
|
||||||
|
@ -184,7 +184,7 @@ public class Lucene60PointWriter extends PointWriter implements Closeable {
|
||||||
// Write index file
|
// Write index file
|
||||||
try (IndexOutput indexOut = writeState.directory.createOutput(indexFileName, writeState.context)) {
|
try (IndexOutput indexOut = writeState.directory.createOutput(indexFileName, writeState.context)) {
|
||||||
CodecUtil.writeIndexHeader(indexOut,
|
CodecUtil.writeIndexHeader(indexOut,
|
||||||
Lucene60PointFormat.CODEC_NAME,
|
Lucene60PointFormat.META_CODEC_NAME,
|
||||||
Lucene60PointFormat.INDEX_VERSION_CURRENT,
|
Lucene60PointFormat.INDEX_VERSION_CURRENT,
|
||||||
writeState.segmentInfo.getId(),
|
writeState.segmentInfo.getId(),
|
||||||
writeState.segmentSuffix);
|
writeState.segmentSuffix);
|
||||||
|
|
|
@ -0,0 +1,132 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.EOFException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
|
import org.apache.lucene.store.BaseDirectoryWrapper;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||||
|
import org.apache.lucene.util.LineFileDocs;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that a plain default detects broken index headers early (on opening a reader).
|
||||||
|
*/
|
||||||
|
public class TestAllFilesCheckIndexHeader extends LuceneTestCase {
|
||||||
|
public void test() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
|
||||||
|
if (dir instanceof MockDirectoryWrapper) {
|
||||||
|
// otherwise we can have unref'd files left in the index that won't be visited when opening a reader and lead to scary looking false failures:
|
||||||
|
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||||
|
conf.setCodec(TestUtil.getDefaultCodec());
|
||||||
|
|
||||||
|
// Disable CFS 80% of the time so we can truncate individual files, but the other 20% of the time we test truncation of .cfs/.cfe too:
|
||||||
|
if (random().nextInt(5) != 1) {
|
||||||
|
conf.setUseCompoundFile(false);
|
||||||
|
conf.getMergePolicy().setNoCFSRatio(0.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
|
||||||
|
// Use LineFileDocs so we (hopefully) get most Lucene features
|
||||||
|
// tested, e.g. IntPoint was recently added to it:
|
||||||
|
LineFileDocs docs = new LineFileDocs(random());
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
riw.addDocument(docs.nextDoc());
|
||||||
|
if (random().nextInt(7) == 0) {
|
||||||
|
riw.commit();
|
||||||
|
}
|
||||||
|
if (random().nextInt(20) == 0) {
|
||||||
|
riw.deleteDocuments(new Term("docid", Integer.toString(i)));
|
||||||
|
}
|
||||||
|
if (random().nextInt(15) == 0) {
|
||||||
|
riw.updateNumericDocValue(new Term("docid", Integer.toString(i)), "docid_intDV", Long.valueOf(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (TEST_NIGHTLY == false) {
|
||||||
|
riw.forceMerge(1);
|
||||||
|
}
|
||||||
|
riw.close();
|
||||||
|
checkIndexHeader(dir);
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkIndexHeader(Directory dir) throws IOException {
|
||||||
|
for(String name : dir.listAll()) {
|
||||||
|
checkOneFile(dir, name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkOneFile(Directory dir, String victim) throws IOException {
|
||||||
|
try (BaseDirectoryWrapper dirCopy = newDirectory()) {
|
||||||
|
dirCopy.setCheckIndexOnClose(false);
|
||||||
|
long victimLength = dir.fileLength(victim);
|
||||||
|
int wrongBytes = TestUtil.nextInt(random(), 1, (int) Math.min(100, victimLength));
|
||||||
|
assert victimLength > 0;
|
||||||
|
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: now break file " + victim + " by randomizing first " + wrongBytes + " of " + victimLength);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(String name : dir.listAll()) {
|
||||||
|
if (name.equals(victim) == false) {
|
||||||
|
dirCopy.copyFrom(dir, name, name, IOContext.DEFAULT);
|
||||||
|
} else {
|
||||||
|
try(IndexOutput out = dirCopy.createOutput(name, IOContext.DEFAULT);
|
||||||
|
IndexInput in = dir.openInput(name, IOContext.DEFAULT)) {
|
||||||
|
// keeps same file length, but replaces the first wrongBytes with random bytes:
|
||||||
|
byte[] bytes = new byte[wrongBytes];
|
||||||
|
random().nextBytes(bytes);
|
||||||
|
out.writeBytes(bytes, 0, bytes.length);
|
||||||
|
in.seek(wrongBytes);
|
||||||
|
out.copyBytes(in, victimLength - wrongBytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dirCopy.sync(Collections.singleton(name));
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// NOTE: we .close so that if the test fails (truncation not detected) we don't also get all these confusing errors about open files:
|
||||||
|
DirectoryReader.open(dirCopy).close();
|
||||||
|
fail("wrong bytes not detected after randomizing first " + wrongBytes + " bytes out of " + victimLength + " for file " + victim);
|
||||||
|
} catch (CorruptIndexException | EOFException | IndexFormatTooOldException e) {
|
||||||
|
// expected
|
||||||
|
}
|
||||||
|
|
||||||
|
// CheckIndex should also fail:
|
||||||
|
try {
|
||||||
|
TestUtil.checkIndex(dirCopy, true, true);
|
||||||
|
fail("wrong bytes not detected after randomizing first " + wrongBytes + " bytes out of " + victimLength + " for file " + victim);
|
||||||
|
} catch (CorruptIndexException | EOFException | IndexFormatTooOldException e) {
|
||||||
|
// expected
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,127 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.EOFException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
|
import org.apache.lucene.store.BaseDirectoryWrapper;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.store.IndexOutput;
|
||||||
|
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||||
|
import org.apache.lucene.util.LineFileDocs;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that a plain default detects index file truncation early (on opening a reader).
|
||||||
|
*/
|
||||||
|
public class TestAllFilesDetectTruncation extends LuceneTestCase {
|
||||||
|
public void test() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
|
||||||
|
if (dir instanceof MockDirectoryWrapper) {
|
||||||
|
// otherwise we can have unref'd files left in the index that won't be visited when opening a reader and lead to scary looking false failures:
|
||||||
|
((MockDirectoryWrapper) dir).setEnableVirusScanner(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||||
|
conf.setCodec(TestUtil.getDefaultCodec());
|
||||||
|
|
||||||
|
// Disable CFS 80% of the time so we can truncate individual files, but the other 20% of the time we test truncation of .cfs/.cfe too:
|
||||||
|
if (random().nextInt(5) != 1) {
|
||||||
|
conf.setUseCompoundFile(false);
|
||||||
|
conf.getMergePolicy().setNoCFSRatio(0.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
|
||||||
|
// Use LineFileDocs so we (hopefully) get most Lucene features
|
||||||
|
// tested, e.g. IntPoint was recently added to it:
|
||||||
|
LineFileDocs docs = new LineFileDocs(random());
|
||||||
|
for (int i = 0; i < 100; i++) {
|
||||||
|
riw.addDocument(docs.nextDoc());
|
||||||
|
if (random().nextInt(7) == 0) {
|
||||||
|
riw.commit();
|
||||||
|
}
|
||||||
|
if (random().nextInt(20) == 0) {
|
||||||
|
riw.deleteDocuments(new Term("docid", Integer.toString(i)));
|
||||||
|
}
|
||||||
|
if (random().nextInt(15) == 0) {
|
||||||
|
riw.updateNumericDocValue(new Term("docid", Integer.toString(i)), "docid_intDV", Long.valueOf(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (TEST_NIGHTLY == false) {
|
||||||
|
riw.forceMerge(1);
|
||||||
|
}
|
||||||
|
riw.close();
|
||||||
|
checkTruncation(dir);
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkTruncation(Directory dir) throws IOException {
|
||||||
|
for(String name : dir.listAll()) {
|
||||||
|
truncateOneFile(dir, name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void truncateOneFile(Directory dir, String victim) throws IOException {
|
||||||
|
try (BaseDirectoryWrapper dirCopy = newDirectory()) {
|
||||||
|
dirCopy.setCheckIndexOnClose(false);
|
||||||
|
long victimLength = dir.fileLength(victim);
|
||||||
|
int lostBytes = TestUtil.nextInt(random(), 1, (int) Math.min(100, victimLength));
|
||||||
|
assert victimLength > 0;
|
||||||
|
|
||||||
|
if (VERBOSE) {
|
||||||
|
System.out.println("TEST: now truncate file " + victim + " by removing " + lostBytes + " of " + victimLength + " bytes");
|
||||||
|
}
|
||||||
|
|
||||||
|
for(String name : dir.listAll()) {
|
||||||
|
if (name.equals(victim) == false) {
|
||||||
|
dirCopy.copyFrom(dir, name, name, IOContext.DEFAULT);
|
||||||
|
} else {
|
||||||
|
try(IndexOutput out = dirCopy.createOutput(name, IOContext.DEFAULT);
|
||||||
|
IndexInput in = dir.openInput(name, IOContext.DEFAULT)) {
|
||||||
|
out.copyBytes(in, victimLength - lostBytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
dirCopy.sync(Collections.singleton(name));
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// NOTE: we .close so that if the test fails (truncation not detected) we don't also get all these confusing errors about open files:
|
||||||
|
DirectoryReader.open(dirCopy).close();
|
||||||
|
fail("truncation not detected after removing " + lostBytes + " bytes out of " + victimLength + " for file " + victim);
|
||||||
|
} catch (CorruptIndexException | EOFException e) {
|
||||||
|
// expected
|
||||||
|
}
|
||||||
|
|
||||||
|
// CheckIndex should also fail:
|
||||||
|
try {
|
||||||
|
TestUtil.checkIndex(dirCopy, true, true);
|
||||||
|
fail("truncation not detected after removing " + lostBytes + " bytes out of " + victimLength + " for file " + victim);
|
||||||
|
} catch (CorruptIndexException | EOFException e) {
|
||||||
|
// expected
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -21,11 +21,9 @@ import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.document.Field;
|
|
||||||
import org.apache.lucene.document.NumericDocValuesField;
|
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.util.LineFileDocs;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
|
@ -38,23 +36,19 @@ public class TestAllFilesHaveChecksumFooter extends LuceneTestCase {
|
||||||
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||||
conf.setCodec(TestUtil.getDefaultCodec());
|
conf.setCodec(TestUtil.getDefaultCodec());
|
||||||
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
|
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
|
||||||
Document doc = new Document();
|
// Use LineFileDocs so we (hopefully) get most Lucene features
|
||||||
// these fields should sometimes get term vectors, etc
|
// tested, e.g. IntPoint was recently added to it:
|
||||||
Field idField = newStringField("id", "", Field.Store.NO);
|
LineFileDocs docs = new LineFileDocs(random());
|
||||||
Field bodyField = newTextField("body", "", Field.Store.NO);
|
|
||||||
Field dvField = new NumericDocValuesField("dv", 5);
|
|
||||||
doc.add(idField);
|
|
||||||
doc.add(bodyField);
|
|
||||||
doc.add(dvField);
|
|
||||||
for (int i = 0; i < 100; i++) {
|
for (int i = 0; i < 100; i++) {
|
||||||
idField.setStringValue(Integer.toString(i));
|
riw.addDocument(docs.nextDoc());
|
||||||
bodyField.setStringValue(TestUtil.randomUnicodeString(random()));
|
|
||||||
riw.addDocument(doc);
|
|
||||||
if (random().nextInt(7) == 0) {
|
if (random().nextInt(7) == 0) {
|
||||||
riw.commit();
|
riw.commit();
|
||||||
}
|
}
|
||||||
if (random().nextInt(20) == 0) {
|
if (random().nextInt(20) == 0) {
|
||||||
riw.deleteDocuments(new Term("id", Integer.toString(i)));
|
riw.deleteDocuments(new Term("docid", Integer.toString(i)));
|
||||||
|
}
|
||||||
|
if (random().nextInt(15) == 0) {
|
||||||
|
riw.updateNumericDocValue(new Term("docid", Integer.toString(i)), "docid_intDV", Long.valueOf(i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
riw.close();
|
riw.close();
|
||||||
|
|
|
@ -23,13 +23,9 @@ import java.util.Map;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.codecs.CodecUtil;
|
import org.apache.lucene.codecs.CodecUtil;
|
||||||
import org.apache.lucene.document.Document;
|
|
||||||
import org.apache.lucene.document.Field;
|
|
||||||
import org.apache.lucene.document.FieldType;
|
|
||||||
import org.apache.lucene.document.NumericDocValuesField;
|
|
||||||
import org.apache.lucene.document.TextField;
|
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.IndexInput;
|
import org.apache.lucene.store.IndexInput;
|
||||||
|
import org.apache.lucene.util.LineFileDocs;
|
||||||
import org.apache.lucene.util.LuceneTestCase;
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
import org.apache.lucene.util.TestUtil;
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
|
@ -43,32 +39,19 @@ public class TestAllFilesHaveCodecHeader extends LuceneTestCase {
|
||||||
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||||
conf.setCodec(TestUtil.getDefaultCodec());
|
conf.setCodec(TestUtil.getDefaultCodec());
|
||||||
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
|
RandomIndexWriter riw = new RandomIndexWriter(random(), dir, conf);
|
||||||
Document doc = new Document();
|
// Use LineFileDocs so we (hopefully) get most Lucene features
|
||||||
Field idField = newStringField("id", "", Field.Store.YES);
|
// tested, e.g. IntPoint was recently added to it:
|
||||||
Field bodyField = newTextField("body", "", Field.Store.YES);
|
LineFileDocs docs = new LineFileDocs(random());
|
||||||
FieldType vectorsType = new FieldType(TextField.TYPE_STORED);
|
|
||||||
vectorsType.setStoreTermVectors(true);
|
|
||||||
vectorsType.setStoreTermVectorPositions(true);
|
|
||||||
Field vectorsField = new Field("vectors", "", vectorsType);
|
|
||||||
Field dvField = new NumericDocValuesField("dv", 5);
|
|
||||||
doc.add(idField);
|
|
||||||
doc.add(bodyField);
|
|
||||||
doc.add(vectorsField);
|
|
||||||
doc.add(dvField);
|
|
||||||
for (int i = 0; i < 100; i++) {
|
for (int i = 0; i < 100; i++) {
|
||||||
idField.setStringValue(Integer.toString(i));
|
riw.addDocument(docs.nextDoc());
|
||||||
bodyField.setStringValue(TestUtil.randomUnicodeString(random()));
|
|
||||||
dvField.setLongValue(random().nextInt(5));
|
|
||||||
vectorsField.setStringValue(TestUtil.randomUnicodeString(random()));
|
|
||||||
riw.addDocument(doc);
|
|
||||||
if (random().nextInt(7) == 0) {
|
if (random().nextInt(7) == 0) {
|
||||||
riw.commit();
|
riw.commit();
|
||||||
}
|
}
|
||||||
if (random().nextInt(20) == 0) {
|
if (random().nextInt(20) == 0) {
|
||||||
riw.deleteDocuments(new Term("id", Integer.toString(i)));
|
riw.deleteDocuments(new Term("docid", Integer.toString(i)));
|
||||||
}
|
}
|
||||||
if (random().nextInt(15) == 0) {
|
if (random().nextInt(15) == 0) {
|
||||||
riw.updateNumericDocValue(new Term("id"), "dv", Long.valueOf(i));
|
riw.updateNumericDocValue(new Term("docid", Integer.toString(i)), "docid_intDV", Long.valueOf(i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
riw.close();
|
riw.close();
|
||||||
|
|
|
@ -0,0 +1,127 @@
|
||||||
|
package org.apache.lucene.index;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.EOFException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.store.BaseDirectoryWrapper;
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.IOContext;
|
||||||
|
import org.apache.lucene.store.MockDirectoryWrapper;
|
||||||
|
import org.apache.lucene.util.LineFileDocs;
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
import org.apache.lucene.util.TestUtil;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that a file with the same name, but copied from a different index, is detected as foreign (corrupt) when the index is opened.
|
||||||
|
*/
|
||||||
|
public class TestSwappedIndexFiles extends LuceneTestCase {
|
||||||
|
public void test() throws Exception {
|
||||||
|
Directory dir1 = newDirectory();
|
||||||
|
Directory dir2 = newDirectory();
|
||||||
|
|
||||||
|
if (dir1 instanceof MockDirectoryWrapper) {
|
||||||
|
// otherwise we can have unref'd files left in the index that won't be visited when opening a reader and lead to scary looking false failures:
|
||||||
|
((MockDirectoryWrapper) dir1).setEnableVirusScanner(false);
|
||||||
|
}
|
||||||
|
if (dir2 instanceof MockDirectoryWrapper) {
|
||||||
|
// otherwise we can have unref'd files left in the index that won't be visited when opening a reader and lead to scary looking false failures:
|
||||||
|
((MockDirectoryWrapper) dir2).setEnableVirusScanner(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Disable CFS 80% of the time so we can swap individual files, but the other 20% of the time we test swapping of .cfs/.cfe too:
|
||||||
|
boolean useCFS = random().nextInt(5) == 1;
|
||||||
|
|
||||||
|
// Use LineFileDocs so we (hopefully) get most Lucene features
|
||||||
|
// tested, e.g. IntPoint was recently added to it:
|
||||||
|
LineFileDocs docs = new LineFileDocs(random());
|
||||||
|
Document doc = docs.nextDoc();
|
||||||
|
long seed = random().nextLong();
|
||||||
|
|
||||||
|
indexOneDoc(seed, dir1, doc, useCFS);
|
||||||
|
indexOneDoc(seed, dir2, doc, useCFS);
|
||||||
|
|
||||||
|
swapFiles(dir1, dir2);
|
||||||
|
dir1.close();
|
||||||
|
dir2.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void indexOneDoc(long seed, Directory dir, Document doc, boolean useCFS) throws IOException {
|
||||||
|
Random random = new Random(seed);
|
||||||
|
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random));
|
||||||
|
conf.setCodec(TestUtil.getDefaultCodec());
|
||||||
|
|
||||||
|
if (useCFS == false) {
|
||||||
|
conf.setUseCompoundFile(false);
|
||||||
|
conf.getMergePolicy().setNoCFSRatio(0.0);
|
||||||
|
} else {
|
||||||
|
conf.setUseCompoundFile(true);
|
||||||
|
conf.getMergePolicy().setNoCFSRatio(1.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
RandomIndexWriter w = new RandomIndexWriter(random, dir, conf);
|
||||||
|
w.addDocument(doc);
|
||||||
|
w.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void swapFiles(Directory dir1, Directory dir2) throws IOException {
|
||||||
|
for(String name : dir1.listAll()) {
|
||||||
|
if (name.equals(IndexWriter.WRITE_LOCK_NAME)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
swapOneFile(dir1, dir2, name);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void swapOneFile(Directory dir1, Directory dir2, String victim) throws IOException {
|
||||||
|
try (BaseDirectoryWrapper dirCopy = newDirectory()) {
|
||||||
|
dirCopy.setCheckIndexOnClose(false);
|
||||||
|
|
||||||
|
// Copy all files from dir1 to dirCopy, except victim which we copy from dir2:
|
||||||
|
for(String name : dir1.listAll()) {
|
||||||
|
if (name.equals(victim) == false) {
|
||||||
|
dirCopy.copyFrom(dir1, name, name, IOContext.DEFAULT);
|
||||||
|
} else {
|
||||||
|
dirCopy.copyFrom(dir2, name, name, IOContext.DEFAULT);
|
||||||
|
}
|
||||||
|
dirCopy.sync(Collections.singleton(name));
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// NOTE: we .close so that if the test fails (swapped file not detected) we don't also get all these confusing errors about open files:
|
||||||
|
DirectoryReader.open(dirCopy).close();
|
||||||
|
fail("wrong file " + victim + " not detected");
|
||||||
|
} catch (CorruptIndexException | EOFException | IndexFormatTooOldException e) {
|
||||||
|
// expected
|
||||||
|
}
|
||||||
|
|
||||||
|
// CheckIndex should also fail:
|
||||||
|
try {
|
||||||
|
TestUtil.checkIndex(dirCopy, true, true);
|
||||||
|
fail("wrong file " + victim + " not detected");
|
||||||
|
} catch (CorruptIndexException | EOFException | IndexFormatTooOldException e) {
|
||||||
|
// expected
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue