[STORE] Use Lucene checksums if segment version is >= 4.9.0

We started to use the Lucene CRC32 checksums instead of the legacy Adler32
in `v1.3.0`, which was the first version using Lucene `4.9.0`. We can safely
assume that if the segment was written with this version, the checksums
from Lucene can be used even if the legacy checksum claims that it has an Adler32
for a given file / segment.

Closes #8587

Conflicts:
	src/main/java/org/elasticsearch/index/store/Store.java
	src/test/java/org/elasticsearch/index/store/StoreTest.java
This commit is contained in:
Simon Willnauer 2014-11-21 12:18:24 +01:00
parent 95a396159c
commit b6b3382a8b
2 changed files with 98 additions and 19 deletions

View File

@ -576,6 +576,9 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
*/
public final static class MetadataSnapshot implements Iterable<StoreFileMetaData> {
private static final ESLogger logger = Loggers.getLogger(MetadataSnapshot.class);
private static final Version FIRST_LUCENE_CHECKSUM_VERSION = Version.LUCENE_4_8;
// we stopped writing legacy checksums in 1.3.0 so all segments here must use the new CRC32 version
private static final Version FIRST_ES_CRC32_VERSION = org.elasticsearch.Version.V_1_3_0.luceneVersion;
private final Map<String, StoreFileMetaData> metadata;
@ -593,6 +596,11 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
metadata = buildMetadata(commit, directory, logger);
}
/**
 * Decides whether the Lucene-native (CRC32) checksum can be trusted for a file
 * written by the given Lucene {@code version}.
 *
 * @param version           the Lucene version that wrote the segment
 * @param hasLegacyChecksum whether a legacy (Adler32) checksum was recorded for the file
 * @return {@code true} if the Lucene checksum should be used instead of the legacy one
 */
private static final boolean useLuceneChecksum(Version version, boolean hasLegacyChecksum) {
    // Segments written by an ES version that no longer emits legacy checksums
    // are guaranteed to carry Lucene CRC32 checksums, regardless of any stale
    // legacy checksum entry.
    if (version.onOrAfter(FIRST_ES_CRC32_VERSION)) {
        return true;
    }
    // Otherwise, trust Lucene checksums only when the segment is new enough to
    // have them AND no legacy checksum was recorded for the file.
    return version.onOrAfter(FIRST_LUCENE_CHECKSUM_VERSION) && hasLegacyChecksum == false;
}
ImmutableMap<String, StoreFileMetaData> buildMetadata(IndexCommit commit, Directory directory, ESLogger logger) throws IOException {
ImmutableMap.Builder<String, StoreFileMetaData> builder = ImmutableMap.builder();
Map<String, String> checksumMap = readLegacyChecksums(directory).v1();
@ -606,7 +614,7 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
}
for (String file : info.files()) {
String legacyChecksum = checksumMap.get(file);
if (version.onOrAfter(Version.LUCENE_4_8) && legacyChecksum == null) {
if (useLuceneChecksum(version, legacyChecksum != null)) {
checksumFromLuceneFile(directory, file, builder, logger, version, SEGMENT_INFO_EXTENSION.equals(IndexFileNames.getExtension(file)));
} else {
builder.put(file, new StoreFileMetaData(file, directory.fileLength(file), legacyChecksum, null));
@ -615,7 +623,7 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
}
final String segmentsFile = segmentCommitInfos.getSegmentsFileName();
String legacyChecksum = checksumMap.get(segmentsFile);
if (maxVersion.onOrAfter(Version.LUCENE_4_8) && legacyChecksum == null) {
if (useLuceneChecksum(maxVersion, legacyChecksum != null)) {
checksumFromLuceneFile(directory, segmentsFile, builder, logger, maxVersion, true);
} else {
builder.put(segmentsFile, new StoreFileMetaData(segmentsFile, directory.fileLength(segmentsFile), legacyChecksum, null));

View File

@ -19,14 +19,13 @@
package org.elasticsearch.index.store;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.*;
import org.apache.lucene.codecs.lucene50.Lucene50Codec;
import org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.store.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.*;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.env.ShardLock;
import org.elasticsearch.index.Index;
@ -36,6 +35,8 @@ import org.elasticsearch.index.store.distributor.LeastUsedDistributor;
import org.elasticsearch.index.store.distributor.RandomWeightedDistributor;
import org.elasticsearch.test.DummyShardLock;
import org.elasticsearch.test.ElasticsearchLuceneTestCase;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.FileNotFoundException;
@ -188,13 +189,72 @@ public class StoreTest extends ElasticsearchLuceneTestCase {
IOUtils.close(verifyingOutput, dir);
}
/**
 * Test-only codec: delegates everything to {@link Lucene50Codec} but writes
 * segment info files that claim Lucene version 4.5.0, producing segments that
 * look as if they predate Lucene-native (CRC32) checksums.
 */
private static final class OldSIMockingCodec extends FilterCodec {
protected OldSIMockingCodec() {
super(new Lucene50Codec().getName(), new Lucene50Codec());
}
@Override
public SegmentInfoFormat segmentInfoFormat() {
final SegmentInfoFormat segmentInfoFormat = super.segmentInfoFormat();
return new SegmentInfoFormat() {
@Override
public SegmentInfo read(Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException {
// Reads are delegated unchanged to the wrapped Lucene50 implementation.
return segmentInfoFormat.read(directory, segmentName, segmentID, context);
}
// NOTE: this is a near-full copy of Lucene50SegmentInfoFormat#write because that
// format offers no hook to override the version it records; the whole method is
// duplicated solely to stamp LUCENE_4_5_0 below instead of the segment's real version.
@Override
public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
si.addFile(fileName);
boolean success = false;
try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
CodecUtil.writeIndexHeader(output,
"Lucene50SegmentInfo",
0,
si.getId(),
"");
Version version = Version.LUCENE_4_5_0; // deliberately stamp an old version instead of si.getVersion()
// Write the Lucene version that created this segment, since 3.1
output.writeInt(version.major);
output.writeInt(version.minor);
output.writeInt(version.bugfix);
assert version.prerelease == 0;
output.writeInt(si.getDocCount());
output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
output.writeStringStringMap(si.getDiagnostics());
Set<String> files = si.files();
for (String file : files) {
if (!IndexFileNames.parseSegmentName(file).equals(si.name)) {
throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files);
}
}
output.writeStringSet(files);
CodecUtil.writeFooter(output);
success = true;
} finally {
if (!success) {
// Best-effort cleanup of the partially written file on failure.
// TODO: are we doing this outside of the tracking wrapper? why must SIWriter cleanup like this?
IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
}
}
}
};
}
}
@Test
public void testWriteLegacyChecksums() throws IOException {
final ShardId shardId = new ShardId(new Index("index"), 1);
DirectoryService directoryService = new LuceneManagedDirectoryService(random());
Store store = new Store(shardId, ImmutableSettings.EMPTY, null, directoryService, randomDistributor(directoryService), new DummyShardLock(shardId));
// set default codec - all segments need checksums
IndexWriter writer = new IndexWriter(store.directory(), newIndexWriterConfig(random(), new MockAnalyzer(random())).setCodec(actualDefaultCodec()));
final boolean usesOldCodec = randomBoolean();
IndexWriter writer = new IndexWriter(store.directory(), newIndexWriterConfig(random(), new MockAnalyzer(random())).setCodec(usesOldCodec ? new OldSIMockingCodec() : actualDefaultCodec()));
int docs = 1 + random().nextInt(100);
for (int i = 0; i < docs; i++) {
@ -234,23 +294,34 @@ public class StoreTest extends ElasticsearchLuceneTestCase {
if (file.equals("write.lock") || file.equals(IndexFileNames.OLD_SEGMENTS_GEN)) {
continue;
}
try (IndexInput input = store.directory().openInput(file, IOContext.READONCE)) {
String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
StoreFileMetaData storeFileMetaData = new StoreFileMetaData(file, store.directory().fileLength(file), checksum, null);
legacyMeta.put(file, storeFileMetaData);
checksums.add(storeFileMetaData);
}
StoreFileMetaData storeFileMetaData = new StoreFileMetaData(file, store.directory().fileLength(file), file + "checksum", null);
legacyMeta.put(file, storeFileMetaData);
checksums.add(storeFileMetaData);
}
checksums.write(store);
metadata = store.getMetadata();
Map<String, StoreFileMetaData> stringStoreFileMetaDataMap = metadata.asMap();
assertThat(legacyMeta.size(), equalTo(stringStoreFileMetaDataMap.size()));
for (StoreFileMetaData meta : legacyMeta.values()) {
assertTrue(stringStoreFileMetaDataMap.containsKey(meta.name()));
assertTrue(stringStoreFileMetaDataMap.get(meta.name()).isSame(meta));
if (usesOldCodec) {
for (StoreFileMetaData meta : legacyMeta.values()) {
assertTrue(meta.toString(), stringStoreFileMetaDataMap.containsKey(meta.name()));
assertEquals(meta.name() + "checksum", meta.checksum());
assertTrue(meta + " vs. " + stringStoreFileMetaDataMap.get(meta.name()), stringStoreFileMetaDataMap.get(meta.name()).isSame(meta));
}
} else {
// even if we have a legacy checksum - if we use a new codec we should reuse
for (StoreFileMetaData meta : legacyMeta.values()) {
assertTrue(meta.toString(), stringStoreFileMetaDataMap.containsKey(meta.name()));
assertFalse(meta + " vs. " + stringStoreFileMetaDataMap.get(meta.name()), stringStoreFileMetaDataMap.get(meta.name()).isSame(meta));
StoreFileMetaData storeFileMetaData = metadata.get(meta.name());
try (IndexInput input = store.openVerifyingInput(meta.name(), IOContext.DEFAULT, storeFileMetaData)) {
assertTrue(storeFileMetaData.toString(), input instanceof Store.VerifyingIndexInput);
input.seek(meta.length());
Store.verify(input);
}
}
}
assertDeleteContent(store, directoryService);
IOUtils.close(store);