diff --git a/src/main/java/org/elasticsearch/index/store/Store.java b/src/main/java/org/elasticsearch/index/store/Store.java
index ce166cec892..4bdc3fb6902 100644
--- a/src/main/java/org/elasticsearch/index/store/Store.java
+++ b/src/main/java/org/elasticsearch/index/store/Store.java
@@ -576,6 +576,9 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
      */
     public final static class MetadataSnapshot implements Iterable<StoreFileMetaData> {
         private static final ESLogger logger = Loggers.getLogger(MetadataSnapshot.class);
+        private static final Version FIRST_LUCENE_CHECKSUM_VERSION = Version.LUCENE_4_8;
+        // we stopped writing legacy checksums in 1.3.0, so any segment written on or after that version must use the new CRC32 checksums
+        private static final Version FIRST_ES_CRC32_VERSION = org.elasticsearch.Version.V_1_3_0.luceneVersion;

         private final Map<String, StoreFileMetaData> metadata;

@@ -593,6 +596,11 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
             metadata = buildMetadata(commit, directory, logger);
         }

+        private static final boolean useLuceneChecksum(Version version, boolean hasLegacyChecksum) {
+            return (version.onOrAfter(FIRST_LUCENE_CHECKSUM_VERSION) && hasLegacyChecksum == false) // no legacy checksum, and Lucene guarantees checksums from 4.8 on
+                    || version.onOrAfter(FIRST_ES_CRC32_VERSION); // OR we know legacy checksums were no longer written when this segment was created
+        }
+
         ImmutableMap<String, StoreFileMetaData> buildMetadata(IndexCommit commit, Directory directory, ESLogger logger) throws IOException {
             ImmutableMap.Builder<String, StoreFileMetaData> builder = ImmutableMap.builder();
             Map<String, String> checksumMap = readLegacyChecksums(directory).v1();
@@ -606,7 +614,7 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
                 }
                 for (String file : info.files()) {
                     String legacyChecksum = checksumMap.get(file);
-                    if (version.onOrAfter(Version.LUCENE_4_8) && legacyChecksum == null) {
+                    if (useLuceneChecksum(version, legacyChecksum != null)) {
                         checksumFromLuceneFile(directory, file, builder, logger, version, SEGMENT_INFO_EXTENSION.equals(IndexFileNames.getExtension(file)));
                     } else {
                         builder.put(file, new StoreFileMetaData(file, directory.fileLength(file), legacyChecksum, null));
@@ -615,7 +623,7 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
             }
             final String segmentsFile = segmentCommitInfos.getSegmentsFileName();
             String legacyChecksum = checksumMap.get(segmentsFile);
-            if (maxVersion.onOrAfter(Version.LUCENE_4_8) && legacyChecksum == null) {
+            if (useLuceneChecksum(maxVersion, legacyChecksum != null)) {
                 checksumFromLuceneFile(directory, segmentsFile, builder, logger, maxVersion, true);
             } else {
                 builder.put(segmentsFile, new StoreFileMetaData(segmentsFile, directory.fileLength(segmentsFile), legacyChecksum, null));
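
Two version constants drive the new gate: Lucene writes CRC32 footers into every index file from 4.8 on, and Elasticsearch stopped writing its own legacy checksums in 1.3.0. The standalone sketch below (not part of the patch; the class and helper names are made up, and 4.9.0 is only assumed to be the Lucene version bundled with ES 1.3.0 — the real code obtains it from org.elasticsearch.Version.V_1_3_0.luceneVersion) walks the predicate through all four cases:

import org.apache.lucene.util.Version;

public final class ChecksumGateSketch {

    // Version.parse throws a checked ParseException, so wrap it for use in initializers
    private static Version version(String s) {
        try {
            return Version.parse(s);
        } catch (java.text.ParseException e) {
            throw new AssertionError(e);
        }
    }

    // first Lucene release that writes a CRC32 footer into every index file
    private static final Version FIRST_LUCENE_CHECKSUM_VERSION = version("4.8.0");
    // stand-in for org.elasticsearch.Version.V_1_3_0.luceneVersion (assumed to be 4.9.0)
    private static final Version FIRST_ES_CRC32_VERSION = version("4.9.0");

    static boolean useLuceneChecksum(Version version, boolean hasLegacyChecksum) {
        return (version.onOrAfter(FIRST_LUCENE_CHECKSUM_VERSION) && hasLegacyChecksum == false)
                || version.onOrAfter(FIRST_ES_CRC32_VERSION);
    }

    public static void main(String[] args) {
        System.out.println(useLuceneChecksum(version("4.5.0"), true));  // false - pre-4.8 segment, keep the legacy checksum
        System.out.println(useLuceneChecksum(version("4.8.0"), false)); // true  - no legacy checksum, Lucene guarantees a footer
        System.out.println(useLuceneChecksum(version("4.8.0"), true));  // false - a legacy checksum exists, so ES < 1.3.0 wrote it; trust it
        System.out.println(useLuceneChecksum(version("4.9.0"), true));  // true  - ES >= 1.3.0 never wrote legacy checksums
    }
}

The third case is the one the old condition got right and the fourth is the one it got wrong: a stray legacy checksum used to force the slower legacy path even for segments that are guaranteed to carry a Lucene CRC32.
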
diff --git a/src/test/java/org/elasticsearch/index/store/StoreTest.java b/src/test/java/org/elasticsearch/index/store/StoreTest.java
index 9463084235c..04772c46ce1 100644
--- a/src/test/java/org/elasticsearch/index/store/StoreTest.java
+++ b/src/test/java/org/elasticsearch/index/store/StoreTest.java
@@ -19,14 +19,13 @@ package org.elasticsearch.index.store;

 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.*;
+import org.apache.lucene.codecs.lucene50.Lucene50Codec;
+import org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat;
 import org.apache.lucene.document.*;
 import org.apache.lucene.index.*;
 import org.apache.lucene.store.*;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.Version;
+import org.apache.lucene.util.*;
 import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.env.ShardLock;
 import org.elasticsearch.index.Index;
@@ -36,6 +35,8 @@ import org.elasticsearch.index.store.distributor.LeastUsedDistributor;
 import org.elasticsearch.index.store.distributor.RandomWeightedDistributor;
 import org.elasticsearch.test.DummyShardLock;
 import org.elasticsearch.test.ElasticsearchLuceneTestCase;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
 import org.junit.Test;

 import java.io.FileNotFoundException;
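
The next hunk introduces OldSIMockingCodec, a FilterCodec that makes newly written segments claim they were created by Lucene 4.5.0. A minimal sketch of the FilterCodec pattern it relies on (MockCodec is an illustrative name, not part of the patch): registering under the delegate's name means segments_N still records "Lucene50", so reads resolve to the stock codec via SPI and only the write path is intercepted.

import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.SegmentInfoFormat;
import org.apache.lucene.codecs.lucene50.Lucene50Codec;

final class MockCodec extends FilterCodec {
    MockCodec() {
        // same name as the delegate: readers resolve "Lucene50" to the real codec
        super(new Lucene50Codec().getName(), new Lucene50Codec());
    }

    @Override
    public SegmentInfoFormat segmentInfoFormat() {
        // override individual format getters to change what gets written;
        // this sketch just delegates, the real test swaps in a custom write()
        return delegate.segmentInfoFormat();
    }
}

That property is essential here: the index must remain fully readable by the current codec while merely advertising an older version in its .si files.
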
@@ -188,13 +189,72 @@ public class StoreTest extends ElasticsearchLuceneTestCase {
         IOUtils.close(verifyingOutput, dir);
     }

+    private static final class OldSIMockingCodec extends FilterCodec {
+
+        protected OldSIMockingCodec() {
+            super(new Lucene50Codec().getName(), new Lucene50Codec());
+        }
+
+        @Override
+        public SegmentInfoFormat segmentInfoFormat() {
+            final SegmentInfoFormat segmentInfoFormat = super.segmentInfoFormat();
+            return new SegmentInfoFormat() {
+                @Override
+                public SegmentInfo read(Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException {
+                    return segmentInfoFormat.read(directory, segmentName, segmentID, context);
+                }
+
+                // NOTE: this is a full copy of Lucene50SegmentInfoFormat#write - the format offers
+                // no extension point that lets us write a 4.5.0 version, so we duplicate it here
+                @Override
+                public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
+                    final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
+                    si.addFile(fileName);
+
+                    boolean success = false;
+                    try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
+                        CodecUtil.writeIndexHeader(output,
+                                "Lucene50SegmentInfo",
+                                0,
+                                si.getId(),
+                                "");
+                        Version version = Version.LUCENE_4_5_0; // deliberately claim an old, pre-checksum Lucene version
+                        // Write the Lucene version that created this segment, since 3.1
+                        output.writeInt(version.major);
+                        output.writeInt(version.minor);
+                        output.writeInt(version.bugfix);
+                        assert version.prerelease == 0;
+                        output.writeInt(si.getDocCount());
+
+                        output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
+                        output.writeStringStringMap(si.getDiagnostics());
+                        Set<String> files = si.files();
+                        for (String file : files) {
+                            if (!IndexFileNames.parseSegmentName(file).equals(si.name)) {
+                                throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files);
+                            }
+                        }
+                        output.writeStringSet(files);
+                        CodecUtil.writeFooter(output);
+                        success = true;
+                    } finally {
+                        if (!success) {
+                            // TODO: are we doing this outside of the tracking wrapper? why must SIWriter clean up like this?
+                            IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
+                        }
+                    }
+                }
+            };
+        }
+    }
+
     @Test
     public void testWriteLegacyChecksums() throws IOException {
         final ShardId shardId = new ShardId(new Index("index"), 1);
         DirectoryService directoryService = new LuceneManagedDirectoryService(random());
         Store store = new Store(shardId, ImmutableSettings.EMPTY, null, directoryService, randomDistributor(directoryService), new DummyShardLock(shardId));
         // set default codec - all segments need checksums
-        IndexWriter writer = new IndexWriter(store.directory(), newIndexWriterConfig(random(), new MockAnalyzer(random())).setCodec(actualDefaultCodec()));
+        final boolean usesOldCodec = randomBoolean();
+        IndexWriter writer = new IndexWriter(store.directory(), newIndexWriterConfig(random(), new MockAnalyzer(random())).setCodec(usesOldCodec ? new OldSIMockingCodec() : actualDefaultCodec()));
         int docs = 1 + random().nextInt(100);

         for (int i = 0; i < docs; i++) {
@@ -234,23 +294,34 @@ public class StoreTest extends ElasticsearchLuceneTestCase {
             if (file.equals("write.lock") || file.equals(IndexFileNames.OLD_SEGMENTS_GEN)) {
                 continue;
             }
-            try (IndexInput input = store.directory().openInput(file, IOContext.READONCE)) {
-                String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
-                StoreFileMetaData storeFileMetaData = new StoreFileMetaData(file, store.directory().fileLength(file), checksum, null);
-                legacyMeta.put(file, storeFileMetaData);
-                checksums.add(storeFileMetaData);
-
-            }
-
+            StoreFileMetaData storeFileMetaData = new StoreFileMetaData(file, store.directory().fileLength(file), file + "checksum", null);
+            legacyMeta.put(file, storeFileMetaData);
+            checksums.add(storeFileMetaData);
         }
         checksums.write(store);

         metadata = store.getMetadata();
         Map<String, StoreFileMetaData> stringStoreFileMetaDataMap = metadata.asMap();
         assertThat(legacyMeta.size(), equalTo(stringStoreFileMetaDataMap.size()));
-        for (StoreFileMetaData meta : legacyMeta.values()) {
-            assertTrue(stringStoreFileMetaDataMap.containsKey(meta.name()));
-            assertTrue(stringStoreFileMetaDataMap.get(meta.name()).isSame(meta));
+        if (usesOldCodec) {
+            for (StoreFileMetaData meta : legacyMeta.values()) {
+                assertTrue(meta.toString(), stringStoreFileMetaDataMap.containsKey(meta.name()));
+                assertEquals(meta.name() + "checksum", meta.checksum());
+                assertTrue(meta + " vs. " + stringStoreFileMetaDataMap.get(meta.name()), stringStoreFileMetaDataMap.get(meta.name()).isSame(meta));
+            }
+        } else {
+            // even if we have a legacy checksum: with a new codec we rely on the Lucene CRC32 instead of reusing it
+            for (StoreFileMetaData meta : legacyMeta.values()) {
+                assertTrue(meta.toString(), stringStoreFileMetaDataMap.containsKey(meta.name()));
+                assertFalse(meta + " vs. " + stringStoreFileMetaDataMap.get(meta.name()), stringStoreFileMetaDataMap.get(meta.name()).isSame(meta));
+                StoreFileMetaData storeFileMetaData = metadata.get(meta.name());
+                try (IndexInput input = store.openVerifyingInput(meta.name(), IOContext.DEFAULT, storeFileMetaData)) {
+                    assertTrue(storeFileMetaData.toString(), input instanceof Store.VerifyingIndexInput);
+                    input.seek(meta.length());
+                    Store.verify(input);
+                }
+            }
         }
         assertDeleteContent(store, directoryService);
         IOUtils.close(store);
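
The verifying branch of the test leans on the CRC32 footer that 4.8+ codecs append to every file. A self-contained sketch of that round-trip (not part of the patch; the file name, header string, and payload are arbitrary) — Store.VerifyingIndexInput in the patch performs the equivalent verification incrementally while the file is read, which is what Store.verify(input) checks after seeking to the end:

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

public final class FooterChecksumSketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new RAMDirectory()) {
            try (IndexOutput out = dir.createOutput("_0_sketch.dat", IOContext.DEFAULT)) {
                CodecUtil.writeHeader(out, "sketch", 0);
                out.writeInt(42); // arbitrary payload
                CodecUtil.writeFooter(out); // appends footer magic, version, and the CRC32 of everything above
            }
            try (IndexInput in = dir.openInput("_0_sketch.dat", IOContext.READONCE)) {
                // validates the footer structure and returns the stored checksum
                long stored = CodecUtil.retrieveChecksum(in);
                System.out.println("stored CRC32: " + Long.toHexString(stored));
                // re-reads the whole file and throws CorruptIndexException on mismatch
                CodecUtil.checksumEntireFile(in);
            }
        }
    }
}
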