Mirror of https://github.com/honeymoose/OpenSearch.git (synced 2025-02-25 22:36:20 +00:00)
[STORE] Use Lucene checksums if segment version is >= 4.9.0
We started using the Lucene CRC32 checksums instead of the legacy Adler32 in `v1.3.0`, which was the first version built on Lucene `4.9.0`. We can therefore safely assume that if a segment was written with this Lucene version, the checksums from Lucene can be used even if the legacy checksum metadata claims there is an Adler32 checksum for a given file / segment.

Closes #8587

Conflicts:
	src/main/java/org/elasticsearch/index/store/Store.java
	src/test/java/org/elasticsearch/index/store/StoreTest.java
This commit is contained in:
parent 95a396159c
commit b6b3382a8b
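The change hinges on the version gate spelled out in the commit message. Below is a minimal, self-contained sketch of that rule, not the patched Store code: ChecksumSelectionSketch and its SegmentVersion class are illustrative stand-ins for Lucene's Version class, and the two constants mirror the ones the diff introduces in Store.MetadataSnapshot.

// Minimal, self-contained sketch of the checksum-selection rule described in the
// commit message. SegmentVersion is a stand-in for Lucene's Version class.
public class ChecksumSelectionSketch {

    static class SegmentVersion {
        final int major;
        final int minor;

        SegmentVersion(int major, int minor) {
            this.major = major;
            this.minor = minor;
        }

        boolean onOrAfter(SegmentVersion other) {
            return major > other.major || (major == other.major && minor >= other.minor);
        }
    }

    // First Lucene version that writes a CRC32 footer into every index file.
    static final SegmentVersion FIRST_LUCENE_CHECKSUM_VERSION = new SegmentVersion(4, 8);
    // Lucene version shipped with Elasticsearch 1.3.0, which stopped writing legacy Adler32 checksums.
    static final SegmentVersion FIRST_ES_CRC32_VERSION = new SegmentVersion(4, 9);

    // Trust the Lucene CRC32 checksum when the segment is >= 4.8 and no legacy checksum
    // exists, OR when the segment is >= 4.9 regardless of what the legacy metadata claims.
    static boolean useLuceneChecksum(SegmentVersion version, boolean hasLegacyChecksum) {
        return (version.onOrAfter(FIRST_LUCENE_CHECKSUM_VERSION) && hasLegacyChecksum == false)
                || version.onOrAfter(FIRST_ES_CRC32_VERSION);
    }

    public static void main(String[] args) {
        // 4.9 segment with a stale Adler32 entry: the Lucene checksum wins.
        System.out.println(useLuceneChecksum(new SegmentVersion(4, 9), true));  // true
        // 4.7 segment with a legacy checksum: keep using the Adler32 value.
        System.out.println(useLuceneChecksum(new SegmentVersion(4, 7), true));  // false
    }
}

With this predicate, a segment written by Lucene 4.9 is verified with the Lucene CRC32 even when a stale legacy entry claims an Adler32 checksum, while pre-4.8 segments keep using their legacy checksums.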
src/main/java/org/elasticsearch/index/store/Store.java

@@ -576,6 +576,9 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
      */
     public final static class MetadataSnapshot implements Iterable<StoreFileMetaData> {
         private static final ESLogger logger = Loggers.getLogger(MetadataSnapshot.class);
+        private static final Version FIRST_LUCENE_CHECKSUM_VERSION = Version.LUCENE_4_8;
+        // we stopped writing legacy checksums in 1.3.0 so all segments here must use the new CRC32 version
+        private static final Version FIRST_ES_CRC32_VERSION = org.elasticsearch.Version.V_1_3_0.luceneVersion;
 
         private final Map<String, StoreFileMetaData> metadata;
 
@@ -593,6 +596,11 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
             metadata = buildMetadata(commit, directory, logger);
         }
 
+        private static final boolean useLuceneChecksum(Version version, boolean hasLegacyChecksum) {
+            return (version.onOrAfter(FIRST_LUCENE_CHECKSUM_VERSION) && hasLegacyChecksum == false) // no legacy checksum and a guarantee that lucene has checksums
+                    || version.onOrAfter(FIRST_ES_CRC32_VERSION); // OR we know that we didn't even write legacy checksums anymore when this segment was written.
+        }
+
         ImmutableMap<String, StoreFileMetaData> buildMetadata(IndexCommit commit, Directory directory, ESLogger logger) throws IOException {
             ImmutableMap.Builder<String, StoreFileMetaData> builder = ImmutableMap.builder();
             Map<String, String> checksumMap = readLegacyChecksums(directory).v1();
@@ -606,7 +614,7 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
                 }
                 for (String file : info.files()) {
                     String legacyChecksum = checksumMap.get(file);
-                    if (version.onOrAfter(Version.LUCENE_4_8) && legacyChecksum == null) {
+                    if (useLuceneChecksum(version, legacyChecksum != null)) {
                         checksumFromLuceneFile(directory, file, builder, logger, version, SEGMENT_INFO_EXTENSION.equals(IndexFileNames.getExtension(file)));
                     } else {
                         builder.put(file, new StoreFileMetaData(file, directory.fileLength(file), legacyChecksum, null));
@@ -615,7 +623,7 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
             }
             final String segmentsFile = segmentCommitInfos.getSegmentsFileName();
             String legacyChecksum = checksumMap.get(segmentsFile);
-            if (maxVersion.onOrAfter(Version.LUCENE_4_8) && legacyChecksum == null) {
+            if (useLuceneChecksum(maxVersion, legacyChecksum != null)) {
                 checksumFromLuceneFile(directory, segmentsFile, builder, logger, maxVersion, true);
             } else {
                 builder.put(segmentsFile, new StoreFileMetaData(segmentsFile, directory.fileLength(segmentsFile), legacyChecksum, null));
src/test/java/org/elasticsearch/index/store/StoreTest.java

@@ -19,14 +19,13 @@
 package org.elasticsearch.index.store;
 
 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.codecs.CodecUtil;
+import org.apache.lucene.codecs.*;
+import org.apache.lucene.codecs.lucene50.Lucene50Codec;
+import org.apache.lucene.codecs.lucene50.Lucene50SegmentInfoFormat;
 import org.apache.lucene.document.*;
 import org.apache.lucene.index.*;
 import org.apache.lucene.store.*;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.TestUtil;
-import org.apache.lucene.util.Version;
+import org.apache.lucene.util.*;
 import org.elasticsearch.common.settings.ImmutableSettings;
 import org.elasticsearch.env.ShardLock;
 import org.elasticsearch.index.Index;
@@ -36,6 +35,8 @@ import org.elasticsearch.index.store.distributor.LeastUsedDistributor;
 import org.elasticsearch.index.store.distributor.RandomWeightedDistributor;
 import org.elasticsearch.test.DummyShardLock;
 import org.elasticsearch.test.ElasticsearchLuceneTestCase;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
 import org.junit.Test;
 
 import java.io.FileNotFoundException;
@@ -188,13 +189,72 @@ public class StoreTest extends ElasticsearchLuceneTestCase {
         IOUtils.close(verifyingOutput, dir);
     }
 
+    private static final class OldSIMockingCodec extends FilterCodec {
+
+        protected OldSIMockingCodec() {
+            super(new Lucene50Codec().getName(), new Lucene50Codec());
+        }
+
+        @Override
+        public SegmentInfoFormat segmentInfoFormat() {
+            final SegmentInfoFormat segmentInfoFormat = super.segmentInfoFormat();
+            return new SegmentInfoFormat() {
+                @Override
+                public SegmentInfo read(Directory directory, String segmentName, byte[] segmentID, IOContext context) throws IOException {
+                    return segmentInfoFormat.read(directory, segmentName, segmentID, context);
+                }
+                // this sucks it's a full copy of Lucene50SegmentInfoFormat but hey I couldn't find a way to make it write 4_5_0 versions
+                // somebody was too paranoid when implementing this. ey rmuir, was that you? - go fix it :P
+                @Override
+                public void write(Directory dir, SegmentInfo si, IOContext ioContext) throws IOException {
+                    final String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene50SegmentInfoFormat.SI_EXTENSION);
+                    si.addFile(fileName);
+
+                    boolean success = false;
+                    try (IndexOutput output = dir.createOutput(fileName, ioContext)) {
+                        CodecUtil.writeIndexHeader(output,
+                                "Lucene50SegmentInfo",
+                                0,
+                                si.getId(),
+                                "");
+                        Version version = Version.LUCENE_4_5_0; // FOOOOOO!!
+                        // Write the Lucene version that created this segment, since 3.1
+                        output.writeInt(version.major);
+                        output.writeInt(version.minor);
+                        output.writeInt(version.bugfix);
+                        assert version.prerelease == 0;
+                        output.writeInt(si.getDocCount());
+
+                        output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO));
+                        output.writeStringStringMap(si.getDiagnostics());
+                        Set<String> files = si.files();
+                        for (String file : files) {
+                            if (!IndexFileNames.parseSegmentName(file).equals(si.name)) {
+                                throw new IllegalArgumentException("invalid files: expected segment=" + si.name + ", got=" + files);
+                            }
+                        }
+                        output.writeStringSet(files);
+                        CodecUtil.writeFooter(output);
+                        success = true;
+                    } finally {
+                        if (!success) {
+                            // TODO: are we doing this outside of the tracking wrapper? why must SIWriter cleanup like this?
+                            IOUtils.deleteFilesIgnoringExceptions(si.dir, fileName);
+                        }
+                    }
+                }
+            };
+        }
+    }
+
     @Test
     public void testWriteLegacyChecksums() throws IOException {
         final ShardId shardId = new ShardId(new Index("index"), 1);
         DirectoryService directoryService = new LuceneManagedDirectoryService(random());
         Store store = new Store(shardId, ImmutableSettings.EMPTY, null, directoryService, randomDistributor(directoryService), new DummyShardLock(shardId));
         // set default codec - all segments need checksums
-        IndexWriter writer = new IndexWriter(store.directory(), newIndexWriterConfig(random(), new MockAnalyzer(random())).setCodec(actualDefaultCodec()));
+        final boolean usesOldCodec = randomBoolean();
+        IndexWriter writer = new IndexWriter(store.directory(), newIndexWriterConfig(random(), new MockAnalyzer(random())).setCodec(usesOldCodec ? new OldSIMockingCodec() : actualDefaultCodec()));
         int docs = 1 + random().nextInt(100);
 
         for (int i = 0; i < docs; i++) {
@@ -234,23 +294,34 @@ public class StoreTest extends ElasticsearchLuceneTestCase {
             if (file.equals("write.lock") || file.equals(IndexFileNames.OLD_SEGMENTS_GEN)) {
                 continue;
             }
-            try (IndexInput input = store.directory().openInput(file, IOContext.READONCE)) {
-                String checksum = Store.digestToString(CodecUtil.retrieveChecksum(input));
-                StoreFileMetaData storeFileMetaData = new StoreFileMetaData(file, store.directory().fileLength(file), checksum, null);
-                legacyMeta.put(file, storeFileMetaData);
-                checksums.add(storeFileMetaData);
-
-            }
-
+            StoreFileMetaData storeFileMetaData = new StoreFileMetaData(file, store.directory().fileLength(file), file + "checksum", null);
+            legacyMeta.put(file, storeFileMetaData);
+            checksums.add(storeFileMetaData);
         }
         checksums.write(store);
 
         metadata = store.getMetadata();
         Map<String, StoreFileMetaData> stringStoreFileMetaDataMap = metadata.asMap();
         assertThat(legacyMeta.size(), equalTo(stringStoreFileMetaDataMap.size()));
-        for (StoreFileMetaData meta : legacyMeta.values()) {
-            assertTrue(stringStoreFileMetaDataMap.containsKey(meta.name()));
-            assertTrue(stringStoreFileMetaDataMap.get(meta.name()).isSame(meta));
+        if (usesOldCodec) {
+            for (StoreFileMetaData meta : legacyMeta.values()) {
+                assertTrue(meta.toString(), stringStoreFileMetaDataMap.containsKey(meta.name()));
+                assertEquals(meta.name() + "checksum", meta.checksum());
+                assertTrue(meta + " vs. " + stringStoreFileMetaDataMap.get(meta.name()), stringStoreFileMetaDataMap.get(meta.name()).isSame(meta));
+            }
+        } else {
+
+            // even if we have a legacy checksum - if we use a new codec we should reuse
+            for (StoreFileMetaData meta : legacyMeta.values()) {
+                assertTrue(meta.toString(), stringStoreFileMetaDataMap.containsKey(meta.name()));
+                assertFalse(meta + " vs. " + stringStoreFileMetaDataMap.get(meta.name()), stringStoreFileMetaDataMap.get(meta.name()).isSame(meta));
+                StoreFileMetaData storeFileMetaData = metadata.get(meta.name());
+                try (IndexInput input = store.openVerifyingInput(meta.name(), IOContext.DEFAULT, storeFileMetaData)) {
+                    assertTrue(storeFileMetaData.toString(), input instanceof Store.VerifyingIndexInput);
+                    input.seek(meta.length());
+                    Store.verify(input);
+                }
+            }
         }
         assertDeleteContent(store, directoryService);
         IOUtils.close(store);
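For context, and not part of the patch itself: the Lucene-side API the new code path leans on is CodecUtil.retrieveChecksum, which seeks to the CRC32 footer that Lucene 4.8+ appends to every index file and returns the stored value. A rough standalone sketch follows, assuming a Lucene 5.x classpath and a hypothetical FooterChecksumSketch class that takes an index directory path as its only argument.

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;

import java.io.IOException;
import java.nio.file.Paths;

public class FooterChecksumSketch {
    public static void main(String[] args) throws IOException {
        // args[0]: path to a Lucene index directory (e.g. a shard's index folder)
        try (Directory dir = FSDirectory.open(Paths.get(args[0]))) {
            for (String file : dir.listAll()) {
                // write.lock has no codec footer; skip it like the test above does
                if (file.equals("write.lock")) {
                    continue;
                }
                try (IndexInput input = dir.openInput(file, IOContext.READONCE)) {
                    // seeks to the footer and returns the CRC32 stored there
                    long crc32 = CodecUtil.retrieveChecksum(input);
                    // print as a compact radix-36 string
                    System.out.println(file + " -> " + Long.toString(crc32, Character.MAX_RADIX));
                }
            }
        }
    }
}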