[Store] Only fail recovery if files are inconsistent
The recovery diff can place files in the `different` category because it is conservative when it can't tell whether two files are the same. Yet the cleanup code only needs to ensure that both ends of the recovery are consistent. With a very old segments_N file no checksum is present, while the delete files may carry legacy checksums; the segments file then passes the consistency check but the .del file doesn't, even though it is in fact the same. That check was missing in the last commit.
Parent: 164853fd0b
Commit: 8b5bc2643e
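Restated outside the diff, here is a minimal sketch of the consistency rule this commit moves into verifyAfterCleanup: a file pair counts as consistent if the metadata says both sides are the same file, or if neither side has a checksum (a legacy segments_N) but hash and length agree. The FileMetaSketch and consistent names below are illustrative stand-ins, not the real StoreFileMetaData API, and isSame is simplified.

    import java.util.Objects;

    final class FileMetaSketch {
        final String name;
        final long length;
        final String checksum; // null for legacy files written without a Lucene CRC32 checksum
        final String hash;     // content hash used when no checksum is available

        FileMetaSketch(String name, long length, String checksum, String hash) {
            this.name = name;
            this.length = length;
            this.checksum = checksum;
            this.hash = hash;
        }

        // simplified stand-in for StoreFileMetaData#isSame: both sides carry the same checksum and length
        boolean isSame(FileMetaSketch other) {
            return checksum != null && checksum.equals(other.checksum) && length == other.length;
        }
    }

    public class ConsistencyCheckSketch {
        // consistent if it is the same file, or if neither end has a checksum
        // but hash and length agree (the conservative "different" case for old segments_N files)
        static boolean consistent(FileMetaSketch local, FileMetaSketch remote) {
            boolean hashAndLengthEqual = local.checksum == null
                    && remote.checksum == null
                    && Objects.equals(local.hash, remote.hash)
                    && local.length == remote.length;
            return hashAndLengthEqual || local.isSame(remote);
        }

        public static void main(String[] args) {
            // legacy segments_1: no checksum on either side, identical hash and length
            FileMetaSketch segmentsLocal = new FileMetaSketch("segments_1", 50, null, "h1");
            FileMetaSketch segmentsRemote = new FileMetaSketch("segments_1", 50, null, "h1");
            // _0_1.del: carries a legacy checksum, so it has to match via isSame
            FileMetaSketch delLocal = new FileMetaSketch("_0_1.del", 42, "foobarbaz", "h2");
            FileMetaSketch delRemote = new FileMetaSketch("_0_1.del", 42, "foobarbaz", "h2");
            System.out.println(consistent(segmentsLocal, segmentsRemote)); // true
            System.out.println(consistent(delLocal, delRemote));           // true - the case the previous commit rejected
        }
    }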
@ -560,32 +560,49 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref
                }
            }
            final Store.MetadataSnapshot metadataOrEmpty = getMetadata();
            final Store.RecoveryDiff recoveryDiff = metadataOrEmpty.recoveryDiff(sourceMetaData);
            if (recoveryDiff.identical.size() != recoveryDiff.size()) {
                if (recoveryDiff.missing.isEmpty()) {
                    for (StoreFileMetaData meta : recoveryDiff.different) {
                        StoreFileMetaData local = metadataOrEmpty.get(meta.name());
                        StoreFileMetaData remote = sourceMetaData.get(meta.name());
                        // if we have different files then they must have no checksums, otherwise something went wrong during recovery.
                        // we hit that problem when an empty index has only a segments_1 file: then we can't tell if it's a Lucene 4.8 file
                        // and therefore it has no checksum. That isn't much of a problem since we simply copy it over anyway, but those files come out as
                        // different in the diff. That's why we have to double check here again if the rest of it matches.
                        boolean consistent = (local.checksum() == null && remote.checksum() == null && local.hash().equals(remote.hash()) && local.length() == remote.length());
                        if (consistent == false) {
                            throw new ElasticsearchIllegalStateException("local version: " + local + " is different from remote version after recovery: " + remote, null);
                        }
                    }
                } else {
                    logger.debug("Files are missing on the recovery target: {} ", recoveryDiff);
                    throw new ElasticsearchIllegalStateException("Files are missing on the recovery target: [different="
                            + recoveryDiff.different + ", missing=" + recoveryDiff.missing + ']', null);
                }
            }
            verifyAfterCleanup(sourceMetaData, metadataOrEmpty);
        } finally {
            metadataLock.writeLock().unlock();
        }
    }

    // pkg private for testing
    final void verifyAfterCleanup(MetadataSnapshot sourceMetaData, MetadataSnapshot targetMetaData) {
        final RecoveryDiff recoveryDiff = targetMetaData.recoveryDiff(sourceMetaData);
        if (recoveryDiff.identical.size() != recoveryDiff.size()) {
            if (recoveryDiff.missing.isEmpty()) {
                for (StoreFileMetaData meta : recoveryDiff.different) {
                    StoreFileMetaData local = targetMetaData.get(meta.name());
                    StoreFileMetaData remote = sourceMetaData.get(meta.name());
                    // if we have different files then they must have no checksums, otherwise something went wrong during recovery.
                    // we hit that problem when an empty index has only a segments_1 file: then we can't tell if it's a Lucene 4.8 file
                    // and therefore it has no checksum. That isn't much of a problem since we simply copy it over anyway, but those files come out as
                    // different in the diff. That's why we have to double check here again if the rest of it matches.

                    // all is fine - this file is just part of a commit or a segment that is different
                    final boolean same = local.isSame(remote);

                    // this check ensures that the two files are consistent, ie. if we don't have checksums only the rest needs to match - we are just
                    // verifying that we are consistent on both ends, source and target
                    final boolean hashAndLengthEqual = (
                            local.checksum() == null
                            && remote.checksum() == null
                            && local.hash().equals(remote.hash())
                            && local.length() == remote.length());
                    final boolean consistent = hashAndLengthEqual || same;
                    if (consistent == false) {
                        logger.debug("Files are different on the recovery target: {} ", recoveryDiff);
                        throw new ElasticsearchIllegalStateException("local version: " + local + " is different from remote version after recovery: " + remote, null);
                    }
                }
            } else {
                logger.debug("Files are missing on the recovery target: {} ", recoveryDiff);
                throw new ElasticsearchIllegalStateException("Files are missing on the recovery target: [different="
                        + recoveryDiff.different + ", missing=" + recoveryDiff.missing + ']', null);
            }
        }
    }

    /**
     * This exists so {@link org.elasticsearch.index.codec.postingsformat.BloomFilterPostingsFormat} can load its boolean setting; can we find a more straightforward way?
     */

@ -752,6 +752,22 @@ public class StoreTest extends ElasticsearchLuceneTestCase {
        return random.nextBoolean() ? new LeastUsedDistributor(service) : new RandomWeightedDistributor(service);
    }

    /**
     * Legacy indices without Lucene CRC32 checksums never wrote or calculated checksums for segments_N files,
     * but they did for other files.
     */
    @Test
    public void testRecoveryDiffWithLegacyCommit() {
        Map<String, StoreFileMetaData> metaDataMap = new HashMap<>();
        metaDataMap.put("segments_1", new StoreFileMetaData("segments_1", 50, null, null, new BytesRef(new byte[] {1})));
        metaDataMap.put("_0_1.del", new StoreFileMetaData("_0_1.del", 42, "foobarbaz", null, new BytesRef()));
        Store.MetadataSnapshot first = new Store.MetadataSnapshot(metaDataMap);

        Store.MetadataSnapshot second = new Store.MetadataSnapshot(metaDataMap);
        Store.RecoveryDiff recoveryDiff = first.recoveryDiff(second);
        assertEquals(recoveryDiff.toString(), recoveryDiff.different.size(), 2);
    }

    @Test
    public void testRecoveryDiff() throws IOException, InterruptedException {

@ -1005,4 +1021,27 @@ public class StoreTest extends ElasticsearchLuceneTestCase {
        store.deleteContent();
        IOUtils.close(store);
    }

    @Test
    public void testCleanUpWithLegacyChecksums() throws IOException {
        Map<String, StoreFileMetaData> metaDataMap = new HashMap<>();
        metaDataMap.put("segments_1", new StoreFileMetaData("segments_1", 50, null, null, new BytesRef(new byte[] {1})));
        metaDataMap.put("_0_1.del", new StoreFileMetaData("_0_1.del", 42, "foobarbaz", null, new BytesRef()));
        Store.MetadataSnapshot snapshot = new Store.MetadataSnapshot(metaDataMap);

        final ShardId shardId = new ShardId(new Index("index"), 1);
        DirectoryService directoryService = new LuceneManagedDirectoryService(random());
        Store store = new Store(shardId, ImmutableSettings.EMPTY, directoryService, randomDistributor(directoryService), new DummyShardLock(shardId));
        for (String file : metaDataMap.keySet()) {
            try (IndexOutput output = store.directory().createOutput(file, IOContext.DEFAULT)) {
                BytesRef bytesRef = new BytesRef(TestUtil.randomRealisticUnicodeString(random(), 10, 1024));
                output.writeBytes(bytesRef.bytes, bytesRef.offset, bytesRef.length);
                CodecUtil.writeFooter(output);
            }
        }

        store.verifyAfterCleanup(snapshot, snapshot);
        store.deleteContent();
        IOUtils.close(store);
    }
}