From 7915b5f7aaa1f5f67051dd699dfb2eb8d74c6bd1 Mon Sep 17 00:00:00 2001 From: Tanguy Leroux Date: Thu, 17 May 2018 10:57:25 +0200 Subject: [PATCH] [Tests] Add debug information to CorruptedFileIT This test failed but the cause is not obvious. This commit adds more debug logging traces so that if it reproduces we could gather more information. Related #30577 --- .../blobstore/BlobStoreRepository.java | 4 +--- .../index/store/CorruptedFileIT.java | 17 +++++++++++++---- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index fcbc54efbf7..ba4b68b20b2 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -1147,6 +1147,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp // TODO apparently we don't use the MetadataSnapshot#.recoveryDiff(...) here but we should final Collection fileNames; try { + logger.trace("[{}] [{}] Loading store metadata using index commit [{}]", shardId, snapshotId, snapshotIndexCommit); metadata = store.getMetadata(snapshotIndexCommit); fileNames = snapshotIndexCommit.getFileNames(); } catch (IOException e) { @@ -1242,9 +1243,6 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp /** * Snapshot individual file - *

- * This is asynchronous method. Upon completion of the operation latch is getting counted down and any failures are - * added to the {@code failures} list * * @param fileInfo file to be snapshotted */ diff --git a/server/src/test/java/org/elasticsearch/index/store/CorruptedFileIT.java b/server/src/test/java/org/elasticsearch/index/store/CorruptedFileIT.java index bbfa56a0e55..59156c91558 100644 --- a/server/src/test/java/org/elasticsearch/index/store/CorruptedFileIT.java +++ b/server/src/test/java/org/elasticsearch/index/store/CorruptedFileIT.java @@ -470,7 +470,7 @@ public class CorruptedFileIT extends ESIntegTestCase { * TODO once checksum verification on snapshotting is implemented this test needs to be fixed or split into several * parts... We should also corrupt files on the actual snapshot and check that we don't restore the corrupted shard. */ - @TestLogging("org.elasticsearch.monitor.fs:DEBUG") + @TestLogging("org.elasticsearch.repositories:TRACE,org.elasticsearch.snapshots:TRACE") public void testCorruptFileThenSnapshotAndRestore() throws ExecutionException, InterruptedException, IOException { int numDocs = scaledRandomIntBetween(100, 1000); internalCluster().ensureAtLeastNumDataNodes(2); @@ -494,6 +494,7 @@ public class CorruptedFileIT extends ESIntegTestCase { assertHitCount(countResponse, numDocs); ShardRouting shardRouting = corruptRandomPrimaryFile(false); + logger.info("--> shard {} has a corrupted file", shardRouting); // we don't corrupt segments.gen since S/R doesn't snapshot this file // the other problem here why we can't corrupt segments.X files is that the snapshot flushes again before // it snapshots and that will write a new segments.X+1 file @@ -504,9 +505,12 @@ public class CorruptedFileIT extends ESIntegTestCase { .put("compress", randomBoolean()) .put("chunk_size", randomIntBetween(100, 1000), ByteSizeUnit.BYTES))); logger.info("--> snapshot"); - CreateSnapshotResponse createSnapshotResponse = client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap").setWaitForCompletion(true).setIndices("test").get(); - assertThat(createSnapshotResponse.getSnapshotInfo().state(), equalTo(SnapshotState.PARTIAL)); - logger.info("failed during snapshot -- maybe SI file got corrupted"); + final CreateSnapshotResponse createSnapshotResponse = client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap") + .setWaitForCompletion(true) + .setIndices("test") + .get(); + final SnapshotState snapshotState = createSnapshotResponse.getSnapshotInfo().state(); + logger.info("--> snapshot terminated with state " + snapshotState); final List files = listShardFiles(shardRouting); Path corruptedFile = null; for (Path file : files) { @@ -515,6 +519,11 @@ public class CorruptedFileIT extends ESIntegTestCase { break; } } + if (snapshotState != SnapshotState.PARTIAL) { + logger.info("--> listing shard files for investigation"); + files.forEach(f -> logger.info("path: {}", f.toAbsolutePath())); + } + assertThat(createSnapshotResponse.getSnapshotInfo().state(), equalTo(SnapshotState.PARTIAL)); assertThat(corruptedFile, notNullValue()); }