diff --git a/core/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/core/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index 2a615649fcf..1b829d700d4 100644 --- a/core/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/core/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -550,7 +550,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus /** * Returns status of shards currently finished snapshots *
- * This method is executed on master node and it's complimentary to the {@link SnapshotShardsService#currentSnapshotShards(Snapshot)} because it + * This method is executed on master node and it's complimentary to the + * {@link SnapshotShardsService#currentSnapshotShards(Snapshot)} because it * returns similar information but for already finished snapshots. *
* @@ -578,8 +579,25 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus shardSnapshotStatus.failure(shardFailure.reason()); shardStatus.put(shardId, shardSnapshotStatus); } else { - IndexShardSnapshotStatus shardSnapshotStatus = - repository.getShardSnapshotStatus(snapshotInfo.snapshotId(), snapshotInfo.version(), indexId, shardId); + final IndexShardSnapshotStatus shardSnapshotStatus; + if (snapshotInfo.state() == SnapshotState.FAILED) { + // If the snapshot failed, but the shard's snapshot does + // not have an exception, it means that partial snapshots + // were disabled and in this case, the shard snapshot will + // *not* have any metadata, so attempting to read the shard + // snapshot status will throw an exception. Instead, we create + // a status for the shard to indicate that the shard snapshot + // could not be taken due to partial being set to false. + shardSnapshotStatus = new IndexShardSnapshotStatus(); + shardSnapshotStatus.updateStage(IndexShardSnapshotStatus.Stage.FAILURE); + shardSnapshotStatus.failure("skipped"); + } else { + shardSnapshotStatus = repository.getShardSnapshotStatus( + snapshotInfo.snapshotId(), + snapshotInfo.version(), + indexId, + shardId); + } shardStatus.put(shardId, shardSnapshotStatus); } } diff --git a/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java b/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java index 45bd369a941..c3f801f77fb 100644 --- a/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java +++ b/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java @@ -54,6 +54,7 @@ import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.MappingMetaData; import org.elasticsearch.cluster.metadata.MetaDataIndexStateService; import org.elasticsearch.cluster.routing.IndexRoutingTable; +import org.elasticsearch.cluster.routing.allocation.decider.EnableAllocationDecider; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Priority; import org.elasticsearch.common.Strings; @@ -2737,4 +2738,74 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas assertEquals(SnapshotState.SUCCESS, getSnapshotsResponse.getSnapshots().get(0).state()); } + public void testSnapshotStatusOnFailedIndex() throws Exception { + logger.info("--> creating repository"); + final Path repoPath = randomRepoPath(); + final Client client = client(); + assertAcked(client.admin().cluster() + .preparePutRepository("test-repo") + .setType("fs") + .setVerify(false) + .setSettings(Settings.builder().put("location", repoPath))); + + logger.info("--> creating good index"); + assertAcked(prepareCreate("test-idx-good") + .setSettings(Settings.builder() + .put(SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 0))); + ensureGreen(); + final int numDocs = randomIntBetween(1, 5); + for (int i = 0; i < numDocs; i++) { + index("test-idx-good", "doc", Integer.toString(i), "foo", "bar" + i); + } + refresh(); + + logger.info("--> creating bad index"); + assertAcked(prepareCreate("test-idx-bad") + .setWaitForActiveShards(ActiveShardCount.NONE) + .setSettings(Settings.builder() + .put(SETTING_NUMBER_OF_SHARDS, 1) + .put(SETTING_NUMBER_OF_REPLICAS, 0) + // set shard allocation to none so the primary cannot be + // allocated - simulates a "bad" index that fails to snapshot + .put(EnableAllocationDecider.INDEX_ROUTING_ALLOCATION_ENABLE_SETTING.getKey(), + "none"))); + + logger.info("--> snapshot bad index and get status"); + client.admin().cluster() + .prepareCreateSnapshot("test-repo", "test-snap1") + .setWaitForCompletion(true) + .setIndices("test-idx-bad") + .get(); + SnapshotsStatusResponse snapshotsStatusResponse = client.admin().cluster() + .prepareSnapshotStatus("test-repo") + .setSnapshots("test-snap1") + .get(); + assertEquals(1, snapshotsStatusResponse.getSnapshots().size()); + assertEquals(State.FAILED, snapshotsStatusResponse.getSnapshots().get(0).getState()); + + logger.info("--> snapshot both good and bad index and get status"); + client.admin().cluster() + .prepareCreateSnapshot("test-repo", "test-snap2") + .setWaitForCompletion(true) + .setIndices("test-idx-good", "test-idx-bad") + .get(); + snapshotsStatusResponse = client.admin().cluster() + .prepareSnapshotStatus("test-repo") + .setSnapshots("test-snap2") + .get(); + assertEquals(1, snapshotsStatusResponse.getSnapshots().size()); + // verify a FAILED status is returned instead of a 500 status code + // see https://github.com/elastic/elasticsearch/issues/23716 + SnapshotStatus snapshotStatus = snapshotsStatusResponse.getSnapshots().get(0); + assertEquals(State.FAILED, snapshotStatus.getState()); + for (SnapshotIndexShardStatus shardStatus : snapshotStatus.getShards()) { + assertEquals(SnapshotIndexShardStage.FAILURE, shardStatus.getStage()); + if (shardStatus.getIndex().equals("test-idx-good")) { + assertEquals("skipped", shardStatus.getFailure()); + } else { + assertEquals("primary shard is not allocated", shardStatus.getFailure()); + } + } + } }