Fix AllocationIdIT test failure on WindowsFS (#67179)

This test failed on WindowsFS. We failed to remove the corrupted file
while it was still open (held for a short window by the ListShardStore
action), and the pending file deletions were cleared when we restarted
that node.

This commit fixes the issue by shutting down the node before removing
the corrupted file to avoid any access to that file.

Closes #66893
This commit is contained in:
Nhat Nguyen 2021-01-13 09:02:42 -05:00
parent 3ca2702a8f
commit 84d8416294
1 changed files with 2 additions and 6 deletions

View File

@ -136,20 +136,16 @@ public class AllocationIdIT extends ESIntegTestCase {
assertThat(shardRouting.unassignedInfo().getReason(), equalTo(UnassignedInfo.Reason.ALLOCATION_FAILED));
});
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(node1));
try(Store store = new Store(shardId, indexSettings, new SimpleFSDirectory(indexPath), new DummyShardLock(shardId))) {
store.removeCorruptionMarker();
}
node1 = internalCluster().startNode(node1DataPathSettings);
// index is red: no any shard is allocated (allocation id is a fake id that does not match to anything)
checkHealthStatus(indexName, ClusterHealthStatus.RED);
checkNoValidShardCopy(indexName, shardId);
internalCluster().restartNode(node1, InternalTestCluster.EMPTY_CALLBACK);
// index is still red due to mismatch of allocation id
checkHealthStatus(indexName, ClusterHealthStatus.RED);
checkNoValidShardCopy(indexName, shardId);
// no any valid shard is there; have to invoke AllocateStalePrimary again
client().admin().cluster().prepareReroute()
.add(new AllocateStalePrimaryAllocationCommand(indexName, 0, node1, true))