Shard CLI tool always check shards (#41480)

The shard CLI tool would not do anything if a corruption marker was not
present. But a corruption marker is only added if a corruption is
detected during indexing/writing, not if a search or other read fails.

Changed the tool to always check shards regardless of corruption marker
presence.

Related to #41298
This commit is contained in:
Henning Andersen 2019-05-24 16:48:45 +02:00 committed by Henning Andersen
parent 523b5bfdb5
commit 075fd2a0ac
2 changed files with 79 additions and 28 deletions

View File

@ -38,9 +38,7 @@ public class RemoveCorruptedLuceneSegmentsAction {
Lock writeLock,
PrintStream printStream,
boolean verbose) throws IOException {
if (RemoveCorruptedShardDataCommand.isCorruptMarkerFileIsPresent(indexDirectory) == false) {
return Tuple.tuple(RemoveCorruptedShardDataCommand.CleanStatus.CLEAN, null);
}
boolean markedCorrupted = RemoveCorruptedShardDataCommand.isCorruptMarkerFileIsPresent(indexDirectory);
final CheckIndex.Status status;
try (CheckIndex checker = new CheckIndex(indexDirectory, writeLock)) {
@ -55,7 +53,9 @@ public class RemoveCorruptedLuceneSegmentsAction {
}
return status.clean
? Tuple.tuple(RemoveCorruptedShardDataCommand.CleanStatus.CLEAN_WITH_CORRUPTED_MARKER, null)
? Tuple.tuple(markedCorrupted
? RemoveCorruptedShardDataCommand.CleanStatus.CLEAN_WITH_CORRUPTED_MARKER
: RemoveCorruptedShardDataCommand.CleanStatus.CLEAN, null)
: Tuple.tuple(RemoveCorruptedShardDataCommand.CleanStatus.CORRUPTED,
"Corrupted Lucene index segments found - " + status.totLoseDocCount + " documents will be lost.");
}
@ -67,8 +67,6 @@ public class RemoveCorruptedLuceneSegmentsAction {
Lock writeLock,
PrintStream printStream,
boolean verbose) throws IOException {
checkCorruptMarkerFileIsPresent(indexDirectory);
final CheckIndex.Status status;
try (CheckIndex checker = new CheckIndex(indexDirectory, writeLock)) {
@ -90,11 +88,4 @@ public class RemoveCorruptedLuceneSegmentsAction {
}
}
}
protected void checkCorruptMarkerFileIsPresent(Directory directory) throws IOException {
if (RemoveCorruptedShardDataCommand.isCorruptMarkerFileIsPresent(directory) == false) {
throw new ElasticsearchException("There is no corruption file marker");
}
}
}

View File

@ -76,6 +76,9 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
private Path translogPath;
private Path indexPath;
private static final Pattern NUM_CORRUPT_DOCS_PATTERN =
Pattern.compile("Corrupted Lucene index segments found -\\s+(?<docs>\\d+) documents will be lost.");
@Before
public void setup() throws IOException {
shardId = new ShardId("index0", "_na_", 0);
@ -154,11 +157,13 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
final boolean corruptSegments = randomBoolean();
CorruptionUtils.corruptIndex(random(), indexPath, corruptSegments);
// test corrupted shard
final IndexShard corruptedShard = reopenIndexShard(true);
allowShardFailures();
expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
closeShards(corruptedShard);
if (randomBoolean()) {
// test corrupted shard and add corruption marker
final IndexShard corruptedShard = reopenIndexShard(true);
allowShardFailures();
expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
closeShards(corruptedShard);
}
final RemoveCorruptedShardDataCommand command = new RemoveCorruptedShardDataCommand();
final MockTerminal t = new MockTerminal();
@ -196,8 +201,7 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
final Set<String> shardDocUIDs = getShardDocUIDs(newShard);
final Pattern pattern = Pattern.compile("Corrupted Lucene index segments found -\\s+(?<docs>\\d+) documents will be lost.");
final Matcher matcher = pattern.matcher(output);
final Matcher matcher = NUM_CORRUPT_DOCS_PATTERN.matcher(output);
assertThat(matcher.find(), equalTo(true));
final int expectedNumDocs = numDocs - Integer.parseInt(matcher.group("docs"));
@ -272,12 +276,13 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
CorruptionUtils.corruptIndex(random(), indexPath, false);
// test corrupted shard
final IndexShard corruptedShard = reopenIndexShard(true);
allowShardFailures();
expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
closeShards(corruptedShard);
if (randomBoolean()) {
// test corrupted shard and add corruption marker
final IndexShard corruptedShard = reopenIndexShard(true);
allowShardFailures();
expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
closeShards(corruptedShard);
}
TestTranslog.corruptRandomTranslogFile(logger, random(), Arrays.asList(translogPath));
final RemoveCorruptedShardDataCommand command = new RemoveCorruptedShardDataCommand();
@ -313,8 +318,7 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
final Set<String> shardDocUIDs = getShardDocUIDs(newShard);
final Pattern pattern = Pattern.compile("Corrupted Lucene index segments found -\\s+(?<docs>\\d+) documents will be lost.");
final Matcher matcher = pattern.matcher(output);
final Matcher matcher = NUM_CORRUPT_DOCS_PATTERN.matcher(output);
assertThat(matcher.find(), equalTo(true));
final int expectedNumDocs = numDocsToKeep - Integer.parseInt(matcher.group("docs"));
@ -347,6 +351,62 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
shardPath -> assertThat(shardPath.resolveIndex(), equalTo(indexPath)));
}
public void testCleanWithCorruptionMarker() throws Exception {
// index some docs in several segments
final int numDocs = indexDocs(indexShard, true);
indexShard.store().markStoreCorrupted(null);
closeShards(indexShard);
allowShardFailures();
final IndexShard corruptedShard = reopenIndexShard(true);
expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
closeShards(corruptedShard);
final RemoveCorruptedShardDataCommand command = new RemoveCorruptedShardDataCommand();
final MockTerminal t = new MockTerminal();
final OptionParser parser = command.getParser();
final OptionSet options = parser.parse("-d", translogPath.toString());
// run command with dry-run
t.addTextInput("n"); // mean dry run
t.addTextInput("n"); // mean dry run
t.setVerbosity(Terminal.Verbosity.VERBOSE);
try {
command.execute(t, options, environment);
fail();
} catch (ElasticsearchException e) {
assertThat(e.getMessage(), containsString("aborted by user"));
assertThat(t.getOutput(), containsString("Continue and remove corrupted data from the shard ?"));
assertThat(t.getOutput(), containsString("Lucene index is marked corrupted, but no corruption detected"));
}
logger.info("--> output:\n{}", t.getOutput());
// run command without dry-run
t.reset();
t.addTextInput("y");
t.addTextInput("y");
command.execute(t, options, environment);
final String output = t.getOutput();
logger.info("--> output:\n{}", output);
failOnShardFailures();
final IndexShard newShard = newStartedShard(p -> reopenIndexShard(false), true);
final Set<String> shardDocUIDs = getShardDocUIDs(newShard);
assertEquals(numDocs, shardDocUIDs.size());
assertThat(t.getOutput(), containsString("This shard has been marked as corrupted but no corruption can now be detected."));
final Matcher matcher = NUM_CORRUPT_DOCS_PATTERN.matcher(output);
assertFalse(matcher.find());
closeShards(newShard);
}
private IndexShard reopenIndexShard(boolean corrupted) throws IOException {
// open shard with the same location
final ShardRouting shardRouting = ShardRoutingHelper.initWithSameId(indexShard.routingEntry(),