mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-25 06:16:40 +00:00
Shard CLI tool always checks shards (#41480)
The shard CLI tool would not do anything if a corruption marker was not present. However, a corruption marker is only added when corruption is detected during indexing/writing, not when a search or other read fails. This commit changes the tool to always check shards, regardless of whether a corruption marker is present. Related to #41298
This commit is contained in:
parent
523b5bfdb5
commit
075fd2a0ac
@ -38,9 +38,7 @@ public class RemoveCorruptedLuceneSegmentsAction {
|
||||
Lock writeLock,
|
||||
PrintStream printStream,
|
||||
boolean verbose) throws IOException {
|
||||
if (RemoveCorruptedShardDataCommand.isCorruptMarkerFileIsPresent(indexDirectory) == false) {
|
||||
return Tuple.tuple(RemoveCorruptedShardDataCommand.CleanStatus.CLEAN, null);
|
||||
}
|
||||
boolean markedCorrupted = RemoveCorruptedShardDataCommand.isCorruptMarkerFileIsPresent(indexDirectory);
|
||||
|
||||
final CheckIndex.Status status;
|
||||
try (CheckIndex checker = new CheckIndex(indexDirectory, writeLock)) {
|
||||
@ -55,7 +53,9 @@ public class RemoveCorruptedLuceneSegmentsAction {
|
||||
}
|
||||
|
||||
return status.clean
|
||||
? Tuple.tuple(RemoveCorruptedShardDataCommand.CleanStatus.CLEAN_WITH_CORRUPTED_MARKER, null)
|
||||
? Tuple.tuple(markedCorrupted
|
||||
? RemoveCorruptedShardDataCommand.CleanStatus.CLEAN_WITH_CORRUPTED_MARKER
|
||||
: RemoveCorruptedShardDataCommand.CleanStatus.CLEAN, null)
|
||||
: Tuple.tuple(RemoveCorruptedShardDataCommand.CleanStatus.CORRUPTED,
|
||||
"Corrupted Lucene index segments found - " + status.totLoseDocCount + " documents will be lost.");
|
||||
}
|
||||
@ -67,8 +67,6 @@ public class RemoveCorruptedLuceneSegmentsAction {
|
||||
Lock writeLock,
|
||||
PrintStream printStream,
|
||||
boolean verbose) throws IOException {
|
||||
checkCorruptMarkerFileIsPresent(indexDirectory);
|
||||
|
||||
final CheckIndex.Status status;
|
||||
try (CheckIndex checker = new CheckIndex(indexDirectory, writeLock)) {
|
||||
|
||||
@ -90,11 +88,4 @@ public class RemoveCorruptedLuceneSegmentsAction {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Guard that rejects a directory which carries no corruption marker.
 *
 * @param directory the directory handed to
 *                  {@code RemoveCorruptedShardDataCommand.isCorruptMarkerFileIsPresent}
 * @throws ElasticsearchException if that check reports that no marker is present
 * @throws IOException declared by this method; presumably propagated from the marker lookup
 */
protected void checkCorruptMarkerFileIsPresent(Directory directory) throws IOException {
|
||||
if (RemoveCorruptedShardDataCommand.isCorruptMarkerFileIsPresent(directory) == false) {
|
||||
throw new ElasticsearchException("There is no corruption file marker");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -76,6 +76,9 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
|
||||
private Path translogPath;
|
||||
private Path indexPath;
|
||||
|
||||
private static final Pattern NUM_CORRUPT_DOCS_PATTERN =
|
||||
Pattern.compile("Corrupted Lucene index segments found -\\s+(?<docs>\\d+) documents will be lost.");
|
||||
|
||||
@Before
|
||||
public void setup() throws IOException {
|
||||
shardId = new ShardId("index0", "_na_", 0);
|
||||
@ -154,11 +157,13 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
|
||||
final boolean corruptSegments = randomBoolean();
|
||||
CorruptionUtils.corruptIndex(random(), indexPath, corruptSegments);
|
||||
|
||||
// test corrupted shard
|
||||
final IndexShard corruptedShard = reopenIndexShard(true);
|
||||
allowShardFailures();
|
||||
expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
|
||||
closeShards(corruptedShard);
|
||||
if (randomBoolean()) {
|
||||
// test corrupted shard and add corruption marker
|
||||
final IndexShard corruptedShard = reopenIndexShard(true);
|
||||
allowShardFailures();
|
||||
expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
|
||||
closeShards(corruptedShard);
|
||||
}
|
||||
|
||||
final RemoveCorruptedShardDataCommand command = new RemoveCorruptedShardDataCommand();
|
||||
final MockTerminal t = new MockTerminal();
|
||||
@ -196,8 +201,7 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
|
||||
|
||||
final Set<String> shardDocUIDs = getShardDocUIDs(newShard);
|
||||
|
||||
final Pattern pattern = Pattern.compile("Corrupted Lucene index segments found -\\s+(?<docs>\\d+) documents will be lost.");
|
||||
final Matcher matcher = pattern.matcher(output);
|
||||
final Matcher matcher = NUM_CORRUPT_DOCS_PATTERN.matcher(output);
|
||||
assertThat(matcher.find(), equalTo(true));
|
||||
final int expectedNumDocs = numDocs - Integer.parseInt(matcher.group("docs"));
|
||||
|
||||
@ -272,12 +276,13 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
|
||||
|
||||
CorruptionUtils.corruptIndex(random(), indexPath, false);
|
||||
|
||||
// test corrupted shard
|
||||
final IndexShard corruptedShard = reopenIndexShard(true);
|
||||
allowShardFailures();
|
||||
expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
|
||||
closeShards(corruptedShard);
|
||||
|
||||
if (randomBoolean()) {
|
||||
// test corrupted shard and add corruption marker
|
||||
final IndexShard corruptedShard = reopenIndexShard(true);
|
||||
allowShardFailures();
|
||||
expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
|
||||
closeShards(corruptedShard);
|
||||
}
|
||||
TestTranslog.corruptRandomTranslogFile(logger, random(), Arrays.asList(translogPath));
|
||||
|
||||
final RemoveCorruptedShardDataCommand command = new RemoveCorruptedShardDataCommand();
|
||||
@ -313,8 +318,7 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
|
||||
|
||||
final Set<String> shardDocUIDs = getShardDocUIDs(newShard);
|
||||
|
||||
final Pattern pattern = Pattern.compile("Corrupted Lucene index segments found -\\s+(?<docs>\\d+) documents will be lost.");
|
||||
final Matcher matcher = pattern.matcher(output);
|
||||
final Matcher matcher = NUM_CORRUPT_DOCS_PATTERN.matcher(output);
|
||||
assertThat(matcher.find(), equalTo(true));
|
||||
final int expectedNumDocs = numDocsToKeep - Integer.parseInt(matcher.group("docs"));
|
||||
|
||||
@ -347,6 +351,62 @@ public class RemoveCorruptedShardDataCommandTests extends IndexShardTestCase {
|
||||
shardPath -> assertThat(shardPath.resolveIndex(), equalTo(indexPath)));
|
||||
}
|
||||
|
||||
/**
 * Exercises the command on a shard whose store has been marked corrupted
 * (via {@code markStoreCorrupted(null)}) without any data being corrupted by the test.
 *
 * Expectations checked below:
 * - answering "n" aborts with "aborted by user" after the tool reports that the index is
 *   marked corrupted but no corruption was detected;
 * - answering "y" lets the command complete, after which the shard can be started again
 *   and still contains every indexed document;
 * - the output never contains the "documents will be lost" message.
 */
public void testCleanWithCorruptionMarker() throws Exception {
|
||||
// index some docs in several segments
|
||||
final int numDocs = indexDocs(indexShard, true);
|
||||
|
||||
// mark the store as corrupted; the test does not damage any index data itself
indexShard.store().markStoreCorrupted(null);
|
||||
|
||||
closeShards(indexShard);
|
||||
|
||||
// NOTE(review): presumably relaxes the shard-failure check while the marked shard is reopened — confirm
allowShardFailures();
|
||||
final IndexShard corruptedShard = reopenIndexShard(true);
|
||||
// starting the marked shard is expected to fail with IndexShardRecoveryException
expectThrows(IndexShardRecoveryException.class, () -> newStartedShard(p -> corruptedShard, true));
|
||||
closeShards(corruptedShard);
|
||||
|
||||
final RemoveCorruptedShardDataCommand command = new RemoveCorruptedShardDataCommand();
|
||||
final MockTerminal t = new MockTerminal();
|
||||
final OptionParser parser = command.getParser();
|
||||
|
||||
// the shard is selected by passing the translog path through the "-d" option
final OptionSet options = parser.parse("-d", translogPath.toString());
|
||||
// run command with dry-run
|
||||
// two "n" answers are queued; NOTE(review): only one confirmation prompt is asserted below — confirm why two are needed
t.addTextInput("n"); // answering "n" means dry run
|
||||
t.addTextInput("n"); // answering "n" means dry run
|
||||
t.setVerbosity(Terminal.Verbosity.VERBOSE);
|
||||
try {
|
||||
command.execute(t, options, environment);
|
||||
// reaching this line means the command did not abort as expected
fail();
|
||||
} catch (ElasticsearchException e) {
|
||||
assertThat(e.getMessage(), containsString("aborted by user"));
|
||||
assertThat(t.getOutput(), containsString("Continue and remove corrupted data from the shard ?"));
|
||||
assertThat(t.getOutput(), containsString("Lucene index is marked corrupted, but no corruption detected"));
|
||||
}
|
||||
|
||||
logger.info("--> output:\n{}", t.getOutput());
|
||||
|
||||
// run command without dry-run
|
||||
t.reset();
|
||||
t.addTextInput("y");
|
||||
t.addTextInput("y");
|
||||
command.execute(t, options, environment);
|
||||
|
||||
final String output = t.getOutput();
|
||||
logger.info("--> output:\n{}", output);
|
||||
|
||||
// NOTE(review): presumably re-enables the shard-failure check before the shard is restarted — confirm
failOnShardFailures();
|
||||
final IndexShard newShard = newStartedShard(p -> reopenIndexShard(false), true);
|
||||
|
||||
// no data was corrupted, so every indexed document must still be present
final Set<String> shardDocUIDs = getShardDocUIDs(newShard);
|
||||
assertEquals(numDocs, shardDocUIDs.size());
|
||||
|
||||
assertThat(t.getOutput(), containsString("This shard has been marked as corrupted but no corruption can now be detected."));
|
||||
|
||||
// the "documents will be lost" message must not have been printed
final Matcher matcher = NUM_CORRUPT_DOCS_PATTERN.matcher(output);
|
||||
assertFalse(matcher.find());
|
||||
|
||||
closeShards(newShard);
|
||||
}
|
||||
|
||||
private IndexShard reopenIndexShard(boolean corrupted) throws IOException {
|
||||
// open shard with the same location
|
||||
final ShardRouting shardRouting = ShardRoutingHelper.initWithSameId(indexShard.routingEntry(),
|
||||
|
Loading…
x
Reference in New Issue
Block a user