Truncate tlog cli should assign global checkpoint (#28192)

We are targeting to always have a safe index once the recovery is done. 
This invariant does not hold if the translog is manually truncated by 
users because the truncate translog cli resets the global checkpoint to
unassigned. This commit assigns the global checkpoint to the max_seqno
of the last commit when truncating translog. We can only safely do it
because the truncate translog command will generate a new history uuid
for that shard. With a new history UUID, sequence-based recovery between
that shard and other old shards will be disabled.

Relates #28181
This commit is contained in:
Nhat Nguyen 2018-01-12 19:06:04 -05:00 committed by GitHub
parent a15ba75d93
commit f2db2a02e2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 31 additions and 4 deletions

View File

@ -132,9 +132,19 @@ public class TruncateTranslogCommand extends EnvironmentAwareCommand {
} }
// Retrieve the generation and UUID from the existing data // Retrieve the generation and UUID from the existing data
commitData = commits.get(commits.size() - 1).getUserData(); commitData = new HashMap<>(commits.get(commits.size() - 1).getUserData());
String translogGeneration = commitData.get(Translog.TRANSLOG_GENERATION_KEY); String translogGeneration = commitData.get(Translog.TRANSLOG_GENERATION_KEY);
String translogUUID = commitData.get(Translog.TRANSLOG_UUID_KEY); String translogUUID = commitData.get(Translog.TRANSLOG_UUID_KEY);
final long globalCheckpoint;
// In order to have a safe commit invariant, we have to assign the global checkpoint to the max_seqno of the last commit.
// We can only safely do it because we will generate a new history uuid this shard.
if (commitData.containsKey(SequenceNumbers.MAX_SEQ_NO)) {
globalCheckpoint = Long.parseLong(commitData.get(SequenceNumbers.MAX_SEQ_NO));
// Also advances the local checkpoint of the last commit to its max_seqno.
commitData.put(SequenceNumbers.LOCAL_CHECKPOINT_KEY, Long.toString(globalCheckpoint));
} else {
globalCheckpoint = SequenceNumbers.UNASSIGNED_SEQ_NO;
}
if (translogGeneration == null || translogUUID == null) { if (translogGeneration == null || translogUUID == null) {
throw new ElasticsearchException("shard must have a valid translog generation and UUID but got: [{}] and: [{}]", throw new ElasticsearchException("shard must have a valid translog generation and UUID but got: [{}] and: [{}]",
translogGeneration, translogUUID); translogGeneration, translogUUID);
@ -153,7 +163,7 @@ public class TruncateTranslogCommand extends EnvironmentAwareCommand {
// Write empty checkpoint and translog to empty files // Write empty checkpoint and translog to empty files
long gen = Long.parseLong(translogGeneration); long gen = Long.parseLong(translogGeneration);
int translogLen = writeEmptyTranslog(tempEmptyTranslog, translogUUID); int translogLen = writeEmptyTranslog(tempEmptyTranslog, translogUUID);
writeEmptyCheckpoint(tempEmptyCheckpoint, translogLen, gen); writeEmptyCheckpoint(tempEmptyCheckpoint, translogLen, gen, globalCheckpoint);
terminal.println("Removing existing translog files"); terminal.println("Removing existing translog files");
IOUtils.rm(translogFiles.toArray(new Path[]{})); IOUtils.rm(translogFiles.toArray(new Path[]{}));
@ -190,9 +200,9 @@ public class TruncateTranslogCommand extends EnvironmentAwareCommand {
} }
/** Write a checkpoint file to the given location with the given generation */ /** Write a checkpoint file to the given location with the given generation */
public static void writeEmptyCheckpoint(Path filename, int translogLength, long translogGeneration) throws IOException { static void writeEmptyCheckpoint(Path filename, int translogLength, long translogGeneration, long globalCheckpoint) throws IOException {
Checkpoint emptyCheckpoint = Checkpoint.emptyTranslogCheckpoint(translogLength, translogGeneration, Checkpoint emptyCheckpoint = Checkpoint.emptyTranslogCheckpoint(translogLength, translogGeneration,
SequenceNumbers.UNASSIGNED_SEQ_NO, translogGeneration); globalCheckpoint, translogGeneration);
Checkpoint.write(FileChannel::open, filename, emptyCheckpoint, Checkpoint.write(FileChannel::open, filename, emptyCheckpoint,
StandardOpenOption.WRITE, StandardOpenOption.READ, StandardOpenOption.CREATE_NEW); StandardOpenOption.WRITE, StandardOpenOption.READ, StandardOpenOption.CREATE_NEW);
// fsync with metadata here to make sure. // fsync with metadata here to make sure.

View File

@ -31,6 +31,7 @@ import org.apache.lucene.store.NativeFSLockFactory;
import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse; import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse;
import org.elasticsearch.action.admin.indices.flush.FlushRequest; import org.elasticsearch.action.admin.indices.flush.FlushRequest;
import org.elasticsearch.action.admin.indices.recovery.RecoveryResponse; import org.elasticsearch.action.admin.indices.recovery.RecoveryResponse;
import org.elasticsearch.action.admin.indices.stats.ShardStats;
import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException; import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.search.SearchRequestBuilder;
@ -48,6 +49,7 @@ import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.index.Index; import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.MockEngineFactoryPlugin; import org.elasticsearch.index.MockEngineFactoryPlugin;
import org.elasticsearch.index.seqno.SeqNoStats;
import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.indices.IndicesService; import org.elasticsearch.indices.IndicesService;
@ -74,6 +76,7 @@ import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.greaterThan;
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE, numDataNodes = 0) @ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE, numDataNodes = 0)
@ -214,6 +217,10 @@ public class TruncateTranslogIT extends ESIntegTestCase {
final RecoveryState replicaRecoveryState = recoveryResponse.shardRecoveryStates().get("test").stream() final RecoveryState replicaRecoveryState = recoveryResponse.shardRecoveryStates().get("test").stream()
.filter(recoveryState -> recoveryState.getPrimary() == false).findFirst().get(); .filter(recoveryState -> recoveryState.getPrimary() == false).findFirst().get();
assertThat(replicaRecoveryState.getIndex().toString(), replicaRecoveryState.getIndex().recoveredFileCount(), greaterThan(0)); assertThat(replicaRecoveryState.getIndex().toString(), replicaRecoveryState.getIndex().recoveredFileCount(), greaterThan(0));
// Ensure that the global checkpoint and local checkpoint are restored from the max seqno of the last commit.
final SeqNoStats seqNoStats = getSeqNoStats("test", 0);
assertThat(seqNoStats.getGlobalCheckpoint(), equalTo(seqNoStats.getMaxSeqNo()));
assertThat(seqNoStats.getLocalCheckpoint(), equalTo(seqNoStats.getMaxSeqNo()));
} }
public void testCorruptTranslogTruncationOfReplica() throws Exception { public void testCorruptTranslogTruncationOfReplica() throws Exception {
@ -316,6 +323,10 @@ public class TruncateTranslogIT extends ESIntegTestCase {
.filter(recoveryState -> recoveryState.getPrimary() == false).findFirst().get(); .filter(recoveryState -> recoveryState.getPrimary() == false).findFirst().get();
// the replica translog was disabled so it doesn't know what hte global checkpoint is and thus can't do ops based recovery // the replica translog was disabled so it doesn't know what hte global checkpoint is and thus can't do ops based recovery
assertThat(replicaRecoveryState.getIndex().toString(), replicaRecoveryState.getIndex().recoveredFileCount(), greaterThan(0)); assertThat(replicaRecoveryState.getIndex().toString(), replicaRecoveryState.getIndex().recoveredFileCount(), greaterThan(0));
// Ensure that the global checkpoint and local checkpoint are restored from the max seqno of the last commit.
final SeqNoStats seqNoStats = getSeqNoStats("test", 0);
assertThat(seqNoStats.getGlobalCheckpoint(), equalTo(seqNoStats.getMaxSeqNo()));
assertThat(seqNoStats.getLocalCheckpoint(), equalTo(seqNoStats.getMaxSeqNo()));
} }
private Set<Path> getTranslogDirs(String indexName) throws IOException { private Set<Path> getTranslogDirs(String indexName) throws IOException {
@ -360,4 +371,10 @@ public class TruncateTranslogIT extends ESIntegTestCase {
client().admin().indices().prepareUpdateSettings(index).setSettings(settings).get(); client().admin().indices().prepareUpdateSettings(index).setSettings(settings).get();
} }
private SeqNoStats getSeqNoStats(String index, int shardId) {
final ShardStats[] shardStats = client().admin().indices()
.prepareStats(index).get()
.getIndices().get(index).getShards();
return shardStats[shardId].getSeqNoStats();
}
} }