Truncate tlog cli should assign global checkpoint (#28192)
We are targeting to always have a safe index once the recovery is done. This invariant does not hold if the translog is manually truncated by users because the truncate translog cli resets the global checkpoint to unassigned. This commit assigns the global checkpoint to the max_seqno of the last commit when truncating translog. We can only safely do it because the truncate translog command will generate a new history uuid for that shard. With a new history UUID, sequence-based recovery between that shard and other old shards will be disabled. Relates #28181
This commit is contained in:
parent
a15ba75d93
commit
f2db2a02e2
|
@ -132,9 +132,19 @@ public class TruncateTranslogCommand extends EnvironmentAwareCommand {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Retrieve the generation and UUID from the existing data
|
// Retrieve the generation and UUID from the existing data
|
||||||
commitData = commits.get(commits.size() - 1).getUserData();
|
commitData = new HashMap<>(commits.get(commits.size() - 1).getUserData());
|
||||||
String translogGeneration = commitData.get(Translog.TRANSLOG_GENERATION_KEY);
|
String translogGeneration = commitData.get(Translog.TRANSLOG_GENERATION_KEY);
|
||||||
String translogUUID = commitData.get(Translog.TRANSLOG_UUID_KEY);
|
String translogUUID = commitData.get(Translog.TRANSLOG_UUID_KEY);
|
||||||
|
final long globalCheckpoint;
|
||||||
|
// In order to have a safe commit invariant, we have to assign the global checkpoint to the max_seqno of the last commit.
|
||||||
|
// We can only safely do it because we will generate a new history uuid this shard.
|
||||||
|
if (commitData.containsKey(SequenceNumbers.MAX_SEQ_NO)) {
|
||||||
|
globalCheckpoint = Long.parseLong(commitData.get(SequenceNumbers.MAX_SEQ_NO));
|
||||||
|
// Also advances the local checkpoint of the last commit to its max_seqno.
|
||||||
|
commitData.put(SequenceNumbers.LOCAL_CHECKPOINT_KEY, Long.toString(globalCheckpoint));
|
||||||
|
} else {
|
||||||
|
globalCheckpoint = SequenceNumbers.UNASSIGNED_SEQ_NO;
|
||||||
|
}
|
||||||
if (translogGeneration == null || translogUUID == null) {
|
if (translogGeneration == null || translogUUID == null) {
|
||||||
throw new ElasticsearchException("shard must have a valid translog generation and UUID but got: [{}] and: [{}]",
|
throw new ElasticsearchException("shard must have a valid translog generation and UUID but got: [{}] and: [{}]",
|
||||||
translogGeneration, translogUUID);
|
translogGeneration, translogUUID);
|
||||||
|
@ -153,7 +163,7 @@ public class TruncateTranslogCommand extends EnvironmentAwareCommand {
|
||||||
// Write empty checkpoint and translog to empty files
|
// Write empty checkpoint and translog to empty files
|
||||||
long gen = Long.parseLong(translogGeneration);
|
long gen = Long.parseLong(translogGeneration);
|
||||||
int translogLen = writeEmptyTranslog(tempEmptyTranslog, translogUUID);
|
int translogLen = writeEmptyTranslog(tempEmptyTranslog, translogUUID);
|
||||||
writeEmptyCheckpoint(tempEmptyCheckpoint, translogLen, gen);
|
writeEmptyCheckpoint(tempEmptyCheckpoint, translogLen, gen, globalCheckpoint);
|
||||||
|
|
||||||
terminal.println("Removing existing translog files");
|
terminal.println("Removing existing translog files");
|
||||||
IOUtils.rm(translogFiles.toArray(new Path[]{}));
|
IOUtils.rm(translogFiles.toArray(new Path[]{}));
|
||||||
|
@ -190,9 +200,9 @@ public class TruncateTranslogCommand extends EnvironmentAwareCommand {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Write a checkpoint file to the given location with the given generation */
|
/** Write a checkpoint file to the given location with the given generation */
|
||||||
public static void writeEmptyCheckpoint(Path filename, int translogLength, long translogGeneration) throws IOException {
|
static void writeEmptyCheckpoint(Path filename, int translogLength, long translogGeneration, long globalCheckpoint) throws IOException {
|
||||||
Checkpoint emptyCheckpoint = Checkpoint.emptyTranslogCheckpoint(translogLength, translogGeneration,
|
Checkpoint emptyCheckpoint = Checkpoint.emptyTranslogCheckpoint(translogLength, translogGeneration,
|
||||||
SequenceNumbers.UNASSIGNED_SEQ_NO, translogGeneration);
|
globalCheckpoint, translogGeneration);
|
||||||
Checkpoint.write(FileChannel::open, filename, emptyCheckpoint,
|
Checkpoint.write(FileChannel::open, filename, emptyCheckpoint,
|
||||||
StandardOpenOption.WRITE, StandardOpenOption.READ, StandardOpenOption.CREATE_NEW);
|
StandardOpenOption.WRITE, StandardOpenOption.READ, StandardOpenOption.CREATE_NEW);
|
||||||
// fsync with metadata here to make sure.
|
// fsync with metadata here to make sure.
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.lucene.store.NativeFSLockFactory;
|
||||||
import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse;
|
import org.elasticsearch.action.admin.cluster.node.stats.NodesStatsResponse;
|
||||||
import org.elasticsearch.action.admin.indices.flush.FlushRequest;
|
import org.elasticsearch.action.admin.indices.flush.FlushRequest;
|
||||||
import org.elasticsearch.action.admin.indices.recovery.RecoveryResponse;
|
import org.elasticsearch.action.admin.indices.recovery.RecoveryResponse;
|
||||||
|
import org.elasticsearch.action.admin.indices.stats.ShardStats;
|
||||||
import org.elasticsearch.action.index.IndexRequestBuilder;
|
import org.elasticsearch.action.index.IndexRequestBuilder;
|
||||||
import org.elasticsearch.action.search.SearchPhaseExecutionException;
|
import org.elasticsearch.action.search.SearchPhaseExecutionException;
|
||||||
import org.elasticsearch.action.search.SearchRequestBuilder;
|
import org.elasticsearch.action.search.SearchRequestBuilder;
|
||||||
|
@ -48,6 +49,7 @@ import org.elasticsearch.common.unit.TimeValue;
|
||||||
import org.elasticsearch.index.Index;
|
import org.elasticsearch.index.Index;
|
||||||
import org.elasticsearch.index.IndexSettings;
|
import org.elasticsearch.index.IndexSettings;
|
||||||
import org.elasticsearch.index.MockEngineFactoryPlugin;
|
import org.elasticsearch.index.MockEngineFactoryPlugin;
|
||||||
|
import org.elasticsearch.index.seqno.SeqNoStats;
|
||||||
import org.elasticsearch.index.shard.IndexShard;
|
import org.elasticsearch.index.shard.IndexShard;
|
||||||
import org.elasticsearch.index.shard.ShardId;
|
import org.elasticsearch.index.shard.ShardId;
|
||||||
import org.elasticsearch.indices.IndicesService;
|
import org.elasticsearch.indices.IndicesService;
|
||||||
|
@ -74,6 +76,7 @@ import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
|
||||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
|
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
|
||||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
|
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
|
||||||
import static org.hamcrest.Matchers.containsString;
|
import static org.hamcrest.Matchers.containsString;
|
||||||
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
import static org.hamcrest.Matchers.greaterThan;
|
import static org.hamcrest.Matchers.greaterThan;
|
||||||
|
|
||||||
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE, numDataNodes = 0)
|
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.SUITE, numDataNodes = 0)
|
||||||
|
@ -214,6 +217,10 @@ public class TruncateTranslogIT extends ESIntegTestCase {
|
||||||
final RecoveryState replicaRecoveryState = recoveryResponse.shardRecoveryStates().get("test").stream()
|
final RecoveryState replicaRecoveryState = recoveryResponse.shardRecoveryStates().get("test").stream()
|
||||||
.filter(recoveryState -> recoveryState.getPrimary() == false).findFirst().get();
|
.filter(recoveryState -> recoveryState.getPrimary() == false).findFirst().get();
|
||||||
assertThat(replicaRecoveryState.getIndex().toString(), replicaRecoveryState.getIndex().recoveredFileCount(), greaterThan(0));
|
assertThat(replicaRecoveryState.getIndex().toString(), replicaRecoveryState.getIndex().recoveredFileCount(), greaterThan(0));
|
||||||
|
// Ensure that the global checkpoint and local checkpoint are restored from the max seqno of the last commit.
|
||||||
|
final SeqNoStats seqNoStats = getSeqNoStats("test", 0);
|
||||||
|
assertThat(seqNoStats.getGlobalCheckpoint(), equalTo(seqNoStats.getMaxSeqNo()));
|
||||||
|
assertThat(seqNoStats.getLocalCheckpoint(), equalTo(seqNoStats.getMaxSeqNo()));
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCorruptTranslogTruncationOfReplica() throws Exception {
|
public void testCorruptTranslogTruncationOfReplica() throws Exception {
|
||||||
|
@ -316,6 +323,10 @@ public class TruncateTranslogIT extends ESIntegTestCase {
|
||||||
.filter(recoveryState -> recoveryState.getPrimary() == false).findFirst().get();
|
.filter(recoveryState -> recoveryState.getPrimary() == false).findFirst().get();
|
||||||
// the replica translog was disabled so it doesn't know what hte global checkpoint is and thus can't do ops based recovery
|
// the replica translog was disabled so it doesn't know what hte global checkpoint is and thus can't do ops based recovery
|
||||||
assertThat(replicaRecoveryState.getIndex().toString(), replicaRecoveryState.getIndex().recoveredFileCount(), greaterThan(0));
|
assertThat(replicaRecoveryState.getIndex().toString(), replicaRecoveryState.getIndex().recoveredFileCount(), greaterThan(0));
|
||||||
|
// Ensure that the global checkpoint and local checkpoint are restored from the max seqno of the last commit.
|
||||||
|
final SeqNoStats seqNoStats = getSeqNoStats("test", 0);
|
||||||
|
assertThat(seqNoStats.getGlobalCheckpoint(), equalTo(seqNoStats.getMaxSeqNo()));
|
||||||
|
assertThat(seqNoStats.getLocalCheckpoint(), equalTo(seqNoStats.getMaxSeqNo()));
|
||||||
}
|
}
|
||||||
|
|
||||||
private Set<Path> getTranslogDirs(String indexName) throws IOException {
|
private Set<Path> getTranslogDirs(String indexName) throws IOException {
|
||||||
|
@ -360,4 +371,10 @@ public class TruncateTranslogIT extends ESIntegTestCase {
|
||||||
client().admin().indices().prepareUpdateSettings(index).setSettings(settings).get();
|
client().admin().indices().prepareUpdateSettings(index).setSettings(settings).get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private SeqNoStats getSeqNoStats(String index, int shardId) {
|
||||||
|
final ShardStats[] shardStats = client().admin().indices()
|
||||||
|
.prepareStats(index).get()
|
||||||
|
.getIndices().get(index).getShards();
|
||||||
|
return shardStats[shardId].getSeqNoStats();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue