[CORE] Allow primary promotion on shadow replica without failing the shard
Today we fail the shard if we need to upgrade a replica to a primary on shadow replicas on shared filesystem. Yet, this commit allows promotion by re-initializing on the master preventing reallocation of all replicas.
This commit is contained in:
parent
306b7b0f2b
commit
1ed6451229
|
@ -101,16 +101,12 @@ public class MutableShardRouting extends ImmutableShardRouting {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the shards state to <code>UNASSIGNED</code>.
|
* Moves the shard from started to initializing and bumps the version
|
||||||
* //TODO document the state
|
|
||||||
*/
|
*/
|
||||||
void deassignNode() {
|
void reinitializeShard() {
|
||||||
|
assert state == ShardRoutingState.STARTED;
|
||||||
version++;
|
version++;
|
||||||
assert state != ShardRoutingState.UNASSIGNED;
|
state = ShardRoutingState.INITIALIZING;
|
||||||
|
|
||||||
state = ShardRoutingState.UNASSIGNED;
|
|
||||||
this.currentNodeId = null;
|
|
||||||
this.relocatingNodeId = null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -520,6 +520,16 @@ public class RoutingNodes implements Iterable<RoutingNode> {
|
||||||
return nodesToShards.values().toArray(new RoutingNode[nodesToShards.size()]);
|
return nodesToShards.values().toArray(new RoutingNode[nodesToShards.size()]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void reinitShadowPrimary(MutableShardRouting candidate) {
|
||||||
|
if (candidate.relocating()) {
|
||||||
|
cancelRelocation(candidate);
|
||||||
|
}
|
||||||
|
candidate.reinitializeShard();
|
||||||
|
inactivePrimaryCount++;
|
||||||
|
inactiveShardCount++;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
public final static class UnassignedShards implements Iterable<MutableShardRouting> {
|
public final static class UnassignedShards implements Iterable<MutableShardRouting> {
|
||||||
|
|
||||||
private final List<MutableShardRouting> unassigned;
|
private final List<MutableShardRouting> unassigned;
|
||||||
|
|
|
@ -21,11 +21,13 @@ package org.elasticsearch.cluster.routing.allocation;
|
||||||
|
|
||||||
import com.carrotsearch.hppc.cursors.ObjectCursor;
|
import com.carrotsearch.hppc.cursors.ObjectCursor;
|
||||||
import com.google.common.collect.ImmutableList;
|
import com.google.common.collect.ImmutableList;
|
||||||
|
import com.google.common.collect.Iterators;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
import org.elasticsearch.ElasticsearchException;
|
import org.elasticsearch.ElasticsearchException;
|
||||||
import org.elasticsearch.ElasticsearchIllegalStateException;
|
import org.elasticsearch.ElasticsearchIllegalStateException;
|
||||||
import org.elasticsearch.cluster.ClusterInfoService;
|
import org.elasticsearch.cluster.ClusterInfoService;
|
||||||
import org.elasticsearch.cluster.ClusterState;
|
import org.elasticsearch.cluster.ClusterState;
|
||||||
|
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||||
import org.elasticsearch.cluster.node.DiscoveryNode;
|
import org.elasticsearch.cluster.node.DiscoveryNode;
|
||||||
import org.elasticsearch.cluster.routing.*;
|
import org.elasticsearch.cluster.routing.*;
|
||||||
import org.elasticsearch.cluster.routing.allocation.allocator.ShardsAllocators;
|
import org.elasticsearch.cluster.routing.allocation.allocator.ShardsAllocators;
|
||||||
|
@ -285,6 +287,7 @@ public class AllocationService extends AbstractComponent {
|
||||||
shardsToFail.add(routing);
|
shardsToFail.add(routing);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (ShardRouting shardToFail : shardsToFail) {
|
for (ShardRouting shardToFail : shardsToFail) {
|
||||||
|
@ -293,10 +296,12 @@ public class AllocationService extends AbstractComponent {
|
||||||
|
|
||||||
// now, go over and elect a new primary if possible, not, from this code block on, if one is elected,
|
// now, go over and elect a new primary if possible, not, from this code block on, if one is elected,
|
||||||
// routingNodes.hasUnassignedPrimaries() will potentially be false
|
// routingNodes.hasUnassignedPrimaries() will potentially be false
|
||||||
|
|
||||||
for (MutableShardRouting shardEntry : routingNodes.unassigned()) {
|
for (MutableShardRouting shardEntry : routingNodes.unassigned()) {
|
||||||
if (shardEntry.primary()) {
|
if (shardEntry.primary()) {
|
||||||
MutableShardRouting candidate = allocation.routingNodes().activeReplica(shardEntry);
|
MutableShardRouting candidate = allocation.routingNodes().activeReplica(shardEntry);
|
||||||
if (candidate != null) {
|
if (candidate != null) {
|
||||||
|
IndexMetaData index = allocation.metaData().index(candidate.index());
|
||||||
routingNodes.swapPrimaryFlag(shardEntry, candidate);
|
routingNodes.swapPrimaryFlag(shardEntry, candidate);
|
||||||
if (candidate.relocatingNodeId() != null) {
|
if (candidate.relocatingNodeId() != null) {
|
||||||
changed = true;
|
changed = true;
|
||||||
|
@ -311,6 +316,10 @@ public class AllocationService extends AbstractComponent {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (IndexMetaData.isIndexUsingShadowReplicas(index.settings())) {
|
||||||
|
routingNodes.reinitShadowPrimary(candidate);
|
||||||
|
changed = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1187,13 +1187,25 @@ public class IndexShard extends AbstractIndexShardComponent {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void createNewEngine() {
|
private void createNewEngine() {
|
||||||
synchronized (mutex) {
|
synchronized (mutex) {
|
||||||
if (state == IndexShardState.CLOSED) {
|
if (state == IndexShardState.CLOSED) {
|
||||||
throw new EngineClosedException(shardId);
|
throw new EngineClosedException(shardId);
|
||||||
}
|
}
|
||||||
assert this.currentEngineReference.get() == null;
|
assert this.currentEngineReference.get() == null;
|
||||||
this.currentEngineReference.set(engineFactory.newReadWriteEngine(config));
|
this.currentEngineReference.set(newEngine());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected Engine newEngine() {
|
||||||
|
return engineFactory.newReadWriteEngine(config);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns <code>true</code> iff this shard allows primary promotion, otherwise <code>false</code>
|
||||||
|
*/
|
||||||
|
public boolean allowsPrimaryPromotion() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
*/
|
*/
|
||||||
package org.elasticsearch.index.shard;
|
package org.elasticsearch.index.shard;
|
||||||
|
|
||||||
|
import org.elasticsearch.ElasticsearchException;
|
||||||
import org.elasticsearch.ElasticsearchIllegalStateException;
|
import org.elasticsearch.ElasticsearchIllegalStateException;
|
||||||
import org.elasticsearch.cluster.routing.ShardRouting;
|
import org.elasticsearch.cluster.routing.ShardRouting;
|
||||||
import org.elasticsearch.common.Nullable;
|
import org.elasticsearch.common.Nullable;
|
||||||
|
@ -31,6 +32,7 @@ import org.elasticsearch.index.cache.filter.ShardFilterCache;
|
||||||
import org.elasticsearch.index.cache.query.ShardQueryCache;
|
import org.elasticsearch.index.cache.query.ShardQueryCache;
|
||||||
import org.elasticsearch.index.codec.CodecService;
|
import org.elasticsearch.index.codec.CodecService;
|
||||||
import org.elasticsearch.index.deletionpolicy.SnapshotDeletionPolicy;
|
import org.elasticsearch.index.deletionpolicy.SnapshotDeletionPolicy;
|
||||||
|
import org.elasticsearch.index.engine.Engine;
|
||||||
import org.elasticsearch.index.engine.EngineClosedException;
|
import org.elasticsearch.index.engine.EngineClosedException;
|
||||||
import org.elasticsearch.index.engine.EngineFactory;
|
import org.elasticsearch.index.engine.EngineFactory;
|
||||||
import org.elasticsearch.index.fielddata.IndexFieldDataService;
|
import org.elasticsearch.index.fielddata.IndexFieldDataService;
|
||||||
|
@ -62,7 +64,7 @@ import org.elasticsearch.threadpool.ThreadPool;
|
||||||
* promoted to a primary causes the shard to fail, kicking off a re-allocation
|
* promoted to a primary causes the shard to fail, kicking off a re-allocation
|
||||||
* of the primary shard.
|
* of the primary shard.
|
||||||
*/
|
*/
|
||||||
public class ShadowIndexShard extends IndexShard {
|
public final class ShadowIndexShard extends IndexShard {
|
||||||
|
|
||||||
private final Object mutex = new Object();
|
private final Object mutex = new Object();
|
||||||
|
|
||||||
|
@ -98,28 +100,19 @@ public class ShadowIndexShard extends IndexShard {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public IndexShard routingEntry(ShardRouting newRouting) {
|
public IndexShard routingEntry(ShardRouting newRouting) {
|
||||||
ShardRouting shardRouting = this.routingEntry();
|
if (newRouting.primary() == true) {// becoming a primary
|
||||||
super.routingEntry(newRouting);
|
throw new ElasticsearchIllegalStateException("can't promote shard to primary");
|
||||||
// check for a shadow replica that now needs to be transformed into
|
|
||||||
// a normal primary today we simply fail it to force reallocation
|
|
||||||
if (shardRouting != null && shardRouting.primary() == false && // currently a replica
|
|
||||||
newRouting.primary() == true) {// becoming a primary
|
|
||||||
failShard("can't promote shadow replica to primary",
|
|
||||||
new ElasticsearchIllegalStateException("can't promote shadow replica to primary"));
|
|
||||||
}
|
}
|
||||||
return this;
|
return super.routingEntry(newRouting);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void createNewEngine() {
|
protected Engine newEngine() {
|
||||||
synchronized (mutex) {
|
|
||||||
if (state == IndexShardState.CLOSED) {
|
|
||||||
throw new EngineClosedException(shardId);
|
|
||||||
}
|
|
||||||
assert this.currentEngineReference.get() == null;
|
|
||||||
assert this.shardRouting.primary() == false;
|
assert this.shardRouting.primary() == false;
|
||||||
// Use the read-only engine for shadow replicas
|
return engineFactory.newReadOnlyEngine(config);
|
||||||
this.currentEngineReference.set(engineFactory.newReadOnlyEngine(config));
|
}
|
||||||
}
|
|
||||||
|
public boolean allowsPrimaryPromotion() {
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,7 +47,6 @@ import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
import org.elasticsearch.common.unit.TimeValue;
|
import org.elasticsearch.common.unit.TimeValue;
|
||||||
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
|
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
|
||||||
import org.elasticsearch.env.NodeEnvironment;
|
|
||||||
import org.elasticsearch.index.IndexShardAlreadyExistsException;
|
import org.elasticsearch.index.IndexShardAlreadyExistsException;
|
||||||
import org.elasticsearch.index.IndexShardMissingException;
|
import org.elasticsearch.index.IndexShardMissingException;
|
||||||
import org.elasticsearch.index.aliases.IndexAlias;
|
import org.elasticsearch.index.aliases.IndexAlias;
|
||||||
|
@ -70,7 +69,6 @@ import org.elasticsearch.threadpool.ThreadPool;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
import java.util.concurrent.ConcurrentMap;
|
import java.util.concurrent.ConcurrentMap;
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
|
||||||
|
|
||||||
import static com.google.common.collect.Maps.newHashMap;
|
import static com.google.common.collect.Maps.newHashMap;
|
||||||
import static org.elasticsearch.ExceptionsHelper.detailedMessage;
|
import static org.elasticsearch.ExceptionsHelper.detailedMessage;
|
||||||
|
@ -95,7 +93,6 @@ public class IndicesClusterStateService extends AbstractLifecycleComponent<Indic
|
||||||
// a list of shards that failed during recovery
|
// a list of shards that failed during recovery
|
||||||
// we keep track of these shards in order to prevent repeated recovery of these shards on each cluster state update
|
// we keep track of these shards in order to prevent repeated recovery of these shards on each cluster state update
|
||||||
private final ConcurrentMap<ShardId, FailedShard> failedShards = ConcurrentCollections.newConcurrentMap();
|
private final ConcurrentMap<ShardId, FailedShard> failedShards = ConcurrentCollections.newConcurrentMap();
|
||||||
private final NodeEnvironment nodeEnvironment;
|
|
||||||
|
|
||||||
static class FailedShard {
|
static class FailedShard {
|
||||||
public final long version;
|
public final long version;
|
||||||
|
@ -111,15 +108,13 @@ public class IndicesClusterStateService extends AbstractLifecycleComponent<Indic
|
||||||
private final FailedEngineHandler failedEngineHandler = new FailedEngineHandler();
|
private final FailedEngineHandler failedEngineHandler = new FailedEngineHandler();
|
||||||
|
|
||||||
private final boolean sendRefreshMapping;
|
private final boolean sendRefreshMapping;
|
||||||
private final AtomicLong recoveryIdGenerator = new AtomicLong();
|
|
||||||
|
|
||||||
@Inject
|
@Inject
|
||||||
public IndicesClusterStateService(Settings settings, IndicesService indicesService, ClusterService clusterService,
|
public IndicesClusterStateService(Settings settings, IndicesService indicesService, ClusterService clusterService,
|
||||||
ThreadPool threadPool, RecoveryTarget recoveryTarget,
|
ThreadPool threadPool, RecoveryTarget recoveryTarget,
|
||||||
ShardStateAction shardStateAction,
|
ShardStateAction shardStateAction,
|
||||||
NodeIndexDeletedAction nodeIndexDeletedAction,
|
NodeIndexDeletedAction nodeIndexDeletedAction,
|
||||||
NodeMappingRefreshAction nodeMappingRefreshAction,
|
NodeMappingRefreshAction nodeMappingRefreshAction) {
|
||||||
NodeEnvironment nodeEnvironment) {
|
|
||||||
super(settings);
|
super(settings);
|
||||||
this.indicesService = indicesService;
|
this.indicesService = indicesService;
|
||||||
this.clusterService = clusterService;
|
this.clusterService = clusterService;
|
||||||
|
@ -130,7 +125,6 @@ public class IndicesClusterStateService extends AbstractLifecycleComponent<Indic
|
||||||
this.nodeMappingRefreshAction = nodeMappingRefreshAction;
|
this.nodeMappingRefreshAction = nodeMappingRefreshAction;
|
||||||
|
|
||||||
this.sendRefreshMapping = componentSettings.getAsBoolean("send_refresh_mapping", true);
|
this.sendRefreshMapping = componentSettings.getAsBoolean("send_refresh_mapping", true);
|
||||||
this.nodeEnvironment = nodeEnvironment;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -584,11 +578,16 @@ public class IndicesClusterStateService extends AbstractLifecycleComponent<Indic
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (shardHasBeenRemoved == false && !shardRouting.equals(indexShard.routingEntry())) {
|
if (shardHasBeenRemoved == false && !shardRouting.equals(indexShard.routingEntry())) {
|
||||||
|
if (shardRouting.primary() && indexShard.routingEntry().primary() == false && shardRouting.initializing() && indexShard.allowsPrimaryPromotion() == false) {
|
||||||
|
logger.debug("{} reinitialize shard on primary promotion", indexShard.shardId());
|
||||||
|
indexService.removeShard(shardId, "promoted to primary");
|
||||||
|
} else {
|
||||||
// if we happen to remove the shardRouting by id above we don't need to jump in here!
|
// if we happen to remove the shardRouting by id above we don't need to jump in here!
|
||||||
indexShard.routingEntry(shardRouting);
|
indexShard.routingEntry(shardRouting);
|
||||||
indexService.shardInjectorSafe(shardId).getInstance(IndexShardGatewayService.class).routingStateChanged();
|
indexService.shardInjectorSafe(shardId).getInstance(IndexShardGatewayService.class).routingStateChanged();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (shardRouting.initializing()) {
|
if (shardRouting.initializing()) {
|
||||||
applyInitializingShard(routingTable, nodes, indexMetaData, routingTable.index(shardRouting.index()).shard(shardRouting.id()), shardRouting);
|
applyInitializingShard(routingTable, nodes, indexMetaData, routingTable.index(shardRouting.index()).shard(shardRouting.id()), shardRouting);
|
||||||
|
|
|
@ -32,6 +32,8 @@ import org.elasticsearch.search.SearchHit;
|
||||||
import org.elasticsearch.search.sort.SortOrder;
|
import org.elasticsearch.search.sort.SortOrder;
|
||||||
import org.elasticsearch.test.ElasticsearchIntegrationTest;
|
import org.elasticsearch.test.ElasticsearchIntegrationTest;
|
||||||
import org.elasticsearch.test.InternalTestCluster;
|
import org.elasticsearch.test.InternalTestCluster;
|
||||||
|
import org.elasticsearch.test.junit.annotations.TestLogging;
|
||||||
|
import org.junit.Ignore;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
|
@ -165,6 +167,15 @@ public class IndexWithShadowReplicasTests extends ElasticsearchIntegrationTest {
|
||||||
assertTrue(gResp2.toString(), gResp2.isExists());
|
assertTrue(gResp2.toString(), gResp2.isExists());
|
||||||
assertThat(gResp1.getField("foo").getValue().toString(), equalTo("bar"));
|
assertThat(gResp1.getField("foo").getValue().toString(), equalTo("bar"));
|
||||||
assertThat(gResp2.getField("foo").getValue().toString(), equalTo("bar"));
|
assertThat(gResp2.getField("foo").getValue().toString(), equalTo("bar"));
|
||||||
|
|
||||||
|
client().prepareIndex(IDX, "doc", "1").setSource("foo", "foobar").get();
|
||||||
|
client().prepareIndex(IDX, "doc", "2").setSource("foo", "foobar").get();
|
||||||
|
gResp1 = client().prepareGet(IDX, "doc", "1").setFields("foo").get();
|
||||||
|
gResp2 = client().prepareGet(IDX, "doc", "2").setFields("foo").get();
|
||||||
|
assertTrue(gResp1.isExists());
|
||||||
|
assertTrue(gResp2.toString(), gResp2.isExists());
|
||||||
|
assertThat(gResp1.getField("foo").getValue().toString(), equalTo("foobar"));
|
||||||
|
assertThat(gResp2.getField("foo").getValue().toString(), equalTo("foobar"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -27,6 +27,7 @@ import org.apache.lucene.store.LockFactory;
|
||||||
import org.apache.lucene.store.StoreRateLimiting;
|
import org.apache.lucene.store.StoreRateLimiting;
|
||||||
import org.apache.lucene.util.AbstractRandomizedTest;
|
import org.apache.lucene.util.AbstractRandomizedTest;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
|
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||||
import org.elasticsearch.common.Nullable;
|
import org.elasticsearch.common.Nullable;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
import org.elasticsearch.common.io.stream.BytesStreamOutput;
|
import org.elasticsearch.common.io.stream.BytesStreamOutput;
|
||||||
|
@ -86,7 +87,7 @@ public class MockFSDirectoryService extends FsDirectoryService {
|
||||||
@IndexSettings Settings indexSettings) {
|
@IndexSettings Settings indexSettings) {
|
||||||
if (indexShard != null && shardId.equals(sid)) {
|
if (indexShard != null && shardId.equals(sid)) {
|
||||||
logger.info("Shard state before potentially flushing is {}", indexShard.state());
|
logger.info("Shard state before potentially flushing is {}", indexShard.state());
|
||||||
if (validCheckIndexStates.contains(indexShard.state()) && indexShard.engine() instanceof InternalEngine) {
|
if (validCheckIndexStates.contains(indexShard.state()) && IndexMetaData.isOnSharedFilesystem(indexSettings) == false) {
|
||||||
// When the the internal engine closes we do a rollback, which removes uncommitted segments
|
// When the the internal engine closes we do a rollback, which removes uncommitted segments
|
||||||
// By doing a commit flush we perform a Lucene commit, but don't clear the translog,
|
// By doing a commit flush we perform a Lucene commit, but don't clear the translog,
|
||||||
// so that even in tests where don't flush we can check the integrity of the Lucene index
|
// so that even in tests where don't flush we can check the integrity of the Lucene index
|
||||||
|
|
Loading…
Reference in New Issue