Merge pull request #11615 from bleskes/async_fetch_non_existent_nodes
Internal: AsyncShardFetch can hang if there are new nodes in cluster state
This commit is contained in:
commit
df8a3006fc
|
@ -83,6 +83,11 @@ public abstract class TransportNodesAction<NodesRequest extends BaseNodesRequest
|
||||||
return nodesIds;
|
return nodesIds;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected String[] resolveNodes(NodesRequest request, ClusterState clusterState) {
|
||||||
|
return clusterState.nodes().resolveNodesIds(request.nodesIds());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private class AsyncAction {
|
private class AsyncAction {
|
||||||
|
|
||||||
private final NodesRequest request;
|
private final NodesRequest request;
|
||||||
|
@ -96,7 +101,7 @@ public abstract class TransportNodesAction<NodesRequest extends BaseNodesRequest
|
||||||
this.request = request;
|
this.request = request;
|
||||||
this.listener = listener;
|
this.listener = listener;
|
||||||
clusterState = clusterService.state();
|
clusterState = clusterService.state();
|
||||||
String[] nodesIds = clusterState.nodes().resolveNodesIds(request.nodesIds());
|
String[] nodesIds = resolveNodes(request, clusterState);
|
||||||
this.nodesIds = filterNodeIds(clusterState.nodes(), nodesIds);
|
this.nodesIds = filterNodeIds(clusterState.nodes(), nodesIds);
|
||||||
this.responses = new AtomicReferenceArray<>(this.nodesIds.length);
|
this.responses = new AtomicReferenceArray<>(this.nodesIds.length);
|
||||||
}
|
}
|
||||||
|
|
|
@ -165,19 +165,29 @@ public abstract class AsyncShardFetch<T extends BaseNodeResponse> implements Rel
|
||||||
protected synchronized void processAsyncFetch(ShardId shardId, T[] responses, FailedNodeException[] failures) {
|
protected synchronized void processAsyncFetch(ShardId shardId, T[] responses, FailedNodeException[] failures) {
|
||||||
if (closed) {
|
if (closed) {
|
||||||
// we are closed, no need to process this async fetch at all
|
// we are closed, no need to process this async fetch at all
|
||||||
|
logger.trace("{} ignoring fetched [{}] results, already closed", shardId, type);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
logger.trace("{} processing fetched [{}] results", shardId, type);
|
||||||
|
|
||||||
if (responses != null) {
|
if (responses != null) {
|
||||||
for (T response : responses) {
|
for (T response : responses) {
|
||||||
NodeEntry<T> nodeEntry = cache.get(response.getNode().getId());
|
NodeEntry<T> nodeEntry = cache.get(response.getNode().getId());
|
||||||
// if the entry is there, and not marked as failed already, process it
|
// if the entry is there, and not marked as failed already, process it
|
||||||
if (nodeEntry != null && nodeEntry.isFailed() == false) {
|
if (nodeEntry == null) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (nodeEntry.isFailed()) {
|
||||||
|
logger.trace("{} node {} has failed for [{}] (failure [{}])", shardId, nodeEntry.getNodeId(), type, nodeEntry.getFailure());
|
||||||
|
} else {
|
||||||
|
logger.trace("{} marking {} as done for [{}]", shardId, nodeEntry.getNodeId(), type);
|
||||||
nodeEntry.doneFetching(response);
|
nodeEntry.doneFetching(response);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (failures != null) {
|
if (failures != null) {
|
||||||
for (FailedNodeException failure : failures) {
|
for (FailedNodeException failure : failures) {
|
||||||
|
logger.trace("{} processing failure {} for [{}]", shardId, failure, type);
|
||||||
NodeEntry<T> nodeEntry = cache.get(failure.nodeId());
|
NodeEntry<T> nodeEntry = cache.get(failure.nodeId());
|
||||||
// if the entry is there, and not marked as failed already, process it
|
// if the entry is there, and not marked as failed already, process it
|
||||||
if (nodeEntry != null && nodeEntry.isFailed() == false) {
|
if (nodeEntry != null && nodeEntry.isFailed() == false) {
|
||||||
|
@ -253,6 +263,7 @@ public abstract class AsyncShardFetch<T extends BaseNodeResponse> implements Rel
|
||||||
// visible for testing
|
// visible for testing
|
||||||
void asyncFetch(final ShardId shardId, final String[] nodesIds, final MetaData metaData) {
|
void asyncFetch(final ShardId shardId, final String[] nodesIds, final MetaData metaData) {
|
||||||
IndexMetaData indexMetaData = metaData.index(shardId.getIndex());
|
IndexMetaData indexMetaData = metaData.index(shardId.getIndex());
|
||||||
|
logger.trace("{} fetching [{}] from {}", shardId, type, nodesIds);
|
||||||
action.list(shardId, indexMetaData, nodesIds, new ActionListener<BaseNodesResponse<T>>() {
|
action.list(shardId, indexMetaData, nodesIds, new ActionListener<BaseNodesResponse<T>>() {
|
||||||
@Override
|
@Override
|
||||||
public void onResponse(BaseNodesResponse<T> response) {
|
public void onResponse(BaseNodesResponse<T> response) {
|
||||||
|
|
|
@ -27,6 +27,7 @@ import org.elasticsearch.action.support.ActionFilters;
|
||||||
import org.elasticsearch.action.support.nodes.*;
|
import org.elasticsearch.action.support.nodes.*;
|
||||||
import org.elasticsearch.cluster.ClusterName;
|
import org.elasticsearch.cluster.ClusterName;
|
||||||
import org.elasticsearch.cluster.ClusterService;
|
import org.elasticsearch.cluster.ClusterService;
|
||||||
|
import org.elasticsearch.cluster.ClusterState;
|
||||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||||
import org.elasticsearch.cluster.node.DiscoveryNode;
|
import org.elasticsearch.cluster.node.DiscoveryNode;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
|
@ -68,6 +69,13 @@ public class TransportNodesListGatewayStartedShards extends TransportNodesAction
|
||||||
execute(new Request(shardId, indexMetaData.getUUID(), nodesIds), listener);
|
execute(new Request(shardId, indexMetaData.getUUID(), nodesIds), listener);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected String[] resolveNodes(Request request, ClusterState clusterState) {
|
||||||
|
// default implementation may filter out non existent nodes. it's important to keep exactly the ids
|
||||||
|
// we were given for accounting on the caller
|
||||||
|
return request.nodesIds();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected boolean transportCompress() {
|
protected boolean transportCompress() {
|
||||||
return true; // this can become big...
|
return true; // this can become big...
|
||||||
|
|
|
@ -27,6 +27,7 @@ import org.elasticsearch.action.support.ActionFilters;
|
||||||
import org.elasticsearch.action.support.nodes.*;
|
import org.elasticsearch.action.support.nodes.*;
|
||||||
import org.elasticsearch.cluster.ClusterName;
|
import org.elasticsearch.cluster.ClusterName;
|
||||||
import org.elasticsearch.cluster.ClusterService;
|
import org.elasticsearch.cluster.ClusterService;
|
||||||
|
import org.elasticsearch.cluster.ClusterState;
|
||||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||||
import org.elasticsearch.cluster.node.DiscoveryNode;
|
import org.elasticsearch.cluster.node.DiscoveryNode;
|
||||||
import org.elasticsearch.common.inject.Inject;
|
import org.elasticsearch.common.inject.Inject;
|
||||||
|
@ -81,6 +82,13 @@ public class TransportNodesListShardStoreMetaData extends TransportNodesAction<T
|
||||||
execute(new Request(shardId, false, nodesIds), listener);
|
execute(new Request(shardId, false, nodesIds), listener);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected String[] resolveNodes(Request request, ClusterState clusterState) {
|
||||||
|
// default implementation may filter out non existent nodes. it's important to keep exactly the ids
|
||||||
|
// we were given for accounting on the caller
|
||||||
|
return request.nodesIds();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected NodeRequest newNodeRequest(String nodeId, Request request) {
|
protected NodeRequest newNodeRequest(String nodeId, Request request) {
|
||||||
return new NodeRequest(nodeId, request);
|
return new NodeRequest(nodeId, request);
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.elasticsearch.discovery.zen.elect.ElectMasterService;
|
||||||
import org.elasticsearch.index.query.QueryBuilders;
|
import org.elasticsearch.index.query.QueryBuilders;
|
||||||
import org.elasticsearch.test.ElasticsearchIntegrationTest;
|
import org.elasticsearch.test.ElasticsearchIntegrationTest;
|
||||||
import org.elasticsearch.test.ElasticsearchIntegrationTest.ClusterScope;
|
import org.elasticsearch.test.ElasticsearchIntegrationTest.ClusterScope;
|
||||||
|
import org.elasticsearch.test.junit.annotations.TestLogging;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.util.concurrent.ExecutionException;
|
import java.util.concurrent.ExecutionException;
|
||||||
|
@ -46,6 +47,7 @@ import static org.hamcrest.Matchers.*;
|
||||||
public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest {
|
public class MinimumMasterNodesTests extends ElasticsearchIntegrationTest {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
@TestLogging("cluster.service:TRACE,discovery.zen:TRACE,gateway:TRACE,transport.tracer:TRACE")
|
||||||
public void simpleMinimumMasterNodes() throws Exception {
|
public void simpleMinimumMasterNodes() throws Exception {
|
||||||
|
|
||||||
Settings settings = settingsBuilder()
|
Settings settings = settingsBuilder()
|
||||||
|
|
|
@ -20,24 +20,27 @@
|
||||||
package org.elasticsearch.indices.state;
|
package org.elasticsearch.indices.state;
|
||||||
|
|
||||||
import com.google.common.collect.ImmutableMap;
|
import com.google.common.collect.ImmutableMap;
|
||||||
|
import org.elasticsearch.Version;
|
||||||
import org.elasticsearch.action.ActionListener;
|
import org.elasticsearch.action.ActionListener;
|
||||||
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse;
|
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse;
|
||||||
import org.elasticsearch.action.index.IndexResponse;
|
import org.elasticsearch.action.index.IndexResponse;
|
||||||
import org.elasticsearch.cluster.ClusterInfo;
|
import org.elasticsearch.cluster.*;
|
||||||
import org.elasticsearch.cluster.ClusterState;
|
import org.elasticsearch.cluster.block.ClusterBlocks;
|
||||||
import org.elasticsearch.cluster.DiskUsage;
|
|
||||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||||
import org.elasticsearch.cluster.metadata.MappingMetaData;
|
import org.elasticsearch.cluster.metadata.MappingMetaData;
|
||||||
|
import org.elasticsearch.cluster.metadata.MetaData;
|
||||||
|
import org.elasticsearch.cluster.node.DiscoveryNode;
|
||||||
import org.elasticsearch.cluster.node.DiscoveryNodes;
|
import org.elasticsearch.cluster.node.DiscoveryNodes;
|
||||||
import org.elasticsearch.cluster.routing.RoutingNodes;
|
import org.elasticsearch.cluster.routing.RoutingNodes;
|
||||||
import org.elasticsearch.cluster.routing.RoutingTable;
|
import org.elasticsearch.cluster.routing.RoutingTable;
|
||||||
import org.elasticsearch.cluster.routing.ShardRouting;
|
import org.elasticsearch.cluster.routing.ShardRouting;
|
||||||
|
import org.elasticsearch.cluster.routing.allocation.AllocationService;
|
||||||
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
|
import org.elasticsearch.cluster.routing.allocation.RoutingAllocation;
|
||||||
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDecider;
|
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDecider;
|
||||||
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders;
|
import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders;
|
||||||
import org.elasticsearch.common.collect.ImmutableOpenMap;
|
import org.elasticsearch.common.collect.ImmutableOpenMap;
|
||||||
import org.elasticsearch.common.settings.Settings;
|
import org.elasticsearch.common.settings.Settings;
|
||||||
|
import org.elasticsearch.common.transport.DummyTransportAddress;
|
||||||
import org.elasticsearch.common.unit.TimeValue;
|
import org.elasticsearch.common.unit.TimeValue;
|
||||||
import org.elasticsearch.discovery.DiscoveryModule;
|
import org.elasticsearch.discovery.DiscoveryModule;
|
||||||
import org.elasticsearch.discovery.DiscoverySettings;
|
import org.elasticsearch.discovery.DiscoverySettings;
|
||||||
|
@ -52,19 +55,12 @@ import org.elasticsearch.test.junit.annotations.TestLogging;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Arrays;
|
import java.util.*;
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.concurrent.atomic.AtomicReference;
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
|
|
||||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
|
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
|
||||||
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
|
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
|
||||||
import static org.hamcrest.Matchers.equalTo;
|
import static org.hamcrest.Matchers.*;
|
||||||
import static org.hamcrest.Matchers.hasItem;
|
|
||||||
import static org.hamcrest.Matchers.hasSize;
|
|
||||||
import static org.hamcrest.Matchers.instanceOf;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*/
|
*/
|
||||||
|
@ -102,6 +98,72 @@ public class RareClusterStateTests extends ElasticsearchIntegrationTest {
|
||||||
allocator.allocateUnassigned(routingAllocation);
|
allocator.allocateUnassigned(routingAllocation);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@TestLogging("gateway:TRACE")
|
||||||
|
public void testAssignmentWithJustAddedNodes() throws Exception {
|
||||||
|
internalCluster().startNode();
|
||||||
|
final String index = "index";
|
||||||
|
prepareCreate(index).setSettings(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1, IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0).get();
|
||||||
|
ensureGreen(index);
|
||||||
|
|
||||||
|
// close to have some unassigned started shards shards..
|
||||||
|
client().admin().indices().prepareClose(index).get();
|
||||||
|
|
||||||
|
|
||||||
|
final String masterName = internalCluster().getMasterName();
|
||||||
|
final ClusterService clusterService = internalCluster().clusterService(masterName);
|
||||||
|
final AllocationService allocationService = internalCluster().getInstance(AllocationService.class, masterName);
|
||||||
|
clusterService.submitStateUpdateTask("test-inject-node-and-reroute", new ClusterStateUpdateTask() {
|
||||||
|
@Override
|
||||||
|
public ClusterState execute(ClusterState currentState) throws Exception {
|
||||||
|
// inject a node
|
||||||
|
ClusterState.Builder builder = ClusterState.builder(currentState);
|
||||||
|
builder.nodes(DiscoveryNodes.builder(currentState.nodes()).put(new DiscoveryNode("_non_existent", DummyTransportAddress.INSTANCE, Version.CURRENT)));
|
||||||
|
|
||||||
|
// open index
|
||||||
|
final IndexMetaData indexMetaData = IndexMetaData.builder(currentState.metaData().index(index)).state(IndexMetaData.State.OPEN).build();
|
||||||
|
|
||||||
|
builder.metaData(MetaData.builder(currentState.metaData()).put(indexMetaData, true));
|
||||||
|
builder.blocks(ClusterBlocks.builder().blocks(currentState.blocks()).removeIndexBlocks(index));
|
||||||
|
ClusterState updatedState = builder.build();
|
||||||
|
|
||||||
|
RoutingTable.Builder routingTable = RoutingTable.builder(updatedState.routingTable());
|
||||||
|
routingTable.addAsRecovery(updatedState.metaData().index(index));
|
||||||
|
updatedState = ClusterState.builder(updatedState).routingTable(routingTable).build();
|
||||||
|
|
||||||
|
RoutingAllocation.Result result = allocationService.reroute(updatedState);
|
||||||
|
return ClusterState.builder(updatedState).routingResult(result).build();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onFailure(String source, Throwable t) {
|
||||||
|
|
||||||
|
}
|
||||||
|
});
|
||||||
|
ensureGreen(index);
|
||||||
|
// remove the extra node
|
||||||
|
clusterService.submitStateUpdateTask("test-remove-injected-node", new ClusterStateUpdateTask() {
|
||||||
|
@Override
|
||||||
|
public ClusterState execute(ClusterState currentState) throws Exception {
|
||||||
|
// inject a node
|
||||||
|
ClusterState.Builder builder = ClusterState.builder(currentState);
|
||||||
|
builder.nodes(DiscoveryNodes.builder(currentState.nodes()).remove("_non_existent"));
|
||||||
|
|
||||||
|
currentState = builder.build();
|
||||||
|
RoutingAllocation.Result result = allocationService.reroute(currentState);
|
||||||
|
return ClusterState.builder(currentState).routingResult(result).build();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onFailure(String source, Throwable t) {
|
||||||
|
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@TestLogging(value = "cluster.service:TRACE")
|
@TestLogging(value = "cluster.service:TRACE")
|
||||||
public void testDeleteCreateInOneBulk() throws Exception {
|
public void testDeleteCreateInOneBulk() throws Exception {
|
||||||
|
@ -190,6 +252,7 @@ public class RareClusterStateTests extends ElasticsearchIntegrationTest {
|
||||||
public void onResponse(PutMappingResponse response) {
|
public void onResponse(PutMappingResponse response) {
|
||||||
putMappingResponse.set(response);
|
putMappingResponse.set(response);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onFailure(Throwable e) {
|
public void onFailure(Throwable e) {
|
||||||
putMappingResponse.set(e);
|
putMappingResponse.set(e);
|
||||||
|
@ -221,6 +284,7 @@ public class RareClusterStateTests extends ElasticsearchIntegrationTest {
|
||||||
public void onResponse(IndexResponse response) {
|
public void onResponse(IndexResponse response) {
|
||||||
docIndexResponse.set(response);
|
docIndexResponse.set(response);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onFailure(Throwable e) {
|
public void onFailure(Throwable e) {
|
||||||
docIndexResponse.set(e);
|
docIndexResponse.set(e);
|
||||||
|
@ -304,6 +368,7 @@ public class RareClusterStateTests extends ElasticsearchIntegrationTest {
|
||||||
public void onResponse(PutMappingResponse response) {
|
public void onResponse(PutMappingResponse response) {
|
||||||
putMappingResponse.set(response);
|
putMappingResponse.set(response);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onFailure(Throwable e) {
|
public void onFailure(Throwable e) {
|
||||||
putMappingResponse.set(e);
|
putMappingResponse.set(e);
|
||||||
|
@ -329,6 +394,7 @@ public class RareClusterStateTests extends ElasticsearchIntegrationTest {
|
||||||
public void onResponse(IndexResponse response) {
|
public void onResponse(IndexResponse response) {
|
||||||
docIndexResponse.set(response);
|
docIndexResponse.set(response);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onFailure(Throwable e) {
|
public void onFailure(Throwable e) {
|
||||||
docIndexResponse.set(e);
|
docIndexResponse.set(e);
|
||||||
|
|
Loading…
Reference in New Issue