Discard stale node responses from async shard fetching (#24434)
Async shard fetching only uses the node id to correlate responses to requests. This can lead to a situation where a response from an earlier request is mistaken as response from a new request when a node is restarted. This commit adds unique round ids to correlate responses to requests.
This commit is contained in:
parent
4a578e9c71
commit
be19ccef57
|
@ -29,7 +29,6 @@ import org.elasticsearch.cluster.block.ClusterBlockException;
|
|||
import org.elasticsearch.cluster.block.ClusterBlockLevel;
|
||||
import org.elasticsearch.cluster.health.ClusterHealthStatus;
|
||||
import org.elasticsearch.cluster.health.ClusterShardHealth;
|
||||
import org.elasticsearch.cluster.metadata.IndexMetaData;
|
||||
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
|
||||
import org.elasticsearch.cluster.node.DiscoveryNode;
|
||||
import org.elasticsearch.cluster.node.DiscoveryNodes;
|
||||
|
@ -155,7 +154,7 @@ public class TransportIndicesShardStoresAction extends TransportMasterNodeReadAc
|
|||
}
|
||||
|
||||
@Override
|
||||
protected synchronized void processAsyncFetch(ShardId shardId, List<NodeGatewayStartedShards> responses, List<FailedNodeException> failures) {
|
||||
protected synchronized void processAsyncFetch(List<NodeGatewayStartedShards> responses, List<FailedNodeException> failures, long fetchingRound) {
|
||||
fetchResponses.add(new Response(shardId, responses, failures));
|
||||
if (expectedOps.countDown()) {
|
||||
finish();
|
||||
|
|
|
@ -44,6 +44,7 @@ import java.util.Iterator;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import static java.util.Collections.emptySet;
|
||||
import static java.util.Collections.unmodifiableSet;
|
||||
|
@ -67,10 +68,11 @@ public abstract class AsyncShardFetch<T extends BaseNodeResponse> implements Rel
|
|||
|
||||
protected final Logger logger;
|
||||
protected final String type;
|
||||
private final ShardId shardId;
|
||||
protected final ShardId shardId;
|
||||
private final Lister<BaseNodesResponse<T>, T> action;
|
||||
private final Map<String, NodeEntry<T>> cache = new HashMap<>();
|
||||
private final Set<String> nodesToIgnore = new HashSet<>();
|
||||
private final AtomicLong round = new AtomicLong();
|
||||
private boolean closed;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
|
@ -112,20 +114,22 @@ public abstract class AsyncShardFetch<T extends BaseNodeResponse> implements Rel
|
|||
}
|
||||
nodesToIgnore.addAll(ignoreNodes);
|
||||
fillShardCacheWithDataNodes(cache, nodes);
|
||||
Set<NodeEntry<T>> nodesToFetch = findNodesToFetch(cache);
|
||||
List<NodeEntry<T>> nodesToFetch = findNodesToFetch(cache);
|
||||
if (nodesToFetch.isEmpty() == false) {
|
||||
// mark all node as fetching and go ahead and async fetch them
|
||||
// use a unique round id to detect stale responses in processAsyncFetch
|
||||
final long fetchingRound = round.incrementAndGet();
|
||||
for (NodeEntry<T> nodeEntry : nodesToFetch) {
|
||||
nodeEntry.markAsFetching();
|
||||
nodeEntry.markAsFetching(fetchingRound);
|
||||
}
|
||||
DiscoveryNode[] discoNodesToFetch = nodesToFetch.stream().map(NodeEntry::getNodeId).map(nodes::get)
|
||||
.toArray(DiscoveryNode[]::new);
|
||||
asyncFetch(shardId, discoNodesToFetch);
|
||||
asyncFetch(discoNodesToFetch, fetchingRound);
|
||||
}
|
||||
|
||||
// if we are still fetching, return null to indicate it
|
||||
if (hasAnyNodeFetching(cache)) {
|
||||
return new FetchResult<>(shardId, null, emptySet(), emptySet());
|
||||
return new FetchResult<>(shardId, null, emptySet());
|
||||
} else {
|
||||
// nothing to fetch, yay, build the return value
|
||||
Map<DiscoveryNode, T> fetchData = new HashMap<>();
|
||||
|
@ -158,7 +162,7 @@ public abstract class AsyncShardFetch<T extends BaseNodeResponse> implements Rel
|
|||
if (failedNodes.isEmpty() == false || allIgnoreNodes.isEmpty() == false) {
|
||||
reroute(shardId, "nodes failed [" + failedNodes.size() + "], ignored [" + allIgnoreNodes.size() + "]");
|
||||
}
|
||||
return new FetchResult<>(shardId, fetchData, failedNodes, allIgnoreNodes);
|
||||
return new FetchResult<>(shardId, fetchData, allIgnoreNodes);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -168,7 +172,7 @@ public abstract class AsyncShardFetch<T extends BaseNodeResponse> implements Rel
|
|||
* the shard (response + failures), issuing a reroute at the end of it to make sure there will be another round
|
||||
* of allocations taking this new data into account.
|
||||
*/
|
||||
protected synchronized void processAsyncFetch(ShardId shardId, List<T> responses, List<FailedNodeException> failures) {
|
||||
protected synchronized void processAsyncFetch(List<T> responses, List<FailedNodeException> failures, long fetchingRound) {
|
||||
if (closed) {
|
||||
// we are closed, no need to process this async fetch at all
|
||||
logger.trace("{} ignoring fetched [{}] results, already closed", shardId, type);
|
||||
|
@ -179,35 +183,48 @@ public abstract class AsyncShardFetch<T extends BaseNodeResponse> implements Rel
|
|||
if (responses != null) {
|
||||
for (T response : responses) {
|
||||
NodeEntry<T> nodeEntry = cache.get(response.getNode().getId());
|
||||
// if the entry is there, and not marked as failed already, process it
|
||||
if (nodeEntry == null) {
|
||||
continue;
|
||||
}
|
||||
if (nodeEntry.isFailed()) {
|
||||
logger.trace("{} node {} has failed for [{}] (failure [{}])", shardId, nodeEntry.getNodeId(), type, nodeEntry.getFailure());
|
||||
if (nodeEntry != null) {
|
||||
if (nodeEntry.getFetchingRound() != fetchingRound) {
|
||||
assert nodeEntry.getFetchingRound() > fetchingRound : "node entries only replaced by newer rounds";
|
||||
logger.trace("{} received response for [{}] from node {} for an older fetching round (expected: {} but was: {})",
|
||||
shardId, nodeEntry.getNodeId(), type, nodeEntry.getFetchingRound(), fetchingRound);
|
||||
} else if (nodeEntry.isFailed()) {
|
||||
logger.trace("{} node {} has failed for [{}] (failure [{}])", shardId, nodeEntry.getNodeId(), type,
|
||||
nodeEntry.getFailure());
|
||||
} else {
|
||||
// if the entry is there, for the right fetching round and not marked as failed already, process it
|
||||
logger.trace("{} marking {} as done for [{}], result is [{}]", shardId, nodeEntry.getNodeId(), type, response);
|
||||
nodeEntry.doneFetching(response);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (failures != null) {
|
||||
for (FailedNodeException failure : failures) {
|
||||
logger.trace("{} processing failure {} for [{}]", shardId, failure, type);
|
||||
NodeEntry<T> nodeEntry = cache.get(failure.nodeId());
|
||||
// if the entry is there, and not marked as failed already, process it
|
||||
if (nodeEntry != null && nodeEntry.isFailed() == false) {
|
||||
if (nodeEntry != null) {
|
||||
if (nodeEntry.getFetchingRound() != fetchingRound) {
|
||||
assert nodeEntry.getFetchingRound() > fetchingRound : "node entries only replaced by newer rounds";
|
||||
logger.trace("{} received failure for [{}] from node {} for an older fetching round (expected: {} but was: {})",
|
||||
shardId, nodeEntry.getNodeId(), type, nodeEntry.getFetchingRound(), fetchingRound);
|
||||
} else if (nodeEntry.isFailed() == false) {
|
||||
// if the entry is there, for the right fetching round and not marked as failed already, process it
|
||||
Throwable unwrappedCause = ExceptionsHelper.unwrapCause(failure.getCause());
|
||||
// if the request got rejected or timed out, we need to try it again next time...
|
||||
if (unwrappedCause instanceof EsRejectedExecutionException || unwrappedCause instanceof ReceiveTimeoutTransportException || unwrappedCause instanceof ElasticsearchTimeoutException) {
|
||||
if (unwrappedCause instanceof EsRejectedExecutionException ||
|
||||
unwrappedCause instanceof ReceiveTimeoutTransportException ||
|
||||
unwrappedCause instanceof ElasticsearchTimeoutException) {
|
||||
nodeEntry.restartFetching();
|
||||
} else {
|
||||
logger.warn((Supplier<?>) () -> new ParameterizedMessage("{}: failed to list shard for {} on node [{}]", shardId, type, failure.nodeId()), failure);
|
||||
logger.warn((Supplier<?>) () -> new ParameterizedMessage("{}: failed to list shard for {} on node [{}]",
|
||||
shardId, type, failure.nodeId()), failure);
|
||||
nodeEntry.doneFetching(failure.getCause());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
reroute(shardId, "post_response");
|
||||
}
|
||||
|
||||
|
@ -241,8 +258,8 @@ public abstract class AsyncShardFetch<T extends BaseNodeResponse> implements Rel
|
|||
* Finds all the nodes that need to be fetched. Those are nodes that have no
|
||||
* data, and are not in fetch mode.
|
||||
*/
|
||||
private Set<NodeEntry<T>> findNodesToFetch(Map<String, NodeEntry<T>> shardCache) {
|
||||
Set<NodeEntry<T>> nodesToFetch = new HashSet<>();
|
||||
private List<NodeEntry<T>> findNodesToFetch(Map<String, NodeEntry<T>> shardCache) {
|
||||
List<NodeEntry<T>> nodesToFetch = new ArrayList<>();
|
||||
for (NodeEntry<T> nodeEntry : shardCache.values()) {
|
||||
if (nodeEntry.hasData() == false && nodeEntry.isFetching() == false) {
|
||||
nodesToFetch.add(nodeEntry);
|
||||
|
@ -267,12 +284,12 @@ public abstract class AsyncShardFetch<T extends BaseNodeResponse> implements Rel
|
|||
* Async fetches data for the provided shard with the set of nodes that need to be fetched from.
|
||||
*/
|
||||
// visible for testing
|
||||
void asyncFetch(final ShardId shardId, final DiscoveryNode[] nodes) {
|
||||
void asyncFetch(final DiscoveryNode[] nodes, long fetchingRound) {
|
||||
logger.trace("{} fetching [{}] from {}", shardId, type, nodes);
|
||||
action.list(shardId, nodes, new ActionListener<BaseNodesResponse<T>>() {
|
||||
@Override
|
||||
public void onResponse(BaseNodesResponse<T> response) {
|
||||
processAsyncFetch(shardId, response.getNodes(), response.failures());
|
||||
processAsyncFetch(response.getNodes(), response.failures(), fetchingRound);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -281,7 +298,7 @@ public abstract class AsyncShardFetch<T extends BaseNodeResponse> implements Rel
|
|||
for (final DiscoveryNode node: nodes) {
|
||||
failures.add(new FailedNodeException(node.getId(), "total failure in fetching", e));
|
||||
}
|
||||
processAsyncFetch(shardId, null, failures);
|
||||
processAsyncFetch(null, failures, fetchingRound);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
@ -294,13 +311,11 @@ public abstract class AsyncShardFetch<T extends BaseNodeResponse> implements Rel
|
|||
|
||||
private final ShardId shardId;
|
||||
private final Map<DiscoveryNode, T> data;
|
||||
private final Set<String> failedNodes;
|
||||
private final Set<String> ignoreNodes;
|
||||
|
||||
public FetchResult(ShardId shardId, Map<DiscoveryNode, T> data, Set<String> failedNodes, Set<String> ignoreNodes) {
|
||||
public FetchResult(ShardId shardId, Map<DiscoveryNode, T> data, Set<String> ignoreNodes) {
|
||||
this.shardId = shardId;
|
||||
this.data = data;
|
||||
this.failedNodes = failedNodes;
|
||||
this.ignoreNodes = ignoreNodes;
|
||||
}
|
||||
|
||||
|
@ -342,6 +357,7 @@ public abstract class AsyncShardFetch<T extends BaseNodeResponse> implements Rel
|
|||
private T value;
|
||||
private boolean valueSet;
|
||||
private Throwable failure;
|
||||
private long fetchingRound;
|
||||
|
||||
NodeEntry(String nodeId) {
|
||||
this.nodeId = nodeId;
|
||||
|
@ -355,9 +371,10 @@ public abstract class AsyncShardFetch<T extends BaseNodeResponse> implements Rel
|
|||
return fetching;
|
||||
}
|
||||
|
||||
void markAsFetching() {
|
||||
void markAsFetching(long fetchingRound) {
|
||||
assert fetching == false : "double marking a node as fetching";
|
||||
fetching = true;
|
||||
this.fetching = true;
|
||||
this.fetchingRound = fetchingRound;
|
||||
}
|
||||
|
||||
void doneFetching(T value) {
|
||||
|
@ -402,5 +419,9 @@ public abstract class AsyncShardFetch<T extends BaseNodeResponse> implements Rel
|
|||
assert valueSet : "value is not set, hasn't been fetched yet";
|
||||
return value;
|
||||
}
|
||||
|
||||
long getFetchingRound() {
|
||||
return fetchingRound;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -140,6 +140,55 @@ public class AsyncShardFetchTests extends ESTestCase {
|
|||
assertThat(fetchData.getData().get(node1), sameInstance(response1));
|
||||
}
|
||||
|
||||
public void testIgnoreResponseFromDifferentRound() throws Exception {
|
||||
DiscoveryNodes nodes = DiscoveryNodes.builder().add(node1).build();
|
||||
test.addSimulation(node1.getId(), response1);
|
||||
|
||||
// first fetch, no data, still on going
|
||||
AsyncShardFetch.FetchResult<Response> fetchData = test.fetchData(nodes, emptySet());
|
||||
assertThat(fetchData.hasData(), equalTo(false));
|
||||
assertThat(test.reroute.get(), equalTo(0));
|
||||
|
||||
// handle a response with incorrect round id, wait on reroute incrementing
|
||||
test.processAsyncFetch(Collections.singletonList(response1), Collections.emptyList(), 0);
|
||||
assertThat(fetchData.hasData(), equalTo(false));
|
||||
assertThat(test.reroute.get(), equalTo(1));
|
||||
|
||||
// fire a response (with correct round id), wait on reroute incrementing
|
||||
test.fireSimulationAndWait(node1.getId());
|
||||
// verify we get back the data node
|
||||
assertThat(test.reroute.get(), equalTo(2));
|
||||
fetchData = test.fetchData(nodes, emptySet());
|
||||
assertThat(fetchData.hasData(), equalTo(true));
|
||||
assertThat(fetchData.getData().size(), equalTo(1));
|
||||
assertThat(fetchData.getData().get(node1), sameInstance(response1));
|
||||
}
|
||||
|
||||
public void testIgnoreFailureFromDifferentRound() throws Exception {
|
||||
DiscoveryNodes nodes = DiscoveryNodes.builder().add(node1).build();
|
||||
// add a failed response for node1
|
||||
test.addSimulation(node1.getId(), failure1);
|
||||
|
||||
// first fetch, no data, still on going
|
||||
AsyncShardFetch.FetchResult<Response> fetchData = test.fetchData(nodes, emptySet());
|
||||
assertThat(fetchData.hasData(), equalTo(false));
|
||||
assertThat(test.reroute.get(), equalTo(0));
|
||||
|
||||
// handle a failure with incorrect round id, wait on reroute incrementing
|
||||
test.processAsyncFetch(Collections.emptyList(), Collections.singletonList(
|
||||
new FailedNodeException(node1.getId(), "dummy failure", failure1)), 0);
|
||||
assertThat(fetchData.hasData(), equalTo(false));
|
||||
assertThat(test.reroute.get(), equalTo(1));
|
||||
|
||||
// fire a response, wait on reroute incrementing
|
||||
test.fireSimulationAndWait(node1.getId());
|
||||
// failure, fetched data exists, but has no data
|
||||
assertThat(test.reroute.get(), equalTo(2));
|
||||
fetchData = test.fetchData(nodes, emptySet());
|
||||
assertThat(fetchData.hasData(), equalTo(true));
|
||||
assertThat(fetchData.getData().size(), equalTo(0));
|
||||
}
|
||||
|
||||
public void testTwoNodesOnSetup() throws Exception {
|
||||
DiscoveryNodes nodes = DiscoveryNodes.builder().add(node1).add(node2).build();
|
||||
test.addSimulation(node1.getId(), response1);
|
||||
|
@ -267,7 +316,7 @@ public class AsyncShardFetchTests extends ESTestCase {
|
|||
}
|
||||
|
||||
@Override
|
||||
protected void asyncFetch(final ShardId shardId, DiscoveryNode[] nodes) {
|
||||
protected void asyncFetch(DiscoveryNode[] nodes, long fetchingRound) {
|
||||
for (final DiscoveryNode node : nodes) {
|
||||
final String nodeId = node.getId();
|
||||
threadPool.generic().execute(new Runnable() {
|
||||
|
@ -283,11 +332,10 @@ public class AsyncShardFetchTests extends ESTestCase {
|
|||
assert entry != null;
|
||||
entry.executeLatch.await();
|
||||
if (entry.failure != null) {
|
||||
processAsyncFetch(shardId, null, Collections.singletonList(new FailedNodeException(nodeId,
|
||||
"unexpected",
|
||||
entry.failure)));
|
||||
processAsyncFetch(null,
|
||||
Collections.singletonList(new FailedNodeException(nodeId, "unexpected", entry.failure)), fetchingRound);
|
||||
} else {
|
||||
processAsyncFetch(shardId, Collections.singletonList(entry.response), null);
|
||||
processAsyncFetch(Collections.singletonList(entry.response), null, fetchingRound);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
logger.error("unexpected failure", e);
|
||||
|
|
|
@ -481,7 +481,7 @@ public class PrimaryShardAllocatorTests extends ESAllocationTestCase {
|
|||
|
||||
@Override
|
||||
protected AsyncShardFetch.FetchResult<TransportNodesListGatewayStartedShards.NodeGatewayStartedShards> fetchData(ShardRouting shard, RoutingAllocation allocation) {
|
||||
return new AsyncShardFetch.FetchResult<>(shardId, data, Collections.<String>emptySet(), Collections.<String>emptySet());
|
||||
return new AsyncShardFetch.FetchResult<>(shardId, data, Collections.<String>emptySet());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -389,7 +389,7 @@ public class ReplicaShardAllocatorTests extends ESAllocationTestCase {
|
|||
tData.put(entry.getKey(), new TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData(entry.getKey(), entry.getValue()));
|
||||
}
|
||||
}
|
||||
return new AsyncShardFetch.FetchResult<>(shardId, tData, Collections.<String>emptySet(), Collections.<String>emptySet());
|
||||
return new AsyncShardFetch.FetchResult<>(shardId, tData, Collections.emptySet());
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -77,7 +77,7 @@ public class TestGatewayAllocator extends GatewayAllocator {
|
|||
new NodeGatewayStartedShards(
|
||||
currentNodes.get(routing.currentNodeId()), routing.allocationId().getId(), routing.primary())));
|
||||
|
||||
return new AsyncShardFetch.FetchResult<>(shardId, foundShards, Collections.emptySet(), ignoreNodes);
|
||||
return new AsyncShardFetch.FetchResult<>(shardId, foundShards, ignoreNodes);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -86,8 +86,7 @@ public class TestGatewayAllocator extends GatewayAllocator {
|
|||
protected AsyncShardFetch.FetchResult<NodeStoreFilesMetaData> fetchData(ShardRouting shard, RoutingAllocation allocation) {
|
||||
// for now, just pretend no node has data
|
||||
final ShardId shardId = shard.shardId();
|
||||
return new AsyncShardFetch.FetchResult<>(shardId, Collections.emptyMap(), Collections.emptySet(),
|
||||
allocation.getIgnoreNodes(shardId));
|
||||
return new AsyncShardFetch.FetchResult<>(shardId, Collections.emptyMap(), allocation.getIgnoreNodes(shardId));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
Loading…
Reference in New Issue