Async Fetch: Better logging classification + log when ignored
This commit is contained in:
parent
afb7aabea7
commit
08e87bd81e
|
@ -58,6 +58,7 @@ public abstract class AsyncShardFetch<T extends NodeOperationResponse> implement
|
||||||
}
|
}
|
||||||
|
|
||||||
protected final ESLogger logger;
|
protected final ESLogger logger;
|
||||||
|
protected final String type;
|
||||||
private final ShardId shardId;
|
private final ShardId shardId;
|
||||||
private final List<NodesOperationResponse<T>, T> action;
|
private final List<NodesOperationResponse<T>, T> action;
|
||||||
private final Map<String, NodeEntry<T>> cache = new HashMap<>();
|
private final Map<String, NodeEntry<T>> cache = new HashMap<>();
|
||||||
|
@ -65,8 +66,9 @@ public abstract class AsyncShardFetch<T extends NodeOperationResponse> implement
|
||||||
private boolean closed;
|
private boolean closed;
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
protected AsyncShardFetch(ESLogger logger, ShardId shardId, List<? extends NodesOperationResponse<T>, T> action) {
|
protected AsyncShardFetch(ESLogger logger, String type, ShardId shardId, List<? extends NodesOperationResponse<T>, T> action) {
|
||||||
this.logger = logger;
|
this.logger = logger;
|
||||||
|
this.type = type;
|
||||||
this.shardId = shardId;
|
this.shardId = shardId;
|
||||||
this.action = (List<NodesOperationResponse<T>, T>) action;
|
this.action = (List<NodesOperationResponse<T>, T>) action;
|
||||||
}
|
}
|
||||||
|
@ -148,7 +150,7 @@ public abstract class AsyncShardFetch<T extends NodeOperationResponse> implement
|
||||||
// if at least one node failed, make sure to have a protective reroute
|
// if at least one node failed, make sure to have a protective reroute
|
||||||
// here, just case this round won't find anything, and we need to retry fetching data
|
// here, just case this round won't find anything, and we need to retry fetching data
|
||||||
if (failedNodes.isEmpty() == false || allIgnoreNodes.isEmpty() == false) {
|
if (failedNodes.isEmpty() == false || allIgnoreNodes.isEmpty() == false) {
|
||||||
reroute(shardId, "at_least_one_node_failed");
|
reroute(shardId, "nodes failed [" + failedNodes.size() + "], ignored [" + allIgnoreNodes.size() + "]");
|
||||||
}
|
}
|
||||||
return new FetchResult<>(shardId, fetchData, failedNodes, allIgnoreNodes);
|
return new FetchResult<>(shardId, fetchData, failedNodes, allIgnoreNodes);
|
||||||
}
|
}
|
||||||
|
@ -184,7 +186,7 @@ public abstract class AsyncShardFetch<T extends NodeOperationResponse> implement
|
||||||
if (unwrappedCause instanceof EsRejectedExecutionException || unwrappedCause instanceof ReceiveTimeoutTransportException || unwrappedCause instanceof ElasticsearchTimeoutException) {
|
if (unwrappedCause instanceof EsRejectedExecutionException || unwrappedCause instanceof ReceiveTimeoutTransportException || unwrappedCause instanceof ElasticsearchTimeoutException) {
|
||||||
nodeEntry.restartFetching();
|
nodeEntry.restartFetching();
|
||||||
} else {
|
} else {
|
||||||
logger.warn("{}: failed to list shard for {} on node [{}]", failure, shardId, getClass().getSimpleName(), failure.nodeId());
|
logger.warn("{}: failed to list shard for {} on node [{}]", failure, shardId, type, failure.nodeId());
|
||||||
nodeEntry.doneFetching(failure.getCause());
|
nodeEntry.doneFetching(failure.getCause());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -166,12 +166,12 @@ public class GatewayAllocator extends AbstractComponent {
|
||||||
|
|
||||||
AsyncShardFetch<TransportNodesListGatewayStartedShards.NodeGatewayStartedShards> fetch = asyncFetchStarted.get(shard.shardId());
|
AsyncShardFetch<TransportNodesListGatewayStartedShards.NodeGatewayStartedShards> fetch = asyncFetchStarted.get(shard.shardId());
|
||||||
if (fetch == null) {
|
if (fetch == null) {
|
||||||
fetch = new InternalAsyncFetch<>(logger, shard.shardId(), startedAction, clusterService, allocationService);
|
fetch = new InternalAsyncFetch<>(logger, "shard_started", shard.shardId(), startedAction, clusterService, allocationService);
|
||||||
asyncFetchStarted.put(shard.shardId(), fetch);
|
asyncFetchStarted.put(shard.shardId(), fetch);
|
||||||
}
|
}
|
||||||
AsyncShardFetch.FetchResult<TransportNodesListGatewayStartedShards.NodeGatewayStartedShards> shardState = fetch.fetchData(nodes, metaData, allocation.getIgnoreNodes(shard.shardId()));
|
AsyncShardFetch.FetchResult<TransportNodesListGatewayStartedShards.NodeGatewayStartedShards> shardState = fetch.fetchData(nodes, metaData, allocation.getIgnoreNodes(shard.shardId()));
|
||||||
if (shardState.hasData() == false) {
|
if (shardState.hasData() == false) {
|
||||||
// still fetching data, remove from the unassigned, and try the next
|
logger.trace("{}: ignoring allocation, still fetching shard started state");
|
||||||
unassignedIterator.remove();
|
unassignedIterator.remove();
|
||||||
routingNodes.ignoredUnassigned().add(shard);
|
routingNodes.ignoredUnassigned().add(shard);
|
||||||
continue;
|
continue;
|
||||||
|
@ -395,7 +395,7 @@ public class GatewayAllocator extends AbstractComponent {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!canBeAllocatedToAtLeastOneNode) {
|
if (!canBeAllocatedToAtLeastOneNode) {
|
||||||
// still fetching data, remove from the unassigned, and try the next
|
logger.trace("{}: ignoring allocation, can't be allocated on any node");
|
||||||
unassignedIterator.remove();
|
unassignedIterator.remove();
|
||||||
routingNodes.ignoredUnassigned().add(shard);
|
routingNodes.ignoredUnassigned().add(shard);
|
||||||
continue;
|
continue;
|
||||||
|
@ -403,12 +403,12 @@ public class GatewayAllocator extends AbstractComponent {
|
||||||
|
|
||||||
AsyncShardFetch<TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData> fetch = asyncFetchStore.get(shard.shardId());
|
AsyncShardFetch<TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData> fetch = asyncFetchStore.get(shard.shardId());
|
||||||
if (fetch == null) {
|
if (fetch == null) {
|
||||||
fetch = new InternalAsyncFetch<>(logger, shard.shardId(), storeAction, clusterService, allocationService);
|
fetch = new InternalAsyncFetch<>(logger, "shard_store", shard.shardId(), storeAction, clusterService, allocationService);
|
||||||
asyncFetchStore.put(shard.shardId(), fetch);
|
asyncFetchStore.put(shard.shardId(), fetch);
|
||||||
}
|
}
|
||||||
AsyncShardFetch.FetchResult<TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData> shardStores = fetch.fetchData(nodes, metaData, allocation.getIgnoreNodes(shard.shardId()));
|
AsyncShardFetch.FetchResult<TransportNodesListShardStoreMetaData.NodeStoreFilesMetaData> shardStores = fetch.fetchData(nodes, metaData, allocation.getIgnoreNodes(shard.shardId()));
|
||||||
if (shardStores.hasData() == false) {
|
if (shardStores.hasData() == false) {
|
||||||
// still fetching data, remove from the unassigned, and try the next
|
logger.trace("{}: ignoring allocation, still fetching shard stores");
|
||||||
unassignedIterator.remove();
|
unassignedIterator.remove();
|
||||||
routingNodes.ignoredUnassigned().add(shard);
|
routingNodes.ignoredUnassigned().add(shard);
|
||||||
continue; // still fetching
|
continue; // still fetching
|
||||||
|
@ -518,16 +518,16 @@ public class GatewayAllocator extends AbstractComponent {
|
||||||
private final ClusterService clusterService;
|
private final ClusterService clusterService;
|
||||||
private final AllocationService allocationService;
|
private final AllocationService allocationService;
|
||||||
|
|
||||||
public InternalAsyncFetch(ESLogger logger, ShardId shardId, List<? extends NodesOperationResponse<T>, T> action,
|
public InternalAsyncFetch(ESLogger logger, String type, ShardId shardId, List<? extends NodesOperationResponse<T>, T> action,
|
||||||
ClusterService clusterService, AllocationService allocationService) {
|
ClusterService clusterService, AllocationService allocationService) {
|
||||||
super(logger, shardId, action);
|
super(logger, type, shardId, action);
|
||||||
this.clusterService = clusterService;
|
this.clusterService = clusterService;
|
||||||
this.allocationService = allocationService;
|
this.allocationService = allocationService;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void reroute(ShardId shardId, String reason) {
|
protected void reroute(ShardId shardId, String reason) {
|
||||||
clusterService.submitStateUpdateTask("async_shard_fetch(" + getClass().getSimpleName() + ") " + shardId + ", reasons (" + reason + ")", Priority.HIGH, new ClusterStateUpdateTask() {
|
clusterService.submitStateUpdateTask("async_shard_fetch(" + type + ") " + shardId + ", reasons (" + reason + ")", Priority.HIGH, new ClusterStateUpdateTask() {
|
||||||
@Override
|
@Override
|
||||||
public ClusterState execute(ClusterState currentState) throws Exception {
|
public ClusterState execute(ClusterState currentState) throws Exception {
|
||||||
if (currentState.nodes().masterNode() == null) {
|
if (currentState.nodes().masterNode() == null) {
|
||||||
|
|
|
@ -252,7 +252,7 @@ public class AsyncShardFetchTests extends ElasticsearchTestCase {
|
||||||
private AtomicInteger reroute = new AtomicInteger();
|
private AtomicInteger reroute = new AtomicInteger();
|
||||||
|
|
||||||
public TestFetch(ThreadPool threadPool) {
|
public TestFetch(ThreadPool threadPool) {
|
||||||
super(Loggers.getLogger(TestFetch.class), new ShardId("test", 1), null);
|
super(Loggers.getLogger(TestFetch.class), "test", new ShardId("test", 1), null);
|
||||||
this.threadPool = threadPool;
|
this.threadPool = threadPool;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue