Log more info when search ops higher than expected (#61108)

We have seen a situation where the total search operations are higher 
than expected. Unfortunately, we did not have enough info to figure it
out. This commit adds the failures to the error to provide more context
and adjusts the log level in case of failure to debug.
This commit is contained in:
Nhat Nguyen 2020-08-13 14:56:50 -04:00
parent b120368aee
commit 08b0e78ef4
1 changed files with 9 additions and 25 deletions

View File

@ -377,36 +377,20 @@ abstract class AbstractSearchAsyncAction<Result extends SearchPhaseResult> exten
onShardFailure(shardIndex, shardTarget, e);
final ShardRouting nextShard = shardIt.nextOrNull();
final boolean lastShard = nextShard == null;
logger.debug(() -> new ParameterizedMessage("{}: Failed to execute [{}] lastShard [{}]",
shard != null ? shard.shortSummary() : shardIt.shardId(), request, lastShard), e);
if (lastShard) {
onShardGroupFailure(shardIndex, shardTarget, e);
}
if (totalOps.incrementAndGet() == expectedTotalOps) {
if (logger.isDebugEnabled()) {
if (e != null && !TransportActions.isShardNotAvailableException(e)) {
logger.debug(new ParameterizedMessage(
"{}: Failed to execute [{}]", shard != null ? shard.shortSummary() : shardIt.shardId(), request), e);
} else if (logger.isTraceEnabled()) {
logger.trace(new ParameterizedMessage("{}: Failed to execute [{}]", shard, request), e);
}
}
final int totalOps = this.totalOps.incrementAndGet();
if (totalOps == expectedTotalOps) {
onPhaseDone();
} else if (totalOps > expectedTotalOps) {
throw new AssertionError("unexpected higher total ops [" + totalOps + "] compared to expected [" + expectedTotalOps + "]",
new SearchPhaseExecutionException(getName(), "Shard failures", null, buildShardFailures()));
} else {
// trace log this exception
logger.trace(() -> new ParameterizedMessage(
"{}: Failed to execute [{}] lastShard [{}]",
shard != null ? shard.shortSummary() : shardIt.shardId(), request, lastShard), e);
if (lastShard == false) {
performPhaseOnShard(shardIndex, shardIt, nextShard);
} else {
// no more shards active, add a failure
if (logger.isDebugEnabled() && !logger.isTraceEnabled()) { // do not double log this exception
if (e != null && !TransportActions.isShardNotAvailableException(e)) {
logger.debug(new ParameterizedMessage(
"{}: Failed to execute [{}] lastShard [{}]",
shard != null ? shard.shortSummary() : shardIt.shardId(), request, lastShard), e);
}
}
}
}
}
@ -505,8 +489,8 @@ abstract class AbstractSearchAsyncAction<Result extends SearchPhaseResult> exten
if (xTotalOps == expectedTotalOps) {
onPhaseDone();
} else if (xTotalOps > expectedTotalOps) {
throw new AssertionError("unexpected higher total ops [" + xTotalOps + "] compared to expected ["
+ expectedTotalOps + "]");
throw new AssertionError("unexpected higher total ops [" + xTotalOps + "] compared to expected [" + expectedTotalOps + "]",
new SearchPhaseExecutionException(getName(), "Shard failures", null, buildShardFailures()));
}
}