From 08b0e78ef4e6f64c304f9e715100a566c414a7bd Mon Sep 17 00:00:00 2001 From: Nhat Nguyen Date: Thu, 13 Aug 2020 14:56:50 -0400 Subject: [PATCH] Log more info when search ops higher than expected (#61108) We have seen a situation where the total search operations are higher than expected. Unfortunately, we did not have enough info to figure it out. This commit adds the failures to the error to provide more context and adjusts the log level in case of failure to debug. --- .../search/AbstractSearchAsyncAction.java | 34 +++++-------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java index 1c1b4ad9eee..94499f60fc4 100644 --- a/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/AbstractSearchAsyncAction.java @@ -377,36 +377,20 @@ abstract class AbstractSearchAsyncAction exten onShardFailure(shardIndex, shardTarget, e); final ShardRouting nextShard = shardIt.nextOrNull(); final boolean lastShard = nextShard == null; + logger.debug(() -> new ParameterizedMessage("{}: Failed to execute [{}] lastShard [{}]", + shard != null ? shard.shortSummary() : shardIt.shardId(), request, lastShard), e); if (lastShard) { onShardGroupFailure(shardIndex, shardTarget, e); } - - if (totalOps.incrementAndGet() == expectedTotalOps) { - if (logger.isDebugEnabled()) { - if (e != null && !TransportActions.isShardNotAvailableException(e)) { - logger.debug(new ParameterizedMessage( - "{}: Failed to execute [{}]", shard != null ? shard.shortSummary() : shardIt.shardId(), request), e); - } else if (logger.isTraceEnabled()) { - logger.trace(new ParameterizedMessage("{}: Failed to execute [{}]", shard, request), e); - } - } + final int totalOps = this.totalOps.incrementAndGet(); + if (totalOps == expectedTotalOps) { onPhaseDone(); + } else if (totalOps > expectedTotalOps) { + throw new AssertionError("unexpected higher total ops [" + totalOps + "] compared to expected [" + expectedTotalOps + "]", + new SearchPhaseExecutionException(getName(), "Shard failures", null, buildShardFailures())); } else { - // trace log this exception - logger.trace(() -> new ParameterizedMessage( - "{}: Failed to execute [{}] lastShard [{}]", - shard != null ? shard.shortSummary() : shardIt.shardId(), request, lastShard), e); if (lastShard == false) { performPhaseOnShard(shardIndex, shardIt, nextShard); - } else { - // no more shards active, add a failure - if (logger.isDebugEnabled() && !logger.isTraceEnabled()) { // do not double log this exception - if (e != null && !TransportActions.isShardNotAvailableException(e)) { - logger.debug(new ParameterizedMessage( - "{}: Failed to execute [{}] lastShard [{}]", - shard != null ? shard.shortSummary() : shardIt.shardId(), request, lastShard), e); - } - } } } } @@ -505,8 +489,8 @@ abstract class AbstractSearchAsyncAction exten if (xTotalOps == expectedTotalOps) { onPhaseDone(); } else if (xTotalOps > expectedTotalOps) { - throw new AssertionError("unexpected higher total ops [" + xTotalOps + "] compared to expected [" - + expectedTotalOps + "]"); + throw new AssertionError("unexpected higher total ops [" + xTotalOps + "] compared to expected [" + expectedTotalOps + "]", + new SearchPhaseExecutionException(getName(), "Shard failures", null, buildShardFailures())); } }