mirror of https://github.com/apache/druid.git
MSQ: Add exception messages to WorkerFailedFault and WorkerRpcFailedFault.
It's useful for the fault message to have some description of the underlying error: why did the worker fail, or why did the RPC call fail? This patch updates the fault messages to include the toString of the error. The full stack trace for these faults is available to end users in "exceptionStackTrace" of the report. But for cluster operators, if debug logging is not enabled, then for Dart only the message is logged, not the stack trace. In this scenario it's useful for the message to be more detailed.
This commit is contained in:
parent
4fdb38118a
commit
7645681b2a
|
@ -1276,7 +1276,7 @@ public class ControllerImpl implements Controller
|
||||||
workerNumber
|
workerNumber
|
||||||
);
|
);
|
||||||
|
|
||||||
addToRetryQueue(queryKernel, workerNumber, new WorkerRpcFailedFault(workerId));
|
addToRetryQueue(queryKernel, workerNumber, new WorkerRpcFailedFault(workerId, workerResult.error().toString()));
|
||||||
} else {
|
} else {
|
||||||
// Nonretryable failure.
|
// Nonretryable failure.
|
||||||
throw new RuntimeException(workerResult.error());
|
throw new RuntimeException(workerResult.error());
|
||||||
|
|
|
@ -142,7 +142,7 @@ public class ExceptionWrappingWorkerClient implements WorkerClient
|
||||||
clientFuture = clientFn.apply(client);
|
clientFuture = clientFn.apply(client);
|
||||||
}
|
}
|
||||||
catch (Exception e) {
|
catch (Exception e) {
|
||||||
throw new MSQException(e, new WorkerRpcFailedFault(workerTaskId));
|
throw new MSQException(e, new WorkerRpcFailedFault(workerTaskId, e.toString()));
|
||||||
}
|
}
|
||||||
|
|
||||||
Futures.addCallback(
|
Futures.addCallback(
|
||||||
|
@ -158,7 +158,7 @@ public class ExceptionWrappingWorkerClient implements WorkerClient
|
||||||
@Override
|
@Override
|
||||||
public void onFailure(Throwable t)
|
public void onFailure(Throwable t)
|
||||||
{
|
{
|
||||||
retVal.setException(new MSQException(t, new WorkerRpcFailedFault(workerTaskId)));
|
retVal.setException(new MSQException(t, new WorkerRpcFailedFault(workerTaskId, t.toString())));
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
MoreExecutors.directExecutor()
|
MoreExecutors.directExecutor()
|
||||||
|
|
|
@ -190,7 +190,7 @@ public class WorkerSketchFetcher implements AutoCloseable
|
||||||
try {
|
try {
|
||||||
kernelActions.accept((kernel) -> {
|
kernelActions.accept((kernel) -> {
|
||||||
try {
|
try {
|
||||||
retryOperation.accept(kernel, worker, new WorkerRpcFailedFault(taskId));
|
retryOperation.accept(kernel, worker, new WorkerRpcFailedFault(taskId, t.toString()));
|
||||||
kernelActionFuture.set(false);
|
kernelActionFuture.set(false);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -43,7 +43,7 @@ public class WorkerFailedFault extends BaseMSQFault
|
||||||
@JsonProperty("errorMsg") @Nullable final String errorMsg
|
@JsonProperty("errorMsg") @Nullable final String errorMsg
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
super(CODE, "Worker task failed: [%s]%s", workerTaskId, errorMsg != null ? " (" + errorMsg + ")" : "");
|
super(CODE, "Worker[%s] failed%s", workerTaskId, errorMsg != null ? ": " + errorMsg : "");
|
||||||
this.workerTaskId = workerTaskId;
|
this.workerTaskId = workerTaskId;
|
||||||
this.errorMsg = errorMsg;
|
this.errorMsg = errorMsg;
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,9 +20,11 @@
|
||||||
package org.apache.druid.msq.indexing.error;
|
package org.apache.druid.msq.indexing.error;
|
||||||
|
|
||||||
import com.fasterxml.jackson.annotation.JsonCreator;
|
import com.fasterxml.jackson.annotation.JsonCreator;
|
||||||
|
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||||
import com.fasterxml.jackson.annotation.JsonTypeName;
|
import com.fasterxml.jackson.annotation.JsonTypeName;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
@JsonTypeName(WorkerRpcFailedFault.CODE)
|
@JsonTypeName(WorkerRpcFailedFault.CODE)
|
||||||
|
@ -31,12 +33,17 @@ public class WorkerRpcFailedFault extends BaseMSQFault
|
||||||
public static final String CODE = "WorkerRpcFailed";
|
public static final String CODE = "WorkerRpcFailed";
|
||||||
|
|
||||||
private final String workerTaskId;
|
private final String workerTaskId;
|
||||||
|
private final String errorMsg;
|
||||||
|
|
||||||
@JsonCreator
|
@JsonCreator
|
||||||
public WorkerRpcFailedFault(@JsonProperty("workerTaskId") final String workerTaskId)
|
public WorkerRpcFailedFault(
|
||||||
|
@JsonProperty("workerTaskId") final String workerTaskId,
|
||||||
|
@JsonProperty("errorMsg") @Nullable final String errorMsg
|
||||||
|
)
|
||||||
{
|
{
|
||||||
super(CODE, "RPC call to task failed unrecoverably: [%s]", workerTaskId);
|
super(CODE, "RPC to worker[%s] failed%s", workerTaskId, errorMsg != null ? ": " + errorMsg : "");
|
||||||
this.workerTaskId = workerTaskId;
|
this.workerTaskId = workerTaskId;
|
||||||
|
this.errorMsg = errorMsg;
|
||||||
}
|
}
|
||||||
|
|
||||||
@JsonProperty
|
@JsonProperty
|
||||||
|
@ -45,6 +52,14 @@ public class WorkerRpcFailedFault extends BaseMSQFault
|
||||||
return workerTaskId;
|
return workerTaskId;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Nullable
|
||||||
|
@JsonProperty
|
||||||
|
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||||
|
public String getErrorMsg()
|
||||||
|
{
|
||||||
|
return errorMsg;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object o)
|
public boolean equals(Object o)
|
||||||
{
|
{
|
||||||
|
@ -58,12 +73,12 @@ public class WorkerRpcFailedFault extends BaseMSQFault
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
WorkerRpcFailedFault that = (WorkerRpcFailedFault) o;
|
WorkerRpcFailedFault that = (WorkerRpcFailedFault) o;
|
||||||
return Objects.equals(workerTaskId, that.workerTaskId);
|
return Objects.equals(workerTaskId, that.workerTaskId) && Objects.equals(errorMsg, that.errorMsg);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode()
|
public int hashCode()
|
||||||
{
|
{
|
||||||
return Objects.hash(super.hashCode(), workerTaskId);
|
return Objects.hash(super.hashCode(), workerTaskId, errorMsg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -134,7 +134,7 @@ public class MSQTasksTest
|
||||||
final MSQErrorReport controllerReport = MSQTasks.makeErrorReport(
|
final MSQErrorReport controllerReport = MSQTasks.makeErrorReport(
|
||||||
WORKER_ID,
|
WORKER_ID,
|
||||||
WORKER_HOST,
|
WORKER_HOST,
|
||||||
MSQErrorReport.fromFault(WORKER_ID, WORKER_HOST, null, new WorkerRpcFailedFault(WORKER_ID)),
|
MSQErrorReport.fromFault(WORKER_ID, WORKER_HOST, null, new WorkerRpcFailedFault(WORKER_ID, null)),
|
||||||
null
|
null
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
|
@ -94,8 +94,10 @@ public class MSQFaultSerdeTest
|
||||||
assertFaultSerde(new TooManyAttemptsForJob(2, 2, "taskId", "rootError"));
|
assertFaultSerde(new TooManyAttemptsForJob(2, 2, "taskId", "rootError"));
|
||||||
assertFaultSerde(UnknownFault.forMessage(null));
|
assertFaultSerde(UnknownFault.forMessage(null));
|
||||||
assertFaultSerde(UnknownFault.forMessage("the message"));
|
assertFaultSerde(UnknownFault.forMessage("the message"));
|
||||||
|
assertFaultSerde(new WorkerFailedFault("the worker task", null));
|
||||||
assertFaultSerde(new WorkerFailedFault("the worker task", "the error msg"));
|
assertFaultSerde(new WorkerFailedFault("the worker task", "the error msg"));
|
||||||
assertFaultSerde(new WorkerRpcFailedFault("the worker task"));
|
assertFaultSerde(new WorkerRpcFailedFault("the worker task", null));
|
||||||
|
assertFaultSerde(new WorkerRpcFailedFault("the worker task", "the error msg"));
|
||||||
assertFaultSerde(new NotEnoughTemporaryStorageFault(250, 2));
|
assertFaultSerde(new NotEnoughTemporaryStorageFault(250, 2));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue