mirror of https://github.com/apache/druid.git
Catch exception inside ITRetryUtil to fix one of the causes for flaky integration tests (#11265)
* Do not stop retrying when an exception is encountered. Save & propagate last exception if retry count is exceeded.
* Add one more log message to help with debugging
* Limit schema registry heap to attempt to control OOMs
This commit is contained in:
parent
4c3077390f
commit
383daa4029
|
@ -398,4 +398,4 @@ services:
|
||||||
SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC
|
SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC
|
||||||
SCHEMA_REGISTRY_AUTHENTICATION_REALM: druid
|
SCHEMA_REGISTRY_AUTHENTICATION_REALM: druid
|
||||||
SCHEMA_REGISTRY_AUTHENTICATION_ROLES: users
|
SCHEMA_REGISTRY_AUTHENTICATION_ROLES: users
|
||||||
SCHEMA_REGISTRY_OPTS: -Djava.security.auth.login.config=/usr/lib/druid/conf/jaas_config.file
|
SCHEMA_REGISTRY_OPTS: -Djava.security.auth.login.config=/usr/lib/druid/conf/jaas_config.file -Xmx32m
|
||||||
|
|
|
@ -52,23 +52,43 @@ public class ITRetryUtil
|
||||||
String taskMessage
|
String taskMessage
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
try {
|
int currentTry = 0;
|
||||||
int currentTry = 0;
|
Exception lastException = null;
|
||||||
while (callable.call() != expectedValue) {
|
|
||||||
if (currentTry > retryCount) {
|
while (true) {
|
||||||
throw new ISE("Max number of retries[%d] exceeded for Task[%s]. Failing.", retryCount, taskMessage);
|
try {
|
||||||
|
LOG.info("Trying attempt[%d/%d]...", currentTry, retryCount);
|
||||||
|
if (currentTry > retryCount || callable.call() == expectedValue) {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
LOG.info(
|
LOG.info(
|
||||||
"Attempt[%d]: Task %s still not complete. Next retry in %d ms",
|
"Attempt[%d/%d] did not pass: Task %s still not complete. Next retry in %d ms",
|
||||||
currentTry, taskMessage, delayInMillis
|
currentTry, retryCount, taskMessage, delayInMillis
|
||||||
);
|
);
|
||||||
Thread.sleep(delayInMillis);
|
Thread.sleep(delayInMillis);
|
||||||
|
|
||||||
currentTry++;
|
currentTry++;
|
||||||
}
|
}
|
||||||
|
catch (Exception e) {
|
||||||
|
// just continue retrying if there is an exception (it may be transient!) but save the last:
|
||||||
|
lastException = e;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
catch (Exception e) {
|
|
||||||
throw new RuntimeException(e);
|
if (currentTry > retryCount) {
|
||||||
|
if (lastException != null) {
|
||||||
|
throw new ISE(
|
||||||
|
"Max number of retries[%d] exceeded for Task[%s]. Failing.",
|
||||||
|
retryCount,
|
||||||
|
taskMessage,
|
||||||
|
lastException
|
||||||
|
);
|
||||||
|
} else {
|
||||||
|
throw new ISE(
|
||||||
|
"Max number of retries[%d] exceeded for Task[%s]. Failing.",
|
||||||
|
retryCount,
|
||||||
|
taskMessage
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue