Catch exceptions inside ITRetryUtil to fix one of the causes of flaky integration tests (#11265)

* Do not stop retrying when an exception is encountered. Save and propagate the last exception if the retry count is exceeded.

* Add one more log message to help with debugging

* Limit the schema registry heap size (-Xmx32m) in an attempt to control OOMs
This commit is contained in:
Agustin Gonzalez 2021-05-19 13:56:02 -07:00 committed by GitHub
parent 4c3077390f
commit 383daa4029
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 31 additions and 11 deletions

View File

@ -398,4 +398,4 @@ services:
SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC SCHEMA_REGISTRY_AUTHENTICATION_METHOD: BASIC
SCHEMA_REGISTRY_AUTHENTICATION_REALM: druid SCHEMA_REGISTRY_AUTHENTICATION_REALM: druid
SCHEMA_REGISTRY_AUTHENTICATION_ROLES: users SCHEMA_REGISTRY_AUTHENTICATION_ROLES: users
SCHEMA_REGISTRY_OPTS: -Djava.security.auth.login.config=/usr/lib/druid/conf/jaas_config.file SCHEMA_REGISTRY_OPTS: -Djava.security.auth.login.config=/usr/lib/druid/conf/jaas_config.file -Xmx32m

View File

@ -52,23 +52,43 @@ public class ITRetryUtil
String taskMessage String taskMessage
) )
{ {
try { int currentTry = 0;
int currentTry = 0; Exception lastException = null;
while (callable.call() != expectedValue) {
if (currentTry > retryCount) { while (true) {
throw new ISE("Max number of retries[%d] exceeded for Task[%s]. Failing.", retryCount, taskMessage); try {
LOG.info("Trying attempt[%d/%d]...", currentTry, retryCount);
if (currentTry > retryCount || callable.call() == expectedValue) {
break;
} }
LOG.info( LOG.info(
"Attempt[%d]: Task %s still not complete. Next retry in %d ms", "Attempt[%d/%d] did not pass: Task %s still not complete. Next retry in %d ms",
currentTry, taskMessage, delayInMillis currentTry, retryCount, taskMessage, delayInMillis
); );
Thread.sleep(delayInMillis); Thread.sleep(delayInMillis);
currentTry++; currentTry++;
} }
catch (Exception e) {
// just continue retrying if there is an exception (it may be transient!) but save the last:
lastException = e;
}
} }
catch (Exception e) {
throw new RuntimeException(e); if (currentTry > retryCount) {
if (lastException != null) {
throw new ISE(
"Max number of retries[%d] exceeded for Task[%s]. Failing.",
retryCount,
taskMessage,
lastException
);
} else {
throw new ISE(
"Max number of retries[%d] exceeded for Task[%s]. Failing.",
retryCount,
taskMessage
);
}
} }
} }