YARN-11210. Fix YARN RMAdminCLI retry logic for non-retryable kerbero… (#4563)

Co-authored-by: Kevin Wikant <wikak@amazon.com>
This commit is contained in:
KevinWikant 2022-07-25 23:51:37 -04:00 committed by GitHub
parent 01a2e0f6bd
commit 213ea03758
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 31 additions and 15 deletions

View File

@ -181,15 +181,20 @@ public class RetryPolicies {
}
/**
* A retry policy for exceptions other than RemoteException.
* <p>
* A retry policy where RemoteException and SaslException are not retried, other individual
* exception types can have RetryPolicy overrides, and any other exception type without an
* override is not retried.
* </p>
*
* @param defaultPolicy defaultPolicy.
* @param exceptionToPolicyMap exceptionToPolicyMap.
* @return RetryPolicy.
*/
public static final RetryPolicy retryOtherThanRemoteException(
public static final RetryPolicy retryOtherThanRemoteAndSaslException(
RetryPolicy defaultPolicy,
Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap) {
return new OtherThanRemoteExceptionDependentRetry(defaultPolicy,
return new OtherThanRemoteAndSaslExceptionDependentRetry(defaultPolicy,
exceptionToPolicyMap);
}
@ -589,12 +594,12 @@ public class RetryPolicies {
}
}
static class OtherThanRemoteExceptionDependentRetry implements RetryPolicy {
static class OtherThanRemoteAndSaslExceptionDependentRetry implements RetryPolicy {
private RetryPolicy defaultPolicy;
private Map<Class<? extends Exception>, RetryPolicy> exceptionToPolicyMap;
public OtherThanRemoteExceptionDependentRetry(RetryPolicy defaultPolicy,
OtherThanRemoteAndSaslExceptionDependentRetry(RetryPolicy defaultPolicy,
Map<Class<? extends Exception>,
RetryPolicy> exceptionToPolicyMap) {
this.defaultPolicy = defaultPolicy;
@ -605,10 +610,8 @@ public class RetryPolicies {
public RetryAction shouldRetry(Exception e, int retries, int failovers,
boolean isIdempotentOrAtMostOnce) throws Exception {
RetryPolicy policy = null;
// ignore Remote Exception
if (e instanceof RemoteException) {
// do nothing
} else {
// ignore RemoteException and SaslException
if (!(e instanceof RemoteException || isSaslFailure(e))) {
policy = exceptionToPolicyMap.get(e.getClass());
}
if (policy == null) {

View File

@ -61,6 +61,7 @@ import org.slf4j.LoggerFactory;
import javax.net.SocketFactory;
import javax.security.sasl.Sasl;
import javax.security.sasl.SaslException;
import java.io.*;
import java.net.*;
import java.nio.ByteBuffer;
@ -1620,7 +1621,8 @@ public class Client implements AutoCloseable {
}
if (call.error != null) {
if (call.error instanceof RemoteException) {
if (call.error instanceof RemoteException ||
call.error instanceof SaslException) {
call.error.fillInStackTrace();
throw call.error;
} else { // local exception

View File

@ -237,7 +237,14 @@ public class SaslRpcClient {
LOG.debug("client isn't using kerberos");
return null;
}
String serverPrincipal = getServerPrincipal(authType);
final String serverPrincipal;
try {
serverPrincipal = getServerPrincipal(authType);
} catch (IllegalArgumentException ex) {
// YARN-11210: getServerPrincipal can throw IllegalArgumentException if Kerberos
// configuration is bad, this is surfaced as a non-retryable SaslException
throw new SaslException("Bad Kerberos server principal configuration", ex);
}
if (serverPrincipal == null) {
LOG.debug("protocol doesn't use kerberos");
return null;

View File

@ -291,7 +291,7 @@ public class TestRetryProxy {
UnreliableInterface unreliable = (UnreliableInterface)
RetryProxy.create(UnreliableInterface.class, unreliableImpl,
retryOtherThanRemoteException(TRY_ONCE_THEN_FAIL,
retryOtherThanRemoteAndSaslException(TRY_ONCE_THEN_FAIL,
exceptionToPolicyMap));
// should retry with local IOException.
unreliable.failsOnceWithIOException();

View File

@ -64,8 +64,12 @@ public class HAUtil {
* configuration; else false.
*/
public static boolean isFederationFailoverEnabled(Configuration conf) {
return conf.getBoolean(YarnConfiguration.FEDERATION_FAILOVER_ENABLED,
YarnConfiguration.DEFAULT_FEDERATION_FAILOVER_ENABLED);
// Federation failover is not enabled unless federation is enabled. This previously caused
// YARN RMProxy to use the HA Retry policy in a non-HA & non-federation environments because
// the default federation failover enabled value is true.
return isFederationEnabled(conf) &&
conf.getBoolean(YarnConfiguration.FEDERATION_FAILOVER_ENABLED,
YarnConfiguration.DEFAULT_FEDERATION_FAILOVER_ENABLED);
}
/**

View File

@ -300,7 +300,7 @@ public class RMProxy<T> {
// YARN-4288: local IOException is also possible.
exceptionToPolicyMap.put(IOException.class, retryPolicy);
// Not retry on remote IO exception.
return RetryPolicies.retryOtherThanRemoteException(
return RetryPolicies.retryOtherThanRemoteAndSaslException(
RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap);
}
}