HDFS-13749. [SBN read] Use getServiceStatus to discover observer namenodes. Contributed by Chao Sun.

This commit is contained in:
Erik Krogen 2018-09-20 13:27:58 -07:00 committed by Chen Liang
parent 0de06c010a
commit 556b7a840b
5 changed files with 149 additions and 85 deletions

View File

@ -25,12 +25,16 @@ import java.net.InetSocketAddress;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.ha.protocolPB.HAServiceProtocolClientSideTranslatorPB;
import org.apache.hadoop.hdfs.server.namenode.ha.ClientHAProxyFactory;
import org.apache.hadoop.hdfs.server.namenode.ha.HAProxyFactory;
import org.apache.hadoop.ipc.AlignmentContext;
import org.apache.hadoop.ipc.Client;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -62,8 +66,9 @@ import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;
/**
* Create proxy objects with {@link ClientProtocol} to communicate with a remote
* NN. Generally use {@link NameNodeProxiesClient#createProxyWithClientProtocol(
* Create proxy objects with {@link ClientProtocol} and
* {@link HAServiceProtocol} to communicate with a remote NN. For the former,
* generally use {@link NameNodeProxiesClient#createProxyWithClientProtocol(
* Configuration, URI, AtomicBoolean)}, which will create either an HA- or
* non-HA-enabled client proxy as appropriate.
*
@ -76,6 +81,11 @@ public class NameNodeProxiesClient {
private static final Logger LOG = LoggerFactory.getLogger(
NameNodeProxiesClient.class);
/** Maximum # of retries for HAProxy with HAServiceProtocol. */
private static final int MAX_RETRIES = 3;
/** Initial retry delay for HAProxy with HAServiceProtocol. */
private static final int DELAY_MILLISECONDS = 200;
/**
* Wrapper for a client proxy as well as its associated service ID.
* This is simply used as a tuple-like return type for created NN proxy.
@ -342,6 +352,34 @@ public class NameNodeProxiesClient {
fallbackToSimpleAuth, null);
}
/**
* Creates a non-HA proxy object with {@link HAServiceProtocol} to the
* given NameNode address, using the provided configuration. The proxy will
* use the RPC timeout configuration specified via {@link
* org.apache.hadoop.fs.CommonConfigurationKeys#IPC_CLIENT_RPC_TIMEOUT_KEY}.
* Upon failures, this will retry up to certain times with {@link RetryProxy}.
*
* @param address the NameNode address
* @param conf the configuration to be used
* @return a non-HA proxy with {@link HAServiceProtocol}.
*/
public static HAServiceProtocol createNonHAProxyWithHAServiceProtocol(
InetSocketAddress address, Configuration conf) throws IOException {
RetryPolicy timeoutPolicy = RetryPolicies.exponentialBackoffRetry(
MAX_RETRIES, DELAY_MILLISECONDS, TimeUnit.MILLISECONDS);
HAServiceProtocol proxy =
new HAServiceProtocolClientSideTranslatorPB(
address, conf, NetUtils.getDefaultSocketFactory(conf),
Client.getRpcTimeout(conf));
return (HAServiceProtocol) RetryProxy.create(
HAServiceProtocol.class,
new DefaultFailoverProxyProvider<>(HAServiceProtocol.class, proxy),
new HashMap<>(),
timeoutPolicy
);
}
public static ClientProtocol createProxyWithAlignmentContext(
InetSocketAddress address, Configuration conf, UserGroupInformation ugi,
boolean withRetries, AtomicBoolean fallbackToSimpleAuth,

View File

@ -28,11 +28,14 @@ import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.DFSUtilClient;
import org.apache.hadoop.hdfs.HAUtilClient;
import org.apache.hadoop.hdfs.NameNodeProxiesClient;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.io.retry.FailoverProxyProvider;
import org.apache.hadoop.security.UserGroupInformation;
@ -119,23 +122,44 @@ public abstract class AbstractNNFailoverProxyProvider<T> implements
*/
private HAServiceState cachedState;
public NNProxyInfo(InetSocketAddress address) {
/** Proxy for getting HA service status from the given NameNode. */
private HAServiceProtocol serviceProxy;
public NNProxyInfo(InetSocketAddress address, Configuration conf) {
super(null, address.toString());
this.address = address;
try {
serviceProxy = NameNodeProxiesClient
.createNonHAProxyWithHAServiceProtocol(address, conf);
} catch (IOException ioe) {
LOG.error("Failed to create HAServiceProtocol proxy to NameNode" +
" at {}", address, ioe);
throw new RuntimeException(ioe);
}
}
public InetSocketAddress getAddress() {
return address;
}
public void setCachedState(HAServiceState state) {
cachedState = state;
public void refreshCachedState() {
try {
cachedState = serviceProxy.getServiceStatus().getState();
} catch (IOException e) {
LOG.warn("Failed to connect to {}. Setting cached state to Standby",
address, e);
cachedState = HAServiceState.STANDBY;
}
}
public HAServiceState getCachedState() {
return cachedState;
}
@VisibleForTesting
public void setServiceProxyForTesting(HAServiceProtocol proxy) {
this.serviceProxy = proxy;
}
}
@Override
@ -153,8 +177,8 @@ public abstract class AbstractNNFailoverProxyProvider<T> implements
pi.proxy = factory.createProxy(conf,
pi.getAddress(), xface, ugi, false, getFallbackToSimpleAuth());
} catch (IOException ioe) {
LOG.error("{} Failed to create RPC proxy to NameNode",
this.getClass().getSimpleName(), ioe);
LOG.error("{} Failed to create RPC proxy to NameNode at {}",
this.getClass().getSimpleName(), pi.address, ioe);
throw new RuntimeException(ioe);
}
}
@ -178,7 +202,7 @@ public abstract class AbstractNNFailoverProxyProvider<T> implements
Collection<InetSocketAddress> addressesOfNns = addressesInNN.values();
for (InetSocketAddress address : addressesOfNns) {
proxies.add(new NNProxyInfo<T>(address));
proxies.add(new NNProxyInfo<T>(address, conf));
}
// Randomize the list to prevent all clients pointing to the same one
boolean randomized = getRandomOrder(conf, uri);

View File

@ -48,7 +48,7 @@ public class IPFailoverProxyProvider<T> extends
public IPFailoverProxyProvider(Configuration conf, URI uri,
Class<T> xface, HAProxyFactory<T> factory) {
super(conf, uri, xface, factory);
this.nnProxyInfo = new NNProxyInfo<T>(DFSUtilClient.getNNAddress(uri));
this.nnProxyInfo = new NNProxyInfo<>(DFSUtilClient.getNNAddress(uri), conf);
}
@Override

View File

@ -27,12 +27,10 @@ import java.net.URI;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.hdfs.ClientGSIContext;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.io.retry.AtMostOnce;
import org.apache.hadoop.io.retry.Idempotent;
import org.apache.hadoop.io.retry.RetryPolicies;
@ -40,8 +38,6 @@ import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.io.retry.RetryPolicy.RetryAction;
import org.apache.hadoop.ipc.AlignmentContext;
import org.apache.hadoop.ipc.RPC;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.ipc.StandbyException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -181,49 +177,6 @@ public class ObserverReadProxyProvider<T extends ClientProtocol>
return lastProxy;
}
private static <T extends ClientProtocol> HAServiceState getServiceState(
NNProxyInfo<T> pi) {
// TODO: should introduce new ClientProtocol method to verify the
// underlying service state, which does not require superuser access
// The is a workaround
IOException ioe = null;
try {
// Verify write access first
pi.proxy.reportBadBlocks(new LocatedBlock[0]);
return HAServiceState.ACTIVE; // Only active NameNode allows write
} catch (RemoteException re) {
IOException sbe = re.unwrapRemoteException(StandbyException.class);
if (!(sbe instanceof StandbyException)) {
ioe = re;
}
} catch (IOException e) {
ioe = e;
}
if (ioe != null) {
LOG.warn("Failed to connect to {}", pi.getAddress(), ioe);
return HAServiceState.STANDBY; // Just assume standby in this case
// Anything besides observer is fine
}
// Verify read access
// For now we assume only Observer nodes allow reads
// Stale reads on StandbyNode should be turned off
try {
pi.proxy.checkAccess("/", FsAction.READ);
return HAServiceState.OBSERVER;
} catch (RemoteException re) {
IOException sbe = re.unwrapRemoteException(StandbyException.class);
if (!(sbe instanceof StandbyException)) {
ioe = re;
}
} catch (IOException e) {
ioe = e;
}
if (ioe != null) {
LOG.warn("Failed to connect to {}", pi.getAddress(), ioe);
}
return HAServiceState.STANDBY;
}
/**
* Return the currently used proxy. If there is none, first calls
* {@link #changeProxy(NNProxyInfo)} to initialize one.
@ -254,7 +207,7 @@ public class ObserverReadProxyProvider<T extends ClientProtocol>
currentProxy = null;
currentIndex = (currentIndex + 1) % nameNodeProxies.size();
currentProxy = createProxyIfNeeded(nameNodeProxies.get(currentIndex));
currentProxy.setCachedState(getServiceState(currentProxy));
currentProxy.refreshCachedState();
LOG.debug("Changed current proxy from {} to {}",
initial == null ? "none" : initial.proxyInfo,
currentProxy.proxyInfo);

View File

@ -22,10 +22,13 @@ import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.URI;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.ha.HAServiceProtocol;
import org.apache.hadoop.ha.HAServiceStatus;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
@ -38,10 +41,12 @@ import org.mockito.Mockito;
import org.mockito.invocation.InvocationOnMock;
import org.mockito.stubbing.Answer;
import static org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
/**
* Tests for {@link ObserverReadProxyProvider} under various configurations of
@ -56,7 +61,7 @@ public class TestObserverReadProxyProvider {
private Configuration conf;
private ObserverReadProxyProvider<ClientProtocol> proxyProvider;
private ClientProtocolAnswer[] namenodeAnswers;
private NameNodeAnswer[] namenodeAnswers;
private String[] namenodeAddrs;
@Before
@ -70,32 +75,53 @@ public class TestObserverReadProxyProvider {
private void setupProxyProvider(int namenodeCount) throws Exception {
String[] namenodeIDs = new String[namenodeCount];
namenodeAddrs = new String[namenodeCount];
namenodeAnswers = new ClientProtocolAnswer[namenodeCount];
namenodeAnswers = new NameNodeAnswer[namenodeCount];
ClientProtocol[] proxies = new ClientProtocol[namenodeCount];
Map<String, ClientProtocol> proxyMap = new HashMap<>();
HAServiceProtocol[] serviceProxies = new HAServiceProtocol[namenodeCount];
Map<String, HAServiceProtocol> serviceProxyMap = new HashMap<>();
for (int i = 0; i < namenodeCount; i++) {
namenodeIDs[i] = "nn" + i;
namenodeAddrs[i] = "namenode" + i + ".test:8020";
conf.set(HdfsClientConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY + "." + ns +
"." + namenodeIDs[i], namenodeAddrs[i]);
namenodeAnswers[i] = new ClientProtocolAnswer();
namenodeAnswers[i] = new NameNodeAnswer();
proxies[i] = mock(ClientProtocol.class);
doWrite(Mockito.doAnswer(namenodeAnswers[i]).when(proxies[i]));
doRead(Mockito.doAnswer(namenodeAnswers[i]).when(proxies[i]));
doWrite(Mockito.doAnswer(namenodeAnswers[i].clientAnswer)
.when(proxies[i]));
doRead(Mockito.doAnswer(namenodeAnswers[i].clientAnswer)
.when(proxies[i]));
serviceProxies[i] = mock(HAServiceProtocol.class);
Mockito.doAnswer(namenodeAnswers[i].serviceAnswer)
.when(serviceProxies[i]).getServiceStatus();
proxyMap.put(namenodeAddrs[i], proxies[i]);
serviceProxyMap.put(namenodeAddrs[i], serviceProxies[i]);
}
conf.set(HdfsClientConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX + "." + ns,
Joiner.on(",").join(namenodeIDs));
proxyProvider = new ObserverReadProxyProvider<>(conf, nnURI,
ClientProtocol.class, new ClientHAProxyFactory<ClientProtocol>() {
proxyProvider = new ObserverReadProxyProvider<ClientProtocol>(conf, nnURI,
ClientProtocol.class,
new ClientHAProxyFactory<ClientProtocol>() {
@Override
public ClientProtocol createProxy(Configuration config,
InetSocketAddress nnAddr, Class<ClientProtocol> xface,
UserGroupInformation ugi, boolean withRetries,
AtomicBoolean fallbackToSimpleAuth) {
return proxyMap.get(nnAddr.toString());
}
}) {
@Override
public ClientProtocol createProxy(Configuration conf,
InetSocketAddress nnAddr, Class<ClientProtocol> xface,
UserGroupInformation ugi, boolean withRetries,
AtomicBoolean fallbackToSimpleAuth) {
return proxyMap.get(nnAddr.toString());
protected List<NNProxyInfo<ClientProtocol>> getProxyAddresses(
URI uri, String addressKey) {
List<NNProxyInfo<ClientProtocol>> nnProxies =
super.getProxyAddresses(uri, addressKey);
for (NNProxyInfo<ClientProtocol> nnProxy : nnProxies) {
String addressStr = nnProxy.getAddress().toString();
nnProxy.setServiceProxyForTesting(serviceProxyMap.get(addressStr));
}
return nnProxies;
}
});
};
proxyProvider.setObserverReadEnabled(true);
}
@ -275,39 +301,62 @@ public class TestObserverReadProxyProvider {
}
/**
* An {@link Answer} used for mocking of a {@link ClientProtocol}. Setting
* the state or unreachability of this Answer will make the linked
* ClientProtocol respond as if it was communicating with a NameNode of
* the corresponding state. It is in Standby state by default.
* An {@link Answer} used for mocking of {@link ClientProtocol} and
* {@link HAServiceProtocol}. Setting the state or unreachability of this
* Answer will make the linked ClientProtocol respond as if it was
* communicating with a NameNode of the corresponding state. It is in Standby
* state by default.
*/
private static class ClientProtocolAnswer implements Answer<Void> {
private static class NameNodeAnswer {
private volatile boolean unreachable = false;
// Standby state by default
private volatile boolean allowWrites = false;
private volatile boolean allowReads = false;
@Override
public Void answer(InvocationOnMock invocationOnMock) throws Throwable {
if (unreachable) {
throw new IOException("Unavailable");
private ClientProtocolAnswer clientAnswer = new ClientProtocolAnswer();
private HAServiceProtocolAnswer serviceAnswer =
new HAServiceProtocolAnswer();
private class HAServiceProtocolAnswer implements Answer<HAServiceStatus> {
@Override
public HAServiceStatus answer(InvocationOnMock invocation)
throws Throwable {
HAServiceStatus status = mock(HAServiceStatus.class);
if (allowReads && allowWrites) {
when(status.getState()).thenReturn(HAServiceState.ACTIVE);
} else if (allowReads) {
when(status.getState()).thenReturn(HAServiceState.OBSERVER);
} else {
when(status.getState()).thenReturn(HAServiceState.STANDBY);
}
return status;
}
switch (invocationOnMock.getMethod().getName()) {
}
private class ClientProtocolAnswer implements Answer<Void> {
@Override
public Void answer(InvocationOnMock invocationOnMock) throws Throwable {
if (unreachable) {
throw new IOException("Unavailable");
}
switch (invocationOnMock.getMethod().getName()) {
case "reportBadBlocks":
if (!allowWrites) {
throw new RemoteException(StandbyException.class.getCanonicalName(),
"No writes!");
throw new RemoteException(
StandbyException.class.getCanonicalName(), "No writes!");
}
return null;
case "checkAccess":
if (!allowReads) {
throw new RemoteException(StandbyException.class.getCanonicalName(),
"No reads!");
throw new RemoteException(
StandbyException.class.getCanonicalName(), "No reads!");
}
return null;
default:
throw new IllegalArgumentException(
"Only reportBadBlocks and checkAccess supported!");
}
}
}