HDFS-5122. Support failover and retry in WebHdfsFileSystem for NN HA. Contributed by Haohui Mai.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1524562 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jing Zhao 2013-09-18 20:47:16 +00:00
parent 22b401284b
commit 7a2443e9f8
6 changed files with 322 additions and 43 deletions

View File

@ -363,6 +363,9 @@ Release 2.1.1-beta - 2013-09-23
HDFS-5199 Add more debug trace for NFS READ and WRITE. (brandonli)
HDFS-5122. Support failover and retry in WebHdfsFileSystem for NN HA.
(Haohui Mai via jing9)
IMPROVEMENTS
HDFS-4513. Clarify in the WebHDFS REST API that all JSON respsonses may

View File

@ -38,6 +38,7 @@ import java.net.InetSocketAddress;
import java.net.URI;
import java.net.URISyntaxException;
import java.security.SecureRandom;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
@ -593,6 +594,48 @@ public class DFSUtil {
Configuration conf) {
return getAddresses(conf, null, DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY);
}
/**
* Returns list of InetSocketAddress corresponding to HA NN HTTP addresses from
* the configuration.
*
* @param conf configuration
* @return list of InetSocketAddresses
*/
public static Map<String, Map<String, InetSocketAddress>> getHaNnHttpAddresses(
Configuration conf) {
return getAddresses(conf, null, DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY);
}
/**
* Resolve an HDFS URL into real INetSocketAddress. It works like a DNS resolver
* when the URL points to an non-HA cluster. When the URL points to an HA
* cluster, the resolver further resolves the logical name (i.e., the authority
* in the URL) into real namenode addresses.
*/
public static InetSocketAddress[] resolve(URI uri, int schemeDefaultPort,
Configuration conf) throws IOException {
ArrayList<InetSocketAddress> ret = new ArrayList<InetSocketAddress>();
if (!HAUtil.isLogicalUri(conf, uri)) {
InetSocketAddress addr = NetUtils.createSocketAddr(uri.getAuthority(),
schemeDefaultPort);
ret.add(addr);
} else {
Map<String, Map<String, InetSocketAddress>> addresses = DFSUtil
.getHaNnHttpAddresses(conf);
for (Map<String, InetSocketAddress> addrs : addresses.values()) {
for (InetSocketAddress addr : addrs.values()) {
ret.add(addr);
}
}
}
InetSocketAddress[] r = new InetSocketAddress[ret.size()];
return ret.toArray(r);
}
/**
* Returns list of InetSocketAddress corresponding to backup node rpc

View File

@ -54,6 +54,7 @@ import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.ByteRangeInputStream;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HAUtil;
import org.apache.hadoop.hdfs.protocol.HdfsFileStatus;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSelector;
@ -86,6 +87,7 @@ import org.apache.hadoop.hdfs.web.resources.ReplicationParam;
import org.apache.hadoop.hdfs.web.resources.TokenArgumentParam;
import org.apache.hadoop.hdfs.web.resources.UserParam;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.retry.RetryPolicies;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.io.retry.RetryUtils;
import org.apache.hadoop.ipc.RemoteException;
@ -119,7 +121,7 @@ public class WebHdfsFileSystem extends FileSystem
/** SPNEGO authenticator */
private static final KerberosUgiAuthenticator AUTH = new KerberosUgiAuthenticator();
/** Default connection factory may be overriden in tests to use smaller timeout values */
/** Default connection factory may be overridden in tests to use smaller timeout values */
URLConnectionFactory connectionFactory = URLConnectionFactory.DEFAULT_CONNECTION_FACTORY;
/** Configures connections for AuthenticatedURL */
private final ConnectionConfigurator CONN_CONFIGURATOR =
@ -159,12 +161,13 @@ public class WebHdfsFileSystem extends FileSystem
}
private UserGroupInformation ugi;
private InetSocketAddress nnAddr;
private URI uri;
private boolean hasInitedToken;
private Token<?> delegationToken;
private RetryPolicy retryPolicy = null;
private Path workingDir;
private InetSocketAddress nnAddrs[];
private int currentNNAddrIndex;
/**
* Return the protocol scheme for the FileSystem.
@ -174,7 +177,7 @@ public class WebHdfsFileSystem extends FileSystem
*/
@Override
public String getScheme() {
return "webhdfs";
return SCHEME;
}
@Override
@ -183,20 +186,42 @@ public class WebHdfsFileSystem extends FileSystem
super.initialize(uri, conf);
setConf(conf);
ugi = UserGroupInformation.getCurrentUser();
try {
this.uri = new URI(uri.getScheme(), uri.getAuthority(), null, null, null);
this.uri = new URI(uri.getScheme(), uri.getAuthority(), null,
null, null);
this.nnAddrs = DFSUtil.resolve(this.uri, getDefaultPort(), conf);
} catch (URISyntaxException e) {
throw new IllegalArgumentException(e);
}
this.nnAddr = NetUtils.createSocketAddr(uri.getAuthority(), getDefaultPort());
this.retryPolicy =
RetryUtils.getDefaultRetryPolicy(
conf,
DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_POLICY_ENABLED_KEY,
DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_POLICY_ENABLED_DEFAULT,
DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_POLICY_SPEC_KEY,
DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_POLICY_SPEC_DEFAULT,
SafeModeException.class);
if (!HAUtil.isLogicalUri(conf, this.uri)) {
this.retryPolicy =
RetryUtils.getDefaultRetryPolicy(
conf,
DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_POLICY_ENABLED_KEY,
DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_POLICY_ENABLED_DEFAULT,
DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_POLICY_SPEC_KEY,
DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_POLICY_SPEC_DEFAULT,
SafeModeException.class);
} else {
int maxFailoverAttempts = conf.getInt(
DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY,
DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT);
int failoverSleepBaseMillis = conf.getInt(
DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY,
DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT);
int failoverSleepMaxMillis = conf.getInt(
DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY,
DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_MAX_DEFAULT);
this.retryPolicy = RetryPolicies
.failoverOnNetworkException(RetryPolicies.TRY_ONCE_THEN_FAIL,
maxFailoverAttempts, failoverSleepBaseMillis,
failoverSleepMaxMillis);
}
this.workingDir = getHomeDirectory();
if (UserGroupInformation.isSecurityEnabled()) {
@ -348,6 +373,19 @@ public class WebHdfsFileSystem extends FileSystem
return ((RemoteException)ioe).unwrapRemoteException();
}
private synchronized InetSocketAddress getCurrentNNAddr() {
return nnAddrs[currentNNAddrIndex];
}
/**
* Reset the appropriate state to gracefully fail over to another name node
*/
private synchronized void resetStateToFailOver() {
currentNNAddrIndex = (currentNNAddrIndex + 1) % nnAddrs.length;
delegationToken = null;
hasInitedToken = false;
}
/**
* Return a URL pointing to given path on the namenode.
*
@ -357,6 +395,7 @@ public class WebHdfsFileSystem extends FileSystem
* @throws IOException on error constructing the URL
*/
private URL getNamenodeURL(String path, String query) throws IOException {
InetSocketAddress nnAddr = getCurrentNNAddr();
final URL url = new URL("http", nnAddr.getHostName(),
nnAddr.getPort(), path + '?' + query);
if (LOG.isTraceEnabled()) {
@ -414,38 +453,28 @@ public class WebHdfsFileSystem extends FileSystem
*/
private Map<?, ?> run(final HttpOpParam.Op op, final Path fspath,
final Param<?,?>... parameters) throws IOException {
return new Runner(op, fspath, parameters).run().json;
return new FsPathRunner(op, fspath, parameters).run().json;
}
/**
* This class is for initialing a HTTP connection, connecting to server,
* obtaining a response, and also handling retry on failures.
*/
class Runner {
private final HttpOpParam.Op op;
private final URL url;
abstract class AbstractRunner {
abstract protected URL getUrl() throws IOException;
protected final HttpOpParam.Op op;
private final boolean redirected;
private boolean checkRetry;
private HttpURLConnection conn = null;
protected HttpURLConnection conn = null;
private Map<?, ?> json = null;
Runner(final HttpOpParam.Op op, final URL url, final boolean redirected) {
protected AbstractRunner(final HttpOpParam.Op op, boolean redirected) {
this.op = op;
this.url = url;
this.redirected = redirected;
}
Runner(final HttpOpParam.Op op, final Path fspath,
final Param<?,?>... parameters) throws IOException {
this(op, toUrl(op, fspath, parameters), false);
}
Runner(final HttpOpParam.Op op, final HttpURLConnection conn) {
this(op, null, false);
this.conn = conn;
}
private HttpURLConnection getHttpUrlConnection(final URL url)
throws IOException, AuthenticationException {
UserGroupInformation connectUgi = ugi.getRealUser();
@ -493,6 +522,7 @@ public class WebHdfsFileSystem extends FileSystem
private void init() throws IOException {
checkRetry = !redirected;
URL url = getUrl();
try {
conn = getHttpUrlConnection(url);
} catch(AuthenticationException ae) {
@ -519,7 +549,23 @@ public class WebHdfsFileSystem extends FileSystem
}
}
Runner run() throws IOException {
AbstractRunner run() throws IOException {
/**
* Do the real work.
*
* There are three cases that the code inside the loop can throw an
* IOException:
*
* <ul>
* <li>The connection has failed (e.g., ConnectException,
* @see FailoverOnNetworkExceptionRetry for more details)</li>
* <li>The namenode enters the standby state (i.e., StandbyException).</li>
* <li>The server returns errors for the command (i.e., RemoteException)</li>
* </ul>
*
* The call to shouldRetry() will conduct the retry policy. The policy
* examines the exception and swallows it if it decides to rerun the work.
*/
for(int retry = 0; ; retry++) {
try {
init();
@ -537,14 +583,25 @@ public class WebHdfsFileSystem extends FileSystem
private void shouldRetry(final IOException ioe, final int retry
) throws IOException {
InetSocketAddress nnAddr = getCurrentNNAddr();
if (checkRetry) {
try {
final RetryPolicy.RetryAction a = retryPolicy.shouldRetry(
ioe, retry, 0, true);
if (a.action == RetryPolicy.RetryAction.RetryDecision.RETRY) {
boolean isRetry = a.action == RetryPolicy.RetryAction.RetryDecision.RETRY;
boolean isFailoverAndRetry =
a.action == RetryPolicy.RetryAction.RetryDecision.FAILOVER_AND_RETRY;
if (isRetry || isFailoverAndRetry) {
LOG.info("Retrying connect to namenode: " + nnAddr
+ ". Already tried " + retry + " time(s); retry policy is "
+ retryPolicy + ", delay " + a.delayMillis + "ms.");
+ retryPolicy + ", delay " + a.delayMillis + "ms.");
if (isFailoverAndRetry) {
resetStateToFailOver();
}
Thread.sleep(a.delayMillis);
return;
}
@ -617,6 +674,48 @@ public class WebHdfsFileSystem extends FileSystem
}
}
final class FsPathRunner extends AbstractRunner {
private final Path fspath;
private final Param<?, ?>[] parameters;
FsPathRunner(final HttpOpParam.Op op, final Path fspath, final Param<?,?>... parameters) {
super(op, false);
this.fspath = fspath;
this.parameters = parameters;
}
@Override
protected URL getUrl() throws IOException {
return toUrl(op, fspath, parameters);
}
}
final class URLRunner extends AbstractRunner {
private final URL url;
@Override
protected URL getUrl() {
return url;
}
protected URLRunner(final HttpOpParam.Op op, final URL url, boolean redirected) {
super(op, redirected);
this.url = url;
}
}
@VisibleForTesting
final class ConnRunner extends AbstractRunner {
protected ConnRunner(final HttpOpParam.Op op, HttpURLConnection conn) {
super(op, false);
this.conn = conn;
}
@Override
protected URL getUrl() {
return null;
}
}
private FsPermission applyUMask(FsPermission permission) {
if (permission == null) {
permission = FsPermission.getDefault();
@ -772,7 +871,7 @@ public class WebHdfsFileSystem extends FileSystem
statistics.incrementWriteOps(1);
final HttpOpParam.Op op = PutOpParam.Op.CREATE;
return new Runner(op, f,
return new FsPathRunner(op, f,
new PermissionParam(applyUMask(permission)),
new OverwriteParam(overwrite),
new BufferSizeParam(bufferSize),
@ -788,7 +887,7 @@ public class WebHdfsFileSystem extends FileSystem
statistics.incrementWriteOps(1);
final HttpOpParam.Op op = PostOpParam.Op.APPEND;
return new Runner(op, f, new BufferSizeParam(bufferSize))
return new FsPathRunner(op, f, new BufferSizeParam(bufferSize))
.run()
.write(bufferSize);
}
@ -835,7 +934,7 @@ public class WebHdfsFileSystem extends FileSystem
final boolean resolved) throws IOException {
final URL offsetUrl = offset == 0L? url
: new URL(url + "&" + new OffsetParam(offset));
return new Runner(GetOpParam.Op.OPEN, offsetUrl, resolved).run().conn;
return new URLRunner(GetOpParam.Op.OPEN, offsetUrl, resolved).run().conn;
}
}
@ -909,7 +1008,7 @@ public class WebHdfsFileSystem extends FileSystem
final HttpOpParam.Op op = GetOpParam.Op.GETDELEGATIONTOKEN;
final Map<?, ?> m = run(op, null, new RenewerParam(renewer));
final Token<DelegationTokenIdentifier> token = JsonUtil.toDelegationToken(m);
SecurityUtil.setTokenService(token, nnAddr);
SecurityUtil.setTokenService(token, getCurrentNNAddr());
return token;
}

View File

@ -20,20 +20,25 @@ package org.apache.hadoop.hdfs;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHENTICATION;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_BACKUP_ADDRESS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTPS_PORT_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_HTTP_PORT_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_RPC_ADDRESS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SECONDARY_HTTP_ADDRESS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SERVICE_RPC_ADDRESS_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMESERVICES;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMESERVICE_ID;
import org.apache.hadoop.util.Shell;
import static org.junit.Assert.*;
import org.junit.Assume;
import static org.hamcrest.CoreMatchers.*;
import static org.hamcrest.CoreMatchers.not;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.IOException;
import java.net.InetSocketAddress;
@ -54,8 +59,11 @@ import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Shell;
import org.junit.Assume;
import org.junit.Before;
import org.junit.Test;
@ -539,6 +547,55 @@ public class TestDFSUtil {
assertEquals("ns1", DFSUtil.getSecondaryNameServiceId(conf));
}
@Test
public void testGetHaNnHttpAddresses() throws IOException {
final String LOGICAL_HOST_NAME = "ns1";
final String NS1_NN1_ADDR = "ns1-nn1.example.com:8020";
final String NS1_NN2_ADDR = "ns1-nn2.example.com:8020";
Configuration conf = createWebHDFSHAConfiguration(LOGICAL_HOST_NAME, NS1_NN1_ADDR, NS1_NN2_ADDR);
Map<String, Map<String, InetSocketAddress>> map =
DFSUtil.getHaNnHttpAddresses(conf);
assertEquals(NS1_NN1_ADDR, map.get("ns1").get("nn1").toString());
assertEquals(NS1_NN2_ADDR, map.get("ns1").get("nn2").toString());
}
@Test
public void testResolve() throws IOException, URISyntaxException {
final String LOGICAL_HOST_NAME = "ns1";
final String NS1_NN1_HOST = "ns1-nn1.example.com";
final String NS1_NN2_HOST = "ns1-nn2.example.com";
final String NS1_NN1_ADDR = "ns1-nn1.example.com:8020";
final String NS1_NN2_ADDR = "ns1-nn2.example.com:8020";
final int DEFAULT_PORT = NameNode.DEFAULT_PORT;
Configuration conf = createWebHDFSHAConfiguration(LOGICAL_HOST_NAME, NS1_NN1_ADDR, NS1_NN2_ADDR);
URI uri = new URI("webhdfs://ns1");
assertTrue(HAUtil.isLogicalUri(conf, uri));
InetSocketAddress[] addrs = DFSUtil.resolve(uri, DEFAULT_PORT, conf);
assertArrayEquals(new InetSocketAddress[] {
new InetSocketAddress(NS1_NN1_HOST, DEFAULT_PORT),
new InetSocketAddress(NS1_NN2_HOST, DEFAULT_PORT),
}, addrs);
}
private static Configuration createWebHDFSHAConfiguration(String logicalHostName, String nnaddr1, String nnaddr2) {
HdfsConfiguration conf = new HdfsConfiguration();
conf.set(DFS_NAMESERVICES, "ns1");
conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY_PREFIX, "ns1"),"nn1,nn2");
conf.set(DFSUtil.addKeySuffixes(
DFS_NAMENODE_HTTP_ADDRESS_KEY, "ns1", "nn1"), nnaddr1);
conf.set(DFSUtil.addKeySuffixes(
DFS_NAMENODE_HTTP_ADDRESS_KEY, "ns1", "nn2"), nnaddr2);
conf.set(DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX + "." + logicalHostName,
ConfiguredFailoverProxyProvider.class.getName());
return conf;
}
@Test
public void testSubstituteForWildcardAddress() throws IOException {
assertEquals("foo:12345",

View File

@ -0,0 +1,77 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.web;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
import org.junit.Assert;
import org.junit.Test;
/** Test whether WebHDFS can connect to an HA cluster */
public class TestWebHDFSForHA {
private static final String LOGICAL_NAME = "minidfs";
@Test
public void test() throws Exception {
Configuration conf = new Configuration();
conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true);
MiniDFSNNTopology topo = new MiniDFSNNTopology()
.addNameservice(new MiniDFSNNTopology.NSConf(LOGICAL_NAME).addNN(
new MiniDFSNNTopology.NNConf("nn1")).addNN(
new MiniDFSNNTopology.NNConf("nn2")));
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(topo)
.numDataNodes(3).build();
HATestUtil.setFailoverConfigurations(cluster, conf, LOGICAL_NAME);
FileSystem fs = null;
try {
cluster.waitActive();
final String uri = WebHdfsFileSystem.SCHEME + "://" + LOGICAL_NAME;
fs = (WebHdfsFileSystem) FileSystem.get(new URI(uri), conf);
cluster.transitionToActive(0);
final Path dir = new Path("/test");
Assert.assertTrue(fs.mkdirs(dir));
cluster.shutdownNameNode(0);
cluster.transitionToActive(1);
final Path dir2 = new Path("/test2");
Assert.assertTrue(fs.mkdirs(dir2));
} finally {
if (fs != null) {
fs.close();
}
cluster.shutdown();
}
}
}

View File

@ -81,7 +81,7 @@ public class WebHdfsTestUtil {
public static HttpURLConnection twoStepWrite(final WebHdfsFileSystem webhdfs,
final HttpOpParam.Op op, HttpURLConnection conn) throws IOException {
return webhdfs.new Runner(op, conn).twoStepWrite();
return webhdfs.new ConnRunner(op, conn).twoStepWrite();
}
public static FSDataOutputStream write(final WebHdfsFileSystem webhdfs,