HDFS-3518. Add a utility method HdfsUtils.isHealthy(uri) for checking if the given HDFS is healthy.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1350825 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tsz-wo Sze 2012-06-15 23:00:36 +00:00
parent 7a5c2e8790
commit 5572f73d45
4 changed files with 108 additions and 4 deletions

View File

@ -13,8 +13,6 @@ Trunk (unreleased changes)
HDFS-3125. Add JournalService to enable Journal Daemon. (suresh) HDFS-3125. Add JournalService to enable Journal Daemon. (suresh)
HDFS-744. Support hsync in HDFS. (Lars Hofhansl via szetszwo)
IMPROVEMENTS IMPROVEMENTS
HDFS-1620. Rename HdfsConstants -> HdfsServerConstants, FSConstants -> HDFS-1620. Rename HdfsConstants -> HdfsServerConstants, FSConstants ->
@ -162,9 +160,14 @@ Branch-2 ( Unreleased changes )
NEW FEATURES NEW FEATURES
HDFS-744. Support hsync in HDFS. (Lars Hofhansl via szetszwo)
HDFS-3042. Automatic failover support for NameNode HA (todd) HDFS-3042. Automatic failover support for NameNode HA (todd)
(see dedicated section below for breakdown of subtasks) (see dedicated section below for breakdown of subtasks)
HDFS-3518. Add a utility method HdfsUtils.isHealthy(uri) for checking if
the given HDFS is healthy. (szetszwo)
IMPROVEMENTS IMPROVEMENTS
HDFS-3390. DFSAdmin should print full stack traces of errors when DEBUG HDFS-3390. DFSAdmin should print full stack traces of errors when DEBUG

View File

@ -96,7 +96,7 @@ public class DistributedFileSystem extends FileSystem {
*/ */
@Override @Override
public String getScheme() { public String getScheme() {
return "hdfs"; return HdfsConstants.HDFS_URI_SCHEME;
} }
@Deprecated @Deprecated

View File

@ -0,0 +1,87 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.client;
import java.io.IOException;
import java.net.URI;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import org.apache.hadoop.io.IOUtils;
/**
 * The public utility API for HDFS.
 *
 * All members are static; the class only groups client-side helper methods.
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class HdfsUtils {
  private static final Log LOG = LogFactory.getLog(HdfsUtils.class);

  /**
   * Is the HDFS healthy?
   * HDFS is considered as healthy if it is up and not in safemode.
   *
   * The check is made through a dedicated FileSystem instance that bypasses
   * the FileSystem cache and has client retries disabled.  Any
   * {@link IOException} raised while connecting to, querying, or closing
   * that instance is logged at debug level and reported as "not healthy".
   *
   * @param uri the HDFS URI. Note that the URI path is ignored.
   * @return true if HDFS is healthy; false, otherwise.
   * @throws IllegalArgumentException if the scheme of the given URI is not
   *         {@link HdfsConstants#HDFS_URI_SCHEME} (compared ignoring case).
   * @throws NullPointerException if the given URI is null.
   * @throws ClassCastException if the FileSystem obtained for the URI is not
   *         a {@link DistributedFileSystem}.
   */
  public static boolean isHealthy(URI uri) {
    //check scheme
    final String scheme = uri.getScheme();
    if (!HdfsConstants.HDFS_URI_SCHEME.equalsIgnoreCase(scheme)) {
      throw new IllegalArgumentException("The scheme is not "
          + HdfsConstants.HDFS_URI_SCHEME + ", uri=" + uri);
    }

    final Configuration conf = new Configuration();
    //disable FileSystem cache
    //(presumably so that closing the instance below cannot affect a shared,
    // cached FileSystem -- confirm against FileSystem.get)
    conf.setBoolean(String.format("fs.%s.impl.disable.cache", scheme), true);
    //disable client retry for rpc connection and rpc calls
    conf.setBoolean(DFSConfigKeys.DFS_CLIENT_RETRY_POLICY_ENABLED_KEY, false);
    conf.setInt(
        CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, 0);

    //Keep the reference typed as FileSystem and cast only at the point of
    //use: if the returned instance is not a DistributedFileSystem, the
    //ClassCastException still propagates as before, but the instance is
    //already assigned and therefore gets closed in the finally block
    //instead of being leaked.
    FileSystem fs = null;
    try {
      fs = FileSystem.get(uri, conf);
      final boolean safemode = ((DistributedFileSystem)fs).setSafeMode(
          SafeModeAction.SAFEMODE_GET);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Is namenode in safemode? " + safemode + "; uri=" + uri);
      }

      //close explicitly so that a failing close() is reported as unhealthy;
      //clear the reference afterwards so that it is not closed again below
      fs.close();
      fs = null;
      return !safemode;
    } catch(IOException e) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Got an exception for uri=" + uri, e);
      }
      return false;
    } finally {
      //fs is still non-null here only if the try block was left before the
      //explicit close completed; hand it to IOUtils.cleanup for closing
      IOUtils.cleanup(LOG, fs);
    }
  }
}

View File

@ -31,6 +31,7 @@ import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
import java.net.InetSocketAddress; import java.net.InetSocketAddress;
import java.net.SocketTimeoutException; import java.net.SocketTimeoutException;
import java.net.URI;
import java.security.MessageDigest; import java.security.MessageDigest;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -51,6 +52,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.UnresolvedLinkException;
import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.client.HdfsUtils;
import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol; import org.apache.hadoop.hdfs.protocol.ClientDatanodeProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.DatanodeID;
@ -823,9 +825,11 @@ public class TestDFSClientRetries extends TestCase {
.build(); .build();
try { try {
cluster.waitActive(); cluster.waitActive();
final DistributedFileSystem dfs = cluster.getFileSystem();
final URI uri = dfs.getUri();
assertTrue(HdfsUtils.isHealthy(uri));
//create a file //create a file
final DistributedFileSystem dfs = cluster.getFileSystem();
final long length = 1L << 20; final long length = 1L << 20;
final Path file1 = new Path(dir, "foo"); final Path file1 = new Path(dir, "foo");
DFSTestUtil.createFile(dfs, file1, length, numDatanodes, 20120406L); DFSTestUtil.createFile(dfs, file1, length, numDatanodes, 20120406L);
@ -835,7 +839,9 @@ public class TestDFSClientRetries extends TestCase {
assertEquals(length, s1.getLen()); assertEquals(length, s1.getLen());
//shutdown namenode //shutdown namenode
assertTrue(HdfsUtils.isHealthy(uri));
cluster.shutdownNameNode(0); cluster.shutdownNameNode(0);
assertFalse(HdfsUtils.isHealthy(uri));
//namenode is down, create another file in a thread //namenode is down, create another file in a thread
final Path file3 = new Path(dir, "file"); final Path file3 = new Path(dir, "file");
@ -860,8 +866,10 @@ public class TestDFSClientRetries extends TestCase {
try { try {
//sleep, restart, and then wait active //sleep, restart, and then wait active
TimeUnit.SECONDS.sleep(30); TimeUnit.SECONDS.sleep(30);
assertFalse(HdfsUtils.isHealthy(uri));
cluster.restartNameNode(0, false); cluster.restartNameNode(0, false);
cluster.waitActive(); cluster.waitActive();
assertTrue(HdfsUtils.isHealthy(uri));
} catch (Exception e) { } catch (Exception e) {
exceptions.add(e); exceptions.add(e);
} }
@ -877,7 +885,9 @@ public class TestDFSClientRetries extends TestCase {
assertEquals(dfs.getFileChecksum(file1), dfs.getFileChecksum(file3)); assertEquals(dfs.getFileChecksum(file1), dfs.getFileChecksum(file3));
//enter safe mode //enter safe mode
assertTrue(HdfsUtils.isHealthy(uri));
dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); dfs.setSafeMode(SafeModeAction.SAFEMODE_ENTER);
assertFalse(HdfsUtils.isHealthy(uri));
//leave safe mode in a new thread //leave safe mode in a new thread
new Thread(new Runnable() { new Thread(new Runnable() {
@ -886,7 +896,9 @@ public class TestDFSClientRetries extends TestCase {
try { try {
//sleep and then leave safe mode //sleep and then leave safe mode
TimeUnit.SECONDS.sleep(30); TimeUnit.SECONDS.sleep(30);
assertFalse(HdfsUtils.isHealthy(uri));
dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); dfs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
assertTrue(HdfsUtils.isHealthy(uri));
} catch (Exception e) { } catch (Exception e) {
exceptions.add(e); exceptions.add(e);
} }
@ -898,6 +910,8 @@ public class TestDFSClientRetries extends TestCase {
DFSTestUtil.createFile(dfs, file2, length, numDatanodes, 20120406L); DFSTestUtil.createFile(dfs, file2, length, numDatanodes, 20120406L);
assertEquals(dfs.getFileChecksum(file1), dfs.getFileChecksum(file2)); assertEquals(dfs.getFileChecksum(file1), dfs.getFileChecksum(file2));
assertTrue(HdfsUtils.isHealthy(uri));
//make sure it won't retry on exceptions like FileNotFoundException //make sure it won't retry on exceptions like FileNotFoundException
final Path nonExisting = new Path(dir, "nonExisting"); final Path nonExisting = new Path(dir, "nonExisting");
LOG.info("setPermission: " + nonExisting); LOG.info("setPermission: " + nonExisting);