From 3af51887b40df8de7482040cf8a90600a2c4305f Mon Sep 17 00:00:00 2001
From: Eli Collins
Date: Thu, 30 Jun 2011 20:56:37 +0000
Subject: [PATCH] HDFS-2118. Couple dfs data dir improvements. Contributed by Eli Collins

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1141713 13f79535-47bb-0310-9956-ffa450edef68
---
 hdfs/CHANGES.txt                                   |  2 ++
 .../hadoop/hdfs/server/datanode/DataNode.java      | 17 ++++++-----
 .../hdfs/server/datanode/DataXceiver.java          |  2 +-
 .../server/datanode/DataXceiverServer.java         |  8 ++---
 .../hdfs/server/datanode/FSDataset.java            |  2 +-
 .../TestDataNodeVolumeFailureToleration.java       | 29 +++++++------------
 6 files changed, 28 insertions(+), 32 deletions(-)

diff --git a/hdfs/CHANGES.txt b/hdfs/CHANGES.txt
index 48e020aaa66..3acf54faad0 100644
--- a/hdfs/CHANGES.txt
+++ b/hdfs/CHANGES.txt
@@ -552,6 +552,8 @@ Trunk (unreleased changes)
 
     HDFS-2056. Update fetchdt usage. (Tanping Wang via jitendra)
 
+    HDFS-2118. Couple dfs data dir improvements. (eli)
+
   BUG FIXES
 
     HDFS-1955. FSImage.doUpgrade() was made too fault-tolerant by HDFS-1826.
diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java b/hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
index d83d0fccb27..d84ca46cedc 100644
--- a/hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
+++ b/hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java
@@ -2186,20 +2186,21 @@ public class DataNode extends Configured
         continue;
       }
       // drop any (illegal) authority in the URI for backwards compatibility
-      File data = new File(dirURI.getPath());
+      File dir = new File(dirURI.getPath());
       try {
-        DiskChecker.checkDir(localFS, new Path(data.toURI()), permission);
-        dirs.add(data);
-      } catch (IOException e) {
-        LOG.warn("Invalid directory in: "
-                 + DFS_DATANODE_DATA_DIR_KEY + ": ", e);
-        invalidDirs.append("\"").append(data.getCanonicalPath()).append("\" ");
+        DiskChecker.checkDir(localFS, new Path(dir.toURI()), permission);
+        dirs.add(dir);
+      } catch (IOException ioe) {
+        LOG.warn("Invalid " + DFS_DATANODE_DATA_DIR_KEY + " "
+            + dir + " : ", ioe);
+        invalidDirs.append("\"").append(dir.getCanonicalPath()).append("\" ");
       }
     }
-    if (dirs.size() == 0)
+    if (dirs.size() == 0) {
       throw new IOException("All directories in "
           + DFS_DATANODE_DATA_DIR_KEY + " are invalid: " + invalidDirs);
+    }
     return dirs;
   }
 
diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java b/hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
index 46d547f320c..37dd08b9102 100644
--- a/hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
+++ b/hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java
@@ -125,7 +125,7 @@ class DataXceiver extends Receiver implements Runnable, FSConstants {
   DataNode getDataNode() {return datanode;}
 
   /**
-   * Read/write data from/to the DataXceiveServer.
+   * Read/write data from/to the DataXceiverServer.
    */
   public void run() {
     updateCurrentThreadName("Waiting for operation");
diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java b/hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java
index 435333e07e1..b1617d89d2a 100644
--- a/hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java
+++ b/hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/DataXceiverServer.java
@@ -146,10 +146,10 @@ class DataXceiverServer implements Runnable, FSConstants {
       } catch (SocketTimeoutException ignored) {
         // wake up to see if should continue to run
       } catch (IOException ie) {
-        LOG.warn(datanode.getMachineName() + ":DataXceiveServer: ", ie);
+        LOG.warn(datanode.getMachineName() + ":DataXceiverServer: ", ie);
       } catch (Throwable te) {
         LOG.error(datanode.getMachineName()
-            + ":DataXceiveServer: Exiting due to: ", te);
+            + ":DataXceiverServer: Exiting due to: ", te);
         datanode.shouldRun = false;
       }
     }
@@ -157,7 +157,7 @@ class DataXceiverServer implements Runnable, FSConstants {
       ss.close();
     } catch (IOException ie) {
       LOG.warn(datanode.getMachineName()
-          + ":DataXceiveServer: Close exception due to: ", ie);
+          + ":DataXceiverServer: Close exception due to: ", ie);
     }
   }
 
@@ -167,7 +167,7 @@ class DataXceiverServer implements Runnable, FSConstants {
     try {
       this.ss.close();
     } catch (IOException ie) {
-      LOG.warn(datanode.getMachineName() + ":DataXceiveServer.kill(): "
+      LOG.warn(datanode.getMachineName() + ":DataXceiverServer.kill(): "
                 + StringUtils.stringifyException(ie));
     }
   }
diff --git a/hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java b/hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java
index 85c689abb74..3f893bf1a9e 100644
--- a/hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java
+++ b/hdfs/src/java/org/apache/hadoop/hdfs/server/datanode/FSDataset.java
@@ -1150,7 +1150,7 @@ public class FSDataset implements FSConstants, FSDatasetInterface {
       conf.getInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY,
                   DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_DEFAULT);
 
-    String[] dataDirs = conf.getStrings(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
+    String[] dataDirs = conf.getTrimmedStrings(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY);
 
     int volsConfigured = (dataDirs == null) ? 0 : dataDirs.length;
 
diff --git a/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureToleration.java b/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureToleration.java
index 610241d8f79..d569e2e086b 100644
--- a/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureToleration.java
+++ b/hdfs/src/test/hdfs/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailureToleration.java
@@ -133,17 +133,17 @@ public class TestDataNodeVolumeFailureToleration {
   }
 
   /**
-   * Restart the cluster with a new volume tolerated value.
-   * @param volTolerated
-   * @param manageCluster
+   * Restart the datanodes with a new volume tolerated value.
+   * @param volTolerated number of dfs data dir failures to tolerate
+   * @param manageDfsDirs whether the mini cluster should manage data dirs
    * @throws IOException
    */
-  private void restartCluster(int volTolerated, boolean manageCluster)
+  private void restartDatanodes(int volTolerated, boolean manageDfsDirs)
       throws IOException {
     //Make sure no datanode is running
     cluster.shutdownDataNodes();
     conf.setInt(DFSConfigKeys.DFS_DATANODE_FAILED_VOLUMES_TOLERATED_KEY,
         volTolerated);
-    cluster.startDataNodes(conf, 1, manageCluster, null, null);
+    cluster.startDataNodes(conf, 1, manageDfsDirs, null, null);
     cluster.waitActive();
   }
@@ -174,19 +174,14 @@ public class TestDataNodeVolumeFailureToleration {
 
   /**
    * Tests for a given volumes to be tolerated and volumes failed.
-   *
-   * @param volumesTolerated
-   * @param volumesFailed
-   * @param expectedBPServiceState
-   * @param clusterManaged
-   * @throws IOException
-   * @throws InterruptedException
    */
   private void testVolumeConfig(int volumesTolerated, int volumesFailed,
-      boolean expectedBPServiceState, boolean clusterManaged)
+      boolean expectedBPServiceState, boolean manageDfsDirs)
       throws IOException, InterruptedException {
     assumeTrue(!System.getProperty("os.name").startsWith("Windows"));
     final int dnIndex = 0;
+    // Fail the current directory since invalid storage directory perms
+    // get fixed up automatically on datanode startup.
     File[] dirs = {
         new File(MiniDFSCluster.getStorageDir(dnIndex, 0), "current"),
         new File(MiniDFSCluster.getStorageDir(dnIndex, 1), "current") };
@@ -195,11 +190,10 @@ public class TestDataNodeVolumeFailureToleration {
       for (int i = 0; i < volumesFailed; i++) {
         prepareDirToFail(dirs[i]);
       }
-      restartCluster(volumesTolerated, clusterManaged);
+      restartDatanodes(volumesTolerated, manageDfsDirs);
       assertEquals(expectedBPServiceState, cluster.getDataNodes().get(0)
           .isBPServiceAlive(cluster.getNamesystem().getBlockPoolId()));
     } finally {
-      // restore its old permission
      for (File dir : dirs) {
        FileUtil.chmod(dir.toString(), "755");
      }
@@ -215,8 +209,7 @@ public class TestDataNodeVolumeFailureToleration {
   private void prepareDirToFail(File dir) throws IOException,
       InterruptedException {
     dir.mkdirs();
-    assertTrue("Couldn't chmod local vol", FileUtil
-        .chmod(dir.toString(), "000") == 0);
+    assertEquals("Couldn't chmod local vol", 0,
+        FileUtil.chmod(dir.toString(), "000"));
   }
-
 }
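Illustrative note (not part of the patch): the FSDataset hunk above swaps Configuration.getStrings() for getTrimmedStrings() when reading the dfs data dir list. A minimal, hypothetical sketch of the difference, assuming a stock Hadoop Configuration on the classpath; the class name and paths below are made up for illustration:

import org.apache.hadoop.conf.Configuration;

public class DataDirTrimExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration(false);
    // A data dir value with stray whitespace, as a user might write it in
    // hdfs-site.xml (the key corresponds to DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY).
    conf.set("dfs.datanode.data.dir", "/data/1/dfs/dn, /data/2/dfs/dn");

    // Old behavior: entries keep surrounding whitespace, so the datanode
    // would see a volume named " /data/2/dfs/dn".
    for (String dir : conf.getStrings("dfs.datanode.data.dir")) {
      System.out.println("getStrings        -> [" + dir + "]");
    }

    // Behavior after this patch: each comma-separated entry is trimmed.
    for (String dir : conf.getTrimmedStrings("dfs.datanode.data.dir")) {
      System.out.println("getTrimmedStrings -> [" + dir + "]");
    }
  }
}

With trimming, a value like "dir1, dir2" yields two usable paths rather than one valid entry and one bogus whitespace-prefixed entry, which also keeps the volsConfigured count derived from dataDirs.length consistent with what the user configured.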