From ca75280b0e47026b8204defc2316e6328d9f99cc Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 7 Oct 2011 15:31:14 +0000 Subject: [PATCH] HDFS-2209. Make MiniDFS easier to embed in other apps. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-0.23@1180078 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + .../apache/hadoop/hdfs/MiniDFSCluster.java | 119 ++++++++++++++---- .../apache/hadoop/hdfs/TestCrcCorruption.java | 4 +- .../hadoop/hdfs/TestFileCorruption.java | 6 +- .../hadoop/hdfs/TestMiniDFSCluster.java | 108 ++++++++++++++++ .../TestOverReplicatedBlocks.java | 2 +- .../datanode/TestDataNodeVolumeFailure.java | 2 +- .../TestDataNodeVolumeFailureToleration.java | 6 +- .../server/datanode/TestDeleteBlockPool.java | 12 +- .../hdfs/server/datanode/TestDiskError.java | 8 +- .../hadoop/hdfs/server/namenode/TestFsck.java | 2 +- .../namenode/TestListCorruptFileBlocks.java | 10 +- 12 files changed, 229 insertions(+), 52 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index e927d9d210a..4a37f9b68e0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -692,6 +692,8 @@ Release 0.23.0 - Unreleased HDFS-2363. Move datanodes size printing from FSNamesystem.metasave(..) to BlockManager. (Uma Maheswara Rao G via szetszwo) + HDFS-2209. Make MiniDFS easier to embed in other apps. (stevel) + OPTIMIZATIONS HDFS-1458. Improve checkpoint performance by avoiding unnecessary image diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java index e51401cfc0b..750265ea946 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/MiniDFSCluster.java @@ -86,6 +86,10 @@ public class MiniDFSCluster { private static final String NAMESERVICE_ID_PREFIX = "nameserviceId"; private static final Log LOG = LogFactory.getLog(MiniDFSCluster.class); + /** System property to set the data dir: {@value} */ + public static final String PROP_TEST_BUILD_DATA = "test.build.data"; + /** Configuration option to set the data dir: {@value} */ + public static final String HDFS_MINIDFS_BASEDIR = "hdfs.minidfs.basedir"; static { DefaultMetricsSystem.setMiniClusterMode(true); } @@ -495,7 +499,7 @@ private void initMiniDFSCluster(int nameNodePort, int nameNodeHttpPort, boolean waitSafeMode, boolean setupHostsFile, boolean federation) throws IOException { this.conf = conf; - base_dir = new File(getBaseDirectory()); + base_dir = new File(determineDfsBaseDir()); data_dir = new File(base_dir, "data"); this.federation = federation; this.waitSafeMode = waitSafeMode; @@ -504,7 +508,7 @@ private void initMiniDFSCluster(int nameNodePort, int nameNodeHttpPort, String rpcEngineName = System.getProperty("hdfs.rpc.engine"); if (rpcEngineName != null && !"".equals(rpcEngineName)) { - System.out.println("HDFS using RPCEngine: "+rpcEngineName); + LOG.info("HDFS using RPCEngine: " + rpcEngineName); try { Class rpcEngine = conf.getClassByName(rpcEngineName); setRpcEngine(conf, NamenodeProtocols.class, rpcEngine); @@ -856,8 +860,8 @@ public synchronized void startDataNodes(Configuration conf, int 
numDataNodes, // Set up datanode address setupDatanodeAddress(dnConf, setupHostsFile, checkDataNodeAddrConfig); if (manageDfsDirs) { - File dir1 = getStorageDir(i, 0); - File dir2 = getStorageDir(i, 1); + File dir1 = getInstanceStorageDir(i, 0); + File dir2 = getInstanceStorageDir(i, 1); dir1.mkdirs(); dir2.mkdirs(); if (!dir1.isDirectory() || !dir2.isDirectory()) { @@ -873,17 +877,17 @@ public synchronized void startDataNodes(Configuration conf, int numDataNodes, dnConf.setLong(SimulatedFSDataset.CONFIG_PROPERTY_CAPACITY, simulatedCapacities[i-curDatanodesNum]); } - System.out.println("Starting DataNode " + i + " with " + LOG.info("Starting DataNode " + i + " with " + DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY + ": " + dnConf.get(DFSConfigKeys.DFS_DATANODE_DATA_DIR_KEY)); if (hosts != null) { dnConf.set(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY, hosts[i - curDatanodesNum]); - System.out.println("Starting DataNode " + i + " with hostname set to: " + LOG.info("Starting DataNode " + i + " with hostname set to: " + dnConf.get(DFSConfigKeys.DFS_DATANODE_HOST_NAME_KEY)); } if (racks != null) { String name = hosts[i - curDatanodesNum]; - System.out.println("Adding node with hostname : " + name + " to rack "+ + LOG.info("Adding node with hostname : " + name + " to rack " + racks[i-curDatanodesNum]); StaticMapping.addNodeToRack(name, racks[i-curDatanodesNum]); @@ -901,7 +905,7 @@ public synchronized void startDataNodes(Configuration conf, int numDataNodes, String ipAddr = dn.getSelfAddr().getAddress().getHostAddress(); if (racks != null) { int port = dn.getSelfAddr().getPort(); - System.out.println("Adding node with IP:port : " + ipAddr + ":" + port+ + LOG.info("Adding node with IP:port : " + ipAddr + ":" + port + " to rack " + racks[i-curDatanodesNum]); StaticMapping.addNodeToRack(ipAddr + ":" + port, racks[i-curDatanodesNum]); @@ -1097,7 +1101,7 @@ public int getNameNodePort(int nnIndex) { * Shutdown all the nodes in the cluster. 
*/ public void shutdown() { - System.out.println("Shutting down the Mini HDFS Cluster"); + LOG.info("Shutting down the Mini HDFS Cluster"); shutdownDataNodes(); for (NameNodeInfo nnInfo : nameNodes) { NameNode nameNode = nnInfo.nameNode; @@ -1137,7 +1141,7 @@ public synchronized void shutdownNameNodes() { public synchronized void shutdownNameNode(int nnIndex) { NameNode nn = nameNodes[nnIndex].nameNode; if (nn != null) { - System.out.println("Shutting down the namenode"); + LOG.info("Shutting down the namenode"); nn.stop(); nn.join(); Configuration conf = nameNodes[nnIndex].conf; @@ -1181,9 +1185,9 @@ public synchronized void restartNameNode(int nnIndex, boolean waitActive) nameNodes[nnIndex] = new NameNodeInfo(nn, conf); if (waitActive) { waitClusterUp(); - System.out.println("Restarted the namenode"); + LOG.info("Restarted the namenode"); waitActive(); - System.out.println("Cluster is active"); + LOG.info("Cluster is active"); } } @@ -1259,7 +1263,7 @@ public synchronized DataNodeProperties stopDataNode(int i) { } DataNodeProperties dnprop = dataNodes.remove(i); DataNode dn = dnprop.datanode; - System.out.println("MiniDFSCluster Stopping DataNode " + + LOG.info("MiniDFSCluster Stopping DataNode " + dn.getMachineName() + " from a total of " + (dataNodes.size() + 1) + " datanodes."); @@ -1348,7 +1352,7 @@ public synchronized boolean restartDataNodes(boolean keepPort) for (int i = dataNodes.size() - 1; i >= 0; i--) { if (!restartDataNode(i, keepPort)) return false; - System.out.println("Restarted DataNode " + i); + LOG.info("Restarted DataNode " + i); } return true; } @@ -1375,8 +1379,8 @@ public boolean isNameNodeUp(int nnIndex) { } catch (IOException ioe) { // This method above should never throw. // It only throws IOE since it is exposed via RPC - throw new AssertionError("Unexpected IOE thrown: " - + StringUtils.stringifyException(ioe)); + throw (AssertionError)(new AssertionError("Unexpected IOE thrown: " + + StringUtils.stringifyException(ioe)).initCause(ioe)); } boolean isUp = false; synchronized (this) { @@ -1522,7 +1526,7 @@ public void waitActive() throws IOException { failedCount++; // Cached RPC connection to namenode, if any, is expected to fail once if (failedCount > 1) { - System.out.println("Tried waitActive() " + failedCount + LOG.warn("Tried waitActive() " + failedCount + " time(s) and failed, giving up. " + StringUtils.stringifyException(e)); throw e; @@ -1574,7 +1578,7 @@ private synchronized boolean shouldWait(DatanodeInfo[] dnInfo, } public void formatDataNodeDirs() throws IOException { - base_dir = new File(getBaseDirectory()); + base_dir = new File(determineDfsBaseDir()); data_dir = new File(base_dir, "data"); if (data_dir.exists() && !FileUtil.fullyDelete(data_dir)) { throw new IOException("Cannot remove data directory: " + data_dir); @@ -1695,8 +1699,49 @@ public String getDataDirectory() { return data_dir.getAbsolutePath(); } + /** + * Get the base directory for this MiniDFS instance. + *
+ * Within the MiniDFSCluster class and any subclasses, this method should be + * used instead of {@link #getBaseDirectory()}, which doesn't support + * configuration-specific base directories. + *
+ * First the Configuration property {@link #HDFS_MINIDFS_BASEDIR} is fetched. + * If non-null, this is returned. + * If this is null, then {@link #getBaseDirectory()} is called. + * @return the base directory for this instance. + */ + protected String determineDfsBaseDir() { + String dfsdir = conf.get(HDFS_MINIDFS_BASEDIR, null); + if (dfsdir == null) { + dfsdir = getBaseDirectory(); + } + return dfsdir; + } + + /** + * Get the base directory for any DFS cluster whose configuration does + * not explicitly set it. This is done by retrieving the system property + * {@link #PROP_TEST_BUILD_DATA} (defaulting to "build/test/data" ), + * and returning that directory with a subdir of /dfs. + * @return a directory for use as a miniDFS filesystem. + */ public static String getBaseDirectory() { - return System.getProperty("test.build.data", "build/test/data") + "/dfs/"; + return System.getProperty(PROP_TEST_BUILD_DATA, "build/test/data") + "/dfs/"; + } + + /** + * Get a storage directory for a datanode in this specific instance of + * a MiniCluster. + * + * @param dnIndex datanode index (starts from 0) + * @param dirIndex directory index (0 or 1). Index 0 provides access to the + * first storage directory. Index 1 provides access to the second + * storage directory. + * @return Storage directory + */ + public File getInstanceStorageDir(int dnIndex, int dirIndex) { + return new File(base_dir, getStorageDirPath(dnIndex, dirIndex)); } /** @@ -1714,13 +1759,25 @@ public static String getBaseDirectory() { * @return Storage directory */ public static File getStorageDir(int dnIndex, int dirIndex) { - return new File(getBaseDirectory() + "data/data" + (2*dnIndex + 1 + dirIndex)); + return new File(getBaseDirectory(), getStorageDirPath(dnIndex, dirIndex)); } - + /** - * Get current directory corresponding to the datanode - * @param storageDir - * @return current directory + * Calculate the DN instance-specific path for appending to the base dir + * to determine the location of the storage of a DN instance in the mini cluster + * @param dnIndex datanode index + * @param dirIndex directory index (0 or 1). + * @return + */ + private static String getStorageDirPath(int dnIndex, int dirIndex) { + return "data/data" + (2 * dnIndex + 1 + dirIndex); + } + + /** + * Get current directory corresponding to the datanode as defined in + * (@link Storage#STORAGE_DIR_CURRENT} + * @param storageDir the storage directory of a datanode. + * @return the datanode current directory */ public static String getDNCurrentDir(File storageDir) { return storageDir + "/" + Storage.STORAGE_DIR_CURRENT + "/"; @@ -1728,8 +1785,8 @@ public static String getDNCurrentDir(File storageDir) { /** * Get directory corresponding to block pool directory in the datanode - * @param storageDir - * @return current directory + * @param storageDir the storage directory of a datanode. 
+ * @return the block pool directory */ public static String getBPDir(File storageDir, String bpid) { return getDNCurrentDir(storageDir) + bpid + "/"; @@ -1775,6 +1832,16 @@ public static File getBlockFile(File storageDir, ExtendedBlock blk) { return new File(getFinalizedDir(storageDir, blk.getBlockPoolId()), blk.getBlockName()); } + + /** + * Shut down a cluster if it is not null + * @param cluster cluster reference or null + */ + public static void shutdownCluster(MiniDFSCluster cluster) { + if (cluster != null) { + cluster.shutdown(); + } + } /** * Get all files related to a block from all the datanodes diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestCrcCorruption.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestCrcCorruption.java index 9944f1f2c8d..482f12b00dd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestCrcCorruption.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestCrcCorruption.java @@ -83,7 +83,7 @@ private void thistest(Configuration conf, DFSTestUtil util) throws Exception { // file disallows this Datanode to send data to another datanode. // However, a client is alowed access to this block. // - File storageDir = MiniDFSCluster.getStorageDir(0, 1); + File storageDir = cluster.getInstanceStorageDir(0, 1); String bpid = cluster.getNamesystem().getBlockPoolId(); File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid); assertTrue("data directory does not exist", data_dir.exists()); @@ -142,7 +142,7 @@ private void thistest(Configuration conf, DFSTestUtil util) throws Exception { // Now deliberately corrupt all meta blocks from the second // directory of the first datanode // - storageDir = MiniDFSCluster.getStorageDir(0, 1); + storageDir = cluster.getInstanceStorageDir(0, 1); data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid); assertTrue("data directory does not exist", data_dir.exists()); blocks = data_dir.listFiles(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCorruption.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCorruption.java index 8c0db5f2cc9..d5ba1992a8f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCorruption.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestFileCorruption.java @@ -65,7 +65,7 @@ public void testFileCorruption() throws Exception { FileSystem fs = cluster.getFileSystem(); util.createFiles(fs, "/srcdat"); // Now deliberately remove the blocks - File storageDir = MiniDFSCluster.getStorageDir(2, 0); + File storageDir = cluster.getInstanceStorageDir(2, 0); String bpid = cluster.getNamesystem().getBlockPoolId(); File data_dir = MiniDFSCluster.getFinalizedDir(storageDir, bpid); assertTrue("data directory does not exist", data_dir.exists()); @@ -127,11 +127,11 @@ public void testArrayOutOfBoundsException() throws Exception { // get the block final String bpid = cluster.getNamesystem().getBlockPoolId(); - File storageDir = MiniDFSCluster.getStorageDir(0, 0); + File storageDir = cluster.getInstanceStorageDir(0, 0); File dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid); ExtendedBlock blk = getBlock(bpid, dataDir); if (blk == null) { - storageDir = MiniDFSCluster.getStorageDir(0, 1); + storageDir = cluster.getInstanceStorageDir(0, 1); dataDir = MiniDFSCluster.getFinalizedDir(storageDir, bpid); blk = getBlock(bpid, dataDir); } diff --git 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java new file mode 100644 index 00000000000..5948178e79c --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestMiniDFSCluster.java @@ -0,0 +1,108 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs; + +import junit.framework.Assert; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +import java.io.File; + +/** + * Tests MiniDFS cluster setup/teardown and isolation. + * Every instance is brought up with a new data dir, to ensure that + * shutdown work in background threads don't interfere with bringing up + * the new cluster. + */ +public class TestMiniDFSCluster { + + private static final String CLUSTER_1 = "cluster1"; + private static final String CLUSTER_2 = "cluster2"; + private static final String CLUSTER_3 = "cluster3"; + protected String testDataPath; + protected File testDataDir; + @Before + public void setUp() { + testDataPath = System.getProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA); + testDataDir = new File(new File(testDataPath).getParentFile(), + "miniclusters"); + + + } + @After + public void tearDown() { + System.setProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA, testDataPath); + } + + /** + * Verify that without system properties the cluster still comes up, provided + * the configuration is set + * + * @throws Throwable on a failure + */ + @Test + public void testClusterWithoutSystemProperties() throws Throwable { + System.clearProperty(MiniDFSCluster.PROP_TEST_BUILD_DATA); + Configuration conf = new HdfsConfiguration(); + File testDataCluster1 = new File(testDataPath, CLUSTER_1); + String c1Path = testDataCluster1.getAbsolutePath(); + conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, c1Path); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); + try { + Assert.assertEquals(c1Path+"/data", cluster.getDataDirectory()); + } finally { + cluster.shutdown(); + } + } + + /** + * Bring up two clusters and assert that they are in different directories. 
+ * @throws Throwable on a failure + */ + @Test + public void testDualClusters() throws Throwable { + File testDataCluster2 = new File(testDataPath, CLUSTER_2); + File testDataCluster3 = new File(testDataPath, CLUSTER_3); + Configuration conf = new HdfsConfiguration(); + String c2Path = testDataCluster2.getAbsolutePath(); + conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, c2Path); + MiniDFSCluster cluster2 = new MiniDFSCluster.Builder(conf).build(); + MiniDFSCluster cluster3 = null; + try { + String dataDir2 = cluster2.getDataDirectory(); + Assert.assertEquals(c2Path + "/data", dataDir2); + //change the data dir + conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, + testDataCluster3.getAbsolutePath()); + MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf); + cluster3 = builder.build(); + String dataDir3 = cluster3.getDataDirectory(); + Assert.assertTrue("Clusters are bound to the same directory: " + dataDir2, + !dataDir2.equals(dataDir3)); + } finally { + MiniDFSCluster.shutdownCluster(cluster3); + MiniDFSCluster.shutdownCluster(cluster2); + } + } + + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestOverReplicatedBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestOverReplicatedBlocks.java index 102b41ca8e0..f7a5c0e065e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestOverReplicatedBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestOverReplicatedBlocks.java @@ -63,7 +63,7 @@ public void testProcesOverReplicateBlock() throws IOException { DataNodeProperties dnProps = cluster.stopDataNode(0); // remove block scanner log to trigger block scanning File scanLog = new File(MiniDFSCluster.getFinalizedDir( - MiniDFSCluster.getStorageDir(0, 0), + cluster.getInstanceStorageDir(0, 0), cluster.getNamesystem().getBlockPoolId()).getParent().toString() + "/../dncp_block_verification.log.prev"); //wait for one minute for deletion to succeed; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java index a541bcb5d2d..89e48fb586f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestDataNodeVolumeFailure.java @@ -324,7 +324,7 @@ private int countRealBlocks(Map map) { final String bpid = cluster.getNamesystem().getBlockPoolId(); for(int i=0; i
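
A minimal embedding sketch, not part of the patch above: it shows how an application could use the APIs this change introduces (HDFS_MINIDFS_BASEDIR, getInstanceStorageDir, and the null-safe shutdownCluster helper). It assumes the hadoop-hdfs test artifact is on the classpath; the base-dir path "target/embedded-hdfs" and the class name EmbeddedHdfsExample are illustrative only, not taken from the patch.

import java.io.File;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.MiniDFSCluster;

public class EmbeddedHdfsExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new HdfsConfiguration();
    // Give this cluster its own base directory via the Configuration,
    // instead of the JVM-wide "test.build.data" system property.
    // ("target/embedded-hdfs" is an arbitrary example path.)
    conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR,
        new File("target/embedded-hdfs").getAbsolutePath());
    MiniDFSCluster cluster = null;
    try {
      cluster = new MiniDFSCluster.Builder(conf).build();
      cluster.waitActive();
      FileSystem fs = cluster.getFileSystem();
      fs.mkdirs(new Path("/example"));
      // First storage directory of datanode 0, resolved per instance.
      File storageDir = cluster.getInstanceStorageDir(0, 0);
      System.out.println("Datanode 0 storage: " + storageDir);
    } finally {
      // Null-safe shutdown helper added by this patch.
      MiniDFSCluster.shutdownCluster(cluster);
    }
  }
}

Because the base directory now comes from the Configuration rather than a system property, two clusters configured with different HDFS_MINIDFS_BASEDIR values can coexist in one JVM without sharing storage, which is exactly what testDualClusters above verifies.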