diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index fc7c10c656c..ce0184bd92f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -101,6 +101,9 @@ Release 2.0.1-alpha - UNRELEASED
 
     HDFS-3604. Add dfs.webhdfs.enabled to hdfs-default.xml. (eli)
 
+    HDFS-2988. Improve error message when storage directory lock fails
+    (Miomir Boljanovic via harsh)
+
   OPTIMIZATIONS
 
     HDFS-2982. Startup performance suffers when there are many edit log
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java
index 5ed6e358867..5ad63340cfc 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/Storage.java
@@ -22,6 +22,7 @@ import java.io.FileInputStream;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.RandomAccessFile;
+import java.lang.management.ManagementFactory;
 import java.nio.channels.FileLock;
 import java.nio.channels.OverlappingFileLockException;
 import java.util.ArrayList;
@@ -600,14 +601,34 @@ public abstract class Storage extends StorageInfo {
         deletionHookAdded = true;
       }
       RandomAccessFile file = new RandomAccessFile(lockF, "rws");
+      String jvmName = ManagementFactory.getRuntimeMXBean().getName();
       FileLock res = null;
       try {
         res = file.getChannel().tryLock();
+        if (res == null) {
+          // tryLock() returns null instead of throwing when another process
+          // holds the lock; never overwrite the holder's name in that case.
+          throw new OverlappingFileLockException();
+        }
+        file.write(jvmName.getBytes());
+        LOG.info("Lock on " + lockF + " acquired by nodename " + jvmName);
       } catch(OverlappingFileLockException oe) {
+        // Reading the holder's name is best effort: a failed read must not
+        // leak the open file or hide the "already locked" outcome.
+        String lockedBy = "";
+        try {
+          lockedBy = file.readLine();
+        } catch (IOException ioe) {
+          LOG.warn("Unable to read the lock holder's name from " + lockF, ioe);
+        }
+        LOG.error("It appears that another namenode " + lockedBy
+            + " has already locked the storage directory");
         file.close();
         return null;
       } catch(IOException e) {
-        LOG.error("Cannot create lock on " + lockF, e);
+        LOG.error("Failed to acquire lock on " + lockF
+            + ". If this storage directory is mounted via NFS, "
+            + "ensure that the appropriate nfs lock services are running.", e);
         file.close();
         throw e;
       }
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
index 49d0f5d3a25..080c038689c 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java
@@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs.server.namenode;
 
 import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
 import junit.framework.TestCase;
+
+import java.lang.management.ManagementFactory;
 import java.net.InetSocketAddress;
 import java.io.File;
 import java.io.IOException;
@@ -48,6 +50,7 @@ import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
 import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
+import org.apache.hadoop.hdfs.server.common.Storage;
 import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
 import org.apache.hadoop.hdfs.server.common.StorageInfo;
 import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
@@ -60,6 +63,7 @@ import org.apache.hadoop.hdfs.tools.DFSAdmin;
 import org.apache.hadoop.net.NetUtils;
 import org.apache.hadoop.test.GenericTestUtils;
 import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
+import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.log4j.Level;
 import org.mockito.ArgumentMatcher;
@@ -679,6 +683,38 @@ public class TestCheckpoint extends TestCase {
     }
   }
 
+  /**
+   * Test that an attempt to lock a storage that is already locked by a namenode
+   * logs an error message that includes the JVM name of the namenode that locked it.
+   */
+  public void testStorageAlreadyLockedErrorMessage() throws Exception {
+    Configuration conf = new HdfsConfiguration();
+    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
+      .numDataNodes(0)
+      .build();
+
+    StorageDirectory savedSd = null;
+    try {
+      NNStorage storage = cluster.getNameNode().getFSImage().getStorage();
+      for (StorageDirectory sd : storage.dirIterable(null)) {
+        assertLockFails(sd);
+        savedSd = sd;
+      }
+
+      LogCapturer logs = GenericTestUtils.LogCapturer.captureLogs(LogFactory.getLog(Storage.class));
+      try {
+        // try to lock the storage that's already locked
+        savedSd.lock();
+        fail("Namenode should not be able to lock a storage that is already locked");
+      } catch (IOException ioe) {
+        String jvmName = ManagementFactory.getRuntimeMXBean().getName();
+        assertTrue("Error message does not include JVM name '" + jvmName
+            + "'", logs.getOutput().contains(jvmName));
+      }
+    } finally {
+      cluster.shutdown();
+    }
+  }
   /**
    * Assert that the given storage directory can't be locked, because