HDFS-2988. Improve error message when storage directory lock fails. Contributed by Miomir Boljanovic. (harsh)

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1358683 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Harsh J 2012-07-08 00:36:38 +00:00
parent 535b039581
commit e5b00f1bdc
3 changed files with 47 additions and 1 deletions

View File

@ -276,6 +276,9 @@ Branch-2 ( Unreleased changes )
HDFS-3604. Add dfs.webhdfs.enabled to hdfs-default.xml. (eli) HDFS-3604. Add dfs.webhdfs.enabled to hdfs-default.xml. (eli)
HDFS-2988. Improve error message when storage directory lock fails
(Miomir Boljanovic via harsh)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-2982. Startup performance suffers when there are many edit log HDFS-2982. Startup performance suffers when there are many edit log

View File

@ -22,6 +22,7 @@ import java.io.FileInputStream;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.RandomAccessFile; import java.io.RandomAccessFile;
import java.lang.management.ManagementFactory;
import java.nio.channels.FileLock; import java.nio.channels.FileLock;
import java.nio.channels.OverlappingFileLockException; import java.nio.channels.OverlappingFileLockException;
import java.util.ArrayList; import java.util.ArrayList;
@ -615,14 +616,20 @@ public abstract class Storage extends StorageInfo {
deletionHookAdded = true; deletionHookAdded = true;
} }
RandomAccessFile file = new RandomAccessFile(lockF, "rws"); RandomAccessFile file = new RandomAccessFile(lockF, "rws");
String jvmName = ManagementFactory.getRuntimeMXBean().getName();
FileLock res = null; FileLock res = null;
try { try {
res = file.getChannel().tryLock(); res = file.getChannel().tryLock();
file.write(jvmName.getBytes());
LOG.info("Lock on " + lockF + " acquired by nodename " + jvmName);
} catch(OverlappingFileLockException oe) { } catch(OverlappingFileLockException oe) {
LOG.error("It appears that another namenode " + file.readLine()
+ " has already locked the storage directory");
file.close(); file.close();
return null; return null;
} catch(IOException e) { } catch(IOException e) {
LOG.error("Cannot create lock on " + lockF, e); LOG.error("Failed to acquire lock on " + lockF + ". If this storage directory is mounted via NFS, "
+ "ensure that the appropriate nfs lock services are running.", e);
file.close(); file.close();
throw e; throw e;
} }

View File

@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs.server.namenode;
import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI; import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
import junit.framework.TestCase; import junit.framework.TestCase;
import java.lang.management.ManagementFactory;
import java.net.InetSocketAddress; import java.net.InetSocketAddress;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
@ -48,6 +50,7 @@ import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.Storage;
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory; import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
import org.apache.hadoop.hdfs.server.common.StorageInfo; import org.apache.hadoop.hdfs.server.common.StorageInfo;
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType; import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
@ -60,6 +63,7 @@ import org.apache.hadoop.hdfs.tools.DFSAdmin;
import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.GenericTestUtils.DelayAnswer; import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.StringUtils;
import org.apache.log4j.Level; import org.apache.log4j.Level;
import org.mockito.ArgumentMatcher; import org.mockito.ArgumentMatcher;
@ -679,6 +683,38 @@ public class TestCheckpoint extends TestCase {
} }
} }
/**
 * Verifies that an attempt to lock a storage directory that is already
 * locked fails with an {@link IOException}, and that the output captured
 * from the {@link Storage} logger during that attempt contains the runtime
 * name of the current JVM (as reported by the runtime MXBean).
 *
 * @throws Exception if the mini cluster cannot be built or an unexpected
 *         error occurs while iterating the storage directories
 */
public void testStorageAlreadyLockedErrorMessage() throws Exception {
  Configuration conf = new HdfsConfiguration();
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(0)
      .build();
  StorageDirectory savedSd = null;
  try {
    NNStorage storage = cluster.getNameNode().getFSImage().getStorage();
    // Every directory of the running namenode must refuse a second lock;
    // remember the last one visited so it can be probed again below.
    for (StorageDirectory sd : storage.dirIterable(null)) {
      assertLockFails(sd);
      savedSd = sd;
    }
    // Fail with a descriptive message rather than a NullPointerException
    // if the iteration above produced no storage directory at all.
    assertNotNull("No storage directory available to run the lock test on",
        savedSd);

    // Capture starts only now, so the output checked below comes from the
    // lock attempt made inside the following try block.
    LogCapturer logs = GenericTestUtils.LogCapturer.captureLogs(
        LogFactory.getLog(Storage.class));
    try {
      // try to lock the storage that's already locked
      savedSd.lock();
      fail("Namenode should not be able to lock a storage that is already locked");
    } catch (IOException ioe) {
      // Expected path: the captured log output must name this JVM.
      String jvmName = ManagementFactory.getRuntimeMXBean().getName();
      assertTrue("Error message does not include JVM name '" + jvmName
          + "'", logs.getOutput().contains(jvmName));
    }
  } finally {
    cluster.shutdown();
  }
}
/** /**
* Assert that the given storage directory can't be locked, because * Assert that the given storage directory can't be locked, because