HDFS-2988. Improve error message when storage directory lock fails. Contributed by Miomir Boljanovic. (harsh)
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1358683 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
535b039581
commit
e5b00f1bdc
|
@ -276,6 +276,9 @@ Branch-2 ( Unreleased changes )
|
||||||
|
|
||||||
HDFS-3604. Add dfs.webhdfs.enabled to hdfs-default.xml. (eli)
|
HDFS-3604. Add dfs.webhdfs.enabled to hdfs-default.xml. (eli)
|
||||||
|
|
||||||
|
HDFS-2988. Improve error message when storage directory lock fails
|
||||||
|
(Miomir Boljanovic via harsh)
|
||||||
|
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
HDFS-2982. Startup performance suffers when there are many edit log
|
HDFS-2982. Startup performance suffers when there are many edit log
|
||||||
|
|
|
@ -22,6 +22,7 @@ import java.io.FileInputStream;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.RandomAccessFile;
|
import java.io.RandomAccessFile;
|
||||||
|
import java.lang.management.ManagementFactory;
|
||||||
import java.nio.channels.FileLock;
|
import java.nio.channels.FileLock;
|
||||||
import java.nio.channels.OverlappingFileLockException;
|
import java.nio.channels.OverlappingFileLockException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -615,14 +616,20 @@ public abstract class Storage extends StorageInfo {
|
||||||
deletionHookAdded = true;
|
deletionHookAdded = true;
|
||||||
}
|
}
|
||||||
RandomAccessFile file = new RandomAccessFile(lockF, "rws");
|
RandomAccessFile file = new RandomAccessFile(lockF, "rws");
|
||||||
|
String jvmName = ManagementFactory.getRuntimeMXBean().getName();
|
||||||
FileLock res = null;
|
FileLock res = null;
|
||||||
try {
|
try {
|
||||||
res = file.getChannel().tryLock();
|
res = file.getChannel().tryLock();
|
||||||
|
file.write(jvmName.getBytes());
|
||||||
|
LOG.info("Lock on " + lockF + " acquired by nodename " + jvmName);
|
||||||
} catch(OverlappingFileLockException oe) {
|
} catch(OverlappingFileLockException oe) {
|
||||||
|
LOG.error("It appears that another namenode " + file.readLine()
|
||||||
|
+ " has already locked the storage directory");
|
||||||
file.close();
|
file.close();
|
||||||
return null;
|
return null;
|
||||||
} catch(IOException e) {
|
} catch(IOException e) {
|
||||||
LOG.error("Cannot create lock on " + lockF, e);
|
LOG.error("Failed to acquire lock on " + lockF + ". If this storage directory is mounted via NFS, "
|
||||||
|
+ "ensure that the appropriate nfs lock services are running.", e);
|
||||||
file.close();
|
file.close();
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,8 @@ package org.apache.hadoop.hdfs.server.namenode;
|
||||||
|
|
||||||
import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
|
import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI;
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import java.lang.management.ManagementFactory;
|
||||||
import java.net.InetSocketAddress;
|
import java.net.InetSocketAddress;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -48,6 +50,7 @@ import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||||
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
|
||||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
|
||||||
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
|
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
|
||||||
|
import org.apache.hadoop.hdfs.server.common.Storage;
|
||||||
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
import org.apache.hadoop.hdfs.server.common.Storage.StorageDirectory;
|
||||||
import org.apache.hadoop.hdfs.server.common.StorageInfo;
|
import org.apache.hadoop.hdfs.server.common.StorageInfo;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
|
import org.apache.hadoop.hdfs.server.namenode.NNStorage.NameNodeDirType;
|
||||||
|
@ -60,6 +63,7 @@ import org.apache.hadoop.hdfs.tools.DFSAdmin;
|
||||||
import org.apache.hadoop.net.NetUtils;
|
import org.apache.hadoop.net.NetUtils;
|
||||||
import org.apache.hadoop.test.GenericTestUtils;
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
|
import org.apache.hadoop.test.GenericTestUtils.DelayAnswer;
|
||||||
|
import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
|
||||||
import org.apache.hadoop.util.StringUtils;
|
import org.apache.hadoop.util.StringUtils;
|
||||||
import org.apache.log4j.Level;
|
import org.apache.log4j.Level;
|
||||||
import org.mockito.ArgumentMatcher;
|
import org.mockito.ArgumentMatcher;
|
||||||
|
@ -679,6 +683,38 @@ public class TestCheckpoint extends TestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that an attempt to lock a storage directory that is already locked by a namenode
|
||||||
|
* logs an error message that includes the JVM name of the namenode that locked it.
|
||||||
|
*/
|
||||||
|
public void testStorageAlreadyLockedErrorMessage() throws Exception {
|
||||||
|
Configuration conf = new HdfsConfiguration();
|
||||||
|
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
|
||||||
|
.numDataNodes(0)
|
||||||
|
.build();
|
||||||
|
|
||||||
|
StorageDirectory savedSd = null;
|
||||||
|
try {
|
||||||
|
NNStorage storage = cluster.getNameNode().getFSImage().getStorage();
|
||||||
|
for (StorageDirectory sd : storage.dirIterable(null)) {
|
||||||
|
assertLockFails(sd);
|
||||||
|
savedSd = sd;
|
||||||
|
}
|
||||||
|
|
||||||
|
LogCapturer logs = GenericTestUtils.LogCapturer.captureLogs(LogFactory.getLog(Storage.class));
|
||||||
|
try {
|
||||||
|
// try to lock the storage that's already locked
|
||||||
|
savedSd.lock();
|
||||||
|
fail("Namenode should not be able to lock a storage that is already locked");
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
String jvmName = ManagementFactory.getRuntimeMXBean().getName();
|
||||||
|
assertTrue("Error message does not include JVM name '" + jvmName
|
||||||
|
+ "'", logs.getOutput().contains(jvmName));
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
cluster.shutdown();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Assert that the given storage directory can't be locked, because
|
* Assert that the given storage directory can't be locked, because
|
||||||
|
|
Loading…
Reference in New Issue