HDFS-2229. Fix a deadlock in namenode by enforcing lock acquisition ordering.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1156847 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tsz-wo Sze 2011-08-11 22:16:16 +00:00
parent 7528853197
commit 1dd48b1aee
3 changed files with 42 additions and 36 deletions

View File

@ -954,6 +954,9 @@ Trunk (unreleased changes)
HDFS-2245. Fix a NullPointerException in BlockManager.chooseTarget(..).
(szetszwo)
HDFS-2229. Fix a deadlock in namenode by enforcing lock acquisition
ordering. (szetszwo)
BREAKDOWN OF HDFS-1073 SUBTASKS
HDFS-1521. Persist transaction ID on disk between NN restarts.

View File

@ -1829,39 +1829,37 @@ public class BlockManager {
* over or under replicated. Place it into the respective queue.
*/
public void processMisReplicatedBlocks() {
long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0;
namesystem.writeLock();
try {
neededReplications.clear();
for (BlockInfo block : blocksMap.getBlocks()) {
INodeFile fileINode = block.getINode();
if (fileINode == null) {
// block does not belong to any file
nrInvalid++;
addToInvalidates(block);
continue;
}
// calculate current replication
short expectedReplication = fileINode.getReplication();
NumberReplicas num = countNodes(block);
int numCurrentReplica = num.liveReplicas();
// add to under-replicated queue if need to be
if (isNeededReplication(block, expectedReplication, numCurrentReplica)) {
if (neededReplications.add(block, numCurrentReplica, num
.decommissionedReplicas(), expectedReplication)) {
nrUnderReplicated++;
}
}
assert namesystem.hasWriteLock();
if (numCurrentReplica > expectedReplication) {
// over-replicated block
nrOverReplicated++;
processOverReplicatedBlock(block, expectedReplication, null, null);
long nrInvalid = 0, nrOverReplicated = 0, nrUnderReplicated = 0;
neededReplications.clear();
for (BlockInfo block : blocksMap.getBlocks()) {
INodeFile fileINode = block.getINode();
if (fileINode == null) {
// block does not belong to any file
nrInvalid++;
addToInvalidates(block);
continue;
}
// calculate current replication
short expectedReplication = fileINode.getReplication();
NumberReplicas num = countNodes(block);
int numCurrentReplica = num.liveReplicas();
// add to under-replicated queue if need to be
if (isNeededReplication(block, expectedReplication, numCurrentReplica)) {
if (neededReplications.add(block, numCurrentReplica, num
.decommissionedReplicas(), expectedReplication)) {
nrUnderReplicated++;
}
}
} finally {
namesystem.writeUnlock();
if (numCurrentReplica > expectedReplication) {
// over-replicated block
nrOverReplicated++;
processOverReplicatedBlock(block, expectedReplication, null, null);
}
}
LOG.info("Total number of blocks = " + blocksMap.size());
LOG.info("Number of invalid blocks = " + nrInvalid);
LOG.info("Number of under-replicated blocks = " + nrUnderReplicated);

View File

@ -313,13 +313,18 @@ public class FSNamesystem implements RwLock, FSClusterStats,
* Activate FSNamesystem daemons.
*/
void activate(Configuration conf) throws IOException {
setBlockTotal();
blockManager.activate(conf);
this.lmthread = new Daemon(leaseManager.new Monitor());
lmthread.start();
writeLock();
try {
setBlockTotal();
blockManager.activate(conf);
this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
nnrmthread.start();
this.lmthread = new Daemon(leaseManager.new Monitor());
lmthread.start();
this.nnrmthread = new Daemon(new NameNodeResourceMonitor());
nnrmthread.start();
} finally {
writeUnlock();
}
registerMXBean();
DefaultMetricsSystem.instance().register(this);