svn merge -c 1532932 from trunk for HDFS-4376. Fix race conditions in Balancer.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1532933 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tsz-wo Sze 2013-10-16 23:06:46 +00:00
parent 186581fef2
commit 0b18a51c33
2 changed files with 29 additions and 11 deletions

View File

@ -108,6 +108,8 @@ Release 2.3.0 - UNRELEASED
HDFS-5283. Under construction blocks only inside snapshots should not be
counted in safemode threshold. (Vinay via szetszwo)
HDFS-4376. Fix race conditions in Balancer. (Junping Du via szetszwo)
Release 2.2.1 - UNRELEASED
INCOMPATIBLE CHANGES

View File

@ -506,7 +506,7 @@ private static class BalancerDatanode {
final DatanodeInfo datanode;
final double utilization;
final long maxSize2Move;
protected long scheduledSize = 0L;
private long scheduledSize = 0L;
// blocks being moved but not confirmed yet
private List<PendingBlockMove> pendingBlocks =
new ArrayList<PendingBlockMove>(MAX_NUM_CONCURRENT_MOVES);
@ -555,20 +555,35 @@ protected String getStorageID() {
}
/** Decide if still need to move more bytes */
protected boolean hasSpaceForScheduling() {
protected synchronized boolean hasSpaceForScheduling() {
return scheduledSize<maxSize2Move;
}
/** Return the total number of bytes that need to be moved */
protected long availableSizeToMove() {
protected synchronized long availableSizeToMove() {
return maxSize2Move-scheduledSize;
}
/* increment scheduled size */
protected void incScheduledSize(long size) {
/** increment scheduled size */
protected synchronized void incScheduledSize(long size) {
scheduledSize += size;
}
/**
 * Decrement the scheduled size.
 * Synchronized on this object, like the other accessors of
 * {@code scheduledSize} in this class.
 *
 * @param size amount to subtract from {@code scheduledSize}
 */
protected synchronized void decScheduledSize(long size) {
scheduledSize -= size;
}
/**
 * Get the scheduled size.
 * Synchronized on this object, like the other accessors of
 * {@code scheduledSize} in this class.
 *
 * @return the current value of {@code scheduledSize}
 */
protected synchronized long getScheduledSize(){
return scheduledSize;
}
/**
 * Set the scheduled size.
 * Synchronized on this object, like the other accessors of
 * {@code scheduledSize} in this class.
 *
 * @param size new value that replaces {@code scheduledSize}
 */
protected synchronized void setScheduledSize(long size){
scheduledSize = size;
}
/* Check if the node can schedule more blocks to move */
synchronized private boolean isPendingQNotFull() {
if ( pendingBlocks.size() < MAX_NUM_CONCURRENT_MOVES ) {
@ -702,8 +717,8 @@ private PendingBlockMove chooseNextBlockToMove() {
pendingBlock.source = this;
pendingBlock.target = target;
if ( pendingBlock.chooseBlockAndProxy() ) {
long blockSize = pendingBlock.block.getNumBytes();
scheduledSize -= blockSize;
long blockSize = pendingBlock.block.getNumBytes();
decScheduledSize(blockSize);
task.size -= blockSize;
if (task.size == 0) {
tasks.remove();
@ -747,10 +762,11 @@ private boolean shouldFetchMoreBlocks() {
private static final long MAX_ITERATION_TIME = 20*60*1000L; //20 mins
private void dispatchBlocks() {
long startTime = Time.now();
long scheduledSize = getScheduledSize();
this.blocksToReceive = 2*scheduledSize;
boolean isTimeUp = false;
int noPendingBlockIteration = 0;
while(!isTimeUp && scheduledSize>0 &&
while(!isTimeUp && getScheduledSize()>0 &&
(!srcBlockList.isEmpty() || blocksToReceive>0)) {
PendingBlockMove pendingBlock = chooseNextBlockToMove();
if (pendingBlock != null) {
@ -779,7 +795,7 @@ private void dispatchBlocks() {
// in case no blocks can be moved for source node's task,
// jump out of while-loop after 5 iterations.
if (noPendingBlockIteration >= MAX_NO_PENDING_BLOCK_ITERATIONS) {
scheduledSize = 0;
setScheduledSize(0);
}
}
@ -992,7 +1008,7 @@ private long chooseNodes() {
long bytesToMove = 0L;
for (Source src : sources) {
bytesToMove += src.scheduledSize;
bytesToMove += src.getScheduledSize();
}
return bytesToMove;
}
@ -1093,7 +1109,7 @@ private synchronized void inc( long bytes ) {
bytesMoved += bytes;
}
private long get() {
private synchronized long get() {
return bytesMoved;
}
};