HDFS-8541. Mover should exit with NO_MOVE_PROGRESS if there is no move progress. Contributed by Surendra Singh Lilhore

This commit is contained in:
Tsz-Wo Nicholas Sze 2015-07-13 15:12:26 -07:00
parent f7c8311e98
commit 9ef03a4c5b
4 changed files with 43 additions and 7 deletions

View File

@ -716,6 +716,9 @@ Release 2.8.0 - UNRELEASED
HDFS-8751. Remove setBlocks API from INodeFile and misc code cleanup. (Zhe HDFS-8751. Remove setBlocks API from INodeFile and misc code cleanup. (Zhe
Zhang via jing9) Zhang via jing9)
HDFS-8541. Mover should exit with NO_MOVE_PROGRESS if there is no move
progress. (Surendra Singh Lilhore via szetszwo)
OPTIMIZATIONS OPTIMIZATIONS
HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than

View File

@ -317,6 +317,7 @@ public class Dispatcher {
sendRequest(out, eb, accessToken); sendRequest(out, eb, accessToken);
receiveResponse(in); receiveResponse(in);
nnc.getBytesMoved().addAndGet(block.getNumBytes()); nnc.getBytesMoved().addAndGet(block.getNumBytes());
target.getDDatanode().setHasSuccess();
LOG.info("Successfully moved " + this); LOG.info("Successfully moved " + this);
} catch (IOException e) { } catch (IOException e) {
LOG.warn("Failed to move " + this + ": " + e.getMessage()); LOG.warn("Failed to move " + this + ": " + e.getMessage());
@ -500,6 +501,7 @@ public class Dispatcher {
/** blocks being moved but not confirmed yet */ /** blocks being moved but not confirmed yet */
private final List<PendingMove> pendings; private final List<PendingMove> pendings;
private volatile boolean hasFailure = false; private volatile boolean hasFailure = false;
private volatile boolean hasSuccess = false;
private final int maxConcurrentMoves; private final int maxConcurrentMoves;
@Override @Override
@ -573,6 +575,10 @@ public class Dispatcher {
void setHasFailure() { void setHasFailure() {
this.hasFailure = true; this.hasFailure = true;
} }
/**
 * Latches the success flag for this node. The flag is only ever set to
 * true here and is later read by {@code checkForSuccess}.
 */
void setHasSuccess() {
  hasSuccess = true;
}
} }
/** A node that can be the sources of a block move */ /** A node that can be the sources of a block move */
@ -964,6 +970,18 @@ public class Dispatcher {
} }
} }
/**
 * Reports whether any of the given targets has recorded a successful move.
 *
 * @param targets the storage groups whose datanodes are inspected
 * @return true if at least one target's datanode has its success flag set;
 *         false otherwise, including when {@code targets} is empty
 */
public static boolean checkForSuccess(
    Iterable<? extends StorageGroup> targets) {
  boolean anySucceeded = false;
  // Visit every target; the result latches once any datanode reports success.
  for (StorageGroup group : targets) {
    if (group.getDDatanode().hasSuccess) {
      anySucceeded = true;
    }
  }
  return anySucceeded;
}
/** /**
* Decide if the block is a good candidate to be moved from source to target. * Decide if the block is a good candidate to be moved from source to target.
* A block is a good candidate if * A block is a good candidate if

View File

@ -269,10 +269,14 @@ public class Mover {
// wait for pending move to finish and retry the failed migration // wait for pending move to finish and retry the failed migration
boolean hasFailed = Dispatcher.waitForMoveCompletion(storages.targets boolean hasFailed = Dispatcher.waitForMoveCompletion(storages.targets
.values()); .values());
if (hasFailed) { boolean hasSuccess = Dispatcher.checkForSuccess(storages.targets
.values());
if (hasFailed && !hasSuccess) {
if (retryCount.get() == retryMaxAttempts) { if (retryCount.get() == retryMaxAttempts) {
throw new IOException("Failed to move some block's after " result.setRetryFailed();
LOG.error("Failed to move some block's after "
+ retryMaxAttempts + " retries."); + retryMaxAttempts + " retries.");
return result;
} else { } else {
retryCount.incrementAndGet(); retryCount.incrementAndGet();
} }
@ -713,10 +717,12 @@ public class Mover {
private boolean hasRemaining; private boolean hasRemaining;
private boolean noBlockMoved; private boolean noBlockMoved;
private boolean retryFailed;
Result() {
  // Initial state: retries not exhausted, no block moved yet, nothing
  // left over.
  retryFailed = false;
  noBlockMoved = true;
  hasRemaining = false;
}
boolean isHasRemaining() { boolean isHasRemaining() {
@ -735,17 +741,26 @@ public class Mover {
this.noBlockMoved = noBlockMoved; this.noBlockMoved = noBlockMoved;
} }
/**
 * Marks this result as having failed on retries. Once set,
 * {@code getExitStatus()} reports {@code NO_MOVE_PROGRESS}.
 */
void setRetryFailed() {
  retryFailed = true;
}
/** /**
* @return SUCCESS if all moves are success and there is no remaining move. * @return NO_MOVE_PROGRESS if no progress in move after some retry. Return
* SUCCESS if all moves are success and there is no remaining move.
* Return NO_MOVE_BLOCK if there moves available but all the moves * Return NO_MOVE_BLOCK if there moves available but all the moves
* cannot be scheduled. Otherwise, return IN_PROGRESS since there * cannot be scheduled. Otherwise, return IN_PROGRESS since there
* must be some remaining moves. * must be some remaining moves.
*/ */
ExitStatus getExitStatus() { ExitStatus getExitStatus() {
if (retryFailed) {
return ExitStatus.NO_MOVE_PROGRESS;
} else {
return !isHasRemaining() ? ExitStatus.SUCCESS return !isHasRemaining() ? ExitStatus.SUCCESS
: isNoBlockMoved() ? ExitStatus.NO_MOVE_BLOCK : isNoBlockMoved() ? ExitStatus.NO_MOVE_BLOCK
: ExitStatus.IN_PROGRESS; : ExitStatus.IN_PROGRESS;
} }
}
} }
/** /**

View File

@ -404,7 +404,7 @@ public class TestMover {
int rc = ToolRunner.run(conf, new Mover.Cli(), int rc = ToolRunner.run(conf, new Mover.Cli(),
new String[] {"-p", file.toString()}); new String[] {"-p", file.toString()});
Assert.assertEquals("Movement should fail after some retry", Assert.assertEquals("Movement should fail after some retry",
ExitStatus.IO_EXCEPTION.getExitCode(), rc); ExitStatus.NO_MOVE_PROGRESS.getExitCode(), rc);
} finally { } finally {
cluster.shutdown(); cluster.shutdown();
} }