From 9ef03a4c5bb5573eadc7d04e371c4af2dc6bae37 Mon Sep 17 00:00:00 2001 From: Tsz-Wo Nicholas Sze Date: Mon, 13 Jul 2015 15:12:26 -0700 Subject: [PATCH] HDFS-8541. Mover should exit with NO_MOVE_PROGRESS if there is no move progress. Contributed by Surendra Singh Lilhore --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../hdfs/server/balancer/Dispatcher.java | 18 +++++++++++++ .../hadoop/hdfs/server/mover/Mover.java | 27 ++++++++++++++----- .../hadoop/hdfs/server/mover/TestMover.java | 2 +- 4 files changed, 43 insertions(+), 7 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 14919902a62..e843dcceadb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -716,6 +716,9 @@ Release 2.8.0 - UNRELEASED HDFS-8751. Remove setBlocks API from INodeFile and misc code cleanup. (Zhe Zhang via jing9) + HDFS-8541. Mover should exit with NO_MOVE_PROGRESS if there is no move + progress. (Surendra Singh Lilhore via szetszwo) + OPTIMIZATIONS HDFS-8026. Trace FSOutputSummer#writeChecksumChunks rather than diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java index 4a8f40fa33b..298b86df100 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java @@ -317,6 +317,7 @@ public class Dispatcher { sendRequest(out, eb, accessToken); receiveResponse(in); nnc.getBytesMoved().addAndGet(block.getNumBytes()); + target.getDDatanode().setHasSuccess(); LOG.info("Successfully moved " + this); } catch (IOException e) { LOG.warn("Failed to move " + this + ": " + e.getMessage()); @@ -500,6 +501,7 @@ public class Dispatcher { /** blocks being moved but not confirmed yet */ private final List pendings; private volatile boolean hasFailure = false; + private volatile boolean hasSuccess = false; private final int maxConcurrentMoves; @Override @@ -573,6 +575,10 @@ public class Dispatcher { void setHasFailure() { this.hasFailure = true; } + + void setHasSuccess() { + this.hasSuccess = true; + } } /** A node that can be the sources of a block move */ @@ -964,6 +970,18 @@ public class Dispatcher { } } + /** + * @return true if some moves are success. + */ + public static boolean checkForSuccess( + Iterable targets) { + boolean hasSuccess = false; + for (StorageGroup t : targets) { + hasSuccess |= t.getDDatanode().hasSuccess; + } + return hasSuccess; + } + /** * Decide if the block is a good candidate to be moved from source to target. * A block is a good candidate if diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java index 344b9fcb8c5..afacebb51f8 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/mover/Mover.java @@ -269,10 +269,14 @@ public class Mover { // wait for pending move to finish and retry the failed migration boolean hasFailed = Dispatcher.waitForMoveCompletion(storages.targets .values()); - if (hasFailed) { + boolean hasSuccess = Dispatcher.checkForSuccess(storages.targets + .values()); + if (hasFailed && !hasSuccess) { if (retryCount.get() == retryMaxAttempts) { - throw new IOException("Failed to move some block's after " + result.setRetryFailed(); + LOG.error("Failed to move some block's after " + retryMaxAttempts + " retries."); + return result; } else { retryCount.incrementAndGet(); } @@ -713,10 +717,12 @@ public class Mover { private boolean hasRemaining; private boolean noBlockMoved; + private boolean retryFailed; Result() { hasRemaining = false; noBlockMoved = true; + retryFailed = false; } boolean isHasRemaining() { @@ -735,16 +741,25 @@ public class Mover { this.noBlockMoved = noBlockMoved; } + void setRetryFailed() { + this.retryFailed = true; + } + /** - * @return SUCCESS if all moves are success and there is no remaining move. + * @return NO_MOVE_PROGRESS if no progress in move after some retry. Return + * SUCCESS if all moves are success and there is no remaining move. * Return NO_MOVE_BLOCK if there moves available but all the moves * cannot be scheduled. Otherwise, return IN_PROGRESS since there * must be some remaining moves. */ ExitStatus getExitStatus() { - return !isHasRemaining() ? ExitStatus.SUCCESS - : isNoBlockMoved() ? ExitStatus.NO_MOVE_BLOCK - : ExitStatus.IN_PROGRESS; + if (retryFailed) { + return ExitStatus.NO_MOVE_PROGRESS; + } else { + return !isHasRemaining() ? ExitStatus.SUCCESS + : isNoBlockMoved() ? ExitStatus.NO_MOVE_BLOCK + : ExitStatus.IN_PROGRESS; + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java index 899b5c0ee8e..d3d814c10d2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/mover/TestMover.java @@ -404,7 +404,7 @@ public class TestMover { int rc = ToolRunner.run(conf, new Mover.Cli(), new String[] {"-p", file.toString()}); Assert.assertEquals("Movement should fail after some retry", - ExitStatus.IO_EXCEPTION.getExitCode(), rc); + ExitStatus.NO_MOVE_PROGRESS.getExitCode(), rc); } finally { cluster.shutdown(); }