From c0ac0a533701a56b09b302a660c3233971512168 Mon Sep 17 00:00:00 2001 From: Xiao Chen Date: Wed, 8 Aug 2018 10:36:44 -0700 Subject: [PATCH] HDFS-13658. Expose HighestPriorityLowRedundancy blocks statistics. Contributed by Kitti Nanasi. --- .../src/site/markdown/Metrics.md | 2 + .../hdfs/protocol/ECBlockGroupStats.java | 27 +++++++++++- .../hdfs/protocol/ReplicatedBlockStats.java | 28 +++++++++++- .../hdfs/protocolPB/PBHelperClient.java | 21 +++++++++ .../main/proto/ClientNamenodeProtocol.proto | 3 ++ .../metrics/NamenodeBeanMetrics.java | 10 +++++ .../federation/router/ErasureCoding.java | 13 ++++++ .../server/blockmanagement/BlockManager.java | 8 ++++ .../blockmanagement/LowRedundancyBlocks.java | 28 ++++++++++++ .../hdfs/server/namenode/FSNamesystem.java | 20 ++++++++- .../hdfs/server/namenode/NameNodeMXBean.java | 18 ++++++++ .../apache/hadoop/hdfs/tools/DFSAdmin.java | 10 +++++ .../TestLowRedundancyBlockQueues.java | 43 ++++++++++++------- .../namenode/metrics/TestNameNodeMetrics.java | 12 ++++++ .../hadoop/hdfs/tools/TestDFSAdmin.java | 32 +++++++++++--- 15 files changed, 247 insertions(+), 28 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md index f1dbc500dfe..4d59c6e61a3 100644 --- a/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md +++ b/hadoop-common-project/hadoop-common/src/site/markdown/Metrics.md @@ -240,6 +240,8 @@ Each metrics record contains tags such as HAState and Hostname as additional inf | `StaleDataNodes` | Current number of DataNodes marked stale due to delayed heartbeat | | `NumStaleStorages` | Number of storages marked as content stale (after NameNode restart/failover before first block report is received) | | `MissingReplOneBlocks` | Current number of missing blocks with replication factor 1 | +| `HighestPriorityLowRedundancyReplicatedBlocks` | Current number of non-corrupt, low redundancy replicated blocks with the highest risk of loss (have 0 or 1 replica). Will be recovered with the highest priority. | +| `HighestPriorityLowRedundancyECBlocks` | Current number of non-corrupt, low redundancy EC blocks with the highest risk of loss. Will be recovered with the highest priority. | | `NumFilesUnderConstruction` | Current number of files under construction | | `NumActiveClients` | Current number of active clients holding lease | | `HAState` | (HA-only) Current state of the NameNode: initializing or active or standby or stopping state | diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ECBlockGroupStats.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ECBlockGroupStats.java index 9a8ad8cdb13..3dde6043468 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ECBlockGroupStats.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ECBlockGroupStats.java @@ -34,15 +34,26 @@ public final class ECBlockGroupStats { private final long missingBlockGroups; private final long bytesInFutureBlockGroups; private final long pendingDeletionBlocks; + private final Long highestPriorityLowRedundancyBlocks; public ECBlockGroupStats(long lowRedundancyBlockGroups, long corruptBlockGroups, long missingBlockGroups, long bytesInFutureBlockGroups, long pendingDeletionBlocks) { + this(lowRedundancyBlockGroups, corruptBlockGroups, missingBlockGroups, + bytesInFutureBlockGroups, pendingDeletionBlocks, null); + } + + public ECBlockGroupStats(long lowRedundancyBlockGroups, + long corruptBlockGroups, long missingBlockGroups, + long bytesInFutureBlockGroups, long pendingDeletionBlocks, + Long highestPriorityLowRedundancyBlocks) { this.lowRedundancyBlockGroups = lowRedundancyBlockGroups; this.corruptBlockGroups = corruptBlockGroups; this.missingBlockGroups = missingBlockGroups; this.bytesInFutureBlockGroups = bytesInFutureBlockGroups; this.pendingDeletionBlocks = pendingDeletionBlocks; + this.highestPriorityLowRedundancyBlocks + = highestPriorityLowRedundancyBlocks; } public long getBytesInFutureBlockGroups() { @@ -65,6 +76,14 @@ public final class ECBlockGroupStats { return pendingDeletionBlocks; } + public boolean hasHighestPriorityLowRedundancyBlocks() { + return getHighestPriorityLowRedundancyBlocks() != null; + } + + public Long getHighestPriorityLowRedundancyBlocks() { + return highestPriorityLowRedundancyBlocks; + } + @Override public String toString() { StringBuilder statsBuilder = new StringBuilder(); @@ -76,8 +95,12 @@ public final class ECBlockGroupStats { .append(", BytesInFutureBlockGroups=").append( getBytesInFutureBlockGroups()) .append(", PendingDeletionBlocks=").append( - getPendingDeletionBlocks()) - .append("]"); + getPendingDeletionBlocks()); + if (hasHighestPriorityLowRedundancyBlocks()) { + statsBuilder.append(", HighestPriorityLowRedundancyBlocks=") + .append(getHighestPriorityLowRedundancyBlocks()); + } + statsBuilder.append("]"); return statsBuilder.toString(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReplicatedBlockStats.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReplicatedBlockStats.java index 49aadedcdec..c2100034bcd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReplicatedBlockStats.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocol/ReplicatedBlockStats.java @@ -35,17 +35,29 @@ public final class ReplicatedBlockStats { private final long missingReplicationOneBlocks; private final long bytesInFutureBlocks; private final long pendingDeletionBlocks; + private final Long highestPriorityLowRedundancyBlocks; public ReplicatedBlockStats(long lowRedundancyBlocks, long corruptBlocks, long missingBlocks, long missingReplicationOneBlocks, long bytesInFutureBlocks, long pendingDeletionBlocks) { + this(lowRedundancyBlocks, corruptBlocks, missingBlocks, + missingReplicationOneBlocks, bytesInFutureBlocks, pendingDeletionBlocks, + null); + } + + public ReplicatedBlockStats(long lowRedundancyBlocks, + long corruptBlocks, long missingBlocks, + long missingReplicationOneBlocks, long bytesInFutureBlocks, + long pendingDeletionBlocks, Long highestPriorityLowRedundancyBlocks) { this.lowRedundancyBlocks = lowRedundancyBlocks; this.corruptBlocks = corruptBlocks; this.missingBlocks = missingBlocks; this.missingReplicationOneBlocks = missingReplicationOneBlocks; this.bytesInFutureBlocks = bytesInFutureBlocks; this.pendingDeletionBlocks = pendingDeletionBlocks; + this.highestPriorityLowRedundancyBlocks + = highestPriorityLowRedundancyBlocks; } public long getLowRedundancyBlocks() { @@ -72,6 +84,14 @@ public final class ReplicatedBlockStats { return pendingDeletionBlocks; } + public boolean hasHighestPriorityLowRedundancyBlocks() { + return getHighestPriorityLowRedundancyBlocks() != null; + } + + public Long getHighestPriorityLowRedundancyBlocks(){ + return highestPriorityLowRedundancyBlocks; + } + @Override public String toString() { StringBuilder statsBuilder = new StringBuilder(); @@ -83,8 +103,12 @@ public final class ReplicatedBlockStats { getMissingReplicationOneBlocks()) .append(", BytesInFutureBlocks=").append(getBytesInFutureBlocks()) .append(", PendingDeletionBlocks=").append( - getPendingDeletionBlocks()) - .append("]"); + getPendingDeletionBlocks()); + if (hasHighestPriorityLowRedundancyBlocks()) { + statsBuilder.append(", HighestPriorityLowRedundancyBlocks=").append( + getHighestPriorityLowRedundancyBlocks()); + } + statsBuilder.append("]"); return statsBuilder.toString(); } } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java index 490ccb453b2..4a5a493bf50 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelperClient.java @@ -1990,6 +1990,13 @@ public class PBHelperClient { public static ReplicatedBlockStats convert( GetFsReplicatedBlockStatsResponseProto res) { + if (res.hasHighestPrioLowRedundancyBlocks()) { + return new ReplicatedBlockStats(res.getLowRedundancy(), + res.getCorruptBlocks(), res.getMissingBlocks(), + res.getMissingReplOneBlocks(), res.getBlocksInFuture(), + res.getPendingDeletionBlocks(), + res.getHighestPrioLowRedundancyBlocks()); + } return new ReplicatedBlockStats(res.getLowRedundancy(), res.getCorruptBlocks(), res.getMissingBlocks(), res.getMissingReplOneBlocks(), res.getBlocksInFuture(), @@ -1998,6 +2005,12 @@ public class PBHelperClient { public static ECBlockGroupStats convert( GetFsECBlockGroupStatsResponseProto res) { + if (res.hasHighestPrioLowRedundancyBlocks()) { + return new ECBlockGroupStats(res.getLowRedundancy(), + res.getCorruptBlocks(), res.getMissingBlocks(), + res.getBlocksInFuture(), res.getPendingDeletionBlocks(), + res.getHighestPrioLowRedundancyBlocks()); + } return new ECBlockGroupStats(res.getLowRedundancy(), res.getCorruptBlocks(), res.getMissingBlocks(), res.getBlocksInFuture(), res.getPendingDeletionBlocks()); @@ -2432,6 +2445,10 @@ public class PBHelperClient { replicatedBlockStats.getBytesInFutureBlocks()); result.setPendingDeletionBlocks( replicatedBlockStats.getPendingDeletionBlocks()); + if (replicatedBlockStats.hasHighestPriorityLowRedundancyBlocks()) { + result.setHighestPrioLowRedundancyBlocks( + replicatedBlockStats.getHighestPriorityLowRedundancyBlocks()); + } return result.build(); } @@ -2447,6 +2464,10 @@ public class PBHelperClient { ecBlockGroupStats.getBytesInFutureBlockGroups()); result.setPendingDeletionBlocks( ecBlockGroupStats.getPendingDeletionBlocks()); + if (ecBlockGroupStats.hasHighestPriorityLowRedundancyBlocks()) { + result.setHighestPrioLowRedundancyBlocks( + ecBlockGroupStats.getHighestPriorityLowRedundancyBlocks()); + } return result.build(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto index e51aeda65b2..ae4c93e4ea5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/proto/ClientNamenodeProtocol.proto @@ -348,6 +348,8 @@ message GetFsReplicatedBlockStatsResponseProto { required uint64 missing_repl_one_blocks = 4; required uint64 blocks_in_future = 5; required uint64 pending_deletion_blocks = 6; + optional uint64 highest_prio_low_redundancy_blocks = 7; + } message GetFsECBlockGroupStatsRequestProto { // no input paramters @@ -359,6 +361,7 @@ message GetFsECBlockGroupStatsResponseProto { required uint64 missing_blocks = 3; required uint64 blocks_in_future = 4; required uint64 pending_deletion_blocks = 5; + optional uint64 highest_prio_low_redundancy_blocks = 6; } enum DatanodeReportTypeProto { // type of the datanode report diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java index 4d22ae7dc32..e8ebf0dd8c5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/metrics/NamenodeBeanMetrics.java @@ -320,6 +320,16 @@ public class NamenodeBeanMetrics return 0; } + @Override + public long getHighestPriorityLowRedundancyReplicatedBlocks() { + return 0; + } + + @Override + public long getHighestPriorityLowRedundancyECBlocks() { + return 0; + } + @Override public String getCorruptFiles() { return "N/A"; diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ErasureCoding.java b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ErasureCoding.java index d2b2d50fdba..480b232ca42 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ErasureCoding.java +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/src/main/java/org/apache/hadoop/hdfs/server/federation/router/ErasureCoding.java @@ -185,12 +185,25 @@ public class ErasureCoding { long missingBlockGroups = 0; long bytesInFutureBlockGroups = 0; long pendingDeletionBlocks = 0; + long highestPriorityLowRedundancyBlocks = 0; + boolean hasHighestPriorityLowRedundancyBlocks = false; + for (ECBlockGroupStats stats : allStats.values()) { lowRedundancyBlockGroups += stats.getLowRedundancyBlockGroups(); corruptBlockGroups += stats.getCorruptBlockGroups(); missingBlockGroups += stats.getMissingBlockGroups(); bytesInFutureBlockGroups += stats.getBytesInFutureBlockGroups(); pendingDeletionBlocks += stats.getPendingDeletionBlocks(); + if (stats.hasHighestPriorityLowRedundancyBlocks()) { + hasHighestPriorityLowRedundancyBlocks = true; + highestPriorityLowRedundancyBlocks += + stats.getHighestPriorityLowRedundancyBlocks(); + } + } + if (hasHighestPriorityLowRedundancyBlocks) { + return new ECBlockGroupStats(lowRedundancyBlockGroups, corruptBlockGroups, + missingBlockGroups, bytesInFutureBlockGroups, pendingDeletionBlocks, + highestPriorityLowRedundancyBlocks); } return new ECBlockGroupStats(lowRedundancyBlockGroups, corruptBlockGroups, missingBlockGroups, bytesInFutureBlockGroups, pendingDeletionBlocks); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 72ea1c06926..bac89bfd64c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -4428,6 +4428,14 @@ public class BlockManager implements BlockStatsMXBean { return this.neededReconstruction.getCorruptReplicationOneBlockSize(); } + public long getHighestPriorityReplicatedBlockCount(){ + return this.neededReconstruction.getHighestPriorityReplicatedBlockCount(); + } + + public long getHighestPriorityECBlockCount(){ + return this.neededReconstruction.getHighestPriorityECBlockCount(); + } + public BlockInfo addBlockCollection(BlockInfo block, BlockCollection bc) { return blocksMap.addBlockCollection(block, bc); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java index e3f228d2947..40ea98053fa 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/LowRedundancyBlocks.java @@ -92,6 +92,10 @@ class LowRedundancyBlocks implements Iterable { private final LongAdder corruptReplicationOneBlocks = new LongAdder(); private final LongAdder lowRedundancyECBlockGroups = new LongAdder(); private final LongAdder corruptECBlockGroups = new LongAdder(); + private final LongAdder highestPriorityLowRedundancyReplicatedBlocks + = new LongAdder(); + private final LongAdder highestPriorityLowRedundancyECBlocks + = new LongAdder(); /** Create an object. */ LowRedundancyBlocks() { @@ -162,6 +166,18 @@ class LowRedundancyBlocks implements Iterable { return corruptReplicationOneBlocks.longValue(); } + /** Return the number of under replicated blocks + * with the highest priority to recover */ + long getHighestPriorityReplicatedBlockCount() { + return highestPriorityLowRedundancyReplicatedBlocks.longValue(); + } + + /** Return the number of under replicated EC blocks + * with the highest priority to recover */ + long getHighestPriorityECBlockCount() { + return highestPriorityLowRedundancyECBlocks.longValue(); + } + /** * Return low redundancy striped blocks excluding corrupt blocks. */ @@ -300,6 +316,9 @@ class LowRedundancyBlocks implements Iterable { if (priLevel == QUEUE_WITH_CORRUPT_BLOCKS) { corruptECBlockGroups.increment(); } + if (priLevel == QUEUE_HIGHEST_PRIORITY) { + highestPriorityLowRedundancyECBlocks.increment(); + } } else { lowRedundancyBlocks.increment(); if (priLevel == QUEUE_WITH_CORRUPT_BLOCKS) { @@ -308,6 +327,9 @@ class LowRedundancyBlocks implements Iterable { corruptReplicationOneBlocks.increment(); } } + if (priLevel == QUEUE_HIGHEST_PRIORITY) { + highestPriorityLowRedundancyReplicatedBlocks.increment(); + } } } @@ -380,6 +402,9 @@ class LowRedundancyBlocks implements Iterable { if (priLevel == QUEUE_WITH_CORRUPT_BLOCKS) { corruptECBlockGroups.decrement(); } + if (priLevel == QUEUE_HIGHEST_PRIORITY) { + highestPriorityLowRedundancyECBlocks.decrement(); + } } else { lowRedundancyBlocks.decrement(); if (priLevel == QUEUE_WITH_CORRUPT_BLOCKS) { @@ -391,6 +416,9 @@ class LowRedundancyBlocks implements Iterable { "should be non-negative"; } } + if (priLevel == QUEUE_HIGHEST_PRIORITY) { + highestPriorityLowRedundancyReplicatedBlocks.decrement(); + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 20982528a5f..5eef12b5716 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -4207,7 +4207,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, return new ReplicatedBlockStats(getLowRedundancyReplicatedBlocks(), getCorruptReplicatedBlocks(), getMissingReplicatedBlocks(), getMissingReplicationOneBlocks(), getBytesInFutureReplicatedBlocks(), - getPendingDeletionReplicatedBlocks()); + getPendingDeletionReplicatedBlocks(), + getHighestPriorityLowRedundancyReplicatedBlocks()); } /** @@ -4219,7 +4220,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, ECBlockGroupStats getECBlockGroupStats() { return new ECBlockGroupStats(getLowRedundancyECBlockGroups(), getCorruptECBlockGroups(), getMissingECBlockGroups(), - getBytesInFutureECBlockGroups(), getPendingDeletionECBlocks()); + getBytesInFutureECBlockGroups(), getPendingDeletionECBlocks(), + getHighestPriorityLowRedundancyECBlocks()); } @Override // FSNamesystemMBean @@ -4826,6 +4828,20 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, return blockManager.getMissingReplicationOneBlocks(); } + @Override // ReplicatedBlocksMBean + @Metric({"HighestPriorityLowRedundancyReplicatedBlocks", "Number of " + + "replicated blocks which have the highest risk of loss."}) + public long getHighestPriorityLowRedundancyReplicatedBlocks() { + return blockManager.getHighestPriorityReplicatedBlockCount(); + } + + @Override // ReplicatedBlocksMBean + @Metric({"HighestPriorityLowRedundancyECBlocks", "Number of erasure coded " + + "blocks which have the highest risk of loss."}) + public long getHighestPriorityLowRedundancyECBlocks() { + return blockManager.getHighestPriorityECBlockCount(); + } + @Override // ReplicatedBlocksMBean @Metric({"BytesInFutureReplicatedBlocks", "Total bytes in replicated " + "blocks with future generation stamp"}) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java index e4ed3a90a4f..5c7bbbb4515 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeMXBean.java @@ -162,6 +162,24 @@ public interface NameNodeMXBean { */ public long getNumberOfMissingBlocksWithReplicationFactorOne(); + /** + * Gets the total number of replicated low redundancy blocks on the cluster + * with the highest risk of loss. + * + * @return the total number of low redundancy blocks on the cluster + * with the highest risk of loss. + */ + public long getHighestPriorityLowRedundancyReplicatedBlocks(); + + /** + * Gets the total number of erasure coded low redundancy blocks on the cluster + * with the highest risk of loss + * + * @return the total number of low redundancy blocks on the cluster + * with the highest risk of loss + */ + public long getHighestPriorityLowRedundancyECBlocks(); + /** * Gets the total number of snapshottable dirs in the system. * diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java index f7935572182..23332500a4b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/DFSAdmin.java @@ -549,6 +549,11 @@ public class DFSAdmin extends FsShell { replicatedBlockStats.getMissingReplicaBlocks()); System.out.println("\tMissing blocks (with replication factor 1): " + replicatedBlockStats.getMissingReplicationOneBlocks()); + if (replicatedBlockStats.hasHighestPriorityLowRedundancyBlocks()) { + System.out.println("\tLow redundancy blocks with highest priority " + + "to recover: " + + replicatedBlockStats.getHighestPriorityLowRedundancyBlocks()); + } System.out.println("\tPending deletion blocks: " + replicatedBlockStats.getPendingDeletionBlocks()); @@ -561,6 +566,11 @@ public class DFSAdmin extends FsShell { ecBlockGroupStats.getCorruptBlockGroups()); System.out.println("\tMissing block groups: " + ecBlockGroupStats.getMissingBlockGroups()); + if (ecBlockGroupStats.hasHighestPriorityLowRedundancyBlocks()) { + System.out.println("\tLow redundancy blocks with highest priority " + + "to recover: " + + ecBlockGroupStats.getHighestPriorityLowRedundancyBlocks()); + } System.out.println("\tPending deletion blocks: " + ecBlockGroupStats.getPendingDeletionBlocks()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestLowRedundancyBlockQueues.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestLowRedundancyBlockQueues.java index 97a5a6e2227..cf40c39993a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestLowRedundancyBlockQueues.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestLowRedundancyBlockQueues.java @@ -63,7 +63,8 @@ public class TestLowRedundancyBlockQueues { private void verifyBlockStats(LowRedundancyBlocks queues, int lowRedundancyReplicaCount, int corruptReplicaCount, int corruptReplicationOneCount, int lowRedundancyStripedCount, - int corruptStripedCount) { + int corruptStripedCount, int highestPriorityReplicatedBlockCount, + int highestPriorityECBlockCount) { assertEquals("Low redundancy replica count incorrect!", lowRedundancyReplicaCount, queues.getLowRedundancyBlocks()); assertEquals("Corrupt replica count incorrect!", @@ -81,6 +82,14 @@ public class TestLowRedundancyBlockQueues { assertEquals("LowRedundancyBlocks queue size incorrect!", (lowRedundancyReplicaCount + corruptReplicaCount + lowRedundancyStripedCount + corruptStripedCount), queues.size()); + assertEquals("Highest priority replicated low redundancy " + + "blocks count is incorrect!", + highestPriorityReplicatedBlockCount, + queues.getHighestPriorityReplicatedBlockCount()); + assertEquals("Highest priority erasure coded low redundancy " + + "blocks count is incorrect!", + highestPriorityECBlockCount, + queues.getHighestPriorityECBlockCount()); } /** @@ -100,42 +109,46 @@ public class TestLowRedundancyBlockQueues { // Add a block with a single entry assertAdded(queues, block1, 1, 0, 3); assertInLevel(queues, block1, LowRedundancyBlocks.QUEUE_HIGHEST_PRIORITY); - verifyBlockStats(queues, 1, 0, 0, 0, 0); + verifyBlockStats(queues, 1, 0, 0, 0, 0, 1, 0); // Repeated additions fail assertFalse(queues.add(block1, 1, 0, 0, 3)); - verifyBlockStats(queues, 1, 0, 0, 0, 0); + verifyBlockStats(queues, 1, 0, 0, 0, 0, 1, 0); // Add a second block with two replicas assertAdded(queues, block2, 2, 0, 3); assertInLevel(queues, block2, LowRedundancyBlocks.QUEUE_LOW_REDUNDANCY); - verifyBlockStats(queues, 2, 0, 0, 0, 0); + verifyBlockStats(queues, 2, 0, 0, 0, 0, 1, 0); // Now try to add a block that is corrupt assertAdded(queues, block_corrupt, 0, 0, 3); assertInLevel(queues, block_corrupt, LowRedundancyBlocks.QUEUE_WITH_CORRUPT_BLOCKS); - verifyBlockStats(queues, 2, 1, 0, 0, 0); + verifyBlockStats(queues, 2, 1, 0, 0, 0, 1, 0); // Insert a very insufficiently redundancy block assertAdded(queues, block_very_low_redundancy, 4, 0, 25); assertInLevel(queues, block_very_low_redundancy, LowRedundancyBlocks.QUEUE_VERY_LOW_REDUNDANCY); - verifyBlockStats(queues, 3, 1, 0, 0, 0); + verifyBlockStats(queues, 3, 1, 0, 0, 0, 1, 0); // Insert a corrupt block with replication factor 1 assertAdded(queues, block_corrupt_repl_one, 0, 0, 1); - verifyBlockStats(queues, 3, 2, 1, 0, 0); + verifyBlockStats(queues, 3, 2, 1, 0, 0, 1, 0); // Bump up the expected count for corrupt replica one block from 1 to 3 queues.update(block_corrupt_repl_one, 0, 0, 0, 3, 0, 2); - verifyBlockStats(queues, 3, 2, 0, 0, 0); + verifyBlockStats(queues, 3, 2, 0, 0, 0, 1, 0); // Reduce the expected replicas to 1 queues.update(block_corrupt, 0, 0, 0, 1, 0, -2); - verifyBlockStats(queues, 3, 2, 1, 0, 0); + verifyBlockStats(queues, 3, 2, 1, 0, 0, 1, 0); queues.update(block_very_low_redundancy, 0, 0, 0, 1, -4, -24); - verifyBlockStats(queues, 2, 3, 2, 0, 0); + verifyBlockStats(queues, 2, 3, 2, 0, 0, 1, 0); + + // Reduce the expected replicas to 1 for block1 + queues.update(block1, 1, 0, 0, 1, 0, 0); + verifyBlockStats(queues, 2, 3, 2, 0, 0, 0, 0); } @Test @@ -145,12 +158,12 @@ public class TestLowRedundancyBlockQueues { assertAdded(queues, corruptBlock, 0, 0, 3); assertInLevel(queues, corruptBlock, LowRedundancyBlocks.QUEUE_WITH_CORRUPT_BLOCKS); - verifyBlockStats(queues, 0, 1, 0, 0, 0); + verifyBlockStats(queues, 0, 1, 0, 0, 0, 0, 0); // Remove with wrong priority queues.remove(corruptBlock, LowRedundancyBlocks.QUEUE_LOW_REDUNDANCY); // Verify the number of corrupt block is decremented - verifyBlockStats(queues, 0, 0, 0, 0, 0); + verifyBlockStats(queues, 0, 0, 0, 0, 0, 0, 0); } @Test @@ -186,17 +199,17 @@ public class TestLowRedundancyBlockQueues { assertInLevel(queues, block, LowRedundancyBlocks.QUEUE_LOW_REDUNDANCY); } - verifyBlockStats(queues, 0, 0, 0, numUR, 0); + verifyBlockStats(queues, 0, 0, 0, numUR, 0, 0, 1); } // add a corrupted block BlockInfo block_corrupt = genStripedBlockInfo(-10, numBytes); assertEquals(numCorrupt, queues.getCorruptBlockSize()); - verifyBlockStats(queues, 0, 0, 0, numUR, numCorrupt); + verifyBlockStats(queues, 0, 0, 0, numUR, numCorrupt, 0, 1); assertAdded(queues, block_corrupt, dataBlkNum - 1, 0, groupSize); numCorrupt++; - verifyBlockStats(queues, 0, 0, 0, numUR, numCorrupt); + verifyBlockStats(queues, 0, 0, 0, numUR, numCorrupt, 0, 1); assertInLevel(queues, block_corrupt, LowRedundancyBlocks.QUEUE_WITH_CORRUPT_BLOCKS); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java index 05cf2ea622f..57a1b01edea 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/metrics/TestNameNodeMetrics.java @@ -412,10 +412,12 @@ public class TestNameNodeMetrics { // Verify replica metrics assertGauge("LowRedundancyReplicatedBlocks", 0L, rb); assertGauge("CorruptReplicatedBlocks", 0L, rb); + assertGauge("HighestPriorityLowRedundancyReplicatedBlocks", 0L, rb); // Verify striped block groups metrics assertGauge("LowRedundancyECBlockGroups", 0L, rb); assertGauge("CorruptECBlockGroups", 0L, rb); + assertGauge("HighestPriorityLowRedundancyECBlocks", 0L, rb); } /** @@ -492,9 +494,11 @@ public class TestNameNodeMetrics { // Verify replicated blocks metrics assertGauge("LowRedundancyReplicatedBlocks", 1L, rb); assertGauge("CorruptReplicatedBlocks", 1L, rb); + assertGauge("HighestPriorityLowRedundancyReplicatedBlocks", 1L, rb); // Verify striped blocks metrics assertGauge("LowRedundancyECBlockGroups", 0L, rb); assertGauge("CorruptECBlockGroups", 0L, rb); + assertGauge("HighestPriorityLowRedundancyECBlocks", 0L, rb); verifyAggregatedMetricsTally(); @@ -517,9 +521,11 @@ public class TestNameNodeMetrics { // Verify replicated blocks metrics assertGauge("LowRedundancyReplicatedBlocks", 0L, rb); assertGauge("CorruptReplicatedBlocks", 0L, rb); + assertGauge("HighestPriorityLowRedundancyReplicatedBlocks", 0L, rb); // Verify striped blocks metrics assertGauge("LowRedundancyECBlockGroups", 0L, rb); assertGauge("CorruptECBlockGroups", 0L, rb); + assertGauge("HighestPriorityLowRedundancyECBlocks", 0L, rb); verifyAggregatedMetricsTally(); @@ -580,9 +586,11 @@ public class TestNameNodeMetrics { // Verify replica metrics assertGauge("LowRedundancyReplicatedBlocks", 0L, rb); assertGauge("CorruptReplicatedBlocks", 0L, rb); + assertGauge("HighestPriorityLowRedundancyReplicatedBlocks", 0L, rb); // Verify striped block groups metrics assertGauge("LowRedundancyECBlockGroups", 1L, rb); assertGauge("CorruptECBlockGroups", 1L, rb); + assertGauge("HighestPriorityLowRedundancyECBlocks", 1L, rb); verifyAggregatedMetricsTally(); @@ -602,9 +610,11 @@ public class TestNameNodeMetrics { // Verify replicated blocks metrics assertGauge("LowRedundancyReplicatedBlocks", 0L, rb); assertGauge("CorruptReplicatedBlocks", 0L, rb); + assertGauge("HighestPriorityLowRedundancyReplicatedBlocks", 0L, rb); // Verify striped blocks metrics assertGauge("LowRedundancyECBlockGroups", 0L, rb); assertGauge("CorruptECBlockGroups", 0L, rb); + assertGauge("HighestPriorityLowRedundancyECBlocks", 0L, rb); verifyAggregatedMetricsTally(); @@ -666,6 +676,8 @@ public class TestNameNodeMetrics { assertGauge("UnderReplicatedBlocks", 1L, rb); assertGauge("MissingBlocks", 1L, rb); assertGauge("MissingReplOneBlocks", 1L, rb); + assertGauge("HighestPriorityLowRedundancyReplicatedBlocks", 0L, rb); + assertGauge("HighestPriorityLowRedundancyECBlocks", 0L, rb); fs.delete(file, true); waitForDnMetricValue(NS_METRICS, "UnderReplicatedBlocks", 0L); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java index 647327cc331..5e4709e0e8b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/TestDFSAdmin.java @@ -579,7 +579,7 @@ public class TestDFSAdmin { // Verify report command for all counts to be zero resetStream(); assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"})); - verifyNodesAndCorruptBlocks(numDn, numDn, 0, 0, client); + verifyNodesAndCorruptBlocks(numDn, numDn, 0, 0, client, 0L, 0L); final short replFactor = 1; final long fileLength = 512L; @@ -614,7 +614,7 @@ public class TestDFSAdmin { // Verify report command for all counts to be zero resetStream(); assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"})); - verifyNodesAndCorruptBlocks(numDn, numDn, 0, 0, client); + verifyNodesAndCorruptBlocks(numDn, numDn, 0, 0, client, 0L, 0L); // Choose a DataNode to shutdown final List datanodes = miniCluster.getDataNodes(); @@ -636,7 +636,7 @@ public class TestDFSAdmin { // Verify report command to show dead DataNode assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"})); - verifyNodesAndCorruptBlocks(numDn, numDn - 1, 0, 0, client); + verifyNodesAndCorruptBlocks(numDn, numDn - 1, 0, 0, client, 0L, 1L); // Corrupt the replicated block final int blockFilesCorrupted = miniCluster @@ -664,7 +664,7 @@ public class TestDFSAdmin { // verify report command for corrupt replicated block resetStream(); assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"})); - verifyNodesAndCorruptBlocks(numDn, numDn - 1, 1, 0, client); + verifyNodesAndCorruptBlocks(numDn, numDn - 1, 1, 0, client, 0L, 1L); lbs = miniCluster.getFileSystem().getClient(). getNamenode().getBlockLocations( @@ -689,7 +689,7 @@ public class TestDFSAdmin { // and EC block group resetStream(); assertEquals(0, ToolRunner.run(dfsAdmin, new String[] {"-report"})); - verifyNodesAndCorruptBlocks(numDn, numDn - 1, 1, 1, client); + verifyNodesAndCorruptBlocks(numDn, numDn - 1, 1, 1, client, 0L, 0L); } } @@ -834,7 +834,10 @@ public class TestDFSAdmin { final int numLiveDn, final int numCorruptBlocks, final int numCorruptECBlockGroups, - final DFSClient client) throws IOException { + final DFSClient client, + final Long highestPriorityLowRedundancyReplicatedBlocks, + final Long highestPriorityLowRedundancyECBlocks) + throws IOException { /* init vars */ final String outStr = scanIntoString(out); @@ -847,12 +850,23 @@ public class TestDFSAdmin { final String expectedCorruptedECBlockGroupsStr = String.format( "Block groups with corrupt internal blocks: %d", numCorruptECBlockGroups); + final String highestPriorityLowRedundancyReplicatedBlocksStr + = String.format( + "\tLow redundancy blocks with highest priority " + + "to recover: %d", + highestPriorityLowRedundancyReplicatedBlocks); + final String highestPriorityLowRedundancyECBlocksStr = String.format( + "\tLow redundancy blocks with highest priority " + + "to recover: %d", + highestPriorityLowRedundancyReplicatedBlocks); // verify nodes and corrupt blocks assertThat(outStr, is(allOf( containsString(expectedLiveNodesStr), containsString(expectedCorruptedBlocksStr), - containsString(expectedCorruptedECBlockGroupsStr)))); + containsString(expectedCorruptedECBlockGroupsStr), + containsString(highestPriorityLowRedundancyReplicatedBlocksStr), + containsString(highestPriorityLowRedundancyECBlocksStr)))); assertEquals( numDn, @@ -867,8 +881,12 @@ public class TestDFSAdmin { client.getCorruptBlocksCount()); assertEquals(numCorruptBlocks, client.getNamenode() .getReplicatedBlockStats().getCorruptBlocks()); + assertEquals(highestPriorityLowRedundancyReplicatedBlocks, client.getNamenode() + .getReplicatedBlockStats().getHighestPriorityLowRedundancyBlocks()); assertEquals(numCorruptECBlockGroups, client.getNamenode() .getECBlockGroupStats().getCorruptBlockGroups()); + assertEquals(highestPriorityLowRedundancyECBlocks, client.getNamenode() + .getECBlockGroupStats().getHighestPriorityLowRedundancyBlocks()); } @Test