diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index a708333c080..b0e82fa4afd 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -250,6 +250,9 @@ Branch-2 ( Unreleased changes )
 
     HDFS-3572. Cleanup code which inits SPNEGO in HttpServer (todd)
 
+    HDFS-3475. Make the replication monitor multipliers configurable.
+    (harsh via eli)
+
   OPTIMIZATIONS
 
     HDFS-2982. Startup performance suffers when there are many edit log
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index 146ed8358fe..ddadbdd44f7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -165,6 +165,14 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_KEY = "dfs.datanode.socket.reuse.keepalive";
   public static final int DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_DEFAULT = 1000;
 
+  // Replication monitoring related keys
+  public static final String DFS_NAMENODE_INVALIDATE_WORK_PCT_PER_ITERATION =
+    "dfs.namenode.invalidate.work.pct.per.iteration";
+  public static final float DFS_NAMENODE_INVALIDATE_WORK_PCT_PER_ITERATION_DEFAULT = 0.32f;
+  public static final String DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION =
+    "dfs.namenode.replication.work.multiplier.per.iteration";
+  public static final int DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION_DEFAULT = 2;
+
   //Delegation token related keys
   public static final String DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY = "dfs.namenode.delegation.key.update-interval";
   public static final long DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT = 24*60*60*1000; // 1 day
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 7e4a4857c76..7767ebec08f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -68,6 +68,7 @@ import org.apache.hadoop.net.Node;
 import org.apache.hadoop.util.Daemon;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
 import com.google.common.collect.Sets;
 
 /**
@@ -193,6 +194,9 @@ public class BlockManager {
   /** value returned by MAX_CORRUPT_FILES_RETURNED */
   final int maxCorruptFilesReturned;
 
+  final float blocksInvalidateWorkPct;
+  final int blocksReplWorkMultiplier;
+
   /** variable to enable check for enough racks */
   final boolean shouldCheckForEnoughRacks;
 
@@ -245,7 +249,25 @@ public class BlockManager {
     this.maxReplicationStreams = conf.getInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY,
                                              DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_DEFAULT);
     this.shouldCheckForEnoughRacks = conf.get(DFSConfigKeys.NET_TOPOLOGY_SCRIPT_FILE_NAME_KEY) != null;
-
+
+    this.blocksInvalidateWorkPct = conf.getFloat(
+        DFSConfigKeys.DFS_NAMENODE_INVALIDATE_WORK_PCT_PER_ITERATION,
+        DFSConfigKeys.DFS_NAMENODE_INVALIDATE_WORK_PCT_PER_ITERATION_DEFAULT);
+    Preconditions.checkArgument(
+        (this.blocksInvalidateWorkPct > 0),
+        DFSConfigKeys.DFS_NAMENODE_INVALIDATE_WORK_PCT_PER_ITERATION +
+        " = '" + this.blocksInvalidateWorkPct + "' is invalid. " +
+        "It should be a positive, non-zero float value " +
+        "indicating a percentage.");
+    this.blocksReplWorkMultiplier = conf.getInt(
+        DFSConfigKeys.DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION,
+        DFSConfigKeys.DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION_DEFAULT);
+    Preconditions.checkArgument(
+        (this.blocksReplWorkMultiplier > 0),
+        DFSConfigKeys.DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION +
+        " = '" + this.blocksReplWorkMultiplier + "' is invalid. " +
+        "It should be a positive, non-zero integer value.");
+
     this.replicationRecheckInterval =
       conf.getInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY,
                   DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_DEFAULT) * 1000L;
@@ -2897,8 +2919,6 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
    * Periodically calls computeReplicationWork().
    */
   private class ReplicationMonitor implements Runnable {
-    private static final int INVALIDATE_WORK_PCT_PER_ITERATION = 32;
-    private static final int REPLICATION_WORK_MULTIPLIER_PER_ITERATION = 2;
 
     @Override
     public void run() {
@@ -2938,9 +2958,9 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
 
     final int numlive = heartbeatManager.getLiveDatanodeCount();
     final int blocksToProcess = numlive
-        * ReplicationMonitor.REPLICATION_WORK_MULTIPLIER_PER_ITERATION;
+        * this.blocksReplWorkMultiplier;
     final int nodesToProcess = (int) Math.ceil(numlive
-        * ReplicationMonitor.INVALIDATE_WORK_PCT_PER_ITERATION / 100.0);
+        * this.blocksInvalidateWorkPct);
 
     int workFound = this.computeReplicationWork(blocksToProcess);
 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 9e1a4356c60..3bf11b49280 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -870,5 +870,35 @@
     ${dfs.web.authentication.kerberos.principal}
   </description>
 </property>
 
+<property>
+  <name>dfs.namenode.invalidate.work.pct.per.iteration</name>
+  <value>0.32f</value>
+  <description>
+    *Note*: Advanced property. Change with caution.
+    This determines the percentage of block
+    invalidations (deletes) to do over a single DN heartbeat
+    deletion command. The final deletion count is determined by applying this
+    percentage to the number of live nodes in the system.
+    The resultant number is the number of blocks from the deletion list
+    chosen for proper invalidation over a single heartbeat of a single DN.
+    Value should be a positive, non-zero percentage in float notation (X.Yf),
+    with 1.0f meaning 100%.
+  </description>
+</property>
+
+<property>
+  <name>dfs.namenode.replication.work.multiplier.per.iteration</name>
+  <value>2</value>
+  <description>
+    *Note*: Advanced property. Change with caution.
+    This determines the total number of block transfers to begin in
+    parallel at a DN, for replication, when such a command list is being
+    sent over a DN heartbeat by the NN. The actual number is obtained by
+    multiplying this multiplier with the total number of live nodes in the
+    cluster. The resulting number is the number of blocks to begin transfers
+    immediately for, per DN heartbeat. This number can be any positive,
+    non-zero integer.
+  </description>
+</property>
+
 </configuration>
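
Reviewer note, illustrative only and not part of the patch: the sketch below shows what
the two new knobs compute per ReplicationMonitor iteration, using the defaults above
(0.32f and 2). The class name, the main() harness, and the 200-node cluster size are
assumptions made up for the example; only the two configuration keys, their defaults,
and the arithmetic mirror the patched work computation in BlockManager.

import org.apache.hadoop.conf.Configuration;

// Hypothetical standalone sketch of the per-iteration work sizing after this change.
public class ReplicationWorkSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();

    // The two new keys, with defaults matching hdfs-default.xml above.
    float invalidateWorkPct = conf.getFloat(
        "dfs.namenode.invalidate.work.pct.per.iteration", 0.32f);
    int replWorkMultiplier = conf.getInt(
        "dfs.namenode.replication.work.multiplier.per.iteration", 2);

    // Assumed live-datanode count, purely for the example.
    int numlive = 200;

    // Same arithmetic as the patched BlockManager code above: replication
    // work scales linearly with live nodes; invalidation work is a
    // percentage of live nodes, rounded up.
    int blocksToProcess = numlive * replWorkMultiplier;                // 400
    int nodesToProcess = (int) Math.ceil(numlive * invalidateWorkPct); // 64

    System.out.println("blocks to schedule for replication: " + blocksToProcess);
    System.out.println("nodes to receive invalidation work: " + nodesToProcess);
  }
}

With these defaults, doubling the multiplier doubles the block transfers scheduled per
iteration, while the percentage bounds how many datanodes receive deletion commands in
any one round.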