diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
index a708333c080..b0e82fa4afd 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@@ -250,6 +250,9 @@ Branch-2 ( Unreleased changes )
HDFS-3572. Cleanup code which inits SPNEGO in HttpServer (todd)
+ HDFS-3475. Make the replication monitor multipliers configurable.
+ (harsh via eli)
+
OPTIMIZATIONS
HDFS-2982. Startup performance suffers when there are many edit log
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
index 146ed8358fe..ddadbdd44f7 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java
@@ -165,6 +165,14 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final String DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_KEY = "dfs.datanode.socket.reuse.keepalive";
public static final int DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_DEFAULT = 1000;
+ // Replication monitoring related keys
+ public static final String DFS_NAMENODE_INVALIDATE_WORK_PCT_PER_ITERATION =
+ "dfs.namenode.invalidate.work.pct.per.iteration";
+ public static final float DFS_NAMENODE_INVALIDATE_WORK_PCT_PER_ITERATION_DEFAULT = 0.32f;
+ public static final String DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION =
+ "dfs.namenode.replication.work.multiplier.per.iteration";
+ public static final int DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION_DEFAULT = 2;
+
//Delegation token related keys
public static final String DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY = "dfs.namenode.delegation.key.update-interval";
public static final long DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT = 24*60*60*1000; // 1 day
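
[Editor's aside, not part of the patch: a minimal sketch of how these keys resolve through Configuration. The class name is hypothetical, and note that a plain Configuration does not load hdfs-default.xml unless HdfsConfiguration has been initialized, so unset keys fall back to the compiled-in defaults passed here.]

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    public class ReplWorkKeysDemo {  // hypothetical demo class
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // With no override present, getFloat/getInt return the defaults.
        float pct = conf.getFloat(
            DFSConfigKeys.DFS_NAMENODE_INVALIDATE_WORK_PCT_PER_ITERATION,
            DFSConfigKeys.DFS_NAMENODE_INVALIDATE_WORK_PCT_PER_ITERATION_DEFAULT);
        int mult = conf.getInt(
            DFSConfigKeys.DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION,
            DFSConfigKeys.DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION_DEFAULT);
        System.out.println(pct + ", " + mult);  // expected: 0.32, 2
      }
    }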
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
index 7e4a4857c76..7767ebec08f 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@@ -68,6 +68,7 @@ import org.apache.hadoop.net.Node;
import org.apache.hadoop.util.Daemon;
import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
/**
@@ -193,6 +194,9 @@ public class BlockManager {
/** value returned by MAX_CORRUPT_FILES_RETURNED */
final int maxCorruptFilesReturned;
+ final float blocksInvalidateWorkPct;
+ final int blocksReplWorkMultiplier;
+
/** variable to enable check for enough racks */
final boolean shouldCheckForEnoughRacks;
@@ -245,7 +249,25 @@ public class BlockManager {
this.maxReplicationStreams = conf.getInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_KEY,
DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_DEFAULT);
this.shouldCheckForEnoughRacks = conf.get(DFSConfigKeys.NET_TOPOLOGY_SCRIPT_FILE_NAME_KEY) != null;
-
+
+ this.blocksInvalidateWorkPct = conf.getFloat(
+ DFSConfigKeys.DFS_NAMENODE_INVALIDATE_WORK_PCT_PER_ITERATION,
+ DFSConfigKeys.DFS_NAMENODE_INVALIDATE_WORK_PCT_PER_ITERATION_DEFAULT);
+ Preconditions.checkArgument(
+ (this.blocksInvalidateWorkPct > 0),
+ DFSConfigKeys.DFS_NAMENODE_INVALIDATE_WORK_PCT_PER_ITERATION +
+ " = '" + this.blocksInvalidateWorkPct + "' is invalid. " +
+ "It should be a positive, non-zero float value " +
+ "indicating a percentage.");
+ this.blocksReplWorkMultiplier = conf.getInt(
+ DFSConfigKeys.DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION,
+ DFSConfigKeys.DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION_DEFAULT);
+ Preconditions.checkArgument(
+ (this.blocksReplWorkMultiplier > 0),
+ DFSConfigKeys.DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION +
+ " = '" + this.blocksReplWorkMultiplier + "' is invalid. " +
+ "It should be a positive, non-zero integer value.");
+
this.replicationRecheckInterval =
conf.getInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY,
DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_DEFAULT) * 1000L;
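
[Editor's aside, not part of the patch: a standalone sketch of what the checkArgument calls above do when a value is misconfigured. The zero value and class name are assumptions for illustration.]

    import com.google.common.base.Preconditions;

    public class InvalidPctDemo {  // hypothetical, not part of the patch
      public static void main(String[] args) {
        // Assumed misconfiguration: the percentage key set to zero.
        float blocksInvalidateWorkPct = 0f;
        Preconditions.checkArgument(
            blocksInvalidateWorkPct > 0,
            "dfs.namenode.invalidate.work.pct.per.iteration = '"
                + blocksInvalidateWorkPct + "' is invalid. "
                + "It should be a positive, non-zero float value "
                + "indicating a percentage.");
        // Never reached: checkArgument throws IllegalArgumentException,
        // which is how a bad value aborts BlockManager construction early.
      }
    }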
@@ -2897,8 +2919,6 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
* Periodically calls computeReplicationWork().
*/
private class ReplicationMonitor implements Runnable {
- private static final int INVALIDATE_WORK_PCT_PER_ITERATION = 32;
- private static final int REPLICATION_WORK_MULTIPLIER_PER_ITERATION = 2;
@Override
public void run() {
@@ -2938,9 +2958,9 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
final int numlive = heartbeatManager.getLiveDatanodeCount();
final int blocksToProcess = numlive
- * ReplicationMonitor.REPLICATION_WORK_MULTIPLIER_PER_ITERATION;
+ * this.blocksReplWorkMultiplier;
final int nodesToProcess = (int) Math.ceil(numlive
- * ReplicationMonitor.INVALIDATE_WORK_PCT_PER_ITERATION / 100.0);
+ * this.blocksInvalidateWorkPct);
int workFound = this.computeReplicationWork(blocksToProcess);
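
[Editor's aside, not part of the patch: a worked example of the arithmetic above under assumed cluster numbers, here 200 live DataNodes with the default settings.]

    public class ReplWorkMath {  // hypothetical worked example
      public static void main(String[] args) {
        int numlive = 200;                      // assumed live DataNode count
        int blocksReplWorkMultiplier = 2;       // default multiplier
        float blocksInvalidateWorkPct = 0.32f;  // default percentage (32%)

        // 200 * 2 = 400 under-replicated blocks scheduled this iteration.
        int blocksToProcess = numlive * blocksReplWorkMultiplier;
        // ceil(200 * 0.32) = 64 nodes whose invalidation lists are processed.
        int nodesToProcess = (int) Math.ceil(numlive * blocksInvalidateWorkPct);
        System.out.println(blocksToProcess + " blocks, " + nodesToProcess + " nodes");
      }
    }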
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
index 9e1a4356c60..3bf11b49280 100644
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml
@@ -870,5 +870,35 @@
${dfs.web.authentication.kerberos.principal}
  </description>
</property>

+<property>
+  <name>dfs.namenode.invalidate.work.pct.per.iteration</name>
+  <value>0.32f</value>
+  <description>
+    *Note*: Advanced property. Change with caution.
+    This determines the percentage of block invalidations (deletes)
+    to do over a single DN heartbeat deletion command. The final
+    deletion count is determined by applying this percentage to the
+    number of live nodes in the system. The resulting number is the
+    number of blocks from the deletion list chosen for invalidation
+    over a single heartbeat of a single DN. The value should be a positive,
+    non-zero percentage in float notation (X.Yf), with 1.0f meaning 100%.
+  </description>
+</property>
+
+<property>
+  <name>dfs.namenode.replication.work.multiplier.per.iteration</name>
+  <value>2</value>
+  <description>
+    *Note*: Advanced property. Change with caution.
+    This determines the total number of block transfers to begin in
+    parallel at a DN, for replication, when such a command list is
+    sent over a DN heartbeat by the NN. The actual number is obtained
+    by multiplying this multiplier by the total number of live nodes
+    in the cluster. The resulting number is the number of blocks to
+    begin transfers for immediately, per DN heartbeat. This value can
+    be any positive, non-zero integer.
+  </description>
+</property>
+
</configuration>
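
[Editor's aside, not part of the patch: operators would typically override these two properties in hdfs-site.xml; the sketch below shows the programmatic equivalent. The class name and the values 0.50f and 4 are purely illustrative.]

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hdfs.DFSConfigKeys;

    public class TuneReplMonitor {  // hypothetical tuning sketch
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Process half the live-node count per invalidation iteration...
        conf.setFloat(
            DFSConfigKeys.DFS_NAMENODE_INVALIDATE_WORK_PCT_PER_ITERATION, 0.50f);
        // ...and schedule 4 block transfers per live node per iteration.
        conf.setInt(
            DFSConfigKeys.DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION, 4);
        // A NameNode started with this Configuration validates both values
        // in the BlockManager constructor shown earlier.
      }
    }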