HDFS-3475. Make the replication monitor multipliers configurable. Contributed by Harsh J Chouraria
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1355089 13f79535-47bb-0310-9956-ffa450edef68
parent 32d3ed55d0
commit 550853203b
CHANGES.txt
@@ -250,6 +250,9 @@ Branch-2 ( Unreleased changes )
     HDFS-3572. Cleanup code which inits SPNEGO in HttpServer (todd)
 
+    HDFS-3475. Make the replication monitor multipliers configurable.
+    (harsh via eli)
+
   OPTIMIZATIONS
 
     HDFS-2982. Startup performance suffers when there are many edit log
DFSConfigKeys.java
@@ -165,6 +165,14 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final String  DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_KEY = "dfs.datanode.socket.reuse.keepalive";
   public static final int     DFS_DATANODE_SOCKET_REUSE_KEEPALIVE_DEFAULT = 1000;
 
+  // Replication monitoring related keys
+  public static final String DFS_NAMENODE_INVALIDATE_WORK_PCT_PER_ITERATION =
+      "dfs.namenode.invalidate.work.pct.per.iteration";
+  public static final int DFS_NAMENODE_INVALIDATE_WORK_PCT_PER_ITERATION_DEFAULT = 32;
+  public static final String DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION =
+      "dfs.namenode.replication.work.multiplier.per.iteration";
+  public static final int DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION_DEFAULT = 2;
+
   //Delegation token related keys
   public static final String  DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY = "dfs.namenode.delegation.key.update-interval";
   public static final long    DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT = 24*60*60*1000; // 1 day
BlockManager.java
@@ -68,6 +68,7 @@ import org.apache.hadoop.net.Node;
 import org.apache.hadoop.util.Daemon;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
 import com.google.common.collect.Sets;
 
 /**
@@ -193,6 +194,9 @@ public class BlockManager {
   /** value returned by MAX_CORRUPT_FILES_RETURNED */
   final int maxCorruptFilesReturned;
 
+  final float blocksInvalidateWorkPct;
+  final int blocksReplWorkMultiplier;
+
   /** variable to enable check for enough racks */
   final boolean shouldCheckForEnoughRacks;
 
@@ -246,6 +250,24 @@ public class BlockManager {
         DFSConfigKeys.DFS_NAMENODE_REPLICATION_MAX_STREAMS_DEFAULT);
     this.shouldCheckForEnoughRacks = conf.get(DFSConfigKeys.NET_TOPOLOGY_SCRIPT_FILE_NAME_KEY) != null;
 
+    this.blocksInvalidateWorkPct = conf.getFloat(
+        DFSConfigKeys.DFS_NAMENODE_INVALIDATE_WORK_PCT_PER_ITERATION,
+        DFSConfigKeys.DFS_NAMENODE_INVALIDATE_WORK_PCT_PER_ITERATION_DEFAULT);
+    Preconditions.checkArgument(
+        (this.blocksInvalidateWorkPct > 0),
+        DFSConfigKeys.DFS_NAMENODE_INVALIDATE_WORK_PCT_PER_ITERATION +
+        " = '" + this.blocksInvalidateWorkPct + "' is invalid. " +
+        "It should be a positive, non-zero float value " +
+        "indicating a percentage.");
+    this.blocksReplWorkMultiplier = conf.getInt(
+        DFSConfigKeys.DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION,
+        DFSConfigKeys.DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION_DEFAULT);
+    Preconditions.checkArgument(
+        (this.blocksReplWorkMultiplier > 0),
+        DFSConfigKeys.DFS_NAMENODE_REPLICATION_WORK_MULTIPLIER_PER_ITERATION +
+        " = '" + this.blocksReplWorkMultiplier + "' is invalid. " +
+        "It should be a positive, non-zero integer value.");
+
     this.replicationRecheckInterval =
       conf.getInt(DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_KEY,
                   DFSConfigKeys.DFS_NAMENODE_REPLICATION_INTERVAL_DEFAULT) * 1000L;
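
For reference, Guava's Preconditions.checkArgument throws an IllegalArgumentException carrying the supplied message when its condition is false, so a zero or negative value for either key makes BlockManager construction, and therefore NameNode startup, fail fast. A minimal standalone sketch of the same validation pattern, using a hypothetical hard-coded value in place of the Configuration lookup:

    import com.google.common.base.Preconditions;

    public class InvalidateWorkPctCheck {
      public static void main(String[] args) {
        // Hypothetical misconfigured value; the patch reads it via conf.getFloat(...).
        float blocksInvalidateWorkPct = 0.0f;
        // Throws IllegalArgumentException with the message below, mirroring the check in the hunk above.
        Preconditions.checkArgument(blocksInvalidateWorkPct > 0,
            "dfs.namenode.invalidate.work.pct.per.iteration = '" + blocksInvalidateWorkPct
            + "' is invalid. It should be a positive, non-zero float value indicating a percentage.");
      }
    }
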
@@ -2897,8 +2919,6 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
    * Periodically calls computeReplicationWork().
    */
   private class ReplicationMonitor implements Runnable {
-    private static final int INVALIDATE_WORK_PCT_PER_ITERATION = 32;
-    private static final int REPLICATION_WORK_MULTIPLIER_PER_ITERATION = 2;
 
     @Override
     public void run() {
@@ -2938,9 +2958,9 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block
 
     final int numlive = heartbeatManager.getLiveDatanodeCount();
     final int blocksToProcess = numlive
-        * ReplicationMonitor.REPLICATION_WORK_MULTIPLIER_PER_ITERATION;
+        * this.blocksReplWorkMultiplier;
     final int nodesToProcess = (int) Math.ceil(numlive
-        * ReplicationMonitor.INVALIDATE_WORK_PCT_PER_ITERATION / 100.0);
+        * this.blocksInvalidateWorkPct);
 
     int workFound = this.computeReplicationWork(blocksToProcess);
 
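
As a worked example of the two computations above, assuming a hypothetical cluster with 100 live DataNodes and the shipped defaults (multiplier 2 and 0.32f, per the hdfs-default.xml entries below):

    blocksToProcess = 100 * 2          = 200 block replications scheduled per iteration
    nodesToProcess  = ceil(100 * 0.32) = 32  DataNodes considered for invalidation work per iteration
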
hdfs-default.xml
@@ -870,5 +870,35 @@
   <value>${dfs.web.authentication.kerberos.principal}</value>
 </property>
 
+<property>
+  <name>dfs.namenode.invalidate.work.pct.per.iteration</name>
+  <value>0.32f</value>
+  <description>
+    *Note*: Advanced property. Change with caution.
+    This determines the percentage amount of block
+    invalidations (deletes) to do over a single DN heartbeat
+    deletion command. The final deletion count is determined by applying this
+    percentage to the number of live nodes in the system.
+    The resultant number is the number of blocks from the deletion list
+    chosen for proper invalidation over a single heartbeat of a single DN.
+    Value should be a positive, non-zero percentage in float notation (X.Yf),
+    with 1.0f meaning 100%.
+  </description>
+</property>
+
+<property>
+  <name>dfs.namenode.replication.work.multiplier.per.iteration</name>
+  <value>2</value>
+  <description>
+    *Note*: Advanced property. Change with caution.
+    This determines the total amount of block transfers to begin in
+    parallel at a DN, for replication, when such a command list is being
+    sent over a DN heartbeat by the NN. The actual number is obtained by
+    multiplying this multiplier with the total number of live nodes in the
+    cluster. The result number is the number of blocks to begin transfers
+    immediately for, per DN heartbeat. This number can be any positive,
+    non-zero integer.
+  </description>
+</property>
+
 </configuration>
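
As a usage sketch (not part of the patch), either default can be overridden like any other HDFS setting in hdfs-site.xml; the values here are purely illustrative:

    <property>
      <name>dfs.namenode.replication.work.multiplier.per.iteration</name>
      <value>4</value>
    </property>

    <property>
      <name>dfs.namenode.invalidate.work.pct.per.iteration</name>
      <value>0.50f</value>
    </property>

Changes to either value take effect on NameNode restart, since the patch reads them once into final fields at BlockManager construction.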