HDFS-14795. Add Throttler for writing block. Contributed by Lisheng Sun.
This commit is contained in:
parent
eefe9bc85c
commit
f580a87079
|
@ -119,6 +119,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
|
||||||
"dfs.datanode.data.transfer.bandwidthPerSec";
|
"dfs.datanode.data.transfer.bandwidthPerSec";
|
||||||
public static final long DFS_DATANODE_DATA_TRANSFER_BANDWIDTHPERSEC_DEFAULT =
|
public static final long DFS_DATANODE_DATA_TRANSFER_BANDWIDTHPERSEC_DEFAULT =
|
||||||
0; // A value of zero indicates no limit
|
0; // A value of zero indicates no limit
|
||||||
|
public static final String DFS_DATANODE_DATA_WRITE_BANDWIDTHPERSEC_KEY =
|
||||||
|
"dfs.datanode.data.write.bandwidthPerSec";
|
||||||
|
// A value of zero indicates no limit
|
||||||
|
public static final long DFS_DATANODE_DATA_WRITE_BANDWIDTHPERSEC_DEFAULT = 0;
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public static final String DFS_DATANODE_READAHEAD_BYTES_KEY =
|
public static final String DFS_DATANODE_READAHEAD_BYTES_KEY =
|
||||||
HdfsClientConfigKeys.DFS_DATANODE_READAHEAD_BYTES_KEY;
|
HdfsClientConfigKeys.DFS_DATANODE_READAHEAD_BYTES_KEY;
|
||||||
|
|
|
@ -46,6 +46,9 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_MAX_NUM_BLOCKS_TO_LOG_DEF
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_MAX_NUM_BLOCKS_TO_LOG_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_MAX_NUM_BLOCKS_TO_LOG_KEY;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_METRICS_LOGGER_PERIOD_SECONDS_DEFAULT;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_METRICS_LOGGER_PERIOD_SECONDS_DEFAULT;
|
||||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_METRICS_LOGGER_PERIOD_SECONDS_KEY;
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_METRICS_LOGGER_PERIOD_SECONDS_KEY;
|
||||||
|
import static org.apache.hadoop.hdfs.protocol.datatransfer.BlockConstructionStage.PIPELINE_SETUP_APPEND_RECOVERY;
|
||||||
|
import static org.apache.hadoop.hdfs.protocol.datatransfer.BlockConstructionStage.PIPELINE_SETUP_CREATE;
|
||||||
|
import static org.apache.hadoop.hdfs.protocol.datatransfer.BlockConstructionStage.PIPELINE_SETUP_STREAMING_RECOVERY;
|
||||||
import static org.apache.hadoop.util.ExitUtil.terminate;
|
import static org.apache.hadoop.util.ExitUtil.terminate;
|
||||||
|
|
||||||
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
||||||
|
@ -2500,8 +2503,8 @@ public class DataNode extends ReconfigurableBase
|
||||||
final String clientname;
|
final String clientname;
|
||||||
final CachingStrategy cachingStrategy;
|
final CachingStrategy cachingStrategy;
|
||||||
|
|
||||||
/** Throttle to block replication when data transfers. */
|
/** Throttle to block replication when data transfers or writes. */
|
||||||
private DataTransferThrottler transferThrottler;
|
private DataTransferThrottler throttler;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Connect to the first item in the target list. Pass along the
|
* Connect to the first item in the target list. Pass along the
|
||||||
|
@ -2529,14 +2532,10 @@ public class DataNode extends ReconfigurableBase
|
||||||
this.clientname = clientname;
|
this.clientname = clientname;
|
||||||
this.cachingStrategy =
|
this.cachingStrategy =
|
||||||
new CachingStrategy(true, getDnConf().readaheadLength);
|
new CachingStrategy(true, getDnConf().readaheadLength);
|
||||||
// 1. the stage is PIPELINE_SETUP_CREATE,that is moving blocks, set
|
if (isTransfer(stage, clientname)) {
|
||||||
// throttler.
|
this.throttler = xserver.getTransferThrottler();
|
||||||
// 2. the stage is PIPELINE_SETUP_APPEND_RECOVERY or
|
} else if(isWrite(stage)) {
|
||||||
// PIPELINE_SETUP_STREAMING_RECOVERY,
|
this.throttler = xserver.getWriteThrottler();
|
||||||
// that is writing and recovering pipeline, don't set throttle.
|
|
||||||
if (stage == BlockConstructionStage.PIPELINE_SETUP_CREATE
|
|
||||||
&& clientname.isEmpty()) {
|
|
||||||
this.transferThrottler = xserver.getTransferThrottler();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2596,7 +2595,7 @@ public class DataNode extends ReconfigurableBase
|
||||||
targetStorageIds);
|
targetStorageIds);
|
||||||
|
|
||||||
// send data & checksum
|
// send data & checksum
|
||||||
blockSender.sendBlock(out, unbufOut, transferThrottler);
|
blockSender.sendBlock(out, unbufOut, throttler);
|
||||||
|
|
||||||
// no response necessary
|
// no response necessary
|
||||||
LOG.info("{}, at {}: Transmitted {} (numBytes={}) to {}",
|
LOG.info("{}, at {}: Transmitted {} (numBytes={}) to {}",
|
||||||
|
@ -3739,4 +3738,32 @@ public class DataNode extends ReconfigurableBase
|
||||||
}
|
}
|
||||||
return this.diskBalancer;
|
return this.diskBalancer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct DataTransfer in {@link DataNode#transferBlock}, the
|
||||||
|
* BlockConstructionStage is PIPELINE_SETUP_CREATE and clientName is "".
|
||||||
|
*/
|
||||||
|
private static boolean isTransfer(BlockConstructionStage stage,
|
||||||
|
String clientName) {
|
||||||
|
if (stage == PIPELINE_SETUP_CREATE && clientName.isEmpty()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct DataTransfer in
|
||||||
|
* {@link DataNode#transferReplicaForPipelineRecovery}.
|
||||||
|
*
|
||||||
|
* When recover pipeline, BlockConstructionStage is
|
||||||
|
* PIPELINE_SETUP_APPEND_RECOVERY,
|
||||||
|
* PIPELINE_SETUP_STREAMING_RECOVERY,PIPELINE_CLOSE_RECOVERY. If
|
||||||
|
* BlockConstructionStage is PIPELINE_CLOSE_RECOVERY, don't need transfer
|
||||||
|
* replica. So BlockConstructionStage is PIPELINE_SETUP_APPEND_RECOVERY,
|
||||||
|
* PIPELINE_SETUP_STREAMING_RECOVERY.
|
||||||
|
*/
|
||||||
|
private static boolean isWrite(BlockConstructionStage stage) {
|
||||||
|
return (stage == PIPELINE_SETUP_STREAMING_RECOVERY
|
||||||
|
|| stage == PIPELINE_SETUP_APPEND_RECOVERY);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -905,8 +905,8 @@ class DataXceiver extends Receiver implements Runnable {
|
||||||
// receive the block and mirror to the next target
|
// receive the block and mirror to the next target
|
||||||
if (blockReceiver != null) {
|
if (blockReceiver != null) {
|
||||||
String mirrorAddr = (mirrorSock == null) ? null : mirrorNode;
|
String mirrorAddr = (mirrorSock == null) ? null : mirrorNode;
|
||||||
blockReceiver.receiveBlock(mirrorOut, mirrorIn, replyOut,
|
blockReceiver.receiveBlock(mirrorOut, mirrorIn, replyOut, mirrorAddr,
|
||||||
mirrorAddr, null, targets, false);
|
dataXceiverServer.getWriteThrottler(), targets, false);
|
||||||
|
|
||||||
// send close-ack for transfer-RBW/Finalized
|
// send close-ack for transfer-RBW/Finalized
|
||||||
if (isTransfer) {
|
if (isTransfer) {
|
||||||
|
|
|
@ -171,6 +171,8 @@ class DataXceiverServer implements Runnable {
|
||||||
|
|
||||||
private final DataTransferThrottler transferThrottler;
|
private final DataTransferThrottler transferThrottler;
|
||||||
|
|
||||||
|
private final DataTransferThrottler writeThrottler;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Stores an estimate for block size to check if the disk partition has enough
|
* Stores an estimate for block size to check if the disk partition has enough
|
||||||
* space. Newer clients pass the expected block size to the DataNode. For
|
* space. Newer clients pass the expected block size to the DataNode. For
|
||||||
|
@ -205,6 +207,15 @@ class DataXceiverServer implements Runnable {
|
||||||
} else {
|
} else {
|
||||||
this.transferThrottler = null;
|
this.transferThrottler = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bandwidthPerSec = conf.getLongBytes(
|
||||||
|
DFSConfigKeys.DFS_DATANODE_DATA_WRITE_BANDWIDTHPERSEC_KEY,
|
||||||
|
DFSConfigKeys.DFS_DATANODE_DATA_WRITE_BANDWIDTHPERSEC_DEFAULT);
|
||||||
|
if (bandwidthPerSec > 0) {
|
||||||
|
this.writeThrottler = new DataTransferThrottler(bandwidthPerSec);
|
||||||
|
} else {
|
||||||
|
this.writeThrottler = null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -458,6 +469,10 @@ class DataXceiverServer implements Runnable {
|
||||||
return transferThrottler;
|
return transferThrottler;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public DataTransferThrottler getWriteThrottler() {
|
||||||
|
return writeThrottler;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Release a peer.
|
* Release a peer.
|
||||||
*
|
*
|
||||||
|
|
|
@ -4145,9 +4145,21 @@
|
||||||
<name>dfs.datanode.data.transfer.bandwidthPerSec</name>
|
<name>dfs.datanode.data.transfer.bandwidthPerSec</name>
|
||||||
<value>0</value>
|
<value>0</value>
|
||||||
<description>
|
<description>
|
||||||
Specifies the maximum amount of bandwidth that each datanode can utilize for the data transfering purpose in term
|
Specifies the maximum amount of bandwidth that the data transfering can utilize for transfering block when
|
||||||
of the number of bytes per second.
|
BlockConstructionStage is
|
||||||
when the bandwidth value is zero, there is no limit.
|
PIPELINE_SETUP_CREATE and clientName is empty.
|
||||||
|
When the bandwidth value is zero, there is no limit.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.datanode.data.write.bandwidthPerSec</name>
|
||||||
|
<value>0</value>
|
||||||
|
<description>
|
||||||
|
Specifies the maximum amount of bandwidth that the data transfering can utilize for writing block or pipeline
|
||||||
|
recovery when
|
||||||
|
BlockConstructionStage is PIPELINE_SETUP_APPEND_RECOVERY or PIPELINE_SETUP_STREAMING_RECOVERY.
|
||||||
|
When the bandwidth value is zero, there is no limit.
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
|
|
@ -42,6 +42,8 @@ import org.apache.log4j.Level;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_WRITE_BANDWIDTHPERSEC_KEY;
|
||||||
|
|
||||||
/** Test transferring RBW between datanodes */
|
/** Test transferring RBW between datanodes */
|
||||||
public class TestTransferRbw {
|
public class TestTransferRbw {
|
||||||
private static final Logger LOG =
|
private static final Logger LOG =
|
||||||
|
@ -102,13 +104,22 @@ public class TestTransferRbw {
|
||||||
final String bpid = cluster.getNamesystem().getBlockPoolId();
|
final String bpid = cluster.getNamesystem().getBlockPoolId();
|
||||||
{
|
{
|
||||||
final DataNode oldnode = cluster.getDataNodes().get(0);
|
final DataNode oldnode = cluster.getDataNodes().get(0);
|
||||||
|
// DataXceiverServer#writeThrottler is null if
|
||||||
|
// dfs.datanode.data.write.bandwidthPerSec default value is 0.
|
||||||
|
Assert.assertNull(oldnode.xserver.getWriteThrottler());
|
||||||
oldrbw = getRbw(oldnode, bpid);
|
oldrbw = getRbw(oldnode, bpid);
|
||||||
LOG.info("oldrbw = " + oldrbw);
|
LOG.info("oldrbw = " + oldrbw);
|
||||||
|
|
||||||
//add a datanode
|
//add a datanode
|
||||||
|
conf.setLong(DFS_DATANODE_DATA_WRITE_BANDWIDTHPERSEC_KEY,
|
||||||
|
1024 * 1024 * 8);
|
||||||
cluster.startDataNodes(conf, 1, true, null, null);
|
cluster.startDataNodes(conf, 1, true, null, null);
|
||||||
newnode = cluster.getDataNodes().get(REPLICATION);
|
newnode = cluster.getDataNodes().get(REPLICATION);
|
||||||
|
// DataXceiverServer#writeThrottler#balancer is equal to
|
||||||
|
// dfs.datanode.data.write.bandwidthPerSec value if
|
||||||
|
// dfs.datanode.data.write.bandwidthPerSec value is not zero.
|
||||||
|
Assert.assertEquals(1024 * 1024 * 8,
|
||||||
|
newnode.xserver.getWriteThrottler().getBandwidth());
|
||||||
final DatanodeInfo oldnodeinfo;
|
final DatanodeInfo oldnodeinfo;
|
||||||
{
|
{
|
||||||
final DatanodeInfo[] datatnodeinfos = cluster.getNameNodeRpc(
|
final DatanodeInfo[] datatnodeinfos = cluster.getNameNodeRpc(
|
||||||
|
|
Loading…
Reference in New Issue