HDFS-14795. Add Throttler for writing block. Contributed by Lisheng Sun.

Inigo Goiri 2019-09-17 14:55:34 -07:00
parent eefe9bc85c
commit f580a87079
6 changed files with 86 additions and 17 deletions

DFSConfigKeys.java

@@ -119,6 +119,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
"dfs.datanode.data.transfer.bandwidthPerSec";
public static final long DFS_DATANODE_DATA_TRANSFER_BANDWIDTHPERSEC_DEFAULT =
0; // A value of zero indicates no limit
public static final String DFS_DATANODE_DATA_WRITE_BANDWIDTHPERSEC_KEY =
"dfs.datanode.data.write.bandwidthPerSec";
// A value of zero indicates no limit
public static final long DFS_DATANODE_DATA_WRITE_BANDWIDTHPERSEC_DEFAULT = 0;
@Deprecated
public static final String DFS_DATANODE_READAHEAD_BYTES_KEY =
HdfsClientConfigKeys.DFS_DATANODE_READAHEAD_BYTES_KEY;

DataNode.java

@@ -46,6 +46,9 @@
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_MAX_NUM_BLOCKS_TO_LOG_KEY;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_METRICS_LOGGER_PERIOD_SECONDS_DEFAULT;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_METRICS_LOGGER_PERIOD_SECONDS_KEY;
import static org.apache.hadoop.hdfs.protocol.datatransfer.BlockConstructionStage.PIPELINE_SETUP_APPEND_RECOVERY;
import static org.apache.hadoop.hdfs.protocol.datatransfer.BlockConstructionStage.PIPELINE_SETUP_CREATE;
import static org.apache.hadoop.hdfs.protocol.datatransfer.BlockConstructionStage.PIPELINE_SETUP_STREAMING_RECOVERY;
import static org.apache.hadoop.util.ExitUtil.terminate;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
@@ -2500,8 +2503,8 @@ private class DataTransfer implements Runnable {
final String clientname;
final CachingStrategy cachingStrategy;
/** Throttle to block replication when data transfers. */
private DataTransferThrottler transferThrottler;
/** Throttle to block replication when data transfers or writes. */
private DataTransferThrottler throttler;
/**
* Connect to the first item in the target list. Pass along the
@@ -2529,14 +2532,10 @@ private class DataTransfer implements Runnable {
this.clientname = clientname;
this.cachingStrategy =
new CachingStrategy(true, getDnConf().readaheadLength);
// 1. If the stage is PIPELINE_SETUP_CREATE, that is, moving blocks, set the
// throttler.
// 2. If the stage is PIPELINE_SETUP_APPEND_RECOVERY or
// PIPELINE_SETUP_STREAMING_RECOVERY,
// that is, writing and recovering the pipeline, don't set the throttler.
if (stage == BlockConstructionStage.PIPELINE_SETUP_CREATE
&& clientname.isEmpty()) {
this.transferThrottler = xserver.getTransferThrottler();
if (isTransfer(stage, clientname)) {
this.throttler = xserver.getTransferThrottler();
} else if (isWrite(stage)) {
this.throttler = xserver.getWriteThrottler();
}
}
@@ -2596,7 +2595,7 @@ public void run() {
targetStorageIds);
// send data & checksum
blockSender.sendBlock(out, unbufOut, transferThrottler);
blockSender.sendBlock(out, unbufOut, throttler);
// no response necessary
LOG.info("{}, at {}: Transmitted {} (numBytes={}) to {}",
@@ -3739,4 +3738,32 @@ private DiskBalancer getDiskBalancer() throws IOException {
}
return this.diskBalancer;
}
/**
* Returns true if the DataTransfer is constructed in
* {@link DataNode#transferBlock}, i.e. the BlockConstructionStage is
* PIPELINE_SETUP_CREATE and the clientName is empty.
*/
private static boolean isTransfer(BlockConstructionStage stage,
String clientName) {
return stage == PIPELINE_SETUP_CREATE && clientName.isEmpty();
}
/**
* Returns true if the DataTransfer is constructed in
* {@link DataNode#transferReplicaForPipelineRecovery}.
*
* During pipeline recovery the BlockConstructionStage is one of
* PIPELINE_SETUP_APPEND_RECOVERY, PIPELINE_SETUP_STREAMING_RECOVERY, or
* PIPELINE_CLOSE_RECOVERY. A PIPELINE_CLOSE_RECOVERY stage does not need to
* transfer a replica, so only PIPELINE_SETUP_APPEND_RECOVERY and
* PIPELINE_SETUP_STREAMING_RECOVERY are treated as writes.
*/
private static boolean isWrite(BlockConstructionStage stage) {
return (stage == PIPELINE_SETUP_STREAMING_RECOVERY
|| stage == PIPELINE_SETUP_APPEND_RECOVERY);
}
}
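For context on how the throttler selected above takes effect: the DataTransfer run() method passes it to BlockSender#sendBlock, which paces its I/O through DataTransferThrottler#throttle(long). Below is a minimal, self-contained sketch (not part of this patch) of such a throttled copy loop; the ThrottledCopy class name and the 64 KB buffer size are illustrative assumptions, and only the public throttle(long) method already used by the sender/receiver is assumed.

// Minimal sketch (not from this patch): pacing a copy loop with a
// DataTransferThrottler, the way BlockSender/BlockReceiver pace their I/O.
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.hdfs.util.DataTransferThrottler;

public class ThrottledCopy {
  static void copy(InputStream in, OutputStream out,
      DataTransferThrottler throttler) throws IOException {
    byte[] buf = new byte[64 * 1024];
    int n;
    while ((n = in.read(buf)) > 0) {
      out.write(buf, 0, n);
      if (throttler != null) {   // a null throttler means "no limit", as in the patch
        throttler.throttle(n);   // sleep as needed to stay under the bandwidth cap
      }
    }
  }
}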

DataXceiver.java

@@ -905,8 +905,8 @@ public void writeBlock(final ExtendedBlock block,
// receive the block and mirror to the next target
if (blockReceiver != null) {
String mirrorAddr = (mirrorSock == null) ? null : mirrorNode;
blockReceiver.receiveBlock(mirrorOut, mirrorIn, replyOut,
mirrorAddr, null, targets, false);
blockReceiver.receiveBlock(mirrorOut, mirrorIn, replyOut, mirrorAddr,
dataXceiverServer.getWriteThrottler(), targets, false);
// send close-ack for transfer-RBW/Finalized
if (isTransfer) {

DataXceiverServer.java

@@ -171,6 +171,8 @@ void release() {
private final DataTransferThrottler transferThrottler;
private final DataTransferThrottler writeThrottler;
/**
* Stores an estimate for block size to check if the disk partition has enough
* space. Newer clients pass the expected block size to the DataNode. For
@@ -205,6 +207,15 @@ void release() {
} else {
this.transferThrottler = null;
}
bandwidthPerSec = conf.getLongBytes(
DFSConfigKeys.DFS_DATANODE_DATA_WRITE_BANDWIDTHPERSEC_KEY,
DFSConfigKeys.DFS_DATANODE_DATA_WRITE_BANDWIDTHPERSEC_DEFAULT);
if (bandwidthPerSec > 0) {
this.writeThrottler = new DataTransferThrottler(bandwidthPerSec);
} else {
this.writeThrottler = null;
}
}
@Override
@@ -458,6 +469,10 @@ public DataTransferThrottler getTransferThrottler() {
return transferThrottler;
}
public DataTransferThrottler getWriteThrottler() {
return writeThrottler;
}
/**
* Release a peer.
*

hdfs-default.xml

@@ -4145,9 +4145,21 @@
<name>dfs.datanode.data.transfer.bandwidthPerSec</name>
<value>0</value>
<description>
Specifies the maximum amount of bandwidth that each datanode can utilize for data transfer, in terms of the
number of bytes per second.
When the bandwidth value is zero, there is no limit.
Specifies the maximum amount of bandwidth, in bytes per second, that each datanode can utilize for transferring
blocks when the BlockConstructionStage is PIPELINE_SETUP_CREATE and the clientName is empty.
When the bandwidth value is zero, there is no limit.
</description>
</property>
<property>
<name>dfs.datanode.data.write.bandwidthPerSec</name>
<value>0</value>
<description>
Specifies the maximum amount of bandwidth, in bytes per second, that each datanode can utilize for writing blocks,
or for pipeline recovery when the BlockConstructionStage is PIPELINE_SETUP_APPEND_RECOVERY or
PIPELINE_SETUP_STREAMING_RECOVERY.
When the bandwidth value is zero, there is no limit.
</description>
</property>
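As a usage note (not part of the patch): the DataXceiverServer hunk above reads this key with conf.getLongBytes, so the limit can be set in hdfs-site.xml (suffixed values such as 10m should also be accepted) or programmatically. A hedged sketch, assuming a 10 MB/s limit chosen purely for illustration:

// Hypothetical example: enabling the new write throttler from code, e.g. when
// building the Configuration for a DataNode in a test. The 10 MB/s value is an
// arbitrary illustration; 0 (the default) leaves writes unthrottled.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;

public class WriteThrottlingConfig {
  public static Configuration withWriteLimit() {
    Configuration conf = new Configuration();
    conf.setLong(DFSConfigKeys.DFS_DATANODE_DATA_WRITE_BANDWIDTHPERSEC_KEY,
        10L * 1024 * 1024); // 10 MB/s
    return conf;
  }
}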

TestTransferRbw.java

@@ -42,6 +42,8 @@
import org.junit.Assert;
import org.junit.Test;
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_DATANODE_DATA_WRITE_BANDWIDTHPERSEC_KEY;
/** Test transferring RBW between datanodes */
public class TestTransferRbw {
private static final Logger LOG =
@@ -102,13 +104,22 @@ public void testTransferRbw() throws Exception {
final String bpid = cluster.getNamesystem().getBlockPoolId();
{
final DataNode oldnode = cluster.getDataNodes().get(0);
// DataXceiverServer#writeThrottler is null when
// dfs.datanode.data.write.bandwidthPerSec is left at its default value of 0.
Assert.assertNull(oldnode.xserver.getWriteThrottler());
oldrbw = getRbw(oldnode, bpid);
LOG.info("oldrbw = " + oldrbw);
//add a datanode
conf.setLong(DFS_DATANODE_DATA_WRITE_BANDWIDTHPERSEC_KEY,
1024 * 1024 * 8);
cluster.startDataNodes(conf, 1, true, null, null);
newnode = cluster.getDataNodes().get(REPLICATION);
// DataXceiverServer#writeThrottler#getBandwidth() is equal to the
// dfs.datanode.data.write.bandwidthPerSec value when
// dfs.datanode.data.write.bandwidthPerSec is not zero.
Assert.assertEquals(1024 * 1024 * 8,
newnode.xserver.getWriteThrottler().getBandwidth());
final DatanodeInfo oldnodeinfo;
{
final DatanodeInfo[] datatnodeinfos = cluster.getNameNodeRpc(