HDFS-11015. Enforce timeout in balancer. Contributed by Kihwal Lee.

(cherry picked from commit f6367c5f44)
(cherry picked from commit ff806cbfc7)
(cherry picked from commit 262518fa5b)
parent d05e737b84
commit c05d701161
@@ -175,6 +175,8 @@ Release 2.7.4 - UNRELEASED
     HDFS-10627. Volume Scanner marks a block as "suspect" even if
     the exception is network-related. (Rushabh S Shah via kihwal)
 
+    HDFS-11015. Enforce timeout in balancer. (kihwal via zhz)
+
 Release 2.7.3 - 2016-08-25
 
   INCOMPATIBLE CHANGES
@@ -454,6 +454,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
   public static final long DFS_BALANCER_GETBLOCKS_SIZE_DEFAULT = 2L*1024*1024*1024; // 2GB
   public static final String DFS_BALANCER_GETBLOCKS_MIN_BLOCK_SIZE_KEY = "dfs.balancer.getBlocks.min-block-size";
   public static final long DFS_BALANCER_GETBLOCKS_MIN_BLOCK_SIZE_DEFAULT = 10L*1024*1024; // 10MB
+  public static final String DFS_BALANCER_BLOCK_MOVE_TIMEOUT = "dfs.balancer.block-move.timeout";
+  public static final int DFS_BALANCER_BLOCK_MOVE_TIMEOUT_DEFAULT = 0;
 
   public static final String DFS_MOVER_MOVEDWINWIDTH_KEY = "dfs.mover.movedWinWidth";
   public static final long DFS_MOVER_MOVEDWINWIDTH_DEFAULT = 5400*1000L;
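The two constants above expose the new knob through DFSConfigKeys. Below is a minimal sketch, not part of this commit, of setting and reading the key through a plain Configuration object; the 300000 ms (5 minute) value is only an example, and the shipped default of 0 leaves the timeout disabled.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;

// Minimal usage sketch (not part of this commit).
public class BlockMoveTimeoutConfigSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // 5 minutes, expressed in milliseconds; 0 (the default) disables the check.
    conf.setInt(DFSConfigKeys.DFS_BALANCER_BLOCK_MOVE_TIMEOUT, 300000);
    int timeout = conf.getInt(DFSConfigKeys.DFS_BALANCER_BLOCK_MOVE_TIMEOUT,
        DFSConfigKeys.DFS_BALANCER_BLOCK_MOVE_TIMEOUT_DEFAULT);
    System.out.println("dfs.balancer.block-move.timeout = " + timeout + " ms");
  }
}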
@@ -255,10 +255,14 @@ public class Balancer {
     final long getBlocksMinBlockSize = getLong(conf,
         DFSConfigKeys.DFS_BALANCER_GETBLOCKS_MIN_BLOCK_SIZE_KEY,
         DFSConfigKeys.DFS_BALANCER_GETBLOCKS_MIN_BLOCK_SIZE_DEFAULT);
+    final int blockMoveTimeout = conf.getInt(
+        DFSConfigKeys.DFS_BALANCER_BLOCK_MOVE_TIMEOUT,
+        DFSConfigKeys.DFS_BALANCER_BLOCK_MOVE_TIMEOUT_DEFAULT);
 
     this.dispatcher = new Dispatcher(theblockpool, p.nodesToBeIncluded,
         p.nodesToBeExcluded, movedWinWidth, moverThreads, dispatcherThreads,
-        maxConcurrentMovesPerNode, getBlocksSize, getBlocksMinBlockSize, conf);
+        maxConcurrentMovesPerNode, getBlocksSize, getBlocksMinBlockSize,
+        blockMoveTimeout, conf);
     this.threshold = p.threshold;
     this.policy = p.policy;
 
@@ -121,6 +121,7 @@ public class Dispatcher {
 
   private final long getBlocksSize;
   private final long getBlocksMinBlockSize;
+  private final long blockMoveTimeout;
 
   static class Allocator {
     private final int max;
@@ -321,6 +322,11 @@ public class Dispatcher {
             NetUtils.createSocketAddr(target.getDatanodeInfo().getXferAddr()),
             HdfsServerConstants.READ_TIMEOUT);
 
+        // Set read timeout so that it doesn't hang forever against
+        // unresponsive nodes. Datanode normally sends IN_PROGRESS response
+        // twice within the client read timeout period (every 30 seconds by
+        // default). Here, we make it give up after 5 minutes of no response.
+        sock.setSoTimeout(HdfsServerConstants.READ_TIMEOUT * 5);
         sock.setKeepAlive(true);
 
         OutputStream unbufOut = sock.getOutputStream();
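The comment added above documents the contract this change relies on: a datanode streams IN_PROGRESS responses roughly every 30 seconds, so five read-timeout periods of silence means the peer is effectively unresponsive. As a stand-alone illustration of how setSoTimeout() bounds each blocking read, here is a sketch that is not from the patch; the host, port, and the 60-second stand-in for HdfsServerConstants.READ_TIMEOUT are assumptions.

import java.io.InputStream;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.net.SocketTimeoutException;

// Illustrative sketch only; names and values are assumptions, not the patch's code.
public class ReadTimeoutSketch {
  public static void main(String[] args) throws Exception {
    final int readTimeoutMs = 60 * 1000;   // assumed stand-in for READ_TIMEOUT
    try (Socket sock = new Socket()) {
      sock.connect(new InetSocketAddress("datanode.example.com", 1004), readTimeoutMs);
      // Any single read may now block for at most five read-timeout periods.
      sock.setSoTimeout(readTimeoutMs * 5);
      sock.setKeepAlive(true);
      InputStream in = sock.getInputStream();
      System.out.println("first byte: " + in.read());
    } catch (SocketTimeoutException e) {
      // Reached once the peer has been silent for the full timeout window.
      System.err.println("peer unresponsive: " + e.getMessage());
    }
  }
}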
@@ -375,13 +381,26 @@ public class Dispatcher {
           source.getDatanodeInfo().getDatanodeUuid(), proxySource.datanode);
     }
 
-    /** Receive a block copy response from the input stream */
+    /** Check whether to continue waiting for response */
+    private boolean stopWaitingForResponse(long startTime) {
+      return source.isIterationOver() ||
+          (blockMoveTimeout > 0 &&
+          (Time.monotonicNow() - startTime > blockMoveTimeout));
+    }
+
+    /** Receive a reportedBlock copy response from the input stream */
     private void receiveResponse(DataInputStream in) throws IOException {
+      long startTime = Time.monotonicNow();
       BlockOpResponseProto response =
           BlockOpResponseProto.parseFrom(vintPrefixed(in));
       while (response.getStatus() == Status.IN_PROGRESS) {
         // read intermediate responses
         response = BlockOpResponseProto.parseFrom(vintPrefixed(in));
+        // Stop waiting for slow block moves. Even if it stops waiting,
+        // the actual move may continue.
+        if (stopWaitingForResponse(startTime)) {
+          throw new IOException("Block move timed out");
+        }
       }
       String logInfo = "block move is failed";
       DataTransferProtoUtil.checkBlockOpStatus(response, logInfo);
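The loop above keeps reading IN_PROGRESS keep-alives until a terminal status arrives; the new guard turns a stalled move into an IOException on the balancer side while, as the in-line comment notes, the datanode may still finish the move. Below is a self-contained sketch of the same control flow; the Status enum, the Supplier standing in for the response stream, and the class name are illustrative, not the patch's types.

import java.io.IOException;
import java.util.function.Supplier;

// Stand-alone sketch of the wait-with-timeout pattern; not the patch's code.
public class MoveWaitSketch {
  enum Status { IN_PROGRESS, SUCCESS, ERROR }

  private final long blockMoveTimeoutMs;   // 0 disables the extra timeout
  private final boolean iterationOver;     // stands in for source.isIterationOver()

  MoveWaitSketch(long blockMoveTimeoutMs, boolean iterationOver) {
    this.blockMoveTimeoutMs = blockMoveTimeoutMs;
    this.iterationOver = iterationOver;
  }

  private static long monotonicNow() {
    return System.nanoTime() / 1000000L;   // monotonic ms, like Time.monotonicNow()
  }

  private boolean stopWaitingForResponse(long startTime) {
    return iterationOver
        || (blockMoveTimeoutMs > 0 && monotonicNow() - startTime > blockMoveTimeoutMs);
  }

  /** Poll responses until a terminal one arrives or the timeout fires. */
  Status waitForCompletion(Supplier<Status> nextResponse) throws IOException {
    long startTime = monotonicNow();
    Status response = nextResponse.get();
    while (response == Status.IN_PROGRESS) {
      response = nextResponse.get();
      // Give up waiting on a slow move; the remote side may still complete it.
      if (stopWaitingForResponse(startTime)) {
        throw new IOException("Block move timed out");
      }
    }
    return response;
  }

  public static void main(String[] args) throws IOException {
    MoveWaitSketch w = new MoveWaitSketch(5 * 60 * 1000L, false);
    System.out.println(w.waitForCompletion(() -> Status.SUCCESS));   // prints SUCCESS
  }
}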
@@ -614,6 +633,7 @@ public class Dispatcher {
 
     private final List<Task> tasks = new ArrayList<Task>(2);
     private long blocksToReceive = 0L;
+    private final long startTime = Time.monotonicNow();
     /**
      * Source blocks point to the objects in {@link Dispatcher#globalBlocks}
      * because we want to keep one copy of a block and be aware that the
@@ -625,6 +645,13 @@ public class Dispatcher {
       dn.super(storageType, maxSize2Move);
     }
 
+    /**
+     * Check if the iteration is over
+     */
+    public boolean isIterationOver() {
+      return (Time.monotonicNow()-startTime > MAX_ITERATION_TIME);
+    }
+
     /** Add a task */
     void addTask(Task task) {
       Preconditions.checkState(task.target != this,
@@ -765,11 +792,9 @@ public class Dispatcher {
      * elapsed time of the iteration has exceeded the max time limit.
      */
     private void dispatchBlocks() {
-      final long startTime = Time.monotonicNow();
       this.blocksToReceive = 2 * getScheduledSize();
-      boolean isTimeUp = false;
       int noPendingMoveIteration = 0;
-      while (!isTimeUp && getScheduledSize() > 0
+      while (getScheduledSize() > 0 && !isIterationOver()
           && (!srcBlocks.isEmpty() || blocksToReceive > 0)) {
         if (LOG.isTraceEnabled()) {
           LOG.trace(this + " blocksToReceive=" + blocksToReceive
@@ -809,14 +834,6 @@ public class Dispatcher {
           }
         }
 
-        // check if time is up or not
-        if (Time.monotonicNow() - startTime > MAX_ITERATION_TIME) {
-          LOG.info("Time up (max time=" + MAX_ITERATION_TIME/1000
-              + " seconds). Skipping " + this);
-          isTimeUp = true;
-          continue;
-        }
-
         // Now we can not schedule any block to move and there are
         // no new blocks added to the source block list, so we wait.
         try {
@@ -826,6 +843,11 @@ public class Dispatcher {
         } catch (InterruptedException ignored) {
         }
       }
+
+      if (isIterationOver()) {
+        LOG.info("The maximum iteration time (" + MAX_ITERATION_TIME/1000
+            + " seconds) has been reached. Stopping " + this);
+      }
     }
 
     @Override
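With the timer moved onto the Source (the startTime field and isIterationOver() added earlier in this diff), dispatchBlocks() no longer keeps its own clock or isTimeUp flag, and the same iteration deadline also feeds stopWaitingForResponse(). The following compact stand-alone sketch shows the reshaped loop; the 20-minute MAX_ITERATION_TIME value and the simplified bookkeeping are assumptions, not the patch's code.

import java.util.ArrayDeque;
import java.util.Deque;

// Sketch of a dispatch loop bounded by an iteration-wide deadline; illustrative only.
public class DispatchLoopSketch {
  private static final long MAX_ITERATION_TIME = 20 * 60 * 1000L;   // assumed value, ms
  private final long startTime = monotonicNow();                    // set once per iteration
  private final Deque<String> srcBlocks = new ArrayDeque<String>();

  private static long monotonicNow() {
    return System.nanoTime() / 1000000L;   // monotonic ms, immune to wall-clock changes
  }

  boolean isIterationOver() {
    return monotonicNow() - startTime > MAX_ITERATION_TIME;
  }

  void dispatchBlocks(long scheduledSize) {
    long blocksToReceive = 2 * scheduledSize;
    // No local timer and no isTimeUp flag: the loop condition itself
    // consults the iteration-wide deadline on every pass.
    while (scheduledSize > 0 && !isIterationOver()
        && (!srcBlocks.isEmpty() || blocksToReceive > 0)) {
      // ... schedule a pending move, fetch more blocks, or wait ...
      scheduledSize--;        // placeholder for real progress
      blocksToReceive--;
    }
    if (isIterationOver()) {
      System.out.println("The maximum iteration time (" + MAX_ITERATION_TIME / 1000
          + " seconds) has been reached. Stopping.");
    }
  }

  public static void main(String[] args) {
    new DispatchLoopSketch().dispatchBlocks(3);
  }
}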
@@ -845,13 +867,14 @@ public class Dispatcher {
       int dispatcherThreads, int maxConcurrentMovesPerNode, Configuration conf) {
     this(nnc, includedNodes, excludedNodes, movedWinWidth,
         moverThreads, dispatcherThreads, maxConcurrentMovesPerNode,
-        0L, 0L, conf);
+        0L, 0L, 0, conf);
   }
 
   Dispatcher(NameNodeConnector nnc, Set<String> includedNodes,
       Set<String> excludedNodes, long movedWinWidth, int moverThreads,
       int dispatcherThreads, int maxConcurrentMovesPerNode,
-      long getBlocksSize, long getBlocksMinBlockSize, Configuration conf) {
+      long getBlocksSize, long getBlocksMinBlockSize,
+      int blockMoveTimeout, Configuration conf) {
     this.nnc = nnc;
     this.excludedNodes = excludedNodes;
     this.includedNodes = includedNodes;
@@ -866,6 +889,7 @@ public class Dispatcher {
 
     this.getBlocksSize = getBlocksSize;
     this.getBlocksMinBlockSize = getBlocksMinBlockSize;
+    this.blockMoveTimeout = blockMoveTimeout;
 
     this.saslClient = new SaslDataTransferClient(conf,
         DataTransferSaslUtil.getSaslPropertiesResolver(conf),
@@ -2531,6 +2531,21 @@
   </description>
 </property>
 
+<property>
+  <name>dfs.balancer.block-move.timeout</name>
+  <value>0</value>
+  <description>
+    Maximum amount of time in milliseconds for a block to move. If this is set
+    greater than 0, Balancer will stop waiting for a block move completion
+    after this time. In typical clusters, a 3 to 5 minute timeout is reasonable.
+    If timeout happens to a large proportion of block moves, this needs to be
+    increased. It could also be that too much work is dispatched and many nodes
+    are constantly exceeding the bandwidth limit as a result. In that case,
+    other balancer parameters might need to be adjusted.
+    It is disabled (0) by default.
+  </description>
+</property>
+
 <property>
   <name>dfs.lock.suppress.warning.interval</name>
   <value>10s</value>
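The description above recommends a 3 to 5 minute timeout for typical clusters, while the property itself takes milliseconds. Here is a small tuning sketch, not part of this commit: it converts a minute-based choice with TimeUnit and, following the dispatcher comment earlier in this diff about IN_PROGRESS keep-alives arriving roughly every 30 seconds, warns when the chosen value is so small that most moves would likely be reported as timed out. The 4-minute figure is only an example.

import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;

// Tuning sketch; values and the sanity check are illustrative assumptions.
public class BlockMoveTimeoutTuningSketch {
  public static void main(String[] args) {
    long timeoutMs = TimeUnit.MINUTES.toMillis(4);     // description suggests 3 to 5 minutes
    long keepAliveMs = TimeUnit.SECONDS.toMillis(30);  // approximate IN_PROGRESS interval
    if (timeoutMs > 0 && timeoutMs < keepAliveMs) {
      System.err.println("Timeout shorter than one keep-alive interval; "
          + "most block moves would likely be reported as timed out.");
    }
    Configuration conf = new Configuration();
    conf.setLong("dfs.balancer.block-move.timeout", timeoutMs);
    System.out.println(conf.get("dfs.balancer.block-move.timeout"));   // 240000
  }
}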