HDFS-6837. Code cleanup for Balancer and Dispatcher. Contributed by Tsz Wo Nicholas Sze.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1617337 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jing Zhao 2014-08-11 18:01:14 +00:00
parent 946be75704
commit e60673697d
8 changed files with 189 additions and 176 deletions

View File

@ -387,6 +387,9 @@ Release 2.6.0 - UNRELEASED
HDFS-6828. Separate block replica dispatching from Balancer. (szetszwo via
jing9)
HDFS-6837. Code cleanup for Balancer and Dispatcher. (szetszwo via
jing9)
OPTIMIZATIONS
HDFS-6690. Deduplicate xattr names in memory. (wang)

View File

@ -44,7 +44,8 @@
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.HdfsConfiguration;
import org.apache.hadoop.hdfs.StorageType;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.BalancerDatanode;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.DDatanode;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.DDatanode.StorageGroup;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.Source;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.Task;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.Util;
@ -184,10 +185,10 @@ public class Balancer {
// all data node lists
private final Collection<Source> overUtilized = new LinkedList<Source>();
private final Collection<Source> aboveAvgUtilized = new LinkedList<Source>();
private final Collection<BalancerDatanode.StorageGroup> belowAvgUtilized
= new LinkedList<BalancerDatanode.StorageGroup>();
private final Collection<BalancerDatanode.StorageGroup> underUtilized
= new LinkedList<BalancerDatanode.StorageGroup>();
private final Collection<StorageGroup> belowAvgUtilized
= new LinkedList<StorageGroup>();
private final Collection<StorageGroup> underUtilized
= new LinkedList<StorageGroup>();
/* Check that this Balancer is compatible with the Block Placement Policy
* used by the Namenode.
@ -209,8 +210,22 @@ private static void checkReplicationPolicyCompatibility(Configuration conf
* when connection fails.
*/
Balancer(NameNodeConnector theblockpool, Parameters p, Configuration conf) {
final long movedWinWidth = conf.getLong(
DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_KEY,
DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_DEFAULT);
final int moverThreads = conf.getInt(
DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_KEY,
DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_DEFAULT);
final int dispatcherThreads = conf.getInt(
DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_KEY,
DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_DEFAULT);
final int maxConcurrentMovesPerNode = conf.getInt(
DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY,
DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT);
this.dispatcher = new Dispatcher(theblockpool, p.nodesToBeIncluded,
p.nodesToBeExcluded, conf);
p.nodesToBeExcluded, movedWinWidth, moverThreads, dispatcherThreads,
maxConcurrentMovesPerNode, conf);
this.threshold = p.threshold;
this.policy = p.policy;
}
@ -255,7 +270,7 @@ private long init(List<DatanodeStorageReport> reports) {
// over-utilized, above-average, below-average and under-utilized.
long overLoadedBytes = 0L, underLoadedBytes = 0L;
for(DatanodeStorageReport r : reports) {
final BalancerDatanode dn = dispatcher.newDatanode(r);
final DDatanode dn = dispatcher.newDatanode(r);
for(StorageType t : StorageType.asList()) {
final Double utilization = policy.getUtilization(r, t);
if (utilization == null) { // datanode does not have such storage type
@ -268,9 +283,9 @@ private long init(List<DatanodeStorageReport> reports) {
final long maxSize2Move = computeMaxSize2Move(capacity,
getRemaining(r, t), utilizationDiff, threshold);
final BalancerDatanode.StorageGroup g;
final StorageGroup g;
if (utilizationDiff > 0) {
final Source s = dn.addSource(t, utilization, maxSize2Move, dispatcher);
final Source s = dn.addSource(t, maxSize2Move, dispatcher);
if (thresholdDiff <= 0) { // within threshold
aboveAvgUtilized.add(s);
} else {
@ -279,7 +294,7 @@ private long init(List<DatanodeStorageReport> reports) {
}
g = s;
} else {
g = dn.addStorageGroup(t, utilization, maxSize2Move);
g = dn.addStorageGroup(t, maxSize2Move);
if (thresholdDiff <= 0) { // within threshold
belowAvgUtilized.add(g);
} else {
@ -328,7 +343,7 @@ private void logUtilizationCollections() {
logUtilizationCollection("underutilized", underUtilized);
}
private static <T extends BalancerDatanode.StorageGroup>
private static <T extends StorageGroup>
void logUtilizationCollection(String name, Collection<T> items) {
LOG.info(items.size() + " " + name + ": " + items);
}
@ -381,8 +396,7 @@ private void chooseStorageGroups(final Matcher matcher) {
* datanodes or the candidates are source nodes with (utilization > Avg), and
* the others are target nodes with (utilization < Avg).
*/
private <G extends BalancerDatanode.StorageGroup,
C extends BalancerDatanode.StorageGroup>
private <G extends StorageGroup, C extends StorageGroup>
void chooseStorageGroups(Collection<G> groups, Collection<C> candidates,
Matcher matcher) {
for(final Iterator<G> i = groups.iterator(); i.hasNext();) {
@ -398,9 +412,8 @@ void chooseStorageGroups(Collection<G> groups, Collection<C> candidates,
* For the given datanode, choose a candidate and then schedule it.
* @return true if a candidate is chosen; false if no candidate is chosen.
*/
private <C extends BalancerDatanode.StorageGroup>
boolean choose4One(BalancerDatanode.StorageGroup g,
Collection<C> candidates, Matcher matcher) {
private <C extends StorageGroup> boolean choose4One(StorageGroup g,
Collection<C> candidates, Matcher matcher) {
final Iterator<C> i = candidates.iterator();
final C chosen = chooseCandidate(g, i, matcher);
@ -418,8 +431,7 @@ boolean choose4One(BalancerDatanode.StorageGroup g,
return true;
}
private void matchSourceWithTargetToMove(Source source,
BalancerDatanode.StorageGroup target) {
private void matchSourceWithTargetToMove(Source source, StorageGroup target) {
long size = Math.min(source.availableSizeToMove(), target.availableSizeToMove());
final Task task = new Task(target, size);
source.addTask(task);
@ -430,8 +442,7 @@ private void matchSourceWithTargetToMove(Source source,
}
/** Choose a candidate for the given datanode. */
private <G extends BalancerDatanode.StorageGroup,
C extends BalancerDatanode.StorageGroup>
private <G extends StorageGroup, C extends StorageGroup>
C chooseCandidate(G g, Iterator<C> candidates, Matcher matcher) {
if (g.hasSpaceForScheduling()) {
for(; candidates.hasNext(); ) {
@ -439,7 +450,7 @@ C chooseCandidate(G g, Iterator<C> candidates, Matcher matcher) {
if (!c.hasSpaceForScheduling()) {
candidates.remove();
} else if (matcher.match(dispatcher.getCluster(),
g.getDatanode(), c.getDatanode())) {
g.getDatanodeInfo(), c.getDatanodeInfo())) {
return c;
}
}
@ -457,34 +468,15 @@ private void resetData(Configuration conf) {
dispatcher.reset(conf);;
}
// Exit status
enum ReturnStatus {
// These int values will map directly to the balancer process's exit code.
SUCCESS(0),
IN_PROGRESS(1),
ALREADY_RUNNING(-1),
NO_MOVE_BLOCK(-2),
NO_MOVE_PROGRESS(-3),
IO_EXCEPTION(-4),
ILLEGAL_ARGS(-5),
INTERRUPTED(-6);
final int code;
ReturnStatus(int code) {
this.code = code;
}
}
/** Run an iteration for all datanodes. */
private ReturnStatus run(int iteration, Formatter formatter,
private ExitStatus run(int iteration, Formatter formatter,
Configuration conf) {
try {
final List<DatanodeStorageReport> reports = dispatcher.init();
final long bytesLeftToMove = init(reports);
if (bytesLeftToMove == 0) {
System.out.println("The cluster is balanced. Exiting...");
return ReturnStatus.SUCCESS;
return ExitStatus.SUCCESS;
} else {
LOG.info( "Need to move "+ StringUtils.byteDesc(bytesLeftToMove)
+ " to make the cluster balanced." );
@ -498,7 +490,7 @@ private ReturnStatus run(int iteration, Formatter formatter,
final long bytesToMove = chooseStorageGroups();
if (bytesToMove == 0) {
System.out.println("No block can be moved. Exiting...");
return ReturnStatus.NO_MOVE_BLOCK;
return ExitStatus.NO_MOVE_BLOCK;
} else {
LOG.info( "Will move " + StringUtils.byteDesc(bytesToMove) +
" in this iteration");
@ -519,19 +511,19 @@ private ReturnStatus run(int iteration, Formatter formatter,
* Exit if no byte has been moved for 5 consecutive iterations.
*/
if (!dispatcher.dispatchAndCheckContinue()) {
return ReturnStatus.NO_MOVE_PROGRESS;
return ExitStatus.NO_MOVE_PROGRESS;
}
return ReturnStatus.IN_PROGRESS;
return ExitStatus.IN_PROGRESS;
} catch (IllegalArgumentException e) {
System.out.println(e + ". Exiting ...");
return ReturnStatus.ILLEGAL_ARGS;
return ExitStatus.ILLEGAL_ARGUMENTS;
} catch (IOException e) {
System.out.println(e + ". Exiting ...");
return ReturnStatus.IO_EXCEPTION;
return ExitStatus.IO_EXCEPTION;
} catch (InterruptedException e) {
System.out.println(e + ". Exiting ...");
return ReturnStatus.INTERRUPTED;
return ExitStatus.INTERRUPTED;
} finally {
dispatcher.shutdownNow();
}
@ -570,14 +562,14 @@ static int run(Collection<URI> namenodes, final Parameters p,
Collections.shuffle(connectors);
for(NameNodeConnector nnc : connectors) {
final Balancer b = new Balancer(nnc, p, conf);
final ReturnStatus r = b.run(iteration, formatter, conf);
final ExitStatus r = b.run(iteration, formatter, conf);
// clean all lists
b.resetData(conf);
if (r == ReturnStatus.IN_PROGRESS) {
if (r == ExitStatus.IN_PROGRESS) {
done = false;
} else if (r != ReturnStatus.SUCCESS) {
} else if (r != ExitStatus.SUCCESS) {
//must be an error status, return.
return r.code;
return r.getExitCode();
}
}
@ -590,7 +582,7 @@ static int run(Collection<URI> namenodes, final Parameters p,
nnc.close();
}
}
return ReturnStatus.SUCCESS.code;
return ExitStatus.SUCCESS.getExitCode();
}
/* Given elapsedTime in ms, return a printable string */
@ -661,10 +653,10 @@ public int run(String[] args) {
return Balancer.run(namenodes, parse(args), conf);
} catch (IOException e) {
System.out.println(e + ". Exiting ...");
return ReturnStatus.IO_EXCEPTION.code;
return ExitStatus.IO_EXCEPTION.getExitCode();
} catch (InterruptedException e) {
System.out.println(e + ". Exiting ...");
return ReturnStatus.INTERRUPTED.code;
return ExitStatus.INTERRUPTED.getExitCode();
} finally {
System.out.format("%-24s ", DateFormat.getDateTimeInstance().format(new Date()));
System.out.println("Balancing took " + time2Str(Time.now()-startTime));

View File

@ -48,7 +48,6 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.DFSUtil;
import org.apache.hadoop.hdfs.StorageType;
import org.apache.hadoop.hdfs.protocol.Block;
@ -63,6 +62,7 @@
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.server.balancer.Dispatcher.DDatanode.StorageGroup;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants;
import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations;
import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations.BlockWithLocations;
@ -91,7 +91,6 @@ public class Dispatcher {
// minutes
private final NameNodeConnector nnc;
private final KeyManager keyManager;
private final SaslDataTransferClient saslClient;
/** Set of datanodes to be excluded. */
@ -100,11 +99,10 @@ public class Dispatcher {
private final Set<String> includedNodes;
private final Collection<Source> sources = new HashSet<Source>();
private final Collection<BalancerDatanode.StorageGroup> targets
= new HashSet<BalancerDatanode.StorageGroup>();
private final Collection<StorageGroup> targets = new HashSet<StorageGroup>();
private final GlobalBlockMap globalBlocks = new GlobalBlockMap();
private final MovedBlocks<BalancerDatanode.StorageGroup> movedBlocks;
private final MovedBlocks<StorageGroup> movedBlocks;
/** Map (datanodeUuid,storageType -> StorageGroup) */
private final StorageGroupMap storageGroupMap = new StorageGroupMap();
@ -135,8 +133,7 @@ private DBlock get(Block b) {
}
/** Remove all blocks except for the moved blocks. */
private void removeAllButRetain(
MovedBlocks<BalancerDatanode.StorageGroup> movedBlocks) {
private void removeAllButRetain(MovedBlocks<StorageGroup> movedBlocks) {
for (Iterator<Block> i = map.keySet().iterator(); i.hasNext();) {
if (!movedBlocks.contains(i.next())) {
i.remove();
@ -150,17 +147,15 @@ private static String toKey(String datanodeUuid, StorageType storageType) {
return datanodeUuid + ":" + storageType;
}
private final Map<String, BalancerDatanode.StorageGroup> map
= new HashMap<String, BalancerDatanode.StorageGroup>();
private final Map<String, StorageGroup> map = new HashMap<String, StorageGroup>();
BalancerDatanode.StorageGroup get(String datanodeUuid,
StorageType storageType) {
StorageGroup get(String datanodeUuid, StorageType storageType) {
return map.get(toKey(datanodeUuid, storageType));
}
void put(BalancerDatanode.StorageGroup g) {
final String key = toKey(g.getDatanode().getDatanodeUuid(), g.storageType);
final BalancerDatanode.StorageGroup existing = map.put(key, g);
void put(StorageGroup g) {
final String key = toKey(g.getDatanodeInfo().getDatanodeUuid(), g.storageType);
final StorageGroup existing = map.put(key, g);
Preconditions.checkState(existing == null);
}
@ -177,8 +172,8 @@ void clear() {
private class PendingMove {
private DBlock block;
private Source source;
private BalancerDatanode proxySource;
private BalancerDatanode.StorageGroup target;
private DDatanode proxySource;
private StorageGroup target;
private PendingMove() {
}
@ -235,24 +230,24 @@ private boolean markMovedIfGoodBlock(DBlock block) {
* @return true if a proxy is found; otherwise false
*/
private boolean chooseProxySource() {
final DatanodeInfo targetDN = target.getDatanode();
final DatanodeInfo targetDN = target.getDatanodeInfo();
// if node group is supported, first try add nodes in the same node group
if (cluster.isNodeGroupAware()) {
for (BalancerDatanode.StorageGroup loc : block.getLocations()) {
if (cluster.isOnSameNodeGroup(loc.getDatanode(), targetDN)
for (StorageGroup loc : block.getLocations()) {
if (cluster.isOnSameNodeGroup(loc.getDatanodeInfo(), targetDN)
&& addTo(loc)) {
return true;
}
}
}
// check if there is replica which is on the same rack with the target
for (BalancerDatanode.StorageGroup loc : block.getLocations()) {
if (cluster.isOnSameRack(loc.getDatanode(), targetDN) && addTo(loc)) {
for (StorageGroup loc : block.getLocations()) {
if (cluster.isOnSameRack(loc.getDatanodeInfo(), targetDN) && addTo(loc)) {
return true;
}
}
// find out a non-busy replica
for (BalancerDatanode.StorageGroup loc : block.getLocations()) {
for (StorageGroup loc : block.getLocations()) {
if (addTo(loc)) {
return true;
}
@ -261,10 +256,10 @@ && addTo(loc)) {
}
/** add to a proxy source for specific block movement */
private boolean addTo(BalancerDatanode.StorageGroup g) {
final BalancerDatanode bdn = g.getBalancerDatanode();
if (bdn.addPendingBlock(this)) {
proxySource = bdn;
private boolean addTo(StorageGroup g) {
final DDatanode dn = g.getDDatanode();
if (dn.addPendingBlock(this)) {
proxySource = dn;
return true;
}
return false;
@ -281,14 +276,13 @@ private void dispatch() {
DataInputStream in = null;
try {
sock.connect(
NetUtils.createSocketAddr(target.getDatanode().getXferAddr()),
NetUtils.createSocketAddr(target.getDatanodeInfo().getXferAddr()),
HdfsServerConstants.READ_TIMEOUT);
/*
* Unfortunately we don't have a good way to know if the Datanode is
* taking a really long time to move a block, OR something has gone
* wrong and it's never going to finish. To deal with this scenario, we
* set a long timeout (20 minutes) to avoid hanging the balancer
* indefinitely.
* set a long timeout (20 minutes) to avoid hanging indefinitely.
*/
sock.setSoTimeout(BLOCK_MOVE_READ_TIMEOUT);
@ -298,9 +292,10 @@ private void dispatch() {
InputStream unbufIn = sock.getInputStream();
ExtendedBlock eb = new ExtendedBlock(nnc.getBlockpoolID(),
block.getBlock());
Token<BlockTokenIdentifier> accessToken = keyManager.getAccessToken(eb);
final KeyManager km = nnc.getKeyManager();
Token<BlockTokenIdentifier> accessToken = km.getAccessToken(eb);
IOStreamPair saslStreams = saslClient.socketSend(sock, unbufOut,
unbufIn, keyManager, accessToken, target.getDatanode());
unbufIn, km, accessToken, target.getDatanodeInfo());
unbufOut = saslStreams.out;
unbufIn = saslStreams.in;
out = new DataOutputStream(new BufferedOutputStream(unbufOut,
@ -314,21 +309,19 @@ private void dispatch() {
LOG.info("Successfully moved " + this);
} catch (IOException e) {
LOG.warn("Failed to move " + this + ": " + e.getMessage());
/*
* proxy or target may have an issue, insert a small delay before using
* these nodes further. This avoids a potential storm of
* "threads quota exceeded" Warnings when the balancer gets out of sync
* with work going on in datanode.
*/
// Proxy or target may have some issues, delay before using these nodes
// further in order to avoid a potential storm of "threads quota
// exceeded" warnings when the dispatcher gets out of sync with work
// going on in datanodes.
proxySource.activateDelay(DELAY_AFTER_ERROR);
target.getBalancerDatanode().activateDelay(DELAY_AFTER_ERROR);
target.getDDatanode().activateDelay(DELAY_AFTER_ERROR);
} finally {
IOUtils.closeStream(out);
IOUtils.closeStream(in);
IOUtils.closeSocket(sock);
proxySource.removePendingBlock(this);
target.getBalancerDatanode().removePendingBlock(this);
target.getDDatanode().removePendingBlock(this);
synchronized (this) {
reset();
@ -342,8 +335,8 @@ private void dispatch() {
/** Send a block replace request to the output stream */
private void sendRequest(DataOutputStream out, ExtendedBlock eb,
Token<BlockTokenIdentifier> accessToken) throws IOException {
new Sender(out).replaceBlock(eb, target.storageType, accessToken, source
.getDatanode().getDatanodeUuid(), proxySource.datanode);
new Sender(out).replaceBlock(eb, target.storageType, accessToken,
source.getDatanodeInfo().getDatanodeUuid(), proxySource.datanode);
}
/** Receive a block copy response from the input stream */
@ -368,8 +361,7 @@ private void reset() {
}
/** A class for keeping track of block locations in the dispatcher. */
private static class DBlock extends
MovedBlocks.Locations<BalancerDatanode.StorageGroup> {
private static class DBlock extends MovedBlocks.Locations<StorageGroup> {
DBlock(Block block) {
super(block);
}
@ -377,10 +369,10 @@ private static class DBlock extends
/** The class represents a desired move. */
static class Task {
private final BalancerDatanode.StorageGroup target;
private final StorageGroup target;
private long size; // bytes scheduled to move
Task(BalancerDatanode.StorageGroup target, long size) {
Task(StorageGroup target, long size) {
this.target = target;
this.size = size;
}
@ -391,28 +383,25 @@ long getSize() {
}
/** A class that keeps track of a datanode. */
static class BalancerDatanode {
static class DDatanode {
/** A group of storages in a datanode with the same storage type. */
class StorageGroup {
final StorageType storageType;
final double utilization;
final long maxSize2Move;
private long scheduledSize = 0L;
private StorageGroup(StorageType storageType, double utilization,
long maxSize2Move) {
private StorageGroup(StorageType storageType, long maxSize2Move) {
this.storageType = storageType;
this.utilization = utilization;
this.maxSize2Move = maxSize2Move;
}
BalancerDatanode getBalancerDatanode() {
return BalancerDatanode.this;
private DDatanode getDDatanode() {
return DDatanode.this;
}
DatanodeInfo getDatanode() {
return BalancerDatanode.this.datanode;
DatanodeInfo getDatanodeInfo() {
return DDatanode.this.datanode;
}
/** Decide if still need to move more bytes */
@ -447,7 +436,7 @@ String getDisplayName() {
@Override
public String toString() {
return "" + utilization;
return getDisplayName();
}
}
@ -461,10 +450,10 @@ public String toString() {
@Override
public String toString() {
return getClass().getSimpleName() + ":" + datanode + ":" + storageMap;
return getClass().getSimpleName() + ":" + datanode + ":" + storageMap.values();
}
private BalancerDatanode(DatanodeStorageReport r, int maxConcurrentMoves) {
private DDatanode(DatanodeStorageReport r, int maxConcurrentMoves) {
this.datanode = r.getDatanodeInfo();
this.maxConcurrentMoves = maxConcurrentMoves;
this.pendings = new ArrayList<PendingMove>(maxConcurrentMoves);
@ -475,18 +464,14 @@ private void put(StorageType storageType, StorageGroup g) {
Preconditions.checkState(existing == null);
}
StorageGroup addStorageGroup(StorageType storageType, double utilization,
long maxSize2Move) {
final StorageGroup g = new StorageGroup(storageType, utilization,
maxSize2Move);
StorageGroup addStorageGroup(StorageType storageType, long maxSize2Move) {
final StorageGroup g = new StorageGroup(storageType, maxSize2Move);
put(storageType, g);
return g;
}
Source addSource(StorageType storageType, double utilization,
long maxSize2Move, Dispatcher balancer) {
final Source s = balancer.new Source(storageType, utilization,
maxSize2Move, this);
Source addSource(StorageType storageType, long maxSize2Move, Dispatcher d) {
final Source s = d.new Source(storageType, maxSize2Move, this);
put(storageType, s);
return s;
}
@ -528,7 +513,7 @@ synchronized boolean removePendingBlock(PendingMove pendingBlock) {
}
/** A node that can be the sources of a block move */
class Source extends BalancerDatanode.StorageGroup {
class Source extends DDatanode.StorageGroup {
private final List<Task> tasks = new ArrayList<Task>(2);
private long blocksToReceive = 0L;
@ -539,9 +524,8 @@ class Source extends BalancerDatanode.StorageGroup {
*/
private final List<DBlock> srcBlocks = new ArrayList<DBlock>();
private Source(StorageType storageType, double utilization,
long maxSize2Move, BalancerDatanode dn) {
dn.super(storageType, utilization, maxSize2Move);
private Source(StorageType storageType, long maxSize2Move, DDatanode dn) {
dn.super(storageType, maxSize2Move);
}
/** Add a task */
@ -565,7 +549,7 @@ Iterator<DBlock> getBlockIterator() {
*/
private long getBlockList() throws IOException {
final long size = Math.min(MAX_BLOCKS_SIZE_TO_FETCH, blocksToReceive);
final BlocksWithLocations newBlocks = nnc.getBlocks(getDatanode(), size);
final BlocksWithLocations newBlocks = nnc.getBlocks(getDatanodeInfo(), size);
long bytesReceived = 0;
for (BlockWithLocations blk : newBlocks.getBlocks()) {
@ -579,7 +563,7 @@ private long getBlockList() throws IOException {
final String[] datanodeUuids = blk.getDatanodeUuids();
final StorageType[] storageTypes = blk.getStorageTypes();
for (int i = 0; i < datanodeUuids.length; i++) {
final BalancerDatanode.StorageGroup g = storageGroupMap.get(
final StorageGroup g = storageGroupMap.get(
datanodeUuids[i], storageTypes[i]);
if (g != null) { // not unknown
block.addLocation(g);
@ -617,7 +601,7 @@ private boolean isGoodBlockCandidate(DBlock block) {
private PendingMove chooseNextMove() {
for (Iterator<Task> i = tasks.iterator(); i.hasNext();) {
final Task task = i.next();
final BalancerDatanode target = task.target.getBalancerDatanode();
final DDatanode target = task.target.getDDatanode();
PendingMove pendingBlock = new PendingMove();
if (target.addPendingBlock(pendingBlock)) {
// target is not busy, so do a tentative block allocation
@ -670,7 +654,7 @@ private void dispatchBlocks() {
final long startTime = Time.monotonicNow();
this.blocksToReceive = 2 * getScheduledSize();
boolean isTimeUp = false;
int noPendingBlockIteration = 0;
int noPendingMoveIteration = 0;
while (!isTimeUp && getScheduledSize() > 0
&& (!srcBlocks.isEmpty() || blocksToReceive > 0)) {
final PendingMove p = chooseNextMove();
@ -699,11 +683,11 @@ public void run() {
return;
}
} else {
// source node cannot find a pendingBlockToMove, iteration +1
noPendingBlockIteration++;
// source node cannot find a pending block to move, iteration +1
noPendingMoveIteration++;
// in case no blocks can be moved for source node's task,
// jump out of while-loop after 5 iterations.
if (noPendingBlockIteration >= MAX_NO_PENDING_MOVE_ITERATIONS) {
if (noPendingMoveIteration >= MAX_NO_PENDING_MOVE_ITERATIONS) {
resetScheduledSize();
}
}
@ -726,29 +710,19 @@ public void run() {
}
}
Dispatcher(NameNodeConnector theblockpool, Set<String> includedNodes,
Set<String> excludedNodes, Configuration conf) {
this.nnc = theblockpool;
this.keyManager = nnc.getKeyManager();
public Dispatcher(NameNodeConnector nnc, Set<String> includedNodes,
Set<String> excludedNodes, long movedWinWidth, int moverThreads,
int dispatcherThreads, int maxConcurrentMovesPerNode, Configuration conf) {
this.nnc = nnc;
this.excludedNodes = excludedNodes;
this.includedNodes = includedNodes;
final long movedWinWidth = conf.getLong(
DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_KEY,
DFSConfigKeys.DFS_BALANCER_MOVEDWINWIDTH_DEFAULT);
movedBlocks = new MovedBlocks<BalancerDatanode.StorageGroup>(movedWinWidth);
this.movedBlocks = new MovedBlocks<StorageGroup>(movedWinWidth);
this.cluster = NetworkTopology.getInstance(conf);
this.moveExecutor = Executors.newFixedThreadPool(conf.getInt(
DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_KEY,
DFSConfigKeys.DFS_BALANCER_MOVERTHREADS_DEFAULT));
this.dispatchExecutor = Executors.newFixedThreadPool(conf.getInt(
DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_KEY,
DFSConfigKeys.DFS_BALANCER_DISPATCHERTHREADS_DEFAULT));
this.maxConcurrentMovesPerNode = conf.getInt(
DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY,
DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT);
this.moveExecutor = Executors.newFixedThreadPool(moverThreads);
this.dispatchExecutor = Executors.newFixedThreadPool(dispatcherThreads);
this.maxConcurrentMovesPerNode = maxConcurrentMovesPerNode;
final boolean fallbackToSimpleAuthAllowed = conf.getBoolean(
CommonConfigurationKeys.IPC_CLIENT_FALLBACK_TO_SIMPLE_AUTH_ALLOWED_KEY,
@ -784,7 +758,7 @@ long bytesToMove() {
return b;
}
void add(Source source, BalancerDatanode.StorageGroup target) {
void add(Source source, StorageGroup target) {
sources.add(source);
targets.add(target);
}
@ -826,8 +800,8 @@ List<DatanodeStorageReport> init() throws IOException {
return trimmed;
}
public BalancerDatanode newDatanode(DatanodeStorageReport r) {
return new BalancerDatanode(r, maxConcurrentMovesPerNode);
public DDatanode newDatanode(DatanodeStorageReport r) {
return new DDatanode(r, maxConcurrentMovesPerNode);
}
public boolean dispatchAndCheckContinue() throws InterruptedException {
@ -884,8 +858,8 @@ static void setBlockMoveWaitTime(long time) {
private void waitForMoveCompletion() {
for(;;) {
boolean empty = true;
for (BalancerDatanode.StorageGroup t : targets) {
if (!t.getBalancerDatanode().isPendingQEmpty()) {
for (StorageGroup t : targets) {
if (!t.getDDatanode().isPendingQEmpty()) {
empty = false;
break;
}
@ -907,8 +881,8 @@ private void waitForMoveCompletion() {
* 2. the block does not have a replica on the target;
* 3. doing the move does not reduce the number of racks that the block has
*/
private boolean isGoodBlockCandidate(Source source,
BalancerDatanode.StorageGroup target, DBlock block) {
private boolean isGoodBlockCandidate(Source source, StorageGroup target,
DBlock block) {
if (source.storageType != target.storageType) {
return false;
}
@ -933,17 +907,17 @@ && isOnSameNodeGroupWithReplicas(target, block, source)) {
* Determine whether moving the given block replica from source to target
* would reduce the number of racks of the block replicas.
*/
private boolean reduceNumOfRacks(Source source,
BalancerDatanode.StorageGroup target, DBlock block) {
final DatanodeInfo sourceDn = source.getDatanode();
if (cluster.isOnSameRack(sourceDn, target.getDatanode())) {
private boolean reduceNumOfRacks(Source source, StorageGroup target,
DBlock block) {
final DatanodeInfo sourceDn = source.getDatanodeInfo();
if (cluster.isOnSameRack(sourceDn, target.getDatanodeInfo())) {
// source and target are on the same rack
return false;
}
boolean notOnSameRack = true;
synchronized (block) {
for (BalancerDatanode.StorageGroup loc : block.getLocations()) {
if (cluster.isOnSameRack(loc.getDatanode(), target.getDatanode())) {
for (StorageGroup loc : block.getLocations()) {
if (cluster.isOnSameRack(loc.getDatanodeInfo(), target.getDatanodeInfo())) {
notOnSameRack = false;
break;
}
@ -953,8 +927,8 @@ private boolean reduceNumOfRacks(Source source,
// target is not on the same rack as any replica
return false;
}
for (BalancerDatanode.StorageGroup g : block.getLocations()) {
if (g != source && cluster.isOnSameRack(g.getDatanode(), sourceDn)) {
for (StorageGroup g : block.getLocations()) {
if (g != source && cluster.isOnSameRack(g.getDatanodeInfo(), sourceDn)) {
// source is on the same rack of another replica
return false;
}
@ -971,10 +945,10 @@ private boolean reduceNumOfRacks(Source source,
* group with target
*/
private boolean isOnSameNodeGroupWithReplicas(
BalancerDatanode.StorageGroup target, DBlock block, Source source) {
final DatanodeInfo targetDn = target.getDatanode();
for (BalancerDatanode.StorageGroup g : block.getLocations()) {
if (g != source && cluster.isOnSameNodeGroup(g.getDatanode(), targetDn)) {
StorageGroup target, DBlock block, Source source) {
final DatanodeInfo targetDn = target.getDatanodeInfo();
for (StorageGroup g : block.getLocations()) {
if (g != source && cluster.isOnSameNodeGroup(g.getDatanodeInfo(), targetDn)) {
return true;
}
}

View File

@ -0,0 +1,44 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.server.balancer;
/**
 * Exit status. The numeric value attached to each constant is used directly
 * as the process's exit code on the command line.
 */
public enum ExitStatus {
  SUCCESS(0),
  IN_PROGRESS(1),
  ALREADY_RUNNING(-1),
  NO_MOVE_BLOCK(-2),
  NO_MOVE_PROGRESS(-3),
  IO_EXCEPTION(-4),
  ILLEGAL_ARGUMENTS(-5),
  INTERRUPTED(-6);

  /** The command line exit code carried by this status. */
  private final int exitCode;

  // Enum constructors are implicitly private, so no modifier is needed.
  ExitStatus(int exitCode) {
    this.exitCode = exitCode;
  }

  /** @return the command line exit code. */
  public int getExitCode() {
    return exitCode;
  }
}

View File

@ -570,10 +570,10 @@ private void runBalancer(Configuration conf,
final int r = Balancer.run(namenodes, p, conf);
if (conf.getInt(DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_KEY,
DFSConfigKeys.DFS_DATANODE_BALANCE_MAX_NUM_CONCURRENT_MOVES_DEFAULT) ==0) {
assertEquals(Balancer.ReturnStatus.NO_MOVE_PROGRESS.code, r);
assertEquals(ExitStatus.NO_MOVE_PROGRESS.getExitCode(), r);
return;
} else {
assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
}
waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster);
LOG.info("Rebalancing with default ctor.");
@ -717,7 +717,7 @@ public void testUnknownDatanode() throws Exception {
Balancer.Parameters.DEFAULT.threshold,
datanodes, Balancer.Parameters.DEFAULT.nodesToBeIncluded);
final int r = Balancer.run(namenodes, p, conf);
assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
} finally {
cluster.shutdown();
}

View File

@ -98,7 +98,7 @@ public void testBalancerWithHANameNodes() throws Exception {
assertEquals(1, namenodes.size());
assertTrue(namenodes.contains(HATestUtil.getLogicalUri(cluster)));
final int r = Balancer.run(namenodes, Balancer.Parameters.DEFAULT, conf);
assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
TestBalancer.waitForBalancer(totalUsedSpace, totalCapacity, client,
cluster, Balancer.Parameters.DEFAULT);
} finally {

View File

@ -160,7 +160,7 @@ static void runBalancer(Suite s,
// start rebalancing
final Collection<URI> namenodes = DFSUtil.getNsServiceRpcUris(s.conf);
final int r = Balancer.run(namenodes, Balancer.Parameters.DEFAULT, s.conf);
Assert.assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
Assert.assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
LOG.info("BALANCER 2");
wait(s.clients, totalUsed, totalCapacity);

View File

@ -176,7 +176,7 @@ private void runBalancer(Configuration conf,
// start rebalancing
Collection<URI> namenodes = DFSUtil.getNsServiceRpcUris(conf);
final int r = Balancer.run(namenodes, Balancer.Parameters.DEFAULT, conf);
assertEquals(Balancer.ReturnStatus.SUCCESS.code, r);
assertEquals(ExitStatus.SUCCESS.getExitCode(), r);
waitForHeartBeat(totalUsedSpace, totalCapacity);
LOG.info("Rebalancing with default factor.");
@ -190,8 +190,8 @@ private void runBalancerCanFinish(Configuration conf,
// start rebalancing
Collection<URI> namenodes = DFSUtil.getNsServiceRpcUris(conf);
final int r = Balancer.run(namenodes, Balancer.Parameters.DEFAULT, conf);
Assert.assertTrue(r == Balancer.ReturnStatus.SUCCESS.code ||
(r == Balancer.ReturnStatus.NO_MOVE_PROGRESS.code));
Assert.assertTrue(r == ExitStatus.SUCCESS.getExitCode() ||
(r == ExitStatus.NO_MOVE_PROGRESS.getExitCode()));
waitForHeartBeat(totalUsedSpace, totalCapacity);
LOG.info("Rebalancing with default factor.");
}