HDFS-6710. Change BlockPlacementPolicy to consider block storage policy in replica deletion.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-6584@1612265 13f79535-47bb-0310-9956-ffa450edef68
Tsz-wo Sze 2014-07-21 13:34:10 +00:00
parent 2b07af0c59
commit 014be2510f
9 changed files with 98 additions and 48 deletions

CHANGES.txt

@@ -11,6 +11,9 @@ HDFS-6584: Archival Storage
     HDFS-6671. Change BlockPlacementPolicy to consider block storage policy
     in replication. (szetszwo)
 
+    HDFS-6710. Change BlockPlacementPolicy to consider block storage policy
+    in replica deletion. (szetszwo)
+
 Trunk (Unreleased)
 
   INCOMPATIBLE CHANGES

BlockStoragePolicy.java

@@ -118,15 +118,40 @@ public class BlockStoragePolicy {
   public List<StorageType> chooseStorageTypes(final short replication,
       final Iterable<StorageType> chosen) {
     final List<StorageType> types = chooseStorageTypes(replication);
+    diff(types, chosen, null);
+    return types;
+  }
 
-    //remove the chosen storage types
-    for(StorageType c : chosen) {
-      final int i = types.indexOf(c);
+  /**
+   * Compute the list difference t = t - c.
+   * Further, if e is not null, set e = e + c - t;
+   */
+  private static void diff(List<StorageType> t, Iterable<StorageType> c,
+      List<StorageType> e) {
+    for(StorageType storagetype : c) {
+      final int i = t.indexOf(storagetype);
       if (i >= 0) {
-        types.remove(i);
+        t.remove(i);
+      } else if (e != null) {
+        e.add(storagetype);
       }
     }
-    return types;
   }
+
+  /**
+   * Choose excess storage types for deletion, given the
+   * replication number and the storage types of the chosen replicas.
+   *
+   * @param replication the replication number.
+   * @param chosen the storage types of the chosen replicas.
+   * @return a list of {@link StorageType}s for deletion.
+   */
+  public List<StorageType> chooseExcess(final short replication,
+      final Iterable<StorageType> chosen) {
+    final List<StorageType> types = chooseStorageTypes(replication);
+    final List<StorageType> excess = new LinkedList<StorageType>();
+    diff(types, chosen, excess);
+    return excess;
+  }
 
   /** @return the fallback {@link StorageType} for creation. */
@@ -264,5 +289,5 @@ public class BlockStoragePolicy {
     throw new IllegalArgumentException(message + " in "
         + DFS_BLOCK_STORAGE_POLICIES_KEY + " \""
         + conf.get(DFS_BLOCK_STORAGE_POLICIES_KEY) + "\".");
   }
 }
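The new chooseExcess() returns the storage types of replicas that the policy no longer wants. Below is a standalone sketch of the same list-difference semantics; it is illustrative only, using a simplified StorageType enum and taking the policy's wanted list as a plain argument rather than computing it from a real BlockStoragePolicy:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;

public class ExcessDemo {
  enum StorageType { DISK, ARCHIVE, SSD }

  // Same semantics as the patch: t = t - c; if e != null, e accumulates c - t.
  static void diff(List<StorageType> t, Iterable<StorageType> c,
      List<StorageType> e) {
    for (StorageType type : c) {
      final int i = t.indexOf(type);
      if (i >= 0) {
        t.remove(i);      // this chosen type is still wanted by the policy
      } else if (e != null) {
        e.add(type);      // chosen but not wanted: an excess candidate
      }
    }
  }

  // Excess types = chosen types that the policy's wanted list cannot absorb.
  static List<StorageType> chooseExcess(List<StorageType> wanted,
      List<StorageType> chosen) {
    final List<StorageType> types = new ArrayList<StorageType>(wanted);
    final List<StorageType> excess = new LinkedList<StorageType>();
    diff(types, chosen, excess);
    return excess;
  }

  public static void main(String[] args) {
    // The policy wants one DISK and two ARCHIVE replicas, but the three
    // existing replicas sit on DISK, DISK, ARCHIVE: one DISK is excess.
    List<StorageType> wanted =
        Arrays.asList(StorageType.DISK, StorageType.ARCHIVE, StorageType.ARCHIVE);
    List<StorageType> chosen =
        Arrays.asList(StorageType.DISK, StorageType.DISK, StorageType.ARCHIVE);
    System.out.println(chooseExcess(wanted, chosen));  // prints [DISK]
  }
}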

BlockManager.java

@@ -46,6 +46,7 @@ import org.apache.hadoop.hdfs.BlockStoragePolicy;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HAUtil;
+import org.apache.hadoop.hdfs.StorageType;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
 import org.apache.hadoop.hdfs.protocol.BlockListAsLongs.BlockReportIterator;
@@ -2712,6 +2713,10 @@ public class BlockManager {
     assert namesystem.hasWriteLock();
     // first form a rack to datanodes map and
     BlockCollection bc = getBlockCollection(b);
+    final BlockStoragePolicy storagePolicy = storagePolicySuite.getPolicy(bc.getStoragePolicyID());
+    final List<StorageType> excessTypes = storagePolicy.chooseExcess(
+        replication, DatanodeStorageInfo.toStorageTypes(nonExcess));
+
     final Map<String, List<DatanodeStorageInfo>> rackMap
         = new HashMap<String, List<DatanodeStorageInfo>>();
@@ -2741,7 +2746,7 @@ public class BlockManager {
       cur = delNodeHintStorage;
     } else { // regular excessive replica removal
       cur = replicator.chooseReplicaToDelete(bc, b, replication,
-          moreThanOne, exactlyOne);
+          moreThanOne, exactlyOne, excessTypes);
     }
     firstOne = false;
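Here the excess types are computed once per over-replicated block, and each subsequent deletion pick consumes one entry. A standalone sketch of that loop, with plain StorageType values standing in for the real DatanodeStorageInfo objects (the names nonExcess and replication mirror the variables above; everything else is hypothetical):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class ExcessLoopDemo {
  enum StorageType { DISK, ARCHIVE }

  public static void main(String[] args) {
    // Three replicas of a block whose policy wants replication = 2
    // as one DISK plus one ARCHIVE; the second DISK replica is excess.
    List<StorageType> nonExcess = new ArrayList<StorageType>(
        Arrays.asList(StorageType.DISK, StorageType.DISK, StorageType.ARCHIVE));
    int replication = 2;
    List<StorageType> excessTypes = new ArrayList<StorageType>(
        Arrays.asList(StorageType.DISK));  // chooseExcess(...) analogue

    while (nonExcess.size() > replication) {
      // chooseReplicaToDelete analogue: first replica of an excess type.
      StorageType cur = null;
      for (StorageType t : nonExcess) {
        if (excessTypes.contains(t)) { cur = t; break; }
      }
      excessTypes.remove(cur);  // the pick consumes its storage type
      nonExcess.remove(cur);    // the replica is scheduled for deletion
    }
    System.out.println(nonExcess);  // prints [DISK, ARCHIVE]
  }
}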

BlockPlacementPolicy.java

@@ -119,18 +119,21 @@ public abstract class BlockPlacementPolicy {
    * @param srcBC block collection of file to which block-to-be-deleted belongs
    * @param block The block to be deleted
    * @param replicationFactor The required number of replicas for this block
-   * @param existingReplicas The replica locations of this block that are present
-   *        on at least two unique racks.
-   * @param moreExistingReplicas Replica locations of this block that are not
-   *        listed in the previous parameter.
+   * @param moreThanOne The replica locations of this block that are present
+   *        on more than one unique rack.
+   * @param exactlyOne Replica locations of this block that are present
+   *        on exactly one unique rack.
+   * @param excessTypes The excess {@link StorageType}s according to the
+   *        {@link BlockStoragePolicy}.
    * @return the replica that is the best candidate for deletion
    */
   abstract public DatanodeStorageInfo chooseReplicaToDelete(
       BlockCollection srcBC,
       Block block,
       short replicationFactor,
-      Collection<DatanodeStorageInfo> existingReplicas,
-      Collection<DatanodeStorageInfo> moreExistingReplicas);
+      Collection<DatanodeStorageInfo> moreThanOne,
+      Collection<DatanodeStorageInfo> exactlyOne,
+      List<StorageType> excessTypes);
 
   /**
    * Used to setup a BlockPlacementPolicy object. This should be defined by
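Every BlockPlacementPolicy subclass now has to accept the extra excessTypes argument. A minimal hypothetical override (the class name and selection rule are illustrative, not part of this patch) that honors the same contract as the default implementation, i.e. only replicas of an excess type are eligible and the chosen type is consumed from the list:

package org.apache.hadoop.hdfs.server.blockmanagement;

import java.util.Collection;
import java.util.List;

import org.apache.hadoop.hdfs.StorageType;
import org.apache.hadoop.hdfs.protocol.Block;

// Hypothetical policy: delete the first candidate whose storage type
// the block's storage policy considers excess.
public class FirstExcessMatchPolicy extends BlockPlacementPolicyDefault {
  @Override
  public DatanodeStorageInfo chooseReplicaToDelete(BlockCollection srcBC,
      Block block, short replicationFactor,
      Collection<DatanodeStorageInfo> moreThanOne,
      Collection<DatanodeStorageInfo> exactlyOne,
      List<StorageType> excessTypes) {
    Collection<DatanodeStorageInfo> candidates =
        !moreThanOne.isEmpty() ? moreThanOne : exactlyOne;
    for (DatanodeStorageInfo storage : candidates) {
      if (excessTypes.contains(storage.getStorageType())) {
        // Consume the type so successive picks spread across storage types,
        // mirroring the default implementation's behavior.
        excessTypes.remove(storage.getStorageType());
        return storage;
      }
    }
    return null;  // no replica of an excess type among the candidates
  }
}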

BlockPlacementPolicyDefault.java

@@ -22,7 +22,6 @@ import static org.apache.hadoop.util.Time.now;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 import java.util.TreeSet;
@@ -273,8 +272,8 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
     // Keep a copy of original excludedNodes
     final Set<Node> oldExcludedNodes = avoidStaleNodes ?
         new HashSet<Node>(excludedNodes) : null;
-    final List<StorageType> storageTypes = chooseStorageTypes(storagePolicy,
-        (short)totalReplicasExpected, results);
+    final List<StorageType> storageTypes = storagePolicy.chooseStorageTypes(
+        (short)totalReplicasExpected, DatanodeStorageInfo.toStorageTypes(results));
     try {
       if (numOfResults == 0) {
         writer = chooseLocalStorage(writer, excludedNodes, blocksize,
@@ -671,28 +670,6 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
     }
     return true;
   }
-
-  private static List<StorageType> chooseStorageTypes(
-      final BlockStoragePolicy storagePolicy, final short replication,
-      final Iterable<DatanodeStorageInfo> chosen) {
-    return storagePolicy.chooseStorageTypes(
-        replication, new Iterable<StorageType>() {
-          @Override
-          public Iterator<StorageType> iterator() {
-            return new Iterator<StorageType>() {
-              final Iterator<DatanodeStorageInfo> i = chosen.iterator();
-              @Override
-              public boolean hasNext() {return i.hasNext();}
-              @Override
-              public StorageType next() {return i.next().getStorageType();}
-              @Override
-              public void remove() {
-                throw new UnsupportedOperationException();
-              }
-            };
-          }
-        });
-  }
 
   /**
    * Return a pipeline of nodes.
@@ -759,7 +736,8 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
   public DatanodeStorageInfo chooseReplicaToDelete(BlockCollection bc,
       Block block, short replicationFactor,
       Collection<DatanodeStorageInfo> first,
-      Collection<DatanodeStorageInfo> second) {
+      Collection<DatanodeStorageInfo> second,
+      final List<StorageType> excessTypes) {
     long oldestHeartbeat =
         now() - heartbeatInterval * tolerateHeartbeatMultiplier;
     DatanodeStorageInfo oldestHeartbeatStorage = null;
@@ -769,6 +747,10 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
     // Pick the node with the oldest heartbeat or with the least free space,
     // if all heartbeats are within the tolerable heartbeat interval
     for(DatanodeStorageInfo storage : pickupReplicaSet(first, second)) {
+      if (!excessTypes.contains(storage.getStorageType())) {
+        continue;
+      }
+
       final DatanodeDescriptor node = storage.getDatanodeDescriptor();
       long free = node.getRemaining();
       long lastHeartbeat = node.getLastUpdate();
@@ -781,9 +763,10 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
         minSpaceStorage = storage;
       }
     }
-
-    return oldestHeartbeatStorage != null? oldestHeartbeatStorage
-        : minSpaceStorage;
+    final DatanodeStorageInfo storage = oldestHeartbeatStorage != null?
+        oldestHeartbeatStorage : minSpaceStorage;
+    excessTypes.remove(storage.getStorageType());
+    return storage;
   }
 
   /**
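The patched selection rule in full: skip candidates whose storage type is not excess, prefer the stalest heartbeat, fall back to least free space, then consume the winner's type. A self-contained sketch with a simplified Candidate class standing in for DatanodeStorageInfo/DatanodeDescriptor (names and sample numbers are hypothetical):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class DeletePickDemo {
  enum StorageType { DISK, ARCHIVE }

  // Simplified stand-in for DatanodeStorageInfo plus its DatanodeDescriptor.
  static class Candidate {
    final String name;
    final StorageType type;
    final long lastHeartbeat;  // node.getLastUpdate() analogue
    final long free;           // node.getRemaining() analogue
    Candidate(String name, StorageType type, long lastHeartbeat, long free) {
      this.name = name; this.type = type;
      this.lastHeartbeat = lastHeartbeat; this.free = free;
    }
  }

  static Candidate choose(Iterable<Candidate> candidates,
      List<StorageType> excessTypes, long staleCutoff) {
    Candidate oldest = null, minSpace = null;
    for (Candidate c : candidates) {
      if (!excessTypes.contains(c.type)) {
        continue;  // the storage policy does not consider this type excess
      }
      if (c.lastHeartbeat < staleCutoff
          && (oldest == null || c.lastHeartbeat < oldest.lastHeartbeat)) {
        oldest = c;  // stalest heartbeat wins outright
      }
      if (minSpace == null || c.free < minSpace.free) {
        minSpace = c;  // otherwise the least free space wins
      }
    }
    Candidate pick = oldest != null ? oldest : minSpace;
    if (pick != null) {
      excessTypes.remove(pick.type);  // consume, as the patched method does
    }
    return pick;
  }

  public static void main(String[] args) {
    List<StorageType> excess =
        new ArrayList<StorageType>(Arrays.asList(StorageType.DISK));
    List<Candidate> candidates = Arrays.asList(
        new Candidate("dn1-archive", StorageType.ARCHIVE, 100, 10),  // skipped
        new Candidate("dn2-disk", StorageType.DISK, 100, 500),
        new Candidate("dn3-disk", StorageType.DISK, 100, 200));
    // All heartbeats are fresh (cutoff 0), so the least-free DISK wins.
    System.out.println(choose(candidates, excess, 0).name);  // dn3-disk
  }
}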

DatanodeStorageInfo.java

@@ -292,6 +292,26 @@ public class DatanodeStorageInfo {
     return "[" + storageType + "]" + storageID + ":" + state;
   }
 
+  static Iterable<StorageType> toStorageTypes(
+      final Iterable<DatanodeStorageInfo> infos) {
+    return new Iterable<StorageType>() {
+        @Override
+        public Iterator<StorageType> iterator() {
+          return new Iterator<StorageType>() {
+            final Iterator<DatanodeStorageInfo> i = infos.iterator();
+            @Override
+            public boolean hasNext() {return i.hasNext();}
+            @Override
+            public StorageType next() {return i.next().getStorageType();}
+            @Override
+            public void remove() {
+              throw new UnsupportedOperationException();
+            }
+          };
+        }
+    };
+  }
+
   /** @return the first {@link DatanodeStorageInfo} corresponding to
    *          the given datanode
    */
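toStorageTypes builds a lazy view: iterating it walks the underlying DatanodeStorageInfo collection on the fly with no intermediate list, and remove() is unsupported because the view is read-only. A standalone sketch of the same adapter shape, using a minimal Info stand-in (everything here is illustrative except the adapter structure itself):

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class LazyViewDemo {
  enum StorageType { DISK, ARCHIVE }

  // Minimal stand-in exposing only the accessor the adapter needs.
  static class Info {
    final StorageType type;
    Info(StorageType type) { this.type = type; }
    StorageType getStorageType() { return type; }
  }

  static Iterable<StorageType> toStorageTypes(final Iterable<Info> infos) {
    return new Iterable<StorageType>() {
      @Override
      public Iterator<StorageType> iterator() {
        // A fresh delegate per iteration, so the view is reusable.
        final Iterator<Info> i = infos.iterator();
        return new Iterator<StorageType>() {
          @Override public boolean hasNext() { return i.hasNext(); }
          @Override public StorageType next() { return i.next().getStorageType(); }
          @Override public void remove() { throw new UnsupportedOperationException(); }
        };
      }
    };
  }

  public static void main(String[] args) {
    List<Info> infos = Arrays.asList(
        new Info(StorageType.DISK), new Info(StorageType.ARCHIVE));
    for (StorageType t : toStorageTypes(infos)) {
      System.out.println(t);  // DISK, then ARCHIVE
    }
  }
}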

TestReplicationPolicy.java

@@ -46,6 +46,7 @@ import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.LogVerificationAppender;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.StorageType;
 import org.apache.hadoop.hdfs.TestBlockStoragePolicy;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
@@ -933,8 +934,10 @@ public class TestReplicationPolicy {
     // replica nodes, while storages[2] and dataNodes[5] are in second set.
     assertEquals(2, first.size());
     assertEquals(2, second.size());
+    List<StorageType> excessTypes = new ArrayList<StorageType>();
+    excessTypes.add(StorageType.DEFAULT);
     DatanodeStorageInfo chosen = replicator.chooseReplicaToDelete(
-        null, null, (short)3, first, second);
+        null, null, (short)3, first, second, excessTypes);
     // Within first set, storages[1] with less free space
     assertEquals(chosen, storages[1]);
@@ -942,8 +945,9 @@ public class TestReplicationPolicy {
     assertEquals(0, first.size());
     assertEquals(3, second.size());
     // Within second set, storages[5] with less free space
+    excessTypes.add(StorageType.DEFAULT);
     chosen = replicator.chooseReplicaToDelete(
-        null, null, (short)2, first, second);
+        null, null, (short)2, first, second, excessTypes);
     assertEquals(chosen, storages[5]);
   }
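Note why the test adds StorageType.DEFAULT a second time: chooseReplicaToDelete removes the chosen replica's storage type from excessTypes, so the list is consumed by each pick and must hold one entry per expected deletion. A tiny illustration of that consumption (hypothetical class name, real List semantics):

import java.util.ArrayList;
import java.util.List;

public class ConsumeDemo {
  enum StorageType { DEFAULT }

  public static void main(String[] args) {
    List<StorageType> excessTypes = new ArrayList<StorageType>();
    excessTypes.add(StorageType.DEFAULT);

    // chooseReplicaToDelete(...) ends with excessTypes.remove(chosenType),
    // so after one successful pick the list is empty again:
    excessTypes.remove(StorageType.DEFAULT);    // what the pick does internally
    System.out.println(excessTypes.isEmpty());  // true

    // Hence the test re-adds DEFAULT before the next pick.
    excessTypes.add(StorageType.DEFAULT);
  }
}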

TestReplicationPolicyWithNodeGroup.java

@@ -36,6 +36,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
+import org.apache.hadoop.hdfs.StorageType;
 import org.apache.hadoop.hdfs.TestBlockStoragePolicy;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
@@ -612,8 +613,10 @@ public class TestReplicationPolicyWithNodeGroup {
         replicaList, rackMap, first, second);
     assertEquals(3, first.size());
     assertEquals(1, second.size());
+    List<StorageType> excessTypes = new ArrayList<StorageType>();
+    excessTypes.add(StorageType.DEFAULT);
     DatanodeStorageInfo chosen = replicator.chooseReplicaToDelete(
-        null, null, (short)3, first, second);
+        null, null, (short)3, first, second, excessTypes);
     // Within first set {dataNodes[0], dataNodes[1], dataNodes[2]},
     // dataNodes[0] and dataNodes[1] are in the same nodegroup,
     // but dataNodes[1] is chosen as less free space
@@ -624,16 +627,18 @@ public class TestReplicationPolicyWithNodeGroup {
     assertEquals(1, second.size());
     // Within first set {dataNodes[0], dataNodes[2]}, dataNodes[2] is chosen
     // as less free space
+    excessTypes.add(StorageType.DEFAULT);
     chosen = replicator.chooseReplicaToDelete(
-        null, null, (short)2, first, second);
+        null, null, (short)2, first, second, excessTypes);
     assertEquals(chosen, storages[2]);
     replicator.adjustSetsWithChosenReplica(rackMap, first, second, chosen);
     assertEquals(0, first.size());
     assertEquals(2, second.size());
     // Within second set, dataNodes[5] with less free space
+    excessTypes.add(StorageType.DEFAULT);
     chosen = replicator.chooseReplicaToDelete(
-        null, null, (short)1, first, second);
+        null, null, (short)1, first, second, excessTypes);
     assertEquals(chosen, storages[5]);
   }

TestDNFencing.java

@@ -38,6 +38,7 @@ import org.apache.hadoop.hdfs.DFSTestUtil;
 import org.apache.hadoop.hdfs.DFSUtil;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.MiniDFSNNTopology;
+import org.apache.hadoop.hdfs.StorageType;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
 import org.apache.hadoop.hdfs.protocolPB.DatanodeProtocolClientSideTranslatorPB;
@@ -588,7 +589,8 @@ public class TestDNFencing {
       public DatanodeStorageInfo chooseReplicaToDelete(BlockCollection inode,
           Block block, short replicationFactor,
           Collection<DatanodeStorageInfo> first,
-          Collection<DatanodeStorageInfo> second) {
+          Collection<DatanodeStorageInfo> second,
+          List<StorageType> excessTypes) {
         Collection<DatanodeStorageInfo> chooseFrom = !first.isEmpty() ? first : second;