HDFS-16423. Balancer should not get blocks on stale storages (#3883)
Reviewed-by: litao <tomleescut@gmail.com> Signed-off-by: Takanobu Asanuma <tasanuma@apache.org>
This commit is contained in:
parent
e355646330
commit
db2c3200e6
|
@ -1664,9 +1664,16 @@ public class BlockManager implements BlockStatsMXBean {
|
||||||
if(numBlocks == 0) {
|
if(numBlocks == 0) {
|
||||||
return new BlocksWithLocations(new BlockWithLocations[0]);
|
return new BlocksWithLocations(new BlockWithLocations[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// skip stale storage
|
||||||
|
DatanodeStorageInfo[] storageInfos = Arrays
|
||||||
|
.stream(node.getStorageInfos())
|
||||||
|
.filter(s -> !s.areBlockContentsStale())
|
||||||
|
.toArray(DatanodeStorageInfo[]::new);
|
||||||
|
|
||||||
// starting from a random block
|
// starting from a random block
|
||||||
int startBlock = ThreadLocalRandom.current().nextInt(numBlocks);
|
int startBlock = ThreadLocalRandom.current().nextInt(numBlocks);
|
||||||
Iterator<BlockInfo> iter = node.getBlockIterator(startBlock);
|
Iterator<BlockInfo> iter = node.getBlockIterator(startBlock, storageInfos);
|
||||||
List<BlockWithLocations> results = new ArrayList<BlockWithLocations>();
|
List<BlockWithLocations> results = new ArrayList<BlockWithLocations>();
|
||||||
List<BlockInfo> pending = new ArrayList<BlockInfo>();
|
List<BlockInfo> pending = new ArrayList<BlockInfo>();
|
||||||
long totalSize = 0;
|
long totalSize = 0;
|
||||||
|
@ -1685,8 +1692,8 @@ public class BlockManager implements BlockStatsMXBean {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(totalSize<size) {
|
if(totalSize<size) {
|
||||||
iter = node.getBlockIterator(); // start from the beginning
|
iter = node.getBlockIterator(0, storageInfos); // start from the beginning
|
||||||
for(int i=0; i<startBlock&&totalSize<size; i++) {
|
for(int i = 0; i < startBlock && totalSize < size && iter.hasNext(); i++) {
|
||||||
curBlock = iter.next();
|
curBlock = iter.next();
|
||||||
if(!curBlock.isComplete()) continue;
|
if(!curBlock.isComplete()) continue;
|
||||||
if (curBlock.getNumBytes() < minBlockSize) {
|
if (curBlock.getNumBytes() < minBlockSize) {
|
||||||
|
|
|
@ -647,6 +647,17 @@ public class DatanodeDescriptor extends DatanodeInfo {
|
||||||
return new BlockIterator(startBlock, getStorageInfos());
|
return new BlockIterator(startBlock, getStorageInfos());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get iterator, which starts iterating from the specified block and storages.
|
||||||
|
*
|
||||||
|
* @param startBlock on which blocks are start iterating
|
||||||
|
* @param storageInfos specified storages
|
||||||
|
*/
|
||||||
|
Iterator<BlockInfo> getBlockIterator(
|
||||||
|
final int startBlock, final DatanodeStorageInfo[] storageInfos) {
|
||||||
|
return new BlockIterator(startBlock, storageInfos);
|
||||||
|
}
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public void incrementPendingReplicationWithoutTargets() {
|
public void incrementPendingReplicationWithoutTargets() {
|
||||||
pendingReplicationWithoutTargets++;
|
pendingReplicationWithoutTargets++;
|
||||||
|
|
|
@ -168,6 +168,11 @@ public class DatanodeStorageInfo {
|
||||||
return blockContentsStale;
|
return blockContentsStale;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public void setBlockContentsStale(boolean value) {
|
||||||
|
blockContentsStale = value;
|
||||||
|
}
|
||||||
|
|
||||||
void markStaleAfterFailover() {
|
void markStaleAfterFailover() {
|
||||||
heartbeatedSinceFailover = false;
|
heartbeatedSinceFailover = false;
|
||||||
blockContentsStale = true;
|
blockContentsStale = true;
|
||||||
|
|
|
@ -45,6 +45,7 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
|
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
|
||||||
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
|
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
|
||||||
|
@ -468,4 +469,68 @@ public class TestGetBlocks {
|
||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testReadSkipStaleStorage() throws Exception {
|
||||||
|
final short repFactor = (short) 1;
|
||||||
|
final int blockNum = 64;
|
||||||
|
final int storageNum = 2;
|
||||||
|
final int fileLen = BLOCK_SIZE * blockNum;
|
||||||
|
final Path path = new Path("testReadSkipStaleStorage");
|
||||||
|
final Configuration conf = new HdfsConfiguration();
|
||||||
|
|
||||||
|
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
|
||||||
|
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
|
||||||
|
.numDataNodes(1)
|
||||||
|
.storagesPerDatanode(storageNum)
|
||||||
|
.build();
|
||||||
|
cluster.waitActive();
|
||||||
|
|
||||||
|
FileSystem fs = cluster.getFileSystem();
|
||||||
|
DFSTestUtil.createFile(fs, path, false, 1024, fileLen,
|
||||||
|
BLOCK_SIZE, repFactor, 0, true);
|
||||||
|
|
||||||
|
// get datanode info
|
||||||
|
ClientProtocol client = NameNodeProxies.createProxy(conf,
|
||||||
|
cluster.getFileSystem(0).getUri(),
|
||||||
|
ClientProtocol.class).getProxy();
|
||||||
|
DatanodeInfo[] dataNodes = client.getDatanodeReport(DatanodeReportType.ALL);
|
||||||
|
|
||||||
|
// get storage info
|
||||||
|
BlockManager bm0 = cluster.getNamesystem(0).getBlockManager();
|
||||||
|
DatanodeStorageInfo[] storageInfos = bm0.getDatanodeManager()
|
||||||
|
.getDatanode(dataNodes[0].getDatanodeUuid()).getStorageInfos();
|
||||||
|
|
||||||
|
InetSocketAddress addr = new InetSocketAddress("localhost",
|
||||||
|
cluster.getNameNodePort());
|
||||||
|
NamenodeProtocol namenode = NameNodeProxies.createProxy(conf,
|
||||||
|
DFSUtilClient.getNNUri(addr), NamenodeProtocol.class).getProxy();
|
||||||
|
|
||||||
|
// check blocks count equals to blockNum
|
||||||
|
BlockWithLocations[] blocks = namenode.getBlocks(
|
||||||
|
dataNodes[0], fileLen*2, 0, 0).getBlocks();
|
||||||
|
assertEquals(blockNum, blocks.length);
|
||||||
|
|
||||||
|
// calculate the block count on storage[0]
|
||||||
|
int count = 0;
|
||||||
|
for (BlockWithLocations b : blocks) {
|
||||||
|
for (String s : b.getStorageIDs()) {
|
||||||
|
if (s.equals(storageInfos[0].getStorageID())) {
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// set storage[0] stale
|
||||||
|
storageInfos[0].setBlockContentsStale(true);
|
||||||
|
blocks = namenode.getBlocks(
|
||||||
|
dataNodes[0], fileLen*2, 0, 0).getBlocks();
|
||||||
|
assertEquals(blockNum - count, blocks.length);
|
||||||
|
|
||||||
|
// set all storage stale
|
||||||
|
bm0.getDatanodeManager().markAllDatanodesStale();
|
||||||
|
blocks = namenode.getBlocks(
|
||||||
|
dataNodes[0], fileLen*2, 0, 0).getBlocks();
|
||||||
|
assertEquals(0, blocks.length);
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -126,6 +126,7 @@ public class TestBalancerService {
|
||||||
TestBalancer.initConf(conf);
|
TestBalancer.initConf(conf);
|
||||||
try {
|
try {
|
||||||
setupCluster(conf);
|
setupCluster(conf);
|
||||||
|
TestBalancerWithHANameNodes.waitStoragesNoStale(cluster, client, 0);
|
||||||
long totalCapacity = addOneDataNode(conf); // make cluster imbalanced
|
long totalCapacity = addOneDataNode(conf); // make cluster imbalanced
|
||||||
|
|
||||||
Thread balancerThread =
|
Thread balancerThread =
|
||||||
|
@ -193,6 +194,7 @@ public class TestBalancerService {
|
||||||
cluster.transitionToActive(0);
|
cluster.transitionToActive(0);
|
||||||
cluster.waitActive();
|
cluster.waitActive();
|
||||||
|
|
||||||
|
TestBalancerWithHANameNodes.waitStoragesNoStale(cluster, client, 0);
|
||||||
long totalCapacity = addOneDataNode(conf);
|
long totalCapacity = addOneDataNode(conf);
|
||||||
TestBalancer.waitForBalancer(totalUsedSpace, totalCapacity, client,
|
TestBalancer.waitForBalancer(totalUsedSpace, totalCapacity, client,
|
||||||
cluster, BalancerParameters.DEFAULT);
|
cluster, BalancerParameters.DEFAULT);
|
||||||
|
|
|
@ -47,12 +47,17 @@ import org.apache.hadoop.hdfs.MiniDFSNNTopology.NNConf;
|
||||||
import org.apache.hadoop.hdfs.NameNodeProxies;
|
import org.apache.hadoop.hdfs.NameNodeProxies;
|
||||||
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
|
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
|
||||||
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
|
import org.apache.hadoop.hdfs.protocol.ClientProtocol;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||||
|
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||||
import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
|
import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||||
|
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
|
import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
|
||||||
import org.apache.hadoop.hdfs.server.namenode.ha.ObserverReadProxyProvider;
|
import org.apache.hadoop.hdfs.server.namenode.ha.ObserverReadProxyProvider;
|
||||||
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport;
|
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport;
|
||||||
|
import org.apache.hadoop.test.GenericTestUtils;
|
||||||
import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
|
import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -75,6 +80,26 @@ public class TestBalancerWithHANameNodes {
|
||||||
TestBalancer.initTestSetup();
|
TestBalancer.initTestSetup();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static void waitStoragesNoStale(MiniDFSCluster cluster,
|
||||||
|
ClientProtocol client, int nnIndex) throws Exception {
|
||||||
|
// trigger a full block report and wait all storages out of stale
|
||||||
|
cluster.triggerBlockReports();
|
||||||
|
DatanodeInfo[] dataNodes = client.getDatanodeReport(HdfsConstants.DatanodeReportType.ALL);
|
||||||
|
GenericTestUtils.waitFor(() -> {
|
||||||
|
BlockManager bm = cluster.getNamesystem(nnIndex).getBlockManager();
|
||||||
|
for (DatanodeInfo dn : dataNodes) {
|
||||||
|
DatanodeStorageInfo[] storageInfos = bm.getDatanodeManager()
|
||||||
|
.getDatanode(dn.getDatanodeUuid()).getStorageInfos();
|
||||||
|
for (DatanodeStorageInfo s : storageInfos) {
|
||||||
|
if (s.areBlockContentsStale()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}, 300, 60000);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test a cluster with even distribution, then a new empty node is added to
|
* Test a cluster with even distribution, then a new empty node is added to
|
||||||
* the cluster. Test start a cluster with specified number of nodes, and fills
|
* the cluster. Test start a cluster with specified number of nodes, and fills
|
||||||
|
@ -103,13 +128,17 @@ public class TestBalancerWithHANameNodes {
|
||||||
client = NameNodeProxies.createProxy(conf, FileSystem.getDefaultUri(conf),
|
client = NameNodeProxies.createProxy(conf, FileSystem.getDefaultUri(conf),
|
||||||
ClientProtocol.class).getProxy();
|
ClientProtocol.class).getProxy();
|
||||||
|
|
||||||
doTest(conf);
|
doTest(conf, true);
|
||||||
} finally {
|
} finally {
|
||||||
cluster.shutdown();
|
cluster.shutdown();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void doTest(Configuration conf) throws Exception {
|
void doTest(Configuration conf) throws Exception {
|
||||||
|
doTest(conf, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
void doTest(Configuration conf, boolean withHA) throws Exception {
|
||||||
int numOfDatanodes = TEST_CAPACITIES.length;
|
int numOfDatanodes = TEST_CAPACITIES.length;
|
||||||
long totalCapacity = TestBalancer.sum(TEST_CAPACITIES);
|
long totalCapacity = TestBalancer.sum(TEST_CAPACITIES);
|
||||||
// fill up the cluster to be 30% full
|
// fill up the cluster to be 30% full
|
||||||
|
@ -123,6 +152,12 @@ public class TestBalancerWithHANameNodes {
|
||||||
HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0),
|
HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0),
|
||||||
cluster.getNameNode(1));
|
cluster.getNameNode(1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// all storages are stale after HA
|
||||||
|
if (withHA) {
|
||||||
|
waitStoragesNoStale(cluster, client, 0);
|
||||||
|
}
|
||||||
|
|
||||||
// start up an empty node with the same capacity and on the same rack
|
// start up an empty node with the same capacity and on the same rack
|
||||||
long newNodeCapacity = TestBalancer.CAPACITY; // new node's capacity
|
long newNodeCapacity = TestBalancer.CAPACITY; // new node's capacity
|
||||||
String newNodeRack = TestBalancer.RACK2; // new node's rack
|
String newNodeRack = TestBalancer.RACK2; // new node's rack
|
||||||
|
|
Loading…
Reference in New Issue