HDFS-16423. Balancer should not get blocks on stale storages (#3883)

Reviewed-by: litao <tomleescut@gmail.com>
Signed-off-by: Takanobu Asanuma <tasanuma@apache.org>
This commit is contained in:
qinyuren 2022-01-19 16:59:42 +08:00 committed by GitHub
parent e355646330
commit db2c3200e6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 129 additions and 4 deletions

View File

@ -1664,9 +1664,16 @@ public class BlockManager implements BlockStatsMXBean {
if(numBlocks == 0) { if(numBlocks == 0) {
return new BlocksWithLocations(new BlockWithLocations[0]); return new BlocksWithLocations(new BlockWithLocations[0]);
} }
// skip stale storage
DatanodeStorageInfo[] storageInfos = Arrays
.stream(node.getStorageInfos())
.filter(s -> !s.areBlockContentsStale())
.toArray(DatanodeStorageInfo[]::new);
// starting from a random block // starting from a random block
int startBlock = ThreadLocalRandom.current().nextInt(numBlocks); int startBlock = ThreadLocalRandom.current().nextInt(numBlocks);
Iterator<BlockInfo> iter = node.getBlockIterator(startBlock); Iterator<BlockInfo> iter = node.getBlockIterator(startBlock, storageInfos);
List<BlockWithLocations> results = new ArrayList<BlockWithLocations>(); List<BlockWithLocations> results = new ArrayList<BlockWithLocations>();
List<BlockInfo> pending = new ArrayList<BlockInfo>(); List<BlockInfo> pending = new ArrayList<BlockInfo>();
long totalSize = 0; long totalSize = 0;
@ -1685,8 +1692,8 @@ public class BlockManager implements BlockStatsMXBean {
} }
} }
if(totalSize<size) { if(totalSize<size) {
iter = node.getBlockIterator(); // start from the beginning iter = node.getBlockIterator(0, storageInfos); // start from the beginning
for(int i=0; i<startBlock&&totalSize<size; i++) { for(int i = 0; i < startBlock && totalSize < size && iter.hasNext(); i++) {
curBlock = iter.next(); curBlock = iter.next();
if(!curBlock.isComplete()) continue; if(!curBlock.isComplete()) continue;
if (curBlock.getNumBytes() < minBlockSize) { if (curBlock.getNumBytes() < minBlockSize) {

View File

@ -647,6 +647,17 @@ public class DatanodeDescriptor extends DatanodeInfo {
return new BlockIterator(startBlock, getStorageInfos()); return new BlockIterator(startBlock, getStorageInfos());
} }
/**
 * Creates a block iterator restricted to the given storages, positioned at
 * the given starting block.
 *
 * @param startBlock index of the block at which iteration starts
 * @param storageInfos the storages whose blocks are iterated
 * @return an iterator built from {@code startBlock} and {@code storageInfos}
 */
Iterator<BlockInfo> getBlockIterator(
    final int startBlock, final DatanodeStorageInfo[] storageInfos) {
  final Iterator<BlockInfo> blocks =
      new BlockIterator(startBlock, storageInfos);
  return blocks;
}
@VisibleForTesting @VisibleForTesting
public void incrementPendingReplicationWithoutTargets() { public void incrementPendingReplicationWithoutTargets() {
pendingReplicationWithoutTargets++; pendingReplicationWithoutTargets++;

View File

@ -168,6 +168,11 @@ public class DatanodeStorageInfo {
return blockContentsStale; return blockContentsStale;
} }
/**
 * Overwrites the flag that records whether this storage's block contents
 * are stale. Exposed for tests only.
 *
 * @param value the new value of the stale flag
 */
@VisibleForTesting
public void setBlockContentsStale(boolean value) {
  this.blockContentsStale = value;
}
void markStaleAfterFailover() { void markStaleAfterFailover() {
heartbeatedSinceFailover = false; heartbeatedSinceFailover = false;
blockContentsStale = true; blockContentsStale = true;

View File

@ -45,6 +45,7 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeDescriptor;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo; import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
@ -468,4 +469,68 @@ public class TestGetBlocks {
cluster.shutdown(); cluster.shutdown();
} }
} }
/**
 * Verifies that {@code getBlocks} does not return blocks that live on
 * storages whose block contents are marked stale.
 *
 * The test writes {@code blockNum} single-replica blocks to one datanode
 * with two storages, then checks three situations: no stale storage (all
 * blocks returned), storage[0] stale (only blocks of the other storage
 * returned), and all datanodes marked stale (no blocks returned).
 *
 * @throws Exception if the cluster cannot be set up or an RPC fails
 */
@Test
public void testReadSkipStaleStorage() throws Exception {
  final short repFactor = (short) 1;
  final int blockNum = 64;
  final int storageNum = 2;
  final int fileLen = BLOCK_SIZE * blockNum;
  final Path path = new Path("testReadSkipStaleStorage");
  final Configuration conf = new HdfsConfiguration();

  conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
  MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
      .numDataNodes(1)
      .storagesPerDatanode(storageNum)
      .build();
  // Always release the mini cluster, even when an assertion fails, so that
  // later tests are not affected by leaked daemons and ports.
  try {
    cluster.waitActive();

    FileSystem fs = cluster.getFileSystem();
    DFSTestUtil.createFile(fs, path, false, 1024, fileLen,
        BLOCK_SIZE, repFactor, 0, true);

    // get datanode info
    ClientProtocol client = NameNodeProxies.createProxy(conf,
        cluster.getFileSystem(0).getUri(),
        ClientProtocol.class).getProxy();
    DatanodeInfo[] dataNodes = client.getDatanodeReport(DatanodeReportType.ALL);

    // get storage info
    BlockManager bm0 = cluster.getNamesystem(0).getBlockManager();
    DatanodeStorageInfo[] storageInfos = bm0.getDatanodeManager()
        .getDatanode(dataNodes[0].getDatanodeUuid()).getStorageInfos();

    InetSocketAddress addr = new InetSocketAddress("localhost",
        cluster.getNameNodePort());
    NamenodeProtocol namenode = NameNodeProxies.createProxy(conf,
        DFSUtilClient.getNNUri(addr), NamenodeProtocol.class).getProxy();

    // with no stale storage, every block of the file must be returned
    BlockWithLocations[] blocks = namenode.getBlocks(
        dataNodes[0], fileLen*2, 0, 0).getBlocks();
    assertEquals(blockNum, blocks.length);

    // calculate the block count on storage[0]
    int count = 0;
    for (BlockWithLocations b : blocks) {
      for (String s : b.getStorageIDs()) {
        if (s.equals(storageInfos[0].getStorageID())) {
          count++;
        }
      }
    }

    // set storage[0] stale: its blocks must no longer be returned
    storageInfos[0].setBlockContentsStale(true);
    blocks = namenode.getBlocks(
        dataNodes[0], fileLen*2, 0, 0).getBlocks();
    assertEquals(blockNum - count, blocks.length);

    // set all storage stale: nothing may be returned
    bm0.getDatanodeManager().markAllDatanodesStale();
    blocks = namenode.getBlocks(
        dataNodes[0], fileLen*2, 0, 0).getBlocks();
    assertEquals(0, blocks.length);
  } finally {
    cluster.shutdown();
  }
}
} }

View File

@ -126,6 +126,7 @@ public class TestBalancerService {
TestBalancer.initConf(conf); TestBalancer.initConf(conf);
try { try {
setupCluster(conf); setupCluster(conf);
TestBalancerWithHANameNodes.waitStoragesNoStale(cluster, client, 0);
long totalCapacity = addOneDataNode(conf); // make cluster imbalanced long totalCapacity = addOneDataNode(conf); // make cluster imbalanced
Thread balancerThread = Thread balancerThread =
@ -193,6 +194,7 @@ public class TestBalancerService {
cluster.transitionToActive(0); cluster.transitionToActive(0);
cluster.waitActive(); cluster.waitActive();
TestBalancerWithHANameNodes.waitStoragesNoStale(cluster, client, 0);
long totalCapacity = addOneDataNode(conf); long totalCapacity = addOneDataNode(conf);
TestBalancer.waitForBalancer(totalUsedSpace, totalCapacity, client, TestBalancer.waitForBalancer(totalUsedSpace, totalCapacity, client,
cluster, BalancerParameters.DEFAULT); cluster, BalancerParameters.DEFAULT);

View File

@ -47,12 +47,17 @@ import org.apache.hadoop.hdfs.MiniDFSNNTopology.NNConf;
import org.apache.hadoop.hdfs.NameNodeProxies; import org.apache.hadoop.hdfs.NameNodeProxies;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys; import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.hdfs.protocol.ClientProtocol; import org.apache.hadoop.hdfs.protocol.ClientProtocol;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster; import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.DatanodeStorageInfo;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil; import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;
import org.apache.hadoop.hdfs.server.namenode.ha.ObserverReadProxyProvider; import org.apache.hadoop.hdfs.server.namenode.ha.ObserverReadProxyProvider;
import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport; import org.apache.hadoop.hdfs.server.protocol.DatanodeStorageReport;
import org.apache.hadoop.test.GenericTestUtils;
import org.apache.hadoop.test.GenericTestUtils.LogCapturer; import org.apache.hadoop.test.GenericTestUtils.LogCapturer;
import org.junit.Test; import org.junit.Test;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -75,6 +80,26 @@ public class TestBalancerWithHANameNodes {
TestBalancer.initTestSetup(); TestBalancer.initTestSetup();
} }
/**
 * Triggers block reports on the cluster and then blocks until no storage of
 * any reported datanode is flagged as having stale block contents on the
 * chosen NameNode.
 *
 * The datanode list is fetched once, before polling starts. The values 300
 * and 60000 are handed to {@code GenericTestUtils.waitFor} (presumably the
 * poll interval and the overall timeout in milliseconds).
 *
 * @param cluster the mini cluster whose datanodes send block reports
 * @param client client used to obtain the datanode report
 * @param nnIndex index of the NameNode whose block manager is inspected
 * @throws Exception if triggering reports, fetching the datanode report or
 *         waiting fails
 */
public static void waitStoragesNoStale(MiniDFSCluster cluster,
    ClientProtocol client, int nnIndex) throws Exception {
  // trigger a full block report and wait all storages out of stale
  cluster.triggerBlockReports();
  final DatanodeInfo[] reported =
      client.getDatanodeReport(HdfsConstants.DatanodeReportType.ALL);

  GenericTestUtils.waitFor(() -> {
    BlockManager blockManager =
        cluster.getNamesystem(nnIndex).getBlockManager();
    for (int d = 0; d < reported.length; d++) {
      DatanodeStorageInfo[] storages = blockManager.getDatanodeManager()
          .getDatanode(reported[d].getDatanodeUuid()).getStorageInfos();
      for (int s = 0; s < storages.length; s++) {
        if (storages[s].areBlockContentsStale()) {
          // at least one storage is still stale, keep polling
          return false;
        }
      }
    }
    return true;
  }, 300, 60000);
}
/** /**
* Test a cluster with even distribution, then a new empty node is added to * Test a cluster with even distribution, then a new empty node is added to
* the cluster. Test start a cluster with specified number of nodes, and fills * the cluster. Test start a cluster with specified number of nodes, and fills
@ -103,13 +128,17 @@ public class TestBalancerWithHANameNodes {
client = NameNodeProxies.createProxy(conf, FileSystem.getDefaultUri(conf), client = NameNodeProxies.createProxy(conf, FileSystem.getDefaultUri(conf),
ClientProtocol.class).getProxy(); ClientProtocol.class).getProxy();
doTest(conf); doTest(conf, true);
} finally { } finally {
cluster.shutdown(); cluster.shutdown();
} }
} }
void doTest(Configuration conf) throws Exception { void doTest(Configuration conf) throws Exception {
doTest(conf, false);
}
void doTest(Configuration conf, boolean withHA) throws Exception {
int numOfDatanodes = TEST_CAPACITIES.length; int numOfDatanodes = TEST_CAPACITIES.length;
long totalCapacity = TestBalancer.sum(TEST_CAPACITIES); long totalCapacity = TestBalancer.sum(TEST_CAPACITIES);
// fill up the cluster to be 30% full // fill up the cluster to be 30% full
@ -123,6 +152,12 @@ public class TestBalancerWithHANameNodes {
HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0), HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0),
cluster.getNameNode(1)); cluster.getNameNode(1));
} }
// after an HA failover all storages are marked stale; wait until none are stale
if (withHA) {
waitStoragesNoStale(cluster, client, 0);
}
// start up an empty node with the same capacity and on the same rack // start up an empty node with the same capacity and on the same rack
long newNodeCapacity = TestBalancer.CAPACITY; // new node's capacity long newNodeCapacity = TestBalancer.CAPACITY; // new node's capacity
String newNodeRack = TestBalancer.RACK2; // new node's rack String newNodeRack = TestBalancer.RACK2; // new node's rack