HDFS-17037. Consider nonDfsUsed when running balancer. (#5715). Contributed by Shuyan Zhang.

Signed-off-by: He Xiaoqiao <hexiaoqiao@apache.org>
This commit is contained in:
zhangshuyan 2023-06-09 16:19:08 +08:00 committed by GitHub
parent 7bb09f1010
commit 9c989515ba
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 77 additions and 37 deletions

View File

@ -75,7 +75,7 @@ public class StorageReport {
} }
public long getRemaining() { public long getRemaining() {
return remaining; return Math.max(remaining, 0L);
} }
public long getBlockPoolUsed() { public long getBlockPoolUsed() {

View File

@ -104,21 +104,21 @@ abstract class BalancingPolicy {
for(StorageReport s : r.getStorageReports()) { for(StorageReport s : r.getStorageReports()) {
final StorageType t = s.getStorage().getStorageType(); final StorageType t = s.getStorage().getStorageType();
totalCapacities.add(t, s.getCapacity()); totalCapacities.add(t, s.getCapacity());
totalUsedSpaces.add(t, s.getDfsUsed()); totalUsedSpaces.add(t, s.getCapacity() - s.getRemaining());
} }
} }
@Override @Override
Double getUtilization(DatanodeStorageReport r, final StorageType t) { Double getUtilization(DatanodeStorageReport r, final StorageType t) {
long capacity = 0L; long capacity = 0L;
long dfsUsed = 0L; long totalUsed = 0L;
for(StorageReport s : r.getStorageReports()) { for(StorageReport s : r.getStorageReports()) {
if (s.getStorage().getStorageType() == t) { if (s.getStorage().getStorageType() == t) {
capacity += s.getCapacity(); capacity += s.getCapacity();
dfsUsed += s.getDfsUsed(); totalUsed += s.getCapacity() - s.getRemaining();
} }
} }
return capacity == 0L? null: dfsUsed*100.0/capacity; return capacity == 0L ? null : totalUsed * 100.0 / capacity;
} }
} }
@ -138,7 +138,13 @@ abstract class BalancingPolicy {
void accumulateSpaces(DatanodeStorageReport r) { void accumulateSpaces(DatanodeStorageReport r) {
for(StorageReport s : r.getStorageReports()) { for(StorageReport s : r.getStorageReports()) {
final StorageType t = s.getStorage().getStorageType(); final StorageType t = s.getStorage().getStorageType();
totalCapacities.add(t, s.getCapacity()); // Use s.getRemaining() + s.getBlockPoolUsed() instead of
// s.getCapacity() here to avoid moving blocks towards nodes with
// little actual available space.
// The util is computed as blockPoolUsed/(remaining+blockPoolUsed),
// which means nodes with more remaining space and less blockPoolUsed
// will serve as the recipient during the balancing process.
totalCapacities.add(t, s.getRemaining() + s.getBlockPoolUsed());
totalUsedSpaces.add(t, s.getBlockPoolUsed()); totalUsedSpaces.add(t, s.getBlockPoolUsed());
} }
} }
@ -149,11 +155,11 @@ abstract class BalancingPolicy {
long blockPoolUsed = 0L; long blockPoolUsed = 0L;
for(StorageReport s : r.getStorageReports()) { for(StorageReport s : r.getStorageReports()) {
if (s.getStorage().getStorageType() == t) { if (s.getStorage().getStorageType() == t) {
capacity += s.getCapacity(); capacity += s.getRemaining() + s.getBlockPoolUsed();
blockPoolUsed += s.getBlockPoolUsed(); blockPoolUsed += s.getBlockPoolUsed();
} }
} }
return capacity == 0L? null: blockPoolUsed*100.0/capacity; return capacity == 0L ? null : blockPoolUsed * 100.0 / capacity;
} }
} }
} }

View File

@ -41,6 +41,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_WEB_AUTHENTICATION_KERBER
import java.lang.reflect.Field; import java.lang.reflect.Field;
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy;
import org.junit.AfterClass; import org.junit.AfterClass;
import static org.apache.hadoop.hdfs.server.datanode.SimulatedFSDataset.CONFIG_PROPERTY_NONDFSUSED;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
@ -502,8 +504,9 @@ public class TestBalancer {
balanced = true; balanced = true;
int actualExcludedNodeCount = 0; int actualExcludedNodeCount = 0;
for (DatanodeInfo datanode : datanodeReport) { for (DatanodeInfo datanode : datanodeReport) {
double nodeUtilization = ((double)datanode.getDfsUsed()) double nodeUtilization =
/ datanode.getCapacity(); ((double) datanode.getDfsUsed() + datanode.getNonDfsUsed()) /
datanode.getCapacity();
if (Dispatcher.Util.isExcluded(p.getExcludedNodes(), datanode)) { if (Dispatcher.Util.isExcluded(p.getExcludedNodes(), datanode)) {
if (checkExcludeNodesUtilization) { if (checkExcludeNodesUtilization) {
assertTrue(nodeUtilization == 0); assertTrue(nodeUtilization == 0);
@ -641,7 +644,7 @@ public class TestBalancer {
private void doTest(Configuration conf, long[] capacities, String[] racks, private void doTest(Configuration conf, long[] capacities, String[] racks,
long newCapacity, String newRack, NewNodeInfo nodes, long newCapacity, String newRack, NewNodeInfo nodes,
boolean useTool, boolean useFile) throws Exception { boolean useTool, boolean useFile) throws Exception {
doTest(conf, capacities, racks, newCapacity, newRack, nodes, doTest(conf, capacities, racks, newCapacity, 0L, newRack, nodes,
useTool, useFile, false, 0.3); useTool, useFile, false, 0.3);
} }
@ -666,8 +669,8 @@ public class TestBalancer {
* @throws Exception * @throws Exception
*/ */
private void doTest(Configuration conf, long[] capacities, private void doTest(Configuration conf, long[] capacities,
String[] racks, long newCapacity, String newRack, NewNodeInfo nodes, String[] racks, long newCapacity, long newNonDfsUsed, String newRack,
boolean useTool, boolean useFile, NewNodeInfo nodes, boolean useTool, boolean useFile,
boolean useNamesystemSpy, double clusterUtilization) throws Exception { boolean useNamesystemSpy, double clusterUtilization) throws Exception {
LOG.info("capacities = " + long2String(capacities)); LOG.info("capacities = " + long2String(capacities));
LOG.info("racks = " + Arrays.asList(racks)); LOG.info("racks = " + Arrays.asList(racks));
@ -701,10 +704,11 @@ public class TestBalancer {
long totalCapacity = sum(capacities); long totalCapacity = sum(capacities);
// fill up the cluster to be `clusterUtilization` full // fill up the cluster to be `clusterUtilization` full
long totalUsedSpace = (long) (totalCapacity * clusterUtilization); long totalDfsUsedSpace = (long) (totalCapacity * clusterUtilization);
createFile(cluster, filePath, totalUsedSpace / numOfDatanodes, createFile(cluster, filePath, totalDfsUsedSpace / numOfDatanodes,
(short) numOfDatanodes, 0); (short) numOfDatanodes, 0);
conf.setLong(CONFIG_PROPERTY_NONDFSUSED, newNonDfsUsed);
if (nodes == null) { // there is no specification of new nodes. if (nodes == null) { // there is no specification of new nodes.
// start up an empty node with the same capacity and on the same rack // start up an empty node with the same capacity and on the same rack
cluster.startDataNodes(conf, 1, true, null, cluster.startDataNodes(conf, 1, true, null,
@ -774,9 +778,11 @@ public class TestBalancer {
// run balancer and validate results // run balancer and validate results
if (useTool) { if (useTool) {
runBalancerCli(conf, totalUsedSpace, totalCapacity, p, useFile, expectedExcludedNodes); runBalancerCli(conf, totalDfsUsedSpace, newNonDfsUsed,
totalCapacity, p, useFile, expectedExcludedNodes);
} else { } else {
runBalancer(conf, totalUsedSpace, totalCapacity, p, expectedExcludedNodes); runBalancer(conf, totalDfsUsedSpace, newNonDfsUsed,
totalCapacity, p, expectedExcludedNodes, true);
} }
} finally { } finally {
if(cluster != null) { if(cluster != null) {
@ -791,16 +797,18 @@ public class TestBalancer {
BalancerParameters.DEFAULT, 0); BalancerParameters.DEFAULT, 0);
} }
private void runBalancer(Configuration conf, long totalUsedSpace, private void runBalancer(Configuration conf, long totalDfsUsedSpace,
long totalCapacity, BalancerParameters p, int excludedNodes) long totalCapacity, BalancerParameters p, int excludedNodes)
throws Exception { throws Exception {
runBalancer(conf, totalUsedSpace, totalCapacity, p, excludedNodes, true); runBalancer(conf, totalDfsUsedSpace, 0, totalCapacity, p, excludedNodes,
true);
} }
private void runBalancer(Configuration conf, long totalUsedSpace, private void runBalancer(Configuration conf, long totalDfsUsedSpace,
long totalCapacity, BalancerParameters p, int excludedNodes, long totalNonDfsUsedSpace, long totalCapacity, BalancerParameters p,
boolean checkExcludeNodesUtilization) throws Exception { int excludedNodes, boolean checkExcludeNodesUtilization)
waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster); throws Exception {
waitForHeartBeat(totalDfsUsedSpace, totalCapacity, client, cluster);
int retry = 5; int retry = 5;
while (retry > 0) { while (retry > 0) {
@ -816,9 +824,10 @@ public class TestBalancer {
} else { } else {
assertEquals(ExitStatus.SUCCESS.getExitCode(), run); assertEquals(ExitStatus.SUCCESS.getExitCode(), run);
} }
waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster); waitForHeartBeat(totalDfsUsedSpace, totalCapacity, client, cluster);
LOG.info(" ."); LOG.info(" .");
try { try {
long totalUsedSpace = totalDfsUsedSpace + totalNonDfsUsedSpace;
waitForBalancer(totalUsedSpace, totalCapacity, client, cluster, p, waitForBalancer(totalUsedSpace, totalCapacity, client, cluster, p,
excludedNodes, checkExcludeNodesUtilization); excludedNodes, checkExcludeNodesUtilization);
} catch (TimeoutException e) { } catch (TimeoutException e) {
@ -892,10 +901,10 @@ public class TestBalancer {
return ExitStatus.SUCCESS.getExitCode(); return ExitStatus.SUCCESS.getExitCode();
} }
private void runBalancerCli(Configuration conf, long totalUsedSpace, private void runBalancerCli(Configuration conf, long totalDfsUsedSpace,
long totalCapacity, BalancerParameters p, boolean useFile, long totalNonDfsUsedSpace, long totalCapacity, BalancerParameters p,
int expectedExcludedNodes) throws Exception { boolean useFile, int expectedExcludedNodes) throws Exception {
waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster); waitForHeartBeat(totalDfsUsedSpace, totalCapacity, client, cluster);
List <String> args = new ArrayList<String>(); List <String> args = new ArrayList<String>();
args.add("-policy"); args.add("-policy");
args.add("datanode"); args.add("datanode");
@ -939,8 +948,9 @@ public class TestBalancer {
final int r = tool.run(args.toArray(new String[0])); // start rebalancing final int r = tool.run(args.toArray(new String[0])); // start rebalancing
assertEquals("Tools should exit 0 on success", 0, r); assertEquals("Tools should exit 0 on success", 0, r);
waitForHeartBeat(totalUsedSpace, totalCapacity, client, cluster); waitForHeartBeat(totalDfsUsedSpace, totalCapacity, client, cluster);
LOG.info("Rebalancing with default ctor."); LOG.info("Rebalancing with default ctor.");
long totalUsedSpace = totalDfsUsedSpace + totalNonDfsUsedSpace;
waitForBalancer(totalUsedSpace, totalCapacity, client, cluster, p, expectedExcludedNodes); waitForBalancer(totalUsedSpace, totalCapacity, client, cluster, p, expectedExcludedNodes);
if (excludeHostsFile != null && excludeHostsFile.exists()) { if (excludeHostsFile != null && excludeHostsFile.exists()) {
@ -1112,6 +1122,16 @@ public class TestBalancer {
new String[]{RACK0, RACK1}, CAPACITY, RACK2); new String[]{RACK0, RACK1}, CAPACITY, RACK2);
} }
/** Test a cluster with even distribution,
* then a new node with nonDfsUsed is added to the cluster. */
@Test(timeout=100000)
public void testBalancer3() throws Exception {
Configuration conf = new HdfsConfiguration();
initConf(conf);
doTest(conf, new long[]{CAPACITY, CAPACITY}, new String[]{RACK0, RACK1},
CAPACITY, 1000L, RACK2, null, false, false, false, 0.3);
}
private void testBalancerDefaultConstructor(Configuration conf, private void testBalancerDefaultConstructor(Configuration conf,
long[] capacities, String[] racks, long newCapacity, String newRack) long[] capacities, String[] racks, long newCapacity, String newRack)
throws Exception { throws Exception {
@ -1504,10 +1524,11 @@ public class TestBalancer {
conf.setLong(DFSConfigKeys.DFS_BALANCER_GETBLOCKS_MIN_BLOCK_SIZE_KEY, 1L); conf.setLong(DFSConfigKeys.DFS_BALANCER_GETBLOCKS_MIN_BLOCK_SIZE_KEY, 1L);
final int BLOCK_SIZE = 1024*1024; final int BLOCK_SIZE = 1024*1024;
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
cluster = new MiniDFSCluster cluster = new MiniDFSCluster
.Builder(conf) .Builder(conf)
.numDataNodes(1) .numDataNodes(1)
.storageCapacities(new long[] { BLOCK_SIZE * 10 }) .simulatedCapacities(new long[]{BLOCK_SIZE * 10})
.storageTypes(new StorageType[] { DEFAULT }) .storageTypes(new StorageType[] { DEFAULT })
.storagesPerDatanode(1) .storagesPerDatanode(1)
.build(); .build();
@ -1517,11 +1538,12 @@ public class TestBalancer {
final Path path1 = new Path("/" + METHOD_NAME + ".01.dat"); final Path path1 = new Path("/" + METHOD_NAME + ".01.dat");
DistributedFileSystem fs = cluster.getFileSystem(); DistributedFileSystem fs = cluster.getFileSystem();
DFSTestUtil.createFile(fs, path1, BLOCK_SIZE, BLOCK_SIZE * 2, BLOCK_SIZE, DFSTestUtil.createFile(fs, path1, BLOCK_SIZE, BLOCK_SIZE * 3, BLOCK_SIZE,
(short) 1, SEED); (short) 1, SEED);
// Add another DN with the same capacity, cluster is now unbalanced // Add another DN with the same capacity, cluster is now unbalanced
cluster.startDataNodes(conf, 1, true, null, null); cluster.startDataNodes(conf, 1, true, null, null, null,
new long[]{BLOCK_SIZE * 10}, false);
cluster.triggerHeartbeats(); cluster.triggerHeartbeats();
Collection<URI> namenodes = DFSUtil.getInternalNsRpcUris(conf); Collection<URI> namenodes = DFSUtil.getInternalNsRpcUris(conf);
@ -1773,7 +1795,7 @@ public class TestBalancer {
pBuilder.setExcludedNodes(excludedList); pBuilder.setExcludedNodes(excludedList);
// start balancer and check the failed num of moving task // start balancer and check the failed num of moving task
runBalancer(conf, totalUsedSpace, totalCapacity, pBuilder.build(), runBalancer(conf, totalUsedSpace, 0, totalCapacity, pBuilder.build(),
excludedList.size(), false); excludedList.size(), false);
// check total blocks, max wait time 60s // check total blocks, max wait time 60s
@ -1891,7 +1913,7 @@ public class TestBalancer {
capacities[i] = CAPACITY; capacities[i] = CAPACITY;
racks[i] = (i < numDNs/2 ? RACK0 : RACK1); racks[i] = (i < numDNs/2 ? RACK0 : RACK1);
} }
doTest(conf, capacities, racks, CAPACITY, RACK2, doTest(conf, capacities, racks, CAPACITY, 0L, RACK2,
// Use only 1 node and set the starting capacity to 50% to allow the // Use only 1 node and set the starting capacity to 50% to allow the
// balancing to complete in only one iteration. This is necessary // balancing to complete in only one iteration. This is necessary
// because the startGetBlocksTime and endGetBlocksTime measures across // because the startGetBlocksTime and endGetBlocksTime measures across

View File

@ -162,6 +162,11 @@ public class SimulatedFSDataset implements FsDatasetSpi<FsVolumeSpi> {
private static final DatanodeStorage.State DEFAULT_STATE = private static final DatanodeStorage.State DEFAULT_STATE =
DatanodeStorage.State.NORMAL; DatanodeStorage.State.NORMAL;
public static final String CONFIG_PROPERTY_NONDFSUSED =
"dfs.datanode.simulateddatastorage.nondfsused";
public static final long DEFAULT_NONDFSUSED = 0L;
static final byte[] nullCrcFileData; static final byte[] nullCrcFileData;
private final DataNodeLockManager datasetLockManager; private final DataNodeLockManager datasetLockManager;
@ -467,11 +472,12 @@ public class SimulatedFSDataset implements FsDatasetSpi<FsVolumeSpi> {
new ConcurrentHashMap<>(); new ConcurrentHashMap<>();
private final long capacity; // in bytes private final long capacity; // in bytes
private long nonDfsUsed;
private final DatanodeStorage dnStorage; private final DatanodeStorage dnStorage;
private final SimulatedVolume volume; private final SimulatedVolume volume;
synchronized long getFree() { synchronized long getFree() {
return capacity - getUsed(); return capacity - getUsed() - getNonDfsUsed();
} }
long getCapacity() { long getCapacity() {
@ -486,6 +492,10 @@ public class SimulatedFSDataset implements FsDatasetSpi<FsVolumeSpi> {
return used; return used;
} }
synchronized long getNonDfsUsed() {
return nonDfsUsed;
}
synchronized long getBlockPoolUsed(String bpid) throws IOException { synchronized long getBlockPoolUsed(String bpid) throws IOException {
return getBPStorage(bpid).getUsed(); return getBPStorage(bpid).getUsed();
} }
@ -506,7 +516,7 @@ public class SimulatedFSDataset implements FsDatasetSpi<FsVolumeSpi> {
getBPStorage(bpid).free(amount); getBPStorage(bpid).free(amount);
} }
SimulatedStorage(long cap, DatanodeStorage.State state, SimulatedStorage(long cap, DatanodeStorage.State state, long nonDfsUsed,
FileIoProvider fileIoProvider, Configuration conf) { FileIoProvider fileIoProvider, Configuration conf) {
capacity = cap; capacity = cap;
dnStorage = new DatanodeStorage( dnStorage = new DatanodeStorage(
@ -515,6 +525,7 @@ public class SimulatedFSDataset implements FsDatasetSpi<FsVolumeSpi> {
DataNodeVolumeMetrics volumeMetrics = DataNodeVolumeMetrics volumeMetrics =
DataNodeVolumeMetrics.create(conf, dnStorage.getStorageID()); DataNodeVolumeMetrics.create(conf, dnStorage.getStorageID());
this.volume = new SimulatedVolume(this, fileIoProvider, volumeMetrics); this.volume = new SimulatedVolume(this, fileIoProvider, volumeMetrics);
this.nonDfsUsed = nonDfsUsed;
} }
synchronized void addBlockPool(String bpid) { synchronized void addBlockPool(String bpid) {
@ -548,7 +559,7 @@ public class SimulatedFSDataset implements FsDatasetSpi<FsVolumeSpi> {
synchronized StorageReport getStorageReport(String bpid) { synchronized StorageReport getStorageReport(String bpid) {
return new StorageReport(dnStorage, return new StorageReport(dnStorage,
false, getCapacity(), getUsed(), getFree(), false, getCapacity(), getUsed(), getFree(),
map.get(bpid).getUsed(), 0L); map.get(bpid).getUsed(), getNonDfsUsed());
} }
SimulatedVolume getVolume() { SimulatedVolume getVolume() {
@ -733,6 +744,7 @@ public class SimulatedFSDataset implements FsDatasetSpi<FsVolumeSpi> {
this.storages.add(new SimulatedStorage( this.storages.add(new SimulatedStorage(
conf.getLong(CONFIG_PROPERTY_CAPACITY, DEFAULT_CAPACITY), conf.getLong(CONFIG_PROPERTY_CAPACITY, DEFAULT_CAPACITY),
conf.getEnum(CONFIG_PROPERTY_STATE, DEFAULT_STATE), conf.getEnum(CONFIG_PROPERTY_STATE, DEFAULT_STATE),
conf.getLong(CONFIG_PROPERTY_NONDFSUSED, DEFAULT_NONDFSUSED),
fileIoProvider, conf)); fileIoProvider, conf));
} }
} }