HDFS-8863. The remaining space check in BlockPlacementPolicyDefault is flawed. (Kihwal Lee via yliu)

2015-08-20 20:43:26 +08:00 · 2015-08-20 20:43:26 +08:00 · d4b6c4a3cd
parent a2299bf4ee
commit d4b6c4a3cd
4 changed files with 76 additions and 10 deletions
--- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
+++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt
@ -8,11 +8,11 @@ Release 2.7.2 - UNRELEASED
  IMPROVEMENTS
-  HDFS-8659. Block scanner INFO message is spamming logs. (Yongjun Zhang)
+    HDFS-8659. Block scanner INFO message is spamming logs. (Yongjun Zhang)
  OPTIMIZATIONS
-  HDFS-8722. Optimize datanode writes for small writes and flushes (kihwal)
+    HDFS-8722. Optimize datanode writes for small writes and flushes (kihwal)
  BUG FIXES
@ -27,6 +27,9 @@ Release 2.7.2 - UNRELEASED
    HDFS-8867. Enable optimized block reports. (Daryn Sharp via jing9)
    HDFS-8863. The remaining space check in BlockPlacementPolicyDefault is
    flawed. (Kihwal Lee via yliu)
 Release 2.7.1 - 2015-07-06
  INCOMPATIBLE CHANGES
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java
@ -779,7 +779,8 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy {
    final long requiredSize = blockSize * HdfsConstants.MIN_BLOCKS_FOR_WRITE;
    final long scheduledSize = blockSize * node.getBlocksScheduled(storage.getStorageType());
-    final long remaining = node.getRemaining(storage.getStorageType());
+    final long remaining = node.getRemaining(storage.getStorageType(),
        requiredSize);
    if (requiredSize > remaining - scheduledSize) {
      logNodeIsNotChosen(storage, "the node does not have enough "
          + storage.getStorageType() + " space"
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/DatanodeDescriptor.java
@ -44,6 +44,7 @@ import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
 import org.apache.hadoop.hdfs.server.namenode.CachedBlock;
 import org.apache.hadoop.hdfs.server.protocol.BlockReportContext;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage;
 import org.apache.hadoop.hdfs.server.protocol.DatanodeStorage.State;
 import org.apache.hadoop.hdfs.server.protocol.StorageReport;
 import org.apache.hadoop.hdfs.server.protocol.VolumeFailureSummary;
 import org.apache.hadoop.hdfs.util.EnumCounters;
@ -657,16 +658,26 @@ public class DatanodeDescriptor extends DatanodeInfo {
  }
  /**
-   * @return Approximate number of blocks currently scheduled to be written 
+   * Return the sum of remaining spaces of the specified type. If the remaining
   * space of a storage is less than minSize, it won't be counted toward the
   * sum.
   *
   * @param t The storage type. If null, the type is ignored.
   * @param minSize The minimum free space required.
   * @return the sum of remaining spaces that are at least minSize.
   */
-  public long getRemaining(StorageType t) {
+  public long getRemaining(StorageType t, long minSize) {
    long remaining = 0;
-    for(DatanodeStorageInfo s : getStorageInfos()) {
+    for (DatanodeStorageInfo s : getStorageInfos()) {
-      if (s.getStorageType() == t) {
+      if (s.getState() == State.NORMAL &&
-        remaining += s.getRemaining();
+          (t == null || s.getStorageType() == t)) {
        long r = s.getRemaining();
        if (r >= minSize) {
          remaining += r;
        }
      }
    }
-    return remaining;    
+    return remaining;
  }
  /**
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java
@ -101,6 +101,16 @@ public class TestReplicationPolicy {
        dnCacheCapacity, dnCacheUsed, xceiverCount, volFailures, null);
  }
  private static void updateHeartbeatForExtraStorage(long capacity,
      long dfsUsed, long remaining, long blockPoolUsed) {
    DatanodeDescriptor dn = dataNodes[5];
    dn.getStorageInfos()[1].setUtilizationForTesting(
        capacity, dfsUsed, remaining, blockPoolUsed);
    dn.updateHeartbeat(
        BlockManagerTestUtil.getStorageReportsForDatanode(dn),
        0L, 0L, 0, 0, null);
  }
  @BeforeClass
  public static void setupCluster() throws Exception {
    Configuration conf = new HdfsConfiguration();
@ -114,6 +124,16 @@ public class TestReplicationPolicy {
    storages = DFSTestUtil.createDatanodeStorageInfos(racks);
    dataNodes = DFSTestUtil.toDatanodeDescriptor(storages);
    // create an extra storage for dn5.
    DatanodeStorage extraStorage = new DatanodeStorage(
        storages[5].getStorageID() + "-extra", DatanodeStorage.State.NORMAL,
        StorageType.DEFAULT);
 /*    DatanodeStorageInfo si = new DatanodeStorageInfo(
        storages[5].getDatanodeDescriptor(), extraStorage);
 */
    BlockManagerTestUtil.updateStorage(storages[5].getDatanodeDescriptor(),
        extraStorage);
    FileSystem.setDefaultUri(conf, "hdfs://localhost:0");
    conf.set(DFSConfigKeys.DFS_NAMENODE_HTTP_ADDRESS_KEY, "0.0.0.0:0");
    File baseDir = PathUtils.getTestDir(TestReplicationPolicy.class);
@ -136,11 +156,17 @@ public class TestReplicationPolicy {
      bm.getDatanodeManager().getHeartbeatManager().addDatanode(
          dataNodes[i]);
    }
    resetHeartbeatForStorages();
  }
  private static void resetHeartbeatForStorages() {
    for (int i=0; i < NUM_OF_DATANODES; i++) {
      updateHeartbeatWithUsage(dataNodes[i],
          2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L,
-          2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L, 0L, 0L, 0, 0);
+          HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L, 0L, 0L, 0, 0);
    }    
    // No available space in the extra storage of dn5
    updateHeartbeatForExtraStorage(0L, 0L, 0L, 0L);
  }
  private static boolean isOnSameRack(DatanodeStorageInfo left, DatanodeStorageInfo right) {
@ -150,6 +176,31 @@ public class TestReplicationPolicy {
  private static boolean isOnSameRack(DatanodeStorageInfo left, DatanodeDescriptor right) {
    return cluster.isOnSameRack(left.getDatanodeDescriptor(), right);
  }
  /**
   * Test whether the remaining space per storage is individually
   * considered.
   */
  @Test
  public void testChooseNodeWithMultipleStorages() throws Exception {
    updateHeartbeatWithUsage(dataNodes[5],
        2* HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L,
        (2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE)/3, 0L,
        0L, 0L, 0, 0);
    updateHeartbeatForExtraStorage(
        2* HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L,
        (2*HdfsConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE)/3, 0L);
    DatanodeStorageInfo[] targets;
    targets = chooseTarget (1, dataNodes[5],
        new ArrayList<DatanodeStorageInfo>(), null);
    assertEquals(1, targets.length);
    assertEquals(storages[4], targets[0]);
    resetHeartbeatForStorages();
  }
  /**
   * In this testcase, client is dataNodes[0]. So the 1st replica should be
   * placed on dataNodes[0], the 2nd replica should be placed on