HDFS-10530. BlockManager reconstruction work scheduling should correctly adhere to EC block placement policy. Contributed by Manoj Govindassamy and Rui Gao.

2017-03-16 15:07:38 -07:00 · 2017-03-16 15:07:38 -07:00 · 4812518b23
commit 4812518b23
parent 09ad8effb8
3 changed files with 119 additions and 7 deletions
--- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java
@ -4179,7 +4179,7 @@ boolean isPlacementPolicySatisfied(BlockInfo storedBlock) {
    BlockPlacementPolicy placementPolicy = placementPolicies
        .getPolicy(blockType);
    int numReplicas = blockType == STRIPED ? ((BlockInfoStriped) storedBlock)
-        .getRealDataBlockNum() : storedBlock.getReplication();
+        .getRealTotalBlockNum() : storedBlock.getReplication();
    return placementPolicy.verifyBlockPlacement(locs, numReplicas)
        .isPlacementPolicySatisfied();
  }
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java
@ -1918,7 +1918,7 @@ public void testBalancerWithStripedFile() throws Exception {
  }

  private void doTestBalancerWithStripedFile(Configuration conf) throws Exception {
-    int numOfDatanodes = dataBlocks + parityBlocks + 2;
+    int numOfDatanodes = dataBlocks + parityBlocks + 3;
    int numOfRacks = dataBlocks;
    long capacity = 20 * defaultBlockSize;
    long[] capacities = new long[numOfDatanodes];
@ -1956,11 +1956,12 @@ private void doTestBalancerWithStripedFile(Configuration conf) throws Exception
      LocatedBlocks locatedBlocks = client.getBlockLocations(fileName, 0, fileLen);
      StripedFileTestUtil.verifyLocatedStripedBlocks(locatedBlocks, groupSize);

-      // add one datanode
+      // add datanodes in new rack
      String newRack = "/rack" + (++numOfRacks);
-      cluster.startDataNodes(conf, 1, true, null,
-          new String[]{newRack}, null, new long[]{capacity});
-      totalCapacity += capacity;
+      cluster.startDataNodes(conf, 2, true, null,
+          new String[]{newRack, newRack}, null,
+          new long[]{capacity, capacity});
+      totalCapacity += capacity*2;
      cluster.triggerHeartbeats();

      // run balancer and validate results
--- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
+++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestBlockManager.java
@ -40,6 +40,7 @@
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.EnumSet;
+import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map.Entry;
@ -55,6 +56,8 @@
 import java.util.concurrent.TimeoutException;
 import java.util.concurrent.atomic.AtomicBoolean;

+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.CreateFlag;
 import org.apache.hadoop.fs.FSDataInputStream;
@ -68,6 +71,7 @@
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.StripedFileTestUtil;
 import org.apache.hadoop.hdfs.protocol.Block;
 import org.apache.hadoop.hdfs.protocol.BlockListAsLongs;
 import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
@ -125,13 +129,14 @@ public class TestBlockManager {
   * of times trying to trigger the incorrect behavior.
   */
  private static final int NUM_TEST_ITERS = 30;
-  
  private static final int BLOCK_SIZE = 64*1024;
+  private static final Log LOG = LogFactory.getLog(TestBlockManager.class);

  private FSNamesystem fsn;
  private BlockManager bm;
  private long mockINodeId;

+
  @Before
  public void setupMockCluster() throws IOException {
    Configuration conf = new HdfsConfiguration();
@ -1287,4 +1292,110 @@ public void testIsReplicaCorruptCall() throws Exception {
        isReplicaCorrupt(Mockito.any(BlockInfo.class),
            Mockito.any(DatanodeDescriptor.class));
  }
+
+  /** Checks isPlacementPolicySatisfied for an EC file as racks are added. */
+  @Test (timeout = 300000)
+  public void testPlacementPolicySatisfied() throws Exception {
+    LOG.info("Starting testPlacementPolicySatisfied.");
+    final String[] initialRacks = new String[]{
+        "/rack0", "/rack1", "/rack2", "/rack3", "/rack4", "/rack5"};
+    final String[] initialHosts = new String[]{
+        "host0", "host1", "host2", "host3", "host4", "host5"};
+    final int numDataBlocks = StripedFileTestUtil.getDefaultECPolicy()
+        .getNumDataUnits();
+    final int numParityBlocks = StripedFileTestUtil.getDefaultECPolicy()
+        .getNumParityUnits();
+    final long blockSize = 64 * 1024;
+    Configuration conf = new Configuration();
+    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, blockSize);
+    conf.setLong(DFSConfigKeys.DFS_NAMENODE_REDUNDANCY_INTERVAL_SECONDS_KEY, 1);
+    conf.set(DFSConfigKeys.DFS_NAMENODE_EC_POLICIES_ENABLED_KEY,
+        StripedFileTestUtil.getDefaultECPolicy().getName());
+    MiniDFSCluster cluster = null;
+    try {
+      cluster = new MiniDFSCluster.Builder(conf)
+          .racks(initialRacks)
+          .hosts(initialHosts)
+          .numDataNodes(initialRacks.length)
+          .build();
+      cluster.waitActive();
+      final DistributedFileSystem dfs = cluster.getFileSystem();
+      final Path ecDir = new Path("/ec");
+      final Path testFileUnsatisfied = new Path(ecDir, "test1");
+      final Path testFileSatisfied = new Path(ecDir, "test2");
+      dfs.getClient().mkdirs(ecDir.toString(), null, true);
+      dfs.getClient().setErasureCodingPolicy(ecDir.toString(),
+          StripedFileTestUtil.getDefaultECPolicy().getName());
+      long fileLen = blockSize * numDataBlocks;
+
+      // Create a file to be stored in 6 racks.
+      DFSTestUtil.createFile(dfs, testFileUnsatisfied, fileLen, (short) 1, 1);
+      // Block placement policy should be satisfied as rack count
+      // is less than numDataBlocks + numParityBlocks.
+      verifyPlacementPolicy(cluster, testFileUnsatisfied, true);
+
+      LOG.info("Adding 3 new hosts in the existing racks.");
+      cluster.startDataNodes(conf, 3, true, null,
+          new String[]{"/rack3", "/rack4", "/rack5"},
+          new String[]{"host3-2", "host4-2", "host5-2"}, null);
+      cluster.triggerHeartbeats();
+
+      LOG.info("Waiting for EC reconstruction to complete.");
+      DFSTestUtil.waitForReplication(dfs, testFileUnsatisfied,
+          (short)(numDataBlocks + numParityBlocks), 30 * 1000);
+      // Block placement policy should still be satisfied
+      // as there are only 6 racks.
+      verifyPlacementPolicy(cluster, testFileUnsatisfied, true);
+
+      LOG.info("Adding 3 new hosts in 3 new racks.");
+      cluster.startDataNodes(conf, 3, true, null,
+          new String[]{"/rack6", "/rack7", "/rack8"},
+          new String[]{"host6", "host7", "host8"},
+          null);
+      cluster.triggerHeartbeats();
+      // Addition of new racks can make the existing EC files block
+      // placements unsatisfied and there is NO automatic block
+      // reconstruction for this yet.
+      // TODO:
+      //  Verify for block placement satisfied once the automatic
+      //  block reconstruction is implemented.
+      verifyPlacementPolicy(cluster, testFileUnsatisfied, false);
+
+      // Create a new file
+      DFSTestUtil.createFile(dfs, testFileSatisfied, fileLen, (short) 1, 1);
+      // The new file should be rightly placed on all 9 racks
+      // and the block placement policy should be satisfied.
+      verifyPlacementPolicy(cluster, testFileSatisfied, true);
+    } finally {
+      if (cluster != null) {
+        cluster.shutdown();
+      }
+    }
+  }
+
+  /** Logs the first block's storages and asserts its placement state. */
+  private void verifyPlacementPolicy(final MiniDFSCluster cluster,
+      final Path file, boolean isBlockPlacementSatisfied) throws IOException {
+    DistributedFileSystem dfs = cluster.getFileSystem();
+    BlockManager blockManager = cluster.getNamesystem().getBlockManager();
+    LocatedBlock lb = DFSTestUtil.getAllBlocks(dfs, file).get(0);
+    BlockInfo blockInfo =
+        blockManager.getStoredBlock(lb.getBlock().getLocalBlock());
+    Iterator<DatanodeStorageInfo> itr = blockInfo.getStorageInfos();
+    LOG.info("Block " + blockInfo + " storages: ");
+    while (itr.hasNext()) {
+      DatanodeStorageInfo dn = itr.next();
+      LOG.info(" Rack: " + dn.getDatanodeDescriptor().getNetworkLocation()
+          + ", DataNode: " + dn.getDatanodeDescriptor().getXferAddr());
+    }
+    boolean satisfied = blockManager.isPlacementPolicySatisfied(blockInfo);
+    if (isBlockPlacementSatisfied) {
+      assertTrue("Block group of " + file + " should be placement" +
+          " policy satisfied, currently!", satisfied);
+    } else {
+      assertFalse("Block group of " + file + " should be placement" +
+          " policy unsatisfied, currently!", satisfied);
+    }
+  }
+
 }