From fd9a7f84bcf6664443568a16df1d1101393e6901 Mon Sep 17 00:00:00 2001 From: Ming Ma Date: Mon, 2 Nov 2015 19:36:37 -0800 Subject: [PATCH] HDFS-9313. Possible NullPointerException in BlockManager if no excess replica can be chosen. (mingma) (cherry picked from commit d565480da2f646b40c3180e1ccb2935c9863dfef) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 ++ .../blockmanagement/BlockPlacementPolicy.java | 8 +++-- .../BlockPlacementPolicyDefault.java | 6 ++++ .../TestReplicationPolicy.java | 31 +++++++++++++++++++ 4 files changed, 45 insertions(+), 3 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 6da1264b37c..8d2ddf5f888 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -1373,6 +1373,9 @@ Release 2.8.0 - UNRELEASED HDFS-9329. TestBootstrapStandby#testRateThrottling is flaky because fsimage size is smaller than IO buffer size. (zhz) + HDFS-9313. Possible NullPointerException in BlockManager if no excess + replica can be chosen. (mingma) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java index 69a49aac784..e75efa0a6ee 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicy.java @@ -23,8 +23,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.StorageType; @@ -35,13 +33,17 @@ import org.apache.hadoop.net.NetworkTopology; import org.apache.hadoop.net.Node; import org.apache.hadoop.util.ReflectionUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + /** * This interface is used for choosing the desired number of targets * for placing block replicas. */ @InterfaceAudience.Private public abstract class BlockPlacementPolicy { - static final Log LOG = LogFactory.getLog(BlockPlacementPolicy.class); + static final Logger LOG = LoggerFactory.getLogger( + BlockPlacementPolicy.class); @InterfaceAudience.Private public static class NotEnoughReplicasException extends Exception { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java index d9b8d60b36a..2723ed95f91 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java @@ -981,6 +981,12 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy { excessTypes); } firstOne = false; + if (cur == null) { + LOG.warn("No excess replica can be found. excessTypes: {}." + + " moreThanOne: {}. exactlyOne: {}.", excessTypes, moreThanOne, + exactlyOne); + break; + } // adjust rackmap, moreThanOne, and exactlyOne adjustSetsWithChosenReplica(rackMap, moreThanOne, exactlyOne, cur); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java index e3218642333..60aa9ef042c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/blockmanagement/TestReplicationPolicy.java @@ -1000,6 +1000,14 @@ public class TestReplicationPolicy extends BaseReplicationPolicyTest { BlockStoragePolicySuite POLICY_SUITE = BlockStoragePolicySuite .createDefaultSuite(); BlockStoragePolicy storagePolicy = POLICY_SUITE.getDefaultPolicy(); + DatanodeStorageInfo excessSSD = DFSTestUtil.createDatanodeStorageInfo( + "Storage-excess-SSD-ID", "localhost", + storages[0].getDatanodeDescriptor().getNetworkLocation(), + "foo.com", StorageType.SSD); + updateHeartbeatWithUsage(excessSSD.getDatanodeDescriptor(), + 2* HdfsServerConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L, + 2* HdfsServerConstants.MIN_BLOCKS_FOR_WRITE*BLOCK_SIZE, 0L, 0L, 0L, 0, + 0); // use delete hint case. @@ -1022,6 +1030,29 @@ public class TestReplicationPolicy extends BaseReplicationPolicyTest { excessReplicas = replicator.chooseReplicasToDelete(nonExcess, 3, excessTypes, storages[3].getDatanodeDescriptor(), null); assertTrue(excessReplicas.contains(excessStorage)); + + + // The block was initially created on excessSSD(rack r1), + // storages[4](rack r3) and storages[5](rack r3) with + // ONESSD_STORAGE_POLICY_NAME storage policy. + // Right after balancer moves the block from storages[5] to + // storages[3](rack r2), the application changes the storage policy from + // ONESSD_STORAGE_POLICY_NAME to HOT_STORAGE_POLICY_ID. In this case, + // no replica can be chosen as the excessive replica as + // chooseReplicasToDelete only considers storages[4] and storages[5] that + // are the same rack. But neither's storage type is SSD. + // TODO BlockPlacementPolicyDefault should be able to delete excessSSD. + nonExcess.clear(); + nonExcess.add(excessSSD); + nonExcess.add(storages[3]); + nonExcess.add(storages[4]); + nonExcess.add(storages[5]); + excessTypes = storagePolicy.chooseExcess((short) 3, + DatanodeStorageInfo.toStorageTypes(nonExcess)); + excessReplicas = replicator.chooseReplicasToDelete(nonExcess, 3, + excessTypes, storages[3].getDatanodeDescriptor(), + storages[5].getDatanodeDescriptor()); + assertTrue(excessReplicas.size() == 0); } @Test