From 7279fe8661d694fb0775f4f84333c1317f4e6899 Mon Sep 17 00:00:00 2001 From: LeonGao Date: Wed, 13 Oct 2021 18:14:03 -0700 Subject: [PATCH] HDFS-16268. Balancer stuck when moving striped blocks due to NPE (#3546) --- .../hdfs/server/balancer/Dispatcher.java | 20 ++++++++--- .../hdfs/server/balancer/TestBalancer.java | 33 +++++++++++++++++++ 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java index abcfc120780..90fd7e6fa12 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Dispatcher.java @@ -241,7 +241,7 @@ public class PendingMove { private DDatanode proxySource; private StorageGroup target; - private PendingMove(Source source, StorageGroup target) { + PendingMove(Source source, StorageGroup target) { this.source = source; this.target = target; } @@ -282,12 +282,18 @@ private boolean chooseBlockAndProxy() { /** * @return true if the given block is good for the tentative move. */ - private boolean markMovedIfGoodBlock(DBlock block, StorageType targetStorageType) { + boolean markMovedIfGoodBlock(DBlock block, StorageType targetStorageType) { synchronized (block) { synchronized (movedBlocks) { if (isGoodBlockCandidate(source, target, targetStorageType, block)) { if (block instanceof DBlockStriped) { reportedBlock = ((DBlockStriped) block).getInternalBlock(source); + if (reportedBlock == null) { + LOG.info( + "No striped internal block on source {}, block {}. Skipping.", + source, block); + return false; + } } else { reportedBlock = block; } @@ -501,7 +507,7 @@ public DBlockStriped(Block block, byte[] indices, short dataBlockNum, this.cellSize = cellSize; } - public DBlock getInternalBlock(StorageGroup storage) { + DBlock getInternalBlock(StorageGroup storage) { int idxInLocs = locations.indexOf(storage); if (idxInLocs == -1) { return null; @@ -520,7 +526,11 @@ public DBlock getInternalBlock(StorageGroup storage) { @Override public long getNumBytes(StorageGroup storage) { - return getInternalBlock(storage).getNumBytes(); + DBlock block = getInternalBlock(storage); + if (block == null) { + return 0; + } + return block.getNumBytes(); } } @@ -1309,7 +1319,7 @@ public static boolean checkForSuccess( * 2. the block does not have a replica/internalBlock on the target; * 3. doing the move does not reduce the number of racks that the block has */ - private boolean isGoodBlockCandidate(StorageGroup source, StorageGroup target, + boolean isGoodBlockCandidate(StorageGroup source, StorageGroup target, StorageType targetStorageType, DBlock block) { if (source.equals(target)) { return false; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java index 2070a332b6b..8ec968d36c2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/balancer/TestBalancer.java @@ -42,11 +42,16 @@ import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicy; import org.junit.AfterClass; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import static org.mockito.Mockito.any; import static org.mockito.Mockito.anyLong; import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.when; import java.io.File; import java.io.IOException; @@ -1664,11 +1669,39 @@ private void doTestBalancerWithStripedFile(Configuration conf) throws Exception // verify locations of striped blocks locatedBlocks = client.getBlockLocations(fileName, 0, fileLen); StripedFileTestUtil.verifyLocatedStripedBlocks(locatedBlocks, groupSize); + + // Test handling NPE with striped blocks + testNullStripedBlocks(conf); + } finally { cluster.shutdown(); } } + private void testNullStripedBlocks(Configuration conf) throws IOException { + NameNodeConnector nnc = NameNodeConnector.newNameNodeConnectors( + DFSUtil.getInternalNsRpcUris(conf), + Balancer.class.getSimpleName(), Balancer.BALANCER_ID_PATH, conf, + BalancerParameters.DEFAULT.getMaxIdleIteration()).get(0); + Dispatcher dispatcher = new Dispatcher(nnc, Collections.emptySet(), + Collections. emptySet(), 1, 1, 0, + 1, 1, conf); + Dispatcher spyDispatcher = spy(dispatcher); + Dispatcher.PendingMove move = spyDispatcher.new PendingMove( + mock(Dispatcher.Source.class), + mock(Dispatcher.DDatanode.StorageGroup.class)); + Dispatcher.DBlockStriped block = mock(Dispatcher.DBlockStriped.class); + + doReturn(null).when(block).getInternalBlock(any()); + doReturn(true) + .when(spyDispatcher) + .isGoodBlockCandidate(any(), any(), any(), any()); + + when(move.markMovedIfGoodBlock(block, DEFAULT)).thenCallRealMethod(); + + assertFalse(move.markMovedIfGoodBlock(block, DEFAULT)); + } + /** * Test Balancer runs fine when logging in with a keytab in kerberized env. * Reusing testUnknownDatanode here for basic functionality testing.