From d4adf921a30ae922a3440f437a3efa7b8727dcea Mon Sep 17 00:00:00 2001 From: Erik Krogen Date: Fri, 21 Sep 2018 14:57:52 -0700 Subject: [PATCH] HDFS-13898. [SBN read] Throw retriable exception for getBlockLocations when ObserverNameNode is in safemode. Contributed by Chao Sun. --- .../hdfs/server/namenode/FSNamesystem.java | 5 +- .../hdfs/server/namenode/NameNodeAdapter.java | 7 ++ .../server/namenode/ha/TestObserverNode.java | 67 +++++++++++++++++++ 3 files changed, 78 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 4a2a5a95542..caff8d0d66e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -91,6 +91,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT_DEFAULT; import org.apache.hadoop.hdfs.protocol.HdfsConstants; import static org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.*; +import static org.apache.hadoop.ha.HAServiceProtocol.HAServiceState.ACTIVE; +import static org.apache.hadoop.ha.HAServiceProtocol.HAServiceState.OBSERVER; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyInfo; import org.apache.hadoop.hdfs.protocol.OpenFilesIterator.OpenFilesType; @@ -1947,7 +1949,8 @@ LocatedBlocks getBlockLocations(String clientMachine, String srcArg, SafeModeException se = newSafemodeException( "Zero blocklocations for " + srcArg); if (haEnabled && haContext != null && - haContext.getState().getServiceState() == HAServiceState.ACTIVE) { + (haContext.getState().getServiceState() == ACTIVE || + haContext.getState().getServiceState() == OBSERVER)) { throw new RetriableException(se); } else { throw se; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java index a71538d8d52..3ba3d35253f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports; import static org.mockito.Mockito.spy; @@ -223,6 +224,12 @@ public static FSNamesystem spyOnNamesystem(NameNode nn) { return fsnSpy; } + public static BlockManager spyOnBlockManager(NameNode nn) { + BlockManager bmSpy = Mockito.spy(nn.getNamesystem().getBlockManager()); + nn.getNamesystem().setBlockManagerForTesting(bmSpy); + return bmSpy; + } + public static ReentrantReadWriteLock spyOnFsLock(FSNamesystem fsn) { ReentrantReadWriteLock spy = Mockito.spy(fsn.getFsLockForTests()); fsn.setFsLockForTests(spy); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNode.java index 89bfffb4494..c9e79fa6158 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNode.java @@ -24,8 +24,15 @@ import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; import org.apache.hadoop.hdfs.server.namenode.NameNode; +import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.io.retry.FailoverProxyProvider; import org.apache.hadoop.io.retry.RetryInvocationHandler; import org.apache.hadoop.test.GenericTestUtils; @@ -38,9 +45,12 @@ import java.io.IOException; import java.lang.reflect.Proxy; import java.net.URI; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.TimeUnit; +import static org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_KEY; @@ -48,6 +58,13 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyBoolean; +import static org.mockito.Matchers.anyLong; +import static org.mockito.Matchers.anyShort; +import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doNothing; // Main unit tests for ObserverNode public class TestObserverNode { @@ -299,6 +316,56 @@ public void testBootstrap() throws Exception { assertEquals(0, rc); } + /** + * Test the case where Observer should throw RetriableException, just like + * active NN, for certain open() calls where block locations are not + * available. See HDFS-13898 for details. + */ + @Test + public void testObserverNodeSafeModeWithBlockLocations() throws Exception { + setUpCluster(1); + setObserverRead(true); + + // Avoid starting DNs for the mini cluster. + BlockManager bmSpy = NameNodeAdapter.spyOnBlockManager(namenodes[0]); + doNothing().when(bmSpy) + .verifyReplication(anyString(), anyShort(), anyString()); + + // Create a new file - the request should go to active. + dfs.createNewFile(testPath); + assertSentTo(0); + + rollEditLogAndTail(0); + dfs.open(testPath); + assertSentTo(2); + + // Set observer to safe mode. + dfsCluster.getFileSystem(2).setSafeMode(SafeModeAction.SAFEMODE_ENTER); + + // Mock block manager for observer to generate some fake blocks which + // will trigger the (retriable) safe mode exception. + final DatanodeInfo[] empty = {}; + bmSpy = NameNodeAdapter.spyOnBlockManager(namenodes[2]); + doAnswer((invocation) -> { + ExtendedBlock b = new ExtendedBlock("fake-pool", new Block(12345L)); + LocatedBlock fakeBlock = new LocatedBlock(b, empty); + List fakeBlocks = new ArrayList<>(); + fakeBlocks.add(fakeBlock); + return new LocatedBlocks(0, false, fakeBlocks, null, true, null, null); + }).when(bmSpy).createLocatedBlocks(any(), anyLong(), anyBoolean(), + anyLong(), anyLong(), anyBoolean(), anyBoolean(), any(), any()); + + // Open the file again - it should throw retriable exception and then + // failover to active. + dfs.open(testPath); + assertSentTo(0); + + // Remove safe mode on observer, request should still go to it. + dfsCluster.getFileSystem(2).setSafeMode(SafeModeAction.SAFEMODE_LEAVE); + dfs.open(testPath); + assertSentTo(2); + } + // TODO this does not currently work because fetching the service state from // e.g. the StandbyNameNode also waits for the transaction ID to catch up. // This is disabled pending HDFS-13872 and HDFS-13749.