diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 63eed2da1bd..f29d2086946 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -93,6 +93,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LI import org.apache.hadoop.hdfs.protocol.HdfsConstants; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_STORAGE_POLICY_ENABLED_KEY; import static org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.*; +import static org.apache.hadoop.ha.HAServiceProtocol.HAServiceState.ACTIVE; +import static org.apache.hadoop.ha.HAServiceProtocol.HAServiceState.OBSERVER; import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyInfo; import org.apache.hadoop.hdfs.protocol.OpenFilesIterator.OpenFilesType; @@ -1955,7 +1957,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean, SafeModeException se = newSafemodeException( "Zero blocklocations for " + srcArg); if (haEnabled && haContext != null && - haContext.getState().getServiceState() == HAServiceState.ACTIVE) { + (haContext.getState().getServiceState() == ACTIVE || + haContext.getState().getServiceState() == OBSERVER)) { throw new RetriableException(se); } else { throw se; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java index b85527a9481..9a9455415e4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports; import static org.mockito.Mockito.spy; @@ -223,6 +224,12 @@ public class NameNodeAdapter { return fsnSpy; } + public static BlockManager spyOnBlockManager(NameNode nn) { + BlockManager bmSpy = Mockito.spy(nn.getNamesystem().getBlockManager()); + nn.getNamesystem().setBlockManagerForTesting(bmSpy); + return bmSpy; + } + public static ReentrantReadWriteLock spyOnFsLock(FSNamesystem fsn) { ReentrantReadWriteLock spy = Mockito.spy(fsn.getFsLockForTests()); fsn.setFsLockForTests(spy); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNode.java index 89bfffb4494..c9e79fa6158 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestObserverNode.java @@ -24,8 +24,15 @@ import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.protocol.HdfsConstants; +import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.DatanodeInfo; +import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.hdfs.protocol.LocatedBlock; +import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster; +import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager; import org.apache.hadoop.hdfs.server.namenode.NameNode; +import 
org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.io.retry.FailoverProxyProvider; import org.apache.hadoop.io.retry.RetryInvocationHandler; import org.apache.hadoop.test.GenericTestUtils; @@ -38,9 +45,12 @@ import java.io.File; import java.io.IOException; import java.lang.reflect.Proxy; import java.net.URI; +import java.util.ArrayList; +import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.TimeUnit; +import static org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_KEY; @@ -48,6 +58,13 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; +import static org.mockito.Matchers.any; +import static org.mockito.Matchers.anyBoolean; +import static org.mockito.Matchers.anyLong; +import static org.mockito.Matchers.anyShort; +import static org.mockito.Matchers.anyString; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.doNothing; // Main unit tests for ObserverNode public class TestObserverNode { @@ -299,6 +316,56 @@ public class TestObserverNode { assertEquals(0, rc); } + /** + * Test the case where Observer should throw RetriableException, just like + * active NN, for certain open() calls where block locations are not + * available. See HDFS-13898 for details. + */ + @Test + public void testObserverNodeSafeModeWithBlockLocations() throws Exception { + setUpCluster(1); + setObserverRead(true); + + // Avoid starting DNs for the mini cluster. 
+ BlockManager bmSpy = NameNodeAdapter.spyOnBlockManager(namenodes[0]); + doNothing().when(bmSpy) + .verifyReplication(anyString(), anyShort(), anyString()); + + // Create a new file - the request should go to active. + dfs.createNewFile(testPath); + assertSentTo(0); + + rollEditLogAndTail(0); + dfs.open(testPath); + assertSentTo(2); + + // Set observer to safe mode. + dfsCluster.getFileSystem(2).setSafeMode(SafeModeAction.SAFEMODE_ENTER); + + // Mock block manager for observer to generate some fake blocks which + // will trigger the (retriable) safe mode exception. + final DatanodeInfo[] empty = {}; + bmSpy = NameNodeAdapter.spyOnBlockManager(namenodes[2]); + doAnswer((invocation) -> { + ExtendedBlock b = new ExtendedBlock("fake-pool", new Block(12345L)); + LocatedBlock fakeBlock = new LocatedBlock(b, empty); + List<LocatedBlock> fakeBlocks = new ArrayList<>(); + fakeBlocks.add(fakeBlock); + return new LocatedBlocks(0, false, fakeBlocks, null, true, null, null); + }).when(bmSpy).createLocatedBlocks(any(), anyLong(), anyBoolean(), + anyLong(), anyLong(), anyBoolean(), anyBoolean(), any(), any()); + + // Open the file again - it should throw retriable exception and then + // failover to active. + dfs.open(testPath); + assertSentTo(0); + + // Remove safe mode on observer, request should still go to it. + dfsCluster.getFileSystem(2).setSafeMode(SafeModeAction.SAFEMODE_LEAVE); + dfs.open(testPath); + assertSentTo(2); + } + // TODO this does not currently work because fetching the service state from // e.g. the StandbyNameNode also waits for the transaction ID to catch up. // This is disabled pending HDFS-13872 and HDFS-13749.