HDFS-13898. [SBN read] Throw retriable exception for getBlockLocations when ObserverNameNode is in safemode. Contributed by Chao Sun.

Authored by Erik Krogen on 2018-09-21 14:57:52 -07:00; committed by Chen Liang
parent 07558629db
commit d4adf921a3
3 changed files with 78 additions and 1 deletion
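In short: when HA is enabled and getBlockLocations fails in safe mode with "Zero blocklocations for ...", FSNamesystem now wraps the SafeModeException in a RetriableException on an OBSERVER NameNode as well as on an ACTIVE one, so the client-side retry/failover machinery retries the call (typically failing over to the active) instead of surfacing the safe-mode error. The standalone sketch below only mirrors that predicate; the class and method names are hypothetical and not part of the patch.

import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
import org.apache.hadoop.ipc.RetriableException;

import java.io.IOException;

// Hypothetical helper, not part of the patch: mirrors the new decision only.
public final class SafeModeRetrySketch {
  private SafeModeRetrySketch() {}

  // Decide how a safe-mode failure from getBlockLocations should surface.
  public static IOException wrapIfRetriable(boolean haEnabled,
      HAServiceState state, IOException safeModeFailure) {
    if (haEnabled && state != null
        && (state == HAServiceState.ACTIVE || state == HAServiceState.OBSERVER)) {
      // Retriable: the client's retry/failover proxy will try the call again,
      // typically failing over to the active NameNode.
      return new RetriableException(safeModeFailure);
    }
    // Non-HA (or unknown state): the caller sees the safe-mode error directly.
    return safeModeFailure;
  }
}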

FSNamesystem.java

@@ -91,6 +91,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LI
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT_DEFAULT;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
 import static org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.*;
+import static org.apache.hadoop.ha.HAServiceProtocol.HAServiceState.ACTIVE;
+import static org.apache.hadoop.ha.HAServiceProtocol.HAServiceState.OBSERVER;
 import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyInfo;
 import org.apache.hadoop.hdfs.protocol.OpenFilesIterator.OpenFilesType;
@@ -1947,7 +1949,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
         SafeModeException se = newSafemodeException(
             "Zero blocklocations for " + srcArg);
         if (haEnabled && haContext != null &&
-            haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
+            (haContext.getState().getServiceState() == ACTIVE ||
+            haContext.getState().getServiceState() == OBSERVER)) {
           throw new RetriableException(se);
         } else {
           throw se;
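Because the exception is now RetriableException on the observer too, clients that go through the HA failover proxy retry transparently. For a caller that talks to a NameNode without that machinery, handling might look like the hedged sketch below; the class name, attempt count, and backoff are illustrative assumptions, not anything this patch adds.

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.ipc.RetriableException;

import java.io.IOException;

// Illustrative client-side sketch; not part of the patch.
public final class OpenWithRetrySketch {
  private static final int MAX_ATTEMPTS = 3;    // assumption, tune as needed
  private static final long BACKOFF_MS = 1000L; // assumption, tune as needed

  public static FSDataInputStream openWithRetry(FileSystem fs, Path path)
      throws IOException, InterruptedException {
    for (int attempt = 1; ; attempt++) {
      try {
        return fs.open(path);
      } catch (IOException ioe) {
        IOException cause = ioe;
        if (ioe instanceof RemoteException) {
          // Unwrap the RPC envelope to see whether the server marked the
          // failure as retriable (e.g. safe mode on an active/observer NN).
          cause = ((RemoteException) ioe)
              .unwrapRemoteException(RetriableException.class);
        }
        if (!(cause instanceof RetriableException) || attempt == MAX_ATTEMPTS) {
          throw cause;
        }
        Thread.sleep(BACKOFF_MS); // simple fixed backoff before retrying
      }
    }
  }
}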

NameNodeAdapter.java

@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hdfs.server.namenode;
 
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
 import static org.mockito.Mockito.spy;
@@ -223,6 +224,12 @@ public class NameNodeAdapter {
     return fsnSpy;
   }
 
+  public static BlockManager spyOnBlockManager(NameNode nn) {
+    BlockManager bmSpy = Mockito.spy(nn.getNamesystem().getBlockManager());
+    nn.getNamesystem().setBlockManagerForTesting(bmSpy);
+    return bmSpy;
+  }
+
   public static ReentrantReadWriteLock spyOnFsLock(FSNamesystem fsn) {
     ReentrantReadWriteLock spy = Mockito.spy(fsn.getFsLockForTests());
     fsn.setFsLockForTests(spy);
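The new NameNodeAdapter#spyOnBlockManager helper swaps a Mockito spy into the running FSNamesystem so tests can stub BlockManager behavior. A minimal usage sketch, assuming an already running MiniDFSCluster; it mirrors what TestObserverNode does further down and is not itself part of the patch:

import static org.mockito.Matchers.anyShort;
import static org.mockito.Matchers.anyString;
import static org.mockito.Mockito.doNothing;

import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;

// Illustrative test fragment: install a BlockManager spy on NameNode 0 and
// stub replication checks so files can be created without any DataNodes.
public class SpyOnBlockManagerUsageSketch {
  static void disableReplicationCheck(MiniDFSCluster cluster) throws Exception {
    BlockManager bmSpy =
        NameNodeAdapter.spyOnBlockManager(cluster.getNameNode(0));
    doNothing().when(bmSpy)
        .verifyReplication(anyString(), anyShort(), anyString());
  }
}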

TestObserverNode.java

@@ -24,8 +24,15 @@ import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.hadoop.hdfs.protocol.HdfsConstants;
+import org.apache.hadoop.hdfs.protocol.Block;
+import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
+import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
+import org.apache.hadoop.hdfs.protocol.LocatedBlock;
+import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
 import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
+import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
 import org.apache.hadoop.hdfs.server.namenode.NameNode;
+import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
 import org.apache.hadoop.io.retry.FailoverProxyProvider;
 import org.apache.hadoop.io.retry.RetryInvocationHandler;
 import org.apache.hadoop.test.GenericTestUtils;
@@ -38,9 +45,12 @@ import java.io.File;
 import java.io.IOException;
 import java.lang.reflect.Proxy;
 import java.net.URI;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.TimeUnit;
 
+import static org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY;
 import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_KEY;
@@ -48,6 +58,13 @@ import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.fail;
+import static org.mockito.Matchers.any;
+import static org.mockito.Matchers.anyBoolean;
+import static org.mockito.Matchers.anyLong;
+import static org.mockito.Matchers.anyShort;
+import static org.mockito.Matchers.anyString;
+import static org.mockito.Mockito.doAnswer;
+import static org.mockito.Mockito.doNothing;
 
 // Main unit tests for ObserverNode
 public class TestObserverNode {
@@ -299,6 +316,56 @@ public class TestObserverNode {
     assertEquals(0, rc);
   }
 
+  /**
+   * Test the case where Observer should throw RetriableException, just like
+   * active NN, for certain open() calls where block locations are not
+   * available. See HDFS-13898 for details.
+   */
+  @Test
+  public void testObserverNodeSafeModeWithBlockLocations() throws Exception {
+    setUpCluster(1);
+    setObserverRead(true);
+
+    // Avoid starting DNs for the mini cluster.
+    BlockManager bmSpy = NameNodeAdapter.spyOnBlockManager(namenodes[0]);
+    doNothing().when(bmSpy)
+        .verifyReplication(anyString(), anyShort(), anyString());
+
+    // Create a new file - the request should go to active.
+    dfs.createNewFile(testPath);
+    assertSentTo(0);
+
+    rollEditLogAndTail(0);
+    dfs.open(testPath);
+    assertSentTo(2);
+
+    // Set observer to safe mode.
+    dfsCluster.getFileSystem(2).setSafeMode(SafeModeAction.SAFEMODE_ENTER);
+
+    // Mock block manager for observer to generate some fake blocks which
+    // will trigger the (retriable) safe mode exception.
+    final DatanodeInfo[] empty = {};
+    bmSpy = NameNodeAdapter.spyOnBlockManager(namenodes[2]);
+    doAnswer((invocation) -> {
+      ExtendedBlock b = new ExtendedBlock("fake-pool", new Block(12345L));
+      LocatedBlock fakeBlock = new LocatedBlock(b, empty);
+      List<LocatedBlock> fakeBlocks = new ArrayList<>();
+      fakeBlocks.add(fakeBlock);
+      return new LocatedBlocks(0, false, fakeBlocks, null, true, null, null);
+    }).when(bmSpy).createLocatedBlocks(any(), anyLong(), anyBoolean(),
+        anyLong(), anyLong(), anyBoolean(), anyBoolean(), any(), any());
+
+    // Open the file again - it should throw retriable exception and then
+    // failover to active.
+    dfs.open(testPath);
+    assertSentTo(0);
+
+    // Remove safe mode on observer, request should still go to it.
+    dfsCluster.getFileSystem(2).setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
+    dfs.open(testPath);
+    assertSentTo(2);
+  }
+
   // TODO this does not currently work because fetching the service state from
   // e.g. the StandbyNameNode also waits for the transaction ID to catch up.
   // This is disabled pending HDFS-13872 and HDFS-13749.