HDFS-13898. [SBN read] Throw retriable exception for getBlockLocations when ObserverNameNode is in safemode. Contributed by Chao Sun.
This commit is contained in:
parent
07558629db
commit
d4adf921a3
|
@ -91,6 +91,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LI
|
|||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_SNAPSHOT_DIFF_LISTING_LIMIT_DEFAULT;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||
import static org.apache.hadoop.hdfs.server.namenode.FSDirStatAndListingOp.*;
|
||||
import static org.apache.hadoop.ha.HAServiceProtocol.HAServiceState.ACTIVE;
|
||||
import static org.apache.hadoop.ha.HAServiceProtocol.HAServiceState.OBSERVER;
|
||||
|
||||
import org.apache.hadoop.hdfs.protocol.ErasureCodingPolicyInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.OpenFilesIterator.OpenFilesType;
|
||||
|
@ -1947,7 +1949,8 @@ public class FSNamesystem implements Namesystem, FSNamesystemMBean,
|
|||
SafeModeException se = newSafemodeException(
|
||||
"Zero blocklocations for " + srcArg);
|
||||
if (haEnabled && haContext != null &&
|
||||
haContext.getState().getServiceState() == HAServiceState.ACTIVE) {
|
||||
(haContext.getState().getServiceState() == ACTIVE ||
|
||||
haContext.getState().getServiceState() == OBSERVER)) {
|
||||
throw new RetriableException(se);
|
||||
} else {
|
||||
throw se;
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
*/
|
||||
package org.apache.hadoop.hdfs.server.namenode;
|
||||
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||
import org.apache.hadoop.hdfs.server.protocol.SlowDiskReports;
|
||||
import static org.mockito.Mockito.spy;
|
||||
|
||||
|
@ -223,6 +224,12 @@ public class NameNodeAdapter {
|
|||
return fsnSpy;
|
||||
}
|
||||
|
||||
public static BlockManager spyOnBlockManager(NameNode nn) {
|
||||
BlockManager bmSpy = Mockito.spy(nn.getNamesystem().getBlockManager());
|
||||
nn.getNamesystem().setBlockManagerForTesting(bmSpy);
|
||||
return bmSpy;
|
||||
}
|
||||
|
||||
public static ReentrantReadWriteLock spyOnFsLock(FSNamesystem fsn) {
|
||||
ReentrantReadWriteLock spy = Mockito.spy(fsn.getFsLockForTests());
|
||||
fsn.setFsLockForTests(spy);
|
||||
|
|
|
@ -24,8 +24,15 @@ import org.apache.hadoop.fs.permission.FsPermission;
|
|||
import org.apache.hadoop.hdfs.DistributedFileSystem;
|
||||
import org.apache.hadoop.hdfs.MiniDFSCluster;
|
||||
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
|
||||
import org.apache.hadoop.hdfs.protocol.Block;
|
||||
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
|
||||
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
|
||||
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
|
||||
import org.apache.hadoop.hdfs.qjournal.MiniQJMHACluster;
|
||||
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNode;
|
||||
import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter;
|
||||
import org.apache.hadoop.io.retry.FailoverProxyProvider;
|
||||
import org.apache.hadoop.io.retry.RetryInvocationHandler;
|
||||
import org.apache.hadoop.test.GenericTestUtils;
|
||||
|
@ -38,9 +45,12 @@ import java.io.File;
|
|||
import java.io.IOException;
|
||||
import java.lang.reflect.Proxy;
|
||||
import java.net.URI;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import static org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction;
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY;
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY;
|
||||
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_TAILEDITS_INPROGRESS_KEY;
|
||||
|
@ -48,6 +58,13 @@ import static org.junit.Assert.assertEquals;
|
|||
import static org.junit.Assert.assertTrue;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import static org.mockito.Matchers.any;
|
||||
import static org.mockito.Matchers.anyBoolean;
|
||||
import static org.mockito.Matchers.anyLong;
|
||||
import static org.mockito.Matchers.anyShort;
|
||||
import static org.mockito.Matchers.anyString;
|
||||
import static org.mockito.Mockito.doAnswer;
|
||||
import static org.mockito.Mockito.doNothing;
|
||||
|
||||
// Main unit tests for ObserverNode
|
||||
public class TestObserverNode {
|
||||
|
@ -299,6 +316,56 @@ public class TestObserverNode {
|
|||
assertEquals(0, rc);
|
||||
}
|
||||
|
||||
/**
|
||||
* Test the case where Observer should throw RetriableException, just like
|
||||
* active NN, for certain open() calls where block locations are not
|
||||
* available. See HDFS-13898 for details.
|
||||
*/
|
||||
@Test
|
||||
public void testObserverNodeSafeModeWithBlockLocations() throws Exception {
|
||||
setUpCluster(1);
|
||||
setObserverRead(true);
|
||||
|
||||
// Avoid starting DNs for the mini cluster.
|
||||
BlockManager bmSpy = NameNodeAdapter.spyOnBlockManager(namenodes[0]);
|
||||
doNothing().when(bmSpy)
|
||||
.verifyReplication(anyString(), anyShort(), anyString());
|
||||
|
||||
// Create a new file - the request should go to active.
|
||||
dfs.createNewFile(testPath);
|
||||
assertSentTo(0);
|
||||
|
||||
rollEditLogAndTail(0);
|
||||
dfs.open(testPath);
|
||||
assertSentTo(2);
|
||||
|
||||
// Set observer to safe mode.
|
||||
dfsCluster.getFileSystem(2).setSafeMode(SafeModeAction.SAFEMODE_ENTER);
|
||||
|
||||
// Mock block manager for observer to generate some fake blocks which
|
||||
// will trigger the (retriable) safe mode exception.
|
||||
final DatanodeInfo[] empty = {};
|
||||
bmSpy = NameNodeAdapter.spyOnBlockManager(namenodes[2]);
|
||||
doAnswer((invocation) -> {
|
||||
ExtendedBlock b = new ExtendedBlock("fake-pool", new Block(12345L));
|
||||
LocatedBlock fakeBlock = new LocatedBlock(b, empty);
|
||||
List<LocatedBlock> fakeBlocks = new ArrayList<>();
|
||||
fakeBlocks.add(fakeBlock);
|
||||
return new LocatedBlocks(0, false, fakeBlocks, null, true, null, null);
|
||||
}).when(bmSpy).createLocatedBlocks(any(), anyLong(), anyBoolean(),
|
||||
anyLong(), anyLong(), anyBoolean(), anyBoolean(), any(), any());
|
||||
|
||||
// Open the file again - it should throw retriable exception and then
|
||||
// failover to active.
|
||||
dfs.open(testPath);
|
||||
assertSentTo(0);
|
||||
|
||||
// Remove safe mode on observer, request should still go to it.
|
||||
dfsCluster.getFileSystem(2).setSafeMode(SafeModeAction.SAFEMODE_LEAVE);
|
||||
dfs.open(testPath);
|
||||
assertSentTo(2);
|
||||
}
|
||||
|
||||
// TODO this does not currently work because fetching the service state from
|
||||
// e.g. the StandbyNameNode also waits for the transaction ID to catch up.
|
||||
// This is disabled pending HDFS-13872 and HDFS-13749.
|
||||
|
|
Loading…
Reference in New Issue